registry: split out writer thread

This commit is contained in:
Miroslav Crnic
2025-09-26 15:09:54 +00:00
parent bfce6cbed5
commit 89b3038448
17 changed files with 1691 additions and 815 deletions

View File

@@ -3,12 +3,10 @@
"cpp/.**.d": true,
"cpp/.**.o": true,
"cpp/vgcore.*": true,
"cpp/eggs-shard": true,
"cpp/.cache": true,
"cpp/.mypy_cache": true,
"python/_crc32c*": true,
"go/integrationtest/integrationtest": true,
"go/runeggs/runeggs": true,
"go/runtern/runtern": true,
"go/gcdaemon/gcdaemon": true,
"go/cli/cli": true,
"kmod/*.cmd": true,
@@ -18,6 +16,7 @@
"kmod/*.mod.c": true,
"kmod/Module.symvers": true,
"kmod/modules.order": true,
"tern-integrationtests.*": true,
},
"editor.formatOnSaveMode": "modifications"
}
}

View File

@@ -5,7 +5,6 @@
#pragma once
#include "Env.hpp"
#include "LogsDB.hpp"
#include "Metrics.hpp"
#include "Msgs.hpp"
#include "RegistryClient.hpp"

View File

@@ -66,14 +66,26 @@ std::ostream& operator<<(std::ostream& out, Parity parity) {
return out;
}
static constexpr std::array<std::string, 4> ALL_STORAGE_CLASS_NAMES = {
"EMPTY",
"INLINE",
"HDD",
"FLASH",
};
static const std::unordered_map<std::string, uint8_t> STORAGE_CLASSES_BY_NAME = {
{"HDD", 2},
{"FLASH", 3},
{ALL_STORAGE_CLASS_NAMES[EMPTY_STORAGE], EMPTY_STORAGE},
{ALL_STORAGE_CLASS_NAMES[INLINE_STORAGE], INLINE_STORAGE},
{ALL_STORAGE_CLASS_NAMES[HDD_STORAGE], HDD_STORAGE},
{ALL_STORAGE_CLASS_NAMES[FLASH_STORAGE], FLASH_STORAGE},
};
uint8_t storageClassByName(const char* name) {
return STORAGE_CLASSES_BY_NAME.at(name);
}
std::string storageClassName(uint8_t storageClass) {
return ALL_STORAGE_CLASS_NAMES.at(storageClass);
}
std::ostream& operator<<(std::ostream& out, Crc crc) {
char buf[9];

View File

@@ -6,6 +6,7 @@
#include <cstdint>
#include <ostream>
#include <string>
#include "Assert.hpp"
#include "Common.hpp"
@@ -410,6 +411,7 @@ constexpr uint8_t HDD_STORAGE = 2;
constexpr uint8_t FLASH_STORAGE = 3;
uint8_t storageClassByName(const char* name);
std::string storageClassName(uint8_t storageClass);
struct Crc {
uint32_t u32;

View File

@@ -4,9 +4,10 @@
include_directories(${ternfs_SOURCE_DIR}/core ${ternfs_SOURCE_DIR}/wyhash)
add_library(registry Registry.hpp Registry.cpp RegistryDB.hpp RegistryDB.cpp
RegistryDBData.hpp Registerer.hpp Registerer.cpp RegistryServer.hpp RegistryServer.cpp
RegistryReader.hpp RegistryReader.cpp )
add_library(registry Registry.hpp Registry.cpp RegistryDB.hpp RegistryDB.cpp
RegistryDBData.hpp RegistryDBLogEntry.hpp RegistryDBLogEntry.cpp
Registerer.hpp Registerer.cpp RegistryServer.hpp RegistryServer.cpp
RegistryReader.hpp RegistryReader.cpp RegistryWriter.hpp RegistryWriter.cpp)
target_link_libraries(registry PRIVATE core)
add_executable(ternregistry ternregistry.cpp)

View File

@@ -2,9 +2,10 @@
//
// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
#include "Registerer.hpp"
#include "LogsDB.hpp"
#include <atomic>
static std::ostream &operator<<(std::ostream &o, const std::vector<FullRegistryInfo> &replicas) {
o << "[";
@@ -14,7 +15,6 @@ static std::ostream &operator<<(std::ostream &o, const std::vector<FullRegistryI
return o << "]";
}
void Registerer::_init(const std::vector<FullRegistryInfo> &cachedReplicas) {
_env.updateAlert(_alert, "Waiting to register ourselves for the first time");
_updateReplicas(cachedReplicas);

View File

@@ -6,6 +6,7 @@
#include <atomic>
#include "LogsDB.hpp"
#include "Msgs.hpp"
#include "PeriodicLoop.hpp"
@@ -33,7 +34,7 @@ public:
}
inline std::shared_ptr<std::array<AddrsInfo, LogsDB::REPLICA_COUNT>> replicas() {
return _replicas;
return std::atomic_load(&_replicas);
}
virtual bool periodicStep() override;

View File

@@ -14,7 +14,6 @@
#include "Assert.hpp"
#include "Bincode.hpp"
#include "Crypto.hpp"
#include "Env.hpp"
#include "ErrorCount.hpp"
#include "LogsDB.hpp"
@@ -22,11 +21,12 @@
#include "Msgs.hpp"
#include "MsgsGen.hpp"
#include "Registry.hpp"
#include "Registerer.hpp"
#include "Registry.hpp"
#include "RegistryDB.hpp"
#include "RegistryReader.hpp"
#include "RegistryServer.hpp"
#include "RegistryWriter.hpp"
#include "SharedRocksDB.hpp"
#include "Time.hpp"
@@ -70,13 +70,13 @@ public:
RegistryLoop(
Logger &logger, std::shared_ptr<XmonAgent> xmon, const RegistryOptions& options,
Registerer& registerer, RegistryServer& server, LogsDB& logsDB, RegistryDB& registryDB,
std::vector<RegistryReader*> readers) :
std::vector<RegistryReader*> readers, RegistryWriter& writer) :
Loop(logger, xmon, "server"),
_options(options),
_registerer(registerer),
_server(server),
_logsDB(logsDB),
_registryDB(registryDB),
_writer(writer),
_replicas({}),
_replicaFinishedBootstrap({}),
_boostrapFinished(false),
@@ -93,7 +93,12 @@ public:
auto now = ternNow();
_logsDB.processIncomingMessages(_server.receivedLogsDBRequests(), _server.receivedLogsDBResponses());
_writer.pushLogsDBResponses(_server.receivedLogsDBResponses());
_server.receivedLogsDBResponses().clear();
_writer.pushLogsDBRequests(_server.receivedLogsDBRequests());
_server.receivedLogsDBRequests().clear();
auto& receivedRequests = _server.receivedRegistryRequests();
@@ -102,8 +107,7 @@ public:
_processBootstrapRequest(req);
continue;
}
auto& resp = _registryResponses.emplace_back();
resp.requestId = req.requestId;
if (unlikely(!_logsDB.isLeader())) {
LOG_DEBUG(_env, "not leader. dropping request from client %s", req.requestId);
continue;
@@ -127,76 +131,9 @@ public:
case RegistryMessageKind::ERASE_DECOMMISSIONED_BLOCK:
case RegistryMessageKind::ALL_BLOCK_SERVICES_DEPRECATED:{
_readRequests.emplace_back(std::move(req));
_registryResponses.pop_back();
break;
}
case RegistryMessageKind::REGISTER_BLOCK_SERVICES: {
if (_logEntries.size() >= LogsDB::IN_FLIGHT_APPEND_WINDOW) {
break;
}
bool failed = true;
//happy path. request fits in udp package and we don't want to copy
if (sizeof(now) + req.req.packedSize() <= LogsDB::DEFAULT_UDP_ENTRY_SIZE) {
try {
auto &entry = _logEntries.emplace_back();
entry.value.resize(sizeof(now) + req.req.packedSize());
ALWAYS_ASSERT(entry.value.size() <= LogsDB::DEFAULT_UDP_ENTRY_SIZE);
BincodeBuf buf((char *)(&entry.value[0]), entry.value.size());
buf.packScalar(now.ns);
req.req.pack(buf);
buf.ensureFinished();
_entriesRequestIds.emplace_back(req.requestId);
failed = false;
} catch (BincodeException e) {
LOG_ERROR(_env,"failed packing log entry request %s", e.what());
_logEntries.pop_back();
}
} else {
std::vector<RegisterBlockServiceInfo> blockservices;
blockservices = req.req.getRegisterBlockServices().blockServices.els;
RegistryReqContainer tmpReqContainer;
auto& tmpBsReq = tmpReqContainer.setRegisterBlockServices();
while (!blockservices.empty()) {
size_t currentSize = sizeof(now) + RegistryReqContainer::STATIC_SIZE;
size_t toCopy = 0;
for (auto bsIt = blockservices.rbegin(); bsIt != blockservices.rend(); ++bsIt) {
currentSize += bsIt->packedSize();
if (currentSize > LogsDB::DEFAULT_UDP_ENTRY_SIZE) {
break;
}
++toCopy;
}
tmpBsReq.blockServices.els.clear();
tmpBsReq.blockServices.els.insert(
tmpBsReq.blockServices.els.end(),blockservices.end() - toCopy,blockservices.end());
blockservices.resize(blockservices.size() - toCopy);
try {
auto &entry = _logEntries.emplace_back();
entry.value.resize(sizeof(now) + tmpReqContainer.packedSize());
ALWAYS_ASSERT(entry.value.size() <= LogsDB::DEFAULT_UDP_ENTRY_SIZE);
BincodeBuf buf((char *)(&entry.value[0]), entry.value.size());
buf.packScalar(now.ns);
tmpReqContainer.pack(buf);
buf.ensureFinished();
_entriesRequestIds.emplace_back(req.requestId);
failed = false;
} catch (BincodeException e) {
LOG_ERROR(_env,"failed packing log entry request %s", e.what());
_logEntries.pop_back();
break;
}
}
}
if (!failed) {
// we are not sending response yet. we can't leave empty response otherwise
// server will drop connection
_registryResponses.pop_back();
} else {
LOG_ERROR(_env,"FAILED REGISTERING");
}
break;
}
case RegistryMessageKind::REGISTER_BLOCK_SERVICES:
case RegistryMessageKind::DECOMMISSION_BLOCK_SERVICE:
case RegistryMessageKind::CREATE_LOCATION:
case RegistryMessageKind::RENAME_LOCATION:
@@ -209,146 +146,40 @@ public:
case RegistryMessageKind::MOVE_CDC_LEADER:
case RegistryMessageKind::CLEAR_CDC_INFO:
case RegistryMessageKind::UPDATE_BLOCK_SERVICE_PATH: {
if (_logEntries.size() >= LogsDB::IN_FLIGHT_APPEND_WINDOW) {
break;
}
try {
auto &entry = _logEntries.emplace_back();
entry.value.resize(sizeof(now) + req.req.packedSize());
ALWAYS_ASSERT(entry.value.size() <= LogsDB::MAX_UDP_ENTRY_SIZE);
BincodeBuf buf((char *)(&entry.value[0]), entry.value.size());
buf.packScalar(now.ns);
req.req.pack(buf);
buf.ensureFinished();
_entriesRequestIds.emplace_back(req.requestId);
// we are not sending response yet. we can't leave empty response otherwise
// server will drop connection
_registryResponses.pop_back();
} catch (BincodeException e) {
LOG_ERROR(_env,"failed packing log entry request %s", e.what());
_logEntries.pop_back();
}
_writeRequests.emplace_back(std::move(req));
break;
}
case RegistryMessageKind::EMPTY:
case RegistryMessageKind::ERROR:
auto& resp = _registryResponses.emplace_back();
resp.requestId = req.requestId;
break;
}
}
receivedRequests.clear();
{
auto readReqBegin = _readRequests.begin();
auto readReqEnd = _readRequests.end();
for (size_t i = 0; i < _readers.size() && readReqBegin != readReqEnd; ++i) {
auto& reader = *_readers[i];
readReqBegin = reader.pushRequests(readReqBegin,readReqEnd);
}
for (;readReqBegin != readReqEnd; ++readReqBegin) {
auto& resp = _registryResponses.emplace_back();
resp.requestId = readReqBegin->requestId;
}
_readRequests.clear();
auto writeReqBegin = _writer.pushRegistryRequests(_writeRequests.begin(), _writeRequests.end());
for (;writeReqBegin != _writeRequests.end(); ++writeReqBegin) {
auto& resp = _registryResponses.emplace_back();
resp.requestId = writeReqBegin->requestId;
}
_writeRequests.clear();
if (_logsDB.isLeader()) {
_logsDB.appendEntries(_logEntries);
for (int i = 0; i < _logEntries.size(); ++i) {
auto &entry = _logEntries[i];
auto requestId = _entriesRequestIds[i];
if (entry.idx == 0) {
LOG_DEBUG(_env, "could not append entry for request %s", requestId);
// empty response drops request
_registryResponses.emplace_back().requestId = requestId;
continue;
}
_logIdxToRequestId.emplace(entry.idx.u64, requestId);
}
_logEntries.clear();
_entriesRequestIds.clear();
auto readReqBegin = _readRequests.begin();
auto readReqEnd = _readRequests.end();
for (size_t i = 0; i < _readers.size() && readReqBegin != readReqEnd; ++i) {
auto& reader = *_readers[i];
readReqBegin = reader.pushRequests(readReqBegin,readReqEnd);
}
ALWAYS_ASSERT(_logEntries.empty());
ALWAYS_ASSERT(_entriesRequestIds.empty());
do {
_logEntries.clear();
_logsDB.readEntries(_logEntries);
_registryDB.processLogEntries(_logEntries, _writeResults);
} while (!_logEntries.empty());
_logsDB.flush(true);
_logsDB.getOutgoingMessages(_logsDBOutRequests, _logsDBOutResponses);
LOG_TRACE(_env, "Sending %s log requests and %s log responses", _logsDBOutRequests.size(), _logsDBOutResponses.size());
_server.sendLogsDBMessages(_logsDBOutRequests, _logsDBOutResponses);
for (auto &writeResult : _writeResults) {
auto& regiResp = _registryResponses.emplace_back();
auto requestIdIt = _logIdxToRequestId.find(writeResult.idx.u64);
if (requestIdIt == _logIdxToRequestId.end()) {
_registryResponses.pop_back();
continue;
}
regiResp.requestId = requestIdIt->second;
RegistryRespContainer& resp = regiResp.resp;
if (writeResult.err != TernError::NO_ERROR) {
resp.setError() = writeResult.err;
continue;
}
switch (writeResult.kind) {
case RegistryMessageKind::ERROR:
resp.setError() = writeResult.err;
break;
case RegistryMessageKind::CREATE_LOCATION:
resp.setCreateLocation();
break;
case RegistryMessageKind::RENAME_LOCATION:
resp.setRenameLocation();
break;
case RegistryMessageKind::REGISTER_SHARD:
resp.setRegisterShard();
break;
case RegistryMessageKind::REGISTER_CDC:
resp.setRegisterCdc();
break;
case RegistryMessageKind::SET_BLOCK_SERVICE_FLAGS:
resp.setSetBlockServiceFlags();
break;
case RegistryMessageKind::REGISTER_BLOCK_SERVICES:
resp.setRegisterBlockServices();
break;
case RegistryMessageKind::REGISTER_REGISTRY:
resp.setRegisterRegistry();
break;
case RegistryMessageKind::DECOMMISSION_BLOCK_SERVICE:
resp.setDecommissionBlockService();
break;
case RegistryMessageKind::MOVE_SHARD_LEADER:
resp.setMoveShardLeader();
break;
case RegistryMessageKind::CLEAR_SHARD_INFO:
resp.setClearShardInfo();
break;
case RegistryMessageKind::MOVE_CDC_LEADER:
resp.setMoveCdcLeader();
break;
case RegistryMessageKind::CLEAR_CDC_INFO:
resp.setClearCdcInfo();
break;
case RegistryMessageKind::UPDATE_BLOCK_SERVICE_PATH:
resp.setUpdateBlockServicePath();
break;
default:
ALWAYS_ASSERT(false);
break;
}
for (;readReqBegin != readReqEnd; ++readReqBegin) {
auto& resp = _registryResponses.emplace_back();
resp.requestId = readReqBegin->requestId;
}
_writeResults.clear();
_readRequests.clear();
_server.sendRegistryResponses(_registryResponses);
_registryResponses.clear();
}
@@ -357,7 +188,7 @@ private:
Registerer& _registerer;
RegistryServer& _server;
LogsDB& _logsDB;
RegistryDB& _registryDB;
RegistryWriter& _writer;
std::array<AddrsInfo, LogsDB::REPLICA_COUNT> _replicas;
std::array<bool, LogsDB::REPLICA_COUNT> _replicaFinishedBootstrap;
@@ -365,24 +196,12 @@ private:
std::vector<RegistryReader*> _readers;
std::vector<RegistryRequest> _readRequests;
std::vector<RegistryRequest> _writeRequests;
std::vector<RegistryResponse> _registryResponses;
// buffer for reading/writing log entries
std::vector<LogsDBLogEntry> _logEntries;
std::vector<uint64_t> _entriesRequestIds;
std::vector<LogsDBRequest*> _logsDBOutRequests;
std::vector<LogsDBResponse> _logsDBOutResponses;
// keeping track which log entry corresponds to which request
std::unordered_map<uint64_t, uint64_t> _logIdxToRequestId;
// buffer for RegistryDB processLogEntries result
std::vector<RegistryDBWriteResult> _writeResults;
void _processBootstrapRequest(RegistryRequest &req) {
auto& resp = _registryResponses.emplace_back();
resp.requestId = req.requestId;
@@ -504,13 +323,18 @@ void Registry::start(const RegistryOptions& options, LoopThreads& threads) {
threads.emplace_back(LoopThread::Spawn(std::move(reader)));
}
auto writer = std::make_unique<RegistryWriter>(
logger, xmon, *_state->registryDB, *_state->server, *_state->logsDB);
auto registerer = std::make_unique<Registerer>(
logger, xmon, options, _state->server->boundAddresses(), cachedRegistries);
auto registryLoop = std::make_unique<RegistryLoop>(
logger, xmon, options, *registerer, *_state->server,
*_state->logsDB, *_state->registryDB, std::move(readers));
*_state->logsDB, *_state->registryDB, std::move(readers), *writer);
threads.emplace_back(LoopThread::Spawn(std::move(registerer)));
threads.emplace_back(LoopThread::Spawn(std::move(writer)));
threads.emplace_back(LoopThread::Spawn(std::move(registryLoop)));
}

File diff suppressed because it is too large Load Diff

View File

@@ -10,10 +10,11 @@
#include "Msgs.hpp"
#include "MsgsGen.hpp"
#include "RegistryCommon.hpp"
#include "RegistryDBLogEntry.hpp"
#include "SharedRocksDB.hpp"
struct RegistryDBWriteResult {
LogIdx idx;
EntryReqIdx idx;
RegistryMessageKind kind;
TernError err;
};
@@ -21,7 +22,7 @@ struct RegistryDBWriteResult {
class RegistryDB {
public:
static std::vector<rocksdb::ColumnFamilyDescriptor> getColumnFamilyDescriptors();
RegistryDB(Logger& logger, std::shared_ptr<XmonAgent>& xmon, const RegistryOptions& options, const SharedRocksDB& sharedDB);
~RegistryDB() {}
void close();
@@ -45,7 +46,7 @@ private:
void _initDb();
bool _updateStaleBlockServices(TernTime now);
void _recalcualteShardBlockServices(bool writableChanged);
void _recalculateShardBlockServices(bool writableChanged);
const RegistryOptions& _options;
Env _env;

View File

@@ -0,0 +1,92 @@
// Copyright 2025 XTX Markets Technologies Limited
//
// SPDX-License-Identifier: GPL-2.0-or-later
#include "RegistryDBLogEntry.hpp"
#include "Assert.hpp"
#include "Bincode.hpp"
#include "LogsDB.hpp"
#include "MsgsGen.hpp"
#include "RegistryServer.hpp"
RegistryDBLogEntry::RegistryDBLogEntry() { clear(); }
size_t RegistryDBLogEntry::packedSize() const {
return requests.packedSize() + sizeof(entryTime);
}
void RegistryDBLogEntry::pack(BincodeBuf& buf) const {
buf.packScalar(entryTime.ns);
buf.packList(requests);
}
void RegistryDBLogEntry::unpack(BincodeBuf& buf) {
entryTime.ns = buf.unpackScalar<uint64_t>();
buf.unpackList(requests);
}
void RegistryDBLogEntry::clear() {
entryTime = TernTime(0);
requests.clear();
}
bool RegistryDBLogEntry::operator==(const RegistryDBLogEntry&rhs) const {
return entryTime == rhs.entryTime && requests == rhs.requests;
}
void RegistryDBLogEntry::requestsToLogEntries(std::vector<RegistryRequest>& requests,
size_t maxLogEntrySize, TernTime entryTime, std::vector<LogsDBLogEntry>& out, std::vector<EntryReqIdx>& outIndices)
{
RegistryDBLogEntry currentEntry;
currentEntry.entryTime = entryTime;
size_t sizeAvailable = maxLogEntrySize - currentEntry.packedSize();
EntryReqIdx currentIdx{out.size(), 0};
auto serialize = [&]() {
auto& newEntry = out.emplace_back();
newEntry.value.resize(maxLogEntrySize - sizeAvailable);
BincodeBuf buf((char*)newEntry.value.data(), newEntry.value.size());
currentEntry.pack(buf);
buf.ensureFinished();
currentEntry.clear();
currentEntry.entryTime = entryTime;
sizeAvailable = maxLogEntrySize - currentEntry.packedSize();
++currentIdx.logIdx;
currentIdx.offset = 0;
};
for (auto &reqWrapper : requests) {
auto &req = reqWrapper.req;
if (req.packedSize() > sizeAvailable) {
serialize();
}
auto& entryOut = out.back();
auto reqPackedSize = req.packedSize();
if (reqPackedSize > sizeAvailable) {
// This is the only request too big to fit in an empty log entry. We need to split it.
ALWAYS_ASSERT(req.kind() == RegistryMessageKind::REGISTER_BLOCK_SERVICES);
auto outBlockServices = &currentEntry.requests.els.emplace_back().setRegisterBlockServices().blockServices.els;
sizeAvailable -= currentEntry.requests.els.back().packedSize();
auto& blockServices = req.getRegisterBlockServices().blockServices.els;
for (auto& blockService : blockServices) {
auto bsPackedSize = blockService.packedSize();
if (bsPackedSize > sizeAvailable) {
serialize();
outBlockServices = &currentEntry.requests.els.emplace_back().setRegisterBlockServices().blockServices.els;
sizeAvailable -= currentEntry.requests.els.back().packedSize();
}
outBlockServices->push_back(std::move(blockService));
sizeAvailable -= bsPackedSize;
}
outIndices.push_back(currentIdx);
++currentIdx.offset;
continue;
}
currentEntry.requests.els.emplace_back(std::move(req));
sizeAvailable -= reqPackedSize;
outIndices.push_back(currentIdx);
++currentIdx.offset;
}
if (!currentEntry.requests.els.empty()) {
serialize();
}
}

View File

@@ -0,0 +1,58 @@
// Copyright 2025 XTX Markets Technologies Limited
//
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Bincode.hpp"
#include "LogsDB.hpp"
#include "Msgs.hpp"
#include "MsgsGen.hpp"
#include "RegistryServer.hpp"
#include "Time.hpp"
#include <cstdint>
struct EntryReqIdx {
LogIdx logIdx;
uint8_t offset;
bool operator ==(const EntryReqIdx&) const = default;
};
struct RegistryDBApplyEntryResult {
LogIdx idx;
std::vector<RegistryRespContainer> results;
};
template <>
struct std::hash<EntryReqIdx> {
std::size_t operator()(const EntryReqIdx& k) const {
return hash<uint64_t>()(k.logIdx.u64 ^ ((uint64_t)k.offset << 56));
}
};
struct RegistryDBLogEntry {
public:
TernTime entryTime;
BincodeList<RegistryReqContainer> requests;
static constexpr uint16_t STATIC_SIZE = sizeof(entryTime) + BincodeList<RegistryReqContainer>::STATIC_SIZE;
RegistryDBLogEntry();
size_t packedSize() const;
void pack(BincodeBuf& buf) const;
void unpack(BincodeBuf& buf);
void clear();
bool operator==(const RegistryDBLogEntry&rhs) const;
static void requestsToLogEntries(std::vector<RegistryRequest>& requests,
size_t maxLogEntrySize, TernTime entryTime, std::vector<LogsDBLogEntry>& out, std::vector<EntryReqIdx>& outIndices);
};

View File

@@ -0,0 +1,179 @@
// Copyright 2025 XTX Markets Technologies Limited
//
// SPDX-License-Identifier: GPL-2.0-or-later
#include "RegistryWriter.hpp"
#include "Assert.hpp"
#include "LogsDB.hpp"
#include "Msgs.hpp"
#include "RegistryDB.hpp"
#include "RegistryDBLogEntry.hpp"
#include "RegistryServer.hpp"
#include <unistd.h>
const int REGISTRY_WRITER_QUEUE_SIZE = 4096;
const int MAX_WORK_ITEMS_AT_ONCE = 512;
RegistryWriter::RegistryWriter(Logger &logger, std::shared_ptr<XmonAgent> xmon, RegistryDB& registryDB,RegistryServer& server, LogsDB& logsDB) :
Loop(logger, xmon, "registry_writer"),
_registryDB(registryDB),
_server(server),
_logsDB(logsDB),
_logsDBRequestsQueue(REGISTRY_WRITER_QUEUE_SIZE, _waiter),
_logsDBResponsesQueue(REGISTRY_WRITER_QUEUE_SIZE, _waiter),
_registryRequestsQueue(REGISTRY_WRITER_QUEUE_SIZE, _waiter)
{
}
void RegistryWriter::sendStop() {
_registryRequestsQueue.close();
_logsDBRequestsQueue.close();
_logsDBResponsesQueue.close();
}
void RegistryWriter::step() {
// Clear all buffers
_registryRequests.clear();
_registryResponses.clear();
_logsDBOutRequests.clear();
_logsDBOutResponses.clear();
_writeResults.clear();
_logEntries.clear();
_waiter.wait(_logsDB.getNextTimeout());
if (unlikely(_registryRequestsQueue.isClosed())) {
stop();
return;
}
auto remainingPullBudget = MAX_WORK_ITEMS_AT_ONCE;
remainingPullBudget -= _logsDBResponsesQueue.pull(_logsDBResponses, remainingPullBudget);
remainingPullBudget -= _logsDBRequestsQueue.pull(_logsDBRequests, remainingPullBudget);
remainingPullBudget -= _registryRequestsQueue.pull(_registryRequests, remainingPullBudget);
_logsDB.processIncomingMessages(_logsDBRequests, _logsDBResponses);
// Append entries to logs DB
if (_logsDB.isLeader()) {
RegistryDBLogEntry::requestsToLogEntries(
_registryRequests, LogsDB::DEFAULT_UDP_ENTRY_SIZE, ternNow(), _logEntries, _entriesRequestIds);
_logsDB.appendEntries(_logEntries);
size_t requestIdx = 0;
for (int i = 0; i < _logEntries.size(); ++i) {
auto &entry = _logEntries[i];
while (requestIdx < _entriesRequestIds.size() && _entriesRequestIds[requestIdx].logIdx == i) {
auto entryIdx = _entriesRequestIds[requestIdx];
auto requestId = _registryRequests[requestIdx].requestId;
++requestIdx;
if (entry.idx == 0) {
LOG_DEBUG(_env, "could not append entry for request %s", requestId);
// empty response drops request
auto& resp = _registryResponses.emplace_back();
resp.requestId = requestId;
} else {
entryIdx.logIdx = entry.idx;
_logIdxToRequestId.emplace(entryIdx, requestId);
}
}
_logEntries.clear();
_entriesRequestIds.clear();
}
} else {
for (auto &req : _registryRequests) {
auto& resp = _registryResponses.emplace_back();
resp.requestId = req.requestId;
}
}
// Apply log entries
_applyLogEntries();
_logsDB.getOutgoingMessages(_logsDBOutRequests, _logsDBOutResponses);
_server.sendLogsDBMessages(_logsDBOutRequests, _logsDBOutResponses);
// Send responses
_server.sendRegistryResponses(_registryResponses);
_registryResponses.clear();
}
void RegistryWriter::_applyLogEntries() {
do {
_logEntries.clear();
_logsDB.readEntries(_logEntries);
_registryDB.processLogEntries(_logEntries, _writeResults);
} while (!_logEntries.empty());
_logsDB.flush(true);
// Convert write results to registry responses
for (auto &writeResult : _writeResults) {
auto requestIdIt = _logIdxToRequestId.find(writeResult.idx);
if (requestIdIt == _logIdxToRequestId.end()) {
continue;
}
auto& regiResp = _registryResponses.emplace_back();
regiResp.requestId = requestIdIt->second;
RegistryRespContainer& resp = regiResp.resp;
if (writeResult.err != TernError::NO_ERROR) {
resp.setError() = writeResult.err;
continue;
}
switch (writeResult.kind) {
case RegistryMessageKind::ERROR:
resp.setError() = writeResult.err;
break;
case RegistryMessageKind::CREATE_LOCATION:
resp.setCreateLocation();
break;
case RegistryMessageKind::RENAME_LOCATION:
resp.setRenameLocation();
break;
case RegistryMessageKind::REGISTER_SHARD:
resp.setRegisterShard();
break;
case RegistryMessageKind::REGISTER_CDC:
resp.setRegisterCdc();
break;
case RegistryMessageKind::SET_BLOCK_SERVICE_FLAGS:
resp.setSetBlockServiceFlags();
break;
case RegistryMessageKind::REGISTER_BLOCK_SERVICES:
resp.setRegisterBlockServices();
break;
case RegistryMessageKind::REGISTER_REGISTRY:
resp.setRegisterRegistry();
break;
case RegistryMessageKind::DECOMMISSION_BLOCK_SERVICE:
resp.setDecommissionBlockService();
break;
case RegistryMessageKind::MOVE_SHARD_LEADER:
resp.setMoveShardLeader();
break;
case RegistryMessageKind::CLEAR_SHARD_INFO:
resp.setClearShardInfo();
break;
case RegistryMessageKind::MOVE_CDC_LEADER:
resp.setMoveCdcLeader();
break;
case RegistryMessageKind::CLEAR_CDC_INFO:
resp.setClearCdcInfo();
break;
case RegistryMessageKind::UPDATE_BLOCK_SERVICE_PATH:
resp.setUpdateBlockServicePath();
break;
default:
ALWAYS_ASSERT(false);
break;
}
}
_writeResults.clear();
}

View File

@@ -0,0 +1,77 @@
// Copyright 2025 XTX Markets Technologies Limited
//
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Loop.hpp"
#include "RegistryDB.hpp"
#include "RegistryDBLogEntry.hpp"
#include "RegistryServer.hpp"
#include "SPSC.hpp"
#include "LogsDB.hpp"
#include <vector>
class RegistryWriter : public Loop {
public:
RegistryWriter(
Logger &logger,
std::shared_ptr<XmonAgent> xmon,
RegistryDB& registryDB,
RegistryServer& server,
LogsDB& logsDB);
virtual ~RegistryWriter() {}
// Push methods for adding requests to the queue
uint32_t pushLogsDBRequests(std::vector<LogsDBRequest>& requests) {
return _logsDBRequestsQueue.push(requests);
}
uint32_t pushLogsDBResponses(std::vector<LogsDBResponse>& responses) {
return _logsDBResponsesQueue.push(responses);
}
typename std::vector<RegistryRequest>::iterator pushRegistryRequests(
typename std::vector<RegistryRequest>::iterator being,
typename std::vector<RegistryRequest>::iterator end)
{
return _registryRequestsQueue.push(being, end);
}
void sendStop() override;
virtual void step() override;
private:
RegistryDB& _registryDB;
RegistryServer& _server;
LogsDB& _logsDB;
MultiSPSCWaiter _waiter;
SPSC<LogsDBRequest, true> _logsDBRequestsQueue;
SPSC<LogsDBResponse, true> _logsDBResponsesQueue;
SPSC<RegistryRequest, true> _registryRequestsQueue;
// Buffers for processing
std::vector<LogsDBRequest> _logsDBRequests;
std::vector<LogsDBResponse> _logsDBResponses;
std::vector<RegistryRequest> _registryRequests;
std::vector<RegistryResponse> _registryResponses;
// Buffers for outgoing messages
std::vector<LogsDBRequest*> _logsDBOutRequests;
std::vector<LogsDBResponse> _logsDBOutResponses;
// Buffer for RegistryDB processLogEntries result
std::vector<RegistryDBWriteResult> _writeResults;
// Buffer for log entries
std::vector<LogsDBLogEntry> _logEntries;
std::vector<EntryReqIdx> _entriesRequestIds;
// Keeping track which log entry corresponds to which request
std::unordered_map<EntryReqIdx, uint64_t> _logIdxToRequestId;
void _applyLogEntries();
};

View File

@@ -18,11 +18,12 @@ fi
echo "$(tput bold)C++ tests, sanitized$(tput sgr0)"
set -x
./build.py $sanitized rs/rs-tests crc32c/crc32c-tests tests/tests tests/logsdbtests
./build.py $sanitized rs/rs-tests crc32c/crc32c-tests tests/tests tests/logsdbtests tests/registrydbtests
UBSAN_OPTIONS=print_stacktrace=1 ./build/$sanitized/rs/rs-tests
UBSAN_OPTIONS=print_stacktrace=1 ./build/$sanitized/crc32c/crc32c-tests
UBSAN_OPTIONS=print_stacktrace=1 ./build/$sanitized/tests/tests
UBSAN_OPTIONS=print_stacktrace=1 ./build/$sanitized/tests/logsdbtests
UBSAN_OPTIONS=print_stacktrace=1 ./build/$sanitized/tests/registrydbtests
set +x
# valgrind doesn't support fnctl F_SET_RW_HINT (1036), and as far as I can
@@ -30,9 +31,10 @@ set +x
echo "$(tput bold)C++ tests, valgrind$(tput sgr0)"
set -x
./build.py $valgrind rs/rs-tests crc32c/crc32c-tests tests/tests tests/logsdbtests
./build.py $valgrind rs/rs-tests crc32c/crc32c-tests tests/tests tests/logsdbtests tests/registrydbtests
valgrind --exit-on-first-error=yes -q --error-exitcode=1 ./build/$valgrind/rs/rs-tests
valgrind --exit-on-first-error=yes -q --error-exitcode=1 ./build/$valgrind/crc32c/crc32c-tests
valgrind --exit-on-first-error=yes -q --suppressions=valgrind-suppressions --error-exitcode=1 ./build/$valgrind/tests/tests 2> >(grep -v "Warning: unimplemented fcntl command: 1036")
valgrind --exit-on-first-error=yes -q --suppressions=valgrind-suppressions --error-exitcode=1 ./build/$valgrind/tests/logsdbtests 2> >(grep -v "Warning: unimplemented fcntl command: 1036")
valgrind --exit-on-first-error=yes -q --suppressions=valgrind-suppressions --error-exitcode=1 ./build/$valgrind/tests/registrydbtests 2> >(grep -v "Warning: unimplemented fcntl command: 1036")
set +x

File diff suppressed because it is too large Load Diff

View File

@@ -39,10 +39,14 @@ struct TempRegistryDB {
}
void close() {
db->close();
sharedDB->close();
db.reset();
sharedDB.reset();
if (db) {
db->close();
db.reset();
}
if (sharedDB) {
sharedDB->close();
sharedDB.reset();
}
}
~TempRegistryDB() {
@@ -57,7 +61,6 @@ struct TempRegistryDB {
}
void initSharedDB() {
sharedDB->registerCFDescriptors({{rocksdb::kDefaultColumnFamilyName, {}}});
sharedDB->registerCFDescriptors(RegistryDB::getColumnFamilyDescriptors());
rocksdb::Options rocksDBOptions;
rocksDBOptions.create_if_missing = true;