shard: distributed log implementation and shard can use it with a flag set

This commit is contained in:
Miroslav Crnic
2024-03-12 11:02:04 +00:00
committed by GitHub Enterprise
parent d5fb66b694
commit b240de53b5
32 changed files with 4697 additions and 145 deletions
+56 -12
View File
@@ -1,4 +1,5 @@
#include <chrono>
#include <cstring>
#include <fstream>
#include <memory>
#include <mutex>
@@ -21,6 +22,7 @@
#include "Env.hpp"
#include "Exception.hpp"
#include "Msgs.hpp"
#include "MsgsGen.hpp"
#include "Shard.hpp"
#include "Time.hpp"
#include "CDCDB.hpp"
@@ -38,6 +40,8 @@
struct CDCShared {
CDCDB& db;
std::array<std::atomic<uint16_t>, 2> ownPorts;
std::mutex replicasLock;
std::array<AddrsInfo, 5> replicas;
std::mutex shardsMutex;
std::array<ShardInfo, 256> shards;
// How long it took us to process the entire request, from parse to response.
@@ -826,39 +830,77 @@ public:
// Periodic loop that registers this CDC replica with shuckle and, once a registration has
// succeeded, also fetches the CDC replica list from shuckle and stores it in CDCShared.
// NOTE(review): this listing appears to interleave the pre- and post-change lines of the
// hunk (e.g. two `_alert(10_sec)` initializers, two port1/port2 loads, both registerCDC and
// registerCDCReplica calls) — confirm which lines are live before relying on the comments below.
struct CDCRegisterer : PeriodicLoop {
CDCShared& _shared;
uint32_t _ownIp1;
uint32_t _ownIp2;
std::string _shuckleHost;
uint16_t _shucklePort;
bool _hasSecondIp;
XmonNCAlert _alert;
ReplicaId _replicaId;
ReplicaId _leaderReplicaId;
// Addresses (two ip/port pairs) sent to shuckle; ports are filled in lazily by periodicStep().
AddrsInfo _info;
// True once both ports have been copied from _shared.ownPorts into _info.
bool _infoLoaded;
// True once registerCDCReplica has succeeded at least once.
bool _registerCompleted;
public:
CDCRegisterer(Logger& logger, std::shared_ptr<XmonAgent>& xmon, const CDCOptions& options, CDCShared& shared):
PeriodicLoop(logger, xmon, "registerer", { 1_sec, 1_mins }),
_shared(shared),
_ownIp1(options.ipPorts[0].ip),
_ownIp2(options.ipPorts[1].ip),
_shuckleHost(options.shuckleHost),
_shucklePort(options.shucklePort),
_hasSecondIp(options.ipPorts[1].ip != 0),
_alert(10_sec)
{}
_alert(10_sec),
_replicaId(options.replicaId),
_leaderReplicaId(options.leaderReplicaId),
_infoLoaded(false),
_registerCompleted(false)
{
// Copy both configured IPs into _info after converting them with htonl.
uint32_t ip1 = options.ipPorts[0].ip;
uint32_t ip2 = options.ipPorts[1].ip;
uint32_t ip = htonl(ip1);
memcpy(_info.ip1.data.data(), &ip, 4);
ip = htonl(ip2);
memcpy(_info.ip2.data.data(), &ip, 4);
}
virtual ~CDCRegisterer() = default;
// One registration attempt. Returns false on every early-out/failure path and true after a
// successful registerCDCReplica call.
// NOTE(review): how PeriodicLoop reacts to true/false (retry interval etc.) is not visible here.
virtual bool periodicStep() override {
uint16_t port1 = _shared.ownPorts[0].load();
uint16_t port2 = _shared.ownPorts[1].load();
if (port1 == 0 || (_hasSecondIp && port2 == 0)) {
return false;
// Load the ports only once; a port of 0 presumably means "not published yet" — confirm with the writer of ownPorts.
if (unlikely(!_infoLoaded)) {
uint16_t port1 = _shared.ownPorts[0].load();
uint16_t port2 = _shared.ownPorts[1].load();
if (port1 == 0 || (_hasSecondIp && port2 == 0)) {
return false;
}
_info.port1 = port1;
_info.port2 = port2;
_infoLoaded = true;
}
LOG_DEBUG(_env, "Registering ourselves (CDC, %s:%s, %s:%s) with shuckle", in_addr{htonl(_ownIp1)}, port1, in_addr{htonl(_ownIp2)}, port2);
std::string err = registerCDC(_shuckleHost, _shucklePort, 10_sec, _ownIp1, port1, _ownIp2, port2);
std::string err;
// After the first successful registration, fetch the replica list before registering again.
if(likely(_registerCompleted)) {
std::array<AddrsInfo, 5> replicas;
LOG_INFO(_env, "Fetching replicas for CDC from shuckle");
err = fetchCDCReplicas(_shuckleHost, _shucklePort, 10_sec, replicas);
if (!err.empty()) {
_env.updateAlert(_alert, "Failed getting CDC replicas from shuckle: %s", err);
return false;
}
// Stop here (without re-registering) when shuckle's entry for our replica id differs from our own addresses.
if (_info != replicas[_replicaId.u8]) {
_env.updateAlert(_alert, "AddrsInfo in shuckle: %s , not matching local AddrsInfo: %s", replicas[_replicaId.u8], _info);
return false;
}
{
// Store the fetched list in the shared state while holding replicasLock.
std::lock_guard guard(_shared.replicasLock);
_shared.replicas = replicas;
}
}
LOG_DEBUG(_env, "Registering ourselves (CDC %s, %s) with shuckle", _replicaId, _info);
// The boolean argument is `_replicaId == _leaderReplicaId`; presumably it marks this replica as
// the leader — confirm against registerCDCReplica's signature.
err = registerCDCReplica(_shuckleHost, _shucklePort, 10_sec, _replicaId, _replicaId == _leaderReplicaId, _info);
if (!err.empty()) {
_env.updateAlert(_alert, "Couldn't register ourselves with shuckle: %s", err);
return false;
}
_env.clearAlert(_alert);
_registerCompleted = true;
return true;
}
};
@@ -1018,6 +1060,8 @@ void runCDC(const std::string& dbDir, const CDCOptions& options) {
LOG_INFO(env, "Running CDC with options:");
LOG_INFO(env, " level = %s", options.logLevel);
LOG_INFO(env, " logFile = '%s'", options.logFile);
LOG_INFO(env, " replicaId = %s", options.replicaId);
LOG_INFO(env, " leaderReplicaId = %s", options.leaderReplicaId);
LOG_INFO(env, " port = %s", options.port);
LOG_INFO(env, " shuckleHost = '%s'", options.shuckleHost);
LOG_INFO(env, " shucklePort = %s", options.shucklePort);
+4 -1
View File
@@ -3,6 +3,7 @@
#include "Env.hpp"
#include "Shard.hpp"
#include "Time.hpp"
#include <cstdint>
struct CDCOptions {
LogLevel logLevel = LogLevel::LOG_INFO;
@@ -17,6 +18,8 @@ struct CDCOptions {
bool xmon = false;
bool xmonProd = false;
bool metrics = false;
ReplicaId replicaId = 0;
ReplicaId leaderReplicaId = 0;
};
void runCDC(const std::string& dbDir, const CDCOptions& options);
void runCDC(const std::string& dbDir, const CDCOptions& options);
+31 -5
View File
@@ -9,7 +9,7 @@
#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while(false)
void usage(const char* binary) {
fprintf(stderr, "Usage: %s DIRECTORY\n\n", binary);
fprintf(stderr, "Usage: %s DIRECTORY [REPLICA_ID]\n\n", binary);
fprintf(stderr, "Options:\n");
fprintf(stderr, " -log-level trace|debug|info|error\n");
fprintf(stderr, " Note that 'trace' will only work for debug builds.\n");
@@ -29,6 +29,8 @@ void usage(const char* binary) {
fprintf(stderr, " Enable Xmon alerts.\n");
fprintf(stderr, " -metrics\n");
fprintf(stderr, " Enable metrics.\n");
fprintf(stderr, " -leader-replica-id\n");
fprintf(stderr, " Specify which replica we consider as leader. 0 by default.\n");
}
static uint32_t parseIpv4(const char* binary, const std::string& arg) {
@@ -69,10 +71,23 @@ static std::pair<uint32_t, uint16_t> parseIpv4Addr(const char* binary, const std
return {ip, port};
}
// Parses a replica id given on the command line.
// Accepts a number in [0, 4] as understood by std::stoul; anything else (non-numeric text,
// trailing characters, out-of-range values) terminates the process through die().
static uint8_t parseReplicaId(const std::string& arg) {
    size_t idx = 0;
    unsigned long replicaId = 0;
    try {
        replicaId = std::stoul(arg, &idx);
    } catch (...) {
        // std::stoul reports non-numeric / overflowing input by throwing; turn that into the
        // same fatal error as any other bad value instead of letting it escape main().
        die("Bad replicaId %s\n", arg.c_str());
    }
    if (idx != arg.size()) {
        die("Runoff character in number %s\n", arg.c_str());
    }
    if (replicaId > 4) {
        die("Bad replicaId %s\n", arg.c_str());
    }
    return static_cast<uint8_t>(replicaId);
}
int main(int argc, char** argv) {
namespace fs = std::filesystem;
const auto dieWithUsage = [&argv]() {
const auto dieWithUsage = [&argv]() {
usage(argv[0]);
exit(2);
};
@@ -143,15 +158,20 @@ int main(int argc, char** argv) {
}
} else if (arg == "-metrics") {
options.metrics = true;
} else {
} else if (arg == "-leader-replica-id") {
options.leaderReplicaId = parseReplicaId(getNextArg());
} else{
args.emplace_back(std::move(arg));
}
}
if (args.size() != 1) {
fprintf(stderr, "Expecting one positional argument (DIRECTORY), got %ld.\n", args.size());
if (args.size() == 0 || args.size() > 2) {
fprintf(stderr, "Expecting one or two positional argument (DIRECTORY) [REPLICA_ID], got %ld.\n", args.size());
dieWithUsage();
}
if (args.size() < 2) {
args.emplace_back("0");
}
#ifndef EGGS_DEBUG
if (options.logLevel <= LogLevel::LOG_TRACE) {
@@ -177,6 +197,12 @@ int main(int argc, char** argv) {
throw EXPLICIT_SYSCALL_EXCEPTION(err.value(), "mkdir");
}
}
size_t processed;
int replicaId = std::stoi(args.at(1), &processed);
if (processed != args.at(1).size() || replicaId < 0 || replicaId > 4) {
die("Invalid replicaId '%s', expecting a number between 0 and 4.\n", args.at(2).c_str());
}
options.replicaId = replicaId;
runCDC(dbDir, options);
+1720 -3
View File
File diff suppressed because it is too large Load Diff
+105
View File
@@ -1,11 +1,116 @@
#pragma once
#include <ostream>
#include <vector>
#include <rocksdb/db.h>
#include "Env.hpp"
#include "Msgs.hpp"
#include "SharedRocksDB.hpp"
#include "Time.hpp"
// ** Releases **
// Released records are records which have been confirmed by the leader to have been at some point correctly replicated.
// Leader only confirms record with LogIdx X if all records up to and including X have been correctly replicated.
// This guarantee simplifies the message structure as we can move release point with a single message.
// Releases are not required from correctness perspective but they improve performance as we have a guarantee that any record
// before the release point can be read without having a quorum of replicas. It also allows followers to apply release records
// to a state machine without first checking if the record has been correctly replicated.
// It also simplifies cleanup during leader election. Imagine following scenario:
// Leader is replica 0. R indicates that a record is replicated to a specific replica, X that it is not.
// Leader replication window
// ReplicaId LogIdx(2) LogIdx(3) LogIdx(4)
// 0 R R R
// 1 X R X
// 2 R X R
// 3 R X R
// 4 R X R
// Replicas (0,1) suddenly go away
// Replicas (2,3,4) elect 2 as new leader
// During "recovery" (a process which cleans up after previous leader from LastReleasedIdx)
// They don't see Record with LogIdx(3). Because they got a majority of nodes saying there is no record
// They know it couldn't have been correctly replicated. It is safe to drop it and all records after it.
// Because we don't want holes or conflicts we rewind logIndex to 3 and next appended record will come with 3
// What happens when replicas (0,1) come back? They will receive a movement of the release point to an arbitrary point in the future.
// However they will, based on the leader token, notice it comes from a different leader.
// Since they were not part of the leader election they know their records after last released point have not been taken into
// account and could have been overwritten. At this point they drop these records and catch up from the lastReleased point.
// A single log record: its index in the log plus the payload bytes.
struct LogsDBLogEntry {
    LogIdx idx;
    std::vector<uint8_t> value;

    // Two entries are equal when both the index and the payload bytes match.
    bool operator==(const LogsDBLogEntry& oth) const {
        if (!(idx == oth.idx)) {
            return false;
        }
        return value == oth.value;
    }
};
std::ostream& operator<<(std::ostream& out, const LogsDBLogEntry& entry);
// A log protocol request together with bookkeeping fields.
// NOTE(review): replicaId presumably names the peer replica (sender for incoming requests,
// destination for outgoing ones) and sentTime the moment the request was last sent — confirm
// against LogsDBImpl, which is not visible here.
struct LogsDBRequest {
ReplicaId replicaId;
EggsTime sentTime;
// Header (requestId + kind) followed by the kind-specific request body.
LogRequestHeader header;
LogReqContainer requestContainer;
};
std::ostream& operator<<(std::ostream& out, const LogsDBRequest& entry);
// A log protocol response together with the replica it is associated with.
// NOTE(review): replicaId presumably names the peer replica (sender for incoming responses,
// destination for outgoing ones) — confirm against LogsDBImpl, which is not visible here.
struct LogsDBResponse {
ReplicaId replicaId;
// Header (requestId + kind) followed by the kind-specific response body.
LogResponseHeader header;
LogRespContainer responseContainer;
};
std::ostream& operator<<(std::ostream& out, const LogsDBResponse& entry);
class LogsDBImpl;
// Public interface of the replicated log. All state lives behind the LogsDBImpl pointer;
// the member functions are only declared here.
// NOTE(review): the class holds a raw LogsDBImpl* and declares a destructor but does not
// delete or define copy/move operations — if ~LogsDB() frees _impl, an accidental copy would
// double-free. Consider `LogsDB(const LogsDB&) = delete;` once callers are confirmed not to copy.
class LogsDB {
public:
// Number of replicas taking part in the log.
static constexpr size_t REPLICA_COUNT = 5;
// Tuning constants. Their exact use is in the implementation, which is not visible here.
static constexpr Duration PARTITION_TIME_SPAN = 12_hours;
static constexpr Duration RESPONSE_TIMEOUT = 10_ms;
static constexpr Duration SEND_RELEASE_INTERVAL = 300_ms;
static constexpr Duration LEADER_INACTIVE_TIMEOUT = 1_sec;
// 256
static constexpr size_t IN_FLIGHT_APPEND_WINDOW = 1 << 8;
// 16
static constexpr size_t CATCHUP_WINDOW = 1 << 4 ;
LogsDB() = delete;
// On start we verify last released data is less than 1.5 * PARTITION_TIME_SPAN old to guarantee we can catchup.
// If initialStart is set to true we skip the checks. In this case user is responsible to have their
// own state sufficiently up to date to be able to catch up
LogsDB(
Env& env,
SharedRocksDB& sharedDB,
ReplicaId replicaId,
LogIdx lastRead,
bool dontWaitForReplication,
bool dontDoReplication,
bool forceLeader,
bool avoidBeingLeader,
bool initialStart,
LogIdx forcedLastReleased);
~LogsDB();
void close();
void flush(bool sync);
// Both vectors are passed by non-const reference; whether they are consumed/cleared is
// decided by the implementation — TODO confirm.
void processIncomingMessages(std::vector<LogsDBRequest>& requests, std::vector<LogsDBResponse>& responses);
// Note the asymmetry: requests are handed out as pointers, responses by value.
void getOutgoingMessages(std::vector<LogsDBRequest*>& requests, std::vector<LogsDBResponse>& responses);
bool isLeader() const;
EggsError appendEntries(std::vector<LogsDBLogEntry>& entries);
void readEntries(std::vector<LogsDBLogEntry>& entries);
Duration getNextTimeout() const;
static std::vector<rocksdb::ColumnFamilyDescriptor> getColumnFamilyDescriptors();
private:
LogsDBImpl* _impl;
};
+23
View File
@@ -0,0 +1,23 @@
#pragma once
#include <cstdint>
#include <rocksdb/slice.h>
// Keys of the LogsDB metadata records. The underlying type is uint8_t, so every key is a
// single byte when turned into a rocksdb::Slice by logsDBMetadataKey().
// NOTE(review): the numeric values are presumably persisted on disk, in which case they must
// never be renumbered — confirm.
enum class LogsDBMetadataKey : uint8_t {
PARTITION_0_FIRST_WRITE_TIME = 0,
PARTITION_1_FIRST_WRITE_TIME = 1,
LEADER_TOKEN = 2,
LAST_RELEASED_IDX = 3,
LAST_RELEASED_TIME = 4,
};
// Namespace-scope objects holding each enumerator. logsDBMetadataKey() returns a slice that
// points at its argument, so these give it an object that stays alive for the whole program.
constexpr LogsDBMetadataKey PARTITION_0_FIRST_WRITE_TIME_KEY = LogsDBMetadataKey::PARTITION_0_FIRST_WRITE_TIME;
constexpr LogsDBMetadataKey PARTITION_1_FIRST_WRITE_TIME_KEY = LogsDBMetadataKey::PARTITION_1_FIRST_WRITE_TIME;
constexpr LogsDBMetadataKey LEADER_TOKEN_KEY = LogsDBMetadataKey::LEADER_TOKEN;
constexpr LogsDBMetadataKey LAST_RELEASED_IDX_KEY = LogsDBMetadataKey::LAST_RELEASED_IDX;
constexpr LogsDBMetadataKey LAST_RELEASED_TIME_KEY = LogsDBMetadataKey::LAST_RELEASED_TIME;
// Views a metadata key as a rocksdb::Slice of sizeof(LogsDBMetadataKey) bytes.
// The slice is built from the address of `k`, so `k` must stay alive for as long as the
// returned slice is used (pass one of the *_KEY constants rather than a temporary that
// the slice would outlive).
inline rocksdb::Slice logsDBMetadataKey(const LogsDBMetadataKey& k) {
    const char* keyBytes = reinterpret_cast<const char*>(&k);
    return rocksdb::Slice(keyBytes, sizeof(k));
}
+10
View File
@@ -60,3 +60,13 @@ std::ostream& operator<<(std::ostream& out, BlockServiceId id) {
out << buf;
return out;
}
// Prints the raw 64-bit index value.
std::ostream& operator<<(std::ostream& out, LogIdx idx) {
    return out << idx.u64;
}
// Prints the token as "<idx>:<replica>".
std::ostream& operator<<(std::ostream& out, LeaderToken token) {
    out << token.idx();
    out << ":";
    out << token.replica();
    return out;
}
+159
View File
@@ -323,6 +323,97 @@ struct BlockServiceId {
std::ostream& operator<<(std::ostream& out, BlockServiceId crc);
// we reserve 3 bits so that we can fit ReplicaId in LeaderToken
// Index of a record in the log, stored as a raw 64-bit value whose top three bits must be
// zero (see valid()).
struct LogIdx {
    uint64_t u64;

    constexpr LogIdx(): u64(0) {}
    // Converting constructor (not explicit); asserts that the value is valid().
    constexpr LogIdx(uint64_t idx): u64(idx) {
        ALWAYS_ASSERT(valid());
    }

    // Index `offset` positions further on; the result goes through the asserting constructor.
    LogIdx operator+(uint64_t offset) const {
        return LogIdx(u64 + offset);
    }

    // Pre-increment. Unlike operator+, this path does not re-check valid().
    LogIdx& operator++() {
        u64 += 1;
        return *this;
    }

    bool operator==(LogIdx rhs) const {
        return rhs.u64 == u64;
    }

    bool operator<(LogIdx other) const {
        return other.u64 > u64;
    }

    bool operator<=(LogIdx other) const {
        return !(other.u64 < u64);
    }

    // Serialized as a single 64-bit scalar.
    void pack(BincodeBuf& buf) const {
        buf.packScalar<uint64_t>(u64);
    }

    // Reads the 64-bit scalar back; the value is not validated here.
    void unpack(BincodeBuf& buf) {
        u64 = buf.unpackScalar<uint64_t>();
    }

    // True when the value fits in 61 bits, i.e. the three most significant bits are zero.
    constexpr bool valid() const {
        return (u64 >> 61) == 0;
    }
};

// Largest valid index: all of the low 61 bits set.
constexpr LogIdx MAX_LOG_IDX = LogIdx(~0ull >> 3);
std::ostream& operator<<(std::ostream& out, LogIdx idx);
struct LeaderToken {
uint64_t u64;
constexpr LeaderToken(): u64(0) {}
constexpr LeaderToken(ReplicaId replicaId, LogIdx idx): u64(idx.u64 << 3 | replicaId.u8) {
ALWAYS_ASSERT(replicaId.valid() && idx.valid());
}
bool operator==(LeaderToken rhs) const {
return u64 == rhs.u64;
}
constexpr bool operator<(LeaderToken rhs) const {
return u64 < rhs.u64;
}
constexpr ReplicaId replica() const {
return ReplicaId(u64 & 0x7);
}
constexpr LogIdx idx() const {
return LogIdx(u64 >> 3);
}
constexpr bool valid() const {
// we don't need to check LogIdx is valid as it any value is valid
return replica().valid();
}
void pack(BincodeBuf& buf) const {
buf.packScalar<uint64_t>(u64);
}
void unpack(BincodeBuf& buf) {
u64 = buf.unpackScalar<uint64_t>();
}
};
std::ostream& operator<<(std::ostream& out, LeaderToken token);
#include "MsgsGen.hpp"
// We often use this as a optional<EggsError>;
@@ -340,6 +431,10 @@ constexpr uint32_t SHARD_REQ_PROTOCOL_VERSION = 0x414853;
// '1414853'
constexpr uint32_t SHARD_RESP_PROTOCOL_VERSION = 0x1414853;
// Protocol tag for shard log messages: the bytes 'S','H','A',2 read as a little-endian uint32.
// >>> format(struct.unpack('<I', b'SHA\2')[0], 'x')
// '2414853'
constexpr uint32_t SHARD_LOG_PROTOCOL_VERSION = 0x2414853;
// >>> format(struct.unpack('<I', b'CDC\0')[0], 'x')
// '434443'
constexpr uint32_t CDC_REQ_PROTOCOL_VERSION = 0x434443;
@@ -356,6 +451,14 @@ constexpr uint32_t SHUCKLE_REQ_PROTOCOL_VERSION = 0x554853;
// '1554853'
constexpr uint32_t SHUCKLE_RESP_PROTOCOL_VERSION = 0x1554853;
// Protocol tag written at the start of every log request: 'L','O','G',0 as a little-endian uint32.
// >>> format(struct.unpack('<I', b'LOG\0')[0], 'x')
// '474f4c'
constexpr uint32_t LOG_REQ_PROTOCOL_VERSION = 0x474f4c;
// Protocol tag written at the start of every log response: 'L','O','G',1 as a little-endian uint32.
// >>> format(struct.unpack('<I', b'LOG\1')[0], 'x')
// '1474f4c'
constexpr uint32_t LOG_RESP_PROTOCOL_VERSION = 0x1474f4c;
// If this doesn't parse, no point in continuing attempting to parse
// the request.
struct ShardRequestHeader {
@@ -443,6 +546,62 @@ struct CDCResponseHeader {
}
};
// If this doesn't parse, no point in continuing attempting to parse
// the request.
// Fixed-size prefix of every log request: protocol version, request id and message kind.
struct LogRequestHeader {
    uint64_t requestId;
    // This is not guaranteed to be a valid log request kind yet.
    // The caller will have to validate.
    LogMessageKind kind;

    // protocol (4 bytes) + requestId (8 bytes) + kind (1 byte)
    static constexpr uint16_t STATIC_SIZE = sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint8_t);

    LogRequestHeader() = default;
    LogRequestHeader(uint64_t requestId_, LogMessageKind kind_): requestId(requestId_), kind(kind_) {}

    // Writes the protocol version, then requestId, then kind as one byte.
    void pack(BincodeBuf& buf) const {
        buf.packScalar<uint32_t>(LOG_REQ_PROTOCOL_VERSION);
        buf.packScalar<uint64_t>(requestId);
        buf.packScalar<uint8_t>(static_cast<uint8_t>(kind));
    }

    // Reads the header back in the same order; throws a BINCODE_EXCEPTION when the
    // protocol version does not match LOG_REQ_PROTOCOL_VERSION.
    void unpack(BincodeBuf& buf) {
        uint32_t version = buf.unpackScalar<uint32_t>();
        const bool versionMatches = (version == LOG_REQ_PROTOCOL_VERSION);
        if (!versionMatches) {
            throw BINCODE_EXCEPTION("bad log req protocol version %s, expected %s", version, LOG_REQ_PROTOCOL_VERSION);
        }
        requestId = buf.unpackScalar<uint64_t>();
        kind = static_cast<LogMessageKind>(buf.unpackScalar<uint8_t>());
    }
};
// Fixed-size prefix of every log response: protocol version, request id and message kind.
struct LogResponseHeader {
    // Id of the request this response answers.
    uint64_t requestId;
    // Not validated by unpack(); the caller has to check it is a known kind.
    LogMessageKind kind;

    // protocol + requestId + kind
    static constexpr uint16_t STATIC_SIZE = 4 + 8 + 1;

    LogResponseHeader() = default;
    LogResponseHeader(uint64_t requestId_, LogMessageKind kind_): requestId(requestId_), kind(kind_) {}

    // Writes the protocol version, then requestId, then kind as one byte.
    // const (like LogRequestHeader::pack): packing does not modify the header, and this lets
    // callers serialize a header they only hold by const reference.
    void pack(BincodeBuf& buf) const {
        buf.packScalar<uint32_t>(LOG_RESP_PROTOCOL_VERSION);
        buf.packScalar<uint64_t>(requestId);
        buf.packScalar<uint8_t>((uint8_t)kind);
    }

    // Reads the header back in the same order; throws a BINCODE_EXCEPTION when the
    // protocol version does not match LOG_RESP_PROTOCOL_VERSION.
    void unpack(BincodeBuf& buf) {
        uint32_t version = buf.unpackScalar<uint32_t>();
        if (version != LOG_RESP_PROTOCOL_VERSION) {
            throw BINCODE_EXCEPTION("bad log resp protocol version %s, expected %s", version, LOG_RESP_PROTOCOL_VERSION);
        }
        requestId = buf.unpackScalar<uint64_t>();
        kind = (LogMessageKind)buf.unpackScalar<uint8_t>();
    }
};
static constexpr uint8_t SNAPSHOT_POLICY_TAG = 1;
static constexpr uint8_t SPAN_POLICY_TAG = 2;
static constexpr uint8_t BLOCK_POLICY_TAG = 3;
+830 -1
View File
@@ -196,6 +196,21 @@ std::ostream& operator<<(std::ostream& out, EggsError err) {
case EggsError::DIFFERENT_ADDRS_INFO:
out << "DIFFERENT_ADDRS_INFO";
break;
case EggsError::LEADER_PREEMPTED:
out << "LEADER_PREEMPTED";
break;
case EggsError::LOG_ENTRY_MISSING:
out << "LOG_ENTRY_MISSING";
break;
case EggsError::LOG_ENTRY_TRIMMED:
out << "LOG_ENTRY_TRIMMED";
break;
case EggsError::LOG_ENTRY_UNRELEASED:
out << "LOG_ENTRY_UNRELEASED";
break;
case EggsError::LOG_ENTRY_RELEASED:
out << "LOG_ENTRY_RELEASED";
break;
default:
out << "EggsError(" << ((int)err) << ")";
break;
@@ -437,6 +452,36 @@ std::ostream& operator<<(std::ostream& out, BlocksMessageKind kind) {
return out;
}
// Prints the symbolic name of a log message kind, or "LogMessageKind(<number>)" for
// values without a known name.
std::ostream& operator<<(std::ostream& out, LogMessageKind kind) {
    switch (kind) {
    case LogMessageKind::LOG_WRITE:
        return out << "LOG_WRITE";
    case LogMessageKind::RELEASE:
        return out << "RELEASE";
    case LogMessageKind::LOG_READ:
        return out << "LOG_READ";
    case LogMessageKind::NEW_LEADER:
        return out << "NEW_LEADER";
    case LogMessageKind::NEW_LEADER_CONFIRM:
        return out << "NEW_LEADER_CONFIRM";
    case LogMessageKind::LOG_RECOVERY_READ:
        return out << "LOG_RECOVERY_READ";
    case LogMessageKind::LOG_RECOVERY_WRITE:
        return out << "LOG_RECOVERY_WRITE";
    default:
        return out << "LogMessageKind(" << ((int)kind) << ")";
    }
}
void FailureDomain::pack(BincodeBuf& buf) const {
buf.packFixedBytes<16>(name);
}
@@ -4089,6 +4134,302 @@ std::ostream& operator<<(std::ostream& out, const CheckBlockResp& x) {
return out;
}
// Serializes the fields in wire order: token, lastReleased, idx, value.
void LogWriteReq::pack(BincodeBuf& buf) const {
    token.pack(buf);
    lastReleased.pack(buf);
    idx.pack(buf);
    buf.packList<uint8_t>(value);
}

// Reads the fields back in the same order pack() wrote them.
void LogWriteReq::unpack(BincodeBuf& buf) {
    token.unpack(buf);
    lastReleased.unpack(buf);
    idx.unpack(buf);
    buf.unpackList<uint8_t>(value);
}

// Resets every field to its default value.
void LogWriteReq::clear() {
    token = LeaderToken{};
    lastReleased = LogIdx{};
    idx = LogIdx{};
    value.clear();
}

// Field-by-field comparison, stopping at the first difference.
bool LogWriteReq::operator==(const LogWriteReq& rhs) const {
    const bool differs =
        static_cast<LeaderToken>(token) != static_cast<LeaderToken>(rhs.token) ||
        static_cast<LogIdx>(lastReleased) != static_cast<LogIdx>(rhs.lastReleased) ||
        static_cast<LogIdx>(idx) != static_cast<LogIdx>(rhs.idx) ||
        value != rhs.value;
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const LogWriteReq& x) {
    out << "LogWriteReq(";
    out << "Token=" << x.token;
    out << ", " << "LastReleased=" << x.lastReleased;
    out << ", " << "Idx=" << x.idx;
    out << ", " << "Value=" << x.value;
    out << ")";
    return out;
}
// Serializes the single field: result as a 16-bit scalar.
void LogWriteResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
}

// Reads result back as a 16-bit scalar.
void LogWriteResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
}

// Resets result to zero.
void LogWriteResp::clear() {
    result = static_cast<uint16_t>(0);
}

// Two responses are equal when their result codes match.
bool LogWriteResp::operator==(const LogWriteResp& rhs) const {
    return static_cast<uint16_t>(result) == static_cast<uint16_t>(rhs.result);
}

// Human-readable dump of the response.
std::ostream& operator<<(std::ostream& out, const LogWriteResp& x) {
    out << "LogWriteResp(";
    out << "Result=" << x.result;
    out << ")";
    return out;
}
// Serializes the fields in wire order: token, lastReleased.
void ReleaseReq::pack(BincodeBuf& buf) const {
    token.pack(buf);
    lastReleased.pack(buf);
}

// Reads the fields back in the same order pack() wrote them.
void ReleaseReq::unpack(BincodeBuf& buf) {
    token.unpack(buf);
    lastReleased.unpack(buf);
}

// Resets every field to its default value.
void ReleaseReq::clear() {
    token = LeaderToken{};
    lastReleased = LogIdx{};
}

// Field-by-field comparison, stopping at the first difference.
bool ReleaseReq::operator==(const ReleaseReq& rhs) const {
    const bool differs =
        static_cast<LeaderToken>(token) != static_cast<LeaderToken>(rhs.token) ||
        static_cast<LogIdx>(lastReleased) != static_cast<LogIdx>(rhs.lastReleased);
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const ReleaseReq& x) {
    out << "ReleaseReq(";
    out << "Token=" << x.token;
    out << ", " << "LastReleased=" << x.lastReleased;
    out << ")";
    return out;
}
// Serializes the single field: result as a 16-bit scalar.
void ReleaseResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
}

// Reads result back as a 16-bit scalar.
void ReleaseResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
}

// Resets result to zero.
void ReleaseResp::clear() {
    result = static_cast<uint16_t>(0);
}

// Two responses are equal when their result codes match.
bool ReleaseResp::operator==(const ReleaseResp& rhs) const {
    return static_cast<uint16_t>(result) == static_cast<uint16_t>(rhs.result);
}

// Human-readable dump of the response.
std::ostream& operator<<(std::ostream& out, const ReleaseResp& x) {
    out << "ReleaseResp(";
    out << "Result=" << x.result;
    out << ")";
    return out;
}
// Serializes the single field: idx.
void LogReadReq::pack(BincodeBuf& buf) const {
    idx.pack(buf);
}

// Reads idx back.
void LogReadReq::unpack(BincodeBuf& buf) {
    idx.unpack(buf);
}

// Resets idx to its default value.
void LogReadReq::clear() {
    idx = LogIdx{};
}

// Two requests are equal when they ask for the same index.
bool LogReadReq::operator==(const LogReadReq& rhs) const {
    const bool differs = static_cast<LogIdx>(idx) != static_cast<LogIdx>(rhs.idx);
    return !differs;
}

// Human-readable dump of the request.
std::ostream& operator<<(std::ostream& out, const LogReadReq& x) {
    out << "LogReadReq(";
    out << "Idx=" << x.idx;
    out << ")";
    return out;
}
// Serializes the fields in wire order: result (16-bit scalar), value.
void LogReadResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
    buf.packList<uint8_t>(value);
}

// Reads the fields back in the same order pack() wrote them.
void LogReadResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
    buf.unpackList<uint8_t>(value);
}

// Resets result to zero and empties value.
void LogReadResp::clear() {
    result = static_cast<uint16_t>(0);
    value.clear();
}

// Field-by-field comparison, stopping at the first difference.
bool LogReadResp::operator==(const LogReadResp& rhs) const {
    const bool differs =
        static_cast<uint16_t>(result) != static_cast<uint16_t>(rhs.result) ||
        value != rhs.value;
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const LogReadResp& x) {
    out << "LogReadResp(";
    out << "Result=" << x.result;
    out << ", " << "Value=" << x.value;
    out << ")";
    return out;
}
// Serializes the single field: nomineeToken.
void NewLeaderReq::pack(BincodeBuf& buf) const {
    nomineeToken.pack(buf);
}

// Reads nomineeToken back.
void NewLeaderReq::unpack(BincodeBuf& buf) {
    nomineeToken.unpack(buf);
}

// Resets nomineeToken to its default value.
void NewLeaderReq::clear() {
    nomineeToken = LeaderToken{};
}

// Two requests are equal when they carry the same nominee token.
bool NewLeaderReq::operator==(const NewLeaderReq& rhs) const {
    const bool differs = static_cast<LeaderToken>(nomineeToken) != static_cast<LeaderToken>(rhs.nomineeToken);
    return !differs;
}

// Human-readable dump of the request.
std::ostream& operator<<(std::ostream& out, const NewLeaderReq& x) {
    out << "NewLeaderReq(";
    out << "NomineeToken=" << x.nomineeToken;
    out << ")";
    return out;
}
// Serializes the fields in wire order: result (16-bit scalar), lastReleased.
void NewLeaderResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
    lastReleased.pack(buf);
}

// Reads the fields back in the same order pack() wrote them.
void NewLeaderResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
    lastReleased.unpack(buf);
}

// Resets every field to its default value.
void NewLeaderResp::clear() {
    result = static_cast<uint16_t>(0);
    lastReleased = LogIdx{};
}

// Field-by-field comparison, stopping at the first difference.
bool NewLeaderResp::operator==(const NewLeaderResp& rhs) const {
    const bool differs =
        static_cast<uint16_t>(result) != static_cast<uint16_t>(rhs.result) ||
        static_cast<LogIdx>(lastReleased) != static_cast<LogIdx>(rhs.lastReleased);
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const NewLeaderResp& x) {
    out << "NewLeaderResp(";
    out << "Result=" << x.result;
    out << ", " << "LastReleased=" << x.lastReleased;
    out << ")";
    return out;
}
// Serializes the fields in wire order: nomineeToken, releasedIdx.
void NewLeaderConfirmReq::pack(BincodeBuf& buf) const {
    nomineeToken.pack(buf);
    releasedIdx.pack(buf);
}

// Reads the fields back in the same order pack() wrote them.
void NewLeaderConfirmReq::unpack(BincodeBuf& buf) {
    nomineeToken.unpack(buf);
    releasedIdx.unpack(buf);
}

// Resets every field to its default value.
void NewLeaderConfirmReq::clear() {
    nomineeToken = LeaderToken{};
    releasedIdx = LogIdx{};
}

// Field-by-field comparison, stopping at the first difference.
bool NewLeaderConfirmReq::operator==(const NewLeaderConfirmReq& rhs) const {
    const bool differs =
        static_cast<LeaderToken>(nomineeToken) != static_cast<LeaderToken>(rhs.nomineeToken) ||
        static_cast<LogIdx>(releasedIdx) != static_cast<LogIdx>(rhs.releasedIdx);
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const NewLeaderConfirmReq& x) {
    out << "NewLeaderConfirmReq(";
    out << "NomineeToken=" << x.nomineeToken;
    out << ", " << "ReleasedIdx=" << x.releasedIdx;
    out << ")";
    return out;
}
// Serializes the single field: result as a 16-bit scalar.
void NewLeaderConfirmResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
}

// Reads result back as a 16-bit scalar.
void NewLeaderConfirmResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
}

// Resets result to zero.
void NewLeaderConfirmResp::clear() {
    result = static_cast<uint16_t>(0);
}

// Two responses are equal when their result codes match.
bool NewLeaderConfirmResp::operator==(const NewLeaderConfirmResp& rhs) const {
    return static_cast<uint16_t>(result) == static_cast<uint16_t>(rhs.result);
}

// Human-readable dump of the response.
std::ostream& operator<<(std::ostream& out, const NewLeaderConfirmResp& x) {
    out << "NewLeaderConfirmResp(";
    out << "Result=" << x.result;
    out << ")";
    return out;
}
// Serializes the fields in wire order: nomineeToken, idx.
void LogRecoveryReadReq::pack(BincodeBuf& buf) const {
    nomineeToken.pack(buf);
    idx.pack(buf);
}

// Reads the fields back in the same order pack() wrote them.
void LogRecoveryReadReq::unpack(BincodeBuf& buf) {
    nomineeToken.unpack(buf);
    idx.unpack(buf);
}

// Resets every field to its default value.
void LogRecoveryReadReq::clear() {
    nomineeToken = LeaderToken{};
    idx = LogIdx{};
}

// Field-by-field comparison, stopping at the first difference.
bool LogRecoveryReadReq::operator==(const LogRecoveryReadReq& rhs) const {
    const bool differs =
        static_cast<LeaderToken>(nomineeToken) != static_cast<LeaderToken>(rhs.nomineeToken) ||
        static_cast<LogIdx>(idx) != static_cast<LogIdx>(rhs.idx);
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const LogRecoveryReadReq& x) {
    out << "LogRecoveryReadReq(";
    out << "NomineeToken=" << x.nomineeToken;
    out << ", " << "Idx=" << x.idx;
    out << ")";
    return out;
}
// Serializes the fields in wire order: result (16-bit scalar), value.
void LogRecoveryReadResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
    buf.packList<uint8_t>(value);
}

// Reads the fields back in the same order pack() wrote them.
void LogRecoveryReadResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
    buf.unpackList<uint8_t>(value);
}

// Resets result to zero and empties value.
void LogRecoveryReadResp::clear() {
    result = static_cast<uint16_t>(0);
    value.clear();
}

// Field-by-field comparison, stopping at the first difference.
bool LogRecoveryReadResp::operator==(const LogRecoveryReadResp& rhs) const {
    const bool differs =
        static_cast<uint16_t>(result) != static_cast<uint16_t>(rhs.result) ||
        value != rhs.value;
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const LogRecoveryReadResp& x) {
    out << "LogRecoveryReadResp(";
    out << "Result=" << x.result;
    out << ", " << "Value=" << x.value;
    out << ")";
    return out;
}
// Serializes the fields in wire order: nomineeToken, idx, value.
void LogRecoveryWriteReq::pack(BincodeBuf& buf) const {
    nomineeToken.pack(buf);
    idx.pack(buf);
    buf.packList<uint8_t>(value);
}

// Reads the fields back in the same order pack() wrote them.
void LogRecoveryWriteReq::unpack(BincodeBuf& buf) {
    nomineeToken.unpack(buf);
    idx.unpack(buf);
    buf.unpackList<uint8_t>(value);
}

// Resets every field to its default value.
void LogRecoveryWriteReq::clear() {
    nomineeToken = LeaderToken{};
    idx = LogIdx{};
    value.clear();
}

// Field-by-field comparison, stopping at the first difference.
bool LogRecoveryWriteReq::operator==(const LogRecoveryWriteReq& rhs) const {
    const bool differs =
        static_cast<LeaderToken>(nomineeToken) != static_cast<LeaderToken>(rhs.nomineeToken) ||
        static_cast<LogIdx>(idx) != static_cast<LogIdx>(rhs.idx) ||
        value != rhs.value;
    return !differs;
}

// Human-readable dump of all fields.
std::ostream& operator<<(std::ostream& out, const LogRecoveryWriteReq& x) {
    out << "LogRecoveryWriteReq(";
    out << "NomineeToken=" << x.nomineeToken;
    out << ", " << "Idx=" << x.idx;
    out << ", " << "Value=" << x.value;
    out << ")";
    return out;
}
// Serializes the single field: result as a 16-bit scalar.
void LogRecoveryWriteResp::pack(BincodeBuf& buf) const {
    buf.packScalar<uint16_t>(result);
}

// Reads result back as a 16-bit scalar.
void LogRecoveryWriteResp::unpack(BincodeBuf& buf) {
    result = buf.unpackScalar<uint16_t>();
}

// Resets result to zero.
void LogRecoveryWriteResp::clear() {
    result = static_cast<uint16_t>(0);
}

// Two responses are equal when their result codes match.
bool LogRecoveryWriteResp::operator==(const LogRecoveryWriteResp& rhs) const {
    return static_cast<uint16_t>(result) == static_cast<uint16_t>(rhs.result);
}

// Human-readable dump of the response.
std::ostream& operator<<(std::ostream& out, const LogRecoveryWriteResp& x) {
    out << "LogRecoveryWriteResp(";
    out << "Result=" << x.result;
    out << ")";
    return out;
}
const LookupReq& ShardReqContainer::getLookup() const {
ALWAYS_ASSERT(_kind == ShardMessageKind::LOOKUP, "%s != %s", _kind, ShardMessageKind::LOOKUP);
return std::get<0>(_data);
@@ -7541,6 +7882,490 @@ std::ostream& operator<<(std::ostream& out, const ShuckleRespContainer& x) {
return out;
}
// Typed accessors of LogReqContainer.
// Each getX() asserts that the container currently holds kind X and returns the stored
// request; each setX() switches the container to kind X, replaces the payload with a
// default-constructed request of that kind and returns it for the caller to fill in.

const LogWriteReq& LogReqContainer::getLogWrite() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_WRITE, "%s != %s", _kind, LogMessageKind::LOG_WRITE);
    return std::get<0>(_data);
}
LogWriteReq& LogReqContainer::setLogWrite() {
    _kind = LogMessageKind::LOG_WRITE;
    return _data.emplace<0>();
}

const ReleaseReq& LogReqContainer::getRelease() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::RELEASE, "%s != %s", _kind, LogMessageKind::RELEASE);
    return std::get<1>(_data);
}
ReleaseReq& LogReqContainer::setRelease() {
    _kind = LogMessageKind::RELEASE;
    return _data.emplace<1>();
}

const LogReadReq& LogReqContainer::getLogRead() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_READ, "%s != %s", _kind, LogMessageKind::LOG_READ);
    return std::get<2>(_data);
}
LogReadReq& LogReqContainer::setLogRead() {
    _kind = LogMessageKind::LOG_READ;
    return _data.emplace<2>();
}

const NewLeaderReq& LogReqContainer::getNewLeader() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::NEW_LEADER, "%s != %s", _kind, LogMessageKind::NEW_LEADER);
    return std::get<3>(_data);
}
NewLeaderReq& LogReqContainer::setNewLeader() {
    _kind = LogMessageKind::NEW_LEADER;
    return _data.emplace<3>();
}

const NewLeaderConfirmReq& LogReqContainer::getNewLeaderConfirm() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::NEW_LEADER_CONFIRM, "%s != %s", _kind, LogMessageKind::NEW_LEADER_CONFIRM);
    return std::get<4>(_data);
}
NewLeaderConfirmReq& LogReqContainer::setNewLeaderConfirm() {
    _kind = LogMessageKind::NEW_LEADER_CONFIRM;
    return _data.emplace<4>();
}

const LogRecoveryReadReq& LogReqContainer::getLogRecoveryRead() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_RECOVERY_READ, "%s != %s", _kind, LogMessageKind::LOG_RECOVERY_READ);
    return std::get<5>(_data);
}
LogRecoveryReadReq& LogReqContainer::setLogRecoveryRead() {
    _kind = LogMessageKind::LOG_RECOVERY_READ;
    return _data.emplace<5>();
}

const LogRecoveryWriteReq& LogReqContainer::getLogRecoveryWrite() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_RECOVERY_WRITE, "%s != %s", _kind, LogMessageKind::LOG_RECOVERY_WRITE);
    return std::get<6>(_data);
}
LogRecoveryWriteReq& LogReqContainer::setLogRecoveryWrite() {
    _kind = LogMessageKind::LOG_RECOVERY_WRITE;
    return _data.emplace<6>();
}
// Default construction delegates to clear().
LogReqContainer::LogReqContainer() {
    clear();
}

// Copy construction is implemented on top of copy assignment.
LogReqContainer::LogReqContainer(const LogReqContainer& other) {
    operator=(other);
}

// Move construction takes over the payload and kind of `other` and leaves `other`
// with kind 0.
LogReqContainer::LogReqContainer(LogReqContainer&& other) {
    _data = std::move(other._data);
    const LogMessageKind takenKind = other._kind;
    other._kind = static_cast<LogMessageKind>(0);
    _kind = takenKind;
}
// Copy assignment: makes this container hold a copy of whatever request `other` holds.
// A source of kind 0 clears this container; an unknown kind throws an EGGS_EXCEPTION.
void LogReqContainer::operator=(const LogReqContainer& other) {
    // Self-assignment must be a no-op: setX() below re-emplaces (i.e. destroys and
    // default-constructs) the very payload other.getX() refers to, which would silently
    // wipe the stored request.
    if (this == &other) { return; }
    if (other.kind() == (LogMessageKind)0) { clear(); return; }
    switch (other.kind()) {
    case LogMessageKind::LOG_WRITE:
        setLogWrite() = other.getLogWrite();
        break;
    case LogMessageKind::RELEASE:
        setRelease() = other.getRelease();
        break;
    case LogMessageKind::LOG_READ:
        setLogRead() = other.getLogRead();
        break;
    case LogMessageKind::NEW_LEADER:
        setNewLeader() = other.getNewLeader();
        break;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        setNewLeaderConfirm() = other.getNewLeaderConfirm();
        break;
    case LogMessageKind::LOG_RECOVERY_READ:
        setLogRecoveryRead() = other.getLogRecoveryRead();
        break;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        setLogRecoveryWrite() = other.getLogRecoveryWrite();
        break;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", other.kind());
    }
}
// Move assignment: takes over the payload and kind of `other` and leaves `other`
// with kind 0.
void LogReqContainer::operator=(LogReqContainer&& other) {
    _data = std::move(other._data);
    const LogMessageKind takenKind = other._kind;
    _kind = takenKind;
    other._kind = static_cast<LogMessageKind>(0);
}
// Returns the packed size of the request currently held, as reported by that request's
// own packedSize(). Throws an EGGS_EXCEPTION when the kind is not one of the known kinds.
size_t LogReqContainer::packedSize() const {
    size_t size = 0;
    switch (_kind) {
    case LogMessageKind::LOG_WRITE:
        size = std::get<0>(_data).packedSize();
        break;
    case LogMessageKind::RELEASE:
        size = std::get<1>(_data).packedSize();
        break;
    case LogMessageKind::LOG_READ:
        size = std::get<2>(_data).packedSize();
        break;
    case LogMessageKind::NEW_LEADER:
        size = std::get<3>(_data).packedSize();
        break;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        size = std::get<4>(_data).packedSize();
        break;
    case LogMessageKind::LOG_RECOVERY_READ:
        size = std::get<5>(_data).packedSize();
        break;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        size = std::get<6>(_data).packedSize();
        break;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", _kind);
    }
    return size;
}
// Packs the active request into `buf`. The kind itself is not written here.
// Throws EGGS_EXCEPTION when no known kind is set.
void LogReqContainer::pack(BincodeBuf& buf) const {
    switch (_kind) {
    case LogMessageKind::LOG_WRITE:
        std::get<LogWriteReq>(_data).pack(buf);
        return;
    case LogMessageKind::RELEASE:
        std::get<ReleaseReq>(_data).pack(buf);
        return;
    case LogMessageKind::LOG_READ:
        std::get<LogReadReq>(_data).pack(buf);
        return;
    case LogMessageKind::NEW_LEADER:
        std::get<NewLeaderReq>(_data).pack(buf);
        return;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        std::get<NewLeaderConfirmReq>(_data).pack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_READ:
        std::get<LogRecoveryReadReq>(_data).pack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        std::get<LogRecoveryWriteReq>(_data).pack(buf);
        return;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", _kind);
    }
}
// Replaces the contents with a request of the given `kind` decoded from `buf`.
// The kind is recorded before decoding starts. An unknown kind throws
// BINCODE_EXCEPTION (with `_kind` already set to that value).
void LogReqContainer::unpack(BincodeBuf& buf, LogMessageKind kind) {
    _kind = kind;
    switch (kind) {
    case LogMessageKind::LOG_WRITE:
        _data.emplace<LogWriteReq>().unpack(buf);
        return;
    case LogMessageKind::RELEASE:
        _data.emplace<ReleaseReq>().unpack(buf);
        return;
    case LogMessageKind::LOG_READ:
        _data.emplace<LogReadReq>().unpack(buf);
        return;
    case LogMessageKind::NEW_LEADER:
        _data.emplace<NewLeaderReq>().unpack(buf);
        return;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        _data.emplace<NewLeaderConfirmReq>().unpack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_READ:
        _data.emplace<LogRecoveryReadReq>().unpack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        _data.emplace<LogRecoveryWriteReq>().unpack(buf);
        return;
    default:
        throw BINCODE_EXCEPTION("bad LogMessageKind kind %s", kind);
    }
}
// Two containers are equal when they hold the same kind and, for a non-empty
// kind, equal payloads. Two empty containers (kind 0) compare equal.
// Throws BINCODE_EXCEPTION if both share an unknown kind.
bool LogReqContainer::operator==(const LogReqContainer& other) const {
    if (_kind != other.kind()) { return false; }
    switch (_kind) {
    case (LogMessageKind)0:
        return true;
    case LogMessageKind::LOG_WRITE:
        return getLogWrite() == other.getLogWrite();
    case LogMessageKind::RELEASE:
        return getRelease() == other.getRelease();
    case LogMessageKind::LOG_READ:
        return getLogRead() == other.getLogRead();
    case LogMessageKind::NEW_LEADER:
        return getNewLeader() == other.getNewLeader();
    case LogMessageKind::NEW_LEADER_CONFIRM:
        return getNewLeaderConfirm() == other.getNewLeaderConfirm();
    case LogMessageKind::LOG_RECOVERY_READ:
        return getLogRecoveryRead() == other.getLogRecoveryRead();
    case LogMessageKind::LOG_RECOVERY_WRITE:
        return getLogRecoveryWrite() == other.getLogRecoveryWrite();
    default:
        throw BINCODE_EXCEPTION("bad LogMessageKind kind %s", _kind);
    }
}
// Streams the active request of `x`.
// Throws EGGS_EXCEPTION when `x` has no known kind (including the empty kind 0).
std::ostream& operator<<(std::ostream& out, const LogReqContainer& x) {
    switch (x.kind()) {
    case LogMessageKind::LOG_WRITE:
        out << x.getLogWrite();
        return out;
    case LogMessageKind::RELEASE:
        out << x.getRelease();
        return out;
    case LogMessageKind::LOG_READ:
        out << x.getLogRead();
        return out;
    case LogMessageKind::NEW_LEADER:
        out << x.getNewLeader();
        return out;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        out << x.getNewLeaderConfirm();
        return out;
    case LogMessageKind::LOG_RECOVERY_READ:
        out << x.getLogRecoveryRead();
        return out;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        out << x.getLogRecoveryWrite();
        return out;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", x.kind());
    }
}
// Returns the LOG_WRITE response; asserts that LOG_WRITE is the active kind.
const LogWriteResp& LogRespContainer::getLogWrite() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_WRITE, "%s != %s", _kind, LogMessageKind::LOG_WRITE);
    const LogWriteResp& resp = std::get<LogWriteResp>(_data);
    return resp;
}
// Makes LOG_WRITE the active kind and returns a freshly constructed response to fill in.
LogWriteResp& LogRespContainer::setLogWrite() {
    _kind = LogMessageKind::LOG_WRITE;
    return _data.emplace<LogWriteResp>();
}
// Returns the RELEASE response; asserts that RELEASE is the active kind.
const ReleaseResp& LogRespContainer::getRelease() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::RELEASE, "%s != %s", _kind, LogMessageKind::RELEASE);
    const ReleaseResp& resp = std::get<ReleaseResp>(_data);
    return resp;
}
// Makes RELEASE the active kind and returns a freshly constructed response to fill in.
ReleaseResp& LogRespContainer::setRelease() {
    _kind = LogMessageKind::RELEASE;
    return _data.emplace<ReleaseResp>();
}
// Returns the LOG_READ response; asserts that LOG_READ is the active kind.
const LogReadResp& LogRespContainer::getLogRead() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_READ, "%s != %s", _kind, LogMessageKind::LOG_READ);
    const LogReadResp& resp = std::get<LogReadResp>(_data);
    return resp;
}
// Makes LOG_READ the active kind and returns a freshly constructed response to fill in.
LogReadResp& LogRespContainer::setLogRead() {
    _kind = LogMessageKind::LOG_READ;
    return _data.emplace<LogReadResp>();
}
// Returns the NEW_LEADER response; asserts that NEW_LEADER is the active kind.
const NewLeaderResp& LogRespContainer::getNewLeader() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::NEW_LEADER, "%s != %s", _kind, LogMessageKind::NEW_LEADER);
    const NewLeaderResp& resp = std::get<NewLeaderResp>(_data);
    return resp;
}
// Makes NEW_LEADER the active kind and returns a freshly constructed response to fill in.
NewLeaderResp& LogRespContainer::setNewLeader() {
    _kind = LogMessageKind::NEW_LEADER;
    return _data.emplace<NewLeaderResp>();
}
// Returns the NEW_LEADER_CONFIRM response; asserts that it is the active kind.
const NewLeaderConfirmResp& LogRespContainer::getNewLeaderConfirm() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::NEW_LEADER_CONFIRM, "%s != %s", _kind, LogMessageKind::NEW_LEADER_CONFIRM);
    const NewLeaderConfirmResp& resp = std::get<NewLeaderConfirmResp>(_data);
    return resp;
}
// Makes NEW_LEADER_CONFIRM the active kind and returns a freshly constructed response to fill in.
NewLeaderConfirmResp& LogRespContainer::setNewLeaderConfirm() {
    _kind = LogMessageKind::NEW_LEADER_CONFIRM;
    return _data.emplace<NewLeaderConfirmResp>();
}
// Returns the LOG_RECOVERY_READ response; asserts that it is the active kind.
const LogRecoveryReadResp& LogRespContainer::getLogRecoveryRead() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_RECOVERY_READ, "%s != %s", _kind, LogMessageKind::LOG_RECOVERY_READ);
    const LogRecoveryReadResp& resp = std::get<LogRecoveryReadResp>(_data);
    return resp;
}
// Makes LOG_RECOVERY_READ the active kind and returns a freshly constructed response to fill in.
LogRecoveryReadResp& LogRespContainer::setLogRecoveryRead() {
    _kind = LogMessageKind::LOG_RECOVERY_READ;
    return _data.emplace<LogRecoveryReadResp>();
}
// Returns the LOG_RECOVERY_WRITE response; asserts that it is the active kind.
const LogRecoveryWriteResp& LogRespContainer::getLogRecoveryWrite() const {
    ALWAYS_ASSERT(_kind == LogMessageKind::LOG_RECOVERY_WRITE, "%s != %s", _kind, LogMessageKind::LOG_RECOVERY_WRITE);
    const LogRecoveryWriteResp& resp = std::get<LogRecoveryWriteResp>(_data);
    return resp;
}
// Makes LOG_RECOVERY_WRITE the active kind and returns a freshly constructed response to fill in.
LogRecoveryWriteResp& LogRespContainer::setLogRecoveryWrite() {
    _kind = LogMessageKind::LOG_RECOVERY_WRITE;
    return _data.emplace<LogRecoveryWriteResp>();
}
// Builds an empty container: clear() marks it as holding no message.
LogRespContainer::LogRespContainer() {
    this->clear();
}
// Copy construction is expressed through copy assignment so both share one code path.
LogRespContainer::LogRespContainer(const LogRespContainer& other) {
    this->operator=(other);
}
// Takes over the kind and payload of `other`, then marks `other` as holding no
// message (its `_data` is left in the moved-from state).
LogRespContainer::LogRespContainer(LogRespContainer&& other) {
    _kind = other._kind;
    _data = std::move(other._data);
    other._kind = static_cast<LogMessageKind>(0);
}
// Copies the active message of `other` into this container.
// An empty `other` (kind 0) just clears this container.
// Throws EGGS_EXCEPTION if `other` carries a kind this container does not know.
void LogRespContainer::operator=(const LogRespContainer& other) {
    // Self-assignment must be a no-op: setX() below re-emplaces the alternative
    // (resetting it) before it is copied from, which would wipe our own payload.
    if (this == &other) { return; }
    if (other.kind() == (LogMessageKind)0) { clear(); return; }
    switch (other.kind()) {
    case LogMessageKind::LOG_WRITE:
        setLogWrite() = other.getLogWrite();
        break;
    case LogMessageKind::RELEASE:
        setRelease() = other.getRelease();
        break;
    case LogMessageKind::LOG_READ:
        setLogRead() = other.getLogRead();
        break;
    case LogMessageKind::NEW_LEADER:
        setNewLeader() = other.getNewLeader();
        break;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        setNewLeaderConfirm() = other.getNewLeaderConfirm();
        break;
    case LogMessageKind::LOG_RECOVERY_READ:
        setLogRecoveryRead() = other.getLogRecoveryRead();
        break;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        setLogRecoveryWrite() = other.getLogRecoveryWrite();
        break;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", other.kind());
    }
}
// Moves the kind and payload out of `other`, leaving `other` marked as empty
// (kind 0) with its `_data` in the moved-from state.
void LogRespContainer::operator=(LogRespContainer&& other) {
    // Self-move would otherwise reset our own kind to 0 while keeping the payload.
    if (this == &other) { return; }
    _data = std::move(other._data);
    _kind = other._kind;
    other._kind = (LogMessageKind)0;
}
size_t LogRespContainer::packedSize() const {
switch (_kind) {
case LogMessageKind::LOG_WRITE:
return std::get<0>(_data).packedSize();
case LogMessageKind::RELEASE:
return std::get<1>(_data).packedSize();
case LogMessageKind::LOG_READ:
return std::get<2>(_data).packedSize();
case LogMessageKind::NEW_LEADER:
return std::get<3>(_data).packedSize();
case LogMessageKind::NEW_LEADER_CONFIRM:
return std::get<4>(_data).packedSize();
case LogMessageKind::LOG_RECOVERY_READ:
return std::get<5>(_data).packedSize();
case LogMessageKind::LOG_RECOVERY_WRITE:
return std::get<6>(_data).packedSize();
default:
throw EGGS_EXCEPTION("bad LogMessageKind kind %s", _kind);
}
}
// Packs the active response into `buf`. The kind itself is not written here.
// Throws EGGS_EXCEPTION when no known kind is set.
void LogRespContainer::pack(BincodeBuf& buf) const {
    switch (_kind) {
    case LogMessageKind::LOG_WRITE:
        std::get<LogWriteResp>(_data).pack(buf);
        return;
    case LogMessageKind::RELEASE:
        std::get<ReleaseResp>(_data).pack(buf);
        return;
    case LogMessageKind::LOG_READ:
        std::get<LogReadResp>(_data).pack(buf);
        return;
    case LogMessageKind::NEW_LEADER:
        std::get<NewLeaderResp>(_data).pack(buf);
        return;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        std::get<NewLeaderConfirmResp>(_data).pack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_READ:
        std::get<LogRecoveryReadResp>(_data).pack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        std::get<LogRecoveryWriteResp>(_data).pack(buf);
        return;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", _kind);
    }
}
// Replaces the contents with a response of the given `kind` decoded from `buf`.
// The kind is recorded before decoding starts. An unknown kind throws
// BINCODE_EXCEPTION (with `_kind` already set to that value).
void LogRespContainer::unpack(BincodeBuf& buf, LogMessageKind kind) {
    _kind = kind;
    switch (kind) {
    case LogMessageKind::LOG_WRITE:
        _data.emplace<LogWriteResp>().unpack(buf);
        return;
    case LogMessageKind::RELEASE:
        _data.emplace<ReleaseResp>().unpack(buf);
        return;
    case LogMessageKind::LOG_READ:
        _data.emplace<LogReadResp>().unpack(buf);
        return;
    case LogMessageKind::NEW_LEADER:
        _data.emplace<NewLeaderResp>().unpack(buf);
        return;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        _data.emplace<NewLeaderConfirmResp>().unpack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_READ:
        _data.emplace<LogRecoveryReadResp>().unpack(buf);
        return;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        _data.emplace<LogRecoveryWriteResp>().unpack(buf);
        return;
    default:
        throw BINCODE_EXCEPTION("bad LogMessageKind kind %s", kind);
    }
}
// Two containers are equal when they hold the same kind and, for a non-empty
// kind, equal payloads. Two empty containers (kind 0) compare equal.
// Throws BINCODE_EXCEPTION if both share an unknown kind.
bool LogRespContainer::operator==(const LogRespContainer& other) const {
    if (_kind != other.kind()) { return false; }
    switch (_kind) {
    case (LogMessageKind)0:
        return true;
    case LogMessageKind::LOG_WRITE:
        return getLogWrite() == other.getLogWrite();
    case LogMessageKind::RELEASE:
        return getRelease() == other.getRelease();
    case LogMessageKind::LOG_READ:
        return getLogRead() == other.getLogRead();
    case LogMessageKind::NEW_LEADER:
        return getNewLeader() == other.getNewLeader();
    case LogMessageKind::NEW_LEADER_CONFIRM:
        return getNewLeaderConfirm() == other.getNewLeaderConfirm();
    case LogMessageKind::LOG_RECOVERY_READ:
        return getLogRecoveryRead() == other.getLogRecoveryRead();
    case LogMessageKind::LOG_RECOVERY_WRITE:
        return getLogRecoveryWrite() == other.getLogRecoveryWrite();
    default:
        throw BINCODE_EXCEPTION("bad LogMessageKind kind %s", _kind);
    }
}
// Streams the active response of `x`.
// Throws EGGS_EXCEPTION when `x` has no known kind (including the empty kind 0).
std::ostream& operator<<(std::ostream& out, const LogRespContainer& x) {
    switch (x.kind()) {
    case LogMessageKind::LOG_WRITE:
        out << x.getLogWrite();
        return out;
    case LogMessageKind::RELEASE:
        out << x.getRelease();
        return out;
    case LogMessageKind::LOG_READ:
        out << x.getLogRead();
        return out;
    case LogMessageKind::NEW_LEADER:
        out << x.getNewLeader();
        return out;
    case LogMessageKind::NEW_LEADER_CONFIRM:
        out << x.getNewLeaderConfirm();
        return out;
    case LogMessageKind::LOG_RECOVERY_READ:
        out << x.getLogRecoveryRead();
        return out;
    case LogMessageKind::LOG_RECOVERY_WRITE:
        out << x.getLogRecoveryWrite();
        return out;
    default:
        throw EGGS_EXCEPTION("bad LogMessageKind kind %s", x.kind());
    }
}
std::ostream& operator<<(std::ostream& out, ShardLogEntryKind err) {
switch (err) {
case ShardLogEntryKind::CONSTRUCT_FILE:
@@ -8049,6 +8874,7 @@ std::ostream& operator<<(std::ostream& out, const UpdateBlockServicesEntry& x) {
}
void AddSpanInitiateEntry::pack(BincodeBuf& buf) const {
buf.packScalar<bool>(withReference);
fileId.pack(buf);
buf.packScalar<uint64_t>(byteOffset);
buf.packScalar<uint32_t>(size);
@@ -8061,6 +8887,7 @@ void AddSpanInitiateEntry::pack(BincodeBuf& buf) const {
buf.packList<Crc>(bodyStripes);
}
void AddSpanInitiateEntry::unpack(BincodeBuf& buf) {
withReference = buf.unpackScalar<bool>();
fileId.unpack(buf);
byteOffset = buf.unpackScalar<uint64_t>();
size = buf.unpackScalar<uint32_t>();
@@ -8073,6 +8900,7 @@ void AddSpanInitiateEntry::unpack(BincodeBuf& buf) {
buf.unpackList<Crc>(bodyStripes);
}
void AddSpanInitiateEntry::clear() {
withReference = bool(0);
fileId = InodeId();
byteOffset = uint64_t(0);
size = uint32_t(0);
@@ -8085,6 +8913,7 @@ void AddSpanInitiateEntry::clear() {
bodyStripes.clear();
}
bool AddSpanInitiateEntry::operator==(const AddSpanInitiateEntry& rhs) const {
if ((bool)this->withReference != (bool)rhs.withReference) { return false; };
if ((InodeId)this->fileId != (InodeId)rhs.fileId) { return false; };
if ((uint64_t)this->byteOffset != (uint64_t)rhs.byteOffset) { return false; };
if ((uint32_t)this->size != (uint32_t)rhs.size) { return false; };
@@ -8098,7 +8927,7 @@ bool AddSpanInitiateEntry::operator==(const AddSpanInitiateEntry& rhs) const {
return true;
}
std::ostream& operator<<(std::ostream& out, const AddSpanInitiateEntry& x) {
out << "AddSpanInitiateEntry(" << "FileId=" << x.fileId << ", " << "ByteOffset=" << x.byteOffset << ", " << "Size=" << x.size << ", " << "Crc=" << x.crc << ", " << "StorageClass=" << (int)x.storageClass << ", " << "Parity=" << x.parity << ", " << "Stripes=" << (int)x.stripes << ", " << "CellSize=" << x.cellSize << ", " << "BodyBlocks=" << x.bodyBlocks << ", " << "BodyStripes=" << x.bodyStripes << ")";
out << "AddSpanInitiateEntry(" << "WithReference=" << x.withReference << ", " << "FileId=" << x.fileId << ", " << "ByteOffset=" << x.byteOffset << ", " << "Size=" << x.size << ", " << "Crc=" << x.crc << ", " << "StorageClass=" << (int)x.storageClass << ", " << "Parity=" << x.parity << ", " << "Stripes=" << (int)x.stripes << ", " << "CellSize=" << x.cellSize << ", " << "BodyBlocks=" << x.bodyBlocks << ", " << "BodyStripes=" << x.bodyStripes << ")";
return out;
}
+403 -2
View File
@@ -68,6 +68,11 @@ enum class EggsError : uint16_t {
BLOCK_IO_ERROR_FILE = 71,
INVALID_REPLICA = 72,
DIFFERENT_ADDRS_INFO = 73,
LEADER_PREEMPTED = 74,
LOG_ENTRY_MISSING = 75,
LOG_ENTRY_TRIMMED = 76,
LOG_ENTRY_UNRELEASED = 77,
LOG_ENTRY_RELEASED = 78,
};
std::ostream& operator<<(std::ostream& out, EggsError err);
@@ -137,9 +142,14 @@ const std::vector<EggsError> allEggsErrors {
EggsError::BLOCK_IO_ERROR_FILE,
EggsError::INVALID_REPLICA,
EggsError::DIFFERENT_ADDRS_INFO,
EggsError::LEADER_PREEMPTED,
EggsError::LOG_ENTRY_MISSING,
EggsError::LOG_ENTRY_TRIMMED,
EggsError::LOG_ENTRY_UNRELEASED,
EggsError::LOG_ENTRY_RELEASED,
};
constexpr int maxEggsError = 74;
constexpr int maxEggsError = 79;
enum class ShardMessageKind : uint8_t {
ERROR = 0,
@@ -317,6 +327,31 @@ constexpr int maxBlocksMessageKind = 6;
std::ostream& operator<<(std::ostream& out, BlocksMessageKind kind);
// Message kinds of the log protocol, stored as a uint8_t.
// Value 0 (ERROR) is also what LogReqContainer/LogRespContainer use to mean
// "no message set"; the remaining values each select one request/response pair.
enum class LogMessageKind : uint8_t {
ERROR = 0,
LOG_WRITE = 1,
RELEASE = 2,
LOG_READ = 3,
NEW_LEADER = 4,
NEW_LEADER_CONFIRM = 5,
LOG_RECOVERY_READ = 6,
LOG_RECOVERY_WRITE = 7,
};
// Every LogMessageKind that carries a request/response, in ascending order.
// ERROR (0) is deliberately not listed.
const std::vector<LogMessageKind> allLogMessageKind {
LogMessageKind::LOG_WRITE,
LogMessageKind::RELEASE,
LogMessageKind::LOG_READ,
LogMessageKind::NEW_LEADER,
LogMessageKind::NEW_LEADER_CONFIRM,
LogMessageKind::LOG_RECOVERY_READ,
LogMessageKind::LOG_RECOVERY_WRITE,
};
// Numeric value of the highest LogMessageKind (LOG_RECOVERY_WRITE).
constexpr int maxLogMessageKind = 7;
// Streams a LogMessageKind; defined out of line.
std::ostream& operator<<(std::ostream& out, LogMessageKind kind);
struct FailureDomain {
BincodeFixedBytes<16> name;
@@ -3840,6 +3875,294 @@ struct CheckBlockResp {
std::ostream& operator<<(std::ostream& out, const CheckBlockResp& x);
// Request payload held by LogReqContainer for LogMessageKind::LOG_WRITE.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogWriteReq {
    LeaderToken token;
    LogIdx lastReleased;
    LogIdx idx;
    BincodeList<uint8_t> value;

    // Fixed part of the packed size; the list contributes its own STATIC_SIZE.
    static constexpr uint16_t STATIC_SIZE = 8 + 8 + 8 + BincodeList<uint8_t>::STATIC_SIZE; // token + lastReleased + idx + value

    // Starts in the state produced by clear().
    LogWriteReq() { clear(); }

    // Packed size: three fields counted as 8 each, plus the current size of `value`.
    size_t packedSize() const {
        return 8                    // token
             + 8                    // lastReleased
             + 8                    // idx
             + value.packedSize();  // value
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogWriteReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogWriteReq& x);
// Response payload held by LogRespContainer for LogMessageKind::LOG_WRITE.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogWriteResp {
    uint16_t result;

    static constexpr uint16_t STATIC_SIZE = 2; // result

    // Starts in the state produced by clear().
    LogWriteResp() { clear(); }

    // Packed size: only the 2-byte `result`.
    size_t packedSize() const {
        return 2; // result
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogWriteResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogWriteResp& x);
// Request payload held by LogReqContainer for LogMessageKind::RELEASE.
// pack(), unpack(), clear() and operator== are defined out of line.
struct ReleaseReq {
    LeaderToken token;
    LogIdx lastReleased;

    static constexpr uint16_t STATIC_SIZE = 8 + 8; // token + lastReleased

    // Starts in the state produced by clear().
    ReleaseReq() { clear(); }

    // Packed size: two fields counted as 8 each.
    size_t packedSize() const {
        return 8   // token
             + 8;  // lastReleased
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const ReleaseReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const ReleaseReq& x);
// Response payload held by LogRespContainer for LogMessageKind::RELEASE.
// pack(), unpack(), clear() and operator== are defined out of line.
struct ReleaseResp {
    uint16_t result;

    static constexpr uint16_t STATIC_SIZE = 2; // result

    // Starts in the state produced by clear().
    ReleaseResp() { clear(); }

    // Packed size: only the 2-byte `result`.
    size_t packedSize() const {
        return 2; // result
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const ReleaseResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const ReleaseResp& x);
// Request payload held by LogReqContainer for LogMessageKind::LOG_READ.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogReadReq {
    LogIdx idx;

    static constexpr uint16_t STATIC_SIZE = 8; // idx

    // Starts in the state produced by clear().
    LogReadReq() { clear(); }

    // Packed size: the single field counted as 8.
    size_t packedSize() const {
        return 8; // idx
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogReadReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogReadReq& x);
// Response payload held by LogRespContainer for LogMessageKind::LOG_READ.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogReadResp {
    uint16_t result;
    BincodeList<uint8_t> value;

    // Fixed part of the packed size; the list contributes its own STATIC_SIZE.
    static constexpr uint16_t STATIC_SIZE = 2 + BincodeList<uint8_t>::STATIC_SIZE; // result + value

    // Starts in the state produced by clear().
    LogReadResp() { clear(); }

    // Packed size: the 2-byte `result` plus the current size of `value`.
    size_t packedSize() const {
        return 2                    // result
             + value.packedSize();  // value
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogReadResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogReadResp& x);
// Request payload held by LogReqContainer for LogMessageKind::NEW_LEADER.
// pack(), unpack(), clear() and operator== are defined out of line.
struct NewLeaderReq {
    LeaderToken nomineeToken;

    static constexpr uint16_t STATIC_SIZE = 8; // nomineeToken

    // Starts in the state produced by clear().
    NewLeaderReq() { clear(); }

    // Packed size: the single field counted as 8.
    size_t packedSize() const {
        return 8; // nomineeToken
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const NewLeaderReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const NewLeaderReq& x);
// Response payload held by LogRespContainer for LogMessageKind::NEW_LEADER.
// pack(), unpack(), clear() and operator== are defined out of line.
struct NewLeaderResp {
    uint16_t result;
    LogIdx lastReleased;

    static constexpr uint16_t STATIC_SIZE = 2 + 8; // result + lastReleased

    // Starts in the state produced by clear().
    NewLeaderResp() { clear(); }

    // Packed size: the 2-byte `result` plus `lastReleased` counted as 8.
    size_t packedSize() const {
        return 2   // result
             + 8;  // lastReleased
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const NewLeaderResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const NewLeaderResp& x);
// Request payload held by LogReqContainer for LogMessageKind::NEW_LEADER_CONFIRM.
// pack(), unpack(), clear() and operator== are defined out of line.
struct NewLeaderConfirmReq {
    LeaderToken nomineeToken;
    LogIdx releasedIdx;

    static constexpr uint16_t STATIC_SIZE = 8 + 8; // nomineeToken + releasedIdx

    // Starts in the state produced by clear().
    NewLeaderConfirmReq() { clear(); }

    // Packed size: two fields counted as 8 each.
    size_t packedSize() const {
        return 8   // nomineeToken
             + 8;  // releasedIdx
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const NewLeaderConfirmReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const NewLeaderConfirmReq& x);
// Response payload held by LogRespContainer for LogMessageKind::NEW_LEADER_CONFIRM.
// pack(), unpack(), clear() and operator== are defined out of line.
struct NewLeaderConfirmResp {
    uint16_t result;

    static constexpr uint16_t STATIC_SIZE = 2; // result

    // Starts in the state produced by clear().
    NewLeaderConfirmResp() { clear(); }

    // Packed size: only the 2-byte `result`.
    size_t packedSize() const {
        return 2; // result
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const NewLeaderConfirmResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const NewLeaderConfirmResp& x);
// Request payload held by LogReqContainer for LogMessageKind::LOG_RECOVERY_READ.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogRecoveryReadReq {
    LeaderToken nomineeToken;
    LogIdx idx;

    static constexpr uint16_t STATIC_SIZE = 8 + 8; // nomineeToken + idx

    // Starts in the state produced by clear().
    LogRecoveryReadReq() { clear(); }

    // Packed size: two fields counted as 8 each.
    size_t packedSize() const {
        return 8   // nomineeToken
             + 8;  // idx
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogRecoveryReadReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogRecoveryReadReq& x);
// Response payload held by LogRespContainer for LogMessageKind::LOG_RECOVERY_READ.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogRecoveryReadResp {
    uint16_t result;
    BincodeList<uint8_t> value;

    // Fixed part of the packed size; the list contributes its own STATIC_SIZE.
    static constexpr uint16_t STATIC_SIZE = 2 + BincodeList<uint8_t>::STATIC_SIZE; // result + value

    // Starts in the state produced by clear().
    LogRecoveryReadResp() { clear(); }

    // Packed size: the 2-byte `result` plus the current size of `value`.
    size_t packedSize() const {
        return 2                    // result
             + value.packedSize();  // value
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogRecoveryReadResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogRecoveryReadResp& x);
// Request payload held by LogReqContainer for LogMessageKind::LOG_RECOVERY_WRITE.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogRecoveryWriteReq {
    LeaderToken nomineeToken;
    LogIdx idx;
    BincodeList<uint8_t> value;

    // Fixed part of the packed size; the list contributes its own STATIC_SIZE.
    static constexpr uint16_t STATIC_SIZE = 8 + 8 + BincodeList<uint8_t>::STATIC_SIZE; // nomineeToken + idx + value

    // Starts in the state produced by clear().
    LogRecoveryWriteReq() { clear(); }

    // Packed size: two fields counted as 8 each, plus the current size of `value`.
    size_t packedSize() const {
        return 8                    // nomineeToken
             + 8                    // idx
             + value.packedSize();  // value
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogRecoveryWriteReq&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogRecoveryWriteReq& x);
// Response payload held by LogRespContainer for LogMessageKind::LOG_RECOVERY_WRITE.
// pack(), unpack(), clear() and operator== are defined out of line.
struct LogRecoveryWriteResp {
    uint16_t result;

    static constexpr uint16_t STATIC_SIZE = 2; // result

    // Starts in the state produced by clear().
    LogRecoveryWriteResp() { clear(); }

    // Packed size: only the 2-byte `result`.
    size_t packedSize() const {
        return 2; // result
    }
    void pack(BincodeBuf& buf) const;
    void unpack(BincodeBuf& buf);
    void clear();
    bool operator==(const LogRecoveryWriteResp&rhs) const;
};

std::ostream& operator<<(std::ostream& out, const LogRecoveryWriteResp& x);
struct ShardReqContainer {
private:
ShardMessageKind _kind = (ShardMessageKind)0;
@@ -4228,6 +4551,82 @@ public:
std::ostream& operator<<(std::ostream& out, const ShuckleRespContainer& x);
// Tagged container for a single log-protocol request.
// `_kind` names the active message and `_data` stores the matching request
// struct; the variant alternatives are listed in LogMessageKind order, starting
// at LOG_WRITE.
struct LogReqContainer {
private:
// Kind 0 means "no message set".
LogMessageKind _kind = (LogMessageKind)0;
std::variant<LogWriteReq, ReleaseReq, LogReadReq, NewLeaderReq, NewLeaderConfirmReq, LogRecoveryReadReq, LogRecoveryWriteReq> _data;
public:
LogReqContainer();
LogReqContainer(const LogReqContainer& other);
LogReqContainer(LogReqContainer&& other);
// NOTE(review): both assignment operators return void, so assignments cannot be chained.
void operator=(const LogReqContainer& other);
void operator=(LogReqContainer&& other);
// Kind of the message currently held (0 when empty).
LogMessageKind kind() const { return _kind; }
// One getX()/setX() pair per message kind: setX() selects that kind and
// returns the request to fill in, getX() gives read access to it.
const LogWriteReq& getLogWrite() const;
LogWriteReq& setLogWrite();
const ReleaseReq& getRelease() const;
ReleaseReq& setRelease();
const LogReadReq& getLogRead() const;
LogReadReq& setLogRead();
const NewLeaderReq& getNewLeader() const;
NewLeaderReq& setNewLeader();
const NewLeaderConfirmReq& getNewLeaderConfirm() const;
NewLeaderConfirmReq& setNewLeaderConfirm();
const LogRecoveryReadReq& getLogRecoveryRead() const;
LogRecoveryReadReq& setLogRecoveryRead();
const LogRecoveryWriteReq& getLogRecoveryWrite() const;
LogRecoveryWriteReq& setLogRecoveryWrite();
// Marks the container empty. Only the kind is reset; `_data` is left untouched.
void clear() { _kind = (LogMessageKind)0; };
size_t packedSize() const;
void pack(BincodeBuf& buf) const;
// The kind is supplied by the caller rather than read from `buf`.
void unpack(BincodeBuf& buf, LogMessageKind kind);
bool operator==(const LogReqContainer& other) const;
};
std::ostream& operator<<(std::ostream& out, const LogReqContainer& x);
// Tagged container for a single log-protocol response.
// `_kind` names the active message and `_data` stores the matching response
// struct; the variant alternatives are listed in LogMessageKind order, starting
// at LOG_WRITE.
struct LogRespContainer {
private:
// Kind 0 means "no message set".
LogMessageKind _kind = (LogMessageKind)0;
std::variant<LogWriteResp, ReleaseResp, LogReadResp, NewLeaderResp, NewLeaderConfirmResp, LogRecoveryReadResp, LogRecoveryWriteResp> _data;
public:
LogRespContainer();
LogRespContainer(const LogRespContainer& other);
LogRespContainer(LogRespContainer&& other);
// NOTE(review): both assignment operators return void, so assignments cannot be chained.
void operator=(const LogRespContainer& other);
void operator=(LogRespContainer&& other);
// Kind of the message currently held (0 when empty).
LogMessageKind kind() const { return _kind; }
// One getX()/setX() pair per message kind: setX() selects that kind and
// returns a fresh response to fill in, getX() asserts the kind matches and
// gives read access to it.
const LogWriteResp& getLogWrite() const;
LogWriteResp& setLogWrite();
const ReleaseResp& getRelease() const;
ReleaseResp& setRelease();
const LogReadResp& getLogRead() const;
LogReadResp& setLogRead();
const NewLeaderResp& getNewLeader() const;
NewLeaderResp& setNewLeader();
const NewLeaderConfirmResp& getNewLeaderConfirm() const;
NewLeaderConfirmResp& setNewLeaderConfirm();
const LogRecoveryReadResp& getLogRecoveryRead() const;
LogRecoveryReadResp& setLogRecoveryRead();
const LogRecoveryWriteResp& getLogRecoveryWrite() const;
LogRecoveryWriteResp& setLogRecoveryWrite();
// Marks the container empty. Only the kind is reset; `_data` is left untouched.
void clear() { _kind = (LogMessageKind)0; };
size_t packedSize() const;
void pack(BincodeBuf& buf) const;
// The kind is supplied by the caller rather than read from `buf`.
void unpack(BincodeBuf& buf, LogMessageKind kind);
bool operator==(const LogRespContainer& other) const;
};
std::ostream& operator<<(std::ostream& out, const LogRespContainer& x);
enum class ShardLogEntryKind : uint16_t {
CONSTRUCT_FILE = 1,
LINK_FILE = 2,
@@ -4630,6 +5029,7 @@ struct UpdateBlockServicesEntry {
std::ostream& operator<<(std::ostream& out, const UpdateBlockServicesEntry& x);
struct AddSpanInitiateEntry {
bool withReference;
InodeId fileId;
uint64_t byteOffset;
uint32_t size;
@@ -4641,11 +5041,12 @@ struct AddSpanInitiateEntry {
BincodeList<EntryNewBlockInfo> bodyBlocks;
BincodeList<Crc> bodyStripes;
static constexpr uint16_t STATIC_SIZE = 8 + 8 + 4 + 4 + 1 + 1 + 1 + 4 + BincodeList<EntryNewBlockInfo>::STATIC_SIZE + BincodeList<Crc>::STATIC_SIZE; // fileId + byteOffset + size + crc + storageClass + parity + stripes + cellSize + bodyBlocks + bodyStripes
static constexpr uint16_t STATIC_SIZE = 1 + 8 + 8 + 4 + 4 + 1 + 1 + 1 + 4 + BincodeList<EntryNewBlockInfo>::STATIC_SIZE + BincodeList<Crc>::STATIC_SIZE; // withReference + fileId + byteOffset + size + crc + storageClass + parity + stripes + cellSize + bodyBlocks + bodyStripes
AddSpanInitiateEntry() { clear(); }
size_t packedSize() const {
size_t _size = 0;
_size += 1; // withReference
_size += 8; // fileId
_size += 8; // byteOffset
_size += 4; // size
+67
View File
@@ -0,0 +1,67 @@
#pragma once
#include <array>
#include <cstdint>
#include <netinet/in.h>
#include <sys/socket.h>
#include <vector>
#include "Assert.hpp"
#include "Bincode.hpp"
#include "Common.hpp"
#include "Env.hpp"
// One received datagram queued for a protocol handler.
struct Message {
// `buf_` is taken by value and copied into the message.
Message(uint64_t socketId_, sockaddr_in* clientAddr_, BincodeBuf buf_) : socketId(socketId_), clientAddr(clientAddr_), buf(buf_) {}
// Identifier of the socket the datagram arrived on, as passed by the caller.
uint64_t socketId;
// Sender address. Not owned: MultiplexedChannel stores the pointer taken from
// the mmsghdr, so it is only valid while that header's storage is.
sockaddr_in* clientAddr;
// Datagram contents. NOTE(review): presumably a non-owning view over the
// receive buffer — confirm against BincodeBuf.
BincodeBuf buf;
};
// Sorts received datagrams into per-protocol queues.
// `Protocols` lists the accepted protocol identifiers; a datagram whose leading
// uint32 equals Protocols[i] is queued in slot i.
template<size_t ProtocolCount, std::array<uint32_t, ProtocolCount> Protocols>
class MultiplexedChannel {
public:
    // Pre-reserves `reserveSize` entries in every per-protocol queue.
    MultiplexedChannel(Env& env, size_t reserveSize = 100) : _env(env) {
        for (auto& v : _messages) {
            v.reserve(reserveSize);
        }
    }

    // Reads the protocol identifier at the start of the datagram described by
    // `hdr` and appends the datagram to the matching queue. Datagrams with an
    // unknown protocol raise an alert and are dropped.
    void demultiplexMessage(uint64_t socketId, mmsghdr& hdr) {
        sockaddr_in* clientAddr = (sockaddr_in*)hdr.msg_hdr.msg_name;
        BincodeBuf reqBbuf((char*)hdr.msg_hdr.msg_iov[0].iov_base, hdr.msg_len);
        uint32_t protocol = reqBbuf.unpackScalar<uint32_t>();
        // Rewind so the queued buffer starts at the protocol identifier again.
        reqBbuf.cursor = reqBbuf.data;
        auto protocolIndex = lookupProtocol(protocol);
        if (unlikely(protocolIndex == ProtocolCount)) {
            RAISE_ALERT(_env, "Could not parse request header. Unexpected protocol %s from %s, dropping it.", protocol, *clientAddr);
            // There is no queue for an unknown protocol: indexing _messages with
            // ProtocolCount would write one past the end of the array.
            return;
        }
        _messages[protocolIndex].emplace_back(socketId, clientAddr, reqBbuf);
    }

    // Returns the queue for `protocol`; asserts that the protocol is one of `Protocols`.
    std::vector<Message>& getProtocolMessages(uint32_t protocol) {
        auto protocolIdx = lookupProtocol(protocol);
        ALWAYS_ASSERT(protocolIdx != ProtocolCount);
        return _messages[protocolIdx];
    }

    // Empties every queue.
    void clear() {
        for (auto& v : _messages) {
            v.clear();
        }
    }

private:
    Env& _env;
    std::array<std::vector<Message>, ProtocolCount> _messages;

    // Index of `prot` within `Protocols`, or ProtocolCount when it is not listed.
    static constexpr size_t lookupProtocol(uint32_t prot) {
        for (size_t i = 0 ; i < Protocols.size(); ++i) {
            if (Protocols[i] == prot) {
                return i;
            }
        }
        return ProtocolCount;
    }
};
+35
View File
@@ -1,7 +1,10 @@
#include "SharedRocksDB.hpp"
#include <fstream>
#include <mutex>
#include <rocksdb/db.h>
#include <rocksdb/statistics.h>
#include <shared_mutex>
#include <utility>
#include "RocksDBUtils.hpp"
@@ -19,6 +22,7 @@ SharedRocksDB::~SharedRocksDB() {
}
void SharedRocksDB::open(rocksdb::Options options, const std::string& path) {
std::unique_lock<std::shared_mutex> _(_stateMutex);
ALWAYS_ASSERT(_db.get() == nullptr);
ALWAYS_ASSERT(options.statistics.get() == nullptr);
_dbStatistics = rocksdb::CreateDBStatistics();
@@ -47,6 +51,7 @@ void SharedRocksDB::open(rocksdb::Options options, const std::string& path) {
}
void SharedRocksDB::close() {
std::unique_lock<std::shared_mutex> _(_stateMutex);
if (_db.get() == nullptr) {
return;
}
@@ -60,11 +65,13 @@ void SharedRocksDB::close() {
}
// Appends `cfDescriptors` to the stored descriptor list.
// Must be called before the database is opened: asserts that `_db` is still null.
void SharedRocksDB::registerCFDescriptors(const std::vector<rocksdb::ColumnFamilyDescriptor>& cfDescriptors) {
// Exclusive lock: this mutates shared state.
std::unique_lock<std::shared_mutex> _(_stateMutex);
ALWAYS_ASSERT(_db.get() == nullptr);
_cfDescriptors.insert(_cfDescriptors.end(), cfDescriptors.begin(), cfDescriptors.end());
}
rocksdb::ColumnFamilyHandle* SharedRocksDB::getCF(const std::string& name) const {
std::shared_lock<std::shared_mutex> _(_stateMutex);
ALWAYS_ASSERT(_db.get() != nullptr);
auto it = _cfs.find(name);
if (it == _cfs.end()) {
@@ -73,17 +80,45 @@ rocksdb::ColumnFamilyHandle* SharedRocksDB::getCF(const std::string& name) const
return it->second;
}
// Drops the column family called `name` and forgets its handle.
// Does nothing when no such column family is tracked. Asserts the database is open.
// NOTE(review): handles previously returned for this name are destroyed here;
// callers presumably must not keep using them — confirm.
void SharedRocksDB::deleteCF(const std::string& name) {
    std::unique_lock<std::shared_mutex> lock(_stateMutex);
    ALWAYS_ASSERT(_db.get() != nullptr);
    auto it = _cfs.find(name);
    if (it != _cfs.end()) {
        ROCKS_DB_CHECKED(_db->DropColumnFamily(it->second));
        ROCKS_DB_CHECKED(_db->DestroyColumnFamilyHandle(it->second));
        _cfs.erase(it);
    }
}
// Returns the handle for the column family named in `descriptor`, creating the
// column family first if it is not tracked yet. Asserts the database is open.
rocksdb::ColumnFamilyHandle* SharedRocksDB::createCF(const rocksdb::ColumnFamilyDescriptor& descriptor) {
    std::unique_lock<std::shared_mutex> _(_stateMutex);
    // Same unconditional check as every other accessor in this class, so a
    // closed database is never dereferenced below.
    ALWAYS_ASSERT(_db.get() != nullptr);
    auto it = _cfs.find(descriptor.name);
    if (it != _cfs.end()) {
        return it->second;
    }
    rocksdb::ColumnFamilyHandle* handle = nullptr;
    ROCKS_DB_CHECKED(_db->CreateColumnFamily(descriptor.options, descriptor.name, &handle));
    _cfs.emplace(descriptor.name, handle);
    return handle;
}
// Returns the raw database pointer; asserts the database is open.
// The shared lock is only held for the duration of this call, not while the
// caller uses the returned pointer.
rocksdb::DB* SharedRocksDB::db() const {
std::shared_lock<std::shared_mutex> _(_stateMutex);
ALWAYS_ASSERT(_db.get() != nullptr);
return _db.get();
}
// Fills `stats` by forwarding to the free function ::rocksDBMetrics with this
// database and its statistics object. Asserts the database is open.
void SharedRocksDB::rocksDBMetrics(std::unordered_map<std::string, uint64_t>& stats) {
// Shared lock: the database state is only read here.
std::shared_lock<std::shared_mutex> _(_stateMutex);
ALWAYS_ASSERT(_db.get() != nullptr);
::rocksDBMetrics(_env, _db.get(), *_dbStatistics, stats);
}
void SharedRocksDB::dumpRocksDBStatistics() {
std::shared_lock<std::shared_mutex> _(_stateMutex);
ALWAYS_ASSERT(_db.get() != nullptr);
LOG_INFO(_env, "Dumping statistics to %s", _dbStatisticsFile);
std::ofstream file(_dbStatisticsFile);
+6
View File
@@ -1,6 +1,7 @@
#pragma once
#include <memory>
#include <shared_mutex>
#include <string>
#include <unordered_map>
#include <vector>
@@ -18,7 +19,11 @@ public:
void close();
void registerCFDescriptors(const std::vector<rocksdb::ColumnFamilyDescriptor>& _cfDescriptors);
rocksdb::ColumnFamilyHandle* getCF(const std::string& name) const;
void deleteCF(const std::string& name);
rocksdb::ColumnFamilyHandle* createCF(const rocksdb::ColumnFamilyDescriptor& descriptor);
rocksdb::DB* db() const;
void rocksDBMetrics(std::unordered_map<std::string, uint64_t>& stats);
@@ -30,5 +35,6 @@ private:
std::shared_ptr<rocksdb::Statistics> _dbStatistics;
std::string _dbStatisticsFile;
std::vector<rocksdb::ColumnFamilyDescriptor> _cfDescriptors;
mutable std::shared_mutex _stateMutex;
std::unordered_map<std::string, rocksdb::ColumnFamilyHandle*> _cfs;
};
+81 -29
View File
@@ -191,9 +191,9 @@ std::string fetchBlockServices(const std::string& addr, uint16_t port, Duration
return {};
}
std::string registerShard(
const std::string& addr, uint16_t port, Duration timeout, ShardId shid,
uint32_t ip1, uint16_t port1, uint32_t ip2, uint16_t port2
std::string registerShardReplica(
const std::string& addr, uint16_t port, Duration timeout, ShardReplicaId shrid, bool isLeader,
const AddrsInfo& info
) {
std::string errString;
auto sock = shuckleSock(addr, port, timeout, errString);
@@ -202,18 +202,10 @@ std::string registerShard(
}
ShuckleReqContainer reqContainer;
auto& req = reqContainer.setRegisterShard();
req.id = shid;
{
uint32_t ip = htonl(ip1);
memcpy(req.info.ip1.data.data(), &ip, 4);
}
req.info.port1 = port1;
{
uint32_t ip = htonl(ip2);
memcpy(req.info.ip2.data.data(), &ip, 4);
}
req.info.port2 = port2;
auto& req = reqContainer.setRegisterShardReplica();
req.shrid = shrid;
req.isLeader = isLeader;
req.info = info;
errString = writeShuckleRequest(sock.fd, reqContainer);
if (!errString.empty()) {
return errString;
@@ -224,12 +216,46 @@ std::string registerShard(
if (!errString.empty()) {
return errString;
}
respContainer.getRegisterShard();
respContainer.getRegisterShardReplica();
return {};
}
std::string registerCDC(const std::string& host, uint16_t port, Duration timeout, uint32_t ip1, uint16_t port1, uint32_t ip2, uint16_t port2) {
std::string fetchShardReplicas(
const std::string& addr, uint16_t port, Duration timeout, ShardReplicaId shrid, std::array<AddrsInfo, 5>& replicas
) {
std::string errString;
auto sock = shuckleSock(addr, port, timeout, errString);
if (sock.fd < 0) {
return errString;
}
ShuckleReqContainer reqContainer;
auto& req = reqContainer.setShardReplicas();
req.id = shrid.shardId();
errString = writeShuckleRequest(sock.fd, reqContainer);
if (!errString.empty()) {
return errString;
}
ShuckleRespContainer respContainer;
errString = readShuckleResponse(sock.fd, respContainer);
if (!errString.empty()) {
return errString;
}
if (respContainer.getShardReplicas().replicas.els.size() != replicas.size()) {
throw EGGS_EXCEPTION("expecting %s replicas, got %s", replicas.size(), respContainer.getShardReplicas().replicas.els.size());
}
for (int i = 0; i < replicas.size(); i++) {
replicas[i] = respContainer.getShardReplicas().replicas.els[i];
}
return {};
}
std::string registerCDCReplica(const std::string& host, uint16_t port, Duration timeout, ReplicaId replicaId, bool isLeader, const AddrsInfo& info) {
std::string errString;
auto sock = shuckleSock(host, port, timeout, errString);
if (sock.fd < 0) {
@@ -237,17 +263,10 @@ std::string registerCDC(const std::string& host, uint16_t port, Duration timeout
}
ShuckleReqContainer reqContainer;
auto& req = reqContainer.setRegisterCdc();
{
uint32_t ip = htonl(ip1);
memcpy(req.ip1.data.data(), &ip, 4);
}
req.port1 = port1;
{
uint32_t ip = htonl(ip2);
memcpy(req.ip2.data.data(), &ip, 4);
}
req.port2 = port2;
auto& req = reqContainer.setRegisterCdcReplica();
req.replica = replicaId;
req.isLeader = isLeader;
req.info = info;
errString = writeShuckleRequest(sock.fd, reqContainer);
if (!errString.empty()) {
return errString;
@@ -258,7 +277,40 @@ std::string registerCDC(const std::string& host, uint16_t port, Duration timeout
if (!errString.empty()) {
return errString;
}
respContainer.getRegisterCdc();
respContainer.getRegisterCdcReplica();
return {};
}
std::string fetchCDCReplicas(
const std::string& addr, uint16_t port, Duration timeout, std::array<AddrsInfo, 5>& replicas
) {
std::string errString;
auto sock = shuckleSock(addr, port, timeout, errString);
if (sock.fd < 0) {
return errString;
}
ShuckleReqContainer reqContainer;
auto& req = reqContainer.setCdcReplicas();
errString = writeShuckleRequest(sock.fd, reqContainer);
if (!errString.empty()) {
return errString;
}
ShuckleRespContainer respContainer;
errString = readShuckleResponse(sock.fd, respContainer);
if (!errString.empty()) {
return errString;
}
if (respContainer.getCdcReplicas().replicas.els.size() != replicas.size()) {
throw EGGS_EXCEPTION("expecting %s replicas, got %s", replicas.size(), respContainer.getCdcReplicas().replicas.els.size());
}
for (int i = 0; i < replicas.size(); i++) {
replicas[i] = respContainer.getCdcReplicas().replicas.els[i];
}
return {};
}
+25 -12
View File
@@ -1,6 +1,7 @@
#pragma once
#include "Msgs.hpp"
#include "MsgsGen.hpp"
// The host here is the scheme + host + port, e.g. `http://localhost:5000`.
//
@@ -21,25 +22,37 @@ std::string fetchBlockServices(
UpdateBlockServicesEntry& blocks
);
std::string registerShard(
std::string registerShardReplica(
const std::string& shuckleHost,
uint16_t shucklePort,
Duration timeout,
ShardId shid,
uint32_t ip1,
uint16_t port1,
uint32_t ip2,
uint16_t port2
ShardReplicaId shrid,
bool isLeader,
const AddrsInfo& info
);
std::string registerCDC(
std::string fetchShardReplicas(
const std::string& shuckleHost,
uint16_t shucklePort,
Duration timeout,
uint32_t ip1,
uint16_t port1,
uint32_t ip2,
uint16_t port2
ShardReplicaId shrid,
std::array<AddrsInfo, 5>& replicas
);
std::string registerCDCReplica(
const std::string& shuckleHost,
uint16_t shucklePort,
Duration timeout,
ReplicaId replicaId,
bool isLeader,
const AddrsInfo& info
);
std::string fetchCDCReplicas(
const std::string& shuckleHost,
uint16_t shucklePort,
Duration timeout,
std::array<AddrsInfo, 5>& replicas
);
std::string fetchShards(
@@ -57,4 +70,4 @@ std::string insertStats(
);
const std::string defaultShuckleAddress = "REDACTED";
bool parseShuckleAddress(const std::string& fullShuckleAddress, std::string& shuckleHost, uint16_t& shucklePort);
bool parseShuckleAddress(const std::string& fullShuckleAddress, std::string& shuckleHost, uint16_t& shucklePort);
+12 -1
View File
@@ -1,3 +1,4 @@
#include <atomic>
#include <stdio.h>
#include <time.h>
#include <chrono>
@@ -61,7 +62,17 @@ static void checkClockRes() {
}
}
// Clock override used by tests. It starts at EggsTime(0); eggsNow() returns
// this value instead of reading the system clock whenever it is non-zero.
static std::atomic<EggsTime> _currentTimeInTest = EggsTime(0);
// Sets the value that eggsNow() will report while it is non-zero.
// Storing EggsTime(0) makes eggsNow() fall back to the real clock again.
void _setCurrentTime(EggsTime time) {
// Relaxed ordering: the store is atomic but is not ordered against other memory operations.
_currentTimeInTest.store(time, std::memory_order_relaxed);
}
EggsTime eggsNow() {
auto timeInTest = _currentTimeInTest.load(std::memory_order_relaxed);
if (unlikely( timeInTest != 0)) {
return timeInTest;
}
struct timespec now;
if (clock_gettime(CLOCK_REALTIME, &now) != 0) {
@@ -89,4 +100,4 @@ std::ostream& operator<<(std::ostream& out, EggsTime eggst) {
void sleepFor(Duration dt) {
std::this_thread::sleep_for(std::chrono::nanoseconds(dt.ns));
}
}
+12 -2
View File
@@ -101,10 +101,17 @@ struct EggsTime {
ns = buf.unpackScalar<uint64_t>();
}
EggsTime operator+(Duration d) {
EggsTime operator+(Duration d) const {
return EggsTime(ns + d.ns);
}
// Returns this time moved back by `d`. When `d.ns` compares greater than `ns`
// the result is EggsTime(0) rather than a wrapped-around value.
// NOTE(review): Duration is declared elsewhere; if Duration::ns is signed, a
// negative `d` would be converted to a large unsigned value by this comparison
// and also produce 0 -- confirm that is the intended behavior.
EggsTime operator-(Duration d) const {
    return unlikely(d.ns > ns) ? EggsTime(0) : EggsTime(ns - d.ns);
}
// Two positive times might give one negative
// duration.
#ifdef __clang__
@@ -117,4 +124,7 @@ struct EggsTime {
std::ostream& operator<<(std::ostream& out, EggsTime t);
EggsTime eggsNow();
// DO NOT USE UNLESS TESTING TIME SENSITIVE BEHAVIOR
void _setCurrentTime(EggsTime time);
EggsTime eggsNow();
+144 -35
View File
@@ -8,6 +8,7 @@
#include <sys/ioctl.h>
#include <unistd.h>
#include <poll.h>
#include <vector>
#include "Assert.hpp"
#include "Bincode.hpp"
@@ -18,6 +19,7 @@
#include "Msgs.hpp"
#include "Shard.hpp"
#include "Env.hpp"
#include "MultiplexedChannel.hpp"
#include "ShardDB.hpp"
#include "CDCKey.hpp"
#include "SharedRocksDB.hpp"
@@ -65,6 +67,8 @@ struct ShardShared {
std::atomic<double> logEntriesQueueSize;
std::array<std::atomic<double>, 2> receivedRequests; // how many requests we got at once from each socket
std::atomic<double> pulledWriteRequests; // how many requests we got from write queue
std::array<AddrsInfo,5> replicas;
std::mutex replicasLock;
ShardShared() = delete;
ShardShared(SharedRocksDB& sharedDB_, ShardDB& shardDB_): sharedDB(sharedDB_), shardDB(shardDB_), ips{0, 0}, ports{0, 0}, blockServicesWritten(false), logEntriesQueue(LOG_ENTRIES_QUEUE_SIZE), logEntriesQueueSize(0), pulledWriteRequests(0) {
@@ -189,6 +193,7 @@ private:
std::vector<char> _sendBuf;
std::array<std::vector<struct mmsghdr>, 2> _sendHdrs; // one per socket
std::array<std::vector<struct iovec>, 2> _sendVecs;
MultiplexedChannel<3, std::array<uint32_t, 3>{SHARD_REQ_PROTOCOL_VERSION, LOG_REQ_PROTOCOL_VERSION, LOG_RESP_PROTOCOL_VERSION}> _channel;
public:
ShardServer(Logger& logger, std::shared_ptr<XmonAgent>& xmon, ShardId shid, const ShardOptions& options, ShardShared& shared) :
@@ -198,7 +203,8 @@ public:
_ipPorts(options.ipPorts),
_packetDropRand(eggsNow().ns),
_incomingPacketDropProbability(0),
_outgoingPacketDropProbability(0)
_outgoingPacketDropProbability(0),
_channel(_env, MAX_RECV_MSGS * _ipPorts.size())
{
auto convertProb = [this](const std::string& what, double prob, uint64_t& iprob) {
if (prob != 0.0) {
@@ -275,11 +281,9 @@ private:
}
}
void _handleRequest(int sockIx, struct sockaddr_in* clientAddr, char* buf, size_t len) {
void _handleRequest(int sockIx, struct sockaddr_in* clientAddr, BincodeBuf& reqBbuf) {
LOG_DEBUG(_env, "received message from %s", *clientAddr);
BincodeBuf reqBbuf(buf, len);
// First, try to parse the header
ShardRequestHeader reqHeader;
try {
@@ -374,6 +378,7 @@ public:
_logEntries.clear();
_sendBuf.clear();
_channel.clear();
for (int i = 0; i < 2; i++) {
_sendHdrs[i].clear();
_sendVecs[i].clear();
@@ -398,12 +403,15 @@ public:
_shared.receivedRequests[sockIx] = _shared.receivedRequests[sockIx]*0.95 + ((double)msgs)*0.05;
}
for (int msgIx = 0; msgIx < msgs; msgIx++) {
auto& hdr = _recvHdrs[sockIx][msgIx];
auto clientAddr = (struct sockaddr_in *)hdr.msg_hdr.msg_name;
_handleRequest(sockIx, clientAddr, (char*)hdr.msg_hdr.msg_iov->iov_base, hdr.msg_len);
_channel.demultiplexMessage(sockIx, _recvHdrs[sockIx][msgIx]);
}
}
for (auto& msg : _channel.getProtocolMessages(SHARD_REQ_PROTOCOL_VERSION)) {
_handleRequest(msg.socketId, msg.clientAddr, msg.buf);
}
// write out write requests to queue
{
size_t numLogEntries = _logEntries.size();
@@ -451,6 +459,12 @@ private:
uint64_t _currentLogIndex;
ShardRespContainer _respContainer;
std::vector<QueuedShardLogEntry> _logEntries;
std::vector<LogsDBRequest> _logsdbRequests;
std::vector<LogsDBResponse> _logsdbResponses;
std::vector<LogsDBRequest *> outRequests;
std::vector<LogsDBResponse> outResponses;
std::unique_ptr<LogsDB> _logsDB;
// sendmmsg data (one per socket)
std::vector<char> _sendBuf;
@@ -466,7 +480,7 @@ private:
}
public:
ShardWriter(Logger& logger, std::shared_ptr<XmonAgent>& xmon, const ShardOptions& options, ShardShared& shared) :
ShardWriter(Logger& logger, std::shared_ptr<XmonAgent>& xmon, ShardReplicaId shrid, const ShardOptions& options, ShardShared& shared) :
Loop(logger, xmon, "writer"),
_shared(shared),
_packetDropRand(eggsNow().ns),
@@ -484,6 +498,11 @@ public:
convertProb("incoming", options.simulateIncomingPacketDrop, _incomingPacketDropProbability);
convertProb("outgoing", options.simulateOutgoingPacketDrop, _outgoingPacketDropProbability);
_logEntries.reserve(MAX_WRITES_AT_ONCE);
if (options.writeToLogsDB) {
_logsDB.reset(new LogsDB(_env,_shared.sharedDB,shrid.replicaId(), _currentLogIndex, options.dontWaitForReplication, options.dontDoReplication, options.forceLeader, options.avoidBeingLeader, options.initialStart, options.initialStart ? _currentLogIndex : 0));
_logsDB->processIncomingMessages(_logsdbRequests, _logsdbResponses);
}
}
virtual ~ShardWriter() = default;
@@ -504,6 +523,30 @@ public:
stop();
}
std::vector<LogsDBLogEntry> entries;
if (_logsDB) {
std::vector<uint8_t> data;
data.resize(MAX_UDP_MTU);
entries.reserve(_logEntries.size());
for (auto& logEntry : _logEntries) {
entries.emplace_back();
auto& entry = entries.back();
BincodeBuf buf((char*)&data[0], MAX_UDP_MTU);
logEntry.logEntry.pack(buf);
entry.value.assign(buf.data, buf.cursor);
}
auto err = _logsDB->appendEntries(entries);
ALWAYS_ASSERT(err == NO_ERROR);
_logsDB->processIncomingMessages(_logsdbRequests, _logsdbResponses);
_logsDB->getOutgoingMessages(outRequests, outResponses);
entries.clear();
_logsDB->readEntries(entries);
ALWAYS_ASSERT(entries.size() == _logEntries.size());
}
size_t entriesIdx = 0;
for (auto& logEntry : _logEntries) {
if (likely(logEntry.requestId)) {
LOG_DEBUG(_env, "applying log entry for request %s kind %s from %s", logEntry.requestId, logEntry.requestKind, logEntry.clientAddr);
@@ -511,7 +554,19 @@ public:
LOG_DEBUG(_env, "applying request-less log entry");
}
_currentLogIndex++;
EggsError err = _shared.shardDB.applyLogEntry(logEntry.requestKind, _currentLogIndex, logEntry.logEntry, _respContainer);
EggsError err = NO_ERROR;
if (_logsDB) {
auto& logsdbEntry = entries[entriesIdx++];
ALWAYS_ASSERT(_currentLogIndex == logsdbEntry.idx);
ALWAYS_ASSERT(logsdbEntry.value.size() > 0);
BincodeBuf buf((char*)&logsdbEntry.value.front(), logsdbEntry.value.size());
ShardLogEntry shardEntry;
shardEntry.unpack(buf);
ALWAYS_ASSERT(shardEntry == logEntry.logEntry);
err = _shared.shardDB.applyLogEntry(logsdbEntry.idx.u64, shardEntry, _respContainer);
} else {
err = _shared.shardDB.applyLogEntry(_currentLogIndex, logEntry.logEntry, _respContainer);
}
if (likely(logEntry.requestId)) {
Duration elapsed = eggsNow() - logEntry.receivedAt;
bool dropArtificially = wyhash64(&_packetDropRand) % 10'000 < _outgoingPacketDropProbability;
@@ -559,19 +614,26 @@ struct ShardRegisterer : PeriodicLoop {
private:
ShardShared& _shared;
Stopper _stopper;
ShardId _shid;
ShardReplicaId _shrid;
std::string _shuckleHost;
uint16_t _shucklePort;
bool _hasSecondIp;
XmonNCAlert _alert;
AddrsInfo _info;
bool _infoLoaded;
bool _registerCompleted;
uint8_t _leaderReplicaId;
public:
ShardRegisterer(Logger& logger, std::shared_ptr<XmonAgent>& xmon, ShardId shid, const ShardOptions& options, ShardShared& shared) :
PeriodicLoop(logger, xmon, "registerer", {1_sec, 1_mins}),
ShardRegisterer(Logger& logger, std::shared_ptr<XmonAgent>& xmon, ShardReplicaId shrid, const ShardOptions& options, ShardShared& shared) :
PeriodicLoop(logger, xmon, "registerer", {1_sec, 1, 1_mins, 0.1}),
_shared(shared),
_shid(shid),
_shrid(shrid),
_shuckleHost(options.shuckleHost),
_shucklePort(options.shucklePort),
_hasSecondIp(options.ipPorts[1].port != 0)
_hasSecondIp(options.ipPorts[1].port != 0),
_infoLoaded(false),
_registerCompleted(false),
_leaderReplicaId(options.leaderReplicaId)
{}
virtual ~ShardRegisterer() = default;
@@ -581,23 +643,61 @@ public:
}
virtual bool periodicStep() {
uint16_t port1 = _shared.ports[0].load();
uint16_t port2 = _shared.ports[1].load();
// Avoid registering with only one port, so that clients can just wait on
// the first port being ready and they always have both.
if (port1 == 0 || (_hasSecondIp && port2 == 0)) {
// shard server isn't up yet
return false;
if (unlikely(!_infoLoaded)) {
uint16_t port1 = _shared.ports[0].load();
uint16_t port2 = _shared.ports[1].load();
// Avoid registering with only one port, so that clients can just wait on
// the first port being ready and they always have both.
if (port1 == 0 || (_hasSecondIp && port2 == 0)) {
// shard server isn't up yet
return false;
}
uint32_t ip1 = _shared.ips[0].load();
uint32_t ip2 = _shared.ips[1].load();
uint32_t ip = htonl(ip1);
memcpy(_info.ip1.data.data(), &ip, 4);
_info.port1 = port1;
ip = htonl(ip2);
memcpy(_info.ip2.data.data(), &ip, 4);
_info.port2 = port2;
_infoLoaded = true;
}
uint32_t ip1 = _shared.ips[0].load();
uint32_t ip2 = _shared.ips[1].load();
LOG_INFO(_env, "Registering ourselves (shard %s, %s:%s, %s:%s) with shuckle", _shid, in_addr{htonl(ip1)}, port1, in_addr{htonl(ip2)}, port2);
std::string err = registerShard(_shuckleHost, _shucklePort, 10_sec, _shid, ip1, port1, ip2, port2);
std::string err;
if (likely(_registerCompleted)) {
std::array<AddrsInfo, 5> replicas;
LOG_INFO(_env, "Fetching replicas for shardId %s from shuckle", _shrid.shardId());
err = fetchShardReplicas(_shuckleHost, _shucklePort, 10_sec, _shrid, replicas);
if (!err.empty()) {
_env.updateAlert(_alert, "Failed getting shard replicas from shuckle: %s", err);
return false;
}
if (_info != replicas[_shrid.replicaId().u8]) {
_env.updateAlert(_alert, "AddrsInfo in shuckle: %s , not matching local AddrsInfo: %s", replicas[_shrid.replicaId().u8], _info);
return false;
}
{
std::lock_guard guard(_shared.replicasLock);
_shared.replicas = replicas;
}
}
LOG_INFO(_env, "Registering ourselves (shard %s, %s) with shuckle", _shrid, _info);
err = registerShardReplica(_shuckleHost, _shucklePort, 10_sec, _shrid, _shrid.replicaId() == _leaderReplicaId, _info);
if (!err.empty()) {
_env.updateAlert(_alert, "Couldn't register ourselves with shuckle: %s", err);
return false;
}
_env.clearAlert(_alert);
if (unlikely(!_registerCompleted)){
_registerCompleted = true;
// Even though we registered successfully we want to do another loop quickly to fetch replica information
return false;
}
return true;
}
};
@@ -830,7 +930,7 @@ public:
}
};
void runShard(ShardId shid, const std::string& dbDir, const ShardOptions& options) {
void runShard(ShardReplicaId shrid, const std::string& dbDir, const ShardOptions& options) {
int logOutFd = STDOUT_FILENO;
if (!options.logFile.empty()) {
logOutFd = open(options.logFile.c_str(), O_WRONLY|O_CREAT|O_APPEND, 0644);
@@ -848,7 +948,7 @@ void runShard(ShardId shid, const std::string& dbDir, const ShardOptions& option
Env env(logger, xmon, "startup");
{
LOG_INFO(env, "Running shard %s with options:", shid);
LOG_INFO(env, "Running shard %s with options:", shrid);
LOG_INFO(env, " level = %s", options.logLevel);
LOG_INFO(env, " logFile = '%s'", options.logFile);
LOG_INFO(env, " shuckleHost = '%s'", options.shuckleHost);
@@ -864,6 +964,14 @@ void runShard(ShardId shid, const std::string& dbDir, const ShardOptions& option
LOG_INFO(env, " simulateIncomingPacketDrop = %s", options.simulateIncomingPacketDrop);
LOG_INFO(env, " simulateOutgoingPacketDrop = %s", options.simulateOutgoingPacketDrop);
LOG_INFO(env, " syslog = %s", (int)options.syslog);
// LogsDB settings are only logged when LogsDB is enabled for this shard.
if (options.writeToLogsDB) {
    LOG_INFO(env, "Using LogsDB with options:");
    LOG_INFO(env, " dontWaitForReplication = '%s'", options.dontWaitForReplication);
    LOG_INFO(env, " dontDoReplication = '%s'", options.dontDoReplication);
    LOG_INFO(env, " forceLeader = '%s'", options.forceLeader);
    // Log the flag the label names (previously this printed leaderReplicaId).
    LOG_INFO(env, " avoidBeingLeader = '%s'", options.avoidBeingLeader);
    LOG_INFO(env, " initialStart = '%s'", options.initialStart);
}
}
// Immediately start xmon: we want the database initializing update to
@@ -874,7 +982,7 @@ void runShard(ShardId shid, const std::string& dbDir, const ShardOptions& option
XmonConfig config;
{
std::ostringstream ss;
ss << std::setw(3) << std::setfill('0') << shid;
ss << std::setw(5) << std::setfill('0') << shrid;
config.appInstance = "eggsshard" + ss.str();
}
config.prod = options.xmonProd;
@@ -902,21 +1010,22 @@ void runShard(ShardId shid, const std::string& dbDir, const ShardOptions& option
rocksDBOptions.manual_wal_flush = true;
sharedDB.open(rocksDBOptions, dbDir);
ShardDB shardDB(logger, xmon, shid, options.transientDeadlineInterval, sharedDB);
ShardDB shardDB(logger, xmon, shrid.shardId(), options.transientDeadlineInterval, sharedDB);
env.clearAlert(dbInitAlert);
ShardShared shared(sharedDB, shardDB);
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardServer>(logger, xmon, shid, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardWriter>(logger, xmon, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardRegisterer>(logger, xmon, shid, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardBlockServiceUpdater>(logger, xmon, shid, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardStatsInserter>(logger, xmon, shid, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardServer>(logger, xmon, shrid.shardId(), options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardWriter>(logger, xmon, shrid, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardRegisterer>(logger, xmon, shrid, options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardBlockServiceUpdater>(logger, xmon, shrid.shardId(), options, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardStatsInserter>(logger, xmon, shrid.shardId(), options, shared)));
if (options.metrics) {
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardMetricsInserter>(logger, xmon, shid, shared)));
threads.emplace_back(LoopThread::Spawn(std::make_unique<ShardMetricsInserter>(logger, xmon, shrid.shardId(), shared)));
}
// from this point on termination on SIGINT/SIGTERM will be graceful
LoopThread::waitUntilStopped(threads);
threads.clear();
shardDB.close();
sharedDB.close();
+8 -1
View File
@@ -28,6 +28,13 @@ struct ShardOptions {
bool xmonProd = false;
bool metrics = false;
Duration transientDeadlineInterval = DEFAULT_DEADLINE_INTERVAL;
uint8_t leaderReplicaId = 0;
bool dontWaitForReplication = true;
bool dontDoReplication = true;
bool forceLeader = false;
bool avoidBeingLeader = true;
bool initialStart = true;
bool writeToLogsDB = false;
};
void runShard(ShardId shid, const std::string& dbDir, const ShardOptions& options);
void runShard(ShardReplicaId shrid, const std::string& dbDir, const ShardOptions& options);
+13 -10
View File
@@ -121,12 +121,15 @@ static int pickMtu(uint16_t mtu) {
}
void ShardLogEntry::pack(BincodeBuf& buf) const {
buf.packScalar<uint32_t>(SHARD_LOG_PROTOCOL_VERSION);
time.pack(buf);
buf.packScalar<uint16_t>((uint16_t)body.kind());
body.pack(buf);
}
void ShardLogEntry::unpack(BincodeBuf& buf) {
uint32_t protocol = buf.unpackScalar<uint32_t>();
ALWAYS_ASSERT(protocol == SHARD_LOG_PROTOCOL_VERSION);
time.unpack(buf);
ShardLogEntryKind kind = (ShardLogEntryKind)buf.unpackScalar<uint16_t>();
body.unpack(buf, kind);
@@ -1370,7 +1373,7 @@ struct ShardDBImpl {
return NO_ERROR;
}
EggsError _prepareAddSpanInitiate(const rocksdb::ReadOptions& options, EggsTime time, const AddSpanInitiateReq& req, InodeId reference, AddSpanInitiateEntry& entry) {
EggsError _prepareAddSpanInitiate(const rocksdb::ReadOptions& options, EggsTime time, const AddSpanInitiateReq& req, InodeId reference, AddSpanInitiateEntry& entry, bool withReference) {
if (req.fileId.type() != InodeType::FILE && req.fileId.type() != InodeType::SYMLINK) {
return EggsError::TYPE_IS_DIRECTORY;
}
@@ -1399,6 +1402,7 @@ struct ShardDBImpl {
}
// start filling in entry
entry.withReference = withReference;
entry.fileId = req.fileId;
entry.byteOffset = req.byteOffset;
entry.storageClass = req.storageClass;
@@ -1721,11 +1725,11 @@ struct ShardDBImpl {
break;
case ShardMessageKind::ADD_SPAN_INITIATE: {
const auto& addSpanReq = req.getAddSpanInitiate();
err = _prepareAddSpanInitiate(options, time, addSpanReq, addSpanReq.fileId, logEntryBody.setAddSpanInitiate());
err = _prepareAddSpanInitiate(options, time, addSpanReq, addSpanReq.fileId, logEntryBody.setAddSpanInitiate(), false);
break; }
case ShardMessageKind::ADD_SPAN_INITIATE_WITH_REFERENCE: {
const auto& addSpanReq = req.getAddSpanInitiateWithReference();
err = _prepareAddSpanInitiate(options, time, addSpanReq.req, addSpanReq.reference, logEntryBody.setAddSpanInitiate());
err = _prepareAddSpanInitiate(options, time, addSpanReq.req, addSpanReq.reference, logEntryBody.setAddSpanInitiate(), true);
break; }
case ShardMessageKind::ADD_SPAN_CERTIFY:
err = _prepareAddSpanCertify(time, req.getAddSpanCertify(), logEntryBody.setAddSpanCertify());
@@ -3437,7 +3441,7 @@ struct ShardDBImpl {
return NO_ERROR;
}
EggsError applyLogEntry(ShardMessageKind reqKind, uint64_t logIndex, const ShardLogEntry& logEntry, ShardRespContainer& resp) {
EggsError applyLogEntry(uint64_t logIndex, const ShardLogEntry& logEntry, ShardRespContainer& resp) {
// TODO figure out the story with what regards time monotonicity (possibly drop non-monotonic log
// updates?)
@@ -3516,12 +3520,11 @@ struct ShardDBImpl {
err = _applyAddInlineSpan(time, batch, logEntryBody.getAddInlineSpan(), resp.setAddInlineSpan());
break;
case ShardLogEntryKind::ADD_SPAN_INITIATE: {
if (reqKind == ShardMessageKind::ADD_SPAN_INITIATE) {
err = _applyAddSpanInitiate(time, batch, logEntryBody.getAddSpanInitiate(), resp.setAddSpanInitiate());
} else {
ALWAYS_ASSERT(reqKind == ShardMessageKind::ADD_SPAN_INITIATE_WITH_REFERENCE);
if (logEntryBody.getAddSpanInitiate().withReference) {
auto& refResp = resp.setAddSpanInitiateWithReference();
err = _applyAddSpanInitiate(time, batch, logEntryBody.getAddSpanInitiate(), refResp.resp);
} else {
err = _applyAddSpanInitiate(time, batch, logEntryBody.getAddSpanInitiate(), resp.setAddSpanInitiate());
}
break; }
case ShardLogEntryKind::ADD_SPAN_CERTIFY:
@@ -3809,8 +3812,8 @@ EggsError ShardDB::prepareLogEntry(const ShardReqContainer& req, ShardLogEntry&
return ((ShardDBImpl*)_impl)->prepareLogEntry(req, logEntry);
}
EggsError ShardDB::applyLogEntry(ShardMessageKind reqKind, uint64_t logEntryIx, const ShardLogEntry& logEntry, ShardRespContainer& resp) {
return ((ShardDBImpl*)_impl)->applyLogEntry(reqKind, logEntryIx, logEntry, resp);
EggsError ShardDB::applyLogEntry(uint64_t logEntryIx, const ShardLogEntry& logEntry, ShardRespContainer& resp) {
return ((ShardDBImpl*)_impl)->applyLogEntry(logEntryIx, logEntry, resp);
}
uint64_t ShardDB::lastAppliedLogEntry() {
+6 -1
View File
@@ -1,6 +1,7 @@
#pragma once
#include <array>
#include <cstdint>
#include <vector>
#include <rocksdb/db.h>
@@ -13,6 +14,10 @@ struct ShardLogEntry {
EggsTime time;
ShardLogEntryContainer body;
bool operator==(const ShardLogEntry& rhs) const {
return time == rhs.time && body == rhs.body;
}
void clear() {
time = 0;
body.clear();
@@ -98,7 +103,7 @@ public:
// This function does NOT persist the changes (in fact it doesn't even write
// to the WAL). You need to call flush(). But this allows you to apply many
// log entries without any write/fsync.
EggsError applyLogEntry(ShardMessageKind reqKind, uint64_t logEntryIx, const ShardLogEntry& logEntry, ShardRespContainer& resp);
EggsError applyLogEntry(uint64_t logEntryIx, const ShardLogEntry& logEntry, ShardRespContainer& resp);
// Flushes the changes to the WAL, and persists it if sync=true (won't be
// required when we have a distributed log).
+45 -5
View File
@@ -1,5 +1,5 @@
#include <arpa/inet.h>
#include <charconv>
#include <cstdint>
#include <pthread.h>
#include <stdio.h>
#include <filesystem>
@@ -13,7 +13,7 @@
#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while(false)
static void usage(const char* binary) {
fprintf(stderr, "Usage: %s DIRECTORY SHARD_ID\n\n", binary);
fprintf(stderr, "Usage: %s DIRECTORY SHARD_ID [REPLICA_ID]\n\n", binary);
fprintf(stderr, "Options:\n");
fprintf(stderr, " -log-level trace|debug|info|error\n");
fprintf(stderr, " Note that 'trace' will only work for debug builds.\n");
@@ -37,6 +37,8 @@ static void usage(const char* binary) {
fprintf(stderr, " Enable metrics.\n");
fprintf(stderr, " -transient-deadline-interval\n");
fprintf(stderr, " Tweaks the interval with wich the deadline for transient file gets bumped.\n");
fprintf(stderr, " -use-logsdb LEADER|FOLLOWER|NONE\n");
fprintf(stderr, " Specify in which mode to use LogsDB, as LEADER or FOLLOWER or don't use. Default is don't use. Only replica id 0 can be leader. \n");
}
static double parseDouble(const std::string& arg) {
@@ -183,16 +185,35 @@ int main(int argc, char** argv) {
options.metrics = true;
} else if (arg == "-transient-deadline-interval") {
options.transientDeadlineInterval = parseDuration(getNextArg());
} else if (arg == "-use-logsdb") {
// -use-logsdb takes one value: LEADER, FOLLOWER or NONE.
std::string logsDBMode = getNextArg();
if (logsDBMode == "LEADER") {
    options.forceLeader = true;
    options.avoidBeingLeader = false;
    options.writeToLogsDB = true;
    options.initialStart = true;
} else if (logsDBMode == "FOLLOWER") {
    // Only enables LogsDB; the leader-related options are left untouched.
    options.writeToLogsDB = true;
} else if (logsDBMode == "NONE") {
    // Accepted for explicitness; no option is changed.
} else {
    // Trailing newline so the usage text printed next starts on its own line.
    fprintf(stderr, "Invalid logsDB mode %s\n", logsDBMode.c_str());
    dieWithUsage();
}
} else {
args.emplace_back(std::move(arg));
}
}
if (args.size() != 2) {
fprintf(stderr, "Expecting two positional arguments (DIRECTORY SHARD_ID), got %ld.\n", args.size());
if (args.size() < 2 || args.size() > 3) {
fprintf(stderr, "Expecting two or three positional arguments (DIRECTORY SHARD_ID [REPLICA_ID]), got %ld.\n", args.size());
dieWithUsage();
}
// Add default 0 for replica id to simplify rollout
if (args.size() == 2) {
args.emplace_back("0");
}
#ifndef EGGS_DEBUG
if (options.logLevel <= LogLevel::LOG_TRACE) {
die("Cannot use trace for non-debug builds (it won't work).");
@@ -225,7 +246,26 @@ int main(int argc, char** argv) {
}
ShardId shid(shardId);
runShard(shid, dbDir, options);
// Third positional argument: the replica id, which must be a whole number in [0, 4].
int replicaId = std::stoi(args.at(2), &processed);
if (processed != args.at(2).size() || replicaId < 0 || replicaId > 4) {
    die("Invalid replicaId '%s', expecting a number between 0 and 4.\n", args.at(2).c_str());
}
ShardReplicaId shrid(shid, replicaId);

// With LogsDB enabled the mode must agree with the replica id:
// replica 0 has to be started as LEADER, every other replica as FOLLOWER.
if (options.writeToLogsDB) {
    if (replicaId == 0) {
        if (!options.forceLeader) {
            die("When using LogsDB replica 0 needs to run in LEADER mode\n");
        }
    } else {
        if (!options.avoidBeingLeader) {
            die("When using LogsDB replicas other than 0 need to run in FOLLOWER mode\n");
        }
    }
}
runShard(shrid, dbDir, options);
return 0;
}
+4 -2
View File
@@ -5,10 +5,11 @@ cd "$(dirname "$0")"
echo "$(tput bold)C++ tests, sanitized$(tput sgr0)"
set -x
./build.py sanitized rs/rs-tests crc32c/crc32c-tests tests/tests
./build.py sanitized rs/rs-tests crc32c/crc32c-tests tests/tests tests/logsdbtests
UBSAN_OPTIONS=print_stacktrace=1 ./build/sanitized/rs/rs-tests
UBSAN_OPTIONS=print_stacktrace=1 ./build/sanitized/crc32c/crc32c-tests
UBSAN_OPTIONS=print_stacktrace=1 ./build/sanitized/tests/tests
UBSAN_OPTIONS=print_stacktrace=1 ./build/sanitized/tests/logsdbtests
set +x
# valgrind doesn't support fnctl F_SET_RW_HINT (1036), and as far as I can
@@ -16,8 +17,9 @@ set +x
echo "$(tput bold)C++ tests, valgrind$(tput sgr0)"
set -x
./build.py valgrind rs/rs-tests crc32c/crc32c-tests tests/tests
./build.py valgrind rs/rs-tests crc32c/crc32c-tests tests/tests tests/logsdbtests
valgrind --exit-on-first-error=yes -q --error-exitcode=1 ./build/valgrind/rs/rs-tests
valgrind --exit-on-first-error=yes -q --error-exitcode=1 ./build/valgrind/crc32c/crc32c-tests
valgrind --exit-on-first-error=yes -q --suppressions=valgrind-suppressions --error-exitcode=1 ./build/valgrind/tests/tests 2> >(grep -v "Warning: unimplemented fcntl command: 1036")
valgrind --exit-on-first-error=yes -q --suppressions=valgrind-suppressions --error-exitcode=1 ./build/valgrind/tests/logsdbtests 2> >(grep -v "Warning: unimplemented fcntl command: 1036")
set +x
+3
View File
@@ -2,3 +2,6 @@ include_directories(${eggsfs_SOURCE_DIR}/core ${eggsfs_SOURCE_DIR}/shard ${eggsf
add_executable(tests tests.cpp doctest.h)
target_link_libraries(tests PRIVATE core shard cdc)
add_executable(logsdbtests logsdbtests.cpp doctest.h)
target_link_libraries(logsdbtests PRIVATE core)
+208
View File
@@ -0,0 +1,208 @@
#include <iostream>
#include <ostream>
#include <resolv.h>
#include <unordered_set>
#include <vector>
#include "LogsDB.hpp"
#include "Msgs.hpp"
#include "MsgsGen.hpp"
#include "Time.hpp"
#include "utils/TempLogsDB.hpp"
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.h"
// Lets doctest report an AbstractException by its what() text, placed on a
// line of its own.
REGISTER_EXCEPTION_TRANSLATOR(AbstractException& ex) {
    // Before, we had stack traces and this was useful, now a bit less
    std::stringstream message;
    message << std::endl;
    message << ex.what();
    message << std::endl;
    const std::string text = message.str();
    return doctest::String(text.c_str());
}
// Streams every request the vector points to, back to back with no separator.
std::ostream& operator<<(std::ostream& out, const std::vector<LogsDBRequest*>& data) {
    for (auto it = data.begin(); it != data.end(); ++it) {
        LogsDBRequest* request = *it;
        out << *request;
    }
    return out;
}
// Exercises a LogsDB created with only a log level passed to TempLogsDB:
// it must refuse local appends, accept LOG_WRITE requests, and expose the
// written entries to readEntries only after a RELEASE request.
TEST_CASE("EmptyLogsDBNoOverrides") {
// init time control
_setCurrentTime(eggsNow());
TempLogsDB db(LogLevel::LOG_ERROR);
std::vector<LogsDBLogEntry> entries;
std::vector<LogsDBRequest> inReq;
std::vector<LogsDBResponse> inResp;
std::vector<LogsDBRequest*> outReq;
std::vector<LogsDBResponse> outResp;
// verify empty db with no flags starts as follower and does not try to pass any messages
{
REQUIRE_FALSE(db->isLeader());
// This local vector shadows the outer `entries` for the rest of this scope.
std::vector<LogsDBLogEntry> entries{
initEntry(1, "entry1"),
initEntry(4, "entry4"),
initEntry(5, "entry5"),
};
// Appending locally is expected to be rejected with LEADER_PREEMPTED.
REQUIRE(db->appendEntries(entries) == EggsError::LEADER_PREEMPTED);
db->getOutgoingMessages(outReq, outResp);
REQUIRE(outReq.empty());
REQUIRE(outResp.empty());
entries.clear();
db->readEntries(entries);
REQUIRE(entries.empty());
// inReq and inResp are still empty here.
db->processIncomingMessages(inReq, inResp);
REQUIRE(db->getNextTimeout() == LogsDB::LEADER_INACTIVE_TIMEOUT);
}
// verify writing to follower succeeds
{
size_t requestId{0};
LeaderToken token(1, 1);
// Request ids sent; each one is erased again when its response is seen.
std::unordered_set<size_t> reqIds;
// The writes are issued with indices in the order 1, 3, 2.
entries = {initEntry(1, "entry1"), initEntry(3, "entry3"), initEntry(2, "entry2")};
for (auto& entry : entries) {
inReq.emplace_back();
auto& req = inReq.back();
req.replicaId = token.replica();
req.header.kind = LogMessageKind::LOG_WRITE;
req.header.requestId = requestId++;
reqIds.emplace(req.header.requestId);
auto& writeReq = req.requestContainer.setLogWrite();
writeReq.idx = entry.idx;
writeReq.token = token;
writeReq.value.els = entry.value;
writeReq.lastReleased = 0;
}
db->processIncomingMessages(inReq, inResp);
db->getOutgoingMessages(outReq, outResp);
// One LOG_WRITE response with result 0 is expected per request, and no outgoing requests.
REQUIRE(outReq.empty());
REQUIRE(outResp.size() == entries.size());
for (auto& resp : outResp) {
REQUIRE(resp.replicaId == token.replica());
REQUIRE(resp.header.kind == LogMessageKind::LOG_WRITE);
REQUIRE(resp.responseContainer.getLogWrite().result == 0);
reqIds.erase(resp.header.requestId);
}
REQUIRE(reqIds.empty());
// The writes carried lastReleased = 0, and nothing is expected to be readable yet.
entries.clear();
db->readEntries(entries);
REQUIRE(entries.empty());
}
// Release written data verify it's readable and no catchup requests
{
size_t requestId{0};
LeaderToken token(1, 1);
// NOTE(review): reqIds is filled below but never checked in this scope.
std::unordered_set<size_t> reqIds;
inReq.clear();
inReq.emplace_back();
auto& req = inReq.back();
req.replicaId = token.replica();
req.header.kind = LogMessageKind::RELEASE;
req.header.requestId = requestId++;
reqIds.emplace(req.header.requestId);
auto& releaseReq = req.requestContainer.setRelease();
releaseReq.lastReleased = 3;
releaseReq.token = token;
db->processIncomingMessages(inReq, inResp);
db->getOutgoingMessages(outReq, outResp);
// NOTE(review): prints the outgoing requests on every run; looks like leftover debug output -- confirm.
std::cerr << outReq << std::endl;
REQUIRE(outReq.empty());
REQUIRE(outResp.empty());
// `entries` was cleared at the end of the previous scope, so the count below only covers this read.
db->readEntries(entries);
REQUIRE(entries.size() == 3);
}
}
// A replica started with forceLeader (and replication disabled) becomes leader
// after one processing step, assigns consecutive indices after `lastRead` to
// appended entries and returns exactly those entries when read back.
TEST_CASE("LogsDBStandAloneLeader") {
    _setCurrentTime(eggsNow());
    LogIdx readUpTo = 100;
    TempLogsDB db(
        LogLevel::LOG_ERROR,
        /*replicaId=*/0,
        /*lastRead=*/readUpTo,
        /*dontWaitForReplication=*/true,
        /*dontDoReplication=*/true,
        /*forceLeader=*/true,
        /*avoidBeingLeader=*/false,
        /*initialStart=*/true,
        /*forcedLastReleased=*/readUpTo);

    std::vector<LogsDBRequest> inReq;
    std::vector<LogsDBResponse> inResp;
    std::vector<LogsDBRequest*> outReq;
    std::vector<LogsDBResponse> outResp;

    // One tick with no incoming traffic.
    db->processIncomingMessages(inReq, inResp);
    REQUIRE(db->isLeader());

    // The indices supplied here (1, 4, 5) are expected to be overwritten by the append.
    std::vector<LogsDBLogEntry> entries{
        initEntry(1, "entry1"),
        initEntry(4, "entry4"),
        initEntry(5, "entry5"),
    };
    auto err = db->appendEntries(entries);
    db->processIncomingMessages(inReq, inResp);
    REQUIRE(err == NO_ERROR);

    // Appended entries are numbered readUpTo + 1, readUpTo + 2, ...
    for (size_t i = 0; i < entries.size(); ++i) {
        REQUIRE(entries[i].idx == readUpTo + i + 1);
    }

    std::vector<LogsDBLogEntry> readEntries;
    db->readEntries(readEntries);
    REQUIRE(entries == readEntries);
}
// With avoidBeingLeader set, the replica stays silent: it sends no messages
// before or after the leader-inactive timeout has elapsed and keeps reporting
// LEADER_INACTIVE_TIMEOUT as its next timeout.
TEST_CASE("LogsDBAvoidBeingLeader") {
    _setCurrentTime(eggsNow());
    TempLogsDB db(
        LogLevel::LOG_ERROR,
        /*replicaId=*/0,
        /*lastRead=*/0,
        /*dontWaitForReplication=*/false,
        /*dontDoReplication=*/false,
        /*forceLeader=*/false,
        /*avoidBeingLeader=*/true);
    REQUIRE_FALSE(db->isLeader());

    std::vector<LogsDBRequest> inReq;
    std::vector<LogsDBResponse> inResp;
    db->processIncomingMessages(inReq, inResp);

    std::vector<LogsDBRequest*> outReq;
    std::vector<LogsDBResponse> outResp;
    db->getOutgoingMessages(outReq, outResp);
    REQUIRE(outResp.empty());
    REQUIRE(outReq.empty());
    REQUIRE(db->getNextTimeout() == LogsDB::LEADER_INACTIVE_TIMEOUT);

    // Move the clock just past the leader-inactive timeout and tick again.
    _setCurrentTime(eggsNow() + LogsDB::LEADER_INACTIVE_TIMEOUT + 1_ms);
    db->processIncomingMessages(inReq, inResp);
    db->getOutgoingMessages(outReq, outResp);
    REQUIRE(outResp.empty());
    REQUIRE(outReq.empty());
    REQUIRE(db->getNextTimeout() == LogsDB::LEADER_INACTIVE_TIMEOUT);
}
// An empty follower with default options stays quiet until the leader-inactive
// timeout elapses, then sends one NEW_LEADER request to each of the other
// replicas, nominating LeaderToken(0, 1).
TEST_CASE("EmptyLogsDBLeaderElection") {
    _setCurrentTime(eggsNow());
    TempLogsDB db(LogLevel::LOG_ERROR);
    REQUIRE_FALSE(db->isLeader());

    std::vector<LogsDBRequest> inReq;
    std::vector<LogsDBResponse> inResp;
    db->processIncomingMessages(inReq, inResp);

    std::vector<LogsDBRequest*> outReq;
    std::vector<LogsDBResponse> outResp;
    db->getOutgoingMessages(outReq, outResp);
    REQUIRE(outResp.empty());
    REQUIRE(outReq.empty());
    REQUIRE(db->getNextTimeout() == LogsDB::LEADER_INACTIVE_TIMEOUT);

    // Move the clock just past the leader-inactive timeout and tick again.
    _setCurrentTime(eggsNow() + LogsDB::LEADER_INACTIVE_TIMEOUT + 1_ms);
    db->processIncomingMessages(inReq, inResp);
    db->getOutgoingMessages(outReq, outResp);
    REQUIRE(outResp.empty());
    REQUIRE(db->getNextTimeout() == LogsDB::RESPONSE_TIMEOUT);

    //expect leader election messages
    REQUIRE(outReq.size() == LogsDB::REPLICA_COUNT - 1);
    // Replica ids that have not been addressed yet; each request removes its target.
    std::unordered_set<uint8_t> replicaIds{1, 2, 3, 4};
    // The size check above already pins the number of requests, so visiting
    // every element is the same walk as indexing 0 .. REPLICA_COUNT - 2.
    for (LogsDBRequest* reqPtr : outReq) {
        auto& req = *reqPtr;
        replicaIds.erase(req.replicaId.u8);
        REQUIRE(req.header.kind == LogMessageKind::NEW_LEADER);
        REQUIRE(req.requestContainer.getNewLeader().nomineeToken == LeaderToken(0, 1));
    }
    REQUIRE(replicaIds.empty());
}
+5 -5
View File
@@ -456,7 +456,7 @@ TEST_CASE("touch file") {
req.note = "test note";
NO_EGGS_ERROR(db->prepareLogEntry(*reqContainer, *logEntry));
constructTime = logEntry->time;
NO_EGGS_ERROR(db->applyLogEntry(ShardMessageKind::CONSTRUCT_FILE, ++logEntryIndex, *logEntry, *respContainer));
NO_EGGS_ERROR(db->applyLogEntry(++logEntryIndex, *logEntry, *respContainer));
db->flush(false);
auto& resp = respContainer->getConstructFile();
id = resp.id;
@@ -479,7 +479,7 @@ TEST_CASE("touch file") {
req.name = name;
NO_EGGS_ERROR(db->prepareLogEntry(*reqContainer, *logEntry));
linkTime = logEntry->time;
NO_EGGS_ERROR(db->applyLogEntry(ShardMessageKind::LINK_FILE, ++logEntryIndex, *logEntry, *respContainer));
NO_EGGS_ERROR(db->applyLogEntry(++logEntryIndex, *logEntry, *respContainer));
db->flush(false);
}
{
@@ -529,7 +529,7 @@ TEST_CASE("override") {
req.type = (uint8_t)InodeType::FILE;
req.note = "test note";
NO_EGGS_ERROR(db->prepareLogEntry(*reqContainer, *logEntry));
NO_EGGS_ERROR(db->applyLogEntry(ShardMessageKind::CONSTRUCT_FILE, ++logEntryIndex, *logEntry, *respContainer));
NO_EGGS_ERROR(db->applyLogEntry(++logEntryIndex, *logEntry, *respContainer));
db->flush(false);
auto& resp = respContainer->getConstructFile();
id = resp.id;
@@ -547,7 +547,7 @@ TEST_CASE("override") {
req.ownerId = ROOT_DIR_INODE_ID;
req.name = name;
NO_EGGS_ERROR(db->prepareLogEntry(*reqContainer, *logEntry));
NO_EGGS_ERROR(db->applyLogEntry(ShardMessageKind::LINK_FILE, ++logEntryIndex, *logEntry, *respContainer));
NO_EGGS_ERROR(db->applyLogEntry(++logEntryIndex, *logEntry, *respContainer));
db->flush(false);
creationTime = respContainer->getLinkFile().creationTime;
}
@@ -565,7 +565,7 @@ TEST_CASE("override") {
req.oldCreationTime = fooCreationTime;
req.newName = "bar";
NO_EGGS_ERROR(db->prepareLogEntry(*reqContainer, *logEntry));
NO_EGGS_ERROR(db->applyLogEntry(ShardMessageKind::SAME_DIRECTORY_RENAME, ++logEntryIndex, *logEntry, *respContainer));
NO_EGGS_ERROR(db->applyLogEntry(++logEntryIndex, *logEntry, *respContainer));
db->flush(false);
}
{
+100
View File
@@ -0,0 +1,100 @@
#pragma once
#include <filesystem>
#include <ostream>
#include "Env.hpp"
#include "LogsDB.hpp"
struct TempLogsDB {
std::string dbDir;
Logger logger;
std::shared_ptr<XmonAgent> xmon;
std::unique_ptr<Env> env;
std::unique_ptr<SharedRocksDB> sharedDB;
std::unique_ptr<LogsDB> db;
TempLogsDB(
LogLevel level,
ReplicaId replicaId = 0,
LogIdx lastRead = 0,
bool dontWaitForReplication = false,
bool dontDoReplication = false,
bool forceLeader = false,
bool avoidBeingLeader = false,
bool initialStart = true,
LogIdx forcedLastReleased = false): logger(level, STDERR_FILENO, false, false), env(new Env(logger, xmon, "LogsDB"))
{
dbDir = std::string("temp-logs-db.XXXXXX");
if (mkdtemp(dbDir.data()) == nullptr) {
throw SYSCALL_EXCEPTION("mkdtemp");
}
sharedDB = std::make_unique<SharedRocksDB>(logger, xmon);
initSharedDB();
db = std::make_unique<LogsDB>(*env, *sharedDB, replicaId, lastRead, dontWaitForReplication, dontDoReplication, forceLeader, avoidBeingLeader, initialStart, forcedLastReleased);
}
// useful to test recovery
void restart(
ReplicaId replicaId = 0,
LogIdx lastRead = 0,
bool dontWaitForReplication = false,
bool dontDoReplication = false,
bool forceLeader = false,
bool avoidBeingLeader = false,
bool initialStart = true,
LogIdx forcedLastReleased = false)
{
db->close();
sharedDB = std::make_unique<SharedRocksDB>(logger, xmon);
initSharedDB();
db = std::make_unique<LogsDB>(*env, *sharedDB, replicaId, lastRead, dontWaitForReplication, dontDoReplication, forceLeader, avoidBeingLeader, initialStart, forcedLastReleased);
}
~TempLogsDB() {
std::error_code err;
if (std::filesystem::remove_all(std::filesystem::path(dbDir), err) < 0) {
std::cerr << "Could not remove " << dbDir << ": " << err << std::endl;
}
}
std::unique_ptr<LogsDB>& operator->() {
return db;
}
void initSharedDB() {
sharedDB->registerCFDescriptors({{rocksdb::kDefaultColumnFamilyName, {}}});
sharedDB->registerCFDescriptors(LogsDB::getColumnFamilyDescriptors());
rocksdb::Options rocksDBOptions;
rocksDBOptions.create_if_missing = true;
rocksDBOptions.create_missing_column_families = true;
rocksDBOptions.compression = rocksdb::kLZ4Compression;
rocksDBOptions.bottommost_compression = rocksdb::kZSTD;
// 1000*256 = 256k open files at once, given that we currently run on a
// single machine this is appropriate.
rocksDBOptions.max_open_files = 1000;
// We batch writes and flush manually.
rocksDBOptions.manual_wal_flush = true;
sharedDB->open(rocksDBOptions, dbDir);
}
};
// Builds a log entry with index `idx` whose value holds the bytes of `data`.
// `data` is taken by const reference: it is only read, so no copy is needed.
inline LogsDBLogEntry initEntry(uint64_t idx, const std::string& data) {
    LogsDBLogEntry e;
    e.idx = idx;
    e.value.assign(data.begin(), data.end());
    return e;
}
inline std::ostream& operator<<(std::ostream& out, const std::vector<LogsDBLogEntry>& entries) {
out << "{ ";
for (auto& entry : entries) {
out << "{" << entry << "}" << ",";
}
out << "} ";
return out;
}
+68 -7
View File
@@ -294,7 +294,7 @@ func generateGoErrorCodes(out io.Writer, errors []string) {
//go:embed msgs_bincode.go.header
var goHeader string
func generateGo(errors []string, shardReqResps []reqRespType, cdcReqResps []reqRespType, shuckleReqResps []reqRespType, blocksReqResps []reqRespType, extras []reflect.Type) []byte {
func generateGo(errors []string, shardReqResps []reqRespType, cdcReqResps []reqRespType, shuckleReqResps []reqRespType, blocksReqResps []reqRespType, logReqResps []reqRespType, extras []reflect.Type) []byte {
out := new(bytes.Buffer)
out.Write([]byte(goHeader))
@@ -305,6 +305,7 @@ func generateGo(errors []string, shardReqResps []reqRespType, cdcReqResps []reqR
generateGoMsgKind(out, "CDCMessageKind", "CDCRequest", "CDCResponse", "MkCDCMessage", cdcReqResps)
generateGoMsgKind(out, "ShuckleMessageKind", "ShuckleRequest", "ShuckleResponse", "MkShuckleMessage", shuckleReqResps)
generateGoMsgKind(out, "BlocksMessageKind", "BlocksRequest", "BlocksResponse", "MkBlocksMessage", blocksReqResps)
generateGoMsgKind(out, "LogMessageKind", "LogRequest", "LogResponse", "MkLogMessage", logReqResps)
for _, reqResp := range shardReqResps {
generateGoReqResp(out, reqResp, "ShardMessageKind", "ShardRequestKind", "ShardResponseKind")
@@ -321,6 +322,9 @@ func generateGo(errors []string, shardReqResps []reqRespType, cdcReqResps []reqR
for _, reqResp := range blocksReqResps {
generateGoReqResp(out, reqResp, "BlocksMessageKind", "BlocksRequestKind", "BlocksResponseKind")
}
for _, reqResp := range logReqResps {
generateGoReqResp(out, reqResp, "LogMessageKind", "LogRequestKind", "LogResponseKind")
}
return out.Bytes()
}
@@ -750,7 +754,10 @@ func generateKmod(errors []string, shardReqResps []reqRespType, cdcReqResps []re
}
func cppType(t reflect.Type) string {
if t.Name() == "InodeId" || t.Name() == "InodeIdExtra" || t.Name() == "Parity" || t.Name() == "EggsTime" || t.Name() == "ShardId" || t.Name() == "CDCMessageKind" || t.Name() == "Crc" || t.Name() == "BlockServiceId" || t.Name() == "ReplicaId" || t.Name() == "ShardReplicaId" {
if t.Name() == "InodeId" || t.Name() == "InodeIdExtra" || t.Name() == "Parity" ||
t.Name() == "EggsTime" || t.Name() == "ShardId" || t.Name() == "CDCMessageKind" ||
t.Name() == "Crc" || t.Name() == "BlockServiceId" || t.Name() == "ReplicaId" ||
t.Name() == "ShardReplicaId" || t.Name() == "LogIdx" || t.Name() == "LeaderToken" {
return t.Name()
}
if t.Name() == "Blob" {
@@ -827,7 +834,10 @@ func (cg *cppCodegen) gen(expr *subexpr) {
// pack/unpack
// we want InodeId/InodeIdExtra/Parity to be here because of some checks we perform
// when unpacking
if k == reflect.Struct || expr.typ.Name() == "InodeId" || expr.typ.Name() == "InodeIdExtra" || expr.typ.Name() == "Parity" || expr.typ.Name() == "EggsTime" || expr.typ.Name() == "ShardId" || expr.typ.Name() == "Crc" || expr.typ.Name() == "BlockServiceId" || expr.typ.Name() == "ReplicaId" || expr.typ.Name() == "ShardReplicaId" {
if k == reflect.Struct || expr.typ.Name() == "InodeId" || expr.typ.Name() == "InodeIdExtra" ||
expr.typ.Name() == "Parity" || expr.typ.Name() == "EggsTime" || expr.typ.Name() == "ShardId" ||
expr.typ.Name() == "Crc" || expr.typ.Name() == "BlockServiceId" || expr.typ.Name() == "ReplicaId" ||
expr.typ.Name() == "ShardReplicaId" || expr.typ.Name() == "LogIdx" || expr.typ.Name() == "LeaderToken" {
cg.pline(fmt.Sprintf("%s.pack(buf)", expr.fld))
cg.uline(fmt.Sprintf("%s.unpack(buf)", expr.fld))
} else if k == reflect.Bool || k == reflect.Uint8 || k == reflect.Uint16 || k == reflect.Uint32 || k == reflect.Uint64 {
@@ -857,7 +867,9 @@ func (cg *cppCodegen) gen(expr *subexpr) {
// clear/eq
switch k {
case reflect.Bool, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
if expr.typ.Name() == "ShardId" || expr.typ.Name() == "InodeId" || expr.typ.Name() == "InodeIdExtra" || expr.typ.Name() == "Parity" || expr.typ.Name() == "EggsTime" || expr.typ.Name() == "ShardReplicaId" || expr.typ.Name() == "ReplicaId" {
if expr.typ.Name() == "ShardId" || expr.typ.Name() == "InodeId" || expr.typ.Name() == "InodeIdExtra" ||
expr.typ.Name() == "Parity" || expr.typ.Name() == "EggsTime" || expr.typ.Name() == "ShardReplicaId" ||
expr.typ.Name() == "ReplicaId" || expr.typ.Name() == "LogIdx" || expr.typ.Name() == "LeaderToken" {
cg.cline(fmt.Sprintf("%s = %s()", expr.fld, cppType(expr.typ)))
} else {
cg.cline(fmt.Sprintf("%s = %s(0)", expr.fld, cppType(expr.typ)))
@@ -1247,7 +1259,7 @@ func generateCppReqResp(hpp io.Writer, cpp io.Writer, what string, reqResps []re
//go:embed FetchedSpan.hpp
var fetchedSpanCpp string
func generateCpp(errors []string, shardReqResps []reqRespType, cdcReqResps []reqRespType, shuckleReqResps []reqRespType, blocksReqResps []reqRespType, extras []reflect.Type) ([]byte, []byte) {
func generateCpp(errors []string, shardReqResps []reqRespType, cdcReqResps []reqRespType, shuckleReqResps []reqRespType, blocksReqResps []reqRespType, logReqResps []reqRespType, extras []reflect.Type) ([]byte, []byte) {
hppOut := new(bytes.Buffer)
cppOut := new(bytes.Buffer)
@@ -1268,6 +1280,7 @@ func generateCpp(errors []string, shardReqResps []reqRespType, cdcReqResps []req
generateCppKind(hppOut, cppOut, "CDC", cdcReqResps)
generateCppKind(hppOut, cppOut, "Shuckle", shuckleReqResps)
generateCppKind(hppOut, cppOut, "Blocks", blocksReqResps)
generateCppKind(hppOut, cppOut, "Log", logReqResps)
for _, typ := range extras {
generateCppSingle(hppOut, cppOut, typ)
@@ -1292,10 +1305,15 @@ func generateCpp(errors []string, shardReqResps []reqRespType, cdcReqResps []req
generateCppSingle(hppOut, cppOut, reqResp.req)
generateCppSingle(hppOut, cppOut, reqResp.resp)
}
for _, reqResp := range logReqResps {
generateCppSingle(hppOut, cppOut, reqResp.req)
generateCppSingle(hppOut, cppOut, reqResp.resp)
}
generateCppReqResp(hppOut, cppOut, "Shard", shardReqResps)
generateCppReqResp(hppOut, cppOut, "CDC", cdcReqResps)
generateCppReqResp(hppOut, cppOut, "Shuckle", shuckleReqResps)
generateCppReqResp(hppOut, cppOut, "Log", logReqResps)
generateCppLogEntries(
hppOut,
@@ -1429,6 +1447,11 @@ func main() {
"BLOCK_IO_ERROR_FILE",
"INVALID_REPLICA",
"DIFFERENT_ADDRS_INFO",
"LEADER_PREEMPTED",
"LOG_ENTRY_MISSING",
"LOG_ENTRY_TRIMMED",
"LOG_ENTRY_UNRELEASED",
"LOG_ENTRY_RELEASED",
}
kernelShardReqResps := []reqRespType{
@@ -1794,6 +1817,44 @@ func main() {
},
}...)
logReqResps := []reqRespType{
{
0x01,
reflect.TypeOf(msgs.LogWriteReq{}),
reflect.TypeOf(msgs.LogWriteResp{}),
},
{
0x02,
reflect.TypeOf(msgs.ReleaseReq{}),
reflect.TypeOf(msgs.ReleaseResp{}),
},
{
0x03,
reflect.TypeOf(msgs.LogReadReq{}),
reflect.TypeOf(msgs.LogReadResp{}),
},
{
0x04,
reflect.TypeOf(msgs.NewLeaderReq{}),
reflect.TypeOf(msgs.NewLeaderResp{}),
},
{
0x05,
reflect.TypeOf(msgs.NewLeaderConfirmReq{}),
reflect.TypeOf(msgs.NewLeaderConfirmResp{}),
},
{
0x06,
reflect.TypeOf(msgs.LogRecoveryReadReq{}),
reflect.TypeOf(msgs.LogRecoveryReadResp{}),
},
{
0x07,
reflect.TypeOf(msgs.LogRecoveryWriteReq{}),
reflect.TypeOf(msgs.LogRecoveryWriteResp{}),
},
}
kernelExtras := []reflect.Type{
reflect.TypeOf(msgs.DirectoryInfoEntry{}),
reflect.TypeOf(msgs.DirectoryInfo{}),
@@ -1827,7 +1888,7 @@ func main() {
reflect.TypeOf(msgs.Stat{}),
}...)...)
goCode := generateGo(errors, shardReqResps, cdcReqResps, shuckleReqResps, blocksReqResps, extras)
goCode := generateGo(errors, shardReqResps, cdcReqResps, shuckleReqResps, blocksReqResps, logReqResps, extras)
goOutFileName := fmt.Sprintf("%s/msgs_bincode.go", cwd)
writeIfChanged(goOutFileName, goCode)
@@ -1835,7 +1896,7 @@ func main() {
writeIfChanged(fmt.Sprintf("%s/../../kmod/bincodegen.h", cwd), kmodHBytes)
writeIfChanged(fmt.Sprintf("%s/../../kmod/bincodegen.c", cwd), kmodCBytes)
hppBytes, cppBytes := generateCpp(errors, shardReqResps, cdcReqResps, shuckleReqResps, blocksReqResps, extras)
hppBytes, cppBytes := generateCpp(errors, shardReqResps, cdcReqResps, shuckleReqResps, blocksReqResps, logReqResps, extras)
writeIfChanged(fmt.Sprintf("%s/../../cpp/core/MsgsGen.hpp", cwd), hppBytes)
writeIfChanged(fmt.Sprintf("%s/../../cpp/core/MsgsGen.cpp", cwd), cppBytes)
}
+105 -10
View File
@@ -38,6 +38,8 @@ type BlockServiceFlags uint8
type Crc uint32
type NameHash uint64
type Cookie [8]byte
// LogIdx is a uint64-backed index into the distributed log; it is the type of
// the Idx, LastReleased and ReleasedIdx fields of the log messages.
type LogIdx uint64
// LeaderToken is a uint64-backed token carried by log messages in their Token
// and NomineeToken fields.
// NOTE(review): the C++ side builds it from two values and reads a replica id
// back out of it, so it presumably packs a replica id with a counter - confirm.
type LeaderToken uint64
// These four below are the magic number to identify UDP packets. After a three-letter
// string identifying the service we have a version number. The idea is that when the
@@ -76,6 +78,14 @@ const BLOCKS_REQ_PROTOCOL_VERSION uint32 = 0x4f4c42
// '14f4c42'
const BLOCKS_RESP_PROTOCOL_VERSION uint32 = 0x14f4c42
// LOG_REQ_PROTOCOL_VERSION is the magic number for log requests: the bytes
// "LOG" followed by a 0 byte, read as a little-endian uint32.
// >>> format(struct.unpack('<I', b'LOG\0')[0], 'x')
// '474f4c'
const LOG_REQ_PROTOCOL_VERSION uint32 = 0x474f4c
// LOG_RESP_PROTOCOL_VERSION is the magic number for log responses: the bytes
// "LOG" followed by a 1 byte, read as a little-endian uint32.
// >>> format(struct.unpack('<I', b'LOG\1')[0], 'x')
// '1474f4c'
const LOG_RESP_PROTOCOL_VERSION uint32 = 0x1474f4c
// For CDC/SHARD we use 0 as an error kind
const ERROR_KIND uint8 = 0
@@ -472,6 +482,8 @@ type ShuckleMessageKind uint8
type BlocksMessageKind uint8
// LogMessageKind is the one-byte discriminator of distributed log messages.
type LogMessageKind uint8
const ERROR uint8 = 0
const (
@@ -1660,16 +1672,17 @@ type EntryNewBlockInfo struct {
}
type AddSpanInitiateEntry struct {
FileId InodeId
ByteOffset uint64
Size uint32
Crc Crc
StorageClass StorageClass
Parity rs.Parity
Stripes uint8 // [1, 16]
CellSize uint32
BodyBlocks []EntryNewBlockInfo
BodyStripes []Crc // the CRCs
WithReference bool
FileId InodeId
ByteOffset uint64
Size uint32
Crc Crc
StorageClass StorageClass
Parity rs.Parity
Stripes uint8 // [1, 16]
CellSize uint32
BodyBlocks []EntryNewBlockInfo
BodyStripes []Crc // the CRCs
}
type AddSpanCertifyEntry struct {
@@ -1972,3 +1985,85 @@ type GetStatsResp struct {
NextName string
Stats []Stat
}
// --------------------------------------------------------------------
// Distributed log requests/responses
// LogRequest is the interface satisfied by every distributed log request: it
// can be packed and unpacked with bincode and reports its LogMessageKind.
type LogRequest interface {
bincode.Packable
bincode.Unpackable
LogRequestKind() LogMessageKind
}
// LogResponse is the interface satisfied by every distributed log response: it
// can be packed and unpacked with bincode and reports its LogMessageKind.
type LogResponse interface {
bincode.Packable
bincode.Unpackable
LogResponseKind() LogMessageKind
}
// LogWriteReq is the LOG_WRITE request: it asks for Value to be written at log
// index Idx, and also carries a leader token and a LastReleased index.
// Field order is the wire order.
type LogWriteReq struct {
Token LeaderToken
LastReleased LogIdx
Idx LogIdx
Value bincode.Blob
}
// LogWriteResp is the LOG_WRITE response; Result holds the outcome as an ErrCode.
type LogWriteResp struct {
Result ErrCode
}
// ReleaseReq is the RELEASE request: it carries a leader token and the
// LastReleased log index. Field order is the wire order.
type ReleaseReq struct {
Token LeaderToken
LastReleased LogIdx
}
// ReleaseResp is the RELEASE response; Result holds the outcome as an ErrCode.
type ReleaseResp struct {
Result ErrCode
}
// LogReadReq is the LOG_READ request: it names the log index Idx to read.
type LogReadReq struct {
Idx LogIdx
}
// LogReadResp is the LOG_READ response: an ErrCode result plus a value blob.
// Field order is the wire order.
type LogReadResp struct {
Result ErrCode
Value bincode.Blob
}
// NewLeaderReq is the NEW_LEADER request: it carries the nominee's leader token.
type NewLeaderReq struct {
NomineeToken LeaderToken
}
// NewLeaderResp is the NEW_LEADER response: an ErrCode result plus a
// LastReleased log index. Field order is the wire order.
type NewLeaderResp struct {
Result ErrCode
LastReleased LogIdx
}
// NewLeaderConfirmReq is the NEW_LEADER_CONFIRM request: it carries the
// nominee's leader token and a released log index. Field order is the wire order.
type NewLeaderConfirmReq struct {
NomineeToken LeaderToken
ReleasedIdx LogIdx
}
// NewLeaderConfirmResp is the NEW_LEADER_CONFIRM response; Result holds the
// outcome as an ErrCode.
type NewLeaderConfirmResp struct {
Result ErrCode
}
// LogRecoveryReadReq is the LOG_RECOVERY_READ request: it carries the nominee's
// leader token and the log index Idx to read. Field order is the wire order.
type LogRecoveryReadReq struct {
NomineeToken LeaderToken
Idx LogIdx
}
// LogRecoveryReadResp is the LOG_RECOVERY_READ response: an ErrCode result plus
// a value blob. Field order is the wire order.
type LogRecoveryReadResp struct {
Result ErrCode
Value bincode.Blob
}
// LogRecoveryWriteReq is the LOG_RECOVERY_WRITE request: it carries the
// nominee's leader token, a log index Idx and the Value for that index.
// Field order is the wire order.
type LogRecoveryWriteReq struct {
NomineeToken LeaderToken
Idx LogIdx
Value bincode.Blob
}
// LogRecoveryWriteResp is the LOG_RECOVERY_WRITE response; Result holds the
// outcome as an ErrCode.
type LogRecoveryWriteResp struct {
Result ErrCode
}
+398
View File
@@ -179,6 +179,11 @@ const (
BLOCK_IO_ERROR_FILE ErrCode = 71
INVALID_REPLICA ErrCode = 72
DIFFERENT_ADDRS_INFO ErrCode = 73
LEADER_PREEMPTED ErrCode = 74
LOG_ENTRY_MISSING ErrCode = 75
LOG_ENTRY_TRIMMED ErrCode = 76
LOG_ENTRY_UNRELEASED ErrCode = 77
LOG_ENTRY_RELEASED ErrCode = 78
)
func (err ErrCode) String() string {
@@ -311,6 +316,16 @@ func (err ErrCode) String() string {
return "INVALID_REPLICA"
case 73:
return "DIFFERENT_ADDRS_INFO"
case 74:
return "LEADER_PREEMPTED"
case 75:
return "LOG_ENTRY_MISSING"
case 76:
return "LOG_ENTRY_TRIMMED"
case 77:
return "LOG_ENTRY_UNRELEASED"
case 78:
return "LOG_ENTRY_RELEASED"
default:
return fmt.Sprintf("ErrCode(%d)", err)
}
@@ -804,6 +819,71 @@ func MkBlocksMessage(k string) (BlocksRequest, BlocksResponse, error) {
}
}
// String returns the symbolic name of a log message kind (for example
// "LOG_WRITE" for 1), or "LogMessageKind(<n>)" for values without a name.
func (k LogMessageKind) String() string {
	// Indexed by kind value; slot 0 is intentionally unused.
	names := [...]string{
		1: "LOG_WRITE",
		2: "RELEASE",
		3: "LOG_READ",
		4: "NEW_LEADER",
		5: "NEW_LEADER_CONFIRM",
		6: "LOG_RECOVERY_READ",
		7: "LOG_RECOVERY_WRITE",
	}
	if k >= 1 && int(k) < len(names) {
		return names[k]
	}
	return fmt.Sprintf("LogMessageKind(%d)", k)
}
// Log message kinds, numbered 0x1 through 0x7. A request and its response
// share the same kind.
const (
LOG_WRITE LogMessageKind = 0x1
RELEASE LogMessageKind = 0x2
LOG_READ LogMessageKind = 0x3
NEW_LEADER LogMessageKind = 0x4
NEW_LEADER_CONFIRM LogMessageKind = 0x5
LOG_RECOVERY_READ LogMessageKind = 0x6
LOG_RECOVERY_WRITE LogMessageKind = 0x7
)
// AllLogMessageKind lists every log message kind in ascending numeric order.
var AllLogMessageKind = [...]LogMessageKind{
LOG_WRITE,
RELEASE,
LOG_READ,
NEW_LEADER,
NEW_LEADER_CONFIRM,
LOG_RECOVERY_READ,
LOG_RECOVERY_WRITE,
}
// MaxLogMessageKind is the largest log message kind value (LOG_RECOVERY_WRITE).
const MaxLogMessageKind LogMessageKind = 7
// MkLogMessage returns newly allocated, zero-valued request and response
// messages for the log message kind whose symbolic name is k (for example
// "LOG_WRITE"). An unknown name yields nil messages and an error.
func MkLogMessage(k string) (LogRequest, LogResponse, error) {
	switch k {
	case "LOG_WRITE":
		return &LogWriteReq{}, &LogWriteResp{}, nil
	case "RELEASE":
		return &ReleaseReq{}, &ReleaseResp{}, nil
	case "LOG_READ":
		return &LogReadReq{}, &LogReadResp{}, nil
	case "NEW_LEADER":
		return &NewLeaderReq{}, &NewLeaderResp{}, nil
	case "NEW_LEADER_CONFIRM":
		return &NewLeaderConfirmReq{}, &NewLeaderConfirmResp{}, nil
	case "LOG_RECOVERY_READ":
		return &LogRecoveryReadReq{}, &LogRecoveryReadResp{}, nil
	case "LOG_RECOVERY_WRITE":
		return &LogRecoveryWriteReq{}, &LogRecoveryWriteResp{}, nil
	}
	return nil, nil, fmt.Errorf("bad kind string %s", k)
}
func (v *LookupReq) ShardRequestKind() ShardMessageKind {
return LOOKUP
}
@@ -5081,3 +5161,321 @@ func (v *CheckBlockResp) Unpack(r io.Reader) error {
return nil
}
// LogRequestKind reports LOG_WRITE as the kind of this request.
func (v *LogWriteReq) LogRequestKind() LogMessageKind {
return LOG_WRITE
}
// Pack writes v to w: Token, LastReleased and Idx as uint64 scalars, in that
// order, followed by Value as a blob. It returns the first error encountered.
func (v *LogWriteReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.Token)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.LastReleased)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.Idx)); err != nil {
return err
}
if err := bincode.PackBlob(w, v.Value); err != nil {
return err
}
return nil
}
// Unpack reads v from r: Token, LastReleased and Idx as uint64 scalars, in
// that order, followed by Value as a blob. It returns the first error
// encountered.
func (v *LogWriteReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.Token)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.LastReleased)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.Idx)); err != nil {
return err
}
if err := bincode.UnpackBlob(r, &v.Value); err != nil {
return err
}
return nil
}
// LogResponseKind reports LOG_WRITE as the kind of this response.
func (v *LogWriteResp) LogResponseKind() LogMessageKind {
return LOG_WRITE
}
// Pack writes v to w as a single uint16 scalar holding Result.
func (v *LogWriteResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: a single uint16 scalar stored into Result.
func (v *LogWriteResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
return nil
}
// LogRequestKind reports RELEASE as the kind of this request.
func (v *ReleaseReq) LogRequestKind() LogMessageKind {
return RELEASE
}
// Pack writes v to w: Token then LastReleased, each as a uint64 scalar. It
// returns the first error encountered.
func (v *ReleaseReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.Token)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.LastReleased)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: Token then LastReleased, each as a uint64 scalar. It
// returns the first error encountered.
func (v *ReleaseReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.Token)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.LastReleased)); err != nil {
return err
}
return nil
}
// LogResponseKind reports RELEASE as the kind of this response.
func (v *ReleaseResp) LogResponseKind() LogMessageKind {
return RELEASE
}
// Pack writes v to w as a single uint16 scalar holding Result.
func (v *ReleaseResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: a single uint16 scalar stored into Result.
func (v *ReleaseResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
return nil
}
// LogRequestKind reports LOG_READ as the kind of this request.
func (v *LogReadReq) LogRequestKind() LogMessageKind {
return LOG_READ
}
// Pack writes v to w as a single uint64 scalar holding Idx.
func (v *LogReadReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.Idx)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: a single uint64 scalar stored into Idx.
func (v *LogReadReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.Idx)); err != nil {
return err
}
return nil
}
// LogResponseKind reports LOG_READ as the kind of this response.
func (v *LogReadResp) LogResponseKind() LogMessageKind {
return LOG_READ
}
// Pack writes v to w: Result as a uint16 scalar followed by Value as a blob.
// It returns the first error encountered.
func (v *LogReadResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
if err := bincode.PackBlob(w, v.Value); err != nil {
return err
}
return nil
}
// Unpack reads v from r: Result as a uint16 scalar followed by Value as a
// blob. It returns the first error encountered.
func (v *LogReadResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
if err := bincode.UnpackBlob(r, &v.Value); err != nil {
return err
}
return nil
}
// LogRequestKind reports NEW_LEADER as the kind of this request.
func (v *NewLeaderReq) LogRequestKind() LogMessageKind {
return NEW_LEADER
}
// Pack writes v to w as a single uint64 scalar holding NomineeToken.
func (v *NewLeaderReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.NomineeToken)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: a single uint64 scalar stored into NomineeToken.
func (v *NewLeaderReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.NomineeToken)); err != nil {
return err
}
return nil
}
// LogResponseKind reports NEW_LEADER as the kind of this response.
func (v *NewLeaderResp) LogResponseKind() LogMessageKind {
return NEW_LEADER
}
// Pack writes v to w: Result as a uint16 scalar followed by LastReleased as a
// uint64 scalar. It returns the first error encountered.
func (v *NewLeaderResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.LastReleased)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: Result as a uint16 scalar followed by LastReleased as
// a uint64 scalar. It returns the first error encountered.
func (v *NewLeaderResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.LastReleased)); err != nil {
return err
}
return nil
}
// LogRequestKind reports NEW_LEADER_CONFIRM as the kind of this request.
func (v *NewLeaderConfirmReq) LogRequestKind() LogMessageKind {
return NEW_LEADER_CONFIRM
}
// Pack writes v to w: NomineeToken then ReleasedIdx, each as a uint64 scalar.
// It returns the first error encountered.
func (v *NewLeaderConfirmReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.NomineeToken)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.ReleasedIdx)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: NomineeToken then ReleasedIdx, each as a uint64
// scalar. It returns the first error encountered.
func (v *NewLeaderConfirmReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.NomineeToken)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.ReleasedIdx)); err != nil {
return err
}
return nil
}
// LogResponseKind reports NEW_LEADER_CONFIRM as the kind of this response.
func (v *NewLeaderConfirmResp) LogResponseKind() LogMessageKind {
return NEW_LEADER_CONFIRM
}
// Pack writes v to w as a single uint16 scalar holding Result.
func (v *NewLeaderConfirmResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: a single uint16 scalar stored into Result.
func (v *NewLeaderConfirmResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
return nil
}
// LogRequestKind reports LOG_RECOVERY_READ as the kind of this request.
func (v *LogRecoveryReadReq) LogRequestKind() LogMessageKind {
return LOG_RECOVERY_READ
}
// Pack writes v to w: NomineeToken then Idx, each as a uint64 scalar. It
// returns the first error encountered.
func (v *LogRecoveryReadReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.NomineeToken)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.Idx)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: NomineeToken then Idx, each as a uint64 scalar. It
// returns the first error encountered.
func (v *LogRecoveryReadReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.NomineeToken)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.Idx)); err != nil {
return err
}
return nil
}
// LogResponseKind reports LOG_RECOVERY_READ as the kind of this response.
func (v *LogRecoveryReadResp) LogResponseKind() LogMessageKind {
return LOG_RECOVERY_READ
}
// Pack writes v to w: Result as a uint16 scalar followed by Value as a blob.
// It returns the first error encountered.
func (v *LogRecoveryReadResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
if err := bincode.PackBlob(w, v.Value); err != nil {
return err
}
return nil
}
// Unpack reads v from r: Result as a uint16 scalar followed by Value as a
// blob. It returns the first error encountered.
func (v *LogRecoveryReadResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
if err := bincode.UnpackBlob(r, &v.Value); err != nil {
return err
}
return nil
}
// LogRequestKind reports LOG_RECOVERY_WRITE as the kind of this request.
func (v *LogRecoveryWriteReq) LogRequestKind() LogMessageKind {
return LOG_RECOVERY_WRITE
}
// Pack writes v to w: NomineeToken and Idx as uint64 scalars, in that order,
// followed by Value as a blob. It returns the first error encountered.
func (v *LogRecoveryWriteReq) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint64(v.NomineeToken)); err != nil {
return err
}
if err := bincode.PackScalar(w, uint64(v.Idx)); err != nil {
return err
}
if err := bincode.PackBlob(w, v.Value); err != nil {
return err
}
return nil
}
// Unpack reads v from r: NomineeToken and Idx as uint64 scalars, in that
// order, followed by Value as a blob. It returns the first error encountered.
func (v *LogRecoveryWriteReq) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint64)(&v.NomineeToken)); err != nil {
return err
}
if err := bincode.UnpackScalar(r, (*uint64)(&v.Idx)); err != nil {
return err
}
if err := bincode.UnpackBlob(r, &v.Value); err != nil {
return err
}
return nil
}
// LogResponseKind reports LOG_RECOVERY_WRITE as the kind of this response.
func (v *LogRecoveryWriteResp) LogResponseKind() LogMessageKind {
return LOG_RECOVERY_WRITE
}
// Pack writes v to w as a single uint16 scalar holding Result.
func (v *LogRecoveryWriteResp) Pack(w io.Writer) error {
if err := bincode.PackScalar(w, uint16(v.Result)); err != nil {
return err
}
return nil
}
// Unpack reads v from r: a single uint16 scalar stored into Result.
func (v *LogRecoveryWriteResp) Unpack(r io.Reader) error {
if err := bincode.UnpackScalar(r, (*uint16)(&v.Result)); err != nil {
return err
}
return nil
}
+5
View File
@@ -64,6 +64,11 @@ const char* eggsfs_err_str(int err) {
case 71: return "BLOCK_IO_ERROR_FILE";
case 72: return "INVALID_REPLICA";
case 73: return "DIFFERENT_ADDRS_INFO";
case 74: return "LEADER_PREEMPTED";
case 75: return "LOG_ENTRY_MISSING";
case 76: return "LOG_ENTRY_TRIMMED";
case 77: return "LOG_ENTRY_UNRELEASED";
case 78: return "LOG_ENTRY_RELEASED";
default: return "UNKNOWN";
}
}
+6 -1
View File
@@ -65,8 +65,13 @@
#define EGGSFS_ERR_BLOCK_IO_ERROR_FILE 71
#define EGGSFS_ERR_INVALID_REPLICA 72
#define EGGSFS_ERR_DIFFERENT_ADDRS_INFO 73
#define EGGSFS_ERR_LEADER_PREEMPTED 74
#define EGGSFS_ERR_LOG_ENTRY_MISSING 75
#define EGGSFS_ERR_LOG_ENTRY_TRIMMED 76
#define EGGSFS_ERR_LOG_ENTRY_UNRELEASED 77
#define EGGSFS_ERR_LOG_ENTRY_RELEASED 78
// Maps a numeric EGGSFS error code `i` to its symbolic name by expanding to
// __print_symbolic(i, { code, "NAME" }, ...). The table covers codes 10..78.
//
// There must be exactly one definition of this macro: a second #define with a
// different table is an incompatible redefinition. Every physical line of the
// definition except the last must end in a backslash, otherwise the #define
// terminates early and the rest of the table becomes stray file-scope tokens.
#define __print_eggsfs_err(i) __print_symbolic(i, \
    { 10, "INTERNAL_ERROR" }, \
    { 11, "FATAL_ERROR" }, \
    { 12, "TIMEOUT" }, \
    { 13, "MALFORMED_REQUEST" }, \
    { 14, "MALFORMED_RESPONSE" }, \
    { 15, "NOT_AUTHORISED" }, \
    { 16, "UNRECOGNIZED_REQUEST" }, \
    { 17, "FILE_NOT_FOUND" }, \
    { 18, "DIRECTORY_NOT_FOUND" }, \
    { 19, "NAME_NOT_FOUND" }, \
    { 20, "EDGE_NOT_FOUND" }, \
    { 21, "EDGE_IS_LOCKED" }, \
    { 22, "TYPE_IS_DIRECTORY" }, \
    { 23, "TYPE_IS_NOT_DIRECTORY" }, \
    { 24, "BAD_COOKIE" }, \
    { 25, "INCONSISTENT_STORAGE_CLASS_PARITY" }, \
    { 26, "LAST_SPAN_STATE_NOT_CLEAN" }, \
    { 27, "COULD_NOT_PICK_BLOCK_SERVICES" }, \
    { 28, "BAD_SPAN_BODY" }, \
    { 29, "SPAN_NOT_FOUND" }, \
    { 30, "BLOCK_SERVICE_NOT_FOUND" }, \
    { 31, "CANNOT_CERTIFY_BLOCKLESS_SPAN" }, \
    { 32, "BAD_NUMBER_OF_BLOCKS_PROOFS" }, \
    { 33, "BAD_BLOCK_PROOF" }, \
    { 34, "CANNOT_OVERRIDE_NAME" }, \
    { 35, "NAME_IS_LOCKED" }, \
    { 36, "MTIME_IS_TOO_RECENT" }, \
    { 37, "MISMATCHING_TARGET" }, \
    { 38, "MISMATCHING_OWNER" }, \
    { 39, "MISMATCHING_CREATION_TIME" }, \
    { 40, "DIRECTORY_NOT_EMPTY" }, \
    { 41, "FILE_IS_TRANSIENT" }, \
    { 42, "OLD_DIRECTORY_NOT_FOUND" }, \
    { 43, "NEW_DIRECTORY_NOT_FOUND" }, \
    { 44, "LOOP_IN_DIRECTORY_RENAME" }, \
    { 45, "DIRECTORY_HAS_OWNER" }, \
    { 46, "FILE_IS_NOT_TRANSIENT" }, \
    { 47, "FILE_NOT_EMPTY" }, \
    { 48, "CANNOT_REMOVE_ROOT_DIRECTORY" }, \
    { 49, "FILE_EMPTY" }, \
    { 50, "CANNOT_REMOVE_DIRTY_SPAN" }, \
    { 51, "BAD_SHARD" }, \
    { 52, "BAD_NAME" }, \
    { 53, "MORE_RECENT_SNAPSHOT_EDGE" }, \
    { 54, "MORE_RECENT_CURRENT_EDGE" }, \
    { 55, "BAD_DIRECTORY_INFO" }, \
    { 56, "DEADLINE_NOT_PASSED" }, \
    { 57, "SAME_SOURCE_AND_DESTINATION" }, \
    { 58, "SAME_DIRECTORIES" }, \
    { 59, "SAME_SHARD" }, \
    { 60, "BAD_PROTOCOL_VERSION" }, \
    { 61, "BAD_CERTIFICATE" }, \
    { 62, "BLOCK_TOO_RECENT_FOR_DELETION" }, \
    { 63, "BLOCK_FETCH_OUT_OF_BOUNDS" }, \
    { 64, "BAD_BLOCK_CRC" }, \
    { 65, "BLOCK_TOO_BIG" }, \
    { 66, "BLOCK_NOT_FOUND" }, \
    { 67, "CANNOT_UNSET_DECOMMISSIONED" }, \
    { 68, "CANNOT_REGISTER_DECOMMISSIONED" }, \
    { 69, "BLOCK_TOO_OLD_FOR_WRITE" }, \
    { 70, "BLOCK_IO_ERROR_DEVICE" }, \
    { 71, "BLOCK_IO_ERROR_FILE" }, \
    { 72, "INVALID_REPLICA" }, \
    { 73, "DIFFERENT_ADDRS_INFO" }, \
    { 74, "LEADER_PREEMPTED" }, \
    { 75, "LOG_ENTRY_MISSING" }, \
    { 76, "LOG_ENTRY_TRIMMED" }, \
    { 77, "LOG_ENTRY_UNRELEASED" }, \
    { 78, "LOG_ENTRY_RELEASED" })
// Declaration only; the definition is not visible here. Takes an integer error
// code and returns a C string.
// NOTE(review): presumably returns the same symbolic names as the
// __print_eggsfs_err table — confirm against the definition, including what it
// returns for unknown codes and who owns the returned pointer.
const char* eggsfs_err_str(int err);
// Numeric constant 0x1 named for a shard "lookup" operation.
// NOTE(review): presumably the request-kind value for shard LOOKUP messages —
// confirm against the protocol definition before relying on it.
#define EGGSFS_SHARD_LOOKUP 0x1