mirror of
https://github.com/XTXMarkets/ternfs.git
synced 2025-12-16 16:26:47 -06:00
cdc: Fix various RenameDirectory issues
RenameDirectory state machine was not handling target not found correctly. This would have caused asserts (which result in crashes in production builds) There was also a bug in the rollback logic which would have caused a lingering lock on the source link. While breaking assumptions this was a benign bug as any operation on that directory would try and succeed acquiring this lock again. It would succeed as lock requests are idempotent.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -19,3 +19,4 @@ linux-*.tar.gz
|
||||
tern-integrationtest.*
|
||||
.go-cache
|
||||
ternfs-client*
|
||||
_codeql_detected_source_root
|
||||
|
||||
@@ -1012,13 +1012,13 @@ struct RenameDirectoryStateMachine {
|
||||
|
||||
// Check that changing this parent-child relationship wouldn't create
|
||||
// loops in directory structure.
|
||||
bool loopCheck() {
|
||||
TernError loopCheck() const {
|
||||
std::unordered_set<InodeId> visited;
|
||||
InodeId cursor = req.targetId;
|
||||
for (;;) {
|
||||
if (visited.count(cursor) > 0) {
|
||||
LOG_INFO(env.env, "Re-encountered %s in loop check, will return false", cursor);
|
||||
return false;
|
||||
return TernError::LOOP_IN_DIRECTORY_RENAME;
|
||||
}
|
||||
LOG_DEBUG(env.env, "Performing loop check for %s", cursor);
|
||||
visited.insert(cursor);
|
||||
@@ -1027,14 +1027,18 @@ struct RenameDirectoryStateMachine {
|
||||
} else {
|
||||
auto k = InodeIdKey::Static(cursor);
|
||||
std::string v;
|
||||
ROCKS_DB_CHECKED(env.dbTxn.Get({}, env.parentCf, k.toSlice(), &v));
|
||||
auto status = env.dbTxn.Get({}, env.parentCf, k.toSlice(), &v);
|
||||
if (status == rocksdb::Status::NotFound()) {
|
||||
return TernError::DIRECTORY_NOT_FOUND;
|
||||
}
|
||||
ROCKS_DB_CHECKED(status);
|
||||
cursor = ExternalValue<InodeIdValue>(v)().id();
|
||||
}
|
||||
if (cursor == ROOT_DIR_INODE_ID) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return TernError::NO_ERROR;
|
||||
}
|
||||
|
||||
void start() {
|
||||
@@ -1042,9 +1046,9 @@ struct RenameDirectoryStateMachine {
|
||||
env.finishWithError(TernError::TYPE_IS_NOT_DIRECTORY);
|
||||
} else if (req.oldOwnerId == req.newOwnerId) {
|
||||
env.finishWithError(TernError::SAME_DIRECTORIES);
|
||||
} else if (!loopCheck()) {
|
||||
} else if (auto err = loopCheck(); err != TernError::NO_ERROR) {
|
||||
// First, check if we'd create a loop
|
||||
env.finishWithError(TernError::LOOP_IN_DIRECTORY_RENAME);
|
||||
env.finishWithError(err);
|
||||
} else {
|
||||
// Now, actually start by locking the old edge
|
||||
lockOldEdge();
|
||||
@@ -1077,7 +1081,7 @@ struct RenameDirectoryStateMachine {
|
||||
}
|
||||
|
||||
void lookupOldCreationTime(bool repeated = false) {
|
||||
auto& shardReq = env.needsShard(RENAME_FILE_LOOKUP_OLD_CREATION_TIME, req.newOwnerId.shard(), repeated).setFullReadDir();
|
||||
auto& shardReq = env.needsShard(RENAME_DIRECTORY_LOOKUP_OLD_CREATION_TIME, req.newOwnerId.shard(), repeated).setFullReadDir();
|
||||
shardReq.dirId = req.newOwnerId;
|
||||
shardReq.flags = FULL_READ_DIR_BACKWARDS | FULL_READ_DIR_SAME_NAME | FULL_READ_DIR_CURRENT;
|
||||
shardReq.limit = 1;
|
||||
@@ -1097,7 +1101,7 @@ struct RenameDirectoryStateMachine {
|
||||
ALWAYS_ASSERT(err == TernError::NO_ERROR);
|
||||
// there might be no existing edge
|
||||
const auto& fullReadDir = resp.getFullReadDir();
|
||||
ALWAYS_ASSERT(fullReadDir.results.els.size() < 2); // we have limit=1
|
||||
ALWAYS_ASSERT(fullReadDir.results.els.size() < 2); // we have current and same name
|
||||
if (fullReadDir.results.els.size() == 0) {
|
||||
state.setNewOldCreationTime(0); // there was nothing present for this name
|
||||
} else {
|
||||
@@ -1123,7 +1127,7 @@ struct RenameDirectoryStateMachine {
|
||||
} else if (err == TernError::MISMATCHING_CREATION_TIME) {
|
||||
// we need to lookup the creation time again.
|
||||
lookupOldCreationTime();
|
||||
} else if (err == TernError::CANNOT_OVERRIDE_NAME) {
|
||||
} else if (err == TernError::CANNOT_OVERRIDE_NAME || err == TernError::DIRECTORY_NOT_FOUND) {
|
||||
state.setExitError(err);
|
||||
rollback();
|
||||
} else {
|
||||
@@ -1210,7 +1214,7 @@ struct RenameDirectoryStateMachine {
|
||||
shardReq.name = req.oldName;
|
||||
shardReq.targetId = req.targetId;
|
||||
shardReq.wasMoved = false;
|
||||
shardReq.creationTime = state.newCreationTime();
|
||||
shardReq.creationTime = req.oldCreationTime;
|
||||
}
|
||||
|
||||
void afterRollback(const ShardRespContainer& resp) {
|
||||
@@ -1750,7 +1754,7 @@ struct CDCDBImpl {
|
||||
// we've finished doing other stuff
|
||||
std::vector<CDCTxnId>& txnIds
|
||||
) {
|
||||
LOG_DEBUG(_env, "advancing txn %s of kind %s with state", txnId, req.kind());
|
||||
LOG_DEBUG(_env, "advancing txn %s with state %s", txnId, state());
|
||||
StateMachineEnv sm(_env, _defaultCf, _parentCf, dbTxn, txnId, state().step(), step);
|
||||
switch (req.kind()) {
|
||||
case CDCMessageKind::MAKE_DIRECTORY:
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <bit>
|
||||
#include <cstddef>
|
||||
#include <ostream>
|
||||
#include <rocksdb/slice.h>
|
||||
|
||||
#include "Assert.hpp"
|
||||
@@ -127,6 +128,15 @@ struct MakeDirectoryState {
|
||||
}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const MakeDirectoryState& x) {
|
||||
out << "MakeDirectoryState(dirId=" << x.dirId()
|
||||
<< ", oldCreationTime=" << x.oldCreationTime()
|
||||
<< ", creationTime=" << x.creationTime()
|
||||
<< ", exitError=" << x.exitError()
|
||||
<< ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
struct RenameFileState {
|
||||
FIELDS(
|
||||
LE, TernTime, newOldCreationTime, setNewOldCreationTime,
|
||||
@@ -142,6 +152,14 @@ struct RenameFileState {
|
||||
}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const RenameFileState& x) {
|
||||
out << "RenameFileState(newOldCreationTime=" << x.newOldCreationTime()
|
||||
<< ", newCreationTime=" << x.newCreationTime()
|
||||
<< ", exitError=" << x.exitError()
|
||||
<< ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
struct SoftUnlinkDirectoryState {
|
||||
FIELDS(
|
||||
LE, InodeId, statDirId, setStatDirId,
|
||||
@@ -154,6 +172,13 @@ struct SoftUnlinkDirectoryState {
|
||||
}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const SoftUnlinkDirectoryState& x) {
|
||||
out << "SoftUnlinkDirectoryState(statDirId=" << x.statDirId()
|
||||
<< ", exitError=" << x.exitError()
|
||||
<< ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
struct RenameDirectoryState {
|
||||
FIELDS(
|
||||
LE, TernTime, newOldCreationTime, setNewOldCreationTime,
|
||||
@@ -169,16 +194,34 @@ struct RenameDirectoryState {
|
||||
}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const RenameDirectoryState& x) {
|
||||
out << "RenameDirectoryState(newOldCreationTime=" << x.newOldCreationTime()
|
||||
<< ", newCreationTime=" << x.newCreationTime()
|
||||
<< ", exitError=" << x.exitError()
|
||||
<< ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
struct HardUnlinkDirectoryState {
|
||||
FIELDS(END_STATIC)
|
||||
void start() {}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const HardUnlinkDirectoryState& x) {
|
||||
out << "HardUnlinkDirectoryState()";
|
||||
return out;
|
||||
}
|
||||
|
||||
struct CrossShardHardUnlinkFileState {
|
||||
FIELDS(END_STATIC)
|
||||
void start() {}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const CrossShardHardUnlinkFileState& _) {
|
||||
out << "CrossShardHardUnlinkFileState()";
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename Type, typename ...Types>
|
||||
constexpr size_t maxMaxSize() {
|
||||
if constexpr (sizeof...(Types) > 0) {
|
||||
@@ -226,7 +269,7 @@ struct TxnState {
|
||||
}
|
||||
|
||||
#define TXN_STATE(kind, type, getName, startName) \
|
||||
type getName() { \
|
||||
type getName() const { \
|
||||
ALWAYS_ASSERT(reqKind() == CDCMessageKind::kind); \
|
||||
type v; \
|
||||
v._data = _data + MIN_SIZE; \
|
||||
@@ -265,3 +308,31 @@ struct TxnState {
|
||||
memset(_data+MIN_SIZE, 0, size()-MIN_SIZE);
|
||||
}
|
||||
};
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream& out, const TxnState& x) {
|
||||
out << "TxnState(reqKind=" << x.reqKind() << ", step=" << (uint32_t)x.step();
|
||||
switch (x.reqKind()) {
|
||||
case CDCMessageKind::MAKE_DIRECTORY:
|
||||
out << ", " << x.getMakeDirectory();
|
||||
break;
|
||||
case CDCMessageKind::RENAME_FILE:
|
||||
out << ", " << x.getRenameFile();
|
||||
break;
|
||||
case CDCMessageKind::SOFT_UNLINK_DIRECTORY:
|
||||
out << ", " << x.getSoftUnlinkDirectory();
|
||||
break;
|
||||
case CDCMessageKind::RENAME_DIRECTORY:
|
||||
out << ", " << x.getRenameDirectory();
|
||||
break;
|
||||
case CDCMessageKind::HARD_UNLINK_DIRECTORY:
|
||||
out << ", " << x.getHardUnlinkDirectory();
|
||||
break;
|
||||
case CDCMessageKind::CROSS_SHARD_HARD_UNLINK_FILE:
|
||||
out << ", " << x.getCrossShardHardUnlinkFile();
|
||||
break;
|
||||
default:
|
||||
throw TERN_EXCEPTION("bad cdc message kind %s", x.reqKind());
|
||||
}
|
||||
out << ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user