mirror of
https://github.com/XTXMarkets/ternfs.git
synced 2025-12-19 17:50:44 -06:00
451 lines
15 KiB
C
451 lines
15 KiB
C
// Copyright 2025 XTX Markets Technologies Limited
|
|
//
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include "dentry.h"
|
|
|
|
#include "bincode.h"
|
|
#include "inode_compat.h"
|
|
#include "log.h"
|
|
#include "dir.h"
|
|
#include "err.h"
|
|
#include "inode.h"
|
|
#include "metadata.h"
|
|
#include "policy.h"
|
|
#include "trace.h"
|
|
|
|
// Incremented by ternfs_d_revalidate each time it calls ternfs_dir_revalidate
// on the parent directory.
TERNFS_DEFINE_COUNTER(ternfs_stat_dir_revalidations);
|
|
|
|
// Decides whether `dentry`, a child of directory `dir`, can still be trusted.
//
// Returns:
//    1      => dentry is valid
//    0      => dentry is invalid
//   -ECHILD => the parent directory must be revalidated first (its cached
//              mtime has expired), or, when `in_rcu` is set, a lookup is
//              required and the caller must retry in non-RCU mode
//
// dentry->d_time is compared against dir->mtime; when a lookup confirms that
// the name still points at the same inode, d_time is refreshed to dir->mtime.
static inline int ternfs_dir_needs_reval(struct ternfs_inode* dir, struct dentry* dentry, bool in_rcu) {
    u64 dentry_dir_mtime = dentry->d_time;
    u64 t = get_jiffies_64();
    // mtime_expiry is reset with WRITE_ONCE elsewhere in this file, so read it
    // exactly once and use the same value for the decision and the debug log.
    u64 mtime_expiry = READ_ONCE(dir->dir.mtime_expiry);
    int ret;

    if (t >= mtime_expiry) {
        ret = -ECHILD;
    } else if (dentry_dir_mtime == dir->mtime) {
        ret = 1;
    } else {
        struct inode* inode = in_rcu ? d_inode_rcu(dentry) : dentry->d_inode;

        if (inode == NULL || !S_ISDIR(inode->i_mode)) {
            // We are fine invalidating negative dentries or dentries for
            // non-directories, as this is not expensive.
            ret = 0;
        } else if (in_rcu) {
            // We want to confirm we are still valid, but that needs a lookup,
            // so we have to drop into non-RCU mode.
            ret = -ECHILD;
        } else {
            u64 ino = 0, creation_time = 0;
            int lookup_err = ternfs_shard_lookup(
                inode->i_sb->s_fs_info, dir->inode.i_ino,
                dentry->d_name.name, dentry->d_name.len,
                &ino, &creation_time
            );
            // Any non-zero result means the lookup did not produce an inode
            // number: negative values are Linux errors, positive values are
            // TernFS errors (e.g. the name no longer exists). In both cases
            // `ino` must not be consulted and the dentry is invalid.
            if (lookup_err != 0 || inode->i_ino != ino) {
                ret = 0;
            } else {
                dentry->d_time = dir->mtime;
                ret = 1;
            }
        }
    }

    ternfs_debug(
        "t=%llu dir=%p dir_id=0x%016lx dir_mtime=%llu dir_mtime_expiry=%llu dentry=%pd dentry_dir_mtime=%llu -> ret=%d",
        t, dir, dir->inode.i_ino, dir->mtime, mtime_expiry, dentry, dentry_dir_mtime, ret
    );

    return ret;
}
|
|
|
|
static int ternfs_d_revalidate(struct dentry* dentry, unsigned int flags) {
|
|
struct dentry* parent;
|
|
struct inode* dir;
|
|
int ret;
|
|
|
|
ternfs_debug("dentry=%pd flags=%x[rcu=%d]", dentry, flags, !!(flags & LOOKUP_RCU));
|
|
|
|
if (IS_ROOT(dentry)) { return 1; }
|
|
|
|
if (dentry->d_name.len > TERNFS_MAX_FILENAME) {
|
|
return -ENAMETOOLONG;
|
|
}
|
|
|
|
if (flags & LOOKUP_RCU) {
|
|
parent = READ_ONCE(dentry->d_parent);
|
|
dir = d_inode_rcu(parent);
|
|
if (!dir) { return -ECHILD; }
|
|
// we are either asked for validiti of our root dentry or we are bound mounted
|
|
// either way we can't return invalid
|
|
if (parent->d_sb != dentry->d_sb) { return 1; }
|
|
ret = ternfs_dir_needs_reval(TERNFS_I(dir), dentry, true);
|
|
if (parent != READ_ONCE(dentry->d_parent)) { return -ECHILD; }
|
|
return ret;
|
|
} else {
|
|
parent = dget_parent(dentry);
|
|
dir = d_inode(parent);
|
|
ret = ternfs_dir_needs_reval(TERNFS_I(dir), dentry, false);
|
|
if (ret == -ECHILD) {
|
|
ternfs_counter_inc(ternfs_stat_dir_revalidations);
|
|
ret = ternfs_dir_revalidate(TERNFS_I(dir));
|
|
if (ret) { goto out; }
|
|
ret = ternfs_dir_needs_reval(TERNFS_I(dir), dentry, false);
|
|
if (ret == -ECHILD) { ret = 1; }
|
|
}
|
|
out:
|
|
dput(parent);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
struct dentry_operations ternfs_dentry_ops = {
|
|
.d_revalidate = ternfs_d_revalidate,
|
|
};
|
|
|
|
|
|
static void ternfs_dcache_handle_error(struct inode* dir, struct dentry* dentry, int err) {
|
|
ternfs_debug("err=%d", err);
|
|
switch (err) {
|
|
case TERNFS_ERR_CANNOT_OVERRIDE_NAME:
|
|
WARN_ON(dentry->d_inode);
|
|
trace_eggsfs_dcache_invalidate_neg_entry(dir, dentry);
|
|
WRITE_ONCE(TERNFS_I(dir)->dir.mtime_expiry, 0);
|
|
d_invalidate(dentry);
|
|
break;
|
|
// TODO is this correct? verify, I've changed the code from an earlier error
|
|
// which does not exist anymore.
|
|
case TERNFS_ERR_DIRECTORY_NOT_FOUND:
|
|
WARN_ON(dentry->d_inode);
|
|
trace_eggsfs_dcache_delete_inode(dir);
|
|
WRITE_ONCE(TERNFS_I(dir)->dir.mtime_expiry, 0);
|
|
clear_nlink(dir);
|
|
d_invalidate(dentry->d_parent);
|
|
break;
|
|
case TERNFS_ERR_DIRECTORY_NOT_EMPTY:
|
|
WARN_ON(!dentry->d_inode);
|
|
if (dentry->d_inode) {
|
|
trace_eggsfs_dcache_invalidate_dir(dentry->d_inode);
|
|
WRITE_ONCE(TERNFS_I(dentry->d_inode)->dir.mtime_expiry, 0);
|
|
}
|
|
break;
|
|
case TERNFS_ERR_NAME_NOT_FOUND:
|
|
WARN_ON(!dentry->d_inode);
|
|
if (dentry->d_inode) {
|
|
trace_eggsfs_dcache_delete_entry(dir, dentry, dentry->d_inode);
|
|
WRITE_ONCE(TERNFS_I(dentry->d_inode)->dir.mtime_expiry, 0);
|
|
clear_nlink(dentry->d_inode);
|
|
d_invalidate(dentry);
|
|
}
|
|
break;
|
|
case TERNFS_ERR_MISMATCHING_TARGET:
|
|
case TERNFS_ERR_MISMATCHING_CREATION_TIME:
|
|
WARN_ON(!dentry->d_inode);
|
|
if (dentry->d_inode) {
|
|
trace_eggsfs_dcache_invalidate_entry(dir, dentry, dentry->d_inode);
|
|
WRITE_ONCE(TERNFS_I(dentry->d_inode)->dir.mtime_expiry, 0);
|
|
d_invalidate(dentry);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// vfs: shared dir.i_rwsem
|
|
struct dentry* ternfs_lookup(struct inode* dir, struct dentry* dentry, unsigned int flags) {
|
|
ternfs_debug("dir=0x%016lx, name=%*pE", dir->i_ino, dentry->d_name.len, dentry->d_name.name);
|
|
int err;
|
|
|
|
trace_eggsfs_vfs_lookup_enter(dir, dentry);
|
|
|
|
if (dentry->d_name.len > TERNFS_MAX_FILENAME) {
|
|
err = -ENAMETOOLONG;
|
|
goto out_err;
|
|
}
|
|
|
|
u64 ino, creation_time;
|
|
// need to keep the tern error around for the TERNFS_ERR_NAME_NOT_FOUND
|
|
err = ternfs_shard_lookup(
|
|
dir->i_sb->s_fs_info, dir->i_ino, dentry->d_name.name, dentry->d_name.len, &ino, &creation_time
|
|
);
|
|
if (unlikely(err < 0)) {
|
|
goto out_err;
|
|
}
|
|
if (err) { // not unlikely since vfs will do lookups for entries likely to not exist
|
|
if (err == TERNFS_ERR_NAME_NOT_FOUND) {
|
|
dentry->d_time = TERNFS_I(dir)->mtime;
|
|
trace_eggsfs_vfs_lookup_exit(dir, dentry, NULL, 0);
|
|
return d_splice_alias(NULL, dentry);
|
|
}
|
|
err = ternfs_error_to_linux(err);
|
|
goto out_err;
|
|
}
|
|
|
|
struct inode* inode = ternfs_get_inode_normal(dentry->d_sb, TERNFS_I(dir), ino);
|
|
if (IS_ERR(inode)) {
|
|
err = PTR_ERR(inode);
|
|
goto out_err;
|
|
}
|
|
inode->i_ino = ino;
|
|
TERNFS_I(inode)->edge_creation_time = creation_time;
|
|
if (S_ISDIR(inode->i_mode)) {
|
|
// We want to fetch mtime and policies for directory
|
|
// if they were not fetched before.
|
|
// We can't fetch them asynchronously as this lookup call
|
|
// might be part of a lookup chain and will be followed by immediate
|
|
// child lookup.
|
|
// We need to have mtime and policies populated when we are doing a
|
|
// child lookup as it will inherit the parent policies immediately and use
|
|
// mtime in subsequent d_invalidate calls to determine if dentry is
|
|
// valid or not.
|
|
ternfs_do_getattr(TERNFS_I(inode), ATTR_CACHE_DIR_TIMEOUT);
|
|
}
|
|
|
|
dentry->d_time = TERNFS_I(dir)->mtime;
|
|
|
|
trace_eggsfs_vfs_lookup_exit(dir, dentry, inode, 0);
|
|
return d_splice_alias(inode, dentry);
|
|
|
|
out_err:
|
|
trace_eggsfs_vfs_lookup_exit(dir, dentry, NULL, err);
|
|
return ERR_PTR(err);
|
|
}
|
|
|
|
// vfs: exclusive dir.i_rwsem, lockref dentry
|
|
int COMPAT_FUNC_UNS_IMP(ternfs_mkdir, struct inode* dir, struct dentry* dentry, umode_t mode) {
|
|
int err;
|
|
|
|
struct ternfs_inode* dir_enode = TERNFS_I(dir);
|
|
|
|
trace_eggsfs_vfs_mkdir_enter(dir, dentry);
|
|
BUG_ON(dentry->d_inode);
|
|
|
|
if (dentry->d_name.len > TERNFS_MAX_FILENAME) {
|
|
err = -ENAMETOOLONG;
|
|
goto out_err;
|
|
}
|
|
|
|
u64 ino, creation_time;
|
|
err = ternfs_cdc_mkdir(
|
|
dir->i_sb->s_fs_info,
|
|
dir->i_ino,
|
|
dentry->d_name.name,
|
|
dentry->d_name.len,
|
|
&ino,
|
|
&creation_time
|
|
);
|
|
if (unlikely(err)) {
|
|
if (err < 0) { goto out_err; }
|
|
ternfs_dcache_handle_error(dir, dentry, err);
|
|
err = ternfs_error_to_linux(err);
|
|
goto out_err;
|
|
}
|
|
ternfs_dir_drop_cache(TERNFS_I(dir));
|
|
|
|
struct inode* inode = ternfs_get_inode_normal(dentry->d_sb, TERNFS_I(dir), ino);
|
|
if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_err; }
|
|
struct ternfs_inode* enode = TERNFS_I(inode);
|
|
enode->edge_creation_time = creation_time;
|
|
dentry->d_time = dir_enode->mtime;
|
|
d_instantiate(dentry, inode);
|
|
|
|
trace_eggsfs_vfs_mkdir_exit(dir, dentry, inode, 0);
|
|
return 0;
|
|
|
|
out_err:
|
|
trace_eggsfs_vfs_mkdir_exit(dir, dentry, NULL, err);
|
|
return err;
|
|
}
|
|
|
|
// vfs: exclusive dir,d_entry->d_inode.i_rwsem
|
|
int ternfs_rmdir(struct inode* dir, struct dentry* dentry) {
|
|
int err;
|
|
|
|
trace_eggsfs_vfs_rmdir_enter(dir, dentry, dentry->d_inode);
|
|
BUG_ON(!dentry->d_inode);
|
|
|
|
if (unlikely(dentry->d_name.len > TERNFS_MAX_FILENAME)) {
|
|
pr_err("ternfs: ternfs_rmdir dir=%lu dentry=%pd exceed max filename length %d\n", dir->i_ino, dentry, TERNFS_MAX_FILENAME);
|
|
err = -ENAMETOOLONG;
|
|
goto out_err;
|
|
}
|
|
|
|
err = ternfs_cdc_rmdir((struct ternfs_fs_info*)dir->i_sb->s_fs_info, dir->i_ino, dentry->d_inode->i_ino, TERNFS_I(dentry->d_inode)->edge_creation_time, dentry->d_name.name, dentry->d_name.len);
|
|
if (unlikely(err)) {
|
|
if (err < 0) { goto out_err; }
|
|
ternfs_dcache_handle_error(dir, dentry, err);
|
|
err = ternfs_error_to_linux(err);
|
|
goto out_err;
|
|
} else {
|
|
ternfs_dir_drop_cache(TERNFS_I(dir));
|
|
clear_nlink(dentry->d_inode);
|
|
}
|
|
|
|
out_err:
|
|
trace_eggsfs_vfs_rmdir_exit(dir, dentry, err);
|
|
return err;
|
|
}
|
|
|
|
#define POLICY_DELETE_AFTER_TIME_ACTIVE_AND_ZERO (1ull << 63)
|
|
#define POLICY_DELETE_AFTER_VERSIONS_ACTIVE_AND_ZERO (1 << 15)
|
|
|
|
static int ternfs_delete_file(
|
|
struct ternfs_fs_info* info, u64 dir, u64 file, const char* name, int name_len, u64 creation_time,
|
|
struct ternfs_policy* snapshot_policy
|
|
) {
|
|
int err;
|
|
|
|
u64 delete_creation_time;
|
|
err = ternfs_shard_soft_unlink_file(
|
|
info,
|
|
dir,
|
|
file,
|
|
name,
|
|
name_len,
|
|
creation_time,
|
|
&delete_creation_time
|
|
);
|
|
struct ternfs_policy_body snapshot_policy_body;
|
|
ternfs_get_policy_body(snapshot_policy, &snapshot_policy_body);
|
|
struct ternfs_snapshot_policy policy;
|
|
ternfs_snapshot_policy_get(&snapshot_policy_body, &policy);
|
|
if (likely(policy.delete_after_time != POLICY_DELETE_AFTER_TIME_ACTIVE_AND_ZERO &&
|
|
policy.delete_after_versions != POLICY_DELETE_AFTER_VERSIONS_ACTIVE_AND_ZERO)) {
|
|
goto out;
|
|
}
|
|
|
|
if (unlikely(err)) {
|
|
goto out;
|
|
}
|
|
|
|
if (unlikely(ternfs_inode_shard(dir) != ternfs_inode_shard(file))) {
|
|
err = ternfs_cdc_cross_shard_hard_unlink_file(
|
|
info,
|
|
dir,
|
|
file,
|
|
name,
|
|
name_len,
|
|
creation_time
|
|
);
|
|
} else {
|
|
err = ternfs_shard_hard_unlink_file(
|
|
info,
|
|
dir,
|
|
file,
|
|
name,
|
|
name_len,
|
|
creation_time
|
|
);
|
|
}
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
// vfs: exclusive dir,d_entry->d_inode.i_rwsem
|
|
int ternfs_unlink(struct inode* dir, struct dentry* dentry) {
|
|
int err;
|
|
|
|
trace_eggsfs_vfs_unlink_enter(dir, dentry, dentry->d_inode);
|
|
BUG_ON(!dentry->d_inode);
|
|
|
|
if (unlikely(dentry->d_name.len > TERNFS_MAX_FILENAME)) {
|
|
pr_err("ternfs: ternfs_unlink dir=%lu dentry=%pd exceed max filename length %d\n", dir->i_ino, dentry, TERNFS_MAX_FILENAME);
|
|
err = -ENAMETOOLONG;
|
|
goto out_err;
|
|
}
|
|
|
|
struct ternfs_inode* enode = TERNFS_I(dentry->d_inode);
|
|
err = ternfs_delete_file(
|
|
(struct ternfs_fs_info*)dir->i_sb->s_fs_info,
|
|
dir->i_ino,
|
|
dentry->d_inode->i_ino,
|
|
dentry->d_name.name,
|
|
dentry->d_name.len,
|
|
enode->edge_creation_time,
|
|
enode->snapshot_policy
|
|
);
|
|
if (unlikely(err)) {
|
|
if (err < 0) { goto out_err; }
|
|
ternfs_dcache_handle_error(dir, dentry, err);
|
|
err = ternfs_error_to_linux(err);
|
|
goto out_err;
|
|
} else {
|
|
ternfs_dir_drop_cache(TERNFS_I(dir));
|
|
// no hard links, inode is gone
|
|
clear_nlink(dentry->d_inode);
|
|
}
|
|
|
|
out_err:
|
|
trace_eggsfs_vfs_unlink_exit(dir, dentry, err);
|
|
return err;
|
|
}
|
|
|
|
// vfs: exclusive i_rwsem for all
|
|
int COMPAT_FUNC_UNS_IMP(ternfs_rename, struct inode* old_dir, struct dentry* old_dentry, struct inode* new_dir, struct dentry* new_dentry, unsigned int flags) {
|
|
int err;
|
|
|
|
trace_eggsfs_vfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
|
|
|
|
if (!old_dentry->d_inode) { // TODO can this ever happen?
|
|
ternfs_warn("got no inodes for old dentry!");
|
|
return -EIO;
|
|
}
|
|
struct ternfs_inode* enode = TERNFS_I(old_dentry->d_inode);
|
|
u64 ino = enode->inode.i_ino;
|
|
|
|
if (flags) { err = -EINVAL; goto out; }
|
|
|
|
u64 old_creation_time = enode->edge_creation_time;
|
|
u64 new_creation_time;
|
|
struct ternfs_fs_info* info = old_dir->i_sb->s_fs_info;
|
|
if (old_dir == new_dir) {
|
|
err = ternfs_error_to_linux(ternfs_shard_rename(
|
|
info,
|
|
old_dir->i_ino,
|
|
ino, old_dentry->d_name.name, old_dentry->d_name.len,
|
|
old_creation_time,
|
|
new_dentry->d_name.name, new_dentry->d_name.len,
|
|
&new_creation_time
|
|
));
|
|
} else if (S_ISDIR(old_dentry->d_inode->i_mode)) {
|
|
err = ternfs_error_to_linux(ternfs_cdc_rename_directory(
|
|
info,
|
|
ino,
|
|
old_dir->i_ino, new_dir->i_ino,
|
|
old_dentry->d_name.name, old_dentry->d_name.len, old_creation_time,
|
|
new_dentry->d_name.name, new_dentry->d_name.len,
|
|
&new_creation_time
|
|
));
|
|
} else {
|
|
err = ternfs_error_to_linux(ternfs_cdc_rename_file(
|
|
info,
|
|
ino,
|
|
old_dir->i_ino, new_dir->i_ino,
|
|
old_dentry->d_name.name, old_dentry->d_name.len, old_creation_time,
|
|
new_dentry->d_name.name, new_dentry->d_name.len,
|
|
&new_creation_time
|
|
));
|
|
}
|
|
// TODO do we need to drop the dentries in case of error? The old code
|
|
// thought so, but I'm not sure.
|
|
if (!err) {
|
|
old_dentry->d_time = new_creation_time; // used by d_revalidate
|
|
enode->edge_creation_time = new_creation_time;
|
|
struct ternfs_inode* new_edir = TERNFS_I(new_dir);
|
|
enode->block_policy = new_edir->block_policy;
|
|
enode->span_policy = new_edir->span_policy;
|
|
enode->stripe_policy = new_edir->stripe_policy;
|
|
d_move(old_dentry, new_dentry);
|
|
}
|
|
|
|
out:
|
|
trace_eggsfs_vfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, err);
|
|
ternfs_debug("err=%d", err);
|
|
return err;
|
|
}
|