mirror of
https://github.com/XTXMarkets/ternfs.git
synced 2025-12-20 10:10:17 -06:00
189 lines
6.2 KiB
C
189 lines
6.2 KiB
C
// Copyright 2025 XTX Markets Technologies Limited
|
|
//
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#ifndef _TERNFS_INODE_H
|
|
#define _TERNFS_INODE_H
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include "bincode.h"
|
|
#include "latch.h"
|
|
#include "log.h"
|
|
#include "net.h"
|
|
#include "policy.h"
|
|
#include "rs.h"
|
|
#include "span.h"
|
|
|
|
|
|
#define TERNFS_ROOT_INODE 0x2000000000000000ull
|
|
|
|
extern unsigned ternfs_disable_ftruncate;
|
|
|
|
struct ternfs_transient_span;
|
|
|
|
#define TERNFS_FILE_STATUS_NONE 0 // we have created the inode, we haven't opened it yet
|
|
#define TERNFS_FILE_STATUS_READING 1 // the file has been linked (we can reopen it at will)
|
|
#define TERNFS_FILE_STATUS_WRITING 2 // the file is transient, we're writing it
|
|
|
|
struct ternfs_inode_file {
|
|
int status;
|
|
|
|
// Normal file stuff
|
|
|
|
struct ternfs_file_spans spans;
|
|
|
|
// Transient file stuff. Only initialized on file creation (rather than opening),
|
|
// otherwise it's garbage.
|
|
// Could be factored out to separate data structure since it's completely useless
|
|
// when reading.
|
|
|
|
u64 cookie;
|
|
// If we've encountered an error such that we want to stop writing to this file
|
|
// forever.
|
|
atomic_t transient_err;
|
|
// Span we're currently writing to. Might be NULL.
|
|
struct ternfs_transient_span* writing_span;
|
|
// Whether we're currently flushing a span (block write + add span certify)
|
|
struct semaphore flushing_span_sema;
|
|
// We use this to track where we should close the file from.
|
|
struct task_struct* owner;
|
|
// We store these one separatedly from `owner` above because when a process exits
|
|
// it frees the mm before it frees the files. And we need the mm to account the
|
|
// MM_FILEPAGES in the file flushing logic.
|
|
struct mm_struct* mm;
|
|
};
|
|
|
|
struct ternfs_dirents {
|
|
// used to know when we need to refresh the dirents
|
|
u64 mtime;
|
|
// used to know when we're done quickly
|
|
u64 max_hash;
|
|
// to GC this structure
|
|
atomic_t refcount;
|
|
// to synchronize between modifications of the reference count
|
|
struct rcu_head rcu_head;
|
|
// the list of pages with the dirents in them. always at least
|
|
// one page in it.
|
|
//
|
|
// we use fields of each page to store stuff:
|
|
// * ->index: number of entries in the page
|
|
// * ->private: first hash appearing in the page
|
|
struct list_head pages;
|
|
};
|
|
|
|
struct ternfs_inode_dir {
|
|
u64 mtime_expiry; // in jiffies
|
|
|
|
// In `struct page`, we use:
|
|
//
|
|
// ->mapping to store the next page
|
|
// ->private to store the number of entries stored in the page (high 32 bits),
|
|
// and a reference count with how many people need the cache (low 32 bits).
|
|
// The reference count is only stored in the "head" page.
|
|
// ->rcu_head to synchronize between modifications to the reference count.
|
|
// ->index in the first page to store the dir mtime we've tagged the dir with
|
|
// (used to invalidate the dir contents).
|
|
struct ternfs_dirents __rcu * dirents;
|
|
struct ternfs_latch dirents_latch;
|
|
};
|
|
|
|
struct ternfs_inode {
|
|
struct inode inode;
|
|
|
|
// We cache things based on the ternfs mtime, but we need to decide when the
|
|
// mtime itself is stale for the purposes of dir lookups, which we do using
|
|
// `mtime_expiry`.
|
|
u64 mtime; // in ternfs time
|
|
|
|
u64 edge_creation_time; // in ternfs time, used for operations (re)moving the edge
|
|
|
|
// These are relevant for directoriese (obviously), but we also use them for transient
|
|
// files when we create them, since we need it in many places.
|
|
struct ternfs_policy* block_policy;
|
|
struct ternfs_policy* span_policy;
|
|
struct ternfs_policy* stripe_policy;
|
|
// We use the snapshot policy to immediately declare file transient if policy
|
|
// requests it. This is to reduce load on gc for high churn directories where we
|
|
// don't care about snapshots.
|
|
struct ternfs_policy* snapshot_policy;
|
|
|
|
union {
|
|
struct ternfs_inode_file file;
|
|
struct ternfs_inode_dir dir;
|
|
};
|
|
|
|
// There is always at most one metadata request in flight for getattr.
|
|
// This is regulated by `getattr_update_latch`. We do getattr in two ways:
|
|
// 1. Synchronously, so we just take the `getattr_update_latch` and
|
|
// do the metadata request normally;
|
|
// 2. Asynchronously, where we take the `getattr_update_latch` and then
|
|
// have `getattr_async_work` complete it, without retries.
|
|
// Due to async nature of completion it may race with init and relase the
|
|
// latch before init completes. To avoid it we use `getattr_update_init_latch`.
|
|
// Method 2 is used when doing speculative getattrs when opening directories.
|
|
u64 getattr_expiry;
|
|
struct ternfs_latch getattr_update_latch;
|
|
struct ternfs_latch getattr_update_init_latch;
|
|
struct ternfs_metadata_request getattr_async_req;
|
|
struct delayed_work getattr_async_work;
|
|
s64 getattr_async_seqno;
|
|
};
|
|
|
|
#define TERNFS_I(ptr) container_of(ptr, struct ternfs_inode, inode)
|
|
|
|
#define TERNFS_INODE_DIRECTORY 1
|
|
#define TERNFS_INODE_FILE 2
|
|
#define TERNFS_INODE_SYMLINK 3
|
|
|
|
static inline u64 ternfs_inode_type(u64 ino) {
|
|
return (ino >> 61) & 0x03;
|
|
}
|
|
|
|
static inline u32 ternfs_inode_shard(u64 ino) {
|
|
return ino & 0xff;
|
|
}
|
|
|
|
struct inode* ternfs_get_inode(
|
|
struct super_block* sb,
|
|
// Are we OK with not having `parent`? This is currently only OK
|
|
// in the context of NFS.
|
|
bool allow_no_parent,
|
|
struct ternfs_inode* parent,
|
|
u64 ino
|
|
);
|
|
|
|
static inline struct inode* ternfs_get_inode_normal(
|
|
struct super_block* sb,
|
|
struct ternfs_inode* parent,
|
|
u64 ino
|
|
) {
|
|
return ternfs_get_inode(sb, false, parent, ino);
|
|
}
|
|
|
|
static inline struct inode* ternfs_get_inode_export(
|
|
struct super_block* sb,
|
|
struct ternfs_inode* parent,
|
|
u64 ino
|
|
) {
|
|
return ternfs_get_inode(sb, true, parent, ino);
|
|
}
|
|
|
|
// super ops
|
|
struct inode* ternfs_inode_alloc(struct super_block* sb);
|
|
void ternfs_inode_evict(struct inode* inode);
|
|
void ternfs_inode_free(struct inode* inode);
|
|
|
|
// inode ops
|
|
enum { ATTR_CACHE_NORM_TIMEOUT, ATTR_CACHE_DIR_TIMEOUT, ATTR_CACHE_NO_TIMEOUT };
|
|
int ternfs_do_getattr(struct ternfs_inode* enode, int cache_timeout_type);
|
|
// 0: not started
|
|
// 1: started
|
|
// -n: error
|
|
int ternfs_start_async_getattr(struct ternfs_inode* enode);
|
|
|
|
int __init ternfs_inode_init(void);
|
|
void __cold ternfs_inode_exit(void);
|
|
|
|
#endif
|