diff --git a/kmod/Makefile b/kmod/Makefile index 476d1b45..d313f2c7 100644 --- a/kmod/Makefile +++ b/kmod/Makefile @@ -34,8 +34,7 @@ ternfs-client-objs += \ span.o \ bincode.o \ revision.o \ - policy.o \ - block_services.o + policy.o EXTRA_CFLAGS = -I$(src) -g -DDEBUG -fdiagnostics-color=always -Wno-declaration-after-statement @@ -60,7 +59,7 @@ ternfs-client-tests: revision.c extra-files ternfs-client-clean: $(MAKE) -C $(KDIR) M=$(PWD) clean - rm -f *.o *.ko + rm -f *.o *.ko bincode_tests: bincode_tests.c bincodegen.h bincode.h diff --git a/kmod/block.h b/kmod/block.h index 5dde4007..b72ac3f7 100644 --- a/kmod/block.h +++ b/kmod/block.h @@ -20,7 +20,6 @@ extern int ternfs_fetch_block_timeout_jiffies; extern int ternfs_write_block_timeout_jiffies; extern int ternfs_block_service_connect_timeout_jiffies; -#define TERNFS_BLOCK_SERVICE_EXPECTED_PADDING 3 struct ternfs_block_service { u64 id; u32 ip1; @@ -28,14 +27,8 @@ struct ternfs_block_service { u16 port1; u16 port2; u8 flags; - u8 _[TERNFS_BLOCK_SERVICE_EXPECTED_PADDING]; }; -static inline bool ternfs_block_services_equal(struct ternfs_block_service* l, struct ternfs_block_service* r) { - BUILD_BUG_ON(sizeof(struct ternfs_block_service) != offsetof(struct ternfs_block_service, _) + TERNFS_BLOCK_SERVICE_EXPECTED_PADDING); - return memcmp(l, r, offsetof(struct ternfs_block_service, _)) == 0; -} - // Returns an error immediately if it can't connect to the block service or anyway // if it thinks the block service is no good. 
// diff --git a/kmod/block_services.c b/kmod/block_services.c deleted file mode 100644 index efd39bd1..00000000 --- a/kmod/block_services.c +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2025 XTX Markets Technologies Limited -// -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include -#include -#include - -#include "block_services.h" -#include "trace.h" - -struct ternfs_stored_block_service { - struct hlist_node hnode; - spinlock_t lock; - u64 id; - struct ternfs_block_service __rcu* bs; -}; - -#define BS_BITS 14 -#define BS_BUCKETS (1<id == bs_id)) { - return bs; - } - } - return NULL; -} - -struct ternfs_stored_block_service* ternfs_upsert_block_service(struct ternfs_block_service* bs) { - // It is currently not possible to remove block services from hash map. - // Furthermore returned block service nodes are stored for arbitrary time period so this can't - // really work with RCU. If we add support for removing block services this code needs rethinking. - struct ternfs_stored_block_service* bs_node = find_block_service(bs->id); - if (likely(bs_node != NULL)) { - // We found one, check if we need to update - rcu_read_lock(); - { - struct ternfs_block_service* existing_bs = rcu_dereference(bs_node->bs); - if (ternfs_block_services_equal(existing_bs, bs)) { // still the same, no update needed - rcu_read_unlock(); - trace_eggsfs_upsert_block_service(bs->id, TERNFS_UPSERT_BLOCKSERVICE_MATCH); - return bs_node; - } - } - rcu_read_unlock(); - trace_eggsfs_upsert_block_service(bs->id, TERNFS_UPSERT_BLOCKSERVICE_NOMATCH); - // Things differ, we do need to update - struct ternfs_block_service* new_bs = kmalloc(sizeof(struct ternfs_block_service), GFP_KERNEL); - if (new_bs == NULL) { - return ERR_PTR(-ENOMEM); - } - memcpy(new_bs, bs, sizeof(*bs)); - - // Swap the pointers - spin_lock(&bs_node->lock); - struct ternfs_block_service* old_bs = rcu_dereference_protected(bs_node->bs, lockdep_is_held(&bs_node->lock)); - rcu_assign_pointer(bs_node->bs, 
new_bs); - spin_unlock(&bs_node->lock); - - // TODO: switch to call_rcu - synchronize_rcu(); - kfree(old_bs); - return bs_node; - } - - trace_eggsfs_upsert_block_service(bs->id, TERNFS_UPSERT_BLOCKSERVICE_NEW); - - // We need to add a new one. Allocate both struct and body - struct ternfs_stored_block_service* new_bs_node = kmalloc(sizeof(struct ternfs_stored_block_service), GFP_KERNEL); - if (new_bs_node == NULL) { - return ERR_PTR(-ENOMEM); - } - - struct ternfs_block_service* new_bs = kmalloc(sizeof(struct ternfs_block_service), GFP_KERNEL); - if (new_bs == NULL) { - kfree(new_bs_node); - return ERR_PTR(-ENOMEM); - } - memcpy(new_bs, bs, sizeof(*bs)); - rcu_assign_pointer(new_bs_node->bs, new_bs); - - new_bs_node->id = bs->id; - spin_lock_init(&new_bs_node->lock); - - // Hashing not strictly needed, the block service ids are already - // random... - int bucket = hash_min(bs->id, HASH_BITS(block_services)); - spin_lock(&block_services_locks[bucket]); - // Check if somebody got to it first - bs_node = find_block_service(bs->id); - if (unlikely(bs_node != NULL)) { - // Let's not bother updating to our thing in this racy case - spin_unlock(&block_services_locks[bucket]); - kfree(new_bs); - kfree(new_bs_node); - return bs_node; - } - // Add it - hlist_add_head_rcu(&new_bs_node->hnode, &block_services[bucket]); - spin_unlock(&block_services_locks[bucket]); - - return new_bs_node; -} - -void ternfs_get_block_service(struct ternfs_stored_block_service* bs_node, struct ternfs_block_service* out_bs) { - rcu_read_lock(); - struct ternfs_block_service* bs = rcu_dereference(bs_node->bs); - memcpy(out_bs, bs, sizeof(*bs)); - rcu_read_unlock(); -} - -int ternfs_block_service_init(void) { - int i; - for (i = 0; i < BS_BUCKETS; i++) { - spin_lock_init(&block_services_locks[i]); - } - return 0; -} - -void ternfs_block_service_exit(void) { - int bucket; - struct hlist_node* tmp; - struct ternfs_stored_block_service* bs; - // While this pattern is not safe in general, at this point 
everything should be unmounted - // and nothing should be accessing block services anyway - rcu_read_lock(); - hash_for_each_safe(block_services, bucket, tmp, bs, hnode) { - kfree(rcu_dereference(bs->bs)); - kfree(bs); - } - rcu_read_unlock(); -} diff --git a/kmod/block_services.h b/kmod/block_services.h deleted file mode 100644 index d2f471c3..00000000 --- a/kmod/block_services.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2025 XTX Markets Technologies Limited -// -// SPDX-License-Identifier: GPL-2.0-or-later - -// This cache is never cleared -- but it is small. For 100k disks, which is -// what we're targeting, it'd be ~5MB. -#ifndef _TERNFS_BLOCK_SERVICE_H -#define _TERNFS_BLOCK_SERVICE_H - -#include "block.h" - -struct ternfs_stored_block_service; - -// Creates or updates a specific block service. Very fast unless the block service is -// unseen so far, which should be rare. -struct ternfs_stored_block_service* ternfs_upsert_block_service(struct ternfs_block_service* bs); - -// Gets block service. 
-void ternfs_get_block_service(struct ternfs_stored_block_service* bs_node, struct ternfs_block_service* bs); - -int __init ternfs_block_service_init(void); -void __cold ternfs_block_service_exit(void); - -#endif diff --git a/kmod/kmod.c b/kmod/kmod.c index a2112d13..34210190 100644 --- a/kmod/kmod.c +++ b/kmod/kmod.c @@ -23,7 +23,6 @@ #include "file.h" #include "debugfs.h" #include "policy.h" -#include "block_services.h" MODULE_LICENSE("GPL"); @@ -53,9 +52,6 @@ static int __init ternfs_init(void) { err = ternfs_policy_init(); if (err) { goto out_policy; } - err = ternfs_block_service_init(); - if (err) { goto out_block_service; } - err = ternfs_sysfs_init(); if (err) { goto out_sysfs; } @@ -102,8 +98,6 @@ out_block: out_sysctl: ternfs_sysfs_exit(); out_sysfs: - ternfs_block_service_exit(); -out_block_service: ternfs_policy_exit(); out_policy: ternfs_rs_exit(); @@ -124,7 +118,6 @@ static void __exit ternfs_exit(void) { ternfs_block_exit(); ternfs_sysctl_exit(); ternfs_sysfs_exit(); - ternfs_block_service_exit(); ternfs_policy_exit(); ternfs_rs_exit(); diff --git a/kmod/span.c b/kmod/span.c index b9a0792f..807d5483 100644 --- a/kmod/span.c +++ b/kmod/span.c @@ -22,9 +22,8 @@ #include "trace.h" #include "intrshims.h" #include "sysctl.h" -#include "block_services.h" -int ternfs_span_cache_retention_jiffies = 24 * 60 * 60 * HZ; // 1 day +int ternfs_span_cache_retention_jiffies = 10 * 60 * HZ; // 10 minutes static struct kmem_cache* ternfs_block_span_cachep; static struct kmem_cache* ternfs_inline_span_cachep; @@ -375,16 +374,13 @@ static int fetch_span_blocks(struct fetch_span_pages_state* st) { } } - //fetch_stripe_trace(st, TERNFS_FETCH_STRIPE_BLOCK_START, i, 0); hold_fetch_span_pages(st); ternfs_debug("block start st=%p block_service=%016llx block_id=%016llx", st, block->id, block->id); - struct ternfs_block_service bs; - ternfs_get_block_service(block->bs, &bs); // Fetches a single cell from the block int block_err = ternfs_fetch_block_pages_with_crc( 
&span_block_done, (void*)st, - &bs, &st->blocks_pages[i], span->span.ino, block->id, block->crc, st->start_offset, st->size + &block->bs, &st->blocks_pages[i], span->span.ino, block->id, block->crc, st->start_offset, st->size ); if (block_err) { BUG_ON(list_empty(&st->blocks_pages[i])); @@ -906,19 +902,13 @@ static void file_spans_cb_block( block->id = block_id; block->crc = crc; - // Populate bs cache - struct ternfs_block_service bs; - bs.id = bs_id; - bs.ip1 = ip1; - bs.port1 = port1; - bs.ip2 = ip2; - bs.port2 = port2; - bs.flags = flags; - block->bs = ternfs_upsert_block_service(&bs); - if (IS_ERR(block->bs)) { - ctx->err = PTR_ERR(block->bs); - return; - } + struct ternfs_block_service* bs = &block->bs; + bs->id = bs_id; + bs->ip1 = ip1; + bs->port1 = port1; + bs->ip2 = ip2; + bs->port2 = port2; + bs->flags = flags; } static void file_spans_cb_inline_span(void* data, u64 offset, u32 size, u8 len, const char* body) { diff --git a/kmod/span.h b/kmod/span.h index 81d223a4..fe82fb4d 100644 --- a/kmod/span.h +++ b/kmod/span.h @@ -59,7 +59,7 @@ struct ternfs_inline_span { }) struct ternfs_block { - struct ternfs_stored_block_service* bs; + struct ternfs_block_service bs; u64 id; u32 crc; }; diff --git a/kmod/super.c b/kmod/super.c index 45a0a446..f723b933 100644 --- a/kmod/super.c +++ b/kmod/super.c @@ -12,7 +12,6 @@ #include #include -#include "block_services.h" #include "log.h" #include "inode.h" #include "export.h" @@ -171,92 +170,6 @@ static int ternfs_refresh_fs_info(struct ternfs_fs_info* info) { atomic64_set(&info->cdc_addr2, ternfs_mk_addr(cdc_ip2.x, cdc_port2.x)); } } - - { - { - char changed_block_services_req[TERNFS_REGISTRY_REQ_HEADER_SIZE + TERNFS_LOCAL_CHANGED_BLOCK_SERVICES_REQ_SIZE]; - struct ternfs_bincode_put_ctx ctx = { - .start = changed_block_services_req + TERNFS_REGISTRY_REQ_HEADER_SIZE, - .cursor = changed_block_services_req + TERNFS_REGISTRY_REQ_HEADER_SIZE, - .end = changed_block_services_req + sizeof(changed_block_services_req), - }; - 
ternfs_local_changed_block_services_req_put_start(&ctx, start); - ternfs_local_changed_block_services_req_put_changed_since(&ctx, start, changed_since, info->block_services_last_changed_time); - ternfs_local_changed_block_services_req_put_end(&ctx, changed_since, end); - ternfs_write_registry_req_header(changed_block_services_req, TERNFS_LOCAL_CHANGED_BLOCK_SERVICES_REQ_SIZE, TERNFS_REGISTRY_LOCAL_CHANGED_BLOCK_SERVICES); - if (err = sendloop(registry_sock, changed_block_services_req, sizeof changed_block_services_req), err < 0) goto out_sock; - } - u32 registry_resp_len; - u8 registry_resp_kind; - { - char block_services_resp_header[TERNFS_REGISTRY_RESP_HEADER_SIZE]; - if (err = recvloop(registry_sock, block_services_resp_header, sizeof block_services_resp_header), err < 0) goto out_sock; - err = ternfs_read_registry_resp_header(block_services_resp_header, &registry_resp_len, &registry_resp_kind); - if (err < 0) { goto out_sock; } - } - u64 last_changed; - u16 block_services_len; - { - char last_changed_and_len[sizeof(last_changed) + sizeof(block_services_len)]; - if (registry_resp_len < sizeof(last_changed_and_len)) { - ternfs_debug("expected size of at least %ld for BlockServicesWithFlagChangeResp, got %d", sizeof(last_changed_and_len), registry_resp_len); - err = -EINVAL; - goto out_sock; - } - if (err = recvloop(registry_sock, last_changed_and_len, sizeof last_changed_and_len), err < 0) goto out_sock; - last_changed = get_unaligned_le64(last_changed_and_len); - block_services_len = get_unaligned_le16(last_changed_and_len + sizeof(last_changed)); - registry_resp_len -= sizeof(last_changed_and_len); - } - { - if (registry_resp_len != TERNFS_BLOCK_SERVICE_SIZE * block_services_len) { - ternfs_debug("expected size of at least %d for %d BlockServices in BlockServicesWithFlagChangeResp, got %d", - TERNFS_BLOCK_SERVICE_SIZE * block_services_len, block_services_len, registry_resp_len); - err = -EINVAL; - goto out_sock; - } - u16 block_service_idx; - for (block_service_idx = 
0; block_service_idx < block_services_len; block_service_idx++) { - char block_service_buf[TERNFS_BLOCK_SERVICE_SIZE]; - if (err = recvloop(registry_sock, block_service_buf, sizeof block_service_buf), err < 0) goto out_sock; - struct ternfs_bincode_get_ctx bs_ctx = { - .buf = block_service_buf, - .end = block_service_buf + sizeof(block_service_buf), - .err = 0, - }; - ternfs_block_service_get_start(&bs_ctx, start); - ternfs_block_service_get_addrs(&bs_ctx, start, addr_start); - ternfs_addrs_info_get_addr1(&bs_ctx, addr_start, ipport1_start); - ternfs_ip_port_get_addrs(&bs_ctx, ipport1_start, ip1); - ternfs_ip_port_get_port(&bs_ctx, ip1, port1); - ternfs_ip_port_get_end(&bs_ctx, port1, ipport1_end); - ternfs_addrs_info_get_addr2(&bs_ctx, ipport1_end, ipport2_start); - ternfs_ip_port_get_addrs(&bs_ctx, ipport2_start, ip2); - ternfs_ip_port_get_port(&bs_ctx, ip2, port2); - ternfs_ip_port_get_end(&bs_ctx, port2, ipport2_end); - ternfs_addrs_info_get_end(&bs_ctx, ipport2_end, addr_end); - ternfs_block_service_get_id(&bs_ctx, addr_end, bs_id); - ternfs_block_service_get_flags(&bs_ctx, bs_id, bs_flags); - ternfs_block_service_get_end(&bs_ctx, bs_flags, end); - ternfs_block_service_get_finish(&bs_ctx, end); - if (bs_ctx.err != 0) { err = ternfs_error_to_linux(bs_ctx.err); goto out_sock; } - - struct ternfs_block_service bs; - bs.id = bs_id.x; - bs.ip1 = ip1.x; - bs.port1 = port1.x; - bs.ip2 = ip2.x; - bs.port2 = port2.x; - bs.flags = bs_flags.x; - struct ternfs_stored_block_service* sbs = ternfs_upsert_block_service(&bs); - if (IS_ERR(sbs)) { - err = PTR_ERR(sbs); - goto out_sock; - } - } - } - info->block_services_last_changed_time = last_changed; - } { static_assert(TERNFS_INFO_REQ_SIZE == 0); char info_req[TERNFS_REGISTRY_REQ_HEADER_SIZE]; @@ -409,12 +322,6 @@ static struct ternfs_fs_info* ternfs_init_fs_info(struct net* net, const char* d err = ternfs_init_shard_socket(&ternfs_info->sock); if (err) { goto out_addr; } - // for the first update we will ask for everything 
that changed in last day. - // this is more than enough time for any older changed to be visible to shards and propagated through block info - u64 atime_ns = ktime_get_real_ns(); - atime_ns -= min(atime_ns, 86400000000000ull); - ternfs_info->block_services_last_changed_time = atime_ns; - err = ternfs_refresh_fs_info(ternfs_info); if (err != 0) { goto out_socket; } diff --git a/kmod/super.h b/kmod/super.h index 8bca39db..29ac4d9f 100644 --- a/kmod/super.h +++ b/kmod/super.h @@ -28,8 +28,6 @@ struct ternfs_fs_info { atomic64_t capacity; atomic64_t available; - u64 block_services_last_changed_time; - struct delayed_work registry_refresh_work; kuid_t uid;