From ca5debd8e77c74e079852c6f261cccbb0bd56920 Mon Sep 17 00:00:00 2001 From: Francesco Mazzoli Date: Thu, 6 Jul 2023 12:01:57 +0000 Subject: [PATCH] Configurable timeouts The shard/CDC request timeouts and the directory refresh time are now stored in jiffies (variables suffixed _jiffies) and exposed as sysctl entries named *_ms through proc_dointvec_ms_jiffies. The tunables are defined in net.c, file.c, dir.c and rs.c instead of sysctl.c. Boolean sysctls are clamped to 0/1 and rs_cpu_level to [1, 3] with proc_dointvec_minmax. The "giving up" log line converts overall_timeout back to milliseconds so the printed value matches its "ms" suffix, and MSECS_TO_JIFFIES parenthesizes its argument. --- kmod/bincode_tests.c | 6 ++-- kmod/dir.c | 5 ++-- kmod/dir.h | 2 +- kmod/file.c | 2 ++ kmod/file.h | 2 ++ kmod/inode.c | 2 +- kmod/net.c | 35 ++++++++++++++-------- kmod/net.h | 7 +++++ kmod/rs.c | 3 ++ kmod/rs.h | 2 ++ kmod/sysctl.c | 70 +++++++++++++++++++++++++++----------------- kmod/sysctl.h | 8 ----- 12 files changed, 89 insertions(+), 55 deletions(-) diff --git a/kmod/bincode_tests.c b/kmod/bincode_tests.c index 0cdd9ead..e3a666b7 100644 --- a/kmod/bincode_tests.c +++ b/kmod/bincode_tests.c @@ -13,8 +13,6 @@ typedef uint64_t u64; #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -#define UDP_MTU 1472 - #define BUG_ON(x) \ if (unlikely(x)) { \ fprintf(stderr, "bug: " #x); \ @@ -22,6 +20,8 @@ typedef uint64_t u64; exit(1); \ } +#define EGGSFS_UDP_MTU 1472 + static inline u64 get_unaligned_le64(const void* p) { u64 x; memcpy(&x, p, sizeof(x)); @@ -103,7 +103,7 @@ int main(void) { } { - char read_dir_resp[UDP_MTU]; + char read_dir_resp[EGGSFS_UDP_MTU]; struct eggsfs_bincode_put_ctx put_ctx = { .start = read_dir_resp, .cursor = read_dir_resp, diff --git a/kmod/dir.c b/kmod/dir.c index 101caf48..e18d0901 100644 --- a/kmod/dir.c +++ b/kmod/dir.c @@ -9,9 +9,10 @@ #include "metadata.h" #include "trace.h" +#define MSECS_TO_JIFFIES(_ms) (((_ms) * HZ) / 1000) + // sysctls -int eggsfs_dir_refresh_time; // in jiffies - // +int eggsfs_dir_refresh_time_jiffies = MSECS_TO_JIFFIES(250); #define eggsfs_dir_get_page_n(_page) ({ *((((u32*)&(_page)->private))+1); }) #define eggsfs_dir_set_page_n(_page, _n) ({ *((((u32*)&(_page)->private))+1) = _n; }) diff --git a/kmod/dir.h b/kmod/dir.h index 045b50c0..4b833ded 100644 --- a/kmod/dir.h +++ b/kmod/dir.h @@ -8,7 +8,7 @@ extern struct file_operations eggsfs_dir_operations; -extern int eggsfs_dir_refresh_time; +extern int 
eggsfs_dir_refresh_time_jiffies; int eggsfs_dir_readdir_entry_cb(void* ptr, const char* name, int name_len, u64 hash, u64 edge_creation_time, u64 ino); diff --git a/kmod/file.c b/kmod/file.c index 7abf6139..9261c335 100644 --- a/kmod/file.c +++ b/kmod/file.c @@ -17,6 +17,8 @@ #include "wq.h" #include "bincode.h" +unsigned eggsfs_max_write_span_attempts = 5; + static struct kmem_cache* eggsfs_transient_span_cachep; struct eggsfs_transient_span { diff --git a/kmod/file.h b/kmod/file.h index 7b0765da..6c1dd6f5 100644 --- a/kmod/file.h +++ b/kmod/file.h @@ -3,6 +3,8 @@ #include "inode.h" +extern unsigned eggsfs_max_write_span_attempts; + ssize_t eggsfs_file_write(struct eggsfs_inode* enode, int flags, loff_t* ppos, struct iov_iter* from); int eggsfs_file_flush(struct eggsfs_inode* enode, struct dentry* dentry); diff --git a/kmod/inode.c b/kmod/inode.c index 4b9ffa22..91ab5bc0 100644 --- a/kmod/inode.c +++ b/kmod/inode.c @@ -109,7 +109,7 @@ again: // progress: whoever wins the lock won't try again if (target_stripe_size) { enode->target_stripe_size = target_stripe_size; } - expiry = ts + eggsfs_dir_refresh_time; + expiry = ts + eggsfs_dir_refresh_time_jiffies; } } else { u64 size; diff --git a/kmod/net.c b/kmod/net.c index 8a068da7..579102ec 100644 --- a/kmod/net.c +++ b/kmod/net.c @@ -6,6 +6,15 @@ #include "trace.h" #include "err.h" +#define MSECS_TO_JIFFIES(_ms) (((_ms) * HZ) / 1000) + +unsigned eggsfs_initial_shard_timeout_jiffies = MSECS_TO_JIFFIES(100); +unsigned eggsfs_max_shard_timeout_jiffies = MSECS_TO_JIFFIES(2000); +unsigned eggsfs_overall_shard_timeout_jiffies = MSECS_TO_JIFFIES(10000); +unsigned eggsfs_initial_cdc_timeout_jiffies = MSECS_TO_JIFFIES(500); +unsigned eggsfs_max_cdc_timeout_jiffies = MSECS_TO_JIFFIES(2000); +unsigned eggsfs_overall_cdc_timeout_jiffies = MSECS_TO_JIFFIES(10000); + static struct eggsfs_shard_request* get_shard_request(struct eggsfs_shard_socket* s, u64 request_id) __must_hold(s->lock) { struct rb_node* node = s->requests.rb_node; 
while (node) { @@ -26,7 +35,7 @@ static void sock_readable(struct sock* sk) { read_lock_bh(&sk->sk_callback_lock); s = (struct eggsfs_shard_socket*)sk->sk_user_data; BUG_ON(!s); - while (1) { + for (;;) { skb = skb_recv_udp(sk, 0, 1, &err); if (!skb) { read_unlock_bh(&sk->sk_callback_lock); @@ -193,15 +202,15 @@ struct sk_buff* eggsfs_metadata_request( vec.iov_base = p; vec.iov_len = len; - unsigned timeout = shard_id < 0 ? eggsfs_initial_cdc_timeout_ms : eggsfs_initial_shard_timeout_ms; - unsigned max_timeout = shard_id < 0 ? eggsfs_max_cdc_timeout_ms : eggsfs_max_shard_timeout_ms; - unsigned overall_timeout = shard_id < 0 ? eggsfs_overall_cdc_timeout_ms : eggsfs_overall_shard_timeout_ms; - u64 start_t_ms = jiffies64_to_msecs(get_jiffies_64()); - u64 elapsed_ms = 0; + unsigned timeout = shard_id < 0 ? eggsfs_initial_cdc_timeout_jiffies : eggsfs_initial_shard_timeout_jiffies; + unsigned max_timeout = shard_id < 0 ? eggsfs_max_cdc_timeout_jiffies : eggsfs_max_shard_timeout_jiffies; + unsigned overall_timeout = shard_id < 0 ? eggsfs_overall_cdc_timeout_jiffies : eggsfs_overall_shard_timeout_jiffies; + u64 start_t = get_jiffies_64(); + u64 elapsed = 0; #define LOG_STR "req_id=%llu shard_id=%d kind_str=%s kind=%d addr=%pI4:%d attempts=%d elapsed=%llums" -#define LOG_ARGS req_id, shard_id, kind_str, kind, &addr->sin_addr, ntohs(addr->sin_port), *attempts, elapsed_ms -#define WARN_LATE if (elapsed_ms > 1000) { eggsfs_warn("late request: " LOG_STR, LOG_ARGS); } +#define LOG_ARGS req_id, shard_id, kind_str, kind, &addr->sin_addr, ntohs(addr->sin_port), *attempts, jiffies64_to_msecs(elapsed) +#define WARN_LATE if (elapsed > MSECS_TO_JIFFIES(1000)) { eggsfs_warn("late request: " LOG_STR, LOG_ARGS); } for (;;) { trace_eggsfs_metadata_request(msg, req_id, len, shard_id, kind, *attempts, 0, EGGSFS_METADATA_REQUEST_ATTEMPT, 0); // which socket? 
@@ -222,8 +231,8 @@ struct sk_buff* eggsfs_metadata_request( } err = wait_for_request(sock, &req, timeout); - u64 t_ms = jiffies64_to_msecs(get_jiffies_64()); - elapsed_ms = t_ms - start_t_ms; + u64 t = get_jiffies_64(); + elapsed = t - start_t; (*attempts)++; timeout = min(max_timeout, (timeout * 3) / 2); // 1.5 exponential backoff if (!err) { @@ -246,9 +255,9 @@ struct sk_buff* eggsfs_metadata_request( } eggsfs_debug("err=%d", err); - if (err != -ETIMEDOUT || elapsed_ms >= overall_timeout) { + if (err != -ETIMEDOUT || elapsed >= overall_timeout) { if (err != -ERESTARTSYS) { - eggsfs_info("giving up (might be too much time passed): " LOG_STR " overall_timeout=%ums err=%d", LOG_ARGS, overall_timeout, err); + eggsfs_info("giving up (might be that too much time passed): " LOG_STR " overall_timeout=%ums err=%d", LOG_ARGS, jiffies_to_msecs(overall_timeout), err); } goto out_err; } @@ -261,7 +270,7 @@ out_unregister: BUG_ON(req.skb); rb_erase(&req.node, &sock->requests); spin_unlock_bh(&sock->lock); - elapsed_ms = jiffies64_to_msecs(get_jiffies_64()) - start_t_ms; + elapsed = get_jiffies_64() - start_t; out_err: WARN_LATE diff --git a/kmod/net.h b/kmod/net.h index cb27c452..afad88af 100644 --- a/kmod/net.h +++ b/kmod/net.h @@ -8,6 +8,13 @@ #define EGGSFS_UDP_MTU 1472 +extern unsigned eggsfs_initial_shard_timeout_jiffies; +extern unsigned eggsfs_max_shard_timeout_jiffies; +extern unsigned eggsfs_overall_shard_timeout_jiffies; +extern unsigned eggsfs_initial_cdc_timeout_jiffies; +extern unsigned eggsfs_max_cdc_timeout_jiffies; +extern unsigned eggsfs_overall_cdc_timeout_jiffies; + struct eggsfs_shard_socket { struct socket* sock; struct rb_root requests; diff --git a/kmod/rs.c b/kmod/rs.c index 359258ff..296531c6 100644 --- a/kmod/rs.c +++ b/kmod/rs.c @@ -17,6 +17,9 @@ enum rs_cpu_level { RS_CPU_GFNI = 3, }; +int eggsfs_rs_cpu_level_min = 1; +int eggsfs_rs_cpu_level_max = 3; + static inline bool rs_detect_valgrind(void) { return false; } diff --git a/kmod/rs.h b/kmod/rs.h index 
30b6e95c..29ef1715 100644 --- a/kmod/rs.h +++ b/kmod/rs.h @@ -8,6 +8,8 @@ #define EGGSFS_MAX_BLOCKS (EGGSFS_MAX_DATA+EGGSFS_MAX_PARITY) extern int eggsfs_rs_cpu_level; +extern int eggsfs_rs_cpu_level_min; +extern int eggsfs_rs_cpu_level_max; static inline u8 eggsfs_data_blocks(u8 parity) { return parity & 0x0F; diff --git a/kmod/sysctl.c b/kmod/sysctl.c index 19e6ed0f..c0ce507a 100644 --- a/kmod/sysctl.c +++ b/kmod/sysctl.c @@ -4,17 +4,11 @@ #include "span.h" #include "log.h" #include "sysctl.h" +#include "net.h" +#include "file.h" int eggsfs_debug_output = 0; int eggsfs_prefetch = 1; -unsigned eggsfs_max_write_span_attempts = 5; -unsigned eggsfs_initial_shard_timeout_ms = 100; -unsigned eggsfs_max_shard_timeout_ms = 2000; -unsigned eggsfs_overall_shard_timeout_ms = 10000; -unsigned eggsfs_initial_cdc_timeout_ms = 500; -unsigned eggsfs_max_cdc_timeout_ms = 2000; -unsigned eggsfs_overall_cdc_timeout_ms = 10000; -extern int eggsfs_rs_cpu_level; static int drop_cached_spans; @@ -39,12 +33,35 @@ static int eggsfs_drop_spans_sysctl(struct ctl_table* table, int write, void __u .proc_handler = proc_doulongvec_minmax, \ } -#define EGGSFS_CTL_INT_TIME(_name) \ +#define EGGSFS_CTL_UINT(_name) \ { \ - .procname = #_name "_ms", \ + .procname = #_name, \ .data = &eggsfs_##_name, \ .maxlen = sizeof(eggsfs_##_name), \ .mode = 0644, \ + .proc_handler = proc_douintvec, \ + } + +static int bool_off = 0; +static int bool_on = 1; + +#define EGGSFS_CTL_BOOL(_name) \ + { \ + .procname = #_name, \ + .data = &eggsfs_##_name, \ + .maxlen = sizeof(eggsfs_##_name), \ + .mode = 0644, \ + .proc_handler = proc_dointvec_minmax, \ + .extra1 = &bool_off, \ + .extra2 = &bool_on, \ + } + +#define EGGSFS_CTL_INT_JIFFIES(_name) \ + { \ + .procname = #_name "_ms", \ + .data = &eggsfs_##_name##_jiffies, \ + .maxlen = sizeof(eggsfs_##_name##_jiffies), \ + .mode = 0644, \ .proc_handler = proc_dointvec_ms_jiffies, \ } @@ -54,7 +71,9 @@ static struct ctl_table eggsfs_cb_sysctls[] = { .data = 
&eggsfs_debug_output, .maxlen = sizeof(eggsfs_debug_output), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &bool_off, + .extra2 = &bool_on, }, { @@ -62,7 +81,9 @@ static struct ctl_table eggsfs_cb_sysctls[] = { .data = &eggsfs_rs_cpu_level, .maxlen = sizeof(eggsfs_rs_cpu_level), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &eggsfs_rs_cpu_level_min, + .extra2 = &eggsfs_rs_cpu_level_max, }, { @@ -73,23 +94,18 @@ static struct ctl_table eggsfs_cb_sysctls[] = { .proc_handler = eggsfs_drop_spans_sysctl, }, - { - .procname = "prefetch", - .data = &eggsfs_prefetch, - .maxlen = sizeof(eggsfs_prefetch), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + EGGSFS_CTL_BOOL(prefetch), - { - .procname = "max_write_span_attempts", - .data = &eggsfs_max_write_span_attempts, - .maxlen = sizeof(eggsfs_max_write_span_attempts), - .mode = 0644, - .proc_handler = proc_douintvec, - }, + EGGSFS_CTL_INT_JIFFIES(dir_refresh_time), - EGGSFS_CTL_INT_TIME(dir_refresh_time), + EGGSFS_CTL_INT_JIFFIES(initial_shard_timeout), + EGGSFS_CTL_INT_JIFFIES(max_shard_timeout), + EGGSFS_CTL_INT_JIFFIES(overall_shard_timeout), + EGGSFS_CTL_INT_JIFFIES(initial_cdc_timeout), + EGGSFS_CTL_INT_JIFFIES(max_cdc_timeout), + EGGSFS_CTL_INT_JIFFIES(overall_cdc_timeout), + + EGGSFS_CTL_UINT(max_write_span_attempts), EGGSFS_CTL_ULONG(span_cache_max_size_async), EGGSFS_CTL_ULONG(span_cache_min_avail_mem_async), diff --git a/kmod/sysctl.h b/kmod/sysctl.h index 3ded92d5..cadb09a0 100644 --- a/kmod/sysctl.h +++ b/kmod/sysctl.h @@ -5,14 +5,6 @@ extern int eggsfs_prefetch; extern int eggsfs_debug_output; -extern unsigned eggsfs_max_write_span_attempts; -extern unsigned eggsfs_max_write_span_attempts; -extern unsigned eggsfs_initial_shard_timeout_ms; -extern unsigned eggsfs_max_shard_timeout_ms; -extern unsigned eggsfs_overall_shard_timeout_ms; -extern unsigned eggsfs_initial_cdc_timeout_ms; -extern unsigned 
eggsfs_max_cdc_timeout_ms; -extern unsigned eggsfs_overall_cdc_timeout_ms; int __init eggsfs_sysctl_init(void); void __cold eggsfs_sysctl_exit(void);