Configurable timeouts

This commit is contained in:
Francesco Mazzoli
2023-07-06 12:01:57 +00:00
committed by Francesco Mazzoli
parent 7954d01b41
commit ca5debd8e7
12 changed files with 89 additions and 55 deletions

View File

@@ -13,8 +13,6 @@ typedef uint64_t u64;
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define UDP_MTU 1472
#define BUG_ON(x) \
if (unlikely(x)) { \
fprintf(stderr, "bug: " #x); \
@@ -22,6 +20,8 @@ typedef uint64_t u64;
exit(1); \
}
#define EGGSFS_UDP_MTU 1472
static inline u64 get_unaligned_le64(const void* p) {
u64 x;
memcpy(&x, p, sizeof(x));
@@ -103,7 +103,7 @@ int main(void) {
}
{
char read_dir_resp[UDP_MTU];
char read_dir_resp[EGGSFS_UDP_MTU];
struct eggsfs_bincode_put_ctx put_ctx = {
.start = read_dir_resp,
.cursor = read_dir_resp,

View File

@@ -9,9 +9,10 @@
#include "metadata.h"
#include "trace.h"
// Converts a millisecond count to jiffies as a constant expression, so it can
// be used in static initialisers. The argument is parenthesised so that an
// expression such as `a + b` is converted as a whole rather than having only
// its last term multiplied by HZ. With integer operands the division truncates.
#define MSECS_TO_JIFFIES(_ms) (((_ms) * HZ) / 1000)
// sysctls
int eggsfs_dir_refresh_time; // in jiffies
//
int eggsfs_dir_refresh_time_jiffies = MSECS_TO_JIFFIES(250);
// Accessors for a 32-bit value stored inside (_page)->private: both macros
// take the address of the `private` field, view it as an array of u32, and
// read (get) or assign (set) element 1, i.e. the second 32-bit word.
// NOTE(review): this presumably relies on `private` being at least 64 bits
// wide, with element 0 used for something else -- confirm against the callers.
#define eggsfs_dir_get_page_n(_page) ({ *((((u32*)&(_page)->private))+1); })
#define eggsfs_dir_set_page_n(_page, _n) ({ *((((u32*)&(_page)->private))+1) = _n; })

View File

@@ -8,7 +8,7 @@
extern struct file_operations eggsfs_dir_operations;
extern int eggsfs_dir_refresh_time;
extern int eggsfs_dir_refresh_time_jiffies;
int eggsfs_dir_readdir_entry_cb(void* ptr, const char* name, int name_len, u64 hash, u64 edge_creation_time, u64 ino);

View File

@@ -17,6 +17,8 @@
#include "wq.h"
#include "bincode.h"
unsigned eggsfs_max_write_span_attempts = 5;
static struct kmem_cache* eggsfs_transient_span_cachep;
struct eggsfs_transient_span {

View File

@@ -3,6 +3,8 @@
#include "inode.h"
extern unsigned eggsfs_max_write_span_attempts;
ssize_t eggsfs_file_write(struct eggsfs_inode* enode, int flags, loff_t* ppos, struct iov_iter* from);
int eggsfs_file_flush(struct eggsfs_inode* enode, struct dentry* dentry);

View File

@@ -109,7 +109,7 @@ again: // progress: whoever wins the lock won't try again
if (target_stripe_size) {
enode->target_stripe_size = target_stripe_size;
}
expiry = ts + eggsfs_dir_refresh_time;
expiry = ts + eggsfs_dir_refresh_time_jiffies;
}
} else {
u64 size;

View File

@@ -6,6 +6,15 @@
#include "trace.h"
#include "err.h"
// Converts a millisecond count to jiffies as a constant expression, so it can
// initialise the file-scope timeout defaults. The argument is parenthesised so
// that an expression such as `a + b` is converted as a whole rather than
// having only its last term multiplied by HZ. With integer operands the
// division truncates.
#define MSECS_TO_JIFFIES(_ms) (((_ms) * HZ) / 1000)
// Request timeouts, stored in jiffies and initialised from millisecond
// defaults: 100 / 2000 / 10000 ms for shard requests and 500 / 2000 / 10000 ms
// for CDC requests (initial, max and overall values respectively).
unsigned eggsfs_initial_shard_timeout_jiffies = MSECS_TO_JIFFIES(100);
unsigned eggsfs_max_shard_timeout_jiffies = MSECS_TO_JIFFIES(2000);
unsigned eggsfs_overall_shard_timeout_jiffies = MSECS_TO_JIFFIES(10000);
unsigned eggsfs_initial_cdc_timeout_jiffies = MSECS_TO_JIFFIES(500);
unsigned eggsfs_max_cdc_timeout_jiffies = MSECS_TO_JIFFIES(2000);
unsigned eggsfs_overall_cdc_timeout_jiffies = MSECS_TO_JIFFIES(10000);
static struct eggsfs_shard_request* get_shard_request(struct eggsfs_shard_socket* s, u64 request_id) __must_hold(s->lock) {
struct rb_node* node = s->requests.rb_node;
while (node) {
@@ -26,7 +35,7 @@ static void sock_readable(struct sock* sk) {
read_lock_bh(&sk->sk_callback_lock);
s = (struct eggsfs_shard_socket*)sk->sk_user_data;
BUG_ON(!s);
while (1) {
for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err);
if (!skb) {
read_unlock_bh(&sk->sk_callback_lock);
@@ -193,15 +202,15 @@ struct sk_buff* eggsfs_metadata_request(
vec.iov_base = p;
vec.iov_len = len;
unsigned timeout = shard_id < 0 ? eggsfs_initial_cdc_timeout_ms : eggsfs_initial_shard_timeout_ms;
unsigned max_timeout = shard_id < 0 ? eggsfs_max_cdc_timeout_ms : eggsfs_max_shard_timeout_ms;
unsigned overall_timeout = shard_id < 0 ? eggsfs_overall_cdc_timeout_ms : eggsfs_overall_shard_timeout_ms;
u64 start_t_ms = jiffies64_to_msecs(get_jiffies_64());
u64 elapsed_ms = 0;
unsigned timeout = shard_id < 0 ? eggsfs_initial_cdc_timeout_jiffies : eggsfs_initial_shard_timeout_jiffies;
unsigned max_timeout = shard_id < 0 ? eggsfs_max_cdc_timeout_jiffies : eggsfs_max_shard_timeout_jiffies;
unsigned overall_timeout = shard_id < 0 ? eggsfs_overall_cdc_timeout_jiffies : eggsfs_overall_shard_timeout_jiffies;
u64 start_t = get_jiffies_64();
u64 elapsed = 0;
#define LOG_STR "req_id=%llu shard_id=%d kind_str=%s kind=%d addr=%pI4:%d attempts=%d elapsed=%llums"
#define LOG_ARGS req_id, shard_id, kind_str, kind, &addr->sin_addr, ntohs(addr->sin_port), *attempts, elapsed_ms
#define WARN_LATE if (elapsed_ms > 1000) { eggsfs_warn("late request: " LOG_STR, LOG_ARGS); }
#define LOG_ARGS req_id, shard_id, kind_str, kind, &addr->sin_addr, ntohs(addr->sin_port), *attempts, jiffies64_to_msecs(elapsed)
#define WARN_LATE if (elapsed > MSECS_TO_JIFFIES(1000)) { eggsfs_warn("late request: " LOG_STR, LOG_ARGS); }
for (;;) {
trace_eggsfs_metadata_request(msg, req_id, len, shard_id, kind, *attempts, 0, EGGSFS_METADATA_REQUEST_ATTEMPT, 0); // which socket?
@@ -222,8 +231,8 @@ struct sk_buff* eggsfs_metadata_request(
}
err = wait_for_request(sock, &req, timeout);
u64 t_ms = jiffies64_to_msecs(get_jiffies_64());
elapsed_ms = t_ms - start_t_ms;
u64 t = get_jiffies_64();
elapsed = t - start_t;
(*attempts)++;
timeout = min(max_timeout, (timeout * 3) / 2); // 1.5 exponential backoff
if (!err) {
@@ -246,9 +255,9 @@ struct sk_buff* eggsfs_metadata_request(
}
eggsfs_debug("err=%d", err);
if (err != -ETIMEDOUT || elapsed_ms >= overall_timeout) {
if (err != -ETIMEDOUT || elapsed >= overall_timeout) {
if (err != -ERESTARTSYS) {
eggsfs_info("giving up (might be too much time passed): " LOG_STR " overall_timeout=%ums err=%d", LOG_ARGS, overall_timeout, err);
eggsfs_info("giving up (might be that too much time passed): " LOG_STR " overall_timeout=%ums err=%d", LOG_ARGS, overall_timeout, err);
}
goto out_err;
}
@@ -261,7 +270,7 @@ out_unregister:
BUG_ON(req.skb);
rb_erase(&req.node, &sock->requests);
spin_unlock_bh(&sock->lock);
elapsed_ms = jiffies64_to_msecs(get_jiffies_64()) - start_t_ms;
elapsed = get_jiffies_64() - start_t;
out_err:
WARN_LATE

View File

@@ -8,6 +8,13 @@
#define EGGSFS_UDP_MTU 1472
// Request timeout settings, expressed in jiffies (hence the `_jiffies`
// suffix): one initial / max / overall triple for shard requests and one for
// CDC requests.
extern unsigned eggsfs_initial_shard_timeout_jiffies;
extern unsigned eggsfs_max_shard_timeout_jiffies;
extern unsigned eggsfs_overall_shard_timeout_jiffies;
extern unsigned eggsfs_initial_cdc_timeout_jiffies;
extern unsigned eggsfs_max_cdc_timeout_jiffies;
extern unsigned eggsfs_overall_cdc_timeout_jiffies;
struct eggsfs_shard_socket {
struct socket* sock;
struct rb_root requests;

View File

@@ -17,6 +17,9 @@ enum rs_cpu_level {
RS_CPU_GFNI = 3,
};
// Lower and upper limits for the CPU level setting; the upper limit equals
// RS_CPU_GFNI (3), the last value of enum rs_cpu_level.
// NOTE(review): presumably kept as variables rather than constants so their
// addresses can be handed to a sysctl range check -- confirm at the use site.
int eggsfs_rs_cpu_level_min = 1;
int eggsfs_rs_cpu_level_max = 3;
// Stub: takes no input and always returns false.
static inline bool rs_detect_valgrind(void) { return false; }

View File

@@ -8,6 +8,8 @@
#define EGGSFS_MAX_BLOCKS (EGGSFS_MAX_DATA+EGGSFS_MAX_PARITY)
extern int eggsfs_rs_cpu_level;
extern int eggsfs_rs_cpu_level_min;
extern int eggsfs_rs_cpu_level_max;
static inline u8 eggsfs_data_blocks(u8 parity) {
return parity & 0x0F;

View File

@@ -4,17 +4,11 @@
#include "span.h"
#include "log.h"
#include "sysctl.h"
#include "net.h"
#include "file.h"
int eggsfs_debug_output = 0;
int eggsfs_prefetch = 1;
unsigned eggsfs_max_write_span_attempts = 5;
unsigned eggsfs_initial_shard_timeout_ms = 100;
unsigned eggsfs_max_shard_timeout_ms = 2000;
unsigned eggsfs_overall_shard_timeout_ms = 10000;
unsigned eggsfs_initial_cdc_timeout_ms = 500;
unsigned eggsfs_max_cdc_timeout_ms = 2000;
unsigned eggsfs_overall_cdc_timeout_ms = 10000;
extern int eggsfs_rs_cpu_level;
static int drop_cached_spans;
@@ -39,12 +33,35 @@ static int eggsfs_drop_spans_sysctl(struct ctl_table* table, int write, void __u
.proc_handler = proc_doulongvec_minmax, \
}
#define EGGSFS_CTL_INT_TIME(_name) \
#define EGGSFS_CTL_UINT(_name) \
{ \
.procname = #_name "_ms", \
.procname = #_name, \
.data = &eggsfs_##_name, \
.maxlen = sizeof(eggsfs_##_name), \
.mode = 0644, \
.proc_handler = proc_douintvec, \
}
// Bounds referenced by boolean sysctl entries: 0 (off) and 1 (on).
static int bool_off = 0;
static int bool_on = 1;

// Expands to a ctl_table initialiser for the sysctl called <_knob>, backed by
// the variable eggsfs_<_knob>. The entry uses proc_dointvec_minmax with
// bool_off / bool_on as extra1 / extra2, so those are the min / max bounds.
#define EGGSFS_CTL_BOOL(_knob) \
    { \
        .procname = #_knob, \
        .mode = 0644, \
        .data = &eggsfs_##_knob, \
        .maxlen = sizeof(eggsfs_##_knob), \
        .extra1 = &bool_off, \
        .extra2 = &bool_on, \
        .proc_handler = proc_dointvec_minmax, \
    }
// Expands to a ctl_table initialiser for the sysctl called "<_knob>_ms",
// backed by the variable eggsfs_<_knob>_jiffies and handled by
// proc_dointvec_ms_jiffies (milliseconds at the sysctl interface, jiffies in
// the backing variable).
// NOTE(review): proc_dointvec_ms_jiffies operates on int values; confirm that
// is acceptable for backing variables declared `unsigned`.
#define EGGSFS_CTL_INT_JIFFIES(_knob) \
    { \
        .procname = #_knob "_ms", \
        .mode = 0644, \
        .data = &eggsfs_##_knob##_jiffies, \
        .maxlen = sizeof(eggsfs_##_knob##_jiffies), \
        .proc_handler = proc_dointvec_ms_jiffies, \
    }
@@ -54,7 +71,9 @@ static struct ctl_table eggsfs_cb_sysctls[] = {
.data = &eggsfs_debug_output,
.maxlen = sizeof(eggsfs_debug_output),
.mode = 0644,
.proc_handler = proc_dointvec,
.proc_handler = proc_dointvec_minmax,
.extra1 = &bool_off,
.extra2 = &bool_on,
},
{
@@ -62,7 +81,9 @@ static struct ctl_table eggsfs_cb_sysctls[] = {
.data = &eggsfs_rs_cpu_level,
.maxlen = sizeof(eggsfs_rs_cpu_level),
.mode = 0644,
.proc_handler = proc_dointvec,
.proc_handler = proc_dointvec_minmax,
.extra1 = &eggsfs_rs_cpu_level_min,
.extra2 = &eggsfs_rs_cpu_level_max,
},
{
@@ -73,23 +94,18 @@ static struct ctl_table eggsfs_cb_sysctls[] = {
.proc_handler = eggsfs_drop_spans_sysctl,
},
{
.procname = "prefetch",
.data = &eggsfs_prefetch,
.maxlen = sizeof(eggsfs_prefetch),
.mode = 0644,
.proc_handler = proc_dointvec,
},
EGGSFS_CTL_BOOL(prefetch),
{
.procname = "max_write_span_attempts",
.data = &eggsfs_max_write_span_attempts,
.maxlen = sizeof(eggsfs_max_write_span_attempts),
.mode = 0644,
.proc_handler = proc_douintvec,
},
EGGSFS_CTL_INT_JIFFIES(dir_refresh_time),
EGGSFS_CTL_INT_TIME(dir_refresh_time),
EGGSFS_CTL_INT_JIFFIES(initial_shard_timeout),
EGGSFS_CTL_INT_JIFFIES(max_shard_timeout),
EGGSFS_CTL_INT_JIFFIES(overall_shard_timeout),
EGGSFS_CTL_INT_JIFFIES(initial_cdc_timeout),
EGGSFS_CTL_INT_JIFFIES(max_cdc_timeout),
EGGSFS_CTL_INT_JIFFIES(overall_cdc_timeout),
EGGSFS_CTL_UINT(max_write_span_attempts),
EGGSFS_CTL_ULONG(span_cache_max_size_async),
EGGSFS_CTL_ULONG(span_cache_min_avail_mem_async),

View File

@@ -5,14 +5,6 @@
extern int eggsfs_prefetch;
extern int eggsfs_debug_output;
extern unsigned eggsfs_max_write_span_attempts;
extern unsigned eggsfs_max_write_span_attempts;
extern unsigned eggsfs_initial_shard_timeout_ms;
extern unsigned eggsfs_max_shard_timeout_ms;
extern unsigned eggsfs_overall_shard_timeout_ms;
extern unsigned eggsfs_initial_cdc_timeout_ms;
extern unsigned eggsfs_max_cdc_timeout_ms;
extern unsigned eggsfs_overall_cdc_timeout_ms;
int __init eggsfs_sysctl_init(void);
void __cold eggsfs_sysctl_exit(void);