From a4bc32a18f0165cc00da0f36337813e1b8a0ecca Mon Sep 17 00:00:00 2001 From: Francesco Mazzoli Date: Fri, 26 May 2023 14:33:17 +0000 Subject: [PATCH] Span drop improvements We could get into situations where async droppings were scheduled at every read. --- cpp/ktools/eggsktools.cpp | 92 +++++++++++++++++++++++++-- kmod/net.c | 8 +-- kmod/restartsession.sh | 1 + kmod/span.c | 129 ++++++++++++++++++++++++++------------ kmod/span.h | 3 + kmod/sysctl.c | 2 + kmod/trace.h | 99 ++++++++++++++++++++++++----- 7 files changed, 267 insertions(+), 67 deletions(-) diff --git a/cpp/ktools/eggsktools.cpp b/cpp/ktools/eggsktools.cpp index bfa37112..06178798 100644 --- a/cpp/ktools/eggsktools.cpp +++ b/cpp/ktools/eggsktools.cpp @@ -7,17 +7,27 @@ #include #include #include +#include +#include #define die(fmt, ...) do { fprintf(stderr, fmt "\n" __VA_OPT__(,) __VA_ARGS__); exit(1); } while(false) const char* exe = NULL; #define badUsage(...) do { \ - fprintf(stderr, "Bad usage, expecting %s writefile \n", exe); \ + fprintf(stderr, "Bad usage, expecting %s writefile|readfile \n", exe); \ __VA_OPT__(fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n");) \ exit(2); \ } while(0) \ +static uint64_t nanosNow() { + struct timespec tp; + if (clock_gettime(CLOCK_REALTIME, &tp) < 0) { + die("could not get time: %d (%s)", errno, strerror(errno)); + } + return tp.tv_sec*1000000000ull + tp.tv_nsec; +} + // Just a super dumb file test, to have a controlled environment // where every syscall is accounted for. static void writeFile(int argc, const char** argv) { @@ -59,12 +69,15 @@ static void writeFile(int argc, const char** argv) { die("could not allocate: %d (%s)", errno, strerror(errno)); } - while (fileSize > 0) { - ssize_t res = write(fd, buffer, fileSize > bufSize ? bufSize : fileSize); + uint64_t start = nanosNow(); + + ssize_t toWrite = fileSize; + while (toWrite > 0) { + ssize_t res = write(fd, buffer, toWrite > bufSize ? bufSize : toWrite); if (res < 0) { die("couldn't write %s: %d (%s)", filename, errno, strerror(errno)); } - fileSize -= res; + toWrite -= res; } printf("finished writing, will now close\n"); @@ -73,7 +86,74 @@ static void writeFile(int argc, const char** argv) { die("couldn't close %s: %d (%s)", filename, errno, strerror(errno)); } - printf("done.\n"); + uint64_t elapsed = nanosNow() - start; + printf("done (%fGB/s).\n", (double)fileSize/(double)elapsed); +} + +// Same as writeFile, but for reading. 
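+// It reads the file sequentially into a single buffer of -buf-size bytes
+// (or of the whole file size when -buf-size is not given), checks that the
+// total bytes read match st_size, and reports throughput in GB/s like writeFile.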
+static void readFile(int argc, const char** argv) {
+    ssize_t bufSize = -1; // if -1, all in one go
+    const char* filename = NULL;
+
+    for (int i = 0; i < argc; i++) {
+        if (std::string(argv[i]) == "-buf-size") {
+            if (i+1 >= argc) { badUsage("No argument after -buf-size"); } i++;
+            bufSize = strtoull(argv[i], NULL, 0);
+            if (bufSize == ULLONG_MAX) {
+                badUsage("Bad -buf-size: %d (%s)", errno, strerror(errno));
+            }
+        } else {
+            if (filename != NULL) { badUsage("Filename already specified: %s", filename); }
+            filename = argv[i];
+        }
+    }
+
+    size_t fileSize;
+    {
+        struct stat st;
+        if (stat(filename, &st) != 0) {
+            die("couldn't stat %s: %d (%s)", filename, errno, strerror(errno));
+        }
+        fileSize = st.st_size;
+    }
+
+    if (bufSize < 0) { bufSize = fileSize; } // all in one go
+
+    printf("reading %ld bytes with bufsize %ld from %s\n", fileSize, bufSize, filename);
+
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        die("could not open file %s: %d (%s)", filename, errno, strerror(errno));
+    }
+
+    uint8_t* buffer = (uint8_t*)malloc(bufSize);
+    if (buffer == NULL) {
+        die("could not allocate: %d (%s)", errno, strerror(errno));
+    }
+
+    uint64_t start = nanosNow();
+
+    size_t readSize = 0;
+    for (;;) {
+        ssize_t ret = read(fd, buffer, bufSize);
+        if (ret < 0) {
+            die("could not read file %s: %d (%s)", filename, errno, strerror(errno));
+        }
+        if (ret == 0) { break; }
+        readSize += ret;
+    }
+
+    if (readSize != fileSize) {
+        die("expected to read %lu (file size), but read %lu instead", fileSize, readSize);
+    }
+
+    printf("finished reading, will now close\n");
+
+    if (close(fd) < 0) {
+        die("couldn't close %s: %d (%s)", filename, errno, strerror(errno));
+    }
+
+    uint64_t elapsed = nanosNow() - start;
+
+    printf("done (%fGB/s).\n", (double)fileSize/(double)elapsed);
+}
 
 int main(int argc, const char** argv) {
@@ -85,6 +165,8 @@ int main(int argc, const char** argv) {
     if (cmd == "writefile") {
         writeFile(argc - 2, argv + 2);
+    } else if (cmd == "readfile") {
+        readFile(argc - 2, argv + 2);
     } else {
         badUsage("Bad command %s", cmd.c_str());
     }
diff --git a/kmod/net.c b/kmod/net.c
index afa3e457..b3f6eb2e 100644
--- a/kmod/net.c
+++ b/kmod/net.c
@@ -220,8 +220,6 @@ struct sk_buff* eggsfs_metadata_request(
     vec.iov_base = p;
     vec.iov_len = len;
 
-    u64 start = jiffies64_to_nsecs(get_jiffies_64());
-    u64 elapsed;
 
     int max_attempts = shard_id < 0 ? CDC_ATTEMPTS : SHARD_ATTEMPTS;
     const u64* timeouts_10ms = shard_id < 0 ?
cdc_timeouts_10ms : shard_timeouts_10ms;
@@ -246,8 +244,7 @@ struct sk_buff* eggsfs_metadata_request(
         if (!err) {
             eggsfs_debug_print("got response");
             BUG_ON(!req.skb);
-            elapsed = jiffies64_to_nsecs(get_jiffies_64()) - start;
-            trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, elapsed, *attempts, req.skb->len, 0);
+            trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, *attempts, req.skb->len, 0);
 
             return req.skb;
         }
@@ -267,8 +264,7 @@ out_unregister:
     spin_unlock_bh(&sock->lock);
 
 out_err:
-    elapsed = jiffies64_to_nsecs(get_jiffies_64()) - start;
-    trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, elapsed, *attempts, 0, err);
+    trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, *attempts, 0, err);
     eggsfs_info_print("err=%d", err);
     return ERR_PTR(err);
 }
diff --git a/kmod/restartsession.sh b/kmod/restartsession.sh
index b2daaa14..543a2a44 100755
--- a/kmod/restartsession.sh
+++ b/kmod/restartsession.sh
@@ -80,3 +80,4 @@ tmux attach-session -t uovo:1
 # ./eggstests -kmod -filter 'mounted fs$' -cfg fsTest.checkThreads=10 -cfg fsTest.numFiles=100 -cfg fsTest.numDirs=10 -short -binaries-dir $(pwd)
 # ./eggstests -drop-cached-spans-every 100ms -kmod -filter 'mounted fs$' -cfg fsTest.checkThreads=10 -cfg fsTest.numFiles=100 -cfg fsTest.numDirs=10 -short -binaries-dir $(pwd)
+# ./eggstests -kmod -filter 'mounted fs$' -cfg fsTest.checkThreads=100 -cfg fsTest.numFiles=10 -cfg fsTest.numDirs=1 -short -binaries-dir $(pwd)
diff --git a/kmod/span.c b/kmod/span.c
index c0f5287c..711a7706 100644
--- a/kmod/span.c
+++ b/kmod/span.c
@@ -16,10 +16,14 @@ EGGSFS_DEFINE_COUNTER(eggsfs_stat_cached_spans);
 // reclaimed), just because the code is a bit simpler this way.
 atomic64_t eggsfs_stat_cached_span_pages = ATOMIC64_INIT(0);
 
-// These numbers do not mean anything in particular.
-unsigned long eggsfs_span_cache_max_size_async = (50ul << 30); // 50GiB
-unsigned long eggsfs_span_cache_min_avail_mem_async = (1ull << 30); // 2GiB
-unsigned long eggsfs_span_cache_max_size_sync = (100ul << 30); // 100GiB
+// These numbers do not mean anything in particular. We do want to avoid
+// flickering sync drops though, therefore the _drop thresholds below are
+// stricter than the async ones, for both cache size and free memory.
+unsigned long eggsfs_span_cache_max_size_async = (50ull << 30); // 50GiB
+unsigned long eggsfs_span_cache_min_avail_mem_async = (2ull << 30); // 2GiB
+unsigned long eggsfs_span_cache_max_size_drop = (45ull << 30); // 45GiB
+unsigned long eggsfs_span_cache_min_avail_mem_drop = (2ull << 30) + (500ull << 20); // 2GiB + 500MiB
+unsigned long eggsfs_span_cache_max_size_sync = (100ull << 30); // 100GiB
 unsigned long eggsfs_span_cache_min_avail_mem_sync = (1ull << 30); // 1GiB
 
 struct eggsfs_span_lru {
@@ -215,10 +219,17 @@ struct eggsfs_span* eggsfs_get_span(struct eggsfs_inode* enode, u64 offset) {
     eggsfs_debug_print("ino=%016lx, pid=%d, off=%llu getting span", enode->inode.i_ino, get_current()->pid, offset);
 
+    trace_eggsfs_get_span_enter(enode->inode.i_ino, offset);
+
+#define GET_SPAN_EXIT(s) do { \
+        trace_eggsfs_get_span_exit(enode->inode.i_ino, offset, IS_ERR(s) ? PTR_ERR(s) : 0); \
+        return s; \
+    } while(0)
+
     // This helps below: it means that we _must_ have a span. So if we
     // get NULL at any point, we can retry, because it means we're conflicting
     // with a reclaimer.
-    if (offset >= enode->inode.i_size) { return NULL; }
+    if (offset >= enode->inode.i_size) { GET_SPAN_EXIT(NULL); }
 
     u64 iterations = 0;
 
@@ -229,11 +240,12 @@ retry:
     // adding it to the LRU.
if (unlikely(iterations == 10)) { eggsfs_warn_print("we've been fetching the same span for %llu iterations, we're probably stuck on a yet-to-be enabled span we just fetched", iterations); + GET_SPAN_EXIT(ERR_PTR(-EIO)); } // Try to read the semaphore if it's already there. err = down_read_killable(&enode->file.spans_lock); - if (err) { return ERR_PTR(err); } + if (err) { GET_SPAN_EXIT(ERR_PTR(err)); } { struct eggsfs_span* span = eggsfs_lookup_span(&file->spans, offset); if (likely(span)) { @@ -242,14 +254,14 @@ retry: goto retry; } up_read(&enode->file.spans_lock); - return span; + GET_SPAN_EXIT(span); } up_read(&enode->file.spans_lock); } // We need to fetch the spans. err = down_write_killable(&file->spans_lock); - if (err) { return ERR_PTR(err); } + if (err) { GET_SPAN_EXIT(ERR_PTR(err)); } // Check if somebody go to it first. { @@ -260,7 +272,7 @@ retry: goto retry; } up_write(&file->spans_lock); - return span; + GET_SPAN_EXIT(span); } } @@ -285,7 +297,7 @@ retry: eggsfs_free_span(span); } up_write(&file->spans_lock); - return ERR_PTR(err); + GET_SPAN_EXIT(ERR_PTR(err)); } // add them to enode spans and LRU for (;;) { @@ -318,6 +330,8 @@ retry: // We now restart, we know that the span must be there (unless the shard is broken). // It might get reclaimed in the meantime though. goto retry; + +#undef GET_SPAN_EXIT } // If it returns -1, there are no spans to drop. Otherwise, returns the @@ -381,8 +395,18 @@ static int eggsfs_drop_one_span(int lru_ix, u64* dropped_pages) { return i%EGGSFS_SPAN_LRUS; } +static inline void eggsfs_drop_spans_enter(const char* type) { + trace_eggsfs_drop_spans_enter(type, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans)); +} + +static inline void eggsfs_drop_spans_exit(const char* type, u64 dropped_pages) { + trace_eggsfs_drop_spans_exit(type, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans), dropped_pages); +} + // returns the number of dropped pages u64 eggsfs_drop_all_spans(void) { + eggsfs_drop_spans_enter("all"); + u64 dropped_pages = 0; s64 spans_begin = eggsfs_counter_get(&eggsfs_stat_cached_spans); int lru_ix = 0; @@ -392,21 +416,24 @@ u64 eggsfs_drop_all_spans(void) { } s64 spans_end = eggsfs_counter_get(&eggsfs_stat_cached_spans); eggsfs_info_print("reclaimed %llu pages, %lld spans (approx)", dropped_pages, spans_begin-spans_end); + eggsfs_drop_spans_exit("all", dropped_pages); return dropped_pages; } -static u64 eggsfs_drop_spans(bool async) { - u64 start = get_jiffies_64(); - trace_eggsfs_drop_spans_enter(async, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans)); +static DEFINE_MUTEX(eggsfs_drop_spans_mu); + +static u64 eggsfs_drop_spans(const char* type) { + mutex_lock(&eggsfs_drop_spans_mu); + + eggsfs_drop_spans_enter(type); + u64 dropped_pages = 0; int lru_ix = 0; for (;;) { u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages); - // This is pretty lazy, we use the strict numbers so that we won't - // continuously trigger span drops. 
if ( - pages*PAGE_SIZE < eggsfs_span_cache_max_size_sync && - si_mem_available()*PAGE_SIZE > eggsfs_span_cache_min_avail_mem_sync + pages*PAGE_SIZE < eggsfs_span_cache_max_size_drop && + si_mem_available()*PAGE_SIZE > eggsfs_span_cache_min_avail_mem_drop ) { break; } @@ -414,13 +441,18 @@ static u64 eggsfs_drop_spans(bool async) { if (lru_ix < 0) { break; } } eggsfs_debug_print("dropped %llu pages", dropped_pages); - u64 elapsed = get_jiffies_64() - start; - trace_eggsfs_drop_spans_exit(async, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans), jiffies_to_nsecs(elapsed)); + + eggsfs_drop_spans_exit(type, dropped_pages); + + mutex_unlock(&eggsfs_drop_spans_mu); + return dropped_pages; } static void eggsfs_reclaim_spans_async(struct work_struct* work) { - eggsfs_drop_spans(true); + if (!mutex_is_locked(&eggsfs_drop_spans_mu)) { // somebody's already taking care of it + eggsfs_drop_spans("async"); + } } static DECLARE_WORK(eggsfs_reclaim_spans_work, eggsfs_reclaim_spans_async); @@ -428,18 +460,23 @@ static DECLARE_WORK(eggsfs_reclaim_spans_work, eggsfs_reclaim_spans_async); static unsigned long eggsfs_span_shrinker_count(struct shrinker* shrinker, struct shrink_control* sc) { u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages); if (pages == 0) { return SHRINK_EMPTY; } - // We won't do much if this is true - if ( - pages*PAGE_SIZE < eggsfs_span_cache_max_size_sync && - si_mem_available()*PAGE_SIZE > eggsfs_span_cache_min_avail_mem_sync - ) { - return 0; - } return pages; } +static int eggsfs_span_shrinker_lru_ix = 0; +static u64 eggsfs_span_shrinker_pages_round = 25600; // We drop at most 100MiB in one shrinker round + static unsigned long eggsfs_span_shrinker_scan(struct shrinker* shrinker, struct shrink_control* sc) { - return eggsfs_drop_spans(true); + eggsfs_drop_spans_enter("shrinker"); + u64 dropped_pages; + int lru_ix = eggsfs_span_shrinker_lru_ix; + for (dropped_pages = 0; dropped_pages < eggsfs_span_shrinker_pages_round;) { + lru_ix = eggsfs_drop_one_span(lru_ix, &dropped_pages); + if (lru_ix < 0) { break; } + } + eggsfs_span_shrinker_lru_ix = lru_ix >= 0 ? lru_ix : 0; + eggsfs_drop_spans_exit("shrinker", dropped_pages); + return dropped_pages; } static struct shrinker eggsfs_span_shrinker = { @@ -497,10 +534,17 @@ again: } struct page* eggsfs_get_span_page(struct eggsfs_block_span* span, u32 page_ix) { + trace_eggsfs_get_span_page_enter(span->span.enode->inode.i_ino, span->span.start, page_ix*PAGE_SIZE); + +#define GET_PAGE_EXIT(p) do { \ + trace_eggsfs_get_span_page_exit(span->span.enode->inode.i_ino, span->span.start, page_ix*PAGE_SIZE, IS_ERR(p) ? PTR_ERR(p) : 0); \ + return p; \ + } while(0) + // this should be guaranteed by the caller but we rely on it below, so let's check if (span->cell_size%PAGE_SIZE != 0) { eggsfs_warn_print("cell_size=%u, PAGE_SIZE=%lu, span->cell_size%%PAGE_SIZE=%lu", span->cell_size, PAGE_SIZE, span->cell_size%PAGE_SIZE); - return ERR_PTR(-EIO); + GET_PAGE_EXIT(ERR_PTR(-EIO)); } struct page* page; @@ -509,7 +553,7 @@ struct page* eggsfs_get_span_page(struct eggsfs_block_span* span, u32 page_ix) { again: page = xa_load(&span->pages, page_ix); - if (page != NULL) { return page; } + if (page != NULL) { GET_PAGE_EXIT(page); } // We need to load the stripe int D = eggsfs_data_blocks(span->parity); @@ -518,7 +562,7 @@ again: // TODO better error? 
if (stripe > span->stripes) {
         eggsfs_warn_print("span_offset=%u, stripe=%u, stripes=%u", span_offset, stripe, span->stripes);
-        return ERR_PTR(-EIO);
+        GET_PAGE_EXIT(ERR_PTR(-EIO));
     }
 
     start_page = (span->cell_size/PAGE_SIZE)*D*stripe;
@@ -528,7 +572,7 @@ again:
     int seqno;
     if (!eggsfs_latch_try_acquire(&span->stripe_latches[stripe], seqno)) {
         int err = eggsfs_latch_wait_killable(&span->stripe_latches[stripe], seqno);
-        if (err) { return ERR_PTR(err); }
+        if (err) { GET_PAGE_EXIT(ERR_PTR(err)); }
         goto again;
     }
 
@@ -623,19 +667,23 @@ out:
     }
 
     // Reclaim pages if we went over the limit
-    {
-        u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages);
-        u64 free_pages = si_mem_available();
-        if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_sync || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_sync) {
-            eggsfs_drop_spans(false);
-        } else if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_async || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_async) {
+    u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages);
+    u64 free_pages = si_mem_available();
+    if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_sync || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_sync) {
+        // sync dropping, apply backpressure; eggsfs_drop_spans() takes
+        // eggsfs_drop_spans_mu itself, so it also waits for any in-flight drop
+        eggsfs_drop_spans("sync");
+    } else if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_async || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_async) {
+        // don't bother submitting if another span dropper is running already
+        if (!mutex_is_locked(&eggsfs_drop_spans_mu)) {
             // TODO Is it a good idea to do it on system_long_wq rather than eggsfs_wq? The freeing
             // job might face heavy contention, so maybe yes?
             queue_work(system_long_wq, &eggsfs_reclaim_spans_work);
         }
     }
 
-    return err == 0 ? page : ERR_PTR(err);
+    GET_PAGE_EXIT(err == 0 ?
page : ERR_PTR(err)); out_err: eggsfs_debug_print("getting span page failed, err=%d", err); @@ -646,6 +694,9 @@ out_err: xa_erase(&span->pages, curr_page); } goto out; + +#undef GET_PAGE_EXIT + } int eggsfs_span_init(void) { diff --git a/kmod/span.h b/kmod/span.h index daec2853..14af9c4e 100644 --- a/kmod/span.h +++ b/kmod/span.h @@ -8,10 +8,13 @@ EGGSFS_DECLARE_COUNTER(eggsfs_stat_cached_spans); extern atomic64_t eggsfs_stat_cached_span_pages; + extern unsigned long eggsfs_span_cache_max_size_async; extern unsigned long eggsfs_span_cache_min_avail_mem_async; extern unsigned long eggsfs_span_cache_max_size_sync; extern unsigned long eggsfs_span_cache_min_avail_mem_sync; +extern unsigned long eggsfs_span_cache_max_size_drop; +extern unsigned long eggsfs_span_cache_min_avail_mem_drop; struct eggsfs_span { struct eggsfs_inode* enode; diff --git a/kmod/sysctl.c b/kmod/sysctl.c index 982f8ff5..4c4cd83e 100644 --- a/kmod/sysctl.c +++ b/kmod/sysctl.c @@ -71,6 +71,8 @@ static struct ctl_table eggsfs_cb_sysctls[] = { EGGSFS_CTL_ULONG(span_cache_min_avail_mem_async), EGGSFS_CTL_ULONG(span_cache_max_size_sync), EGGSFS_CTL_ULONG(span_cache_min_avail_mem_sync), + EGGSFS_CTL_ULONG(span_cache_max_size_drop), + EGGSFS_CTL_ULONG(span_cache_min_avail_mem_drop), {} }; diff --git a/kmod/trace.h b/kmod/trace.h index 7ffbcade..616a0106 100644 --- a/kmod/trace.h +++ b/kmod/trace.h @@ -255,8 +255,8 @@ TRACE_EVENT(eggsfs_metadata_request_enter, ); TRACE_EVENT(eggsfs_metadata_request_exit, - TP_PROTO(struct msghdr* msg, u64 req_id, u32 len, s16 shard_id, u8 kind, u64 elapsed, u32 n_attempts, u32 resp_len, int error), - TP_ARGS(msg, req_id, len, shard_id, kind, elapsed, n_attempts, resp_len, error), + TP_PROTO(struct msghdr* msg, u64 req_id, u32 len, s16 shard_id, u8 kind, u32 n_attempts, u32 resp_len, int error), + TP_ARGS(msg, req_id, len, shard_id, kind, n_attempts, resp_len, error), TP_STRUCT__entry( __array(u8, addr, sizeof(struct sockaddr_in)) @@ -264,7 +264,6 @@ TRACE_EVENT(eggsfs_metadata_request_exit, __field(u32, len) __field(s16, shard_id) // -1 is used for CDC __field(u8, kind) - __field(u64, elapsed) __field(u32, n_attempts) __field(u32, resp_len) __field(int, error) @@ -275,12 +274,11 @@ TRACE_EVENT(eggsfs_metadata_request_exit, __entry->len = len; __entry->shard_id = shard_id; __entry->kind = kind; - __entry->elapsed = elapsed; __entry->n_attempts = n_attempts; __entry->resp_len = resp_len; __entry->error = error; ), - TP_printk("dst=%pISp req_id=%llu shard_id=%d kind=%d len=%u elapsed=%llu n_attempts=%u resp_len=%u error=%d", __entry->addr, __entry->req_id, __entry->shard_id, __entry->kind, __entry->len, __entry->elapsed, __entry->n_attempts, __entry->resp_len, __entry->error) + TP_printk("dst=%pISp req_id=%llu shard_id=%d kind=%d len=%u n_attempts=%u resp_len=%u error=%d", __entry->addr, __entry->req_id, __entry->shard_id, __entry->kind, __entry->len, __entry->n_attempts, __entry->resp_len, __entry->error) ); TRACE_EVENT(eggsfs_get_inode_enter, @@ -424,43 +422,110 @@ TRACE_EVENT(eggsfs_span_add, ); TRACE_EVENT(eggsfs_drop_spans_enter, - TP_PROTO(bool async, long mem_available, u64 cached_pages, u64 cached_spans), - TP_ARGS( async, mem_available, cached_pages, cached_spans), + TP_PROTO(const char* type, long mem_available, u64 cached_pages, u64 cached_spans), + TP_ARGS( type, mem_available, cached_pages, cached_spans), TP_STRUCT__entry( - __field(bool, async) + __field(const char*, type) __field(long, mem_available) __field(u64, cached_pages) __field(u64, cached_spans) ), TP_fast_assign( - 
__entry->async = async;
+        __entry->type = type;
         __entry->mem_available = mem_available;
         __entry->cached_pages = cached_pages;
         __entry->cached_spans = cached_spans;
     ),
-    TP_printk("async=%d mem_available=%ld cached_pages=%llu cached_spans=%llu", (int)__entry->async, __entry->mem_available, __entry->cached_pages, __entry->cached_spans)
+    TP_printk("type=%s mem_available=%ld cached_pages=%llu cached_spans=%llu", __entry->type, __entry->mem_available, __entry->cached_pages, __entry->cached_spans)
 );
 
 TRACE_EVENT(eggsfs_drop_spans_exit,
-    TP_PROTO(bool async, long mem_available, u64 cached_pages, u64 cached_spans, u64 elapsed),
-    TP_ARGS( async, mem_available, cached_pages, cached_spans, elapsed),
+    TP_PROTO(const char* type, long mem_available, u64 cached_pages, u64 cached_spans, u64 dropped_pages),
+    TP_ARGS( type, mem_available, cached_pages, cached_spans, dropped_pages),
 
     TP_STRUCT__entry(
-        __field(bool, async)
+        __field(const char*, type)
         __field(long, mem_available)
         __field(u64, cached_pages)
         __field(u64, cached_spans)
-        __field(u64, elapsed)
+        __field(u64, dropped_pages)
     ),
     TP_fast_assign(
-        __entry->async = async;
+        __entry->type = type;
         __entry->mem_available = mem_available;
         __entry->cached_pages = cached_pages;
         __entry->cached_spans = cached_spans;
-        __entry->elapsed = elapsed;
+        __entry->dropped_pages = dropped_pages;
     ),
-    TP_printk("async=%d mem_available=%ld cached_pages=%llu cached_spans=%llu elapsed=%llu", (int)__entry->async, __entry->mem_available, __entry->cached_pages, __entry->cached_spans, __entry->elapsed)
+    TP_printk("type=%s mem_available=%ld cached_pages=%llu cached_spans=%llu dropped_pages=%llu", __entry->type, __entry->mem_available, __entry->cached_pages, __entry->cached_spans, __entry->dropped_pages)
+);
+
+TRACE_EVENT(eggsfs_get_span_enter,
+    TP_PROTO(u64 file_id, u64 offset),
+    TP_ARGS( file_id, offset),
+
+    TP_STRUCT__entry(
+        __field(u64, file_id)
+        __field(u64, offset)
+    ),
+    TP_fast_assign(
+        __entry->file_id = file_id;
+        __entry->offset = offset;
+    ),
+    TP_printk("file_id=%016llx offset=%llu", __entry->file_id, __entry->offset)
+);
+
+TRACE_EVENT(eggsfs_get_span_exit,
+    TP_PROTO(u64 file_id, u64 offset, int err),
+    TP_ARGS( file_id, offset, err),
+
+    TP_STRUCT__entry(
+        __field(u64, file_id)
+        __field(u64, offset)
+        __field(int, err)
+    ),
+    TP_fast_assign(
+        __entry->file_id = file_id;
+        __entry->offset = offset;
+        __entry->err = err;
+    ),
+    TP_printk("file_id=%016llx offset=%llu err=%d", __entry->file_id, __entry->offset, __entry->err)
+);
+
+TRACE_EVENT(eggsfs_get_span_page_enter,
+    TP_PROTO(u64 file_id, u64 span_offset, u32 page_offset),
+    TP_ARGS( file_id, span_offset, page_offset),
+
+    TP_STRUCT__entry(
+        __field(u64, file_id)
+        __field(u64, span_offset)
+        __field(u32, page_offset)
+    ),
+    TP_fast_assign(
+        __entry->file_id = file_id;
+        __entry->span_offset = span_offset;
+        __entry->page_offset = page_offset;
+    ),
+    TP_printk("file_id=%016llx span_offset=%llu page_offset=%u", __entry->file_id, __entry->span_offset, __entry->page_offset)
+);
+
+TRACE_EVENT(eggsfs_get_span_page_exit,
+    TP_PROTO(u64 file_id, u64 span_offset, u32 page_offset, int err),
+    TP_ARGS( file_id, span_offset, page_offset, err),
+
+    TP_STRUCT__entry(
+        __field(u64, file_id)
+        __field(u64, span_offset)
+        __field(u32, page_offset)
+        __field(int, err)
+    ),
+    TP_fast_assign(
+        __entry->file_id = file_id;
+        __entry->span_offset = span_offset;
+        __entry->page_offset = page_offset;
+        __entry->err = err;
+    ),
+    TP_printk("file_id=%016llx span_offset=%llu page_offset=%u err=%d", __entry->file_id, __entry->span_offset, __entry->page_offset, __entry->err)
 );
 
 #endif /* _TRACE_EGGFS_H */