mirror of
https://github.com/XTXMarkets/ternfs.git
synced 2026-05-04 16:59:37 -05:00
Span drop improvements
We could get into situations where an asynchronous span drop was scheduled on every read.
This commit is contained in:
@@ -7,17 +7,27 @@
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <time.h>
|
||||
|
||||
#define die(fmt, ...) do { fprintf(stderr, fmt "\n" __VA_OPT__(,) __VA_ARGS__); exit(1); } while(false)
|
||||
|
||||
const char* exe = NULL;
|
||||
|
||||
#define badUsage(...) do { \
|
||||
fprintf(stderr, "Bad usage, expecting %s writefile <command arguments>\n", exe); \
|
||||
fprintf(stderr, "Bad usage, expecting %s writefile|readfile <command arguments>\n", exe); \
|
||||
__VA_OPT__(fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n");) \
|
||||
exit(2); \
|
||||
} while(0) \
|
||||
|
||||
static uint64_t nanosNow() {
|
||||
struct timespec tp;
|
||||
if (clock_gettime(CLOCK_REALTIME, &tp) < 0) {
|
||||
die("could not get time: %d (%s)", errno, strerror(errno));
|
||||
}
|
||||
return tp.tv_sec*1000000000ull + tp.tv_nsec;
|
||||
}
|
||||
|
||||
// Just a super dumb file test, to have a controlled environment
|
||||
// where every syscall is accounted for.
|
||||
static void writeFile(int argc, const char** argv) {
|
||||
@@ -59,12 +69,15 @@ static void writeFile(int argc, const char** argv) {
|
||||
die("could not allocate: %d (%s)", errno, strerror(errno));
|
||||
}
|
||||
|
||||
while (fileSize > 0) {
|
||||
ssize_t res = write(fd, buffer, fileSize > bufSize ? bufSize : fileSize);
|
||||
uint64_t start = nanosNow();
|
||||
|
||||
ssize_t toWrite = fileSize;
|
||||
while (toWrite > 0) {
|
||||
ssize_t res = write(fd, buffer, toWrite > bufSize ? bufSize : toWrite);
|
||||
if (res < 0) {
|
||||
die("couldn't write %s: %d (%s)", filename, errno, strerror(errno));
|
||||
}
|
||||
fileSize -= res;
|
||||
toWrite -= res;
|
||||
}
|
||||
|
||||
printf("finished writing, will now close\n");
|
||||
@@ -73,7 +86,74 @@ static void writeFile(int argc, const char** argv) {
|
||||
die("couldn't close %s: %d (%s)", filename, errno, strerror(errno));
|
||||
}
|
||||
|
||||
printf("done.\n");
|
||||
uint64_t elapsed = nanosNow() - start;
|
||||
printf("done (%fGB/s).\n", (double)fileSize/(double)elapsed);
|
||||
}
|
||||
|
||||
// Same as writeFile, but for reading.
|
||||
static void readFile(int argc, const char** argv) {
|
||||
ssize_t bufSize = -1; // if -1, all in one go
|
||||
const char* filename = NULL;
|
||||
|
||||
for (int i = 0; i < argc; i++) {
|
||||
if (std::string(argv[i]) == "-buf-size") {
|
||||
if (i+1 >= argc) { badUsage("No argument after -buf-size"); } i++;
|
||||
bufSize = strtoull(argv[i], NULL, 0);
|
||||
if (bufSize == ULLONG_MAX) {
|
||||
badUsage("Bad -buf-size: %d (%s)", errno, strerror(errno));
|
||||
}
|
||||
} else {
|
||||
if (filename != NULL) { badUsage("Filename already specified: %s", filename); }
|
||||
filename = argv[i];
|
||||
}
|
||||
}
|
||||
|
||||
size_t fileSize;
|
||||
{
|
||||
struct stat st;
|
||||
if(stat(filename, &st) != 0) {
|
||||
die("couldn't stat %s: %d (%s)", filename, errno, strerror(errno));
|
||||
}
|
||||
fileSize = st.st_size;
|
||||
}
|
||||
|
||||
printf("reading %ld bytes with bufsize %ld to %s\n", fileSize, bufSize, filename);
|
||||
|
||||
int fd = open(filename, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
die("could not open file %s: %d (%s)", filename, errno, strerror(errno));
|
||||
}
|
||||
|
||||
uint8_t* buffer = (uint8_t*)malloc(bufSize);
|
||||
if (buffer == NULL) {
|
||||
die("could not allocate: %d (%s)", errno, strerror(errno));
|
||||
}
|
||||
|
||||
uint64_t start = nanosNow();
|
||||
|
||||
size_t readSize = 0;
|
||||
for (;;) {
|
||||
ssize_t ret = read(fd, buffer, bufSize);
|
||||
if (ret < 0) {
|
||||
die("could not read file %s: %d (%s)", filename, errno, strerror(errno));
|
||||
}
|
||||
if (ret == 0) { break; }
|
||||
readSize += ret;
|
||||
}
|
||||
|
||||
if (readSize != fileSize) {
|
||||
die("expected to read %lu (file size), but read %lu instead", fileSize, readSize);
|
||||
}
|
||||
|
||||
printf("finished reading, will now close\n");
|
||||
|
||||
if (close(fd) < 0) {
|
||||
die("couldn't close %s: %d (%s)", filename, errno, strerror(errno));
|
||||
}
|
||||
|
||||
uint64_t elapsed = nanosNow() - start;
|
||||
|
||||
printf("done (%fGB/s).\n", (double)fileSize/(double)elapsed);
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
@@ -85,6 +165,8 @@ int main(int argc, const char** argv) {
|
||||
|
||||
if (cmd == "writefile") {
|
||||
writeFile(argc - 2, argv + 2);
|
||||
} else if (cmd == "readfile") {
|
||||
readFile(argc - 2, argv + 2);
|
||||
} else {
|
||||
badUsage("Bad command %s", cmd.c_str());
|
||||
}
|
||||
|
||||
+2
-6
@@ -220,8 +220,6 @@ struct sk_buff* eggsfs_metadata_request(
|
||||
vec.iov_base = p;
|
||||
vec.iov_len = len;
|
||||
|
||||
u64 start = jiffies64_to_nsecs(get_jiffies_64());
|
||||
u64 elapsed;
|
||||
int max_attempts = shard_id < 0 ? CDC_ATTEMPTS : SHARD_ATTEMPTS;
|
||||
const u64* timeouts_10ms = shard_id < 0 ? cdc_timeouts_10ms : shard_timeouts_10ms;
|
||||
|
||||
@@ -246,8 +244,7 @@ struct sk_buff* eggsfs_metadata_request(
|
||||
if (!err) {
|
||||
eggsfs_debug_print("got response");
|
||||
BUG_ON(!req.skb);
|
||||
elapsed = jiffies64_to_nsecs(get_jiffies_64()) - start;
|
||||
trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, elapsed, *attempts, req.skb->len, 0);
|
||||
trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, *attempts, req.skb->len, 0);
|
||||
return req.skb;
|
||||
}
|
||||
|
||||
@@ -267,8 +264,7 @@ out_unregister:
|
||||
spin_unlock_bh(&sock->lock);
|
||||
|
||||
out_err:
|
||||
elapsed = jiffies64_to_nsecs(get_jiffies_64()) - start;
|
||||
trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, elapsed, *attempts, 0, err);
|
||||
trace_eggsfs_metadata_request_exit(msg, req_id, len, shard_id, kind, *attempts, 0, err);
|
||||
eggsfs_info_print("err=%d", err);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@@ -80,3 +80,4 @@ tmux attach-session -t uovo:1
|
||||
|
||||
# ./eggstests -kmod -filter 'mounted fs$' -cfg fsTest.checkThreads=10 -cfg fsTest.numFiles=100 -cfg fsTest.numDirs=10 -short -binaries-dir $(pwd)
|
||||
# ./eggstests -drop-cached-spans-every 100ms -kmod -filter 'mounted fs$' -cfg fsTest.checkThreads=10 -cfg fsTest.numFiles=100 -cfg fsTest.numDirs=10 -short -binaries-dir $(pwd)
|
||||
# ./eggstests -kmod -filter 'mounted fs$' -cfg fsTest.checkThreads=100 -cfg fsTest.numFiles=10 -cfg fsTest.numDirs=1 -short -binaries-dir $(pwd)
|
||||
|
||||
+90
-39
@@ -16,10 +16,14 @@ EGGSFS_DEFINE_COUNTER(eggsfs_stat_cached_spans);
|
||||
// reclaimed), just because the code is a bit simpler this way.
|
||||
atomic64_t eggsfs_stat_cached_span_pages = ATOMIC64_INIT(0);
|
||||
|
||||
// These numbers do not mean anything in particular.
|
||||
unsigned long eggsfs_span_cache_max_size_async = (50ul << 30); // 50GiB
|
||||
unsigned long eggsfs_span_cache_min_avail_mem_async = (1ull << 30); // 2GiB
|
||||
unsigned long eggsfs_span_cache_max_size_sync = (100ul << 30); // 100GiB
|
||||
// Span cache thresholds, in bytes (they are compared against page counts
// multiplied by PAGE_SIZE).
// These numbers do not mean anything in particular. We do want to avoid
|
||||
// flickering drops though, therefore once a drop starts it keeps going until it
|
||||
// is past the stricter "_drop" limits, not just past the limit that triggered it.
// NOTE(review): this comment used to say "down to 25GiB from 50GiB", but
// max_size_drop below is 45GiB -- confirm which value is intended.
|
||||
unsigned long eggsfs_span_cache_max_size_async = (50ull << 30); // 50GiB
|
||||
unsigned long eggsfs_span_cache_min_avail_mem_async = (2ull << 30); // 2GiB
|
||||
unsigned long eggsfs_span_cache_max_size_drop = (45ull << 30); // 45GiB (NOTE(review): previously annotated as 25GiB -- confirm)
|
||||
unsigned long eggsfs_span_cache_min_avail_mem_drop = (2ull << 30) + (500ull << 20); // 2GiB + 500MiB
|
||||
unsigned long eggsfs_span_cache_max_size_sync = (100ull << 30); // 100GiB
|
||||
unsigned long eggsfs_span_cache_min_avail_mem_sync = (1ull << 30); // 1GiB
|
||||
|
||||
struct eggsfs_span_lru {
|
||||
@@ -215,10 +219,17 @@ struct eggsfs_span* eggsfs_get_span(struct eggsfs_inode* enode, u64 offset) {
|
||||
|
||||
eggsfs_debug_print("ino=%016lx, pid=%d, off=%llu getting span", enode->inode.i_ino, get_current()->pid, offset);
|
||||
|
||||
trace_eggsfs_get_span_enter(enode->inode.i_ino, offset);
|
||||
|
||||
#define GET_SPAN_EXIT(s) do { \
|
||||
trace_eggsfs_get_span_exit(enode->inode.i_ino, offset, IS_ERR(s) ? PTR_ERR(s) : 0); \
|
||||
return s; \
|
||||
} while(0)
|
||||
|
||||
// This helps below: it means that we _must_ have a span. So if we
|
||||
// get NULL at any point, we can retry, because it means we're conflicting
|
||||
// with a reclaimer.
|
||||
if (offset >= enode->inode.i_size) { return NULL; }
|
||||
if (offset >= enode->inode.i_size) { GET_SPAN_EXIT(NULL); }
|
||||
|
||||
u64 iterations = 0;
|
||||
|
||||
@@ -229,11 +240,12 @@ retry:
|
||||
// adding it to the LRU.
|
||||
if (unlikely(iterations == 10)) {
|
||||
eggsfs_warn_print("we've been fetching the same span for %llu iterations, we're probably stuck on a yet-to-be enabled span we just fetched", iterations);
|
||||
GET_SPAN_EXIT(ERR_PTR(-EIO));
|
||||
}
|
||||
|
||||
// Try to read the semaphore if it's already there.
|
||||
err = down_read_killable(&enode->file.spans_lock);
|
||||
if (err) { return ERR_PTR(err); }
|
||||
if (err) { GET_SPAN_EXIT(ERR_PTR(err)); }
|
||||
{
|
||||
struct eggsfs_span* span = eggsfs_lookup_span(&file->spans, offset);
|
||||
if (likely(span)) {
|
||||
@@ -242,14 +254,14 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
up_read(&enode->file.spans_lock);
|
||||
return span;
|
||||
GET_SPAN_EXIT(span);
|
||||
}
|
||||
up_read(&enode->file.spans_lock);
|
||||
}
|
||||
|
||||
// We need to fetch the spans.
|
||||
err = down_write_killable(&file->spans_lock);
|
||||
if (err) { return ERR_PTR(err); }
|
||||
if (err) { GET_SPAN_EXIT(ERR_PTR(err)); }
|
||||
|
||||
// Check if somebody got to it first.
|
||||
{
|
||||
@@ -260,7 +272,7 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
up_write(&file->spans_lock);
|
||||
return span;
|
||||
GET_SPAN_EXIT(span);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,7 +297,7 @@ retry:
|
||||
eggsfs_free_span(span);
|
||||
}
|
||||
up_write(&file->spans_lock);
|
||||
return ERR_PTR(err);
|
||||
GET_SPAN_EXIT(ERR_PTR(err));
|
||||
}
|
||||
// add them to enode spans and LRU
|
||||
for (;;) {
|
||||
@@ -318,6 +330,8 @@ retry:
|
||||
// We now restart, we know that the span must be there (unless the shard is broken).
|
||||
// It might get reclaimed in the meantime though.
|
||||
goto retry;
|
||||
|
||||
#undef GET_SPAN_EXIT
|
||||
}
|
||||
|
||||
// If it returns -1, there are no spans to drop. Otherwise, returns the
|
||||
@@ -381,8 +395,18 @@ static int eggsfs_drop_one_span(int lru_ix, u64* dropped_pages) {
|
||||
return i%EGGSFS_SPAN_LRUS;
|
||||
}
|
||||
|
||||
static inline void eggsfs_drop_spans_enter(const char* type) {
|
||||
trace_eggsfs_drop_spans_enter(type, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans));
|
||||
}
|
||||
|
||||
// Emits the eggsfs_drop_spans_exit tracepoint for a drop of the given `type`:
// the same memory/cache snapshot as on entry, plus how many pages were dropped.
static inline void eggsfs_drop_spans_exit(const char* type, u64 dropped_pages) {
    long mem_available = PAGE_SIZE*si_mem_available();
    u64 cached_pages = atomic64_read(&eggsfs_stat_cached_span_pages);
    u64 cached_spans = eggsfs_counter_get(&eggsfs_stat_cached_spans);

    trace_eggsfs_drop_spans_exit(type, mem_available, cached_pages, cached_spans, dropped_pages);
}
|
||||
|
||||
// returns the number of dropped pages
|
||||
u64 eggsfs_drop_all_spans(void) {
|
||||
eggsfs_drop_spans_enter("all");
|
||||
|
||||
u64 dropped_pages = 0;
|
||||
s64 spans_begin = eggsfs_counter_get(&eggsfs_stat_cached_spans);
|
||||
int lru_ix = 0;
|
||||
@@ -392,21 +416,24 @@ u64 eggsfs_drop_all_spans(void) {
|
||||
}
|
||||
s64 spans_end = eggsfs_counter_get(&eggsfs_stat_cached_spans);
|
||||
eggsfs_info_print("reclaimed %llu pages, %lld spans (approx)", dropped_pages, spans_begin-spans_end);
|
||||
eggsfs_drop_spans_exit("all", dropped_pages);
|
||||
return dropped_pages;
|
||||
}
|
||||
|
||||
static u64 eggsfs_drop_spans(bool async) {
|
||||
u64 start = get_jiffies_64();
|
||||
trace_eggsfs_drop_spans_enter(async, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans));
|
||||
static DEFINE_MUTEX(eggsfs_drop_spans_mu);
|
||||
|
||||
static u64 eggsfs_drop_spans(const char* type) {
|
||||
mutex_lock(&eggsfs_drop_spans_mu);
|
||||
|
||||
eggsfs_drop_spans_enter(type);
|
||||
|
||||
u64 dropped_pages = 0;
|
||||
int lru_ix = 0;
|
||||
for (;;) {
|
||||
u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages);
|
||||
// This is pretty lazy, we use the strict numbers so that we won't
|
||||
// continuously trigger span drops.
|
||||
if (
|
||||
pages*PAGE_SIZE < eggsfs_span_cache_max_size_sync &&
|
||||
si_mem_available()*PAGE_SIZE > eggsfs_span_cache_min_avail_mem_sync
|
||||
pages*PAGE_SIZE < eggsfs_span_cache_max_size_drop &&
|
||||
si_mem_available()*PAGE_SIZE > eggsfs_span_cache_min_avail_mem_drop
|
||||
) {
|
||||
break;
|
||||
}
|
||||
@@ -414,13 +441,18 @@ static u64 eggsfs_drop_spans(bool async) {
|
||||
if (lru_ix < 0) { break; }
|
||||
}
|
||||
eggsfs_debug_print("dropped %llu pages", dropped_pages);
|
||||
u64 elapsed = get_jiffies_64() - start;
|
||||
trace_eggsfs_drop_spans_exit(async, PAGE_SIZE*si_mem_available(), atomic64_read(&eggsfs_stat_cached_span_pages), eggsfs_counter_get(&eggsfs_stat_cached_spans), jiffies_to_nsecs(elapsed));
|
||||
|
||||
eggsfs_drop_spans_exit(type, dropped_pages);
|
||||
|
||||
mutex_unlock(&eggsfs_drop_spans_mu);
|
||||
|
||||
return dropped_pages;
|
||||
}
|
||||
|
||||
static void eggsfs_reclaim_spans_async(struct work_struct* work) {
|
||||
eggsfs_drop_spans(true);
|
||||
if (!mutex_is_locked(&eggsfs_drop_spans_mu)) { // somebody's already taking care of it
|
||||
eggsfs_drop_spans("async");
|
||||
}
|
||||
}
|
||||
|
||||
static DECLARE_WORK(eggsfs_reclaim_spans_work, eggsfs_reclaim_spans_async);
|
||||
@@ -428,18 +460,23 @@ static DECLARE_WORK(eggsfs_reclaim_spans_work, eggsfs_reclaim_spans_async);
|
||||
static unsigned long eggsfs_span_shrinker_count(struct shrinker* shrinker, struct shrink_control* sc) {
|
||||
u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages);
|
||||
if (pages == 0) { return SHRINK_EMPTY; }
|
||||
// We won't do much if this is true
|
||||
if (
|
||||
pages*PAGE_SIZE < eggsfs_span_cache_max_size_sync &&
|
||||
si_mem_available()*PAGE_SIZE > eggsfs_span_cache_min_avail_mem_sync
|
||||
) {
|
||||
return 0;
|
||||
}
|
||||
return pages;
|
||||
}
|
||||
|
||||
static int eggsfs_span_shrinker_lru_ix = 0;
|
||||
static u64 eggsfs_span_shrinker_pages_round = 25600; // We drop at most 100MiB in one shrinker round
|
||||
|
||||
static unsigned long eggsfs_span_shrinker_scan(struct shrinker* shrinker, struct shrink_control* sc) {
|
||||
return eggsfs_drop_spans(true);
|
||||
eggsfs_drop_spans_enter("shrinker");
|
||||
u64 dropped_pages;
|
||||
int lru_ix = eggsfs_span_shrinker_lru_ix;
|
||||
for (dropped_pages = 0; dropped_pages < eggsfs_span_shrinker_pages_round;) {
|
||||
lru_ix = eggsfs_drop_one_span(lru_ix, &dropped_pages);
|
||||
if (lru_ix < 0) { break; }
|
||||
}
|
||||
eggsfs_span_shrinker_lru_ix = lru_ix >= 0 ? lru_ix : 0;
|
||||
eggsfs_drop_spans_exit("shrinker", dropped_pages);
|
||||
return dropped_pages;
|
||||
}
|
||||
|
||||
static struct shrinker eggsfs_span_shrinker = {
|
||||
@@ -497,10 +534,17 @@ again:
|
||||
}
|
||||
|
||||
struct page* eggsfs_get_span_page(struct eggsfs_block_span* span, u32 page_ix) {
|
||||
trace_eggsfs_get_span_page_enter(span->span.enode->inode.i_ino, span->span.start, page_ix*PAGE_SIZE);
|
||||
|
||||
#define GET_PAGE_EXIT(p) do { \
|
||||
trace_eggsfs_get_span_page_exit(span->span.enode->inode.i_ino, span->span.start, page_ix*PAGE_SIZE, IS_ERR(p) ? PTR_ERR(p) : 0); \
|
||||
return p; \
|
||||
} while(0)
|
||||
|
||||
// this should be guaranteed by the caller but we rely on it below, so let's check
|
||||
if (span->cell_size%PAGE_SIZE != 0) {
|
||||
eggsfs_warn_print("cell_size=%u, PAGE_SIZE=%lu, span->cell_size%%PAGE_SIZE=%lu", span->cell_size, PAGE_SIZE, span->cell_size%PAGE_SIZE);
|
||||
return ERR_PTR(-EIO);
|
||||
GET_PAGE_EXIT(ERR_PTR(-EIO));
|
||||
}
|
||||
|
||||
struct page* page;
|
||||
@@ -509,7 +553,7 @@ struct page* eggsfs_get_span_page(struct eggsfs_block_span* span, u32 page_ix) {
|
||||
|
||||
again:
|
||||
page = xa_load(&span->pages, page_ix);
|
||||
if (page != NULL) { return page; }
|
||||
if (page != NULL) { GET_PAGE_EXIT(page); }
|
||||
|
||||
// We need to load the stripe
|
||||
int D = eggsfs_data_blocks(span->parity);
|
||||
@@ -518,7 +562,7 @@ again:
|
||||
// TODO better error?
|
||||
if (stripe > span->stripes) {
|
||||
eggsfs_warn_print("span_offset=%u, stripe=%u, stripes=%u", span_offset, stripe, span->stripes);
|
||||
return ERR_PTR(-EIO);
|
||||
GET_PAGE_EXIT(ERR_PTR(-EIO));
|
||||
}
|
||||
|
||||
start_page = (span->cell_size/PAGE_SIZE)*D*stripe;
|
||||
@@ -528,7 +572,7 @@ again:
|
||||
int seqno;
|
||||
if (!eggsfs_latch_try_acquire(&span->stripe_latches[stripe], seqno)) {
|
||||
int err = eggsfs_latch_wait_killable(&span->stripe_latches[stripe], seqno);
|
||||
if (err) { return ERR_PTR(err); }
|
||||
if (err) { GET_PAGE_EXIT(ERR_PTR(err)); }
|
||||
goto again;
|
||||
}
|
||||
|
||||
@@ -623,19 +667,23 @@ out:
|
||||
}
|
||||
|
||||
// Reclaim pages if we went over the limit
|
||||
{
|
||||
u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages);
|
||||
u64 free_pages = si_mem_available();
|
||||
if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_sync || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_sync) {
|
||||
eggsfs_drop_spans(false);
|
||||
} else if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_async || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_async) {
|
||||
u64 pages = atomic64_read(&eggsfs_stat_cached_span_pages);
|
||||
u64 free_pages = si_mem_available();
|
||||
if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_sync || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_sync) {
|
||||
// sync dropping, apply backpressure
|
||||
mutex_lock(&eggsfs_drop_spans_mu);
|
||||
eggsfs_drop_spans("sync");
|
||||
mutex_unlock(&eggsfs_drop_spans_mu);
|
||||
} else if (pages*PAGE_SIZE > eggsfs_span_cache_max_size_async || free_pages*PAGE_SIZE < eggsfs_span_cache_min_avail_mem_async) {
|
||||
// don't bother submitting if another span dropper is running already
|
||||
if (!mutex_is_locked(&eggsfs_drop_spans_mu)) {
|
||||
// TODO Is it a good idea to do it on system_long_wq rather than eggsfs_wq? The freeing
|
||||
// job might face heavy contention, so maybe yes?
|
||||
queue_work(system_long_wq, &eggsfs_reclaim_spans_work);
|
||||
}
|
||||
}
|
||||
|
||||
return err == 0 ? page : ERR_PTR(err);
|
||||
GET_PAGE_EXIT(err == 0 ? page : ERR_PTR(err));
|
||||
|
||||
out_err:
|
||||
eggsfs_debug_print("getting span page failed, err=%d", err);
|
||||
@@ -646,6 +694,9 @@ out_err:
|
||||
xa_erase(&span->pages, curr_page);
|
||||
}
|
||||
goto out;
|
||||
|
||||
#undef GET_PAGE_EXIT
|
||||
|
||||
}
|
||||
|
||||
int eggsfs_span_init(void) {
|
||||
|
||||
@@ -8,10 +8,13 @@
|
||||
|
||||
EGGSFS_DECLARE_COUNTER(eggsfs_stat_cached_spans);
|
||||
extern atomic64_t eggsfs_stat_cached_span_pages;
|
||||
|
||||
extern unsigned long eggsfs_span_cache_max_size_async;
|
||||
extern unsigned long eggsfs_span_cache_min_avail_mem_async;
|
||||
extern unsigned long eggsfs_span_cache_max_size_sync;
|
||||
extern unsigned long eggsfs_span_cache_min_avail_mem_sync;
|
||||
extern unsigned long eggsfs_span_cache_max_size_drop;
|
||||
extern unsigned long eggsfs_span_cache_min_avail_mem_drop;
|
||||
|
||||
struct eggsfs_span {
|
||||
struct eggsfs_inode* enode;
|
||||
|
||||
@@ -71,6 +71,8 @@ static struct ctl_table eggsfs_cb_sysctls[] = {
|
||||
EGGSFS_CTL_ULONG(span_cache_min_avail_mem_async),
|
||||
EGGSFS_CTL_ULONG(span_cache_max_size_sync),
|
||||
EGGSFS_CTL_ULONG(span_cache_min_avail_mem_sync),
|
||||
EGGSFS_CTL_ULONG(span_cache_max_size_drop),
|
||||
EGGSFS_CTL_ULONG(span_cache_min_avail_mem_drop),
|
||||
|
||||
{}
|
||||
};
|
||||
|
||||
+82
-17
@@ -255,8 +255,8 @@ TRACE_EVENT(eggsfs_metadata_request_enter,
|
||||
);
|
||||
|
||||
TRACE_EVENT(eggsfs_metadata_request_exit,
|
||||
TP_PROTO(struct msghdr* msg, u64 req_id, u32 len, s16 shard_id, u8 kind, u64 elapsed, u32 n_attempts, u32 resp_len, int error),
|
||||
TP_ARGS(msg, req_id, len, shard_id, kind, elapsed, n_attempts, resp_len, error),
|
||||
TP_PROTO(struct msghdr* msg, u64 req_id, u32 len, s16 shard_id, u8 kind, u32 n_attempts, u32 resp_len, int error),
|
||||
TP_ARGS(msg, req_id, len, shard_id, kind, n_attempts, resp_len, error),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(u8, addr, sizeof(struct sockaddr_in))
|
||||
@@ -264,7 +264,6 @@ TRACE_EVENT(eggsfs_metadata_request_exit,
|
||||
__field(u32, len)
|
||||
__field(s16, shard_id) // -1 is used for CDC
|
||||
__field(u8, kind)
|
||||
__field(u64, elapsed)
|
||||
__field(u32, n_attempts)
|
||||
__field(u32, resp_len)
|
||||
__field(int, error)
|
||||
@@ -275,12 +274,11 @@ TRACE_EVENT(eggsfs_metadata_request_exit,
|
||||
__entry->len = len;
|
||||
__entry->shard_id = shard_id;
|
||||
__entry->kind = kind;
|
||||
__entry->elapsed = elapsed;
|
||||
__entry->n_attempts = n_attempts;
|
||||
__entry->resp_len = resp_len;
|
||||
__entry->error = error;
|
||||
),
|
||||
TP_printk("dst=%pISp req_id=%llu shard_id=%d kind=%d len=%u elapsed=%llu n_attempts=%u resp_len=%u error=%d", __entry->addr, __entry->req_id, __entry->shard_id, __entry->kind, __entry->len, __entry->elapsed, __entry->n_attempts, __entry->resp_len, __entry->error)
|
||||
TP_printk("dst=%pISp req_id=%llu shard_id=%d kind=%d len=%u n_attempts=%u resp_len=%u error=%d", __entry->addr, __entry->req_id, __entry->shard_id, __entry->kind, __entry->len, __entry->n_attempts, __entry->resp_len, __entry->error)
|
||||
);
|
||||
|
||||
TRACE_EVENT(eggsfs_get_inode_enter,
|
||||
@@ -424,43 +422,110 @@ TRACE_EVENT(eggsfs_span_add,
|
||||
);
|
||||
|
||||
TRACE_EVENT(eggsfs_drop_spans_enter,
|
||||
TP_PROTO(bool async, long mem_available, u64 cached_pages, u64 cached_spans),
|
||||
TP_ARGS( async, mem_available, cached_pages, cached_spans),
|
||||
TP_PROTO(const char* type, long mem_available, u64 cached_pages, u64 cached_spans),
|
||||
TP_ARGS( type, mem_available, cached_pages, cached_spans),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(bool, async)
|
||||
__field(const char*, type)
|
||||
__field(long, mem_available)
|
||||
__field(u64, cached_pages)
|
||||
__field(u64, cached_spans)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->async = async;
|
||||
__entry->type = type;
|
||||
__entry->mem_available = mem_available;
|
||||
__entry->cached_pages = cached_pages;
|
||||
__entry->cached_spans = cached_spans;
|
||||
),
|
||||
TP_printk("async=%d mem_available=%ld cached_pages=%llu cached_spans=%llu", (int)__entry->async, __entry->mem_available, __entry->cached_pages, __entry->cached_spans)
|
||||
TP_printk("type=%s mem_available=%ld cached_pages=%llu cached_spans=%llu", __entry->type, __entry->mem_available, __entry->cached_pages, __entry->cached_spans)
|
||||
);
|
||||
|
||||
TRACE_EVENT(eggsfs_drop_spans_exit,
|
||||
TP_PROTO(bool async, long mem_available, u64 cached_pages, u64 cached_spans, u64 elapsed),
|
||||
TP_ARGS( async, mem_available, cached_pages, cached_spans, elapsed),
|
||||
TP_PROTO(const char* type, long mem_available, u64 cached_pages, u64 cached_spans, u64 dropped_pages),
|
||||
TP_ARGS( type, mem_available, cached_pages, cached_spans, dropped_pages),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(bool, async)
|
||||
__field(const char*, type)
|
||||
__field(long, mem_available)
|
||||
__field(u64, cached_pages)
|
||||
__field(u64, cached_spans)
|
||||
__field(u64, elapsed)
|
||||
__field(u64, dropped_pages)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->async = async;
|
||||
__entry->type = type;
|
||||
__entry->mem_available = mem_available;
|
||||
__entry->cached_pages = cached_pages;
|
||||
__entry->cached_spans = cached_spans;
|
||||
__entry->elapsed = elapsed;
|
||||
__entry->dropped_pages = dropped_pages;
|
||||
),
|
||||
TP_printk("async=%d mem_available=%ld cached_pages=%llu cached_spans=%llu elapsed=%llu", (int)__entry->async, __entry->mem_available, __entry->cached_pages, __entry->cached_spans, __entry->elapsed)
|
||||
TP_printk("type=%s mem_available=%ld cached_pages=%llu cached_spans=%llu, dropped_pages=%llu", __entry->type, __entry->mem_available, __entry->cached_pages, __entry->cached_spans, __entry->dropped_pages)
|
||||
);
|
||||
|
||||
// Tracepoint emitted at the start of eggsfs_get_span(). The caller passes the
// inode number as file_id and the file offset whose span is being looked up.
TRACE_EVENT(eggsfs_get_span_enter,
|
||||
TP_PROTO(u64 file_id, u64 offset),
|
||||
TP_ARGS( file_id, offset),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, file_id)
|
||||
__field(u64, offset)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->file_id = file_id;
|
||||
__entry->offset = offset;
|
||||
),
|
||||
TP_printk("file_id=%016llx offset=%llu", __entry->file_id, __entry->offset)
|
||||
);
|
||||
|
||||
TRACE_EVENT(eggsfs_get_span_exit,
|
||||
TP_PROTO(u64 file_id, u64 offset, int err),
|
||||
TP_ARGS( file_id, offset, err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, file_id)
|
||||
__field(u64, offset)
|
||||
__field(int, err)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->file_id = file_id;
|
||||
__entry->offset = offset;
|
||||
),
|
||||
TP_printk("file_id=%016llx offset=%llu, err=%d", __entry->file_id, __entry->offset, __entry->err)
|
||||
);
|
||||
|
||||
// Tracepoint emitted at the start of eggsfs_get_span_page(). The caller passes
// the inode number as file_id, the span's start as span_offset, and
// page_ix*PAGE_SIZE as page_offset.
TRACE_EVENT(eggsfs_get_span_page_enter,
|
||||
TP_PROTO(u64 file_id, u64 span_offset, u32 page_offset),
|
||||
TP_ARGS( file_id, span_offset, page_offset),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, file_id)
|
||||
__field(u64, span_offset)
|
||||
__field(u32, page_offset)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->file_id = file_id;
|
||||
__entry->span_offset = span_offset;
|
||||
__entry->page_offset = page_offset;
|
||||
),
|
||||
TP_printk("file_id=%016llx span_offset=%llu page_offset=%u", __entry->file_id, __entry->span_offset, __entry->page_offset)
|
||||
);
|
||||
|
||||
// Tracepoint emitted when eggsfs_get_span_page() returns. Carries the same
// identifying fields as the matching _enter event, plus err: PTR_ERR() of the
// returned page pointer when it is an error pointer, 0 otherwise.
TRACE_EVENT(eggsfs_get_span_page_exit,
|
||||
TP_PROTO(u64 file_id, u64 span_offset, u32 page_offset, int err),
|
||||
TP_ARGS( file_id, span_offset, page_offset, err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, file_id)
|
||||
__field(u64, span_offset)
|
||||
__field(u32, page_offset)
|
||||
__field(int, err)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->file_id = file_id;
|
||||
__entry->span_offset = span_offset;
|
||||
__entry->page_offset = page_offset;
|
||||
__entry->err = err;
|
||||
),
|
||||
TP_printk("file_id=%016llx span_offset=%llu page_offset=%u err=%d", __entry->file_id, __entry->span_offset, __entry->page_offset, __entry->err)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_EGGFS_H */
|
||||
|
||||
Reference in New Issue
Block a user