From a01b1f036d4b2218b79e2e9134bf9a12e004cd92 Mon Sep 17 00:00:00 2001 From: Francesco Mazzoli Date: Thu, 27 Jul 2023 13:54:51 +0000 Subject: [PATCH] More alert-related fixes --- cpp/cdc/CDC.cpp | 2 +- cpp/shard/Shard.cpp | 7 ++++++- go/lib/log.go | 2 +- go/lib/metadatareq.go | 4 +++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cpp/cdc/CDC.cpp b/cpp/cdc/CDC.cpp index 5c786c5a..7654976e 100644 --- a/cpp/cdc/CDC.cpp +++ b/cpp/cdc/CDC.cpp @@ -540,7 +540,7 @@ private: int err = errno; // Note that we get EPERM on `sendto` when nf drops packets. if (err == EAGAIN || err == EPERM) { - _env.raiseAlert(alert, false, "we got %s/%s=%s when trying to send shard message, will wait and retry", err, translateErrno(err), safe_strerror(err)); + alert = _env.raiseAlert(alert, false, "we got %s/%s=%s when trying to send shard message, will wait and retry", err, translateErrno(err), safe_strerror(err)); sleepFor(100_ms); } else { throw EXPLICIT_SYSCALL_EXCEPTION(err, "sendto"); diff --git a/cpp/shard/Shard.cpp b/cpp/shard/Shard.cpp index 347c424d..1d0fd3a9 100644 --- a/cpp/shard/Shard.cpp +++ b/cpp/shard/Shard.cpp @@ -307,7 +307,12 @@ public: } if (sendto(sock, respBbuf.data, respBbuf.len(), 0, (struct sockaddr*)&clientAddr, sizeof(clientAddr)) != respBbuf.len()) { - throw SYSCALL_EXCEPTION("sendto"); + // we get this when nf drops packets + if (errno != EPERM) { + throw SYSCALL_EXCEPTION("sendto"); + } else { + LOG_INFO(_env, "dropping response %s to %s because of EPERM", respContainer->kind(), clientAddr); + } } LOG_DEBUG(_env, "sent response %s to %s", respContainer->kind(), clientAddr); } diff --git a/go/lib/log.go b/go/lib/log.go index 3fd1c4f5..aa1a1f7b 100644 --- a/go/lib/log.go +++ b/go/lib/log.go @@ -361,7 +361,7 @@ func (l *Logger) NewNCAlert() *NCAlert { } func (nc *NCAlert) Alert(f string, v ...any) { - if nc.alert == nil { + if nc.alert == nil && nc.l.troll != nil { nc.alert = nc.l.troll.NewUnbinnableAlertStatus() } nc.l.LogStack(1, ERROR, "nc alert "+f, v...) diff --git a/go/lib/metadatareq.go b/go/lib/metadatareq.go index ad1d56a3..37f2d0af 100644 --- a/go/lib/metadatareq.go +++ b/go/lib/metadatareq.go @@ -170,7 +170,7 @@ func (c *Client) metadataRequestInternal( if dontWait { log.Info("dontWait is on, we couldn't send the request due to EPERM %v, goodbye", err) } else { - epermAlert.Alert("got possibly transient EPERM when sending to shard %v, might retry: %v", shid, err) + epermAlert.Alert("got possibly transient EPERM when sending to shard %v, might retry after waiting for %v: %v", shid, timeout, err) time.Sleep(timeout) attempts++ continue @@ -178,6 +178,8 @@ func (c *Client) metadataRequestInternal( } else { return fmt.Errorf("couldn't send request to shard %v: %w", shid, err) } + } else { + epermAlert.Clear() } if written < len(reqBytes) { panic(fmt.Sprintf("incomplete send to shard %v -- %v bytes written instead of %v", shid, written, len(reqBytes)))