More alert-related fixes

This commit is contained in:
Francesco Mazzoli
2023-07-27 13:54:51 +00:00
parent 2c3c09180b
commit a01b1f036d
4 changed files with 11 additions and 4 deletions

View File

@@ -540,7 +540,7 @@ private:
int err = errno;
// Note that `sendto` fails with EPERM when netfilter (nf) drops the packet.
if (err == EAGAIN || err == EPERM) {
_env.raiseAlert(alert, false, "we got %s/%s=%s when trying to send shard message, will wait and retry", err, translateErrno(err), safe_strerror(err));
alert = _env.raiseAlert(alert, false, "we got %s/%s=%s when trying to send shard message, will wait and retry", err, translateErrno(err), safe_strerror(err));
sleepFor(100_ms);
} else {
throw EXPLICIT_SYSCALL_EXCEPTION(err, "sendto");

View File

@@ -307,7 +307,12 @@ public:
}
if (sendto(sock, respBbuf.data, respBbuf.len(), 0, (struct sockaddr*)&clientAddr, sizeof(clientAddr)) != respBbuf.len()) {
throw SYSCALL_EXCEPTION("sendto");
// we get EPERM from `sendto` when netfilter (nf) drops packets
if (errno != EPERM) {
throw SYSCALL_EXCEPTION("sendto");
} else {
LOG_INFO(_env, "dropping response %s to %s because of EPERM", respContainer->kind(), clientAddr);
}
}
LOG_DEBUG(_env, "sent response %s to %s", respContainer->kind(), clientAddr);
}

View File

@@ -361,7 +361,7 @@ func (l *Logger) NewNCAlert() *NCAlert {
}
func (nc *NCAlert) Alert(f string, v ...any) {
if nc.alert == nil {
if nc.alert == nil && nc.l.troll != nil {
nc.alert = nc.l.troll.NewUnbinnableAlertStatus()
}
nc.l.LogStack(1, ERROR, "nc alert "+f, v...)

View File

@@ -170,7 +170,7 @@ func (c *Client) metadataRequestInternal(
if dontWait {
log.Info("dontWait is on, we couldn't send the request due to EPERM %v, goodbye", err)
} else {
epermAlert.Alert("got possibly transient EPERM when sending to shard %v, might retry: %v", shid, err)
epermAlert.Alert("got possibly transient EPERM when sending to shard %v, might retry after waiting for %v: %v", shid, timeout, err)
time.Sleep(timeout)
attempts++
continue
@@ -178,6 +178,8 @@ func (c *Client) metadataRequestInternal(
} else {
return fmt.Errorf("couldn't send request to shard %v: %w", shid, err)
}
} else {
epermAlert.Clear()
}
if written < len(reqBytes) {
panic(fmt.Sprintf("incomplete send to shard %v -- %v bytes written instead of %v", shid, written, len(reqBytes)))