feat(go-sdk): capture panics and send to alerter (#403)

This commit is contained in:
abelanger5
2024-04-22 11:35:40 +02:00
committed by GitHub
parent ff90533458
commit 671a7e1474
4 changed files with 61 additions and 40 deletions
+15 -9
View File
@@ -38,6 +38,8 @@ type HatchetContext interface {
client() client.Client
action() *client.Action
index() int
inc()
}
@@ -67,7 +69,7 @@ type StepData map[string]interface{}
type hatchetContext struct {
context.Context
action *client.Action
a *client.Action
stepData *StepRunData
c client.Client
l *zerolog.Logger
@@ -86,7 +88,7 @@ func newHatchetContext(
) (HatchetContext, error) {
c := &hatchetContext{
Context: ctx,
action: action,
a: action,
c: client,
l: l,
}
@@ -112,6 +114,10 @@ func (h *hatchetContext) client() client.Client {
return h.c
}
// action returns the client action held by this context.
func (h *hatchetContext) action() *client.Action {
	return h.a
}
// SetContext replaces the embedded context.Context with ctx.
func (h *hatchetContext) SetContext(ctx context.Context) {
	h.Context = ctx
}
@@ -137,19 +143,19 @@ func (h *hatchetContext) WorkflowInput(target interface{}) error {
}
func (h *hatchetContext) StepName() string {
return h.action.StepName
return h.a.StepName
}
func (h *hatchetContext) StepRunId() string {
return h.action.StepRunId
return h.a.StepRunId
}
func (h *hatchetContext) WorkflowRunId() string {
return h.action.WorkflowRunId
return h.a.WorkflowRunId
}
func (h *hatchetContext) Log(message string) {
err := h.c.Event().PutLog(h, h.action.StepRunId, message)
err := h.c.Event().PutLog(h, h.a.StepRunId, message)
if err != nil {
h.l.Err(err).Msg("could not put log")
@@ -157,7 +163,7 @@ func (h *hatchetContext) Log(message string) {
}
func (h *hatchetContext) StreamEvent(message []byte) {
err := h.c.Event().PutStreamEvent(h, h.action.StepRunId, message)
err := h.c.Event().PutStreamEvent(h, h.a.StepRunId, message)
if err != nil {
h.l.Err(err).Msg("could not put stream event")
@@ -236,7 +242,7 @@ func (h *hatchetContext) populateStepDataForGroupKeyRun() error {
inputData := map[string]interface{}{}
err := json.Unmarshal(h.action.ActionPayload, &inputData)
err := json.Unmarshal(h.a.ActionPayload, &inputData)
if err != nil {
return err
@@ -256,7 +262,7 @@ func (h *hatchetContext) populateStepData() error {
h.stepData = &StepRunData{}
jsonBytes := h.action.ActionPayload
jsonBytes := h.a.ActionPayload
if len(jsonBytes) == 0 {
jsonBytes = []byte("{}")
+9 -7
View File
@@ -2,6 +2,7 @@ package worker
import (
"fmt"
"runtime/debug"
"sync"
)
@@ -40,14 +41,11 @@ func run(ctx HatchetContext, fs []MiddlewareFunc, next func(HatchetContext) erro
})
}
func panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error {
func (w *Worker) panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error {
var err error
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
func() {
defer func() {
defer wg.Done()
if r := recover(); r != nil {
var ok bool
err, ok = r.(error)
@@ -56,6 +54,12 @@ func panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error
err = fmt.Errorf("%v", r)
}
innerErr := w.sendFailureEvent(ctx, fmt.Errorf("recovered from panic: %w. Stack trace:\n%s", err, string(debug.Stack())))
if innerErr != nil {
w.l.Error().Err(innerErr).Msg("could not send failure event")
}
return
}
}()
@@ -63,7 +67,5 @@ func panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error
err = next(ctx)
}()
wg.Wait()
return err
}
+4
View File
@@ -56,6 +56,10 @@ func (c *testHatchetContext) StreamEvent(message []byte) {
panic("not implemented")
}
// action is a stub on the test context; calling it always panics.
func (c *testHatchetContext) action() *client.Action {
	panic("not implemented")
}
// index is a stub on the test context; calling it always panics.
func (c *testHatchetContext) index() int {
	panic("not implemented")
}
+33 -24
View File
@@ -161,8 +161,6 @@ func NewWorker(fs ...WorkerOpt) (*Worker, error) {
mws := newMiddlewares()
mws.add(panicMiddleware)
w := &Worker{
client: opts.client,
name: opts.name,
@@ -173,6 +171,8 @@ func NewWorker(fs ...WorkerOpt) (*Worker, error) {
maxRuns: opts.maxRuns,
}
mws.add(w.panicMiddleware)
// register all integrations
for _, integration := range opts.integrations {
actions := integration.Actions()
@@ -436,28 +436,7 @@ func (w *Worker) startStepRun(ctx context.Context, assignedAction *client.Action
}
if err != nil {
failureEvent := w.getActionEvent(assignedAction, client.ActionEventTypeFailed)
w.alerter.SendAlert(context.Background(), err, map[string]interface{}{
"actionId": assignedAction.ActionId,
"workerId": assignedAction.WorkerId,
"stepRunId": assignedAction.StepRunId,
"jobName": assignedAction.JobName,
"actionType": assignedAction.ActionType,
})
failureEvent.EventPayload = err.Error()
_, err := w.client.Dispatcher().SendStepActionEvent(
ctx,
failureEvent,
)
if err != nil {
return fmt.Errorf("could not send action event: %w", err)
}
return err
return w.sendFailureEvent(ctx, err)
}
// send a message that the step run completed
@@ -601,6 +580,36 @@ func (w *Worker) getGroupKeyActionFinishedEvent(action *client.Action, output st
return event, nil
}
// sendFailureEvent reports a failed step run for the action carried by ctx.
//
// It builds a failed-type action event, forwards err to the worker's alerter
// along with identifying fields of the action, sets the event payload to
// err's message, and sends the event to the dispatcher.
//
// ctx is used only to look up the action. The alert and the dispatcher call
// both run on fresh background contexts, and the dispatcher call is bounded
// by a 30-second timeout.
//
// It returns a wrapped error when the dispatcher call fails and nil otherwise.
func (w *Worker) sendFailureEvent(ctx HatchetContext, err error) error {
	act := ctx.action()
	event := w.getActionEvent(act, client.ActionEventTypeFailed)

	alertData := map[string]interface{}{
		"actionId":   act.ActionId,
		"workerId":   act.WorkerId,
		"stepRunId":  act.StepRunId,
		"jobName":    act.JobName,
		"actionType": act.ActionType,
	}
	w.alerter.SendAlert(context.Background(), err, alertData)

	event.EventPayload = err.Error()

	sendCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Keep the dispatcher error separate from the err parameter so the
	// reported failure and the delivery failure are never confused.
	if _, sendErr := w.client.Dispatcher().SendStepActionEvent(sendCtx, event); sendErr != nil {
		return fmt.Errorf("could not send action event: %w", sendErr)
	}

	return nil
}
func getHostName() string {
hostName, err := os.Hostname()
if err != nil {