mirror of
https://github.com/hatchet-dev/hatchet.git
synced 2026-05-25 04:48:44 -05:00
feat(go-sdk): capture panics and send to alerter (#403)
This commit is contained in:
+15
-9
@@ -38,6 +38,8 @@ type HatchetContext interface {
|
||||
|
||||
client() client.Client
|
||||
|
||||
action() *client.Action
|
||||
|
||||
index() int
|
||||
inc()
|
||||
}
|
||||
@@ -67,7 +69,7 @@ type StepData map[string]interface{}
|
||||
|
||||
type hatchetContext struct {
|
||||
context.Context
|
||||
action *client.Action
|
||||
a *client.Action
|
||||
stepData *StepRunData
|
||||
c client.Client
|
||||
l *zerolog.Logger
|
||||
@@ -86,7 +88,7 @@ func newHatchetContext(
|
||||
) (HatchetContext, error) {
|
||||
c := &hatchetContext{
|
||||
Context: ctx,
|
||||
action: action,
|
||||
a: action,
|
||||
c: client,
|
||||
l: l,
|
||||
}
|
||||
@@ -112,6 +114,10 @@ func (h *hatchetContext) client() client.Client {
|
||||
return h.c
|
||||
}
|
||||
|
||||
func (h *hatchetContext) action() *client.Action {
|
||||
return h.a
|
||||
}
|
||||
|
||||
func (h *hatchetContext) SetContext(ctx context.Context) {
|
||||
h.Context = ctx
|
||||
}
|
||||
@@ -137,19 +143,19 @@ func (h *hatchetContext) WorkflowInput(target interface{}) error {
|
||||
}
|
||||
|
||||
func (h *hatchetContext) StepName() string {
|
||||
return h.action.StepName
|
||||
return h.a.StepName
|
||||
}
|
||||
|
||||
func (h *hatchetContext) StepRunId() string {
|
||||
return h.action.StepRunId
|
||||
return h.a.StepRunId
|
||||
}
|
||||
|
||||
func (h *hatchetContext) WorkflowRunId() string {
|
||||
return h.action.WorkflowRunId
|
||||
return h.a.WorkflowRunId
|
||||
}
|
||||
|
||||
func (h *hatchetContext) Log(message string) {
|
||||
err := h.c.Event().PutLog(h, h.action.StepRunId, message)
|
||||
err := h.c.Event().PutLog(h, h.a.StepRunId, message)
|
||||
|
||||
if err != nil {
|
||||
h.l.Err(err).Msg("could not put log")
|
||||
@@ -157,7 +163,7 @@ func (h *hatchetContext) Log(message string) {
|
||||
}
|
||||
|
||||
func (h *hatchetContext) StreamEvent(message []byte) {
|
||||
err := h.c.Event().PutStreamEvent(h, h.action.StepRunId, message)
|
||||
err := h.c.Event().PutStreamEvent(h, h.a.StepRunId, message)
|
||||
|
||||
if err != nil {
|
||||
h.l.Err(err).Msg("could not put stream event")
|
||||
@@ -236,7 +242,7 @@ func (h *hatchetContext) populateStepDataForGroupKeyRun() error {
|
||||
|
||||
inputData := map[string]interface{}{}
|
||||
|
||||
err := json.Unmarshal(h.action.ActionPayload, &inputData)
|
||||
err := json.Unmarshal(h.a.ActionPayload, &inputData)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -256,7 +262,7 @@ func (h *hatchetContext) populateStepData() error {
|
||||
|
||||
h.stepData = &StepRunData{}
|
||||
|
||||
jsonBytes := h.action.ActionPayload
|
||||
jsonBytes := h.a.ActionPayload
|
||||
|
||||
if len(jsonBytes) == 0 {
|
||||
jsonBytes = []byte("{}")
|
||||
|
||||
@@ -2,6 +2,7 @@ package worker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime/debug"
|
||||
"sync"
|
||||
)
|
||||
|
||||
@@ -40,14 +41,11 @@ func run(ctx HatchetContext, fs []MiddlewareFunc, next func(HatchetContext) erro
|
||||
})
|
||||
}
|
||||
|
||||
func panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error {
|
||||
func (w *Worker) panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error {
|
||||
var err error
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
|
||||
go func() {
|
||||
func() {
|
||||
defer func() {
|
||||
defer wg.Done()
|
||||
if r := recover(); r != nil {
|
||||
var ok bool
|
||||
err, ok = r.(error)
|
||||
@@ -56,6 +54,12 @@ func panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error
|
||||
err = fmt.Errorf("%v", r)
|
||||
}
|
||||
|
||||
innerErr := w.sendFailureEvent(ctx, fmt.Errorf("recovered from panic: %w. Stack trace:\n%s", err, string(debug.Stack())))
|
||||
|
||||
if innerErr != nil {
|
||||
w.l.Error().Err(innerErr).Msg("could not send failure event")
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
}()
|
||||
@@ -63,7 +67,5 @@ func panicMiddleware(ctx HatchetContext, next func(HatchetContext) error) error
|
||||
err = next(ctx)
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -56,6 +56,10 @@ func (c *testHatchetContext) StreamEvent(message []byte) {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (c *testHatchetContext) action() *client.Action {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (c *testHatchetContext) index() int {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
+33
-24
@@ -161,8 +161,6 @@ func NewWorker(fs ...WorkerOpt) (*Worker, error) {
|
||||
|
||||
mws := newMiddlewares()
|
||||
|
||||
mws.add(panicMiddleware)
|
||||
|
||||
w := &Worker{
|
||||
client: opts.client,
|
||||
name: opts.name,
|
||||
@@ -173,6 +171,8 @@ func NewWorker(fs ...WorkerOpt) (*Worker, error) {
|
||||
maxRuns: opts.maxRuns,
|
||||
}
|
||||
|
||||
mws.add(w.panicMiddleware)
|
||||
|
||||
// register all integrations
|
||||
for _, integration := range opts.integrations {
|
||||
actions := integration.Actions()
|
||||
@@ -436,28 +436,7 @@ func (w *Worker) startStepRun(ctx context.Context, assignedAction *client.Action
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
failureEvent := w.getActionEvent(assignedAction, client.ActionEventTypeFailed)
|
||||
|
||||
w.alerter.SendAlert(context.Background(), err, map[string]interface{}{
|
||||
"actionId": assignedAction.ActionId,
|
||||
"workerId": assignedAction.WorkerId,
|
||||
"stepRunId": assignedAction.StepRunId,
|
||||
"jobName": assignedAction.JobName,
|
||||
"actionType": assignedAction.ActionType,
|
||||
})
|
||||
|
||||
failureEvent.EventPayload = err.Error()
|
||||
|
||||
_, err := w.client.Dispatcher().SendStepActionEvent(
|
||||
ctx,
|
||||
failureEvent,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not send action event: %w", err)
|
||||
}
|
||||
|
||||
return err
|
||||
return w.sendFailureEvent(ctx, err)
|
||||
}
|
||||
|
||||
// send a message that the step run completed
|
||||
@@ -601,6 +580,36 @@ func (w *Worker) getGroupKeyActionFinishedEvent(action *client.Action, output st
|
||||
return event, nil
|
||||
}
|
||||
|
||||
func (w *Worker) sendFailureEvent(ctx HatchetContext, err error) error {
|
||||
assignedAction := ctx.action()
|
||||
|
||||
failureEvent := w.getActionEvent(assignedAction, client.ActionEventTypeFailed)
|
||||
|
||||
w.alerter.SendAlert(context.Background(), err, map[string]interface{}{
|
||||
"actionId": assignedAction.ActionId,
|
||||
"workerId": assignedAction.WorkerId,
|
||||
"stepRunId": assignedAction.StepRunId,
|
||||
"jobName": assignedAction.JobName,
|
||||
"actionType": assignedAction.ActionType,
|
||||
})
|
||||
|
||||
failureEvent.EventPayload = err.Error()
|
||||
|
||||
innerCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, err = w.client.Dispatcher().SendStepActionEvent(
|
||||
innerCtx,
|
||||
failureEvent,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not send action event: %w", err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func getHostName() string {
|
||||
hostName, err := os.Hostname()
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user