mirror of
https://github.com/hatchet-dev/hatchet.git
synced 2026-01-07 17:29:39 -06:00
Logic for requeueing and reassigning did not limit the number of step runs to requeue, so when events accumulate with no worker present it causes memory to spike along with a very high query latency on the database. This commit limits the number of step runs returned in the requeue and reassign queries, and also properly locks step run rows for these queries so only a step run in a PENDING or PENDING_ASSIGNMENT state can be requeued. It also improves performance of the `AssignStepRunToWorker` query and ensures that `maxRuns` on workers are always respected through the introduction of a `WorkerSemaphore` model. The value gets decremented when a step run is assigned and incremented when a step run is in a final state. Co-authored-by: Luca Steeb <contact@luca-steeb.com> * Update controller.go --------- Co-authored-by: steebchen <contact@luca-steeb.com>
118 lines
2.3 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/hatchet-dev/hatchet/pkg/client"
|
|
"github.com/hatchet-dev/hatchet/pkg/worker"
|
|
)
|
|
|
|
// stepOneOutput is the JSON-serializable result returned by the
// "step-one" workflow step.
type stepOneOutput struct {
	// Message records when the step ran (RFC3339Nano timestamp text).
	Message string `json:"message"`
}
|
|
|
|
func getConcurrencyKey(ctx worker.HatchetContext) (string, error) {
|
|
return "my-key", nil
|
|
}
|
|
|
|
func run(ctx context.Context, delay time.Duration, executions chan<- time.Duration, concurrency int) (int64, int64) {
|
|
c, err := client.New(
|
|
client.WithLogLevel("warn"),
|
|
)
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
w, err := worker.NewWorker(
|
|
worker.WithClient(
|
|
c,
|
|
),
|
|
worker.WithLogLevel("warn"),
|
|
worker.WithMaxRuns(200),
|
|
)
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
mx := sync.Mutex{}
|
|
var count int64
|
|
var uniques int64
|
|
var executed []int64
|
|
|
|
var concurrencyOpts *worker.WorkflowConcurrency
|
|
if concurrency > 0 {
|
|
concurrencyOpts = worker.Concurrency(getConcurrencyKey).MaxRuns(int32(concurrency))
|
|
}
|
|
|
|
err = w.On(
|
|
worker.Event("load-test:event"),
|
|
&worker.WorkflowJob{
|
|
Name: "load-test",
|
|
Description: "Load testing",
|
|
Concurrency: concurrencyOpts,
|
|
Steps: []*worker.WorkflowStep{
|
|
worker.Fn(func(ctx worker.HatchetContext) (result *stepOneOutput, err error) {
|
|
var input Event
|
|
err = ctx.WorkflowInput(&input)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
took := time.Since(input.CreatedAt)
|
|
l.Info().Msgf("executing %d took %s", input.ID, took)
|
|
|
|
mx.Lock()
|
|
executions <- took
|
|
// detect duplicate in executed slice
|
|
var duplicate bool
|
|
for i := 0; i < len(executed)-1; i++ {
|
|
if executed[i] == input.ID {
|
|
duplicate = true
|
|
break
|
|
}
|
|
}
|
|
if duplicate {
|
|
l.Warn().Str("step-run-id", ctx.StepRunId()).Msgf("duplicate %d", input.ID)
|
|
}
|
|
if !duplicate {
|
|
uniques++
|
|
}
|
|
count++
|
|
executed = append(executed, input.ID)
|
|
mx.Unlock()
|
|
|
|
time.Sleep(delay)
|
|
|
|
return &stepOneOutput{
|
|
Message: "This ran at: " + time.Now().Format(time.RFC3339Nano),
|
|
}, nil
|
|
}).SetName("step-one"),
|
|
},
|
|
},
|
|
)
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
cleanup, err := w.Start()
|
|
if err != nil {
|
|
panic(fmt.Errorf("error starting worker: %w", err))
|
|
}
|
|
|
|
<-ctx.Done()
|
|
|
|
if err := cleanup(); err != nil {
|
|
panic(fmt.Errorf("error cleaning up: %w", err))
|
|
}
|
|
|
|
mx.Lock()
|
|
defer mx.Unlock()
|
|
return count, uniques
|
|
}
|