Files
hatchet/examples/loadtest/cli/run.go
abelanger5 7b7fbe3668 fix: update Requeue and Reassign logic to fix performance degradation when many events are queued (#310)
Logic for requeueing and reassigning did not limit the number of step runs to requeue, so when events accumulate with no worker present it causes memory to spike along with a very high query latency on the database. This commit limits the number of step runs returned in the requeue and reassign queries, and also properly locks step run rows for these queries so only a step run in a PENDING or PENDING_ASSIGNMENT state can be requeued.

It also improves performance of the `AssignStepRunToWorker` query and ensures that `maxRuns` on workers are always respected through the introduction of a `WorkerSemaphore` model. The value gets decremented when a step run is assigned and incremented when a step run is in a final state. 

Co-authored-by: Luca Steeb <contact@luca-steeb.com>

* Update controller.go

---------

Co-authored-by: steebchen <contact@luca-steeb.com>
2024-04-01 12:33:18 -04:00

118 lines
2.3 KiB
Go

package main
import (
"context"
"fmt"
"sync"
"time"
"github.com/hatchet-dev/hatchet/pkg/client"
"github.com/hatchet-dev/hatchet/pkg/worker"
)
// stepOneOutput is the JSON-encoded result payload returned by the
// "step-one" workflow step.
type stepOneOutput struct {
	Message string `json:"message"`
}
// getConcurrencyKey returns a fixed concurrency group key, so every
// workflow run falls into the same group and the configured MaxRuns
// limit applies globally across all queued events.
func getConcurrencyKey(ctx worker.HatchetContext) (string, error) {
	const key = "my-key"
	return key, nil
}
// run starts a Hatchet worker that handles "load-test:event" events and
// measures, for each handled event, how long it sat in the queue before
// executing. It blocks until ctx is cancelled, then shuts the worker down
// and returns (total executions, unique event IDs executed); any gap
// between the two indicates duplicate deliveries.
//
// delay simulates per-step processing time; every measured queue latency
// is sent on executions; concurrency > 0 caps concurrent workflow runs
// via a single shared concurrency key.
func run(ctx context.Context, delay time.Duration, executions chan<- time.Duration, concurrency int) (int64, int64) {
	c, err := client.New(
		client.WithLogLevel("warn"),
	)
	if err != nil {
		panic(err)
	}

	w, err := worker.NewWorker(
		worker.WithClient(
			c,
		),
		worker.WithLogLevel("warn"),
		worker.WithMaxRuns(200),
	)
	if err != nil {
		panic(err)
	}

	// mx guards count, uniques and seen: the step function can run
	// concurrently on multiple goroutines.
	mx := sync.Mutex{}
	var count int64
	var uniques int64
	// seen records every executed event ID so duplicate deliveries are
	// detected in O(1). The previous linear scan was O(n) per event and
	// iterated to len(executed)-1, an off-by-one that skipped the most
	// recently recorded ID and could miss a back-to-back duplicate.
	seen := make(map[int64]struct{})

	var concurrencyOpts *worker.WorkflowConcurrency
	if concurrency > 0 {
		concurrencyOpts = worker.Concurrency(getConcurrencyKey).MaxRuns(int32(concurrency))
	}

	err = w.On(
		worker.Event("load-test:event"),
		&worker.WorkflowJob{
			Name:        "load-test",
			Description: "Load testing",
			Concurrency: concurrencyOpts,
			Steps: []*worker.WorkflowStep{
				worker.Fn(func(ctx worker.HatchetContext) (result *stepOneOutput, err error) {
					var input Event
					err = ctx.WorkflowInput(&input)
					if err != nil {
						return nil, err
					}

					took := time.Since(input.CreatedAt)
					l.Info().Msgf("executing %d took %s", input.ID, took)

					mx.Lock()
					// NOTE(review): sending on executions while holding mx
					// preserves the original ordering guarantee; assumes the
					// receiver drains the channel promptly.
					executions <- took
					if _, duplicate := seen[input.ID]; duplicate {
						l.Warn().Str("step-run-id", ctx.StepRunId()).Msgf("duplicate %d", input.ID)
					} else {
						uniques++
					}
					count++
					seen[input.ID] = struct{}{}
					mx.Unlock()

					time.Sleep(delay)
					return &stepOneOutput{
						Message: "This ran at: " + time.Now().Format(time.RFC3339Nano),
					}, nil
				}).SetName("step-one"),
			},
		},
	)
	if err != nil {
		panic(err)
	}

	cleanup, err := w.Start()
	if err != nil {
		panic(fmt.Errorf("error starting worker: %w", err))
	}

	<-ctx.Done()

	if err := cleanup(); err != nil {
		panic(fmt.Errorf("error cleaning up: %w", err))
	}

	mx.Lock()
	defer mx.Unlock()
	return count, uniques
}