feat: deduplicated enqueue (#735)

* wip

* wip: functional query

* feat: expose affinity config

* feat: add weight to proto

* feat: upsert affinity state on worker start

* fix: linting

* feat: add upsert proto

* feat: upsert handler

* feat: revise model

* fix: labels

* feat: functional desired worker

* wip: ui

* feat: add state to step run events

* fix: filter empty keys

* fix: labels as badges

* feat: empty state and descriptive text

* chore: add todo

* chore: whitespace

* chore: cleanup

* chore: cleanup

* chore: fix hash

* chore: squash migrations

* fix: fair worker assignment

* fix: remaining slots on valid desired workers

* wip: sticky

* fix: count slots

* chore: rm log line

* feat: expose sticky config

* wip: sticky dag

* feat: expose desired worker id to trigger

* feat: trigger on desired worker

* feat: typescript docs

* feat: sticky python

* feat: py sticky children

* wip: py affinity

* serverless note

* feat: complete python examples

* linting

* feat: deduplicated enqueue

* fix: address changes from PR review

* chore: generate

---------

Co-authored-by: gabriel ruttner <gabriel.ruttner@gmail.com>
This commit is contained in:
abelanger5
2024-07-26 09:47:46 -07:00
committed by GitHub
parent 2711fb84cb
commit 1ea4dfc5de
14 changed files with 5988 additions and 4 deletions

View File

@@ -55,7 +55,7 @@ if [ $? -eq 0 ] && [ -n "$MIGRATION_NAME" ]; then
--baseline "$MIGRATION_NAME" \
--dir "file://sql/migrations"
else
echo "No prisma migration found. Applying all migrations..."
echo "No prisma migration found. Applying migrations via atlas..."
atlas migrate apply \
--url "$DATABASE_URL" \

View File

@@ -139,6 +139,15 @@ func (a *AdminServiceImpl) TriggerWorkflow(ctx context.Context, req *contracts.T
workflowRunId, err := a.repo.WorkflowRun().CreateNewWorkflowRun(ctx, tenantId, createOpts)
dedupeTarget := repository.ErrDedupeValueExists{}
if errors.As(err, &dedupeTarget) {
return nil, status.Error(
codes.AlreadyExists,
fmt.Sprintf("workflow run with deduplication value %s already exists", dedupeTarget.DedupeValue),
)
}
if err == metered.ErrResourceExhausted {
return nil, status.Errorf(codes.ResourceExhausted, "resource exhausted: workflow run limit exceeded for tenant")
}

File diff suppressed because it is too large Load Diff

View File

@@ -1208,6 +1208,16 @@ type WorkflowRun struct {
Duration pgtype.Int4 `json:"duration"`
}
// WorkflowRunDedupe is the Go representation of one row of the
// "WorkflowRunDedupe" table: a deduplication value recorded for a workflow run.
type WorkflowRunDedupe struct {
// ID is the row's BIGSERIAL identifier.
ID int64 `json:"id"`
// CreatedAt and UpdatedAt are the row timestamps.
CreatedAt pgtype.Timestamp `json:"createdAt"`
UpdatedAt pgtype.Timestamp `json:"updatedAt"`
// TenantId is the tenant that owns the dedupe value.
TenantId pgtype.UUID `json:"tenantId"`
// WorkflowId is the workflow the dedupe value is scoped to.
WorkflowId pgtype.UUID `json:"workflowId"`
// WorkflowRunId is the workflow run that used this dedupe value.
WorkflowRunId pgtype.UUID `json:"workflowRunId"`
// Value is the dedupe value itself.
Value string `json:"value"`
}
type WorkflowRunStickyState struct {
ID int64 `json:"id"`
CreatedAt pgtype.Timestamp `json:"createdAt"`

View File

@@ -679,6 +679,17 @@ CREATE TABLE "WorkflowRun" (
CONSTRAINT "WorkflowRun_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "WorkflowRunDedupe" (
"id" BIGSERIAL NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"tenantId" UUID NOT NULL,
"workflowId" UUID NOT NULL,
"workflowRunId" UUID NOT NULL,
"value" TEXT NOT NULL
);
-- CreateTable
CREATE TABLE "WorkflowRunStickyState" (
"id" BIGSERIAL NOT NULL,
@@ -1098,6 +1109,15 @@ CREATE INDEX "WorkflowRun_tenantId_idx" ON "WorkflowRun"("tenantId" ASC);
-- CreateIndex
CREATE INDEX "WorkflowRun_workflowVersionId_idx" ON "WorkflowRun"("workflowVersionId" ASC);
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunDedupe_id_key" ON "WorkflowRunDedupe"("id" ASC);
-- CreateIndex
CREATE INDEX "WorkflowRunDedupe_tenantId_value_idx" ON "WorkflowRunDedupe"("tenantId" ASC, "value" ASC);
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunDedupe_tenantId_workflowId_value_key" ON "WorkflowRunDedupe"("tenantId" ASC, "workflowId" ASC, "value" ASC);
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunStickyState_workflowRunId_key" ON "WorkflowRunStickyState"("workflowRunId" ASC);
@@ -1392,6 +1412,9 @@ ALTER TABLE "WorkflowRun" ADD CONSTRAINT "WorkflowRun_tenantId_fkey" FOREIGN KEY
-- AddForeignKey
ALTER TABLE "WorkflowRun" ADD CONSTRAINT "WorkflowRun_workflowVersionId_fkey" FOREIGN KEY ("workflowVersionId") REFERENCES "WorkflowVersion"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "WorkflowRunDedupe" ADD CONSTRAINT "WorkflowRunDedupe_tenantId_fkey" FOREIGN KEY ("tenantId") REFERENCES "Tenant"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "WorkflowRunStickyState" ADD CONSTRAINT "WorkflowRunStickyState_workflowRunId_fkey" FOREIGN KEY ("workflowRunId") REFERENCES "WorkflowRun"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -437,6 +437,28 @@ INSERT INTO "WorkflowRun" (
@additionalMetadata::jsonb
) RETURNING *;
-- name: CreateWorkflowRunDedupe :one
WITH workflow_id AS (
SELECT w."id" FROM "Workflow" w
JOIN "WorkflowVersion" wv ON wv."workflowId" = w."id"
WHERE wv."id" = @workflowVersionId::uuid
)
INSERT INTO "WorkflowRunDedupe" (
"createdAt",
"updatedAt",
"tenantId",
"workflowId",
"workflowRunId",
"value"
) VALUES (
CURRENT_TIMESTAMP,
CURRENT_TIMESTAMP,
@tenantId::uuid,
(SELECT "id" FROM workflow_id),
@workflowRunId::uuid,
sqlc.narg('value')::text
) RETURNING *;
-- name: CreateWorkflowRunStickyState :one
WITH workflow_version AS (
SELECT "sticky"

View File

@@ -445,6 +445,56 @@ func (q *Queries) CreateWorkflowRun(ctx context.Context, db DBTX, arg CreateWork
return &i, err
}
// createWorkflowRunDedupe is the SQL text executed by CreateWorkflowRunDedupe.
// It inserts one "WorkflowRunDedupe" row and returns every column of it.
// Placeholders: $1 tenant id, $2 workflow run id, $3 dedupe value,
// $4 workflow version id. The "workflowId" column is not passed in directly;
// the workflow_id CTE resolves it from the workflow version id ($4).
const createWorkflowRunDedupe = `-- name: CreateWorkflowRunDedupe :one
WITH workflow_id AS (
SELECT w."id" FROM "Workflow" w
JOIN "WorkflowVersion" wv ON wv."workflowId" = w."id"
WHERE wv."id" = $4::uuid
)
INSERT INTO "WorkflowRunDedupe" (
"createdAt",
"updatedAt",
"tenantId",
"workflowId",
"workflowRunId",
"value"
) VALUES (
CURRENT_TIMESTAMP,
CURRENT_TIMESTAMP,
$1::uuid,
(SELECT "id" FROM workflow_id),
$2::uuid,
$3::text
) RETURNING id, "createdAt", "updatedAt", "tenantId", "workflowId", "workflowRunId", value
`
// CreateWorkflowRunDedupeParams carries the arguments bound to the
// createWorkflowRunDedupe query.
type CreateWorkflowRunDedupeParams struct {
// Tenantid is bound to $1 and stored in "tenantId".
Tenantid pgtype.UUID `json:"tenantid"`
// Workflowrunid is bound to $2 and stored in "workflowRunId".
Workflowrunid pgtype.UUID `json:"workflowrunid"`
// Value is bound to $3 and stored in "value".
// NOTE(review): the type is the nullable pgtype.Text; presumably callers
// must always pass a valid (non-NULL) value - confirm against the column
// definition.
Value pgtype.Text `json:"value"`
// Workflowversionid is bound to $4; the query uses it only to look up the
// parent workflow id, it is not stored itself.
Workflowversionid pgtype.UUID `json:"workflowversionid"`
}
// CreateWorkflowRunDedupe runs the createWorkflowRunDedupe query on db and
// scans the returned row into a WorkflowRunDedupe.
//
// The returned pointer is never nil: on failure it points at a zero or
// partially filled struct, so callers must check err before using it. err is
// whatever row.Scan reports.
func (q *Queries) CreateWorkflowRunDedupe(ctx context.Context, db DBTX, arg CreateWorkflowRunDedupeParams) (*WorkflowRunDedupe, error) {
// Arguments are positional and must stay in $1..$4 order of the query text.
row := db.QueryRow(ctx, createWorkflowRunDedupe,
arg.Tenantid,
arg.Workflowrunid,
arg.Value,
arg.Workflowversionid,
)
var i WorkflowRunDedupe
// Scan targets follow the column order of the query's RETURNING clause.
err := row.Scan(
&i.ID,
&i.CreatedAt,
&i.UpdatedAt,
&i.TenantId,
&i.WorkflowId,
&i.WorkflowRunId,
&i.Value,
)
return &i, err
}
const createWorkflowRunStickyState = `-- name: CreateWorkflowRunStickyState :one
WITH workflow_version AS (
SELECT "sticky"

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"github.com/google/uuid"
@@ -678,6 +679,43 @@ func createNewWorkflowRun(ctx context.Context, pool *pgxpool.Pool, queries *dbsq
return nil, err
}
createParams.Additionalmetadata = additionalMetadataBytes
// if additional metadata contains a "dedupe" key, use it as the dedupe value
if dedupeValue, ok := opts.AdditionalMetadata["dedupe"]; ok {
if dedupeStr, ok := dedupeValue.(string); ok {
opts.DedupeValue = &dedupeStr
}
if dedupeInt, ok := dedupeValue.(int); ok {
dedupeStr := fmt.Sprintf("%d", dedupeInt)
opts.DedupeValue = &dedupeStr
}
}
}
// create the dedupe value
if opts.DedupeValue != nil {
_, err = queries.CreateWorkflowRunDedupe(
tx1Ctx,
tx,
dbsqlc.CreateWorkflowRunDedupeParams{
Tenantid: pgTenantId,
Workflowversionid: sqlchelpers.UUIDFromStr(opts.WorkflowVersionId),
Value: sqlchelpers.TextFromStr(*opts.DedupeValue),
Workflowrunid: sqlchelpers.UUIDFromStr(workflowRunId),
},
)
if err != nil {
// if this is a unique violation, return stable error
if isUniqueViolationOnDedupe(err) {
return nil, repository.ErrDedupeValueExists{
DedupeValue: *opts.DedupeValue,
}
}
return nil, err
}
}
// create a workflow
@@ -843,6 +881,14 @@ func createNewWorkflowRun(ctx context.Context, pool *pgxpool.Pool, queries *dbsq
err = tx.Commit(tx1Ctx)
if err != nil {
// check unique violation again on commit, to account for inserts which were uncommitted
// at the time of the first check
if isUniqueViolationOnDedupe(err) {
return nil, repository.ErrDedupeValueExists{
DedupeValue: *opts.DedupeValue,
}
}
return nil, err
}
@@ -886,3 +932,12 @@ func defaultWorkflowRunPopulator() []db.WorkflowRunRelationWith {
),
}
}
// isUniqueViolationOnDedupe reports whether err describes a unique-constraint
// violation on the workflow run dedupe index. The check is textual: the error
// message must contain both the index name and the SQLSTATE 23505 marker.
// A nil error is never a violation.
func isUniqueViolationOnDedupe(err error) bool {
	if err != nil {
		msg := err.Error()

		// Every marker has to be present for the error to count.
		for _, marker := range [...]string{
			"WorkflowRunDedupe_tenantId_workflowId_value_key",
			"SQLSTATE 23505",
		} {
			if !strings.Contains(msg, marker) {
				return false
			}
		}

		return true
	}

	return false
}

View File

@@ -54,6 +54,9 @@ type CreateWorkflowRunOpts struct {
// (optional) the desired worker id for sticky state
DesiredWorkerId *string `validate:"omitempty,uuid"`
// (optional) the deduplication value for the workflow run
DedupeValue *string `validate:"omitempty"`
}
type CreateGroupKeyRunOpts struct {
@@ -362,7 +365,17 @@ type WorkflowRunAPIRepository interface {
GetWorkflowRunById(tenantId, runId string) (*db.WorkflowRunModel, error)
}
var ErrWorkflowRunNotFound = fmt.Errorf("workflow run not found")
var (
ErrWorkflowRunNotFound = fmt.Errorf("workflow run not found")
)
// ErrDedupeValueExists reports that a workflow run with the same
// deduplication value already exists.
type ErrDedupeValueExists struct {
	// DedupeValue is the deduplication value that was already in use.
	DedupeValue string
}

// Error implements the error interface. It has a value receiver, so both
// ErrDedupeValueExists and *ErrDedupeValueExists satisfy error.
func (e ErrDedupeValueExists) Error() string {
	const format = "workflow run with dedupe value %s already exists"

	return fmt.Sprintf(format, e.DedupeValue)
}
type WorkflowRunEngineRepository interface {
RegisterCreateCallback(callback Callback[*dbsqlc.WorkflowRun])

View File

@@ -36,6 +36,17 @@ CREATE TABLE "WorkflowRunStickyState" (
CONSTRAINT "WorkflowRunStickyState_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "WorkflowRunDedupe" (
"id" BIGSERIAL NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"tenantId" UUID NOT NULL,
"workflowId" UUID NOT NULL,
"workflowRunId" UUID NOT NULL,
"value" TEXT NOT NULL
);
-- CreateTable
CREATE TABLE "WorkerLabel" (
"id" BIGSERIAL NOT NULL,
@@ -58,6 +69,15 @@ CREATE UNIQUE INDEX "StepDesiredWorkerLabel_stepId_key_key" ON "StepDesiredWorke
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunStickyState_workflowRunId_key" ON "WorkflowRunStickyState"("workflowRunId");
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunDedupe_id_key" ON "WorkflowRunDedupe"("id");
-- CreateIndex
CREATE INDEX "WorkflowRunDedupe_tenantId_value_idx" ON "WorkflowRunDedupe"("tenantId", "value");
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunDedupe_tenantId_workflowId_value_key" ON "WorkflowRunDedupe"("tenantId", "workflowId", "value");
-- CreateIndex
CREATE INDEX "WorkerLabel_workerId_idx" ON "WorkerLabel"("workerId");
@@ -70,5 +90,8 @@ ALTER TABLE "StepDesiredWorkerLabel" ADD CONSTRAINT "StepDesiredWorkerLabel_step
-- AddForeignKey
ALTER TABLE "WorkflowRunStickyState" ADD CONSTRAINT "WorkflowRunStickyState_workflowRunId_fkey" FOREIGN KEY ("workflowRunId") REFERENCES "WorkflowRun"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "WorkflowRunDedupe" ADD CONSTRAINT "WorkflowRunDedupe_tenantId_fkey" FOREIGN KEY ("tenantId") REFERENCES "Tenant"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "WorkerLabel" ADD CONSTRAINT "WorkerLabel_workerId_fkey" FOREIGN KEY ("workerId") REFERENCES "Worker"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -206,6 +206,7 @@ model Tenant {
limits TenantResourceLimit[]
limitAlerts TenantResourceLimitAlert[]
webhookWorkers WebhookWorker[]
dedupes WorkflowRunDedupe[]
@@index([controllerPartitionId])
@@index([workerPartitionId])
@@ -928,6 +929,29 @@ model WorkflowRun {
@@index([deletedAt])
}
// WorkflowRunDedupe records a deduplication value used by a workflow run.
// At most one row may exist per (tenantId, workflowId, value).
model WorkflowRunDedupe {
id BigInt @unique @default(autoincrement()) @db.BigInt
createdAt DateTime @default(now())
updatedAt DateTime @default(now()) @updatedAt
// the parent tenant; rows are removed together with the tenant (onDelete: Cascade)
tenant Tenant @relation(fields: [tenantId], references: [id], onDelete: Cascade, onUpdate: Cascade)
tenantId String @db.Uuid
// the parent workflow (plain id column, no relation declared)
workflowId String @db.Uuid
// the workflow run id which used this dedupe value (plain id column, no relation declared)
workflowRunId String @db.Uuid
// the dedupe value
value String
// DO NOT REMOVE OR RENAME - application code detects duplicate dedupe values by matching the
// name of the unique index this generates ("WorkflowRunDedupe_tenantId_workflowId_value_key")
// in the database error message
@@unique([tenantId, workflowId, value])
@@index([tenantId, value])
}
model GetGroupKeyRun {
// base fields
id String @id @unique @default(uuid()) @db.Uuid

View File

@@ -16,6 +16,14 @@ CREATE TABLE "WorkerLabel" ("id" bigserial NOT NULL, "createdAt" timestamp(3) NO
CREATE INDEX "WorkerLabel_workerId_idx" ON "WorkerLabel" ("workerId");
-- Create index "WorkerLabel_workerId_key_key" to table: "WorkerLabel"
CREATE UNIQUE INDEX "WorkerLabel_workerId_key_key" ON "WorkerLabel" ("workerId", "key");
-- Create "WorkflowRunDedupe" table
CREATE TABLE "WorkflowRunDedupe" ("id" bigserial NOT NULL, "createdAt" timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, "updatedAt" timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, "tenantId" uuid NOT NULL, "workflowId" uuid NOT NULL, "workflowRunId" uuid NOT NULL, "value" text NOT NULL, CONSTRAINT "WorkflowRunDedupe_tenantId_fkey" FOREIGN KEY ("tenantId") REFERENCES "Tenant" ("id") ON UPDATE CASCADE ON DELETE CASCADE);
-- Create index "WorkflowRunDedupe_id_key" to table: "WorkflowRunDedupe"
CREATE UNIQUE INDEX "WorkflowRunDedupe_id_key" ON "WorkflowRunDedupe" ("id");
-- Create index "WorkflowRunDedupe_tenantId_value_idx" to table: "WorkflowRunDedupe"
CREATE INDEX "WorkflowRunDedupe_tenantId_value_idx" ON "WorkflowRunDedupe" ("tenantId", "value");
-- Create index "WorkflowRunDedupe_tenantId_workflowId_value_key" to table: "WorkflowRunDedupe"
CREATE UNIQUE INDEX "WorkflowRunDedupe_tenantId_workflowId_value_key" ON "WorkflowRunDedupe" ("tenantId", "workflowId", "value");
-- Create "WorkflowRunStickyState" table
CREATE TABLE "WorkflowRunStickyState" ("id" bigserial NOT NULL, "createdAt" timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, "updatedAt" timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, "tenantId" uuid NOT NULL, "workflowRunId" uuid NOT NULL, "desiredWorkerId" uuid NULL, "strategy" "StickyStrategy" NOT NULL, PRIMARY KEY ("id"), CONSTRAINT "WorkflowRunStickyState_workflowRunId_fkey" FOREIGN KEY ("workflowRunId") REFERENCES "WorkflowRun" ("id") ON UPDATE CASCADE ON DELETE CASCADE);
-- Create index "WorkflowRunStickyState_workflowRunId_key" to table: "WorkflowRunStickyState"

View File

@@ -1,4 +1,4 @@
h1:kXmOiopDoafOUuWrjoJBQQrfPe2okuepiKk2+zCEKZw=
h1:Na3jSewCytIQSUJl0NhcVDf9deeZ4K9bOXp+7IGMcHc=
20240115180414_init.sql h1:Ef3ZyjAHkmJPdGF/dEWCahbwgcg6uGJKnDxW2JCRi2k=
20240122014727_v0_6_0.sql h1:o/LdlteAeFgoHJ3e/M4Xnghqt9826IE/Y/h0q95Acuo=
20240126235456_v0_7_0.sql h1:KiVzt/hXgQ6esbdC6OMJOOWuYEXmy1yeCpmsVAHTFKs=
@@ -41,4 +41,4 @@ h1:kXmOiopDoafOUuWrjoJBQQrfPe2okuepiKk2+zCEKZw=
20240715154334_v0.37.0.sql h1:/lu8OPyH2rHPJRk3wL+LBsHp698YMyh0wLz+bRu7qXU=
20240716125857_v0.38.0.sql h1:BFa19pXab9GHd0xkSqLRT3eNer9QKoVf7SpR6O03l+Y=
20240716143349_v0.39.0.sql h1:K0m6v5XamYBYJgBKpm69Jh3QOOSXKTCSNoU9hR3sLM4=
20240719180704_v0.40.0.sql h1:yXBrJvtTiYonzAEOxfsskFBjYUokD4zkv4UkIu0ZRXU=
20240726160629_v0.40.0.sql h1:XmnKVQ/AMUTPnL1SZPwLhmY0KR4sT9B6+uhVbElYx34=

View File

@@ -679,6 +679,17 @@ CREATE TABLE "WorkflowRun" (
CONSTRAINT "WorkflowRun_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "WorkflowRunDedupe" (
"id" BIGSERIAL NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"tenantId" UUID NOT NULL,
"workflowId" UUID NOT NULL,
"workflowRunId" UUID NOT NULL,
"value" TEXT NOT NULL
);
-- CreateTable
CREATE TABLE "WorkflowRunStickyState" (
"id" BIGSERIAL NOT NULL,
@@ -1098,6 +1109,15 @@ CREATE INDEX "WorkflowRun_tenantId_idx" ON "WorkflowRun"("tenantId" ASC);
-- CreateIndex
CREATE INDEX "WorkflowRun_workflowVersionId_idx" ON "WorkflowRun"("workflowVersionId" ASC);
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunDedupe_id_key" ON "WorkflowRunDedupe"("id" ASC);
-- CreateIndex
CREATE INDEX "WorkflowRunDedupe_tenantId_value_idx" ON "WorkflowRunDedupe"("tenantId" ASC, "value" ASC);
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunDedupe_tenantId_workflowId_value_key" ON "WorkflowRunDedupe"("tenantId" ASC, "workflowId" ASC, "value" ASC);
-- CreateIndex
CREATE UNIQUE INDEX "WorkflowRunStickyState_workflowRunId_key" ON "WorkflowRunStickyState"("workflowRunId" ASC);
@@ -1392,6 +1412,9 @@ ALTER TABLE "WorkflowRun" ADD CONSTRAINT "WorkflowRun_tenantId_fkey" FOREIGN KEY
-- AddForeignKey
ALTER TABLE "WorkflowRun" ADD CONSTRAINT "WorkflowRun_workflowVersionId_fkey" FOREIGN KEY ("workflowVersionId") REFERENCES "WorkflowVersion"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "WorkflowRunDedupe" ADD CONSTRAINT "WorkflowRunDedupe_tenantId_fkey" FOREIGN KEY ("tenantId") REFERENCES "Tenant"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "WorkflowRunStickyState" ADD CONSTRAINT "WorkflowRunStickyState_workflowRunId_fkey" FOREIGN KEY ("workflowRunId") REFERENCES "WorkflowRun"("id") ON DELETE CASCADE ON UPDATE CASCADE;