mirror of
https://github.com/hatchet-dev/hatchet.git
synced 2026-04-21 17:28:59 -05:00
Fix: Payload fallbacks, WAL conflict handling, WAL eviction (#2372)
* fix: improve error handling * fix: add default operation * fix: dont write operation * fix: refactor offload to always evict * fix: err check * fix: err
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
-- +goose Up
|
||||
-- +goose StatementBegin
|
||||
ALTER TABLE v1_payload_wal ALTER COLUMN operation SET DEFAULT 'CREATE';
|
||||
-- +goose StatementEnd
|
||||
|
||||
-- +goose Down
|
||||
-- +goose StatementBegin
|
||||
ALTER TABLE v1_payload_wal ALTER COLUMN operation DROP DEFAULT;
|
||||
-- +goose StatementEnd
|
||||
@@ -254,7 +254,7 @@ func (d *DispatcherImpl) handleTaskBulkAssignedTask(ctx context.Context, msg *ms
|
||||
TenantId: task.TenantID,
|
||||
}]
|
||||
|
||||
if input == nil || !ok {
|
||||
if !ok {
|
||||
// If the input wasn't found in the payload store,
|
||||
// fall back to the input stored on the task itself.
|
||||
d.l.Error().Msgf("handleTaskBulkAssignedTask-1: task %s has empty payload, falling back to input", task.ExternalID.String())
|
||||
@@ -311,7 +311,7 @@ func (d *DispatcherImpl) handleTaskBulkAssignedTask(ctx context.Context, msg *ms
|
||||
TenantId: task.TenantID,
|
||||
}]
|
||||
|
||||
if input == nil || !ok {
|
||||
if !ok {
|
||||
// If the input wasn't found in the payload store,
|
||||
// fall back to the input stored on the task itself.
|
||||
d.l.Error().Msgf("handleTaskBulkAssignedTask-2: task %s has empty payload, falling back to input", task.ExternalID.String())
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/hatchet-dev/hatchet/internal/telemetry"
|
||||
"github.com/hatchet-dev/hatchet/pkg/repository/postgres/sqlchelpers"
|
||||
"github.com/hatchet-dev/hatchet/pkg/repository/v1/sqlcv1"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/rs/zerolog"
|
||||
@@ -102,7 +103,6 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
|
||||
payloadTypes := make([]string, 0, len(payloads))
|
||||
inlineContents := make([][]byte, 0, len(payloads))
|
||||
offloadAts := make([]pgtype.Timestamptz, 0, len(payloads))
|
||||
operations := make([]string, 0, len(payloads))
|
||||
tenantIds := make([]pgtype.UUID, 0, len(payloads))
|
||||
locations := make([]string, 0, len(payloads))
|
||||
|
||||
@@ -134,7 +134,6 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
|
||||
tenantIds = append(tenantIds, tenantId)
|
||||
locations = append(locations, string(sqlcv1.V1PayloadLocationINLINE))
|
||||
inlineContents = append(inlineContents, payload.Payload)
|
||||
operations = append(operations, string(sqlcv1.V1PayloadWalOperationCREATE))
|
||||
|
||||
if p.externalStoreEnabled {
|
||||
offloadAts = append(offloadAts, pgtype.Timestamptz{Time: payload.InsertedAt.Time.Add(*p.inlineStoreTTL), Valid: true})
|
||||
@@ -164,7 +163,6 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
|
||||
Payloadinsertedats: taskInsertedAts,
|
||||
Payloadtypes: payloadTypes,
|
||||
Offloadats: offloadAts,
|
||||
Operations: operations,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -185,7 +183,7 @@ func (p *payloadStoreRepositoryImpl) Retrieve(ctx context.Context, opts Retrieve
|
||||
payload, ok := payloadMap[opts]
|
||||
|
||||
if !ok {
|
||||
return nil, nil
|
||||
return nil, pgx.ErrNoRows
|
||||
}
|
||||
|
||||
return payload, nil
|
||||
@@ -366,7 +364,7 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
|
||||
|
||||
span.SetAttributes(attrs...)
|
||||
|
||||
for opts, payload := range payloads {
|
||||
for _, opts := range retrieveOpts {
|
||||
offloadAt, ok := retrieveOptsToOffloadAt[opts]
|
||||
|
||||
if !ok {
|
||||
@@ -378,7 +376,7 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
|
||||
Id: opts.Id,
|
||||
InsertedAt: opts.InsertedAt,
|
||||
Type: opts.Type,
|
||||
Payload: payload,
|
||||
Payload: payloads[opts],
|
||||
TenantId: opts.TenantId.String(),
|
||||
},
|
||||
OffloadAt: offloadAt.Time,
|
||||
@@ -406,7 +404,7 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
|
||||
tenantIds := make([]pgtype.UUID, 0, len(retrieveOptsToStoredKey))
|
||||
externalLocationKeys := make([]string, 0, len(retrieveOptsToStoredKey))
|
||||
|
||||
for opt := range retrieveOptsToStoredKey {
|
||||
for _, opt := range retrieveOpts {
|
||||
offloadAt, exists := retrieveOptsToOffloadAt[opt]
|
||||
|
||||
if !exists {
|
||||
@@ -420,7 +418,12 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
|
||||
|
||||
key, ok := retrieveOptsToStoredKey[opt]
|
||||
if !ok {
|
||||
return false, fmt.Errorf("external location key not found for opts: %+v", opt)
|
||||
// important: if there's no key here, it's likely because the payloads table did not contain the payload
|
||||
// this is okay - it can happen if e.g. a payload partition is dropped before the WAL is processed (not a great situation, but not catastrophic)
|
||||
// if this happens, we log an error and set the key to `""` which will allow it to be evicted from the WAL. it'll never cause
|
||||
// an update in the payloads table because there won't be a matching row
|
||||
p.l.Error().Int64("id", opt.Id).Time("insertedAt", opt.InsertedAt.Time).Msg("external location key not found for opts")
|
||||
key = ""
|
||||
}
|
||||
|
||||
externalLocationKeys = append(externalLocationKeys, string(key))
|
||||
|
||||
@@ -72,7 +72,6 @@ WITH inputs AS (
|
||||
UNNEST(@payloadInsertedAts::TIMESTAMPTZ[]) AS payload_inserted_at,
|
||||
UNNEST(CAST(@payloadTypes::TEXT[] AS v1_payload_type[])) AS payload_type,
|
||||
UNNEST(@offloadAts::TIMESTAMPTZ[]) AS offload_at,
|
||||
UNNEST(CAST(@operations::TEXT[] AS v1_payload_wal_operation[])) AS operation,
|
||||
UNNEST(@tenantIds::UUID[]) AS tenant_id
|
||||
)
|
||||
|
||||
@@ -81,20 +80,17 @@ INSERT INTO v1_payload_wal (
|
||||
offload_at,
|
||||
payload_id,
|
||||
payload_inserted_at,
|
||||
payload_type,
|
||||
operation
|
||||
payload_type
|
||||
)
|
||||
SELECT
|
||||
i.tenant_id,
|
||||
i.offload_at,
|
||||
i.payload_id,
|
||||
i.payload_inserted_at,
|
||||
i.payload_type,
|
||||
i.operation
|
||||
i.payload_type
|
||||
FROM
|
||||
inputs i
|
||||
ON CONFLICT (offload_at, tenant_id, payload_id, payload_inserted_at, payload_type) DO UPDATE
|
||||
SET operation = EXCLUDED.operation
|
||||
ON CONFLICT DO NOTHING
|
||||
;
|
||||
|
||||
-- name: PollPayloadWALForRecordsToOffload :many
|
||||
|
||||
@@ -217,8 +217,7 @@ WITH inputs AS (
|
||||
UNNEST($2::TIMESTAMPTZ[]) AS payload_inserted_at,
|
||||
UNNEST(CAST($3::TEXT[] AS v1_payload_type[])) AS payload_type,
|
||||
UNNEST($4::TIMESTAMPTZ[]) AS offload_at,
|
||||
UNNEST(CAST($5::TEXT[] AS v1_payload_wal_operation[])) AS operation,
|
||||
UNNEST($6::UUID[]) AS tenant_id
|
||||
UNNEST($5::UUID[]) AS tenant_id
|
||||
)
|
||||
|
||||
INSERT INTO v1_payload_wal (
|
||||
@@ -226,20 +225,17 @@ INSERT INTO v1_payload_wal (
|
||||
offload_at,
|
||||
payload_id,
|
||||
payload_inserted_at,
|
||||
payload_type,
|
||||
operation
|
||||
payload_type
|
||||
)
|
||||
SELECT
|
||||
i.tenant_id,
|
||||
i.offload_at,
|
||||
i.payload_id,
|
||||
i.payload_inserted_at,
|
||||
i.payload_type,
|
||||
i.operation
|
||||
i.payload_type
|
||||
FROM
|
||||
inputs i
|
||||
ON CONFLICT (offload_at, tenant_id, payload_id, payload_inserted_at, payload_type) DO UPDATE
|
||||
SET operation = EXCLUDED.operation
|
||||
ON CONFLICT DO NOTHING
|
||||
`
|
||||
|
||||
type WritePayloadWALParams struct {
|
||||
@@ -247,7 +243,6 @@ type WritePayloadWALParams struct {
|
||||
Payloadinsertedats []pgtype.Timestamptz `json:"payloadinsertedats"`
|
||||
Payloadtypes []string `json:"payloadtypes"`
|
||||
Offloadats []pgtype.Timestamptz `json:"offloadats"`
|
||||
Operations []string `json:"operations"`
|
||||
Tenantids []pgtype.UUID `json:"tenantids"`
|
||||
}
|
||||
|
||||
@@ -257,7 +252,6 @@ func (q *Queries) WritePayloadWAL(ctx context.Context, db DBTX, arg WritePayload
|
||||
arg.Payloadinsertedats,
|
||||
arg.Payloadtypes,
|
||||
arg.Offloadats,
|
||||
arg.Operations,
|
||||
arg.Tenantids,
|
||||
)
|
||||
return err
|
||||
|
||||
@@ -2850,11 +2850,11 @@ func (r *TaskRepositoryImpl) ReplayTasks(ctx context.Context, tenantId string, t
|
||||
TenantId: sqlchelpers.UUIDFromStr(tenantId),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
if err != nil && err != pgx.ErrNoRows {
|
||||
return nil, fmt.Errorf("failed to retrieve task input: %w", err)
|
||||
}
|
||||
|
||||
if input == nil {
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
// If the input wasn't found in the payload store,
|
||||
// fall back to the input stored on the task itself.
|
||||
r.l.Error().Msgf("ReplayTasks: task %s has empty payload, falling back to input", task.ExternalID.String())
|
||||
|
||||
@@ -1658,7 +1658,7 @@ CREATE TABLE v1_payload_wal (
|
||||
payload_id BIGINT NOT NULL,
|
||||
payload_inserted_at TIMESTAMPTZ NOT NULL,
|
||||
payload_type v1_payload_type NOT NULL,
|
||||
operation v1_payload_wal_operation NOT NULL,
|
||||
operation v1_payload_wal_operation NOT NULL DEFAULT 'CREATE',
|
||||
|
||||
PRIMARY KEY (offload_at, tenant_id, payload_id, payload_inserted_at, payload_type),
|
||||
CONSTRAINT "v1_payload_wal_payload" FOREIGN KEY (payload_id, payload_inserted_at, payload_type, tenant_id) REFERENCES v1_payload (id, inserted_at, type, tenant_id) ON DELETE CASCADE
|
||||
|
||||
Reference in New Issue
Block a user