Feat: OLAP Payloads (#2410)

* feat: olap payloads table

* feat: olap queue messages for payload puts

* feat: wire up writes on task write

* driveby: add + ignore psql-connect

* fix: down migration

* fix: use external id for pk

* fix: insert query

* fix: more external ids

* fix: bit more cleanup

* feat: dags

* fix: the rest of the refs

* fix: placeholder uuid

* fix: write external ids

* feat: wire up messages over the queue

* fix: panic

* Revert "fix: panic"

This reverts commit c0adccf2ea.

* Revert "feat: wire up messages over the queue"

This reverts commit 36f425f3c1.

* fix: rm unused method

* fix: rm more

* fix: rm cruft

* feat: wire up failures

* feat: start wiring up completed events

* fix: more wiring

* fix: finish wiring up completed event payloads

* fix: lint

* feat: start wiring up external ids in the core

* feat: olap pub

* fix: add returning

* fix: wiring

* debug: log lines for pubs

* fix: external id writes

* Revert "debug: log lines for pubs"

This reverts commit fe430840bd.

* fix: rm sample

* debug: rm pub buffer param

* Revert "debug: rm pub buffer param"

This reverts commit b42a5cacbb.

* debug: stuck queries

* debug: more logs

* debug: yet more logs

* fix: rename BulkRetrieve -> Retrieve

* chore: lint

* fix: naming

* fix: conn leak in putpayloads

* fix: revert debug

* Revert "debug: more logs"

This reverts commit 95da7de64f.

* Revert "debug: stuck queries"

This reverts commit 8fda64adc4.

* feat: improve getters, olap getter

* fix: key type

* feat: first pass at pulling olap payloads from the payload store

* fix: start fixing bugs

* fix: start reworking `includePayloads` param

* fix: include payloads wiring

* feat: analyze for payloads

* fix: simplify writes more + write event payloads

* feat: read out event payloads

* feat: env vars for dual writes

* refactor: clean up task prop drilling a bit

* feat: add include payloads params to python for tests

* fix: tx commit

* fix: dual writes

* fix: not null constraint

* fix: one more

* debug: logging

* fix: more debugging, tweak function sig

* fix: function sig

* fix: refs

* debug: more logging

* debug: more logging

* debug: fix condition

* debug: overwrite properly

* fix: revert debug

* fix: rm more drilling

* fix: comments

* fix: partitioning jobs

* chore: ver

* fix: bug, docs

* hack: dummy id and inserted at for payload offloads

* fix: bug

* fix: no need to handle offloads for task event data

* hack: jitter + current ts

* fix: short circuit

* fix: offload payloads in a tx

* fix: uncomment sampling

* fix: don't offload if external store is disabled

* chore: gen sqlc

* fix: migration

* fix: start reworking types

* fix: couple more

* fix: rm unused code

* fix: drill includePayloads down again

* fix: silence annoying error in some cases

* fix: always store payloads

* debug: use workflow run id for input

* fix: improve logging

* debug: logging on retrieve

* debug: task input

* fix: use correct field

* debug: write even null payloads to limit errors

* debug: hide error lines

* fix: quieting more errors

* fix: duplicate example names, remove print lines

* debug: add logging for olap event writes

* hack: immediate event offloads and cutovers

* fix: rm log line

* fix: import

* fix: short circuit events

* fix: duped names

Author: matt
Date: 2025-10-20 09:09:49 -04:00
Committed by: GitHub
Parent: 8f57989730
Commit: c6e154fd03

43 changed files with 1597 additions and 356 deletions
+3
@@ -99,3 +99,6 @@ openapitools.json
# Generated docs content
frontend/app/src/next/lib/docs/generated/
frontend/docs/lib/generated/
# Scripts
hack/dev/psql-connect.sh
@@ -23,7 +23,7 @@ func (t *TasksService) V1DagListTasks(ctx echo.Context, request gen.V1DagListTas
ctx.Request().Context(),
tenantId,
pguuids,
true,
false,
)
if err != nil {
@@ -61,6 +61,7 @@ func (t *V1WorkflowRunsService) getWorkflowRunDetails(
ctx,
tenantId,
taskMetadata,
true,
)
if err != nil {
@@ -68,6 +69,7 @@ func (t *V1WorkflowRunsService) getWorkflowRunDetails(
}
stepIdToTaskExternalId := make(map[pgtype.UUID]pgtype.UUID)
for _, task := range tasks {
stepIdToTaskExternalId[task.StepID] = task.ExternalID
}
+21 -24
@@ -57,12 +57,18 @@ func (t *V1WorkflowRunsService) WithDags(ctx context.Context, request gen.V1Work
workflowIds = *request.Params.WorkflowIds
}
includePayloads := false
if request.Params.IncludePayloads != nil {
includePayloads = *request.Params.IncludePayloads
}
opts := v1.ListWorkflowRunOpts{
CreatedAfter: since,
Statuses: statuses,
WorkflowIds: workflowIds,
Limit: limit,
Offset: offset,
CreatedAfter: since,
Statuses: statuses,
WorkflowIds: workflowIds,
Limit: limit,
Offset: offset,
IncludePayloads: includePayloads,
}
additionalMetadataFilters := make(map[string]interface{})
@@ -93,13 +99,6 @@ func (t *V1WorkflowRunsService) WithDags(ctx context.Context, request gen.V1Work
opts.TriggeringEventExternalId = &id
}
includePayloads := true
if request.Params.IncludePayloads != nil {
includePayloads = *request.Params.IncludePayloads
}
opts.IncludePayloads = includePayloads
dags, total, err := t.config.V1.OLAP().ListWorkflowRuns(
ctx,
tenantId,
@@ -146,17 +145,13 @@ func (t *V1WorkflowRunsService) WithDags(ctx context.Context, request gen.V1Work
}
taskIdToWorkflowName := make(map[int64]string)
for _, task := range tasks {
if name, ok := workflowNames[task.WorkflowID]; ok {
taskIdToWorkflowName[task.ID] = name
}
}
taskIdToActionId := make(map[int64]string)
for _, task := range tasks {
taskIdToActionId[task.ID] = task.ActionID
if name, ok := workflowNames[task.WorkflowID]; ok {
taskIdToWorkflowName[task.ID] = name
}
}
parsedTasks := transformers.TaskRunDataRowToWorkflowRunsMany(tasks, taskIdToWorkflowName, total, limit, offset)
@@ -221,6 +216,11 @@ func (t *V1WorkflowRunsService) OnlyTasks(ctx context.Context, request gen.V1Wor
workflowIds = *request.Params.WorkflowIds
}
includePayloads := false
if request.Params.IncludePayloads != nil {
includePayloads = *request.Params.IncludePayloads
}
opts := v1.ListTaskRunOpts{
CreatedAfter: since,
Statuses: statuses,
@@ -228,7 +228,7 @@ func (t *V1WorkflowRunsService) OnlyTasks(ctx context.Context, request gen.V1Wor
Limit: limit,
Offset: offset,
WorkerId: request.Params.WorkerId,
IncludePayloads: true,
IncludePayloads: includePayloads,
}
additionalMetadataFilters := make(map[string]interface{})
@@ -252,10 +252,6 @@ func (t *V1WorkflowRunsService) OnlyTasks(ctx context.Context, request gen.V1Wor
opts.TriggeringEventExternalId = request.Params.TriggeringEventExternalId
}
if request.Params.IncludePayloads != nil {
opts.IncludePayloads = *request.Params.IncludePayloads
}
tasks, total, err := t.config.V1.OLAP().ListTasks(
ctx,
tenantId,
@@ -282,6 +278,7 @@ func (t *V1WorkflowRunsService) OnlyTasks(ctx context.Context, request gen.V1Wor
}
taskIdToWorkflowName := make(map[int64]string)
for _, task := range tasks {
if name, ok := workflowIdToName[task.WorkflowID]; ok {
taskIdToWorkflowName[task.ID] = name
+1 -1
@@ -61,7 +61,7 @@ func ToEventFromSQLC(eventRow *dbsqlc.ListEventsRow) (*gen.Event, error) {
return res, nil
}
func ToEventFromSQLCV1(event *v1.ListEventsRow) (*gen.Event, error) {
func ToEventFromSQLCV1(event *v1.EventWithPayload) (*gen.Event, error) {
var metadata map[string]interface{}
if event.EventAdditionalMetadata != nil {
+4 -2
@@ -49,7 +49,7 @@ func parseTriggeredRuns(triggeredRuns []byte) ([]gen.V1EventTriggeredRun, error)
return result, nil
}
func ToV1EventList(events []*v1.ListEventsRow, limit, offset, total int64) gen.V1EventList {
func ToV1EventList(events []*v1.EventWithPayload, limit, offset, total int64) gen.V1EventList {
rows := make([]gen.V1Event, len(events))
numPages := int64(math.Ceil(float64(total) / float64(limit)))
@@ -70,7 +70,9 @@ func ToV1EventList(events []*v1.ListEventsRow, limit, offset, total int64) gen.V
for i, row := range events {
additionalMetadata := jsonToMap(row.EventAdditionalMetadata)
payload := jsonToMap(row.EventPayload)
payload := jsonToMap(row.Payload)
triggeredRuns, err := parseTriggeredRuns(row.TriggeredRuns)
if err != nil {
+20 -19
@@ -22,7 +22,7 @@ func jsonToMap(jsonBytes []byte) map[string]interface{} {
return result
}
func ToTaskSummary(task *sqlcv1.PopulateTaskRunDataRow) gen.V1TaskSummary {
func ToTaskSummary(task *v1.TaskWithPayloads) gen.V1TaskSummary {
workflowVersionID := uuid.MustParse(sqlchelpers.UUIDToStr(task.WorkflowVersionID))
additionalMetadata := jsonToMap(task.AdditionalMetadata)
@@ -56,8 +56,8 @@ func ToTaskSummary(task *sqlcv1.PopulateTaskRunDataRow) gen.V1TaskSummary {
CreatedAt: task.InsertedAt.Time,
UpdatedAt: task.InsertedAt.Time,
},
Input: jsonToMap(task.Input),
Output: jsonToMap(task.Output),
Input: jsonToMap(task.InputPayload),
Output: jsonToMap(task.OutputPayload),
Type: gen.V1WorkflowTypeTASK,
DisplayName: task.DisplayName,
Duration: durationPtr,
@@ -81,7 +81,7 @@ func ToTaskSummary(task *sqlcv1.PopulateTaskRunDataRow) gen.V1TaskSummary {
}
func ToTaskSummaryRows(
tasks []*sqlcv1.PopulateTaskRunDataRow,
tasks []*v1.TaskWithPayloads,
) []gen.V1TaskSummary {
toReturn := make([]gen.V1TaskSummary, len(tasks))
@@ -93,13 +93,14 @@ func ToTaskSummaryRows(
}
func ToDagChildren(
tasks []*sqlcv1.PopulateTaskRunDataRow,
tasks []*v1.TaskWithPayloads,
taskIdToDagExternalId map[int64]uuid.UUID,
) []gen.V1DagChildren {
dagIdToTasks := make(map[uuid.UUID][]gen.V1TaskSummary)
for _, task := range tasks {
dagId := taskIdToDagExternalId[task.ID]
dagIdToTasks[dagId] = append(dagIdToTasks[dagId], ToTaskSummary(task))
}
@@ -119,7 +120,7 @@ func ToDagChildren(
}
func ToTaskSummaryMany(
tasks []*sqlcv1.PopulateTaskRunDataRow,
tasks []*v1.TaskWithPayloads,
total int, limit, offset int64,
) gen.V1TaskSummaryList {
toReturn := ToTaskSummaryRows(tasks)
@@ -175,13 +176,13 @@ func ToTaskRunEventMany(
}
func ToWorkflowRunTaskRunEventsMany(
events []*sqlcv1.ListTaskEventsForWorkflowRunRow,
events []*v1.TaskEventWithPayloads,
) gen.V1TaskEventList {
toReturn := make([]gen.V1TaskEvent, len(events))
for i, event := range events {
workerId := uuid.MustParse(sqlchelpers.UUIDToStr(event.WorkerID))
output := string(event.Output)
output := string(event.OutputPayload)
taskExternalId := uuid.MustParse(sqlchelpers.UUIDToStr(event.TaskExternalID))
retryCount := int(event.RetryCount)
@@ -238,7 +239,7 @@ func ToTaskRunMetrics(metrics *[]v1.TaskRunMetric) gen.V1TaskRunMetrics {
return toReturn
}
func ToTask(taskWithData *sqlcv1.PopulateSingleTaskRunDataRow, workflowRunExternalId pgtype.UUID, workflowVersion *dbsqlc.GetWorkflowVersionByIdRow) gen.V1TaskSummary {
func ToTask(taskWithData *v1.TaskWithPayloads, workflowRunExternalId pgtype.UUID, workflowVersion *dbsqlc.GetWorkflowVersionByIdRow) gen.V1TaskSummary {
workflowVersionID := uuid.MustParse(sqlchelpers.UUIDToStr(taskWithData.WorkflowVersionID))
additionalMetadata := jsonToMap(taskWithData.AdditionalMetadata)
@@ -263,11 +264,11 @@ func ToTask(taskWithData *sqlcv1.PopulateSingleTaskRunDataRow, workflowRunExtern
output := make(map[string]interface{})
if taskWithData.Output != nil {
output = jsonToMap(taskWithData.Output)
if len(taskWithData.OutputPayload) > 0 {
output = jsonToMap(taskWithData.OutputPayload)
}
input := jsonToMap(taskWithData.Input)
input := jsonToMap(taskWithData.InputPayload)
stepId := uuid.MustParse(sqlchelpers.UUIDToStr(taskWithData.StepID))
@@ -308,11 +309,11 @@ func ToTask(taskWithData *sqlcv1.PopulateSingleTaskRunDataRow, workflowRunExtern
Input: input,
TenantId: uuid.MustParse(sqlchelpers.UUIDToStr(taskWithData.TenantID)),
WorkflowId: uuid.MustParse(sqlchelpers.UUIDToStr(taskWithData.WorkflowID)),
ErrorMessage: &taskWithData.ErrorMessage.String,
ErrorMessage: &taskWithData.ErrorMessage,
WorkflowRunExternalId: uuid.MustParse(sqlchelpers.UUIDToStr(workflowRunExternalId)),
TaskExternalId: uuid.MustParse(sqlchelpers.UUIDToStr(taskWithData.ExternalID)),
Type: gen.V1WorkflowTypeTASK,
NumSpawnedChildren: int(taskWithData.SpawnedChildren.Int64),
NumSpawnedChildren: int(taskWithData.NumSpawnedChildren),
StepId: &stepId,
ActionId: &taskWithData.ActionID,
WorkflowVersionId: &workflowVersionID,
@@ -324,10 +325,10 @@ func ToTask(taskWithData *sqlcv1.PopulateSingleTaskRunDataRow, workflowRunExtern
}
func ToWorkflowRunDetails(
taskRunEvents []*sqlcv1.ListTaskEventsForWorkflowRunRow,
taskRunEvents []*v1.TaskEventWithPayloads,
workflowRun *v1.WorkflowRunData,
shape []*dbsqlc.GetWorkflowRunShapeRow,
tasks []*sqlcv1.PopulateTaskRunDataRow,
tasks []*v1.TaskWithPayloads,
stepIdToTaskExternalId map[pgtype.UUID]pgtype.UUID,
workflowVersion *dbsqlc.GetWorkflowVersionByIdRow,
) (gen.V1WorkflowRunDetails, error) {
@@ -337,8 +338,8 @@ func ToWorkflowRunDetails(
output := make(map[string]interface{})
if workflowRun.Output != nil {
output = jsonToMap(*workflowRun.Output)
if len(workflowRun.Output) > 0 {
output = jsonToMap(workflowRun.Output)
}
additionalMetadata := jsonToMap(workflowRun.AdditionalMetadata)
@@ -394,7 +395,7 @@ func ToWorkflowRunDetails(
for i, event := range taskRunEvents {
workerId := uuid.MustParse(sqlchelpers.UUIDToStr(event.WorkerID))
output := string(event.Output)
output := string(event.OutputPayload)
retryCount := int(event.RetryCount)
attempt := retryCount + 1
@@ -39,8 +39,8 @@ func WorkflowRunDataToV1TaskSummary(task *v1.WorkflowRunData, workflowIdsToNames
var output map[string]interface{}
if task.Output != nil {
output = jsonToMap(*task.Output)
if len(task.Output) > 0 {
output = jsonToMap(task.Output)
}
workflowVersionId := uuid.MustParse(sqlchelpers.UUIDToStr(task.WorkflowVersionId))
@@ -146,7 +146,7 @@ func ToWorkflowRunMany(
}
}
func PopulateTaskRunDataRowToV1TaskSummary(task *sqlcv1.PopulateTaskRunDataRow, workflowName *string) gen.V1TaskSummary {
func PopulateTaskRunDataRowToV1TaskSummary(task *v1.TaskWithPayloads, workflowName *string) gen.V1TaskSummary {
workflowVersionID := uuid.MustParse(sqlchelpers.UUIDToStr(task.WorkflowVersionID))
additionalMetadata := jsonToMap(task.AdditionalMetadata)
@@ -171,6 +171,7 @@ func PopulateTaskRunDataRowToV1TaskSummary(task *sqlcv1.PopulateTaskRunDataRow,
input := jsonToMap(task.Input)
output := jsonToMap(task.Output)
stepId := uuid.MustParse(sqlchelpers.UUIDToStr(task.StepID))
retryCount := int(task.RetryCount)
@@ -209,7 +210,7 @@ func PopulateTaskRunDataRowToV1TaskSummary(task *sqlcv1.PopulateTaskRunDataRow,
}
func TaskRunDataRowToWorkflowRunsMany(
tasks []*sqlcv1.PopulateTaskRunDataRow,
tasks []*v1.TaskWithPayloads,
taskIdToWorkflowName map[int64]string,
total int, limit, offset int64,
) gen.V1TaskSummaryList {
@@ -0,0 +1,41 @@
-- +goose Up
-- +goose StatementBegin
CREATE TYPE v1_payload_location_olap AS ENUM ('INLINE', 'EXTERNAL');
CREATE TABLE v1_payloads_olap (
tenant_id UUID NOT NULL,
external_id UUID NOT NULL,
location v1_payload_location_olap NOT NULL,
external_location_key TEXT,
inline_content JSONB,
inserted_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (tenant_id, external_id, inserted_at),
CHECK (
location = 'INLINE'
OR
(location = 'EXTERNAL' AND inline_content IS NULL AND external_location_key IS NOT NULL)
)
) PARTITION BY RANGE(inserted_at);
SELECT create_v1_range_partition('v1_payloads_olap'::TEXT, NOW()::DATE);
SELECT create_v1_range_partition('v1_payloads_olap'::TEXT, (NOW() + INTERVAL '1 day')::DATE);
SELECT create_v1_range_partition('v1_payloads_olap'::TEXT, (NOW() + INTERVAL '2 day')::DATE);
ALTER TABLE v1_task_events_olap ADD COLUMN external_id UUID;
ALTER TABLE v1_payload ADD COLUMN external_id UUID;
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
ALTER TABLE v1_task_events_olap DROP COLUMN external_id;
ALTER TABLE v1_payload DROP COLUMN external_id;
DROP TABLE v1_payloads_olap;
DROP TYPE v1_payload_location_olap;
-- +goose StatementEnd
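For reference, the CHECK above admits exactly two row shapes. A minimal sketch using the sqlc-generated types that appear later in this diff (the ids and the location key are illustrative, not values from the migration):

package payloadsexample

import (
	"github.com/jackc/pgx/v5/pgtype"

	"github.com/hatchet-dev/hatchet/pkg/repository/v1/sqlcv1"
)

// exampleRows sketches the two row shapes the CHECK constraint permits:
// INLINE rows keep their content in the table, while EXTERNAL rows must
// carry a location key and no inline content.
func exampleRows(tenantId, externalId pgtype.UUID) []sqlcv1.V1PayloadsOlap {
	inline := sqlcv1.V1PayloadsOlap{
		TenantID:      tenantId,
		ExternalID:    externalId,
		Location:      sqlcv1.V1PayloadLocationOlapINLINE,
		InlineContent: []byte(`{"result": "ok"}`),
	}

	external := sqlcv1.V1PayloadsOlap{
		TenantID:            tenantId,
		ExternalID:          externalId,
		Location:            sqlcv1.V1PayloadLocationOlapEXTERNAL,
		ExternalLocationKey: pgtype.Text{String: "payloads/example-key", Valid: true},
		// InlineContent stays nil: the CHECK rejects EXTERNAL rows with inline content.
	}

	return []sqlcv1.V1PayloadsOlap{inline, external}
}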
@@ -23,7 +23,7 @@ class WorkflowInput(BaseModel):
concurrency_workflow_level_workflow = hatchet.workflow(
name="ConcurrencyWorkflowManyKeys",
name="ConcurrencyWorkflowLevel",
input_validator=WorkflowInput,
concurrency=[
ConcurrencyExpression(
+1 -1
@@ -11,7 +11,7 @@ class DynamicCronInput(BaseModel):
async def create_cron() -> None:
dynamic_cron_workflow = hatchet.workflow(
name="CronWorkflow", input_validator=DynamicCronInput
name="DynamicCronWorkflow", input_validator=DynamicCronInput
)
# > Create
+1 -1
@@ -10,7 +10,7 @@ class DynamicCronInput(BaseModel):
dynamic_cron_workflow = hatchet.workflow(
name="CronWorkflow", input_validator=DynamicCronInput
name="DynamicCronWorkflow", input_validator=DynamicCronInput
)
# > Create
@@ -67,6 +67,7 @@ export const useRuns = ({
worker_id: workerId,
only_tasks: onlyTasks,
triggering_event_external_id: triggeringEventExternalId,
include_payloads: false,
}),
placeholderData: (prev) => prev,
refetchInterval:
+6 -6
@@ -142,9 +142,9 @@ Parameters:
Returns:
| Type | Description |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- |
| `Callable[[Callable[[EmptyModel, Context], R \| CoroutineLike[R]]], Standalone[EmptyModel, R]] \| Callable[[Callable[[TWorkflowInput, Context], R \| CoroutineLike[R]]], Standalone[TWorkflowInput, R]]` | A decorator which creates a `Standalone` task object. |
| Type | Description |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------- |
| `Callable[[Callable[Concatenate[EmptyModel, Context, P], R \| CoroutineLike[R]]], Standalone[EmptyModel, R]] \| Callable[[Callable[Concatenate[TWorkflowInput, Context, P], R \| CoroutineLike[R]]], Standalone[TWorkflowInput, R]]` | A decorator which creates a `Standalone` task object. |
#### `durable_task`
@@ -174,6 +174,6 @@ Parameters:
Returns:
| Type | Description |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- |
| `Callable[[Callable[[EmptyModel, DurableContext], R \| CoroutineLike[R]]], Standalone[EmptyModel, R]] \| Callable[[Callable[[TWorkflowInput, DurableContext], R \| CoroutineLike[R]]], Standalone[TWorkflowInput, R]]` | A decorator which creates a `Standalone` task object. |
| Type | Description |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- |
| `Callable[[Callable[Concatenate[EmptyModel, DurableContext, P], R \| CoroutineLike[R]]], Standalone[EmptyModel, R]] \| Callable[[Callable[Concatenate[TWorkflowInput, DurableContext, P], R \| CoroutineLike[R]]], Standalone[TWorkflowInput, R]]` | A decorator which creates a `Standalone` task object. |
+12 -10
@@ -4,8 +4,8 @@ The Hatchet Context class provides helper methods and useful data to tasks at ru
There are two types of context classes you'll encounter:
- `Context`: The standard context for regular tasks with methods for logging, task output retrieval, cancellation, and more.
- `DurableContext`: An extended context for durable tasks that includes additional methods for durable execution like `aio_wait_for` and `aio_sleep_for`.
- `Context` - The standard context for regular tasks with methods for logging, task output retrieval, cancellation, and more.
- `DurableContext` - An extended context for durable tasks that includes additional methods for durable execution like `aio_wait_for` and `aio_sleep_for`.
## Context
@@ -22,7 +22,7 @@ There are two types of context classes you'll encounter:
| `release_slot` | Manually release the slot for the current step run to free up a slot on the worker. Note that this is an advanced feature and should be used with caution. |
| `put_stream` | Put a stream event to the Hatchet API. This will send the data to the Hatchet API and return immediately. You can then subscribe to the stream from a separate consumer. |
| `refresh_timeout` | Refresh the timeout for the current task run. You can read about refreshing timeouts in [the docs](../../home/timeouts#refreshing-timeouts). |
| `fetch_task_run_error` | A helper intended to be used in an on-failure step to retrieve the error that occurred in a specific upstream task run. |
| `fetch_task_run_error` | **DEPRECATED**: Use `get_task_run_error` instead. |
### Attributes
@@ -88,9 +88,9 @@ The additional metadata sent with the current task run.
Returns:
| Type | Description |
| --------------------------------- | --------------------------------------------------------------------------------------------------- |
| `JSONSerializableMapping \| None` | The additional metadata sent with the current task run, or None if no additional metadata was sent. |
| Type | Description |
| ------------------------- | --------------------------------------------------------------------------------------------------- |
| `JSONSerializableMapping` | The additional metadata sent with the current task run, or None if no additional metadata was sent. |
#### `parent_workflow_run_id`
@@ -273,6 +273,8 @@ Returns:
#### `fetch_task_run_error`
**DEPRECATED**: Use `get_task_run_error` instead.
A helper intended to be used in an on-failure step to retrieve the error that occurred in a specific upstream task run.
Parameters:
@@ -306,10 +308,10 @@ Durably wait for either a sleep or an event.
Parameters:
| Name | Type | Description | Default |
| ------------- | -------------------------------------- | -------------------------------------------------------------------------------------------- | ---------- |
| `signal_key` | `str` | The key to use for the durable event. This is used to identify the event in the Hatchet API. | _required_ |
| `*conditions` | `SleepCondition \| UserEventCondition` | The conditions to wait for. Can be a SleepCondition or UserEventCondition. | `()` |
| Name | Type | Description | Default |
| ------------- | ------------------------------------------------- | -------------------------------------------------------------------------------------------- | ---------- |
| `signal_key` | `str` | The key to use for the durable event. This is used to identify the event in the Hatchet API. | _required_ |
| `*conditions` | `SleepCondition \| UserEventCondition \| OrGroup` | The conditions to wait for. Can be a SleepCondition or UserEventCondition. | `()` |
Returns:
@@ -78,11 +78,12 @@ List filters for a given tenant.
Parameters:
| Name | Type | Description | Default |
| ------------------------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------- | ------- |
| `limit` | `int \| None` | The maximum number of filters to return. | `None` |
| `offset` | `int \| None` | The number of filters to skip before starting to collect the result set. | `None` |
| `workflow_id_scope_pairs` | `list[tuple[str, str]] \| None` | A list of tuples containing workflow IDs and scopes to filter by. The workflow id is first, then the scope is second. | `None` |
| Name | Type | Description | Default |
| -------------- | ------------------- | ------------------------------------------------------------------------ | ------- |
| `limit` | `int \| None` | The maximum number of filters to return. | `None` |
| `offset` | `int \| None` | The number of filters to skip before starting to collect the result set. | `None` |
| `workflow_ids` | `list[str] \| None` | A list of workflow IDs to filter by. | `None` |
| `scopes` | `list[str] \| None` | A list of scopes to filter by. | `None` |
Returns:
@@ -164,11 +165,12 @@ List filters for a given tenant.
Parameters:
| Name | Type | Description | Default |
| ------------------------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------- | ------- |
| `limit` | `int \| None` | The maximum number of filters to return. | `None` |
| `offset` | `int \| None` | The number of filters to skip before starting to collect the result set. | `None` |
| `workflow_id_scope_pairs` | `list[tuple[str, str]] \| None` | A list of tuples containing workflow IDs and scopes to filter by. The workflow id is first, then the scope is second. | `None` |
| Name | Type | Description | Default |
| -------------- | ------------------- | ------------------------------------------------------------------------ | ------- |
| `limit` | `int \| None` | The maximum number of filters to return. | `None` |
| `offset` | `int \| None` | The number of filters to skip before starting to collect the result set. | `None` |
| `workflow_ids` | `list[str] \| None` | A list of workflow IDs to filter by. | `None` |
| `scopes` | `list[str] \| None` | A list of scopes to filter by. | `None` |
Returns:
@@ -120,6 +120,7 @@ Parameters:
| `worker_id` | `str \| None` | The worker ID to filter task runs by. | `None` |
| `parent_task_external_id` | `str \| None` | The parent task external ID to filter task runs by. | `None` |
| `triggering_event_external_id` | `str \| None` | The event id that triggered the task run. | `None` |
| `include_payloads` | `bool` | Whether to include payloads in the response. | `True` |
Returns:
@@ -146,6 +147,7 @@ Parameters:
| `worker_id` | `str \| None` | The worker ID to filter task runs by. | `None` |
| `parent_task_external_id` | `str \| None` | The parent task external ID to filter task runs by. | `None` |
| `triggering_event_external_id` | `str \| None` | The event id that triggered the task run. | `None` |
| `include_payloads` | `bool` | Whether to include payloads in the response. | `True` |
Returns:
+24 -22
@@ -2,7 +2,7 @@
`Runnables` in the Hatchet SDK are things that can be run, namely tasks and workflows. The two main types of runnables you'll encounter are:
- `Workflow`, which lets you define tasks and call all of the run, schedule, etc. methods.
- `Workflow`, which lets you define tasks and call all of the run, schedule, etc. methods
- `Standalone`, which is a single task that's returned by `hatchet.task` and can be run, scheduled, etc.
## Workflow
@@ -188,7 +188,7 @@ Parameters:
| `name` | `str \| None` | The name of the on-success task. If not specified, defaults to the name of the function being wrapped by the `on_success_task` decorator. | `None` |
| `schedule_timeout` | `Duration` | The maximum time to wait for the task to be scheduled. The run will be canceled if the task does not begin within this time. | `timedelta(minutes=5)` |
| `execution_timeout` | `Duration` | The maximum time to wait for the task to complete. The run will be canceled if the task does not complete within this time. | `timedelta(seconds=60)` |
| `retries` | `int` | The number of times to retry the on-success task before failing. | `0` |
| `retries` | `int` | The number of times to retry the on-success task before failing | `0` |
| `rate_limits` | `list[RateLimit] \| None` | A list of rate limit configurations for the on-success task. | `None` |
| `backoff_factor` | `float \| None` | The backoff factor for controlling exponential backoff in retries. | `None` |
| `backoff_max_seconds` | `int \| None` | The maximum number of seconds to allow retries with exponential backoff to continue. | `None` |
@@ -196,9 +196,9 @@ Parameters:
Returns:
| Type | Description |
| --------------------------------------------------------------------------------------------------------------- | ------------------------------------------ |
| `Callable[[Callable[Concatenate[TWorkflowInput, Context, P], R \| CoroutineLike[R]]], Task[TWorkflowInput, R]]` | A decorator which creates a `Task` object. |
| Type | Description |
| --------------------------------------------------------------------------------------------------------------- | ---------------------------------------- |
| `Callable[[Callable[Concatenate[TWorkflowInput, Context, P], R \| CoroutineLike[R]]], Task[TWorkflowInput, R]]` | A decorator which creates a Task object. |
#### `run`
@@ -728,9 +728,9 @@ Parameters:
Returns:
| Type | Description |
| ------------------------------------- | ---------------------------------------- |
| `list[R] \| list[R \| BaseException]` | A list of results for each workflow run. |
| Type | Description |
| -------- | ------------------------- | ---------------------------------------- |
| `list[R] | list[R \| BaseException]` | A list of results for each workflow run. |
#### `run_many_no_wait`
@@ -1014,13 +1014,14 @@ Mimic the execution of a task. This method is intended to be used to unit test t
Parameters:
| Name | Type | Description | Default |
| --------------------- | -------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| `input` | `TWorkflowInput \| None` | The input to the task. | `None` |
| `additional_metadata` | `JSONSerializableMapping \| None` | Additional metadata to attach to the task. | `None` |
| `parent_outputs` | `dict[str, JSONSerializableMapping] \| None` | Outputs from parent tasks, if any. This is useful for mimicking DAG functionality. For instance, if you have a task `step_2` that has a `parent` which is `step_1`, you can pass `parent_outputs={"step_1": {"result": "Hello, world!"}}` to `step_2.mock_run()` to be able to access `ctx.task_output(step_1)` in `step_2`. | `None` |
| `retry_count` | `int` | The number of times the task has been retried. | `0` |
| `lifespan` | `Any` | The lifespan to be used in the task, which is useful if one was set on the worker. This will allow you to access `ctx.lifespan` inside of your task. | `None` |
| Name | Type | Description | Default |
| --------------------- | -------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| `input` | `TWorkflowInput \| None` | The input to the task. | `None` |
| `additional_metadata` | `JSONSerializableMapping \| None` | Additional metadata to attach to the task. | `None` |
| `parent_outputs` | `dict[str, JSONSerializableMapping] \| None` | Outputs from parent tasks, if any. This is useful for mimicking DAG functionality. For instance, if you have a task `step_2` that has a `parent` which is `step_1`, you can pass `parent_outputs={"step_1": {"result": "Hello, world!"}}` to `step_2.mock_run()` to be able to access `ctx.task_output(step_1)` in `step_2`. | `None` |
| `retry_count` | `int` | The number of times the task has been retried. | `0` |
| `lifespan` | `Any` | The lifespan to be used in the task, which is useful if one was set on the worker. This will allow you to access `ctx.lifespan` inside of your task. | `None` |
| `dependencies` | `dict[str, Any] \| None` | Dependencies to be injected into the task. This is useful for tasks that have dependencies defined using `Depends`. **IMPORTANT**: You must pass the dependencies _directly_, **not** the `Depends` objects themselves. For example, if you have a task that has a dependency `config: Annotated[str, Depends(get_config)]`, you should pass `dependencies={"config": "config_value"}` to `aio_mock_run`. | `None` |
Returns:
@@ -1034,13 +1035,14 @@ Mimic the execution of a task. This method is intended to be used to unit test t
Parameters:
| Name | Type | Description | Default |
| --------------------- | -------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| `input` | `TWorkflowInput \| None` | The input to the task. | `None` |
| `additional_metadata` | `JSONSerializableMapping \| None` | Additional metadata to attach to the task. | `None` |
| `parent_outputs` | `dict[str, JSONSerializableMapping] \| None` | Outputs from parent tasks, if any. This is useful for mimicking DAG functionality. For instance, if you have a task `step_2` that has a `parent` which is `step_1`, you can pass `parent_outputs={"step_1": {"result": "Hello, world!"}}` to `step_2.mock_run()` to be able to access `ctx.task_output(step_1)` in `step_2`. | `None` |
| `retry_count` | `int` | The number of times the task has been retried. | `0` |
| `lifespan` | `Any` | The lifespan to be used in the task, which is useful if one was set on the worker. This will allow you to access `ctx.lifespan` inside of your task. | `None` |
| Name | Type | Description | Default |
| --------------------- | -------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| `input` | `TWorkflowInput \| None` | The input to the task. | `None` |
| `additional_metadata` | `JSONSerializableMapping \| None` | Additional metadata to attach to the task. | `None` |
| `parent_outputs` | `dict[str, JSONSerializableMapping] \| None` | Outputs from parent tasks, if any. This is useful for mimicking DAG functionality. For instance, if you have a task `step_2` that has a `parent` which is `step_1`, you can pass `parent_outputs={"step_1": {"result": "Hello, world!"}}` to `step_2.mock_run()` to be able to access `ctx.task_output(step_1)` in `step_2`. | `None` |
| `retry_count` | `int` | The number of times the task has been retried. | `0` |
| `lifespan` | `Any` | The lifespan to be used in the task, which is useful if one was set on the worker. This will allow you to access `ctx.lifespan` inside of your task. | `None` |
| `dependencies` | `dict[str, Any] \| None` | Dependencies to be injected into the task. This is useful for tasks that have dependencies defined using `Depends`. **IMPORTANT**: You must pass the dependencies _directly_, **not** the `Depends` objects themselves. For example, if you have a task that has a dependency `config: Annotated[str, Depends(get_config)]`, you should pass `dependencies={"config": "config_value"}` to `aio_mock_run`. | `None` |
Returns:
@@ -5,10 +5,12 @@ import (
"errors"
"fmt"
"hash/fnv"
"math/rand"
"sync"
"time"
"github.com/go-co-op/gocron/v2"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgtype"
"github.com/rs/zerolog"
@@ -364,11 +366,32 @@ func (tc *OLAPControllerImpl) handleBufferedMsgs(tenantId, msgId string, payload
return tc.handleFailedWebhookValidation(context.Background(), tenantId, payloads)
case "cel-evaluation-failure":
return tc.handleCelEvaluationFailure(context.Background(), tenantId, payloads)
case "offload-payload":
return tc.handlePayloadOffload(context.Background(), tenantId, payloads)
}
return fmt.Errorf("unknown message id: %s", msgId)
}
func (tc *OLAPControllerImpl) handlePayloadOffload(ctx context.Context, tenantId string, payloads [][]byte) error {
offloads := make([]v1.OffloadPayloadOpts, 0)
msgs := msgqueue.JSONConvert[v1.OLAPPayloadsToOffload](payloads)
for _, msg := range msgs {
for _, payload := range msg.Payloads {
if !tc.sample(payload.ExternalId.String()) {
tc.l.Debug().Msgf("skipping payload offload external id %s", payload.ExternalId)
continue
}
offloads = append(offloads, v1.OffloadPayloadOpts(payload))
}
}
return tc.repo.OLAP().OffloadPayloads(ctx, tenantId, offloads)
}
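The tc.sample gate referenced above isn't shown in this diff. Given the hash/fnv import added at the top of the file, a deterministic hash-based sampler along these lines would fit; the rate field and method body here are assumptions, not the PR's implementation:

// Hypothetical sketch of the sampling gate: hash the external id and keep a
// fixed fraction of payloads, so the same id always samples the same way.
// tc.sampleRate is an assumed field, not shown in this diff.
func (tc *OLAPControllerImpl) sample(externalId string) bool {
	h := fnv.New32a()
	_, _ = h.Write([]byte(externalId))

	return float64(h.Sum32()%1000)/1000.0 < tc.sampleRate
}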
func (tc *OLAPControllerImpl) handleCelEvaluationFailure(ctx context.Context, tenantId string, payloads [][]byte) error {
failures := make([]v1.CELEvaluationFailure, 0)
@@ -537,6 +560,7 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
eventPayloads := make([]string, 0)
eventMessages := make([]string, 0)
timestamps := make([]pgtype.Timestamptz, 0)
eventExternalIds := make([]pgtype.UUID, 0)
for _, msg := range msgs {
taskMeta := taskIdsToMetas[msg.TaskId]
@@ -559,6 +583,7 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
eventPayloads = append(eventPayloads, msg.EventPayload)
eventMessages = append(eventMessages, msg.EventMessage)
timestamps = append(timestamps, sqlchelpers.TimestamptzFromTime(msg.EventTimestamp))
eventExternalIds = append(eventExternalIds, sqlchelpers.UUIDFromStr(uuid.New().String()))
if msg.WorkerId != nil {
workerIds = append(workerIds, *msg.WorkerId)
@@ -630,6 +655,7 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
RetryCount: retryCounts[i],
WorkerID: workerId,
AdditionalEventMessage: sqlchelpers.TextFromStr(eventMessages[i]),
ExternalID: eventExternalIds[i],
}
switch eventTypes[i] {
@@ -646,7 +672,76 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
opts = append(opts, event)
}
return tc.repo.OLAP().CreateTaskEvents(ctx, tenantId, opts)
err = tc.repo.OLAP().CreateTaskEvents(ctx, tenantId, opts)
if err != nil {
return err
}
if !tc.repo.OLAP().PayloadStore().ExternalStoreEnabled() {
return nil
}
offloadToExternalOpts := make([]v1.OffloadToExternalStoreOpts, 0)
idInsertedAtToExternalId := make(map[v1.IdInsertedAt]pgtype.UUID)
for _, opt := range opts {
// generating a dummy id + inserted at to use for creating the external keys for the task events
// we do this since we don't have the id + inserted at of the events themselves on the opts, and we don't
// actually need those for anything once the keys are created.
dummyId := rand.Int63()
// randomly jitter the inserted at time by +/- 300ms to make collisions virtually impossible
dummyInsertedAt := time.Now().Add(time.Duration(rand.Intn(2*300+1)-300) * time.Millisecond)
idInsertedAtToExternalId[v1.IdInsertedAt{
ID: dummyId,
InsertedAt: sqlchelpers.TimestamptzFromTime(dummyInsertedAt),
}] = opt.ExternalID
offloadToExternalOpts = append(offloadToExternalOpts, v1.OffloadToExternalStoreOpts{
StorePayloadOpts: &v1.StorePayloadOpts{
Id: dummyId,
InsertedAt: sqlchelpers.TimestamptzFromTime(dummyInsertedAt),
ExternalId: opt.ExternalID,
Type: sqlcv1.V1PayloadTypeTASKEVENTDATA,
Payload: opt.Output,
TenantId: tenantId,
},
OffloadAt: time.Now(),
})
}
if len(offloadToExternalOpts) == 0 {
return nil
}
retrieveOptsToKey, err := tc.repo.OLAP().PayloadStore().ExternalStore().Store(ctx, offloadToExternalOpts...)
if err != nil {
return err
}
offloadOpts := make([]v1.OffloadPayloadOpts, 0)
for opt, key := range retrieveOptsToKey {
externalId := idInsertedAtToExternalId[v1.IdInsertedAt{
ID: opt.Id,
InsertedAt: opt.InsertedAt,
}]
offloadOpts = append(offloadOpts, v1.OffloadPayloadOpts{
ExternalId: externalId,
ExternalLocationKey: string(key),
})
}
err = tc.repo.OLAP().OffloadPayloads(ctx, tenantId, offloadOpts)
if err != nil {
return err
}
return nil
}
func (tc *OLAPControllerImpl) handleFailedWebhookValidation(ctx context.Context, tenantId string, payloads [][]byte) error {
@@ -19,7 +19,7 @@ func (tc *TasksControllerImpl) runProcessPayloadWAL(ctx context.Context) func()
}
func (tc *TasksControllerImpl) processPayloadWAL(ctx context.Context, partitionNumber int64) (bool, error) {
return tc.repov1.Payloads().ProcessPayloadWAL(ctx, partitionNumber)
return tc.repov1.Payloads().ProcessPayloadWAL(ctx, partitionNumber, tc.pubBuffer)
}
func (tc *TasksControllerImpl) runProcessPayloadExternalCutovers(ctx context.Context) func() {
@@ -237,7 +237,7 @@ func (d *DispatcherImpl) handleTaskBulkAssignedTask(ctx context.Context, msg *ms
}
}
inputs, err := d.repov1.Payloads().BulkRetrieve(ctx, retrievePayloadOpts...)
inputs, err := d.repov1.Payloads().Retrieve(ctx, retrievePayloadOpts...)
if err != nil {
d.l.Error().Err(err).Msgf("could not bulk retrieve inputs for %d tasks", len(bulkDatas))
+1
@@ -292,6 +292,7 @@ func (c *ConfigLoader) InitDataLayer() (res *database.Layer, err error) {
payloadStoreOpts := repov1.PayloadStoreRepositoryOpts{
EnablePayloadDualWrites: scf.PayloadStore.EnablePayloadDualWrites,
EnableTaskEventPayloadDualWrites: scf.PayloadStore.EnableTaskEventPayloadDualWrites,
EnableOLAPPayloadDualWrites: scf.PayloadStore.EnableOLAPPayloadDualWrites,
EnableDagDataPayloadDualWrites: scf.PayloadStore.EnableDagDataPayloadDualWrites,
WALPollLimit: scf.PayloadStore.WALPollLimit,
WALProcessInterval: scf.PayloadStore.WALProcessInterval,
+2
@@ -616,6 +616,7 @@ type PayloadStoreConfig struct {
EnablePayloadDualWrites bool `mapstructure:"enablePayloadDualWrites" json:"enablePayloadDualWrites,omitempty" default:"true"`
EnableTaskEventPayloadDualWrites bool `mapstructure:"enableTaskEventPayloadDualWrites" json:"enableTaskEventPayloadDualWrites,omitempty" default:"true"`
EnableDagDataPayloadDualWrites bool `mapstructure:"enableDagDataPayloadDualWrites" json:"enableDagDataPayloadDualWrites,omitempty" default:"true"`
EnableOLAPPayloadDualWrites bool `mapstructure:"enableOLAPPayloadDualWrites" json:"enableOLAPPayloadDualWrites,omitempty" default:"true"`
WALPollLimit int `mapstructure:"walPollLimit" json:"walPollLimit,omitempty" default:"1000"`
WALProcessInterval time.Duration `mapstructure:"walProcessInterval" json:"walProcessInterval,omitempty" default:"15s"`
ExternalCutoverProcessInterval time.Duration `mapstructure:"externalCutoverProcessInterval" json:"externalCutoverProcessInterval,omitempty" default:"15s"`
@@ -887,6 +888,7 @@ func BindAllEnv(v *viper.Viper) {
_ = v.BindEnv("payloadStore.enablePayloadDualWrites", "SERVER_PAYLOAD_STORE_ENABLE_PAYLOAD_DUAL_WRITES")
_ = v.BindEnv("payloadStore.enableTaskEventPayloadDualWrites", "SERVER_PAYLOAD_STORE_ENABLE_TASK_EVENT_PAYLOAD_DUAL_WRITES")
_ = v.BindEnv("payloadStore.enableDagDataPayloadDualWrites", "SERVER_PAYLOAD_STORE_ENABLE_DAG_DATA_PAYLOAD_DUAL_WRITES")
_ = v.BindEnv("payloadStore.enableOLAPPayloadDualWrites", "SERVER_PAYLOAD_STORE_ENABLE_OLAP_PAYLOAD_DUAL_WRITES")
_ = v.BindEnv("payloadStore.walPollLimit", "SERVER_PAYLOAD_STORE_WAL_POLL_LIMIT")
_ = v.BindEnv("payloadStore.walProcessInterval", "SERVER_PAYLOAD_STORE_WAL_PROCESS_INTERVAL")
_ = v.BindEnv("payloadStore.externalCutoverProcessInterval", "SERVER_PAYLOAD_STORE_EXTERNAL_CUTOVER_PROCESS_INTERVAL")
+1 -1
@@ -180,7 +180,7 @@ func (s *sharedRepository) generateExternalIdsForChildWorkflows(ctx context.Cont
}
}
payloads, err := s.payloadStore.BulkRetrieve(ctx, retrievePayloadOpts...)
payloads, err := s.payloadStore.Retrieve(ctx, retrievePayloadOpts...)
if err != nil {
return err
+3 -1
@@ -262,6 +262,7 @@ func (m *MatchRepositoryImpl) ProcessInternalEventMatches(ctx context.Context, t
storePayloadOpts[i] = StorePayloadOpts{
Id: task.ID,
InsertedAt: task.InsertedAt,
ExternalId: task.ExternalID,
Type: sqlcv1.V1PayloadTypeTASKINPUT,
Payload: task.Payload,
TenantId: task.TenantID.String(),
@@ -304,6 +305,7 @@ func (m *MatchRepositoryImpl) ProcessUserEventMatches(ctx context.Context, tenan
storePayloadOpts[i] = StorePayloadOpts{
Id: task.ID,
InsertedAt: task.InsertedAt,
ExternalId: task.ExternalID,
Type: sqlcv1.V1PayloadTypeTASKINPUT,
Payload: task.Payload,
TenantId: task.TenantID.String(),
@@ -493,7 +495,7 @@ func (m *sharedRepository) processEventMatches(ctx context.Context, tx sqlcv1.DB
}
}
payloads, err := m.payloadStore.BulkRetrieve(ctx, retrievePayloadOpts...)
payloads, err := m.payloadStore.Retrieve(ctx, retrievePayloadOpts...)
if err != nil {
return nil, fmt.Errorf("failed to retrieve dag input payloads: %w", err)
+658 -58
File diff suppressed because it is too large.
+27
@@ -0,0 +1,27 @@
package v1
import (
msgqueue "github.com/hatchet-dev/hatchet/internal/msgqueue/v1"
"github.com/jackc/pgx/v5/pgtype"
)
type OLAPPayloadToOffload struct {
ExternalId pgtype.UUID
ExternalLocationKey string
}
type OLAPPayloadsToOffload struct {
Payloads []OLAPPayloadToOffload
}
func OLAPPayloadOffloadMessage(tenantId string, payloads []OLAPPayloadToOffload) (*msgqueue.Message, error) {
return msgqueue.NewTenantMessage(
tenantId,
"offload-payload",
false,
true,
OLAPPayloadsToOffload{
Payloads: payloads,
},
)
}
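On the producer side this pairs with the WAL processor changes later in this diff: offloaded payloads are grouped by tenant, and one message per tenant is published to the OLAP queue. A sketch, mirroring the ProcessPayloadWAL call shape (the location key value is made up):

// Sketch of publishing an offload notification for one tenant.
payloads := []OLAPPayloadToOffload{
	{ExternalId: externalId, ExternalLocationKey: "payloads/example-key"},
}

msg, err := OLAPPayloadOffloadMessage(tenantId, payloads)
if err != nil {
	return fmt.Errorf("failed to create OLAP payload offload message: %w", err)
}

pubBuffer.Pub(ctx, msgqueue.OLAP_QUEUE, msg, false)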
+77 -22
@@ -6,6 +6,7 @@ import (
"sort"
"time"
msgqueue "github.com/hatchet-dev/hatchet/internal/msgqueue/v1"
"github.com/hatchet-dev/hatchet/pkg/repository/postgres/sqlchelpers"
"github.com/hatchet-dev/hatchet/pkg/repository/v1/sqlcv1"
"github.com/hatchet-dev/hatchet/pkg/telemetry"
@@ -18,11 +19,18 @@ import (
type StorePayloadOpts struct {
Id int64
InsertedAt pgtype.Timestamptz
ExternalId pgtype.UUID
Type sqlcv1.V1PayloadType
Payload []byte
TenantId string
}
type StoreOLAPPayloadOpts struct {
ExternalId pgtype.UUID
InsertedAt pgtype.Timestamptz
Payload []byte
}
type OffloadToExternalStoreOpts struct {
*StorePayloadOpts
OffloadAt time.Time
@@ -38,28 +46,27 @@ type RetrievePayloadOpts struct {
type PayloadLocation string
type ExternalPayloadLocationKey string
type BulkRetrievePayloadOpts struct {
Keys []ExternalPayloadLocationKey
TenantId string
}
type ExternalStore interface {
Store(ctx context.Context, payloads ...OffloadToExternalStoreOpts) (map[RetrievePayloadOpts]ExternalPayloadLocationKey, error)
BulkRetrieve(ctx context.Context, opts ...BulkRetrievePayloadOpts) (map[ExternalPayloadLocationKey][]byte, error)
Retrieve(ctx context.Context, keys ...ExternalPayloadLocationKey) (map[ExternalPayloadLocationKey][]byte, error)
}
type PayloadStoreRepository interface {
Store(ctx context.Context, tx sqlcv1.DBTX, payloads ...StorePayloadOpts) error
BulkRetrieve(ctx context.Context, opts ...RetrievePayloadOpts) (map[RetrievePayloadOpts][]byte, error)
ProcessPayloadWAL(ctx context.Context, partitionNumber int64) (bool, error)
Retrieve(ctx context.Context, opts ...RetrievePayloadOpts) (map[RetrievePayloadOpts][]byte, error)
RetrieveFromExternal(ctx context.Context, keys ...ExternalPayloadLocationKey) (map[ExternalPayloadLocationKey][]byte, error)
ProcessPayloadWAL(ctx context.Context, partitionNumber int64, pubBuffer *msgqueue.MQPubBuffer) (bool, error)
ProcessPayloadExternalCutovers(ctx context.Context, partitionNumber int64) (bool, error)
OverwriteExternalStore(store ExternalStore, inlineStoreTTL time.Duration)
DualWritesEnabled() bool
TaskEventDualWritesEnabled() bool
DagDataDualWritesEnabled() bool
OLAPDualWritesEnabled() bool
WALPollLimit() int
WALProcessInterval() time.Duration
ExternalCutoverProcessInterval() time.Duration
ExternalStoreEnabled() bool
ExternalStore() ExternalStore
}
type payloadStoreRepositoryImpl struct {
@@ -72,6 +79,7 @@ type payloadStoreRepositoryImpl struct {
enablePayloadDualWrites bool
enableTaskEventPayloadDualWrites bool
enableDagDataPayloadDualWrites bool
enableOLAPPayloadDualWrites bool
walPollLimit int
walProcessInterval time.Duration
externalCutoverProcessInterval time.Duration
@@ -81,6 +89,7 @@ type PayloadStoreRepositoryOpts struct {
EnablePayloadDualWrites bool
EnableTaskEventPayloadDualWrites bool
EnableDagDataPayloadDualWrites bool
EnableOLAPPayloadDualWrites bool
WALPollLimit int
WALProcessInterval time.Duration
ExternalCutoverProcessInterval time.Duration
@@ -103,6 +112,7 @@ func NewPayloadStoreRepository(
enablePayloadDualWrites: opts.EnablePayloadDualWrites,
enableTaskEventPayloadDualWrites: opts.EnableTaskEventPayloadDualWrites,
enableDagDataPayloadDualWrites: opts.EnableDagDataPayloadDualWrites,
enableOLAPPayloadDualWrites: opts.EnableOLAPPayloadDualWrites,
walPollLimit: opts.WALPollLimit,
walProcessInterval: opts.WALProcessInterval,
externalCutoverProcessInterval: opts.ExternalCutoverProcessInterval,
@@ -124,6 +134,7 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
offloadAts := make([]pgtype.Timestamptz, 0, len(payloads))
tenantIds := make([]pgtype.UUID, 0, len(payloads))
locations := make([]string, 0, len(payloads))
externalIds := make([]pgtype.UUID, 0, len(payloads))
seenPayloadUniqueKeys := make(map[PayloadUniqueKey]struct{})
@@ -153,6 +164,7 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
tenantIds = append(tenantIds, tenantId)
locations = append(locations, string(sqlcv1.V1PayloadLocationINLINE))
inlineContents = append(inlineContents, payload.Payload)
externalIds = append(externalIds, payload.ExternalId)
if p.externalStoreEnabled {
offloadAts = append(offloadAts, pgtype.Timestamptz{Time: payload.InsertedAt.Time.Add(*p.inlineStoreTTL), Valid: true})
@@ -168,6 +180,7 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
Locations: locations,
Tenantids: tenantIds,
Inlinecontents: inlineContents,
Externalids: externalIds,
})
if err != nil {
@@ -192,11 +205,19 @@ func (p *payloadStoreRepositoryImpl) Store(ctx context.Context, tx sqlcv1.DBTX,
return err
}
func (p *payloadStoreRepositoryImpl) BulkRetrieve(ctx context.Context, opts ...RetrievePayloadOpts) (map[RetrievePayloadOpts][]byte, error) {
return p.bulkRetrieve(ctx, p.pool, opts...)
func (p *payloadStoreRepositoryImpl) Retrieve(ctx context.Context, opts ...RetrievePayloadOpts) (map[RetrievePayloadOpts][]byte, error) {
return p.retrieve(ctx, p.pool, opts...)
}
func (p *payloadStoreRepositoryImpl) bulkRetrieve(ctx context.Context, tx sqlcv1.DBTX, opts ...RetrievePayloadOpts) (map[RetrievePayloadOpts][]byte, error) {
func (p *payloadStoreRepositoryImpl) RetrieveFromExternal(ctx context.Context, keys ...ExternalPayloadLocationKey) (map[ExternalPayloadLocationKey][]byte, error) {
if !p.externalStoreEnabled {
return nil, fmt.Errorf("external store not enabled")
}
return p.externalStore.Retrieve(ctx, keys...)
}
func (p *payloadStoreRepositoryImpl) retrieve(ctx context.Context, tx sqlcv1.DBTX, opts ...RetrievePayloadOpts) (map[RetrievePayloadOpts][]byte, error) {
if len(opts) == 0 {
return make(map[RetrievePayloadOpts][]byte), nil
}
@@ -227,7 +248,7 @@ func (p *payloadStoreRepositoryImpl) bulkRetrieve(ctx context.Context, tx sqlcv1
optsToPayload := make(map[RetrievePayloadOpts][]byte)
externalKeysToOpts := make(map[ExternalPayloadLocationKey]RetrievePayloadOpts)
retrievePayloadOpts := make([]BulkRetrievePayloadOpts, 0)
externalKeys := make([]ExternalPayloadLocationKey, 0)
for _, payload := range payloads {
if payload == nil {
@@ -244,17 +265,14 @@ func (p *payloadStoreRepositoryImpl) bulkRetrieve(ctx context.Context, tx sqlcv1
if payload.Location == sqlcv1.V1PayloadLocationEXTERNAL {
key := ExternalPayloadLocationKey(payload.ExternalLocationKey.String)
externalKeysToOpts[key] = opts
retrievePayloadOpts = append(retrievePayloadOpts, BulkRetrievePayloadOpts{
Keys: []ExternalPayloadLocationKey{key},
TenantId: opts.TenantId.String(),
})
externalKeys = append(externalKeys, key)
} else {
optsToPayload[opts] = payload.InlineContent
}
}
if len(retrievePayloadOpts) > 0 {
externalData, err := p.externalStore.BulkRetrieve(ctx, retrievePayloadOpts...)
if len(externalKeys) > 0 {
externalData, err := p.RetrieveFromExternal(ctx, externalKeys...)
if err != nil {
return nil, fmt.Errorf("failed to retrieve external payloads: %w", err)
}
@@ -283,7 +301,7 @@ func (p *payloadStoreRepositoryImpl) offloadToExternal(ctx context.Context, payl
return p.externalStore.Store(ctx, payloads...)
}
func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, partitionNumber int64) (bool, error) {
func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, partitionNumber int64, pubBuffer *msgqueue.MQPubBuffer) (bool, error) {
// no need to process the WAL if external store is not enabled
if !p.externalStoreEnabled {
return false, nil
@@ -340,7 +358,7 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
retrieveOptsToOffloadAt[opts] = record.OffloadAt
}
payloads, err := p.bulkRetrieve(ctx, tx, retrieveOpts...)
payloads, err := p.retrieve(ctx, tx, retrieveOpts...)
if err != nil {
return false, err
@@ -441,7 +459,7 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
return false, fmt.Errorf("failed to prepare transaction for offloading: %w", err)
}
err = p.queries.SetPayloadExternalKeys(ctx, tx, sqlcv1.SetPayloadExternalKeysParams{
updatedPayloads, err := p.queries.SetPayloadExternalKeys(ctx, tx, sqlcv1.SetPayloadExternalKeysParams{
Ids: ids,
Insertedats: insertedAts,
Payloadtypes: types,
@@ -454,10 +472,35 @@ func (p *payloadStoreRepositoryImpl) ProcessPayloadWAL(ctx context.Context, part
return false, err
}
tenantIdToPayloads := make(map[string][]OLAPPayloadToOffload)
for _, updatedPayload := range updatedPayloads {
if updatedPayload == nil || updatedPayload.Type == sqlcv1.V1PayloadTypeTASKEVENTDATA {
continue
}
tenantIdToPayloads[updatedPayload.TenantID.String()] = append(tenantIdToPayloads[updatedPayload.TenantID.String()], OLAPPayloadToOffload{
ExternalId: updatedPayload.ExternalID,
ExternalLocationKey: updatedPayload.ExternalLocationKey.String,
})
}
if err := commit(ctx); err != nil {
return false, err
}
// todo: make this transactionally safe
// there's no application-level risk here because the worst case if
// we miss an event is we don't mark the payload as external and there's a bit
// of disk bloat, but it'd be good to not need to worry about that
for tenantId, payloads := range tenantIdToPayloads {
msg, err := OLAPPayloadOffloadMessage(tenantId, payloads)
if err != nil {
return false, fmt.Errorf("failed to create OLAP payload offload message: %w", err)
}
pubBuffer.Pub(ctx, msgqueue.OLAP_QUEUE, msg, false)
}
return hasMoreWALRecords, nil
}
@@ -528,6 +571,10 @@ func (p *payloadStoreRepositoryImpl) DagDataDualWritesEnabled() bool {
return p.enableDagDataPayloadDualWrites
}
func (p *payloadStoreRepositoryImpl) OLAPDualWritesEnabled() bool {
return p.enableOLAPPayloadDualWrites
}
func (p *payloadStoreRepositoryImpl) WALPollLimit() int {
return p.walPollLimit
}
@@ -540,12 +587,20 @@ func (p *payloadStoreRepositoryImpl) ExternalCutoverProcessInterval() time.Durat
return p.externalCutoverProcessInterval
}
func (p *payloadStoreRepositoryImpl) ExternalStoreEnabled() bool {
return p.externalStoreEnabled
}
func (p *payloadStoreRepositoryImpl) ExternalStore() ExternalStore {
return p.externalStore
}
type NoOpExternalStore struct{}
func (n *NoOpExternalStore) Store(ctx context.Context, payloads ...OffloadToExternalStoreOpts) (map[RetrievePayloadOpts]ExternalPayloadLocationKey, error) {
return nil, fmt.Errorf("external store disabled")
}
func (n *NoOpExternalStore) BulkRetrieve(ctx context.Context, opts ...BulkRetrievePayloadOpts) (map[ExternalPayloadLocationKey][]byte, error) {
func (n *NoOpExternalStore) Retrieve(ctx context.Context, keys ...ExternalPayloadLocationKey) (map[ExternalPayloadLocationKey][]byte, error) {
return nil, fmt.Errorf("external store disabled")
}
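For callers, the BulkRetrieve-to-Retrieve rename keeps the map-keyed result shape: payloads come back keyed by the same opts passed in, whether served inline or fetched from the external store. A sketch, assuming RetrievePayloadOpts carries the Id, InsertedAt, Type, and TenantId fields the surrounding hunks reference:

// Caller-side sketch after the rename. The RetrievePayloadOpts fields follow
// the usages visible in this diff and are assumptions, not a confirmed API.
opts := RetrievePayloadOpts{
	Id:         task.ID,
	InsertedAt: task.InsertedAt,
	Type:       sqlcv1.V1PayloadTypeTASKINPUT,
	TenantId:   task.TenantID,
}

payloads, err := p.Retrieve(ctx, opts)
if err != nil {
	return fmt.Errorf("failed to retrieve payloads: %w", err)
}

input := payloads[opts] // inline content, or bytes fetched from the external store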
+2 -1
@@ -197,6 +197,7 @@ func (r iteratorForCreateTaskEventsOLAP) Values() ([]interface{}, error) {
r.rows[0].WorkerID,
r.rows[0].AdditionalEventData,
r.rows[0].AdditionalEventMessage,
r.rows[0].ExternalID,
}, nil
}
@@ -205,7 +206,7 @@ func (r iteratorForCreateTaskEventsOLAP) Err() error {
}
func (q *Queries) CreateTaskEventsOLAP(ctx context.Context, db DBTX, arg []CreateTaskEventsOLAPParams) (int64, error) {
return db.CopyFrom(ctx, []string{"v1_task_events_olap"}, []string{"tenant_id", "task_id", "task_inserted_at", "event_type", "workflow_id", "event_timestamp", "readable_status", "retry_count", "error_message", "output", "worker_id", "additional__event_data", "additional__event_message"}, &iteratorForCreateTaskEventsOLAP{rows: arg})
return db.CopyFrom(ctx, []string{"v1_task_events_olap"}, []string{"tenant_id", "task_id", "task_inserted_at", "event_type", "workflow_id", "event_timestamp", "readable_status", "retry_count", "error_message", "output", "worker_id", "additional__event_data", "additional__event_message", "external_id"}, &iteratorForCreateTaskEventsOLAP{rows: arg})
}
// iteratorForCreateTaskEventsOLAPTmp implements pgx.CopyFromSource.
+54
@@ -1436,6 +1436,48 @@ func (ns NullV1PayloadLocation) Value() (driver.Value, error) {
return string(ns.V1PayloadLocation), nil
}
type V1PayloadLocationOlap string
const (
V1PayloadLocationOlapINLINE V1PayloadLocationOlap = "INLINE"
V1PayloadLocationOlapEXTERNAL V1PayloadLocationOlap = "EXTERNAL"
)
func (e *V1PayloadLocationOlap) Scan(src interface{}) error {
switch s := src.(type) {
case []byte:
*e = V1PayloadLocationOlap(s)
case string:
*e = V1PayloadLocationOlap(s)
default:
return fmt.Errorf("unsupported scan type for V1PayloadLocationOlap: %T", src)
}
return nil
}
type NullV1PayloadLocationOlap struct {
V1PayloadLocationOlap V1PayloadLocationOlap `json:"v1_payload_location_olap"`
Valid bool `json:"valid"` // Valid is true if V1PayloadLocationOlap is not NULL
}
// Scan implements the Scanner interface.
func (ns *NullV1PayloadLocationOlap) Scan(value interface{}) error {
if value == nil {
ns.V1PayloadLocationOlap, ns.Valid = "", false
return nil
}
ns.Valid = true
return ns.V1PayloadLocationOlap.Scan(value)
}
// Value implements the driver Valuer interface.
func (ns NullV1PayloadLocationOlap) Value() (driver.Value, error) {
if !ns.Valid {
return nil, nil
}
return string(ns.V1PayloadLocationOlap), nil
}
type V1PayloadType string
const (
@@ -3079,6 +3121,7 @@ type V1Payload struct {
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
Type V1PayloadType `json:"type"`
Location V1PayloadLocation `json:"location"`
ExternalLocationKey pgtype.Text `json:"external_location_key"`
@@ -3103,6 +3146,16 @@ type V1PayloadWal struct {
Operation V1PayloadWalOperation `json:"operation"`
}
type V1PayloadsOlap struct {
TenantID pgtype.UUID `json:"tenant_id"`
ExternalID pgtype.UUID `json:"external_id"`
Location V1PayloadLocationOlap `json:"location"`
ExternalLocationKey pgtype.Text `json:"external_location_key"`
InlineContent []byte `json:"inline_content"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
}
type V1Queue struct {
TenantID pgtype.UUID `json:"tenant_id"`
Name string `json:"name"`
@@ -3262,6 +3315,7 @@ type V1TaskEventsOlap struct {
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
TaskID int64 `json:"task_id"`
TaskInsertedAt pgtype.Timestamptz `json:"task_inserted_at"`
EventType V1EventTypeOlap `json:"event_type"`
+112 -6
@@ -13,7 +13,8 @@ SELECT
create_v1_range_partition('v1_event_to_run_olap'::text, @date::date),
create_v1_weekly_range_partition('v1_event_lookup_table_olap'::text, @date::date),
create_v1_range_partition('v1_incoming_webhook_validation_failures_olap'::text, @date::date),
create_v1_range_partition('v1_cel_evaluation_failures_olap'::text, @date::date)
create_v1_range_partition('v1_cel_evaluation_failures_olap'::text, @date::date),
create_v1_range_partition('v1_payloads_olap'::text, @date::date)
;
-- name: AnalyzeV1RunsOLAP :exec
@@ -28,6 +29,9 @@ ANALYZE v1_dags_olap;
-- name: AnalyzeV1DAGToTaskOLAP :exec
ANALYZE v1_dag_to_task_olap;
-- name: AnalyzeV1PayloadsOLAP :exec
ANALYZE v1_payloads_olap;
-- name: ListOLAPPartitionsBeforeDate :many
WITH task_partitions AS (
SELECT 'v1_tasks_olap' AS parent_table, p::text as partition_name FROM get_v1_partitions_before_date('v1_tasks_olap'::text, @date::date) AS p
@@ -45,6 +49,8 @@ WITH task_partitions AS (
SELECT 'v1_incoming_webhook_validation_failures_olap' AS parent_table, p::TEXT AS partition_name FROM get_v1_partitions_before_date('v1_incoming_webhook_validation_failures_olap', @date::date) AS p
), cel_evaluation_failures_partitions AS (
SELECT 'v1_cel_evaluation_failures_olap' AS parent_table, p::TEXT AS partition_name FROM get_v1_partitions_before_date('v1_cel_evaluation_failures_olap', @date::date) AS p
), payloads_partitions AS (
SELECT 'v1_payloads_olap' AS parent_table, p::TEXT AS partition_name FROM get_v1_partitions_before_date('v1_payloads_olap', @date::date) AS p
), candidates AS (
SELECT
*
@@ -99,6 +105,13 @@ WITH task_partitions AS (
*
FROM
cel_evaluation_failures_partitions
UNION ALL
SELECT
*
FROM
payloads_partitions
)
SELECT *
@@ -106,7 +119,9 @@ FROM candidates
WHERE
CASE
WHEN @shouldPartitionEventsTables::BOOLEAN THEN TRUE
ELSE parent_table NOT IN ('v1_events_olap', 'v1_event_to_run_olap', 'v1_cel_evaluation_failures_olap', 'v1_incoming_webhook_validation_failures_olap')
-- this is a list of all of the tables that are hypertables in Timescale, so we should not manually drop their
-- partitions if @shouldPartitionEventsTables is false
ELSE parent_table NOT IN ('v1_events_olap', 'v1_event_to_run_olap', 'v1_cel_evaluation_failures_olap', 'v1_incoming_webhook_validation_failures_olap', 'v1_payloads_olap')
END
;
@@ -217,7 +232,8 @@ INSERT INTO v1_task_events_olap (
output,
worker_id,
additional__event_data,
additional__event_message
additional__event_message,
external_id
) VALUES (
$1,
$2,
@@ -231,7 +247,8 @@ INSERT INTO v1_task_events_olap (
$10,
$11,
$12,
$13
$13,
$14
);
-- name: ReadTaskByExternalID :one
@@ -248,6 +265,7 @@ WITH lookup_task AS (
SELECT
t.*,
e.output,
e.external_id AS event_external_id,
e.error_message
FROM
v1_tasks_olap t
@@ -336,6 +354,7 @@ SELECT
t.readable_status,
t.error_message,
t.output,
t.external_id AS event_external_id,
t.worker_id,
t.additional__event_data,
t.additional__event_message
@@ -386,6 +405,7 @@ SELECT
t.readable_status,
t.error_message,
t.output,
t.external_id AS event_external_id,
t.worker_id,
t.additional__event_data,
t.additional__event_message,
@@ -439,8 +459,16 @@ WITH selected_retry_count AS (
relevant_events
WHERE
event_type = 'STARTED'
), queued_at AS (
SELECT
MAX(event_timestamp) AS queued_at
FROM
relevant_events
WHERE
event_type = 'QUEUED'
), task_output AS (
SELECT
external_id,
output
FROM
relevant_events
@@ -483,7 +511,9 @@ SELECT
st.readable_status::v1_readable_status_olap as status,
f.finished_at::timestamptz as finished_at,
s.started_at::timestamptz as started_at,
o.output::jsonb as output,
q.queued_at::timestamptz as queued_at,
o.external_id::UUID AS output_event_external_id,
o.output as output,
e.error_message as error_message,
sc.spawned_children,
(SELECT retry_count FROM selected_retry_count) as retry_count
@@ -493,6 +523,8 @@ LEFT JOIN
finished_at f ON true
LEFT JOIN
started_at s ON true
LEFT JOIN
queued_at q ON true
LEFT JOIN
task_output o ON true
LEFT JOIN
@@ -511,6 +543,7 @@ WITH metadata AS (
MAX(event_timestamp) FILTER (WHERE event_type = 'STARTED')::TIMESTAMPTZ AS started_at,
MAX(event_timestamp) FILTER (WHERE readable_status = ANY(ARRAY['COMPLETED', 'FAILED', 'CANCELLED']::v1_readable_status_olap[]))::TIMESTAMPTZ AS finished_at,
MAX(output::TEXT) FILTER (WHERE readable_status = 'COMPLETED')::JSONB AS output,
MAX(external_id::TEXT) FILTER (WHERE readable_status = 'COMPLETED')::UUID AS output_event_external_id,
MAX(error_message) FILTER (WHERE readable_status = 'FAILED')::TEXT AS error_message
FROM
v1_task_events_olap
@@ -562,7 +595,8 @@ SELECT
CASE
WHEN @includePayloads::BOOLEAN THEN m.output::JSONB
ELSE '{}'::JSONB
END::JSONB AS output
END::JSONB AS output,
m.output_event_external_id::UUID AS output_event_external_id
FROM
v1_tasks_olap t, metadata m
WHERE
@@ -983,6 +1017,7 @@ WITH run AS (
MAX(e.event_timestamp) FILTER (WHERE e.readable_status IN ('COMPLETED', 'CANCELLED', 'FAILED'))::timestamptz AS finished_at,
MAX(e.error_message) FILTER (WHERE e.readable_status = 'FAILED') AS error_message,
MAX(e.output::TEXT) FILTER (WHERE e.event_type = 'FINISHED')::JSONB AS output,
MAX(e.external_id::TEXT) FILTER (WHERE e.event_type = 'FINISHED')::UUID AS output_event_external_id,
MAX(e.retry_count) AS max_retry_count
FROM relevant_events e
WHERE e.retry_count = (
@@ -999,6 +1034,7 @@ SELECT
-- hack to force this to string since sqlc can't figure out that this should be pgtype.Text
COALESCE(m.error_message, '')::TEXT AS error_message,
m.output::JSONB AS output,
m.output_event_external_id::UUID AS output_event_external_id,
COALESCE(m.max_retry_count, 0)::int as retry_count
FROM run r, metadata m
;
@@ -1583,6 +1619,76 @@ SELECT @tenantId::UUID, source, error
FROM inputs
;
-- name: PutPayloads :exec
WITH inputs AS (
SELECT
UNNEST(@externalIds::UUID[]) AS external_id,
UNNEST(@insertedAts::TIMESTAMPTZ[]) AS inserted_at,
UNNEST(@payloads::JSONB[]) AS payload,
UNNEST(@tenantIds::UUID[]) AS tenant_id,
UNNEST(CAST(@locations::TEXT[] AS v1_payload_location_olap[])) AS location
)
INSERT INTO v1_payloads_olap (
tenant_id,
external_id,
inserted_at,
location,
external_location_key,
inline_content
)
SELECT
i.tenant_id,
i.external_id,
i.inserted_at,
i.location,
CASE
WHEN i.location = 'EXTERNAL' THEN i.payload
ELSE NULL
END,
CASE
WHEN i.location = 'INLINE' THEN i.payload
ELSE NULL
END AS inline_content
FROM inputs i
ON CONFLICT (tenant_id, external_id, inserted_at) DO UPDATE
SET
location = EXCLUDED.location,
external_location_key = EXCLUDED.external_location_key,
inline_content = EXCLUDED.inline_content,
updated_at = NOW()
;
-- name: OffloadPayloads :exec
WITH inputs AS (
SELECT
UNNEST(@externalIds::UUID[]) AS external_id,
UNNEST(@tenantIds::UUID[]) AS tenant_id,
UNNEST(@externalLocationKeys::TEXT[]) AS external_location_key
)
UPDATE v1_payloads_olap
SET
location = 'EXTERNAL',
external_location_key = i.external_location_key,
inline_content = NULL,
updated_at = NOW()
FROM inputs i
WHERE
(v1_payloads_olap.tenant_id, v1_payloads_olap.external_id) = (i.tenant_id, i.external_id)
AND v1_payloads_olap.location = 'INLINE'
AND v1_payloads_olap.external_location_key IS NULL
;
-- name: ReadPayloadsOLAP :many
SELECT *
FROM v1_payloads_olap
WHERE
tenant_id = @tenantId::UUID
AND external_id = ANY(@externalIds::UUID[])
;
-- name: ListWorkflowRunExternalIds :many
SELECT external_id
FROM v1_runs_olap
+271 -83
@@ -29,6 +29,15 @@ func (q *Queries) AnalyzeV1DAGsOLAP(ctx context.Context, db DBTX) error {
return err
}
const analyzeV1PayloadsOLAP = `-- name: AnalyzeV1PayloadsOLAP :exec
ANALYZE v1_payloads_olap
`
func (q *Queries) AnalyzeV1PayloadsOLAP(ctx context.Context, db DBTX) error {
_, err := db.Exec(ctx, analyzeV1PayloadsOLAP)
return err
}
const analyzeV1RunsOLAP = `-- name: AnalyzeV1RunsOLAP :exec
ANALYZE v1_runs_olap
`
@@ -191,7 +200,8 @@ SELECT
create_v1_range_partition('v1_event_to_run_olap'::text, $1::date),
create_v1_weekly_range_partition('v1_event_lookup_table_olap'::text, $1::date),
create_v1_range_partition('v1_incoming_webhook_validation_failures_olap'::text, $1::date),
create_v1_range_partition('v1_cel_evaluation_failures_olap'::text, $1::date)
create_v1_range_partition('v1_cel_evaluation_failures_olap'::text, $1::date),
create_v1_range_partition('v1_payloads_olap'::text, $1::date)
`
func (q *Queries) CreateOLAPEventPartitions(ctx context.Context, db DBTX, date pgtype.Date) error {
@@ -232,6 +242,7 @@ type CreateTaskEventsOLAPParams struct {
WorkerID pgtype.UUID `json:"worker_id"`
AdditionalEventData pgtype.Text `json:"additional__event_data"`
AdditionalEventMessage pgtype.Text `json:"additional__event_message"`
ExternalID pgtype.UUID `json:"external_id"`
}
type CreateTaskEventsOLAPTmpParams struct {
@@ -1028,6 +1039,8 @@ WITH task_partitions AS (
SELECT 'v1_incoming_webhook_validation_failures_olap' AS parent_table, p::TEXT AS partition_name FROM get_v1_partitions_before_date('v1_incoming_webhook_validation_failures_olap', $2::date) AS p
), cel_evaluation_failures_partitions AS (
SELECT 'v1_cel_evaluation_failures_olap' AS parent_table, p::TEXT AS partition_name FROM get_v1_partitions_before_date('v1_cel_evaluation_failures_olap', $2::date) AS p
), payloads_partitions AS (
SELECT 'v1_payloads_olap' AS parent_table, p::TEXT AS partition_name FROM get_v1_partitions_before_date('v1_payloads_olap', $2::date) AS p
), candidates AS (
SELECT
parent_table, partition_name
@@ -1082,6 +1095,13 @@ WITH task_partitions AS (
parent_table, partition_name
FROM
cel_evaluation_failures_partitions
UNION ALL
SELECT
parent_table, partition_name
FROM
payloads_partitions
)
SELECT parent_table, partition_name
@@ -1089,7 +1109,9 @@ FROM candidates
WHERE
CASE
WHEN $1::BOOLEAN THEN TRUE
ELSE parent_table NOT IN ('v1_events_olap', 'v1_event_to_run_olap', 'v1_cel_evaluation_failures_olap', 'v1_incoming_webhook_validation_failures_olap')
-- this is a list of all of the tables that are hypertables in Timescale, so we should not manually drop their
-- partitions if @shouldPartitionEventsTables is false
ELSE parent_table NOT IN ('v1_events_olap', 'v1_event_to_run_olap', 'v1_cel_evaluation_failures_olap', 'v1_incoming_webhook_validation_failures_olap', 'v1_payloads_olap')
END
`
@@ -1156,6 +1178,7 @@ SELECT
t.readable_status,
t.error_message,
t.output,
t.external_id AS event_external_id,
t.worker_id,
t.additional__event_data,
t.additional__event_message
@@ -1188,6 +1211,7 @@ type ListTaskEventsRow struct {
ReadableStatus V1ReadableStatusOlap `json:"readable_status"`
ErrorMessage pgtype.Text `json:"error_message"`
Output []byte `json:"output"`
EventExternalID pgtype.UUID `json:"event_external_id"`
WorkerID pgtype.UUID `json:"worker_id"`
AdditionalEventData pgtype.Text `json:"additional__event_data"`
AdditionalEventMessage pgtype.Text `json:"additional__event_message"`
@@ -1216,6 +1240,7 @@ func (q *Queries) ListTaskEvents(ctx context.Context, db DBTX, arg ListTaskEvent
&i.ReadableStatus,
&i.ErrorMessage,
&i.Output,
&i.EventExternalID,
&i.WorkerID,
&i.AdditionalEventData,
&i.AdditionalEventMessage,
@@ -1269,6 +1294,7 @@ SELECT
t.readable_status,
t.error_message,
t.output,
t.external_id AS event_external_id,
t.worker_id,
t.additional__event_data,
t.additional__event_message,
@@ -1304,6 +1330,7 @@ type ListTaskEventsForWorkflowRunRow struct {
ReadableStatus V1ReadableStatusOlap `json:"readable_status"`
ErrorMessage pgtype.Text `json:"error_message"`
Output []byte `json:"output"`
EventExternalID pgtype.UUID `json:"event_external_id"`
WorkerID pgtype.UUID `json:"worker_id"`
AdditionalEventData pgtype.Text `json:"additional__event_data"`
AdditionalEventMessage pgtype.Text `json:"additional__event_message"`
@@ -1334,6 +1361,7 @@ func (q *Queries) ListTaskEventsForWorkflowRun(ctx context.Context, db DBTX, arg
&i.ReadableStatus,
&i.ErrorMessage,
&i.Output,
&i.EventExternalID,
&i.WorkerID,
&i.AdditionalEventData,
&i.AdditionalEventMessage,
@@ -1560,6 +1588,38 @@ func (q *Queries) ListWorkflowRunExternalIds(ctx context.Context, db DBTX, arg L
return items, nil
}
const offloadPayloads = `-- name: OffloadPayloads :exec
WITH inputs AS (
SELECT
UNNEST($1::UUID[]) AS external_id,
UNNEST($2::UUID[]) AS tenant_id,
UNNEST($3::TEXT[]) AS external_location_key
)
UPDATE v1_payloads_olap
SET
location = 'EXTERNAL',
external_location_key = i.external_location_key,
inline_content = NULL,
updated_at = NOW()
FROM inputs i
WHERE
(v1_payloads_olap.tenant_id, v1_payloads_olap.external_id) = (i.tenant_id, i.external_id)
AND v1_payloads_olap.location = 'INLINE'
AND v1_payloads_olap.external_location_key IS NULL
`
type OffloadPayloadsParams struct {
Externalids []pgtype.UUID `json:"externalids"`
Tenantids []pgtype.UUID `json:"tenantids"`
Externallocationkeys []string `json:"externallocationkeys"`
}
func (q *Queries) OffloadPayloads(ctx context.Context, db DBTX, arg OffloadPayloadsParams) error {
_, err := db.Exec(ctx, offloadPayloads, arg.Externalids, arg.Tenantids, arg.Externallocationkeys)
return err
}
const populateDAGMetadata = `-- name: PopulateDAGMetadata :one
WITH run AS (
SELECT
@@ -1590,7 +1650,7 @@ WITH run AS (
AND r.tenant_id = $4::UUID
AND r.kind = 'DAG'
), relevant_events AS (
SELECT e.tenant_id, e.id, e.inserted_at, e.task_id, e.task_inserted_at, e.event_type, e.workflow_id, e.event_timestamp, e.readable_status, e.retry_count, e.error_message, e.output, e.worker_id, e.additional__event_data, e.additional__event_message
SELECT e.tenant_id, e.id, e.inserted_at, e.external_id, e.task_id, e.task_inserted_at, e.event_type, e.workflow_id, e.event_timestamp, e.readable_status, e.retry_count, e.error_message, e.output, e.worker_id, e.additional__event_data, e.additional__event_message
FROM run r
JOIN v1_dag_to_task_olap dt ON (r.dag_id, r.inserted_at) = (dt.dag_id, dt.dag_inserted_at)
JOIN v1_task_events_olap e ON (e.task_id, e.task_inserted_at) = (dt.task_id, dt.task_inserted_at)
@@ -1601,6 +1661,7 @@ WITH run AS (
MAX(e.event_timestamp) FILTER (WHERE e.readable_status IN ('COMPLETED', 'CANCELLED', 'FAILED'))::timestamptz AS finished_at,
MAX(e.error_message) FILTER (WHERE e.readable_status = 'FAILED') AS error_message,
MAX(e.output::TEXT) FILTER (WHERE e.event_type = 'FINISHED')::JSONB AS output,
MAX(e.external_id::TEXT) FILTER (WHERE e.event_type = 'FINISHED')::UUID AS output_event_external_id,
MAX(e.retry_count) AS max_retry_count
FROM relevant_events e
WHERE e.retry_count = (
@@ -1617,6 +1678,7 @@ SELECT
-- hack to force this to string since sqlc can't figure out that this should be pgtype.Text
COALESCE(m.error_message, '')::TEXT AS error_message,
m.output::JSONB AS output,
m.output_event_external_id::UUID AS output_event_external_id,
COALESCE(m.max_retry_count, 0)::int as retry_count
FROM run r, metadata m
`
@@ -1629,25 +1691,26 @@ type PopulateDAGMetadataParams struct {
}
type PopulateDAGMetadataRow struct {
DagID int64 `json:"dag_id"`
RunID int64 `json:"run_id"`
TenantID pgtype.UUID `json:"tenant_id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
ReadableStatus V1ReadableStatusOlap `json:"readable_status"`
Kind V1RunKind `json:"kind"`
WorkflowID pgtype.UUID `json:"workflow_id"`
DisplayName string `json:"display_name"`
Input []byte `json:"input"`
AdditionalMetadata []byte `json:"additional_metadata"`
WorkflowVersionID pgtype.UUID `json:"workflow_version_id"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
CreatedAt pgtype.Timestamptz `json:"created_at"`
StartedAt pgtype.Timestamptz `json:"started_at"`
FinishedAt pgtype.Timestamptz `json:"finished_at"`
ErrorMessage string `json:"error_message"`
Output []byte `json:"output"`
RetryCount int32 `json:"retry_count"`
DagID int64 `json:"dag_id"`
RunID int64 `json:"run_id"`
TenantID pgtype.UUID `json:"tenant_id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
ReadableStatus V1ReadableStatusOlap `json:"readable_status"`
Kind V1RunKind `json:"kind"`
WorkflowID pgtype.UUID `json:"workflow_id"`
DisplayName string `json:"display_name"`
Input []byte `json:"input"`
AdditionalMetadata []byte `json:"additional_metadata"`
WorkflowVersionID pgtype.UUID `json:"workflow_version_id"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
CreatedAt pgtype.Timestamptz `json:"created_at"`
StartedAt pgtype.Timestamptz `json:"started_at"`
FinishedAt pgtype.Timestamptz `json:"finished_at"`
ErrorMessage string `json:"error_message"`
Output []byte `json:"output"`
OutputEventExternalID pgtype.UUID `json:"output_event_external_id"`
RetryCount int32 `json:"retry_count"`
}
func (q *Queries) PopulateDAGMetadata(ctx context.Context, db DBTX, arg PopulateDAGMetadataParams) (*PopulateDAGMetadataRow, error) {
@@ -1677,6 +1740,7 @@ func (q *Queries) PopulateDAGMetadata(ctx context.Context, db DBTX, arg Populate
&i.FinishedAt,
&i.ErrorMessage,
&i.Output,
&i.OutputEventExternalID,
&i.RetryCount,
)
return &i, err
@@ -1760,7 +1824,7 @@ WITH selected_retry_count AS (
LIMIT 1
), relevant_events AS (
SELECT
tenant_id, id, inserted_at, task_id, task_inserted_at, event_type, workflow_id, event_timestamp, readable_status, retry_count, error_message, output, worker_id, additional__event_data, additional__event_message
tenant_id, id, inserted_at, external_id, task_id, task_inserted_at, event_type, workflow_id, event_timestamp, readable_status, retry_count, error_message, output, worker_id, additional__event_data, additional__event_message
FROM
v1_task_events_olap
WHERE
@@ -1782,8 +1846,16 @@ WITH selected_retry_count AS (
relevant_events
WHERE
event_type = 'STARTED'
), queued_at AS (
SELECT
MAX(event_timestamp) AS queued_at
FROM
relevant_events
WHERE
event_type = 'QUEUED'
), task_output AS (
SELECT
external_id,
output
FROM
relevant_events
@@ -1826,7 +1898,9 @@ SELECT
st.readable_status::v1_readable_status_olap as status,
f.finished_at::timestamptz as finished_at,
s.started_at::timestamptz as started_at,
o.output::jsonb as output,
q.queued_at::timestamptz as queued_at,
o.external_id::UUID AS output_event_external_id,
o.output as output,
e.error_message as error_message,
sc.spawned_children,
(SELECT retry_count FROM selected_retry_count) as retry_count
@@ -1836,6 +1910,8 @@ LEFT JOIN
finished_at f ON true
LEFT JOIN
started_at s ON true
LEFT JOIN
queued_at q ON true
LEFT JOIN
task_output o ON true
LEFT JOIN
@@ -1856,37 +1932,39 @@ type PopulateSingleTaskRunDataParams struct {
}
type PopulateSingleTaskRunDataRow struct {
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
Queue string `json:"queue"`
ActionID string `json:"action_id"`
StepID pgtype.UUID `json:"step_id"`
WorkflowID pgtype.UUID `json:"workflow_id"`
WorkflowVersionID pgtype.UUID `json:"workflow_version_id"`
WorkflowRunID pgtype.UUID `json:"workflow_run_id"`
ScheduleTimeout string `json:"schedule_timeout"`
StepTimeout pgtype.Text `json:"step_timeout"`
Priority pgtype.Int4 `json:"priority"`
Sticky V1StickyStrategyOlap `json:"sticky"`
DesiredWorkerID pgtype.UUID `json:"desired_worker_id"`
DisplayName string `json:"display_name"`
Input []byte `json:"input"`
AdditionalMetadata []byte `json:"additional_metadata"`
ReadableStatus V1ReadableStatusOlap `json:"readable_status"`
LatestRetryCount int32 `json:"latest_retry_count"`
LatestWorkerID pgtype.UUID `json:"latest_worker_id"`
DagID pgtype.Int8 `json:"dag_id"`
DagInsertedAt pgtype.Timestamptz `json:"dag_inserted_at"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
Status V1ReadableStatusOlap `json:"status"`
FinishedAt pgtype.Timestamptz `json:"finished_at"`
StartedAt pgtype.Timestamptz `json:"started_at"`
Output []byte `json:"output"`
ErrorMessage pgtype.Text `json:"error_message"`
SpawnedChildren pgtype.Int8 `json:"spawned_children"`
RetryCount int32 `json:"retry_count"`
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
Queue string `json:"queue"`
ActionID string `json:"action_id"`
StepID pgtype.UUID `json:"step_id"`
WorkflowID pgtype.UUID `json:"workflow_id"`
WorkflowVersionID pgtype.UUID `json:"workflow_version_id"`
WorkflowRunID pgtype.UUID `json:"workflow_run_id"`
ScheduleTimeout string `json:"schedule_timeout"`
StepTimeout pgtype.Text `json:"step_timeout"`
Priority pgtype.Int4 `json:"priority"`
Sticky V1StickyStrategyOlap `json:"sticky"`
DesiredWorkerID pgtype.UUID `json:"desired_worker_id"`
DisplayName string `json:"display_name"`
Input []byte `json:"input"`
AdditionalMetadata []byte `json:"additional_metadata"`
ReadableStatus V1ReadableStatusOlap `json:"readable_status"`
LatestRetryCount int32 `json:"latest_retry_count"`
LatestWorkerID pgtype.UUID `json:"latest_worker_id"`
DagID pgtype.Int8 `json:"dag_id"`
DagInsertedAt pgtype.Timestamptz `json:"dag_inserted_at"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
Status V1ReadableStatusOlap `json:"status"`
FinishedAt pgtype.Timestamptz `json:"finished_at"`
StartedAt pgtype.Timestamptz `json:"started_at"`
QueuedAt pgtype.Timestamptz `json:"queued_at"`
OutputEventExternalID pgtype.UUID `json:"output_event_external_id"`
Output []byte `json:"output"`
ErrorMessage pgtype.Text `json:"error_message"`
SpawnedChildren pgtype.Int8 `json:"spawned_children"`
RetryCount int32 `json:"retry_count"`
}
func (q *Queries) PopulateSingleTaskRunData(ctx context.Context, db DBTX, arg PopulateSingleTaskRunDataParams) (*PopulateSingleTaskRunDataRow, error) {
@@ -1925,6 +2003,8 @@ func (q *Queries) PopulateSingleTaskRunData(ctx context.Context, db DBTX, arg Po
&i.Status,
&i.FinishedAt,
&i.StartedAt,
&i.QueuedAt,
&i.OutputEventExternalID,
&i.Output,
&i.ErrorMessage,
&i.SpawnedChildren,
@@ -1940,6 +2020,7 @@ WITH metadata AS (
MAX(event_timestamp) FILTER (WHERE event_type = 'STARTED')::TIMESTAMPTZ AS started_at,
MAX(event_timestamp) FILTER (WHERE readable_status = ANY(ARRAY['COMPLETED', 'FAILED', 'CANCELLED']::v1_readable_status_olap[]))::TIMESTAMPTZ AS finished_at,
MAX(output::TEXT) FILTER (WHERE readable_status = 'COMPLETED')::JSONB AS output,
MAX(external_id::TEXT) FILTER (WHERE readable_status = 'COMPLETED')::UUID AS output_event_external_id,
MAX(error_message) FILTER (WHERE readable_status = 'FAILED')::TEXT AS error_message
FROM
v1_task_events_olap
@@ -1991,7 +2072,8 @@ SELECT
CASE
WHEN $1::BOOLEAN THEN m.output::JSONB
ELSE '{}'::JSONB
END::JSONB AS output
END::JSONB AS output,
m.output_event_external_id::UUID AS output_event_external_id
FROM
v1_tasks_olap t, metadata m
WHERE
@@ -2009,31 +2091,32 @@ type PopulateTaskRunDataParams struct {
}
type PopulateTaskRunDataRow struct {
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
Queue string `json:"queue"`
ActionID string `json:"action_id"`
StepID pgtype.UUID `json:"step_id"`
WorkflowID pgtype.UUID `json:"workflow_id"`
WorkflowVersionID pgtype.UUID `json:"workflow_version_id"`
ScheduleTimeout string `json:"schedule_timeout"`
StepTimeout pgtype.Text `json:"step_timeout"`
Priority pgtype.Int4 `json:"priority"`
Sticky V1StickyStrategyOlap `json:"sticky"`
DisplayName string `json:"display_name"`
AdditionalMetadata []byte `json:"additional_metadata"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
Input []byte `json:"input"`
Status V1ReadableStatusOlap `json:"status"`
WorkflowRunID pgtype.UUID `json:"workflow_run_id"`
FinishedAt pgtype.Timestamptz `json:"finished_at"`
StartedAt pgtype.Timestamptz `json:"started_at"`
QueuedAt pgtype.Timestamptz `json:"queued_at"`
ErrorMessage string `json:"error_message"`
RetryCount int32 `json:"retry_count"`
Output []byte `json:"output"`
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
Queue string `json:"queue"`
ActionID string `json:"action_id"`
StepID pgtype.UUID `json:"step_id"`
WorkflowID pgtype.UUID `json:"workflow_id"`
WorkflowVersionID pgtype.UUID `json:"workflow_version_id"`
ScheduleTimeout string `json:"schedule_timeout"`
StepTimeout pgtype.Text `json:"step_timeout"`
Priority pgtype.Int4 `json:"priority"`
Sticky V1StickyStrategyOlap `json:"sticky"`
DisplayName string `json:"display_name"`
AdditionalMetadata []byte `json:"additional_metadata"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
Input []byte `json:"input"`
Status V1ReadableStatusOlap `json:"status"`
WorkflowRunID pgtype.UUID `json:"workflow_run_id"`
FinishedAt pgtype.Timestamptz `json:"finished_at"`
StartedAt pgtype.Timestamptz `json:"started_at"`
QueuedAt pgtype.Timestamptz `json:"queued_at"`
ErrorMessage string `json:"error_message"`
RetryCount int32 `json:"retry_count"`
Output []byte `json:"output"`
OutputEventExternalID pgtype.UUID `json:"output_event_external_id"`
}
func (q *Queries) PopulateTaskRunData(ctx context.Context, db DBTX, arg PopulateTaskRunDataParams) (*PopulateTaskRunDataRow, error) {
@@ -2070,10 +2153,71 @@ func (q *Queries) PopulateTaskRunData(ctx context.Context, db DBTX, arg Populate
&i.ErrorMessage,
&i.RetryCount,
&i.Output,
&i.OutputEventExternalID,
)
return &i, err
}
const putPayloads = `-- name: PutPayloads :exec
WITH inputs AS (
SELECT
UNNEST($1::UUID[]) AS external_id,
UNNEST($2::TIMESTAMPTZ[]) AS inserted_at,
UNNEST($3::JSONB[]) AS payload,
UNNEST($4::UUID[]) AS tenant_id,
UNNEST(CAST($5::TEXT[] AS v1_payload_location_olap[])) AS location
)
INSERT INTO v1_payloads_olap (
tenant_id,
external_id,
inserted_at,
location,
external_location_key,
inline_content
)
SELECT
i.tenant_id,
i.external_id,
i.inserted_at,
i.location,
CASE
WHEN i.location = 'EXTERNAL' THEN i.payload
ELSE NULL
END,
CASE
WHEN i.location = 'INLINE' THEN i.payload
ELSE NULL
END AS inline_content
FROM inputs i
ON CONFLICT (tenant_id, external_id, inserted_at) DO UPDATE
SET
location = EXCLUDED.location,
external_location_key = EXCLUDED.external_location_key,
inline_content = EXCLUDED.inline_content,
updated_at = NOW()
`
type PutPayloadsParams struct {
Externalids []pgtype.UUID `json:"externalids"`
Insertedats []pgtype.Timestamptz `json:"insertedats"`
Payloads [][]byte `json:"payloads"`
Tenantids []pgtype.UUID `json:"tenantids"`
Locations []string `json:"locations"`
}
func (q *Queries) PutPayloads(ctx context.Context, db DBTX, arg PutPayloadsParams) error {
_, err := db.Exec(ctx, putPayloads,
arg.Externalids,
arg.Insertedats,
arg.Payloads,
arg.Tenantids,
arg.Locations,
)
return err
}
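A hedged usage sketch of the generated wrapper (identifiers like `tenantId` and the two run IDs are placeholders; assumes the same package context and a `time` import). Note that, per the CASE expressions above, the payload slot carries the inline JSON for INLINE rows and the external location key for EXTERNAL rows:

```go
now := pgtype.Timestamptz{Time: time.Now().UTC(), Valid: true}

err := q.PutPayloads(ctx, db, sqlcv1.PutPayloadsParams{
	// parallel arrays: the same index across every slice describes one upserted row
	Externalids: []pgtype.UUID{
		sqlchelpers.UUIDFromStr(inlineRunId),
		sqlchelpers.UUIDFromStr(externalRunId),
	},
	Insertedats: []pgtype.Timestamptz{now, now},
	Payloads: [][]byte{
		[]byte(`{"result": "ok"}`),       // INLINE row: lands in inline_content
		[]byte(`"s3://bucket/key.json"`), // EXTERNAL row: becomes external_location_key (still valid JSON, since the array is cast to JSONB[])
	},
	Tenantids: []pgtype.UUID{
		sqlchelpers.UUIDFromStr(tenantId),
		sqlchelpers.UUIDFromStr(tenantId),
	},
	Locations: []string{"INLINE", "EXTERNAL"},
})
```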
const readDAGByExternalID = `-- name: ReadDAGByExternalID :one
WITH lookup_task AS (
SELECT
@@ -2113,6 +2257,47 @@ func (q *Queries) ReadDAGByExternalID(ctx context.Context, db DBTX, externalid p
return &i, err
}
const readPayloadsOLAP = `-- name: ReadPayloadsOLAP :many
SELECT tenant_id, external_id, location, external_location_key, inline_content, inserted_at, updated_at
FROM v1_payloads_olap
WHERE
tenant_id = $1::UUID
AND external_id = ANY($2::UUID[])
`
type ReadPayloadsOLAPParams struct {
Tenantid pgtype.UUID `json:"tenantid"`
Externalids []pgtype.UUID `json:"externalids"`
}
func (q *Queries) ReadPayloadsOLAP(ctx context.Context, db DBTX, arg ReadPayloadsOLAPParams) ([]*V1PayloadsOlap, error) {
rows, err := db.Query(ctx, readPayloadsOLAP, arg.Tenantid, arg.Externalids)
if err != nil {
return nil, err
}
defer rows.Close()
var items []*V1PayloadsOlap
for rows.Next() {
var i V1PayloadsOlap
if err := rows.Scan(
&i.TenantID,
&i.ExternalID,
&i.Location,
&i.ExternalLocationKey,
&i.InlineContent,
&i.InsertedAt,
&i.UpdatedAt,
); err != nil {
return nil, err
}
items = append(items, &i)
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
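A sketch of the read side under the same assumptions: rows come back from `ReadPayloadsOLAP`, INLINE rows are served directly, and EXTERNAL rows are fetched through the store interface. The string conversion of the location key assumes `ExternalPayloadLocationKey` is string-backed:

```go
func resolveOLAPPayloads(ctx context.Context, db sqlcv1.DBTX, q *sqlcv1.Queries, store ExternalStore, arg sqlcv1.ReadPayloadsOLAPParams) (map[pgtype.UUID][]byte, error) {
	rows, err := q.ReadPayloadsOLAP(ctx, db, arg)
	if err != nil {
		return nil, err
	}

	out := make(map[pgtype.UUID][]byte, len(rows))
	keyToExternalId := make(map[ExternalPayloadLocationKey]pgtype.UUID)
	keys := make([]ExternalPayloadLocationKey, 0)

	for _, row := range rows {
		if row.Location == sqlcv1.V1PayloadLocationOlapINLINE {
			out[row.ExternalID] = row.InlineContent
			continue
		}

		k := ExternalPayloadLocationKey(row.ExternalLocationKey.String) // assumed string-backed
		keyToExternalId[k] = row.ExternalID
		keys = append(keys, k)
	}

	if len(keys) == 0 {
		return out, nil
	}

	// fetch the offloaded payloads in one batch through the external store
	external, err := store.Retrieve(ctx, keys...)
	if err != nil {
		return nil, err
	}

	for k, content := range external {
		out[keyToExternalId[k]] = content
	}

	return out, nil
}
```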
const readTaskByExternalID = `-- name: ReadTaskByExternalID :one
WITH lookup_task AS (
SELECT
@@ -2127,6 +2312,7 @@ WITH lookup_task AS (
SELECT
t.tenant_id, t.id, t.inserted_at, t.external_id, t.queue, t.action_id, t.step_id, t.workflow_id, t.workflow_version_id, t.workflow_run_id, t.schedule_timeout, t.step_timeout, t.priority, t.sticky, t.desired_worker_id, t.display_name, t.input, t.additional_metadata, t.readable_status, t.latest_retry_count, t.latest_worker_id, t.dag_id, t.dag_inserted_at, t.parent_task_external_id,
e.output,
e.external_id AS event_external_id,
e.error_message
FROM
v1_tasks_olap t
@@ -2162,6 +2348,7 @@ type ReadTaskByExternalIDRow struct {
DagInsertedAt pgtype.Timestamptz `json:"dag_inserted_at"`
ParentTaskExternalID pgtype.UUID `json:"parent_task_external_id"`
Output []byte `json:"output"`
EventExternalID pgtype.UUID `json:"event_external_id"`
ErrorMessage pgtype.Text `json:"error_message"`
}
@@ -2194,6 +2381,7 @@ func (q *Queries) ReadTaskByExternalID(ctx context.Context, db DBTX, externalid
&i.DagInsertedAt,
&i.ParentTaskExternalID,
&i.Output,
&i.EventExternalID,
&i.ErrorMessage,
)
return &i, err
@@ -2254,7 +2442,7 @@ WITH runs AS (
AND lt.task_id IS NOT NULL
), relevant_events AS (
SELECT
e.tenant_id, e.id, e.inserted_at, e.task_id, e.task_inserted_at, e.event_type, e.workflow_id, e.event_timestamp, e.readable_status, e.retry_count, e.error_message, e.output, e.worker_id, e.additional__event_data, e.additional__event_message
e.tenant_id, e.id, e.inserted_at, e.external_id, e.task_id, e.task_inserted_at, e.event_type, e.workflow_id, e.event_timestamp, e.readable_status, e.retry_count, e.error_message, e.output, e.worker_id, e.additional__event_data, e.additional__event_message
FROM runs r
JOIN v1_dag_to_task_olap dt ON r.dag_id = dt.dag_id AND r.inserted_at = dt.dag_inserted_at
JOIN v1_task_events_olap e ON (e.task_id, e.task_inserted_at) = (dt.task_id, dt.task_inserted_at)
@@ -2263,7 +2451,7 @@ WITH runs AS (
UNION ALL
SELECT
e.tenant_id, e.id, e.inserted_at, e.task_id, e.task_inserted_at, e.event_type, e.workflow_id, e.event_timestamp, e.readable_status, e.retry_count, e.error_message, e.output, e.worker_id, e.additional__event_data, e.additional__event_message
e.tenant_id, e.id, e.inserted_at, e.external_id, e.task_id, e.task_inserted_at, e.event_type, e.workflow_id, e.event_timestamp, e.readable_status, e.retry_count, e.error_message, e.output, e.worker_id, e.additional__event_data, e.additional__event_message
FROM runs r
JOIN v1_task_events_olap e ON e.task_id = r.task_id AND e.task_inserted_at = r.inserted_at
WHERE r.task_id IS NOT NULL
+14 -17
@@ -1,13 +1,3 @@
-- name: ReadPayload :one
SELECT *
FROM v1_payload
WHERE
tenant_id = @tenantId::UUID
AND type = @type::v1_payload_type
AND id = @id::BIGINT
AND inserted_at = @insertedAt::TIMESTAMPTZ
;
-- name: ReadPayloads :many
WITH inputs AS (
SELECT
@@ -30,6 +20,7 @@ WITH inputs AS (
SELECT DISTINCT
UNNEST(@ids::BIGINT[]) AS id,
UNNEST(@insertedAts::TIMESTAMPTZ[]) AS inserted_at,
UNNEST(@externalIds::UUID[]) AS external_id,
UNNEST(CAST(@types::TEXT[] AS v1_payload_type[])) AS type,
UNNEST(CAST(@locations::TEXT[] AS v1_payload_location[])) AS location,
UNNEST(@externalLocationKeys::TEXT[]) AS external_location_key,
@@ -41,6 +32,7 @@ INSERT INTO v1_payload (
tenant_id,
id,
inserted_at,
external_id,
type,
location,
external_location_key,
@@ -50,6 +42,7 @@ SELECT
i.tenant_id,
i.id,
i.inserted_at,
i.external_id,
i.type,
i.location,
CASE WHEN i.external_location_key = '' OR i.location != 'EXTERNAL' THEN NULL ELSE i.external_location_key END,
@@ -110,7 +103,7 @@ LIMIT @pollLimit::INT
FOR UPDATE SKIP LOCKED
;
-- name: SetPayloadExternalKeys :exec
-- name: SetPayloadExternalKeys :many
WITH inputs AS (
SELECT
UNNEST(@ids::BIGINT[]) AS id,
@@ -129,6 +122,7 @@ WITH inputs AS (
v1_payload.id = i.id
AND v1_payload.inserted_at = i.inserted_at
AND v1_payload.tenant_id = i.tenant_id
RETURNING v1_payload.*
), cutover_queue_items AS (
INSERT INTO v1_payload_cutover_queue_item (
tenant_id,
@@ -146,14 +140,17 @@ WITH inputs AS (
FROM
inputs i
ON CONFLICT DO NOTHING
), deletions AS (
DELETE FROM v1_payload_wal
WHERE
(offload_at, payload_id, payload_inserted_at, payload_type, tenant_id) IN (
SELECT offload_at, id, inserted_at, type, tenant_id
FROM inputs
)
)
DELETE FROM v1_payload_wal
WHERE
(offload_at, payload_id, payload_inserted_at, payload_type, tenant_id) IN (
SELECT offload_at, id, inserted_at, type, tenant_id
FROM inputs
)
SELECT *
FROM payload_updates
;
+63 -54
@@ -123,44 +123,6 @@ func (q *Queries) PollPayloadWALForRecordsToReplicate(ctx context.Context, db DB
return items, nil
}
const readPayload = `-- name: ReadPayload :one
SELECT tenant_id, id, inserted_at, type, location, external_location_key, inline_content, updated_at
FROM v1_payload
WHERE
tenant_id = $1::UUID
AND type = $2::v1_payload_type
AND id = $3::BIGINT
AND inserted_at = $4::TIMESTAMPTZ
`
type ReadPayloadParams struct {
Tenantid pgtype.UUID `json:"tenantid"`
Type V1PayloadType `json:"type"`
ID int64 `json:"id"`
Insertedat pgtype.Timestamptz `json:"insertedat"`
}
func (q *Queries) ReadPayload(ctx context.Context, db DBTX, arg ReadPayloadParams) (*V1Payload, error) {
row := db.QueryRow(ctx, readPayload,
arg.Tenantid,
arg.Type,
arg.ID,
arg.Insertedat,
)
var i V1Payload
err := row.Scan(
&i.TenantID,
&i.ID,
&i.InsertedAt,
&i.Type,
&i.Location,
&i.ExternalLocationKey,
&i.InlineContent,
&i.UpdatedAt,
)
return &i, err
}
const readPayloads = `-- name: ReadPayloads :many
WITH inputs AS (
SELECT
@@ -170,7 +132,7 @@ WITH inputs AS (
UNNEST(CAST($4::TEXT[] AS v1_payload_type[])) AS type
)
SELECT tenant_id, id, inserted_at, type, location, external_location_key, inline_content, updated_at
SELECT tenant_id, id, inserted_at, external_id, type, location, external_location_key, inline_content, updated_at
FROM v1_payload
WHERE (tenant_id, id, inserted_at, type) IN (
SELECT tenant_id, id, inserted_at, type
@@ -203,6 +165,7 @@ func (q *Queries) ReadPayloads(ctx context.Context, db DBTX, arg ReadPayloadsPar
&i.TenantID,
&i.ID,
&i.InsertedAt,
&i.ExternalID,
&i.Type,
&i.Location,
&i.ExternalLocationKey,
@@ -219,7 +182,7 @@ func (q *Queries) ReadPayloads(ctx context.Context, db DBTX, arg ReadPayloadsPar
return items, nil
}
const setPayloadExternalKeys = `-- name: SetPayloadExternalKeys :exec
const setPayloadExternalKeys = `-- name: SetPayloadExternalKeys :many
WITH inputs AS (
SELECT
UNNEST($1::BIGINT[]) AS id,
@@ -238,6 +201,7 @@ WITH inputs AS (
v1_payload.id = i.id
AND v1_payload.inserted_at = i.inserted_at
AND v1_payload.tenant_id = i.tenant_id
RETURNING v1_payload.tenant_id, v1_payload.id, v1_payload.inserted_at, v1_payload.external_id, v1_payload.type, v1_payload.location, v1_payload.external_location_key, v1_payload.inline_content, v1_payload.updated_at
), cutover_queue_items AS (
INSERT INTO v1_payload_cutover_queue_item (
tenant_id,
@@ -255,14 +219,17 @@ WITH inputs AS (
FROM
inputs i
ON CONFLICT DO NOTHING
), deletions AS (
DELETE FROM v1_payload_wal
WHERE
(offload_at, payload_id, payload_inserted_at, payload_type, tenant_id) IN (
SELECT offload_at, id, inserted_at, type, tenant_id
FROM inputs
)
)
DELETE FROM v1_payload_wal
WHERE
(offload_at, payload_id, payload_inserted_at, payload_type, tenant_id) IN (
SELECT offload_at, id, inserted_at, type, tenant_id
FROM inputs
)
SELECT tenant_id, id, inserted_at, external_id, type, location, external_location_key, inline_content, updated_at
FROM payload_updates
`
type SetPayloadExternalKeysParams struct {
@@ -274,8 +241,20 @@ type SetPayloadExternalKeysParams struct {
Tenantids []pgtype.UUID `json:"tenantids"`
}
func (q *Queries) SetPayloadExternalKeys(ctx context.Context, db DBTX, arg SetPayloadExternalKeysParams) error {
_, err := db.Exec(ctx, setPayloadExternalKeys,
type SetPayloadExternalKeysRow struct {
TenantID pgtype.UUID `json:"tenant_id"`
ID int64 `json:"id"`
InsertedAt pgtype.Timestamptz `json:"inserted_at"`
ExternalID pgtype.UUID `json:"external_id"`
Type V1PayloadType `json:"type"`
Location V1PayloadLocation `json:"location"`
ExternalLocationKey pgtype.Text `json:"external_location_key"`
InlineContent []byte `json:"inline_content"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
}
func (q *Queries) SetPayloadExternalKeys(ctx context.Context, db DBTX, arg SetPayloadExternalKeysParams) ([]*SetPayloadExternalKeysRow, error) {
rows, err := db.Query(ctx, setPayloadExternalKeys,
arg.Ids,
arg.Insertedats,
arg.Payloadtypes,
@@ -283,7 +262,32 @@ func (q *Queries) SetPayloadExternalKeys(ctx context.Context, db DBTX, arg SetPa
arg.Externallocationkeys,
arg.Tenantids,
)
return err
if err != nil {
return nil, err
}
defer rows.Close()
var items []*SetPayloadExternalKeysRow
for rows.Next() {
var i SetPayloadExternalKeysRow
if err := rows.Scan(
&i.TenantID,
&i.ID,
&i.InsertedAt,
&i.ExternalID,
&i.Type,
&i.Location,
&i.ExternalLocationKey,
&i.InlineContent,
&i.UpdatedAt,
); err != nil {
return nil, err
}
items = append(items, &i)
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
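The `:exec` to `:many` change is what feeds the per-tenant OLAP offload publish at the top of this diff; the call shape, condensed (`tx` and `params` assumed in scope):

```go
// cutover now returns the updated payload rows in the same round trip...
updatedPayloads, err := q.SetPayloadExternalKeys(ctx, tx, params)
if err != nil {
	return false, err
}
// ...which the WAL processor groups per tenant (skipping TASK_EVENT_DATA rows)
// before publishing OLAPPayloadOffloadMessage batches after commit
```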
const writePayloadWAL = `-- name: WritePayloadWAL :exec
@@ -338,17 +342,19 @@ WITH inputs AS (
SELECT DISTINCT
UNNEST($1::BIGINT[]) AS id,
UNNEST($2::TIMESTAMPTZ[]) AS inserted_at,
UNNEST(CAST($3::TEXT[] AS v1_payload_type[])) AS type,
UNNEST(CAST($4::TEXT[] AS v1_payload_location[])) AS location,
UNNEST($5::TEXT[]) AS external_location_key,
UNNEST($6::JSONB[]) AS inline_content,
UNNEST($7::UUID[]) AS tenant_id
UNNEST($3::UUID[]) AS external_id,
UNNEST(CAST($4::TEXT[] AS v1_payload_type[])) AS type,
UNNEST(CAST($5::TEXT[] AS v1_payload_location[])) AS location,
UNNEST($6::TEXT[]) AS external_location_key,
UNNEST($7::JSONB[]) AS inline_content,
UNNEST($8::UUID[]) AS tenant_id
)
INSERT INTO v1_payload (
tenant_id,
id,
inserted_at,
external_id,
type,
location,
external_location_key,
@@ -358,6 +364,7 @@ SELECT
i.tenant_id,
i.id,
i.inserted_at,
i.external_id,
i.type,
i.location,
CASE WHEN i.external_location_key = '' OR i.location != 'EXTERNAL' THEN NULL ELSE i.external_location_key END,
@@ -375,6 +382,7 @@ DO UPDATE SET
type WritePayloadsParams struct {
Ids []int64 `json:"ids"`
Insertedats []pgtype.Timestamptz `json:"insertedats"`
Externalids []pgtype.UUID `json:"externalids"`
Types []string `json:"types"`
Locations []string `json:"locations"`
Externallocationkeys []string `json:"externallocationkeys"`
@@ -386,6 +394,7 @@ func (q *Queries) WritePayloads(ctx context.Context, db DBTX, arg WritePayloadsP
_, err := db.Exec(ctx, writePayloads,
arg.Ids,
arg.Insertedats,
arg.Externalids,
arg.Types,
arg.Locations,
arg.Externallocationkeys,
+7 -4
@@ -1106,7 +1106,7 @@ func (r *TaskRepositoryImpl) listTaskOutputEvents(ctx context.Context, tx sqlcv1
matchedEventToRetrieveOpts[event] = opt
}
payloads, err := r.payloadStore.BulkRetrieve(ctx, retrieveOpts...)
payloads, err := r.payloadStore.Retrieve(ctx, retrieveOpts...)
if err != nil {
return nil, err
@@ -1381,6 +1381,7 @@ func (r *TaskRepositoryImpl) ProcessDurableSleeps(ctx context.Context, tenantId
Id: task.ID,
InsertedAt: task.InsertedAt,
Type: sqlcv1.V1PayloadTypeTASKINPUT,
ExternalId: task.ExternalID,
Payload: task.Payload,
TenantId: task.TenantID.String(),
}
@@ -2358,6 +2359,7 @@ func (r *sharedRepository) replayTasks(
Id: taskIds[i],
InsertedAt: taskInsertedAts[i],
Type: sqlcv1.V1PayloadTypeTASKINPUT,
ExternalId: sqlchelpers.UUIDFromStr(task.ExternalId),
Payload: input,
TenantId: tenantId,
}
@@ -2699,6 +2701,7 @@ func (r *sharedRepository) createTaskEvents(
storePayloadOpts[i] = StorePayloadOpts{
Id: taskEvent.ID,
InsertedAt: taskEvent.InsertedAt,
ExternalId: taskEvent.ExternalID,
Type: sqlcv1.V1PayloadTypeTASKEVENTDATA,
Payload: data,
TenantId: tenantId,
@@ -2869,7 +2872,7 @@ func (r *TaskRepositoryImpl) ReplayTasks(ctx context.Context, tenantId string, t
}
}
payloads, err := r.payloadStore.BulkRetrieve(ctx, retrieveOpts...)
payloads, err := r.payloadStore.Retrieve(ctx, retrieveOpts...)
if err != nil {
return nil, fmt.Errorf("failed to bulk retrieve task inputs: %w", err)
@@ -3477,7 +3480,7 @@ func (r *TaskRepositoryImpl) ListTaskParentOutputs(ctx context.Context, tenantId
retrieveOptToPayload[opt] = outputTask.Output
}
payloads, err := r.payloadStore.BulkRetrieve(ctx, retrieveOpts...)
payloads, err := r.payloadStore.Retrieve(ctx, retrieveOpts...)
if err != nil {
return nil, fmt.Errorf("failed to retrieve task output payloads: %w", err)
@@ -3553,7 +3556,7 @@ func (r *TaskRepositoryImpl) ListSignalCompletedEvents(ctx context.Context, tena
retrieveOpts[i] = retrieveOpt
}
payloads, err := r.payloadStore.BulkRetrieve(ctx, retrieveOpts...)
payloads, err := r.payloadStore.Retrieve(ctx, retrieveOpts...)
if err != nil {
return nil, fmt.Errorf("failed to retrieve task event payloads: %w", err)
+3 -1
@@ -1257,6 +1257,7 @@ func (r *TriggerRepositoryImpl) triggerWorkflows(ctx context.Context, tenantId s
storePayloadOpts = append(storePayloadOpts, StorePayloadOpts{
Id: task.ID,
InsertedAt: task.InsertedAt,
ExternalId: task.ExternalID,
Type: sqlcv1.V1PayloadTypeTASKINPUT,
Payload: task.Payload,
TenantId: tenantId,
@@ -1267,6 +1268,7 @@ func (r *TriggerRepositoryImpl) triggerWorkflows(ctx context.Context, tenantId s
storePayloadOpts = append(storePayloadOpts, StorePayloadOpts{
Id: dag.ID,
InsertedAt: dag.InsertedAt,
ExternalId: dag.ExternalID,
Type: sqlcv1.V1PayloadTypeDAGINPUT,
Payload: dag.Input,
TenantId: tenantId,
@@ -1510,7 +1512,7 @@ func (r *TriggerRepositoryImpl) registerChildWorkflows(
}
}
payloads, err := r.payloadStore.BulkRetrieve(ctx, retrievePayloadOpts...)
payloads, err := r.payloadStore.Retrieve(ctx, retrievePayloadOpts...)
if err != nil {
return nil, fmt.Errorf("failed to retrieve payloads for signal created events: %w", err)
+6
@@ -5,6 +5,12 @@ All notable changes to Hatchet's Python SDK will be documented in this changelog
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.20.2] - 2025-10-15
### Added
- Adds an `include_payloads` parameter to the `list` methods on the runs client (defaults to true, so no change in behavior).
## [1.20.1] - 2025-10-14
### Added
@@ -23,7 +23,7 @@ class WorkflowInput(BaseModel):
concurrency_workflow_level_workflow = hatchet.workflow(
name="ConcurrencyWorkflowManyKeys",
name="ConcurrencyWorkflowLevel",
input_validator=WorkflowInput,
concurrency=[
ConcurrencyExpression(
@@ -11,7 +11,7 @@ class DynamicCronInput(BaseModel):
async def create_cron() -> None:
dynamic_cron_workflow = hatchet.workflow(
name="CronWorkflow", input_validator=DynamicCronInput
name="DynamicCronWorkflow", input_validator=DynamicCronInput
)
# > Create
@@ -10,7 +10,7 @@ class DynamicCronInput(BaseModel):
dynamic_cron_workflow = hatchet.workflow(
name="CronWorkflow", input_validator=DynamicCronInput
name="DynamicCronWorkflow", input_validator=DynamicCronInput
)
# > Create
+12
@@ -405,6 +405,7 @@ class RunsClient(BaseRestClient):
worker_id: str | None = None,
parent_task_external_id: str | None = None,
triggering_event_external_id: str | None = None,
include_payloads: bool = True,
) -> list[V1TaskSummary]:
"""
List task runs according to a set of filters, paginating through days
@@ -420,6 +421,7 @@ class RunsClient(BaseRestClient):
:param worker_id: The worker ID to filter task runs by.
:param parent_task_external_id: The parent task external ID to filter task runs by.
:param triggering_event_external_id: The event id that triggered the task run.
:param include_payloads: Whether to include payloads in the response.
:return: A list of task runs matching the specified filters.
"""
@@ -446,6 +448,7 @@ class RunsClient(BaseRestClient):
worker_id=worker_id,
parent_task_external_id=parent_task_external_id,
triggering_event_external_id=triggering_event_external_id,
include_payloads=include_payloads,
)
for s, u in date_ranges
]
@@ -475,6 +478,7 @@ class RunsClient(BaseRestClient):
worker_id: str | None = None,
parent_task_external_id: str | None = None,
triggering_event_external_id: str | None = None,
include_payloads: bool = True,
) -> list[V1TaskSummary]:
"""
List task runs according to a set of filters, paginating through days
@@ -490,6 +494,7 @@ class RunsClient(BaseRestClient):
:param worker_id: The worker ID to filter task runs by.
:param parent_task_external_id: The parent task external ID to filter task runs by.
:param triggering_event_external_id: The event id that triggered the task run.
:param include_payloads: Whether to include payloads in the response.
:return: A list of task runs matching the specified filters.
"""
@@ -517,6 +522,7 @@ class RunsClient(BaseRestClient):
worker_id=worker_id,
parent_task_external_id=parent_task_external_id,
triggering_event_external_id=triggering_event_external_id,
include_payloads=include_payloads,
)
for s, u in date_ranges
]
@@ -550,6 +556,7 @@ class RunsClient(BaseRestClient):
worker_id: str | None = None,
parent_task_external_id: str | None = None,
triggering_event_external_id: str | None = None,
include_payloads: bool = True,
) -> V1TaskSummaryList:
"""
List task runs according to a set of filters.
@@ -565,6 +572,7 @@ class RunsClient(BaseRestClient):
:param worker_id: The worker ID to filter task runs by.
:param parent_task_external_id: The parent task external ID to filter task runs by.
:param triggering_event_external_id: The event id that triggered the task run.
:param include_payloads: Whether to include payloads in the response.
:return: A list of task runs matching the specified filters.
"""
@@ -581,6 +589,7 @@ class RunsClient(BaseRestClient):
worker_id=worker_id,
parent_task_external_id=parent_task_external_id,
triggering_event_external_id=triggering_event_external_id,
include_payloads=include_payloads,
)
@retry
@@ -597,6 +606,7 @@ class RunsClient(BaseRestClient):
worker_id: str | None = None,
parent_task_external_id: str | None = None,
triggering_event_external_id: str | None = None,
include_payloads: bool = True,
) -> V1TaskSummaryList:
"""
List task runs according to a set of filters.
@@ -612,6 +622,7 @@ class RunsClient(BaseRestClient):
:param worker_id: The worker ID to filter task runs by.
:param parent_task_external_id: The parent task external ID to filter task runs by.
:param triggering_event_external_id: The event id that triggered the task run.
:param include_payloads: Whether to include payloads in the response.
:return: A list of task runs matching the specified filters.
"""
@@ -643,6 +654,7 @@ class RunsClient(BaseRestClient):
worker_id=worker_id,
parent_task_external_id=parent_task_external_id,
triggering_event_external_id=triggering_event_external_id,
include_payloads=include_payloads,
)
def create(
+3
@@ -1641,12 +1641,15 @@ CREATE TABLE v1_durable_sleep (
);
CREATE TYPE v1_payload_type AS ENUM ('TASK_INPUT', 'DAG_INPUT', 'TASK_OUTPUT', 'TASK_EVENT_DATA');
-- IMPORTANT: Keep these values in sync with `v1_payload_location_olap` in the OLAP db
CREATE TYPE v1_payload_location AS ENUM ('INLINE', 'EXTERNAL');
CREATE TABLE v1_payload (
tenant_id UUID NOT NULL,
id BIGINT NOT NULL,
inserted_at TIMESTAMPTZ NOT NULL,
external_id UUID,
type v1_payload_type NOT NULL,
location v1_payload_location NOT NULL,
external_location_key TEXT,
+22
@@ -318,6 +318,7 @@ CREATE TABLE v1_task_events_olap (
tenant_id UUID NOT NULL,
id bigint GENERATED ALWAYS AS IDENTITY,
inserted_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
external_id UUID,
task_id BIGINT NOT NULL,
task_inserted_at TIMESTAMPTZ NOT NULL,
event_type v1_event_type_olap NOT NULL,
@@ -354,6 +355,27 @@ CREATE TABLE v1_incoming_webhook_validation_failures_olap (
CREATE INDEX v1_incoming_webhook_validation_failures_olap_tenant_id_incoming_webhook_name_idx ON v1_incoming_webhook_validation_failures_olap (tenant_id, incoming_webhook_name);
-- IMPORTANT: Keep these values in sync with `v1_payload_location` in the core db
CREATE TYPE v1_payload_location_olap AS ENUM ('INLINE', 'EXTERNAL');
CREATE TABLE v1_payloads_olap (
tenant_id UUID NOT NULL,
external_id UUID NOT NULL,
location v1_payload_location_olap NOT NULL,
external_location_key TEXT,
inline_content JSONB,
inserted_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (tenant_id, external_id, inserted_at),
CHECK (
location = 'INLINE'
OR
(location = 'EXTERNAL' AND inline_content IS NULL AND external_location_key IS NOT NULL)
)
) PARTITION BY RANGE(inserted_at);
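The CHECK constraint encodes the row invariant: INLINE rows may carry inline_content, while EXTERNAL rows must carry a location key and no inline content. A client-side mirror of that rule, as a hedged illustration (the helper name is ours, not from this PR; assumes an `fmt` import):

```go
// validateOLAPPayloadRow mirrors the v1_payloads_olap CHECK constraint so bad
// batches can be rejected before hitting the database
func validateOLAPPayloadRow(location string, externalLocationKey *string, inlineContent []byte) error {
	switch location {
	case "INLINE":
		// the constraint places no extra requirements on INLINE rows
		return nil
	case "EXTERNAL":
		if inlineContent != nil {
			return fmt.Errorf("EXTERNAL payloads must not carry inline_content")
		}
		// slightly stricter than the DB check, which only requires NOT NULL
		if externalLocationKey == nil || *externalLocationKey == "" {
			return fmt.Errorf("EXTERNAL payloads require external_location_key")
		}
		return nil
	default:
		return fmt.Errorf("unknown v1_payload_location_olap: %q", location)
	}
}
```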
-- this is a hash-partitioned table on the dag_id, so that we can process batches of events in parallel
-- without needing to place conflicting locks on dags.