mirror of
https://github.com/hatchet-dev/hatchet.git
synced 2025-12-16 22:35:11 -06:00
feat: wire up messages over the queue
This commit is contained in:
@@ -558,6 +558,7 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
|
||||
eventPayloads := make([]string, 0)
|
||||
eventMessages := make([]string, 0)
|
||||
timestamps := make([]pgtype.Timestamptz, 0)
|
||||
externalIds := make([]pgtype.UUID, 0)
|
||||
|
||||
for _, msg := range msgs {
|
||||
taskMeta := taskIdsToMetas[msg.TaskId]
|
||||
@@ -580,6 +581,7 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
|
||||
eventPayloads = append(eventPayloads, msg.EventPayload)
|
||||
eventMessages = append(eventMessages, msg.EventMessage)
|
||||
timestamps = append(timestamps, sqlchelpers.TimestamptzFromTime(msg.EventTimestamp))
|
||||
externalIds = append(externalIds, sqlchelpers.UUIDFromStr(msg.EventExternalId))
|
||||
|
||||
if msg.WorkerId != nil {
|
||||
workerIds = append(workerIds, *msg.WorkerId)
|
||||
@@ -651,6 +653,7 @@ func (tc *OLAPControllerImpl) handleCreateMonitoringEvent(ctx context.Context, t
|
||||
RetryCount: retryCounts[i],
|
||||
WorkerID: workerId,
|
||||
AdditionalEventMessage: sqlchelpers.TextFromStr(eventMessages[i]),
|
||||
ExternalID: externalIds[i],
|
||||
}
|
||||
|
||||
switch eventTypes[i] {
|
||||
|
||||
@@ -561,11 +561,12 @@ func (tc *TasksControllerImpl) handleTaskFailed(ctx context.Context, tenantId st
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: msg.TaskId,
|
||||
RetryCount: msg.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapFAILED,
|
||||
EventTimestamp: time.Now().UTC(),
|
||||
EventPayload: msg.ErrorMsg,
|
||||
TaskId: msg.TaskId,
|
||||
RetryCount: msg.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapFAILED,
|
||||
EventTimestamp: time.Now().UTC(),
|
||||
EventPayload: msg.ErrorMsg,
|
||||
EventExternalId: msg.EventExternalId,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -739,11 +740,12 @@ func (tc *TasksControllerImpl) handleTaskCancelled(ctx context.Context, tenantId
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: msg.RetryCount,
|
||||
EventType: msg.EventType,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: msg.EventMessage,
|
||||
TaskId: taskId,
|
||||
RetryCount: msg.RetryCount,
|
||||
EventType: msg.EventType,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: msg.EventMessage,
|
||||
EventExternalId: msg.EventExternalId,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1620,11 +1622,12 @@ func (tc *TasksControllerImpl) signalTasksCreatedAndQueued(ctx context.Context,
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapQUEUED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: msg,
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapQUEUED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: msg,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1685,10 +1688,11 @@ func (tc *TasksControllerImpl) signalTasksCreatedAndCancelled(ctx context.Contex
|
||||
// TODO: make this transactionally safe?
|
||||
for _, task := range tasks {
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(tenantId, tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapCANCELLED,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapCANCELLED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -1750,11 +1754,12 @@ func (tc *TasksControllerImpl) signalTasksCreatedAndFailed(ctx context.Context,
|
||||
// TODO: make this transactionally safe?
|
||||
for _, task := range tasks {
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(tenantId, tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapFAILED,
|
||||
EventPayload: task.InitialStateReason.String,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapFAILED,
|
||||
EventPayload: task.InitialStateReason.String,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -1816,10 +1821,11 @@ func (tc *TasksControllerImpl) signalTasksCreatedAndSkipped(ctx context.Context,
|
||||
// TODO: make this transactionally safe?
|
||||
for _, task := range tasks {
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(tenantId, tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapSKIPPED,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapSKIPPED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -1867,11 +1873,12 @@ func (tc *TasksControllerImpl) signalTasksReplayed(ctx context.Context, tenantId
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.Id,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapRETRIEDBYUSER,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: msg,
|
||||
TaskId: task.Id,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapRETRIEDBYUSER,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: msg,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1916,11 +1923,12 @@ func (tc *TasksControllerImpl) pubRetryEvent(ctx context.Context, tenantId strin
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapRETRYING,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: retryMsg,
|
||||
TaskId: taskId,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapRETRYING,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: retryMsg,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1943,10 +1951,11 @@ func (tc *TasksControllerImpl) pubRetryEvent(ctx context.Context, tenantId strin
|
||||
olapMsg, err = tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapQUEUED,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: taskId,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapQUEUED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
msgqueue "github.com/hatchet-dev/hatchet/internal/msgqueue/v1"
|
||||
tasktypes "github.com/hatchet-dev/hatchet/internal/services/shared/tasktypes/v1"
|
||||
"github.com/hatchet-dev/hatchet/internal/telemetry"
|
||||
@@ -69,12 +70,13 @@ func (tc *TasksControllerImpl) processTaskReassignments(ctx context.Context, ten
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapREASSIGNED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: "Worker did not send a heartbeat for 30 seconds",
|
||||
WorkerId: workerId,
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapREASSIGNED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: "Worker did not send a heartbeat for 30 seconds",
|
||||
WorkerId: workerId,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -96,13 +98,14 @@ func (tc *TasksControllerImpl) processTaskReassignments(ctx context.Context, ten
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapFAILED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: "Task reached its maximum reassignment count",
|
||||
EventPayload: "Task reached its maximum reassignment count",
|
||||
WorkerId: workerId,
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapFAILED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: "Task reached its maximum reassignment count",
|
||||
EventPayload: "Task reached its maximum reassignment count",
|
||||
WorkerId: workerId,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
|
||||
msgqueue "github.com/hatchet-dev/hatchet/internal/msgqueue/v1"
|
||||
@@ -57,10 +58,11 @@ func (tc *TasksControllerImpl) processTaskRetryQueueItems(ctx context.Context, t
|
||||
taskId := task.TaskID
|
||||
|
||||
monitoringEvent := tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: task.TaskRetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapQUEUED,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: taskId,
|
||||
RetryCount: task.TaskRetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapQUEUED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
}
|
||||
|
||||
olapMsg, innerErr := tasktypes.MonitoringEventMessageFromInternal(
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
msgqueue "github.com/hatchet-dev/hatchet/internal/msgqueue/v1"
|
||||
tasktypes "github.com/hatchet-dev/hatchet/internal/services/shared/tasktypes/v1"
|
||||
"github.com/hatchet-dev/hatchet/internal/telemetry"
|
||||
@@ -67,11 +68,12 @@ func (tc *TasksControllerImpl) processTaskTimeouts(ctx context.Context, tenantId
|
||||
olapMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapTIMEDOUT,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: fmt.Sprintf("Task exceeded timeout of %s", task.StepTimeout.String),
|
||||
TaskId: task.ID,
|
||||
RetryCount: task.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapTIMEDOUT,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: fmt.Sprintf("Task exceeded timeout of %s", task.StepTimeout.String),
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"golang.org/x/sync/errgroup"
|
||||
@@ -411,6 +412,7 @@ func (d *DispatcherImpl) handleTaskBulkAssignedTask(ctx context.Context, msg *ms
|
||||
false,
|
||||
"Could not send task to worker",
|
||||
false,
|
||||
uuid.NewString(),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -582,12 +582,14 @@ func (s *DispatcherImpl) sendStepActionEventV1(ctx context.Context, request *con
|
||||
func (s *DispatcherImpl) handleTaskStarted(inputCtx context.Context, task *sqlcv1.FlattenExternalIdsRow, retryCount int32, request *contracts.StepActionEvent) (*contracts.ActionEventResponse, error) {
|
||||
tenant := inputCtx.Value("tenant").(*dbsqlc.Tenant)
|
||||
tenantId := sqlchelpers.UUIDToStr(tenant.ID)
|
||||
eventExternalId := uuid.NewString()
|
||||
|
||||
msg, err := tasktypes.MonitoringEventMessageFromActionEvent(
|
||||
tenantId,
|
||||
task.ID,
|
||||
retryCount,
|
||||
request,
|
||||
eventExternalId,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -609,6 +611,7 @@ func (s *DispatcherImpl) handleTaskStarted(inputCtx context.Context, task *sqlcv
|
||||
func (s *DispatcherImpl) handleTaskCompleted(inputCtx context.Context, task *sqlcv1.FlattenExternalIdsRow, retryCount int32, request *contracts.StepActionEvent) (*contracts.ActionEventResponse, error) {
|
||||
tenant := inputCtx.Value("tenant").(*dbsqlc.Tenant)
|
||||
tenantId := sqlchelpers.UUIDToStr(tenant.ID)
|
||||
eventExternalId := uuid.NewString()
|
||||
|
||||
// if request.RetryCount == nil {
|
||||
// return nil, fmt.Errorf("retry count is required in v2")
|
||||
@@ -622,6 +625,7 @@ func (s *DispatcherImpl) handleTaskCompleted(inputCtx context.Context, task *sql
|
||||
sqlchelpers.UUIDToStr(task.WorkflowRunID),
|
||||
retryCount,
|
||||
[]byte(request.EventPayload),
|
||||
eventExternalId,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -644,6 +648,7 @@ func (s *DispatcherImpl) handleTaskCompleted(inputCtx context.Context, task *sql
|
||||
task.ID,
|
||||
retryCount,
|
||||
request,
|
||||
eventExternalId,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -664,6 +669,7 @@ func (s *DispatcherImpl) handleTaskCompleted(inputCtx context.Context, task *sql
|
||||
func (s *DispatcherImpl) handleTaskFailed(inputCtx context.Context, task *sqlcv1.FlattenExternalIdsRow, retryCount int32, request *contracts.StepActionEvent) (*contracts.ActionEventResponse, error) {
|
||||
tenant := inputCtx.Value("tenant").(*dbsqlc.Tenant)
|
||||
tenantId := sqlchelpers.UUIDToStr(tenant.ID)
|
||||
eventExternalId := uuid.NewString()
|
||||
|
||||
shouldNotRetry := false
|
||||
|
||||
@@ -681,6 +687,7 @@ func (s *DispatcherImpl) handleTaskFailed(inputCtx context.Context, task *sqlcv1
|
||||
true,
|
||||
request.EventPayload,
|
||||
shouldNotRetry,
|
||||
eventExternalId,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -729,12 +736,13 @@ func (d *DispatcherImpl) refreshTimeoutV1(ctx context.Context, tenant *dbsqlc.Te
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskRuntime.TaskID,
|
||||
RetryCount: taskRuntime.RetryCount,
|
||||
WorkerId: &workerId,
|
||||
EventTimestamp: time.Now(),
|
||||
EventType: sqlcv1.V1EventTypeOlapTIMEOUTREFRESHED,
|
||||
EventMessage: fmt.Sprintf("Timeout refreshed by %s", request.IncrementTimeoutBy),
|
||||
TaskId: taskRuntime.TaskID,
|
||||
RetryCount: taskRuntime.RetryCount,
|
||||
WorkerId: &workerId,
|
||||
EventTimestamp: time.Now(),
|
||||
EventType: sqlcv1.V1EventTypeOlapTIMEOUTREFRESHED,
|
||||
EventMessage: fmt.Sprintf("Timeout refreshed by %s", request.IncrementTimeoutBy),
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -768,11 +776,12 @@ func (d *DispatcherImpl) releaseSlotV1(ctx context.Context, tenant *dbsqlc.Tenan
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: releasedSlot.TaskID,
|
||||
RetryCount: releasedSlot.RetryCount,
|
||||
WorkerId: &workerId,
|
||||
EventTimestamp: time.Now(),
|
||||
EventType: sqlcv1.V1EventTypeOlapSLOTRELEASED,
|
||||
TaskId: releasedSlot.TaskID,
|
||||
RetryCount: releasedSlot.RetryCount,
|
||||
WorkerId: &workerId,
|
||||
EventTimestamp: time.Now(),
|
||||
EventType: sqlcv1.V1EventTypeOlapSLOTRELEASED,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/golang-lru/v2/expirable"
|
||||
"github.com/rs/zerolog"
|
||||
@@ -422,11 +423,12 @@ func (s *Scheduler) scheduleStepRuns(ctx context.Context, tenantId string, res *
|
||||
assignedMsg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: bulkAssigned.QueueItem.RetryCount,
|
||||
WorkerId: &workerId,
|
||||
EventType: sqlcv1.V1EventTypeOlapASSIGNED,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: taskId,
|
||||
RetryCount: bulkAssigned.QueueItem.RetryCount,
|
||||
WorkerId: &workerId,
|
||||
EventType: sqlcv1.V1EventTypeOlapASSIGNED,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -483,11 +485,12 @@ func (s *Scheduler) scheduleStepRuns(ctx context.Context, tenantId string, res *
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: rateLimited.TaskId,
|
||||
RetryCount: rateLimited.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapREQUEUEDRATELIMIT,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: message,
|
||||
TaskId: rateLimited.TaskId,
|
||||
RetryCount: rateLimited.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapREQUEUEDRATELIMIT,
|
||||
EventTimestamp: time.Now(),
|
||||
EventMessage: message,
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -521,6 +524,7 @@ func (s *Scheduler) scheduleStepRuns(ctx context.Context, tenantId string, res *
|
||||
sqlcv1.V1EventTypeOlapSCHEDULINGTIMEDOUT,
|
||||
"",
|
||||
false,
|
||||
uuid.NewString(),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -555,10 +559,11 @@ func (s *Scheduler) scheduleStepRuns(ctx context.Context, tenantId string, res *
|
||||
msg, err := tasktypes.MonitoringEventMessageFromInternal(
|
||||
tenantId,
|
||||
tasktypes.CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: unassigned.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapREQUEUEDNOWORKER,
|
||||
EventTimestamp: time.Now(),
|
||||
TaskId: taskId,
|
||||
RetryCount: unassigned.RetryCount,
|
||||
EventType: sqlcv1.V1EventTypeOlapREQUEUEDNOWORKER,
|
||||
EventTimestamp: time.Now(),
|
||||
EventExternalId: uuid.NewString(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -597,6 +602,7 @@ func (s *Scheduler) internalRetry(ctx context.Context, tenantId string, assigned
|
||||
false,
|
||||
"could not assign step run to worker",
|
||||
false,
|
||||
uuid.NewString(),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -669,6 +675,7 @@ func (s *Scheduler) notifyAfterConcurrency(ctx context.Context, tenantId string,
|
||||
eventType,
|
||||
eventMessage,
|
||||
shouldNotify,
|
||||
uuid.NewString(),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -117,12 +117,13 @@ type CreateMonitoringEventPayload struct {
|
||||
|
||||
EventType sqlcv1.V1EventTypeOlap `json:"event_type"`
|
||||
|
||||
EventTimestamp time.Time `json:"event_timestamp" validate:"required"`
|
||||
EventPayload string `json:"event_payload" validate:"required"`
|
||||
EventMessage string `json:"event_message,omitempty"`
|
||||
EventTimestamp time.Time `json:"event_timestamp" validate:"required"`
|
||||
EventPayload string `json:"event_payload" validate:"required"`
|
||||
EventMessage string `json:"event_message,omitempty"`
|
||||
EventExternalId string `json:"event_external_id,omitempty"`
|
||||
}
|
||||
|
||||
func MonitoringEventMessageFromActionEvent(tenantId string, taskId int64, retryCount int32, request *contracts.StepActionEvent) (*msgqueue.Message, error) {
|
||||
func MonitoringEventMessageFromActionEvent(tenantId string, taskId int64, retryCount int32, request *contracts.StepActionEvent, eventExternalId string) (*msgqueue.Message, error) {
|
||||
var workerId *string
|
||||
|
||||
if _, err := uuid.Parse(request.WorkerId); err == nil {
|
||||
@@ -130,11 +131,12 @@ func MonitoringEventMessageFromActionEvent(tenantId string, taskId int64, retryC
|
||||
}
|
||||
|
||||
payload := CreateMonitoringEventPayload{
|
||||
TaskId: taskId,
|
||||
RetryCount: retryCount,
|
||||
WorkerId: workerId,
|
||||
EventTimestamp: request.EventTimestamp.AsTime(),
|
||||
EventPayload: request.EventPayload,
|
||||
TaskId: taskId,
|
||||
RetryCount: retryCount,
|
||||
WorkerId: workerId,
|
||||
EventTimestamp: request.EventTimestamp.AsTime(),
|
||||
EventPayload: request.EventPayload,
|
||||
EventExternalId: eventExternalId,
|
||||
}
|
||||
|
||||
switch request.EventType {
|
||||
|
||||
@@ -36,6 +36,9 @@ type CompletedTaskPayload struct {
|
||||
|
||||
// (optional) the output data
|
||||
Output []byte
|
||||
|
||||
// the external id of the event
|
||||
EventExternalId string
|
||||
}
|
||||
|
||||
func CompletedTaskMessage(
|
||||
@@ -46,6 +49,7 @@ func CompletedTaskMessage(
|
||||
workflowRunId string,
|
||||
retryCount int32,
|
||||
output []byte,
|
||||
eventExternalId string,
|
||||
) (*msgqueue.Message, error) {
|
||||
return msgqueue.NewTenantMessage(
|
||||
tenantId,
|
||||
@@ -53,12 +57,13 @@ func CompletedTaskMessage(
|
||||
false,
|
||||
true,
|
||||
CompletedTaskPayload{
|
||||
TaskId: taskId,
|
||||
InsertedAt: taskInsertedAt,
|
||||
ExternalId: taskExternalId,
|
||||
WorkflowRunId: workflowRunId,
|
||||
RetryCount: retryCount,
|
||||
Output: output,
|
||||
TaskId: taskId,
|
||||
InsertedAt: taskInsertedAt,
|
||||
ExternalId: taskExternalId,
|
||||
WorkflowRunId: workflowRunId,
|
||||
RetryCount: retryCount,
|
||||
Output: output,
|
||||
EventExternalId: eventExternalId,
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -87,6 +92,9 @@ type FailedTaskPayload struct {
|
||||
|
||||
// (optional) A boolean flag to indicate whether the error is non-retryable, meaning it should _not_ be retried. Defaults to false.
|
||||
IsNonRetryable bool `json:"is_non_retryable"`
|
||||
|
||||
// the external id of the event
|
||||
EventExternalId string
|
||||
}
|
||||
|
||||
func FailedTaskMessage(
|
||||
@@ -99,6 +107,7 @@ func FailedTaskMessage(
|
||||
isAppError bool,
|
||||
errorMsg string,
|
||||
isNonRetryable bool,
|
||||
eventExternalId string,
|
||||
) (*msgqueue.Message, error) {
|
||||
return msgqueue.NewTenantMessage(
|
||||
tenantId,
|
||||
@@ -106,14 +115,15 @@ func FailedTaskMessage(
|
||||
false,
|
||||
true,
|
||||
FailedTaskPayload{
|
||||
TaskId: taskId,
|
||||
InsertedAt: taskInsertedAt,
|
||||
ExternalId: taskExternalId,
|
||||
WorkflowRunId: workflowRunId,
|
||||
RetryCount: retryCount,
|
||||
IsAppError: isAppError,
|
||||
ErrorMsg: errorMsg,
|
||||
IsNonRetryable: isNonRetryable,
|
||||
TaskId: taskId,
|
||||
InsertedAt: taskInsertedAt,
|
||||
ExternalId: taskExternalId,
|
||||
WorkflowRunId: workflowRunId,
|
||||
RetryCount: retryCount,
|
||||
IsAppError: isAppError,
|
||||
ErrorMsg: errorMsg,
|
||||
IsNonRetryable: isNonRetryable,
|
||||
EventExternalId: eventExternalId,
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -142,6 +152,9 @@ type CancelledTaskPayload struct {
|
||||
|
||||
// (optional) whether the task should notify the worker
|
||||
ShouldNotify bool
|
||||
|
||||
// the external id of the event
|
||||
EventExternalId string
|
||||
}
|
||||
|
||||
func CancelledTaskMessage(
|
||||
@@ -154,6 +167,7 @@ func CancelledTaskMessage(
|
||||
eventType sqlcv1.V1EventTypeOlap,
|
||||
eventMessage string,
|
||||
shouldNotify bool,
|
||||
eventExternalId string,
|
||||
) (*msgqueue.Message, error) {
|
||||
return msgqueue.NewTenantMessage(
|
||||
tenantId,
|
||||
@@ -161,14 +175,15 @@ func CancelledTaskMessage(
|
||||
false,
|
||||
true,
|
||||
CancelledTaskPayload{
|
||||
TaskId: taskId,
|
||||
InsertedAt: taskInsertedAt,
|
||||
ExternalId: taskExternalId,
|
||||
WorkflowRunId: workflowRunId,
|
||||
RetryCount: retryCount,
|
||||
EventType: eventType,
|
||||
EventMessage: eventMessage,
|
||||
ShouldNotify: shouldNotify,
|
||||
TaskId: taskId,
|
||||
InsertedAt: taskInsertedAt,
|
||||
ExternalId: taskExternalId,
|
||||
WorkflowRunId: workflowRunId,
|
||||
RetryCount: retryCount,
|
||||
EventType: eventType,
|
||||
EventMessage: eventMessage,
|
||||
ShouldNotify: shouldNotify,
|
||||
EventExternalId: eventExternalId,
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -185,6 +200,9 @@ type SignalTaskCancelledPayload struct {
|
||||
|
||||
// (required) the retry count
|
||||
RetryCount int32
|
||||
|
||||
// the external id of the event
|
||||
EventExternalId string
|
||||
}
|
||||
|
||||
type CancelTasksPayload struct {
|
||||
|
||||
@@ -255,6 +255,7 @@ func (s *sharedRepository) generateExternalIdsForChildWorkflows(ctx context.Cont
|
||||
datas,
|
||||
makeEventTypeArr(sqlcv1.V1TaskEventTypeSIGNALCREATED, len(taskIds)),
|
||||
newEventKeys,
|
||||
nil,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -683,6 +683,7 @@ func (m *sharedRepository) processEventMatches(ctx context.Context, tx sqlcv1.DB
|
||||
datas,
|
||||
makeEventTypeArr(sqlcv1.V1TaskEventTypeSIGNALCOMPLETED, len(taskIds)),
|
||||
eventKeys,
|
||||
nil,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -1882,7 +1882,16 @@ func (r *OLAPRepositoryImpl) PutPayloads(ctx context.Context, tenantId string, p
|
||||
payloads := make([][]byte, len(putPayloadOpts))
|
||||
locations := make([]string, len(putPayloadOpts))
|
||||
|
||||
seenExternalIds := make(map[pgtype.UUID]struct{})
|
||||
|
||||
for i, opt := range putPayloadOpts {
|
||||
_, exists := seenExternalIds[opt.ExternalId]
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
|
||||
seenExternalIds[opt.ExternalId] = struct{}{}
|
||||
|
||||
externalIds[i] = opt.ExternalId
|
||||
insertedAts[i] = opt.InsertedAt
|
||||
tenantIds[i] = sqlchelpers.UUIDFromStr(tenantId)
|
||||
|
||||
@@ -2119,6 +2119,7 @@ func (r *sharedRepository) insertTasks(
|
||||
eventDatas,
|
||||
eventTypes,
|
||||
make([]string, len(eventTaskIdRetryCounts)),
|
||||
nil,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -2416,6 +2417,7 @@ func (r *sharedRepository) replayTasks(
|
||||
eventDatas,
|
||||
eventTypes,
|
||||
make([]string, len(eventTaskIdRetryCounts)),
|
||||
nil,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@@ -2586,6 +2588,7 @@ func (r *sharedRepository) createTaskEventsAfterRelease(
|
||||
filteredDatas,
|
||||
makeEventTypeArr(eventType, len(filteredExternalIds)),
|
||||
make([]string, len(filteredExternalIds)),
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2598,6 +2601,7 @@ func (r *sharedRepository) createTaskEvents(
|
||||
eventDatas [][]byte,
|
||||
eventTypes []sqlcv1.V1TaskEventType,
|
||||
eventKeys []string,
|
||||
eventExternalIds *[]string,
|
||||
) ([]InternalTaskEvent, error) {
|
||||
if len(tasks) != len(eventDatas) {
|
||||
return nil, fmt.Errorf("mismatched task and event data lengths")
|
||||
@@ -2622,6 +2626,11 @@ func (r *sharedRepository) createTaskEvents(
|
||||
eventTypesStrs[i] = string(eventTypes[i])
|
||||
|
||||
externalId := sqlchelpers.UUIDFromStr(uuid.NewString())
|
||||
|
||||
if eventExternalIds != nil {
|
||||
externalId = sqlchelpers.UUIDFromStr((*eventExternalIds)[i])
|
||||
}
|
||||
|
||||
externalIds[i] = externalId
|
||||
|
||||
// important: if we don't set this to `eventDatas[i]` and instead allow it to be nil optionally
|
||||
|
||||
Reference in New Issue
Block a user