diff --git a/api-contracts/workflows/workflows.proto b/api-contracts/workflows/workflows.proto index e4964c807..0c3bd532d 100644 --- a/api-contracts/workflows/workflows.proto +++ b/api-contracts/workflows/workflows.proto @@ -48,9 +48,10 @@ message CreateWorkflowVersionOpts { enum ConcurrencyLimitStrategy { CANCEL_IN_PROGRESS = 0; - DROP_NEWEST = 1; - QUEUE_NEWEST = 2; + DROP_NEWEST = 1; // deprecated + QUEUE_NEWEST = 2; // deprecated GROUP_ROUND_ROBIN = 3; + CANCEL_NEWEST = 4; } message WorkflowConcurrencyOpts { diff --git a/internal/services/admin/contracts/workflows.pb.go b/internal/services/admin/contracts/workflows.pb.go index fd61cbbfe..07afa2190 100644 --- a/internal/services/admin/contracts/workflows.pb.go +++ b/internal/services/admin/contracts/workflows.pb.go @@ -120,9 +120,10 @@ type ConcurrencyLimitStrategy int32 const ( ConcurrencyLimitStrategy_CANCEL_IN_PROGRESS ConcurrencyLimitStrategy = 0 - ConcurrencyLimitStrategy_DROP_NEWEST ConcurrencyLimitStrategy = 1 - ConcurrencyLimitStrategy_QUEUE_NEWEST ConcurrencyLimitStrategy = 2 + ConcurrencyLimitStrategy_DROP_NEWEST ConcurrencyLimitStrategy = 1 // deprecated + ConcurrencyLimitStrategy_QUEUE_NEWEST ConcurrencyLimitStrategy = 2 // deprecated ConcurrencyLimitStrategy_GROUP_ROUND_ROBIN ConcurrencyLimitStrategy = 3 + ConcurrencyLimitStrategy_CANCEL_NEWEST ConcurrencyLimitStrategy = 4 ) // Enum value maps for ConcurrencyLimitStrategy. @@ -132,12 +133,14 @@ var ( 1: "DROP_NEWEST", 2: "QUEUE_NEWEST", 3: "GROUP_ROUND_ROBIN", + 4: "CANCEL_NEWEST", } ConcurrencyLimitStrategy_value = map[string]int32{ "CANCEL_IN_PROGRESS": 0, "DROP_NEWEST": 1, "QUEUE_NEWEST": 2, "GROUP_ROUND_ROBIN": 3, + "CANCEL_NEWEST": 4, } ) @@ -1986,55 +1989,56 @@ var file_workflows_proto_rawDesc = []byte{ 0x32, 0x0a, 0x0c, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x0c, 0x0a, 0x08, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x55, 0x52, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x44, 0x41, - 0x47, 0x10, 0x02, 0x2a, 0x6c, 0x0a, 0x18, 0x43, 0x6f, 0x6e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, + 0x47, 0x10, 0x02, 0x2a, 0x7f, 0x0a, 0x18, 0x43, 0x6f, 0x6e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x53, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67, 0x79, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, 0x49, 0x4e, 0x5f, 0x50, 0x52, 0x4f, 0x47, 0x52, 0x45, 0x53, 0x53, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x44, 0x52, 0x4f, 0x50, 0x5f, 0x4e, 0x45, 0x57, 0x45, 0x53, 0x54, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x51, 0x55, 0x45, 0x55, 0x45, 0x5f, 0x4e, 0x45, 0x57, 0x45, 0x53, 0x54, 0x10, 0x02, 0x12, 0x15, 0x0a, 0x11, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x52, 0x4f, 0x55, 0x4e, 0x44, 0x5f, 0x52, 0x4f, 0x42, 0x49, 0x4e, 0x10, - 0x03, 0x2a, 0x85, 0x01, 0x0a, 0x15, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x4c, 0x61, 0x62, 0x65, - 0x6c, 0x43, 0x6f, 0x6d, 0x70, 0x61, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x09, 0x0a, 0x05, 0x45, - 0x51, 0x55, 0x41, 0x4c, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x4e, 0x4f, 0x54, 0x5f, 0x45, 0x51, - 0x55, 0x41, 0x4c, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x47, 0x52, 0x45, 0x41, 0x54, 0x45, 0x52, - 0x5f, 0x54, 0x48, 0x41, 0x4e, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, 0x47, 0x52, 0x45, 0x41, 0x54, - 0x45, 0x52, 0x5f, 0x54, 0x48, 0x41, 0x4e, 0x5f, 0x4f, 0x52, 0x5f, 0x45, 0x51, 0x55, 0x41, 0x4c, - 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x4c, 0x45, 0x53, 0x53, 0x5f, 0x54, 0x48, 0x41, 0x4e, 0x10, - 0x04, 0x12, 0x16, 0x0a, 0x12, 0x4c, 0x45, 0x53, 0x53, 0x5f, 0x54, 0x48, 0x41, 0x4e, 0x5f, 0x4f, - 0x52, 0x5f, 0x45, 0x51, 0x55, 0x41, 0x4c, 0x10, 0x05, 0x2a, 0x5d, 0x0a, 0x11, 0x52, 0x61, 0x74, - 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x0a, - 0x0a, 0x06, 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x49, - 0x4e, 0x55, 0x54, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x4f, 0x55, 0x52, 0x10, 0x02, - 0x12, 0x07, 0x0a, 0x03, 0x44, 0x41, 0x59, 0x10, 0x03, 0x12, 0x08, 0x0a, 0x04, 0x57, 0x45, 0x45, - 0x4b, 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x4f, 0x4e, 0x54, 0x48, 0x10, 0x05, 0x12, 0x08, - 0x0a, 0x04, 0x59, 0x45, 0x41, 0x52, 0x10, 0x06, 0x32, 0xdc, 0x02, 0x0a, 0x0f, 0x57, 0x6f, 0x72, - 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x34, 0x0a, 0x0b, - 0x50, 0x75, 0x74, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, 0x13, 0x2e, 0x50, 0x75, - 0x74, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x10, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x3e, 0x0a, 0x10, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x57, 0x6f, - 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, 0x18, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, - 0x65, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x10, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x44, 0x0a, 0x0f, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, - 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, 0x17, 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, - 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x18, - 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x42, 0x75, 0x6c, 0x6b, - 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, - 0x1b, 0x2e, 0x42, 0x75, 0x6c, 0x6b, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, - 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x42, + 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x5f, 0x4e, 0x45, 0x57, 0x45, + 0x53, 0x54, 0x10, 0x04, 0x2a, 0x85, 0x01, 0x0a, 0x15, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x4c, + 0x61, 0x62, 0x65, 0x6c, 0x43, 0x6f, 0x6d, 0x70, 0x61, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x09, + 0x0a, 0x05, 0x45, 0x51, 0x55, 0x41, 0x4c, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x4e, 0x4f, 0x54, + 0x5f, 0x45, 0x51, 0x55, 0x41, 0x4c, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x47, 0x52, 0x45, 0x41, + 0x54, 0x45, 0x52, 0x5f, 0x54, 0x48, 0x41, 0x4e, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, 0x47, 0x52, + 0x45, 0x41, 0x54, 0x45, 0x52, 0x5f, 0x54, 0x48, 0x41, 0x4e, 0x5f, 0x4f, 0x52, 0x5f, 0x45, 0x51, + 0x55, 0x41, 0x4c, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x4c, 0x45, 0x53, 0x53, 0x5f, 0x54, 0x48, + 0x41, 0x4e, 0x10, 0x04, 0x12, 0x16, 0x0a, 0x12, 0x4c, 0x45, 0x53, 0x53, 0x5f, 0x54, 0x48, 0x41, + 0x4e, 0x5f, 0x4f, 0x52, 0x5f, 0x45, 0x51, 0x55, 0x41, 0x4c, 0x10, 0x05, 0x2a, 0x5d, 0x0a, 0x11, + 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, + 0x06, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x4f, 0x55, + 0x52, 0x10, 0x02, 0x12, 0x07, 0x0a, 0x03, 0x44, 0x41, 0x59, 0x10, 0x03, 0x12, 0x08, 0x0a, 0x04, + 0x57, 0x45, 0x45, 0x4b, 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x4f, 0x4e, 0x54, 0x48, 0x10, + 0x05, 0x12, 0x08, 0x0a, 0x04, 0x59, 0x45, 0x41, 0x52, 0x10, 0x06, 0x32, 0xdc, 0x02, 0x0a, 0x0f, + 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, + 0x34, 0x0a, 0x0b, 0x50, 0x75, 0x74, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, 0x13, + 0x2e, 0x50, 0x75, 0x74, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x10, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x56, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x3e, 0x0a, 0x10, 0x53, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, + 0x65, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, 0x18, 0x2e, 0x53, 0x63, 0x68, 0x65, + 0x64, 0x75, 0x6c, 0x65, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x10, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x56, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x44, 0x0a, 0x0f, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, + 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x12, 0x17, 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, + 0x65, 0x72, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x18, 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, 0x6b, 0x66, + 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x42, 0x75, 0x6c, 0x6b, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, - 0x6f, 0x77, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3b, 0x0a, 0x0c, 0x50, 0x75, - 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x14, 0x2e, 0x50, 0x75, 0x74, - 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x15, 0x2e, 0x50, 0x75, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x42, 0x5a, 0x40, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x68, 0x61, 0x74, 0x63, 0x68, 0x65, 0x74, 0x2d, 0x64, 0x65, - 0x76, 0x2f, 0x68, 0x61, 0x74, 0x63, 0x68, 0x65, 0x74, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, - 0x61, 0x6c, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2f, 0x61, 0x64, 0x6d, 0x69, - 0x6e, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x61, 0x63, 0x74, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x6f, 0x77, 0x12, 0x1b, 0x2e, 0x42, 0x75, 0x6c, 0x6b, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, + 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x1c, 0x2e, 0x42, 0x75, 0x6c, 0x6b, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x57, 0x6f, 0x72, + 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3b, 0x0a, + 0x0c, 0x50, 0x75, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x14, 0x2e, + 0x50, 0x75, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x15, 0x2e, 0x50, 0x75, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, + 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x42, 0x5a, 0x40, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x68, 0x61, 0x74, 0x63, 0x68, 0x65, 0x74, + 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x68, 0x61, 0x74, 0x63, 0x68, 0x65, 0x74, 0x2f, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2f, 0x61, + 0x64, 0x6d, 0x69, 0x6e, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x61, 0x63, 0x74, 0x73, 0x62, 0x06, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/internal/services/controllers/workflows/controller.go b/internal/services/controllers/workflows/controller.go index 44f4e6d0e..f460f3fa4 100644 --- a/internal/services/controllers/workflows/controller.go +++ b/internal/services/controllers/workflows/controller.go @@ -23,6 +23,7 @@ import ( hatcheterrors "github.com/hatchet-dev/hatchet/pkg/errors" "github.com/hatchet-dev/hatchet/pkg/logger" "github.com/hatchet-dev/hatchet/pkg/repository" + "github.com/hatchet-dev/hatchet/pkg/repository/cache" "github.com/hatchet-dev/hatchet/pkg/repository/prisma/db" "github.com/hatchet-dev/hatchet/pkg/repository/prisma/dbsqlc" "github.com/hatchet-dev/hatchet/pkg/repository/prisma/sqlchelpers" @@ -45,7 +46,8 @@ type WorkflowsControllerImpl struct { processWorkflowEventsOps *queueutils.OperationPool unpausedWorkflowRunsOps *queueutils.OperationPool bumpQueueOps *queueutils.OperationPool - queueMutex sync.Map + + workflowVersionCache *cache.Cache } type WorkflowsControllerOpt func(*WorkflowsControllerOpts) @@ -170,6 +172,10 @@ func New(fs ...WorkflowsControllerOpt) (*WorkflowsControllerImpl, error) { func (wc *WorkflowsControllerImpl) Start() (func() error, error) { wc.l.Debug().Msg("starting workflows controller") + workflowVersionCache := cache.New(60 * time.Second) + + wc.workflowVersionCache = workflowVersionCache + ctx, cancel := context.WithCancel(context.Background()) wg := sync.WaitGroup{} @@ -297,6 +303,8 @@ func (wc *WorkflowsControllerImpl) Start() (func() error, error) { return fmt.Errorf("could not shutdown scheduler: %w", err) } + workflowVersionCache.Stop() + return nil } @@ -536,11 +544,7 @@ func (wc *WorkflowsControllerImpl) runPollActiveQueuesTenant(ctx context.Context errGroup.Go(func() error { workflowVersionId := sqlchelpers.UUIDToStr(toQueue.WorkflowVersionId) tenantId := sqlchelpers.UUIDToStr(toQueue.TenantId) - var key *string - if toQueue.ConcurrencyGroupId.Valid { - key = &toQueue.ConcurrencyGroupId.String - } - err := wc.bumpQueue(ctx, tenantId, workflowVersionId, key) + err := wc.bumpQueue(ctx, tenantId, workflowVersionId) return err }) } @@ -548,8 +552,8 @@ func (wc *WorkflowsControllerImpl) runPollActiveQueuesTenant(ctx context.Context return false, errGroup.Wait() } -func (wc *WorkflowsControllerImpl) bumpQueue(ctx context.Context, tenantId string, workflowVersionId string, groupKey *string) error { - workflowVersion, err := wc.repo.Workflow().GetWorkflowVersionById(ctx, tenantId, workflowVersionId) +func (wc *WorkflowsControllerImpl) bumpQueue(ctx context.Context, tenantId string, workflowVersionId string) error { + workflowVersion, err := wc.getWorkflowVersion(ctx, tenantId, workflowVersionId) if err != nil { return fmt.Errorf("could not get workflow version: %w", err) @@ -558,12 +562,11 @@ func (wc *WorkflowsControllerImpl) bumpQueue(ctx context.Context, tenantId strin if workflowVersion.ConcurrencyLimitStrategy.Valid { switch workflowVersion.ConcurrencyLimitStrategy.ConcurrencyLimitStrategy { case dbsqlc.ConcurrencyLimitStrategyCANCELINPROGRESS: - if groupKey == nil { - return fmt.Errorf("group key is required for cancel in progress strategy") - } - err = wc.queueByCancelInProgress(ctx, tenantId, *groupKey, workflowVersion) + err = wc.queueByCancelInProgress(ctx, tenantId, workflowVersion) case dbsqlc.ConcurrencyLimitStrategyGROUPROUNDROBIN: err = wc.queueByGroupRoundRobin(ctx, tenantId, workflowVersion) + case dbsqlc.ConcurrencyLimitStrategyCANCELNEWEST: + err = wc.queueByCancelNewest(ctx, tenantId, workflowVersion) default: return fmt.Errorf("unimplemented concurrency limit strategy: %s", workflowVersion.ConcurrencyLimitStrategy.ConcurrencyLimitStrategy) } @@ -572,6 +575,24 @@ func (wc *WorkflowsControllerImpl) bumpQueue(ctx context.Context, tenantId strin return err } +func (wc *WorkflowsControllerImpl) getWorkflowVersion(ctx context.Context, tenantId, workflowVersionId string) (*dbsqlc.GetWorkflowVersionForEngineRow, error) { + cachedWorkflowVersion, ok := wc.workflowVersionCache.Get(workflowVersionId) + + if ok { + return cachedWorkflowVersion.(*dbsqlc.GetWorkflowVersionForEngineRow), nil + } + + workflowVersion, err := wc.repo.Workflow().GetWorkflowVersionById(ctx, tenantId, workflowVersionId) + + if err != nil { + return nil, fmt.Errorf("could not get workflow version: %w", err) + } + + wc.workflowVersionCache.Set(workflowVersionId, workflowVersion) + + return workflowVersion, nil +} + func (wc *WorkflowsControllerImpl) handleGroupKeyRunFailed(ctx context.Context, task *msgqueue.Message) error { ctx, span := telemetry.NewSpan(ctx, "handle-group-key-run-failed") // nolint: ineffassign defer span.End() diff --git a/internal/services/controllers/workflows/queue.go b/internal/services/controllers/workflows/queue.go index 90d4bdeca..ee453f082 100644 --- a/internal/services/controllers/workflows/queue.go +++ b/internal/services/controllers/workflows/queue.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "sync" "time" "golang.org/x/sync/errgroup" @@ -598,84 +597,91 @@ func (ec *WorkflowsControllerImpl) runGetGroupKeyRunReassignTenant(ctx context.C return g.Wait() } -func (wc *WorkflowsControllerImpl) getLock(key string) *sync.Mutex { - actual, _ := wc.queueMutex.LoadOrStore(key, &sync.Mutex{}) - return actual.(*sync.Mutex) -} - -func (wc *WorkflowsControllerImpl) queueByCancelInProgress(ctx context.Context, tenantId, groupKey string, workflowVersion *dbsqlc.GetWorkflowVersionForEngineRow) error { +func (wc *WorkflowsControllerImpl) queueByCancelInProgress(ctx context.Context, tenantId string, workflowVersion *dbsqlc.GetWorkflowVersionForEngineRow) error { ctx, span := telemetry.NewSpan(ctx, "queue-by-cancel-in-progress") defer span.End() - wc.l.Info().Msgf("handling queue with strategy CANCEL_IN_PROGRESS for %s", groupKey) - - mutex := wc.getLock(fmt.Sprintf("%s:%s", tenantId, groupKey)) - - if ok := mutex.TryLock(); !ok { - return nil - } - - defer mutex.Unlock() - - running := db.WorkflowRunStatusRunning - queued := db.WorkflowRunStatusQueued workflowVersionId := sqlchelpers.UUIDToStr(workflowVersion.WorkflowVersion.ID) maxRuns := int(workflowVersion.ConcurrencyMaxRuns.Int32) - runningWorkflowRuns, err := wc.repo.WorkflowRun().ListWorkflowRuns(ctx, tenantId, &repository.ListWorkflowRunsOpts{ - WorkflowVersionId: &workflowVersionId, - GroupKey: &groupKey, - Statuses: &[]db.WorkflowRunStatus{running}, - OrderBy: repository.StringPtr("createdAt"), - OrderDirection: repository.StringPtr("ASC"), - }) + toCancel, toStart, err := wc.repo.WorkflowRun().PopWorkflowRunsCancelInProgress(ctx, tenantId, workflowVersionId, maxRuns) + if err != nil { - return fmt.Errorf("could not list running workflow runs: %w", err) + return fmt.Errorf("could not pop workflow runs: %w", err) } - queuedWorkflowRuns, err := wc.repo.WorkflowRun().ListWorkflowRuns(ctx, tenantId, &repository.ListWorkflowRunsOpts{ - WorkflowVersionId: &workflowVersionId, - GroupKey: &groupKey, - Statuses: &[]db.WorkflowRunStatus{queued}, - OrderBy: repository.StringPtr("createdAt"), - OrderDirection: repository.StringPtr("ASC"), - }) - if err != nil { - return fmt.Errorf("could not list queued workflow runs: %w", err) - } - - runningCount := len(runningWorkflowRuns.Rows) - queuedCount := len(queuedWorkflowRuns.Rows) - - // Calculate how many runs we need to cancel - toCancel := max(0, runningCount+queuedCount-maxRuns) - // Cancel the oldest running workflows - for i := 0; i < toCancel && i < runningCount; i++ { - row := runningWorkflowRuns.Rows[i] - workflowRunId := sqlchelpers.UUIDToStr(row.WorkflowRun.ID) + for i := range toCancel { + row := toCancel[i] + workflowRunId := sqlchelpers.UUIDToStr(row.ID) err = wc.cancelWorkflowRun(ctx, tenantId, workflowRunId) if err != nil { return fmt.Errorf("could not cancel workflow run: %w", err) } - - err := wc.cancelWorkflowRunJobs(ctx, tenantId, workflowRunId, "CANCEL_IN_PROGRESS") - - if err != nil { - return fmt.Errorf("queueByCancelInProgress: could not cancel workflow run jobs: %w", err) - } - } - // Queue new runs - toQueue := min(maxRuns-(runningCount-toCancel), queuedCount) + for i := range toStart { + row := toStart[i] + workflowRunId := sqlchelpers.UUIDToStr(row.ID) + queuedStepRuns, err := wc.repo.WorkflowRun().QueueWorkflowRunJobs(ctx, tenantId, workflowRunId) - for i := 0; i < toQueue; i++ { - row := queuedWorkflowRuns.Rows[i] - workflowRunId := sqlchelpers.UUIDToStr(row.WorkflowRun.ID) + if err != nil { + return fmt.Errorf("could not queue workflow run jobs: %w", err) + } + g := new(errgroup.Group) + for _, stepRun := range queuedStepRuns { + stepRunCp := stepRun + + g.Go(func() error { + return wc.mq.AddMessage( + ctx, + msgqueue.JOB_PROCESSING_QUEUE, + tasktypes.StepRunQueuedToTask(stepRunCp), + ) + }) + } + + err = g.Wait() + + if err != nil { + return fmt.Errorf("could not start workflow run: %w", err) + } + } + + return nil +} + +func (wc *WorkflowsControllerImpl) queueByCancelNewest(ctx context.Context, tenantId string, workflowVersion *dbsqlc.GetWorkflowVersionForEngineRow) error { + ctx, span := telemetry.NewSpan(ctx, "queue-by-cancel-newest") + defer span.End() + + workflowVersionId := sqlchelpers.UUIDToStr(workflowVersion.WorkflowVersion.ID) + maxRuns := int(workflowVersion.ConcurrencyMaxRuns.Int32) + + toCancel, toStart, err := wc.repo.WorkflowRun().PopWorkflowRunsCancelNewest(ctx, tenantId, workflowVersionId, maxRuns) + + if err != nil { + return fmt.Errorf("could not pop workflow runs: %w", err) + } + + // Cancel the oldest running workflows + for i := range toCancel { + row := toCancel[i] + workflowRunId := sqlchelpers.UUIDToStr(row.ID) + + err = wc.cancelWorkflowRun(ctx, tenantId, workflowRunId) + + if err != nil { + return fmt.Errorf("could not cancel workflow run: %w", err) + } + } + + for i := range toStart { + row := toStart[i] + workflowRunId := sqlchelpers.UUIDToStr(row.ID) queuedStepRuns, err := wc.repo.WorkflowRun().QueueWorkflowRunJobs(ctx, tenantId, workflowRunId) if err != nil { @@ -710,12 +716,11 @@ func (wc *WorkflowsControllerImpl) queueByGroupRoundRobin(ctx context.Context, t defer span.End() workflowVersionId := sqlchelpers.UUIDToStr(workflowVersion.WorkflowVersion.ID) - workflowId := sqlchelpers.UUIDToStr(workflowVersion.WorkflowVersion.WorkflowId) maxRuns := int(workflowVersion.ConcurrencyMaxRuns.Int32) wc.l.Info().Msgf("handling queue with strategy GROUP_ROUND_ROBIN for workflow version %s", workflowVersionId) - _, startableStepRuns, err := wc.repo.WorkflowRun().PopWorkflowRunsRoundRobin(ctx, tenantId, workflowId, maxRuns) + _, startableStepRuns, err := wc.repo.WorkflowRun().PopWorkflowRunsRoundRobin(ctx, tenantId, workflowVersionId, maxRuns) if err != nil { return fmt.Errorf("could not pop workflow runs: %w", err) @@ -746,9 +751,7 @@ func (wc *WorkflowsControllerImpl) queueByGroupRoundRobin(ctx context.Context, t func (wc *WorkflowsControllerImpl) cancelWorkflowRun(ctx context.Context, tenantId, workflowRunId string) error { // cancel all running step runs - runningStatus := dbsqlc.StepRunStatusRUNNING stepRuns, err := wc.repo.StepRun().ListStepRuns(ctx, tenantId, &repository.ListStepRunsOpts{ - Status: &runningStatus, WorkflowRunIds: []string{ workflowRunId, }, diff --git a/pkg/client/admin.go b/pkg/client/admin.go index e319fd24d..34a0ccce4 100644 --- a/pkg/client/admin.go +++ b/pkg/client/admin.go @@ -444,6 +444,8 @@ func (a *adminClientImpl) getPutRequest(workflow *types.Workflow) (*admincontrac limitStrat = admincontracts.ConcurrencyLimitStrategy_CANCEL_IN_PROGRESS case types.GroupRoundRobin: limitStrat = admincontracts.ConcurrencyLimitStrategy_GROUP_ROUND_ROBIN + case types.CancelNewest: + limitStrat = admincontracts.ConcurrencyLimitStrategy_CANCEL_NEWEST default: limitStrat = admincontracts.ConcurrencyLimitStrategy_CANCEL_IN_PROGRESS } diff --git a/pkg/client/types/file.go b/pkg/client/types/file.go index 5c6806149..2ddf178ea 100644 --- a/pkg/client/types/file.go +++ b/pkg/client/types/file.go @@ -44,6 +44,7 @@ type WorkflowConcurrencyLimitStrategy string const ( CancelInProgress WorkflowConcurrencyLimitStrategy = "CANCEL_IN_PROGRESS" + CancelNewest WorkflowConcurrencyLimitStrategy = "CANCEL_NEWEST" GroupRoundRobin WorkflowConcurrencyLimitStrategy = "GROUP_ROUND_ROBIN" ) diff --git a/pkg/repository/prisma/dbsqlc/models.go b/pkg/repository/prisma/dbsqlc/models.go index 727c4bed4..7c7c7cee4 100644 --- a/pkg/repository/prisma/dbsqlc/models.go +++ b/pkg/repository/prisma/dbsqlc/models.go @@ -18,6 +18,7 @@ const ( ConcurrencyLimitStrategyDROPNEWEST ConcurrencyLimitStrategy = "DROP_NEWEST" ConcurrencyLimitStrategyQUEUENEWEST ConcurrencyLimitStrategy = "QUEUE_NEWEST" ConcurrencyLimitStrategyGROUPROUNDROBIN ConcurrencyLimitStrategy = "GROUP_ROUND_ROBIN" + ConcurrencyLimitStrategyCANCELNEWEST ConcurrencyLimitStrategy = "CANCEL_NEWEST" ) func (e *ConcurrencyLimitStrategy) Scan(src interface{}) error { @@ -987,11 +988,13 @@ func (ns NullWorkflowKind) Value() (driver.Value, error) { type WorkflowRunStatus string const ( - WorkflowRunStatusPENDING WorkflowRunStatus = "PENDING" - WorkflowRunStatusRUNNING WorkflowRunStatus = "RUNNING" - WorkflowRunStatusSUCCEEDED WorkflowRunStatus = "SUCCEEDED" - WorkflowRunStatusFAILED WorkflowRunStatus = "FAILED" - WorkflowRunStatusQUEUED WorkflowRunStatus = "QUEUED" + WorkflowRunStatusPENDING WorkflowRunStatus = "PENDING" + WorkflowRunStatusRUNNING WorkflowRunStatus = "RUNNING" + WorkflowRunStatusSUCCEEDED WorkflowRunStatus = "SUCCEEDED" + WorkflowRunStatusFAILED WorkflowRunStatus = "FAILED" + WorkflowRunStatusQUEUED WorkflowRunStatus = "QUEUED" + WorkflowRunStatusCANCELLING WorkflowRunStatus = "CANCELLING" + WorkflowRunStatusCANCELLED WorkflowRunStatus = "CANCELLED" ) func (e *WorkflowRunStatus) Scan(src interface{}) error { diff --git a/pkg/repository/prisma/dbsqlc/workflow_runs.sql b/pkg/repository/prisma/dbsqlc/workflow_runs.sql index 7fdacd4dd..fbfe201da 100644 --- a/pkg/repository/prisma/dbsqlc/workflow_runs.sql +++ b/pkg/repository/prisma/dbsqlc/workflow_runs.sql @@ -96,7 +96,7 @@ FROM -- name: WorkflowRunsMetricsCount :one SELECT COUNT(CASE WHEN runs."status" = 'PENDING' THEN 1 END) AS "PENDING", - COUNT(CASE WHEN runs."status" = 'RUNNING' THEN 1 END) AS "RUNNING", + COUNT(CASE WHEN runs."status" = 'RUNNING' OR runs."status" = 'CANCELLING' THEN 1 END) AS "RUNNING", COUNT(CASE WHEN runs."status" = 'SUCCEEDED' THEN 1 END) AS "SUCCEEDED", COUNT(CASE WHEN runs."status" = 'FAILED' THEN 1 END) AS "FAILED", COUNT(CASE WHEN runs."status" = 'QUEUED' THEN 1 END) AS "QUEUED" @@ -246,6 +246,50 @@ OFFSET LIMIT COALESCE(sqlc.narg('limit'), 50); +-- name: LockWorkflowRunsForQueueing :many +-- Locks any workflow runs which are in a RUNNING or QUEUED state, and have a matching concurrencyGroupId in a QUEUED state +WITH queued_wrs AS ( + SELECT + DISTINCT ON (wr."concurrencyGroupId") + wr."concurrencyGroupId" + FROM + "WorkflowRun" wr + LEFT JOIN + "WorkflowVersion" workflowVersion ON wr."workflowVersionId" = workflowVersion."id" + WHERE + wr."tenantId" = @tenantId::uuid AND + wr."deletedAt" IS NULL AND + workflowVersion."deletedAt" IS NULL AND + wr."status" = 'QUEUED' AND + workflowVersion."id" = @workflowVersionId::uuid +) +SELECT + wr.* +FROM + "WorkflowRun" wr +LEFT JOIN + "WorkflowVersion" workflowVersion ON wr."workflowVersionId" = workflowVersion."id" +WHERE + wr."tenantId" = @tenantId::uuid AND + wr."deletedAt" IS NULL AND + workflowVersion."deletedAt" IS NULL AND + (wr."status" = 'QUEUED' OR wr."status" = 'RUNNING') AND + workflowVersion."id" = @workflowVersionId::uuid AND + wr."concurrencyGroupId" IN (SELECT "concurrencyGroupId" FROM queued_wrs) +ORDER BY + wr."createdAt" ASC, wr."insertOrder" ASC +FOR UPDATE; + +-- name: MarkWorkflowRunsCancelling :exec +UPDATE + "WorkflowRun" +SET + "status" = 'CANCELLING' +WHERE + "tenantId" = @tenantId::uuid AND + "id" = ANY(@ids::uuid[]) AND + ("status" = 'PENDING' OR "status" = 'QUEUED' OR "status" = 'RUNNING'); + -- name: PopWorkflowRunsRoundRobin :many WITH workflow_runs AS ( SELECT @@ -262,7 +306,7 @@ WITH workflow_runs AS ( r2."deletedAt" IS NULL AND workflowVersion."deletedAt" IS NULL AND (r2."status" = 'QUEUED' OR r2."status" = 'RUNNING') AND - workflowVersion."workflowId" = @workflowId::uuid + workflowVersion."id" = @workflowVersionId::uuid ORDER BY rn, seqnum ASC ), min_rn AS ( @@ -1196,7 +1240,6 @@ WHERE RETURNING (SELECT has_more FROM has_more) as has_more; - -- name: ListActiveQueuedWorkflowVersions :many WITH QueuedRuns AS ( SELECT DISTINCT ON (wr."workflowVersionId") diff --git a/pkg/repository/prisma/dbsqlc/workflow_runs.sql.go b/pkg/repository/prisma/dbsqlc/workflow_runs.sql.go index eb43fecdd..299d3dbc1 100644 --- a/pkg/repository/prisma/dbsqlc/workflow_runs.sql.go +++ b/pkg/repository/prisma/dbsqlc/workflow_runs.sql.go @@ -2691,6 +2691,108 @@ func (q *Queries) ListWorkflowRuns(ctx context.Context, db DBTX, arg ListWorkflo return items, nil } +const lockWorkflowRunsForQueueing = `-- name: LockWorkflowRunsForQueueing :many +WITH queued_wrs AS ( + SELECT + DISTINCT ON (wr."concurrencyGroupId") + wr."concurrencyGroupId" + FROM + "WorkflowRun" wr + LEFT JOIN + "WorkflowVersion" workflowVersion ON wr."workflowVersionId" = workflowVersion."id" + WHERE + wr."tenantId" = $1::uuid AND + wr."deletedAt" IS NULL AND + workflowVersion."deletedAt" IS NULL AND + wr."status" = 'QUEUED' AND + workflowVersion."id" = $2::uuid +) +SELECT + wr."createdAt", wr."updatedAt", wr."deletedAt", wr."tenantId", wr."workflowVersionId", wr.status, wr.error, wr."startedAt", wr."finishedAt", wr."concurrencyGroupId", wr."displayName", wr.id, wr."childIndex", wr."childKey", wr."parentId", wr."parentStepRunId", wr."additionalMetadata", wr.duration, wr.priority, wr."insertOrder" +FROM + "WorkflowRun" wr +LEFT JOIN + "WorkflowVersion" workflowVersion ON wr."workflowVersionId" = workflowVersion."id" +WHERE + wr."tenantId" = $1::uuid AND + wr."deletedAt" IS NULL AND + workflowVersion."deletedAt" IS NULL AND + (wr."status" = 'QUEUED' OR wr."status" = 'RUNNING') AND + workflowVersion."id" = $2::uuid AND + wr."concurrencyGroupId" IN (SELECT "concurrencyGroupId" FROM queued_wrs) +ORDER BY + wr."createdAt" ASC, wr."insertOrder" ASC +FOR UPDATE +` + +type LockWorkflowRunsForQueueingParams struct { + Tenantid pgtype.UUID `json:"tenantid"` + Workflowversionid pgtype.UUID `json:"workflowversionid"` +} + +// Locks any workflow runs which are in a RUNNING or QUEUED state, and have a matching concurrencyGroupId in a QUEUED state +func (q *Queries) LockWorkflowRunsForQueueing(ctx context.Context, db DBTX, arg LockWorkflowRunsForQueueingParams) ([]*WorkflowRun, error) { + rows, err := db.Query(ctx, lockWorkflowRunsForQueueing, arg.Tenantid, arg.Workflowversionid) + if err != nil { + return nil, err + } + defer rows.Close() + var items []*WorkflowRun + for rows.Next() { + var i WorkflowRun + if err := rows.Scan( + &i.CreatedAt, + &i.UpdatedAt, + &i.DeletedAt, + &i.TenantId, + &i.WorkflowVersionId, + &i.Status, + &i.Error, + &i.StartedAt, + &i.FinishedAt, + &i.ConcurrencyGroupId, + &i.DisplayName, + &i.ID, + &i.ChildIndex, + &i.ChildKey, + &i.ParentId, + &i.ParentStepRunId, + &i.AdditionalMetadata, + &i.Duration, + &i.Priority, + &i.InsertOrder, + ); err != nil { + return nil, err + } + items = append(items, &i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const markWorkflowRunsCancelling = `-- name: MarkWorkflowRunsCancelling :exec +UPDATE + "WorkflowRun" +SET + "status" = 'CANCELLING' +WHERE + "tenantId" = $1::uuid AND + "id" = ANY($2::uuid[]) AND + ("status" = 'PENDING' OR "status" = 'QUEUED' OR "status" = 'RUNNING') +` + +type MarkWorkflowRunsCancellingParams struct { + Tenantid pgtype.UUID `json:"tenantid"` + Ids []pgtype.UUID `json:"ids"` +} + +func (q *Queries) MarkWorkflowRunsCancelling(ctx context.Context, db DBTX, arg MarkWorkflowRunsCancellingParams) error { + _, err := db.Exec(ctx, markWorkflowRunsCancelling, arg.Tenantid, arg.Ids) + return err +} + const popWorkflowRunsRoundRobin = `-- name: PopWorkflowRunsRoundRobin :many WITH workflow_runs AS ( SELECT @@ -2707,7 +2809,7 @@ WITH workflow_runs AS ( r2."deletedAt" IS NULL AND workflowVersion."deletedAt" IS NULL AND (r2."status" = 'QUEUED' OR r2."status" = 'RUNNING') AND - workflowVersion."workflowId" = $2::uuid + workflowVersion."id" = $2::uuid ORDER BY rn, seqnum ASC ), min_rn AS ( @@ -2756,13 +2858,13 @@ RETURNING ` type PopWorkflowRunsRoundRobinParams struct { - Tenantid pgtype.UUID `json:"tenantid"` - Workflowid pgtype.UUID `json:"workflowid"` - Maxruns int32 `json:"maxruns"` + Tenantid pgtype.UUID `json:"tenantid"` + Workflowversionid pgtype.UUID `json:"workflowversionid"` + Maxruns int32 `json:"maxruns"` } func (q *Queries) PopWorkflowRunsRoundRobin(ctx context.Context, db DBTX, arg PopWorkflowRunsRoundRobinParams) ([]*WorkflowRun, error) { - rows, err := db.Query(ctx, popWorkflowRunsRoundRobin, arg.Tenantid, arg.Workflowid, arg.Maxruns) + rows, err := db.Query(ctx, popWorkflowRunsRoundRobin, arg.Tenantid, arg.Workflowversionid, arg.Maxruns) if err != nil { return nil, err } @@ -3329,7 +3431,7 @@ func (q *Queries) UpdateWorkflowRunStickyState(ctx context.Context, db DBTX, arg const workflowRunsMetricsCount = `-- name: WorkflowRunsMetricsCount :one SELECT COUNT(CASE WHEN runs."status" = 'PENDING' THEN 1 END) AS "PENDING", - COUNT(CASE WHEN runs."status" = 'RUNNING' THEN 1 END) AS "RUNNING", + COUNT(CASE WHEN runs."status" = 'RUNNING' OR runs."status" = 'CANCELLING' THEN 1 END) AS "RUNNING", COUNT(CASE WHEN runs."status" = 'SUCCEEDED' THEN 1 END) AS "SUCCEEDED", COUNT(CASE WHEN runs."status" = 'FAILED' THEN 1 END) AS "FAILED", COUNT(CASE WHEN runs."status" = 'QUEUED' THEN 1 END) AS "QUEUED" diff --git a/pkg/repository/prisma/workflow_run.go b/pkg/repository/prisma/workflow_run.go index bd62bbef7..9aa2fab83 100644 --- a/pkg/repository/prisma/workflow_run.go +++ b/pkg/repository/prisma/workflow_run.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "math" + "sort" "strconv" "strings" "time" @@ -838,7 +839,222 @@ func (w *workflowRunEngineRepository) GetScheduledChildWorkflowRun(ctx context.C return w.queries.GetScheduledChildWorkflowRun(ctx, w.pool, childParams) } -func (w *workflowRunEngineRepository) PopWorkflowRunsRoundRobin(ctx context.Context, tenantId string, workflowId string, maxRuns int) ([]*dbsqlc.WorkflowRun, []*dbsqlc.GetStepRunForEngineRow, error) { + +func (w *workflowRunEngineRepository) PopWorkflowRunsCancelInProgress(ctx context.Context, tenantId, workflowVersionId string, maxRuns int) ([]*dbsqlc.WorkflowRun, []*dbsqlc.WorkflowRun, error) { + ctx, span := telemetry.NewSpan(ctx, "queue-by-cancel-in-progress") + defer span.End() + + tx, commit, rollback, err := sqlchelpers.PrepareTx(ctx, w.pool, w.l, 15000) + + if err != nil { + return nil, nil, err + } + + defer rollback() + + // place a FOR UPDATE lock on queued and running workflow runs to prevent concurrent updates + allToProcess, err := w.queries.LockWorkflowRunsForQueueing(ctx, tx, dbsqlc.LockWorkflowRunsForQueueingParams{ + Tenantid: sqlchelpers.UUIDFromStr(tenantId), + Workflowversionid: sqlchelpers.UUIDFromStr(workflowVersionId), + }) + + if err != nil { + return nil, nil, fmt.Errorf("could not lock workflow runs for queueing: %w", err) + } + + // group each workflow run by concurrency key + keyToWorkflowRuns := make(map[string][]*dbsqlc.WorkflowRun) + + for _, row := range allToProcess { + key := row.ConcurrencyGroupId.String + + if _, ok := keyToWorkflowRuns[key]; !ok { + keyToWorkflowRuns[key] = make([]*dbsqlc.WorkflowRun, 0) + } + + keyToWorkflowRuns[key] = append(keyToWorkflowRuns[key], row) + } + + allToCancel := make([]*dbsqlc.WorkflowRun, 0) + allToStart := make([]*dbsqlc.WorkflowRun, 0) + cancelIds := make([]pgtype.UUID, 0) + + for _, toProcess := range keyToWorkflowRuns { + runningWorkflowRuns := make([]*dbsqlc.WorkflowRun, 0, len(toProcess)) + queuedWorkflowRuns := make([]*dbsqlc.WorkflowRun, 0, len(toProcess)) + + for _, row := range toProcess { + if row.Status == dbsqlc.WorkflowRunStatusRUNNING { + runningWorkflowRuns = append(runningWorkflowRuns, row) + } else if row.Status == dbsqlc.WorkflowRunStatusQUEUED { + queuedWorkflowRuns = append(queuedWorkflowRuns, row) + } + } + + // iterate over the running workflow runs and cancel them + toCancel := max(0, len(runningWorkflowRuns)+len(queuedWorkflowRuns)-maxRuns) + + workflowRunsToCancel := make([]*dbsqlc.WorkflowRun, 0, toCancel) + workflowRunsToStart := make([]*dbsqlc.WorkflowRun, 0, len(queuedWorkflowRuns)) + + for i := 0; i < toCancel && i < len(runningWorkflowRuns); i++ { + row := runningWorkflowRuns[i] + + workflowRunsToCancel = append(workflowRunsToCancel, row) + } + + toCancel -= len(workflowRunsToCancel) + + // additionally, cancel any queued workflow runs that aren't running but should be cancelled + for i := 0; i < toCancel && i < len(queuedWorkflowRuns); i++ { + row := queuedWorkflowRuns[i] + + workflowRunsToCancel = append(workflowRunsToCancel, row) + } + + // start the new runs. anything leftover in the queuedWorkflowRuns slice should be started + for i := toCancel; i < len(queuedWorkflowRuns); i++ { + row := queuedWorkflowRuns[i] + + workflowRunsToStart = append(workflowRunsToStart, row) + } + + for _, row := range workflowRunsToCancel { + cancelIds = append(cancelIds, row.ID) + } + + allToCancel = append(allToCancel, workflowRunsToCancel...) + allToStart = append(allToStart, workflowRunsToStart...) + } + + // cancel the workflow runs + err = w.queries.MarkWorkflowRunsCancelling(ctx, tx, dbsqlc.MarkWorkflowRunsCancellingParams{ + Tenantid: sqlchelpers.UUIDFromStr(tenantId), + Ids: cancelIds, + }) + + if err != nil { + return nil, nil, fmt.Errorf("could not mark workflow runs as cancelling: %w", err) + } + + // commit the transaction + err = commit(ctx) + + if err != nil { + return nil, nil, fmt.Errorf("could not commit transaction: %w", err) + } + + // return the workflow runs to cancel and the ones to start + return allToCancel, allToStart, nil +} + +func (w *workflowRunEngineRepository) PopWorkflowRunsCancelNewest(ctx context.Context, tenantId, workflowVersionId string, maxRuns int) ([]*dbsqlc.WorkflowRun, []*dbsqlc.WorkflowRun, error) { + ctx, span := telemetry.NewSpan(ctx, "queue-by-cancel-newest") + defer span.End() + + tx, commit, rollback, err := sqlchelpers.PrepareTx(ctx, w.pool, w.l, 15000) + + if err != nil { + return nil, nil, err + } + + defer rollback() + + // place a FOR UPDATE lock on queued and running workflow runs to prevent concurrent updates + allToProcess, err := w.queries.LockWorkflowRunsForQueueing(ctx, tx, dbsqlc.LockWorkflowRunsForQueueingParams{ + Tenantid: sqlchelpers.UUIDFromStr(tenantId), + Workflowversionid: sqlchelpers.UUIDFromStr(workflowVersionId), + }) + + if err != nil { + return nil, nil, fmt.Errorf("could not lock workflow runs for queueing: %w", err) + } + + // group each workflow run by concurrency key + keyToWorkflowRuns := make(map[string][]*dbsqlc.WorkflowRun) + + for _, row := range allToProcess { + key := row.ConcurrencyGroupId.String + + if _, ok := keyToWorkflowRuns[key]; !ok { + keyToWorkflowRuns[key] = make([]*dbsqlc.WorkflowRun, 0) + } + + keyToWorkflowRuns[key] = append(keyToWorkflowRuns[key], row) + } + + // reverse the order of the workflow runs + for _, toProcess := range keyToWorkflowRuns { + sort.SliceStable(toProcess, func(i, j int) bool { + return toProcess[i].CreatedAt.Time.After(toProcess[j].CreatedAt.Time) + }) + } + + allToCancel := make([]*dbsqlc.WorkflowRun, 0) + allToStart := make([]*dbsqlc.WorkflowRun, 0) + cancelIds := make([]pgtype.UUID, 0) + + for _, toProcess := range keyToWorkflowRuns { + runningWorkflowRuns := make([]*dbsqlc.WorkflowRun, 0, len(toProcess)) + queuedWorkflowRuns := make([]*dbsqlc.WorkflowRun, 0, len(toProcess)) + + for _, row := range toProcess { + if row.Status == dbsqlc.WorkflowRunStatusRUNNING { + runningWorkflowRuns = append(runningWorkflowRuns, row) + } else if row.Status == dbsqlc.WorkflowRunStatusQUEUED { + queuedWorkflowRuns = append(queuedWorkflowRuns, row) + } + } + + // iterate over the queued workflow runs and cancel them + toCancel := max(0, len(runningWorkflowRuns)+len(queuedWorkflowRuns)-maxRuns) + + workflowRunsToCancel := make([]*dbsqlc.WorkflowRun, 0, toCancel) + workflowRunsToStart := make([]*dbsqlc.WorkflowRun, 0, len(queuedWorkflowRuns)) + + for i := 0; i < toCancel && i < len(queuedWorkflowRuns); i++ { + row := queuedWorkflowRuns[i] + + workflowRunsToCancel = append(workflowRunsToCancel, row) + } + + // start the new runs. anything leftover in the queuedWorkflowRuns slice should be started + for i := toCancel; i < len(queuedWorkflowRuns); i++ { + row := queuedWorkflowRuns[i] + + workflowRunsToStart = append(workflowRunsToStart, row) + } + + for _, row := range workflowRunsToCancel { + cancelIds = append(cancelIds, row.ID) + } + + allToCancel = append(allToCancel, workflowRunsToCancel...) + allToStart = append(allToStart, workflowRunsToStart...) + } + + // cancel the workflow runs + err = w.queries.MarkWorkflowRunsCancelling(ctx, tx, dbsqlc.MarkWorkflowRunsCancellingParams{ + Tenantid: sqlchelpers.UUIDFromStr(tenantId), + Ids: cancelIds, + }) + + if err != nil { + return nil, nil, fmt.Errorf("could not mark workflow runs as cancelling: %w", err) + } + + // commit the transaction + err = commit(ctx) + + if err != nil { + return nil, nil, fmt.Errorf("could not commit transaction: %w", err) + } + + // return the workflow runs to cancel and the ones to start + return allToCancel, allToStart, nil +} + +func (w *workflowRunEngineRepository) PopWorkflowRunsRoundRobin(ctx context.Context, tenantId string, workflowVersionId string, maxRuns int) ([]*dbsqlc.WorkflowRun, []*dbsqlc.GetStepRunForEngineRow, error) { tx, commit, rollback, err := sqlchelpers.PrepareTx(ctx, w.pool, w.l, 15000) @@ -848,9 +1064,9 @@ func (w *workflowRunEngineRepository) PopWorkflowRunsRoundRobin(ctx context.Cont defer rollback() poppedWorkflowRuns, err := w.queries.PopWorkflowRunsRoundRobin(ctx, tx, dbsqlc.PopWorkflowRunsRoundRobinParams{ - Maxruns: int32(maxRuns), // nolint: gosec - Tenantid: sqlchelpers.UUIDFromStr(tenantId), - Workflowid: sqlchelpers.UUIDFromStr(workflowId), + Maxruns: int32(maxRuns), // nolint: gosec + Tenantid: sqlchelpers.UUIDFromStr(tenantId), + Workflowversionid: sqlchelpers.UUIDFromStr(workflowVersionId), }) if err != nil { diff --git a/pkg/repository/workflow.go b/pkg/repository/workflow.go index fedf83fb6..8a765e304 100644 --- a/pkg/repository/workflow.go +++ b/pkg/repository/workflow.go @@ -79,7 +79,7 @@ type CreateWorkflowConcurrencyOpts struct { MaxRuns *int32 // (optional) the strategy to use when the concurrency limit is reached, default CANCEL_IN_PROGRESS - LimitStrategy *string `validate:"omitnil,oneof=CANCEL_IN_PROGRESS DROP_NEWEST QUEUE_NEWEST GROUP_ROUND_ROBIN"` + LimitStrategy *string `validate:"omitnil,oneof=CANCEL_IN_PROGRESS GROUP_ROUND_ROBIN CANCEL_NEWEST"` // (optional) a concurrency expression for evaluating the concurrency key Expression *string `validate:"omitempty,celworkflowrunstr"` diff --git a/pkg/repository/workflow_run.go b/pkg/repository/workflow_run.go index 2d0e2b21f..ae5b63f9d 100644 --- a/pkg/repository/workflow_run.go +++ b/pkg/repository/workflow_run.go @@ -516,7 +516,11 @@ type WorkflowRunEngineRepository interface { GetScheduledChildWorkflowRun(ctx context.Context, parentId, parentStepRunId string, childIndex int, childkey *string) (*dbsqlc.WorkflowTriggerScheduledRef, error) - PopWorkflowRunsRoundRobin(ctx context.Context, tenantId, workflowId string, maxRuns int) ([]*dbsqlc.WorkflowRun, []*dbsqlc.GetStepRunForEngineRow, error) + PopWorkflowRunsCancelInProgress(ctx context.Context, tenantId, workflowVersionId string, maxRuns int) (toCancel []*dbsqlc.WorkflowRun, toStart []*dbsqlc.WorkflowRun, err error) + + PopWorkflowRunsCancelNewest(ctx context.Context, tenantId, workflowVersionId string, maxRuns int) (toCancel []*dbsqlc.WorkflowRun, toStart []*dbsqlc.WorkflowRun, err error) + + PopWorkflowRunsRoundRobin(ctx context.Context, tenantId, workflowVersionId string, maxRuns int) ([]*dbsqlc.WorkflowRun, []*dbsqlc.GetStepRunForEngineRow, error) // CreateNewWorkflowRun creates a new workflow run for a workflow version. CreateNewWorkflowRun(ctx context.Context, tenantId string, opts *CreateWorkflowRunOpts) (*dbsqlc.WorkflowRun, error) diff --git a/sql/migrations/20241217152316_v0.53.0.sql b/sql/migrations/20241217152316_v0.53.0.sql new file mode 100644 index 000000000..ca4fa77e4 --- /dev/null +++ b/sql/migrations/20241217152316_v0.53.0.sql @@ -0,0 +1,6 @@ +-- Add value to enum type: "ConcurrencyLimitStrategy" +ALTER TYPE "ConcurrencyLimitStrategy" ADD VALUE 'CANCEL_NEWEST'; +-- Add value to enum type: "WorkflowRunStatus" +ALTER TYPE "WorkflowRunStatus" ADD VALUE 'CANCELLING'; +-- Add value to enum type: "WorkflowRunStatus" +ALTER TYPE "WorkflowRunStatus" ADD VALUE 'CANCELLED'; \ No newline at end of file diff --git a/sql/migrations/atlas.sum b/sql/migrations/atlas.sum index 96a40715b..c69b93653 100644 --- a/sql/migrations/atlas.sum +++ b/sql/migrations/atlas.sum @@ -1,4 +1,4 @@ -h1:ZEPP1bZnEuI/eplSfrrKg/IMhf0SxmU/GojR0FBTmaA= +h1:kdjvzXzgnUl33aT2nO6fMW3ZNMIfO5zf4/dxS6cp1sQ= 20240115180414_init.sql h1:Ef3ZyjAHkmJPdGF/dEWCahbwgcg6uGJKnDxW2JCRi2k= 20240122014727_v0_6_0.sql h1:o/LdlteAeFgoHJ3e/M4Xnghqt9826IE/Y/h0q95Acuo= 20240126235456_v0_7_0.sql h1:KiVzt/hXgQ6esbdC6OMJOOWuYEXmy1yeCpmsVAHTFKs= @@ -79,3 +79,4 @@ h1:ZEPP1bZnEuI/eplSfrrKg/IMhf0SxmU/GojR0FBTmaA= 20241204191714_v0.52.5.sql h1:6oJgHJynK+YtwQoD/VnqiCMda409K96A4Oq2l8h3dQ0= 20241206231312_v0.52.12.sql h1:6L/zXbiVC24nqSzJzqItPFKCA3HPyMk0T5pBPnmXQgg= 20241216175807_v0.52.13.sql h1:rMwIaYvy3WX/F7/go1J3vI+WNYnABpASv0ATPJt1pE8= +20241217152316_v0.53.0.sql h1:sXmW2KigCn3hGZxCJhSPk6GjO3b+ppDgfMiLz5Xv3RQ= diff --git a/sql/schema/schema.sql b/sql/schema/schema.sql index a050924b2..08b878370 100644 --- a/sql/schema/schema.sql +++ b/sql/schema/schema.sql @@ -1,9 +1,10 @@ -- CreateEnum CREATE TYPE "ConcurrencyLimitStrategy" AS ENUM ( 'CANCEL_IN_PROGRESS', - 'DROP_NEWEST', - 'QUEUE_NEWEST', - 'GROUP_ROUND_ROBIN' + 'DROP_NEWEST', -- DEPRECATED + 'QUEUE_NEWEST', -- DEPRECATED + 'GROUP_ROUND_ROBIN', + 'CANCEL_NEWEST' ); @@ -130,7 +131,7 @@ CREATE TYPE "WorkerType" AS ENUM ('WEBHOOK', 'MANAGED', 'SELFHOSTED'); CREATE TYPE "WorkflowKind" AS ENUM ('FUNCTION', 'DURABLE', 'DAG'); -- CreateEnum -CREATE TYPE "WorkflowRunStatus" AS ENUM ('PENDING', 'RUNNING', 'SUCCEEDED', 'FAILED', 'QUEUED'); +CREATE TYPE "WorkflowRunStatus" AS ENUM ('PENDING', 'RUNNING', 'SUCCEEDED', 'FAILED', 'QUEUED', 'CANCELLING', 'CANCELLED'); -- CreateTable CREATE TABLE "APIToken" (