mirror of
https://github.com/formbricks/formbricks.git
synced 2026-04-15 18:12:27 -05:00
Compare commits
2 Commits
4.9.0-rc.3
...
release/4.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b7ede6c578 | ||
|
|
8204a5c652 |
@@ -51,7 +51,7 @@ describe("SSO Providers", () => {
|
||||
expect((samlProvider as any).authorization?.url).toBe("https://test-app.com/api/auth/saml/authorize");
|
||||
expect(samlProvider.token).toBe("https://test-app.com/api/auth/saml/token");
|
||||
expect(samlProvider.userinfo).toBe("https://test-app.com/api/auth/saml/userinfo");
|
||||
expect(googleProvider.allowDangerousEmailAccountLinking).toBeUndefined();
|
||||
expect(samlProvider.allowDangerousEmailAccountLinking).toBeUndefined();
|
||||
expect((googleProvider as any).options?.allowDangerousEmailAccountLinking).toBe(true);
|
||||
expect(samlProvider.allowDangerousEmailAccountLinking).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -26,6 +26,7 @@ export const getSSOProviders = () => [
|
||||
GoogleProvider({
|
||||
clientId: GOOGLE_CLIENT_ID || "",
|
||||
clientSecret: GOOGLE_CLIENT_SECRET || "",
|
||||
allowDangerousEmailAccountLinking: true,
|
||||
}),
|
||||
AzureAD({
|
||||
clientId: AZUREAD_CLIENT_ID || "",
|
||||
@@ -80,6 +81,7 @@ export const getSSOProviders = () => [
|
||||
clientId: "dummy",
|
||||
clientSecret: "dummy",
|
||||
},
|
||||
allowDangerousEmailAccountLinking: true,
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
@@ -34,8 +34,6 @@ const LINKED_SSO_LOOKUP_SELECT = {
|
||||
identityProviderAccountId: true,
|
||||
} as const;
|
||||
|
||||
const OAUTH_ACCOUNT_NOT_LINKED_ERROR = "OAuthAccountNotLinked";
|
||||
|
||||
const syncSsoAccount = async (userId: string, account: Account, tx?: Prisma.TransactionClient) => {
|
||||
await upsertAccount(
|
||||
{
|
||||
@@ -219,7 +217,7 @@ export const handleSsoCallback = async ({
|
||||
}
|
||||
|
||||
// There is no existing linked account for this identity provider / account id
|
||||
// check if a user account with this email already exists and fail closed if so
|
||||
// check if a user account with this email already exists and auto-link it
|
||||
contextLogger.debug({ lookupType: "email" }, "No linked SSO account found, checking for user by email");
|
||||
|
||||
const existingUserWithEmail = await getUserByEmail(user.email);
|
||||
@@ -230,9 +228,10 @@ export const handleSsoCallback = async ({
|
||||
existingUserId: existingUserWithEmail.id,
|
||||
existingIdentityProvider: existingUserWithEmail.identityProvider,
|
||||
},
|
||||
"SSO callback blocked: existing user found by email without linked provider account"
|
||||
"SSO callback successful: existing user found by email"
|
||||
);
|
||||
throw new Error(OAUTH_ACCOUNT_NOT_LINKED_ERROR);
|
||||
await syncSsoAccount(existingUserWithEmail.id, account);
|
||||
return true;
|
||||
}
|
||||
|
||||
contextLogger.debug(
|
||||
|
||||
@@ -338,7 +338,7 @@ describe("handleSsoCallback", () => {
|
||||
);
|
||||
});
|
||||
|
||||
test("should reject verified email users whose SSO provider is not already linked", async () => {
|
||||
test("should auto-link verified email users whose SSO provider is not already linked", async () => {
|
||||
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
|
||||
vi.mocked(getUserByEmail).mockResolvedValue({
|
||||
id: "existing-user-id",
|
||||
@@ -349,22 +349,26 @@ describe("handleSsoCallback", () => {
|
||||
isActive: true,
|
||||
});
|
||||
|
||||
await expect(
|
||||
handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
})
|
||||
).rejects.toThrow("OAuthAccountNotLinked");
|
||||
expect(upsertAccount).not.toHaveBeenCalled();
|
||||
const result = await handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(upsertAccount).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
userId: "existing-user-id",
|
||||
provider: mockAccount.provider,
|
||||
providerAccountId: mockAccount.providerAccountId,
|
||||
}),
|
||||
undefined
|
||||
);
|
||||
expect(updateUser).not.toHaveBeenCalled();
|
||||
expect(createUser).not.toHaveBeenCalled();
|
||||
expect(createMembership).not.toHaveBeenCalled();
|
||||
expect(createBrevoCustomer).not.toHaveBeenCalled();
|
||||
expect(capturePostHogEvent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should reject unverified email users whose SSO provider is not already linked", async () => {
|
||||
test("should auto-link unverified email users whose SSO provider is not already linked", async () => {
|
||||
vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
|
||||
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
|
||||
vi.mocked(getUserByEmail).mockResolvedValue({
|
||||
@@ -376,22 +380,26 @@ describe("handleSsoCallback", () => {
|
||||
isActive: true,
|
||||
});
|
||||
|
||||
await expect(
|
||||
handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
})
|
||||
).rejects.toThrow("OAuthAccountNotLinked");
|
||||
expect(upsertAccount).not.toHaveBeenCalled();
|
||||
const result = await handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(upsertAccount).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
userId: "existing-user-id",
|
||||
provider: mockAccount.provider,
|
||||
providerAccountId: mockAccount.providerAccountId,
|
||||
}),
|
||||
undefined
|
||||
);
|
||||
expect(updateUser).not.toHaveBeenCalled();
|
||||
expect(createUser).not.toHaveBeenCalled();
|
||||
expect(createMembership).not.toHaveBeenCalled();
|
||||
expect(createBrevoCustomer).not.toHaveBeenCalled();
|
||||
expect(capturePostHogEvent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should reject existing users from a different SSO provider when no link exists", async () => {
|
||||
test("should auto-link existing users from a different SSO provider when no link exists", async () => {
|
||||
vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
|
||||
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
|
||||
vi.mocked(getUserByEmail).mockResolvedValue({
|
||||
@@ -403,14 +411,53 @@ describe("handleSsoCallback", () => {
|
||||
isActive: true,
|
||||
});
|
||||
|
||||
await expect(
|
||||
handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
})
|
||||
).rejects.toThrow("OAuthAccountNotLinked");
|
||||
expect(upsertAccount).not.toHaveBeenCalled();
|
||||
const result = await handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(upsertAccount).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
userId: "existing-user-id",
|
||||
provider: mockAccount.provider,
|
||||
providerAccountId: mockAccount.providerAccountId,
|
||||
}),
|
||||
undefined
|
||||
);
|
||||
expect(updateUser).not.toHaveBeenCalled();
|
||||
expect(createUser).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should auto-link same-email users even when the stored legacy provider account id is stale", async () => {
|
||||
vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
|
||||
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
|
||||
vi.mocked(getUserByEmail).mockResolvedValue({
|
||||
id: "existing-user-id",
|
||||
email: mockUser.email,
|
||||
emailVerified: new Date(),
|
||||
identityProvider: "google",
|
||||
identityProviderAccountId: "old-provider-id",
|
||||
locale: mockUser.locale,
|
||||
isActive: true,
|
||||
} as any);
|
||||
|
||||
const result = await handleSsoCallback({
|
||||
user: mockUser,
|
||||
account: mockAccount,
|
||||
callbackUrl: "http://localhost:3000",
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(upsertAccount).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
userId: "existing-user-id",
|
||||
provider: mockAccount.provider,
|
||||
providerAccountId: mockAccount.providerAccountId,
|
||||
}),
|
||||
undefined
|
||||
);
|
||||
expect(updateUser).not.toHaveBeenCalled();
|
||||
expect(createUser).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@@ -1,401 +0,0 @@
|
||||
---
|
||||
title: "Background Job Processing"
|
||||
description: "How BullMQ works in Formbricks today, including the migrated response pipeline workload."
|
||||
icon: "code"
|
||||
---
|
||||
|
||||
This page documents the current BullMQ-based background job system in Formbricks and the first real workload that now runs on it: the response pipeline.
|
||||
|
||||
## Current State
|
||||
|
||||
Formbricks now uses BullMQ as an in-process background job system inside the Next.js web application.
|
||||
|
||||
The current implementation includes:
|
||||
|
||||
- a shared `@formbricks/jobs` package that owns queue creation, schemas, scheduling, and worker runtime concerns
|
||||
- a Next.js startup hook that starts one BullMQ worker runtime per Node.js process without blocking app boot
|
||||
- app-level enqueue helpers for request handlers
|
||||
- an app-owned BullMQ response pipeline processor that replaces the legacy internal HTTP pipeline route
|
||||
|
||||
The first migrated workload is:
|
||||
|
||||
- `response-pipeline.process`
|
||||
|
||||
This means response-related side effects no longer depend on an internal `fetch()` back into the same app process.
|
||||
|
||||
## Why This Exists
|
||||
|
||||
The original response pipeline lived behind an internal Next.js route:
|
||||
|
||||
```text
|
||||
apps/web/app/api/(internal)/pipeline
|
||||
```
|
||||
|
||||
That model had a few problems:
|
||||
|
||||
- it was tightly coupled to the request lifecycle
|
||||
- it relied on an internal HTTP hop instead of a typed background-job boundary
|
||||
- it was harder to observe, retry, and scale safely
|
||||
|
||||
BullMQ addresses that by moving post-response work behind a queue while keeping the first version operationally simple for self-hosted users.
|
||||
|
||||
## High-Level Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A["API route or server code"] --> B["enqueueResponsePipelineEvents()"]
|
||||
B --> C["getResponseSnapshotForPipeline()"]
|
||||
B --> D["BackgroundJobProducer.enqueueResponsePipeline()"]
|
||||
D --> E["BullMQ queue: background-jobs"]
|
||||
F["instrumentation.ts"] --> G["registerJobsWorker()"]
|
||||
G --> H["startJobsRuntime()"]
|
||||
H --> I["BullMQ workers"]
|
||||
I --> J["response-pipeline.process override"]
|
||||
J --> K["processResponsePipelineJob()"]
|
||||
E --> I
|
||||
E --> L["Redis / Valkey"]
|
||||
I --> L
|
||||
```
|
||||
|
||||
## Responsibilities By Layer
|
||||
|
||||
### App Layer
|
||||
|
||||
- `apps/web/app/lib/pipelines.ts`
|
||||
Owns enqueueing for response pipeline events. It gates queueing, hydrates the response snapshot once, logs failures, and never throws back into request handlers.
|
||||
- `apps/web/modules/response-pipeline/lib/process-response-pipeline-job.ts`
|
||||
Owns app-specific execution of response-pipeline jobs.
|
||||
- `apps/web/modules/response-pipeline/lib/handle-integrations.ts`
|
||||
Owns Slack, Notion, Airtable, and Google Sheets integration fan-out for the pipeline.
|
||||
- `apps/web/modules/response-pipeline/lib/telemetry.ts`
|
||||
Owns telemetry dispatch logic used by the response-created path.
|
||||
- `apps/web/instrumentation-jobs.ts`
|
||||
Registers the app-owned response-pipeline handler override with the shared BullMQ runtime and schedules retry after transient startup failures.
|
||||
- `apps/web/lib/jobs/config.ts`
|
||||
Turns environment configuration into queueing and worker-bootstrap decisions. Queue producers depend on `REDIS_URL`; worker startup additionally depends on `BULLMQ_WORKER_ENABLED`.
|
||||
|
||||
### Shared Jobs Layer
|
||||
|
||||
- `packages/jobs/src/types.ts`
|
||||
Defines typed payload schemas such as `TResponsePipelineJobData`.
|
||||
- `packages/jobs/src/definitions.ts`
|
||||
Defines stable job names and payload validation.
|
||||
- `packages/jobs/src/queue.ts`
|
||||
Owns producer-side enqueueing and scheduling.
|
||||
- `packages/jobs/src/runtime.ts`
|
||||
Starts workers, connects Redis, and handles graceful shutdown.
|
||||
- `packages/jobs/src/processors/registry.ts`
|
||||
Validates payloads and dispatches named jobs, applying app-provided handler overrides when registered.
|
||||
|
||||
## Response Pipeline Flow
|
||||
|
||||
The response pipeline now runs fully in the background worker.
|
||||
|
||||
### Enqueueing
|
||||
|
||||
When a response is created or updated, the request path calls:
|
||||
|
||||
```ts
|
||||
enqueueResponsePipelineEvents({
|
||||
environmentId,
|
||||
surveyId,
|
||||
responseId,
|
||||
events,
|
||||
});
|
||||
```
|
||||
|
||||
That helper:
|
||||
|
||||
1. deduplicates requested events
|
||||
2. checks whether BullMQ queueing is enabled
|
||||
3. uses the just-written response snapshot when the caller already has it
|
||||
4. otherwise loads the latest response snapshot once via `getResponseSnapshotForPipeline(responseId)` using an uncached read
|
||||
5. enqueues one BullMQ job per event with the shared snapshot payload
|
||||
6. waits for the enqueue attempt to complete, then logs enqueue failures without failing the original request
|
||||
|
||||
### Execution
|
||||
|
||||
At worker startup, `apps/web/instrumentation-jobs.ts` registers an app-owned override for:
|
||||
|
||||
- `response-pipeline.process`
|
||||
|
||||
That override delegates to `processResponsePipelineJob(...)`, which performs:
|
||||
|
||||
- webhook delivery for all pipeline events
|
||||
- integrations for `responseFinished`
|
||||
- response-finished notification emails
|
||||
- follow-up delivery
|
||||
- survey auto-complete updates and audit logging
|
||||
- response-created billing metering
|
||||
- response-created telemetry dispatch
|
||||
|
||||
Current retry semantics are intentionally asymmetric:
|
||||
|
||||
- webhook delivery failures fail early BullMQ attempts so retries can happen at the job level
|
||||
- if webhook delivery is still failing on the final BullMQ attempt, the worker logs that retries are exhausted and continues with the remaining event-specific side effects
|
||||
- integration, email, telemetry, metering, follow-up, and survey auto-complete failures are logged inside the processor and do not fail the whole job
|
||||
|
||||
## Acceptance Criteria Review
|
||||
|
||||
### Pipeline Execution
|
||||
|
||||
Satisfied.
|
||||
|
||||
- New response create/update flows enqueue BullMQ jobs instead of calling an internal HTTP route.
|
||||
- The job payload contains `environmentId`, `surveyId`, `event`, and an authoritative response snapshot.
|
||||
- The response pipeline executes inside the BullMQ worker runtime.
|
||||
|
||||
### Feature Parity
|
||||
|
||||
Mostly satisfied for the legacy response pipeline behavior that existed in the old route.
|
||||
|
||||
The migrated BullMQ processor preserves:
|
||||
|
||||
- webhook delivery
|
||||
- integrations
|
||||
- response-finished emails
|
||||
- follow-up execution
|
||||
- survey auto-complete and audit logging
|
||||
- response-created billing metering
|
||||
- response-created telemetry
|
||||
|
||||
One important behavior change still exists today:
|
||||
|
||||
- webhook delivery failures delay the remaining side effects until the final BullMQ attempt
|
||||
|
||||
That is closer to the legacy route, because the pipeline eventually continues even if webhook delivery never succeeds. It is still not exact feature parity, though, because the legacy route continued immediately while the BullMQ worker waits until retries are exhausted before it degrades webhook failure into a logged condition.
|
||||
|
||||
### Architecture
|
||||
|
||||
Satisfied.
|
||||
|
||||
- Enqueueing lives in the app layer through `apps/web/app/lib/pipelines.ts`.
|
||||
- Execution lives in the worker path under `apps/web/modules/response-pipeline/lib`.
|
||||
- `@formbricks/jobs` stays responsible for queue/runtime concerns and typed job contracts.
|
||||
|
||||
### Cleanup
|
||||
|
||||
Satisfied.
|
||||
|
||||
The legacy internal route has been removed:
|
||||
|
||||
```text
|
||||
apps/web/app/api/(internal)/pipeline/route.ts
|
||||
```
|
||||
|
||||
The runtime path no longer depends on the old internal-route folder structure, and the remaining pipeline-only test mock under that deleted folder has been removed as part of the migration cleanup.
|
||||
|
||||
### Reliability
|
||||
|
||||
Satisfied at the current ticket scope.
|
||||
|
||||
BullMQ jobs use shared default retry behavior:
|
||||
|
||||
- `attempts: 3`
|
||||
- exponential backoff starting at `1000ms`
|
||||
|
||||
Failures are logged with structured metadata such as:
|
||||
|
||||
- `jobId`
|
||||
- `attempt`
|
||||
- `jobName`
|
||||
- `queueName`
|
||||
- `environmentId`
|
||||
- `surveyId`
|
||||
- `responseId`
|
||||
|
||||
Request handlers remain non-blocking:
|
||||
|
||||
- if Redis is unavailable
|
||||
- if queueing is disabled
|
||||
- if snapshot hydration fails
|
||||
- if enqueueing fails
|
||||
|
||||
the request still completes, and the failure is logged.
|
||||
|
||||
Worker startup is also non-blocking:
|
||||
|
||||
- Next.js boot does not await BullMQ readiness
|
||||
- startup failures are logged
|
||||
- the web app schedules a retry instead of requiring an immediate process restart
|
||||
|
||||
### Worker Integration
|
||||
|
||||
Satisfied.
|
||||
|
||||
The response pipeline is processed by the same BullMQ worker runtime started from Next.js instrumentation. No standalone worker service was introduced as part of this migration.
|
||||
|
||||
### Developer Experience
|
||||
|
||||
Satisfied.
|
||||
|
||||
The public app-level API for request handlers is intentionally small:
|
||||
|
||||
- `enqueueResponsePipelineEvents(...)`
|
||||
|
||||
This keeps queue names, Redis concerns, and BullMQ details out of response routes.
|
||||
|
||||
## Comparison With The Legacy Route
|
||||
|
||||
### Previous Implementation
|
||||
|
||||
The legacy internal route accepted a full response payload directly and then executed the entire pipeline synchronously inside the route handler.
|
||||
|
||||
Key characteristics of that model:
|
||||
|
||||
- request handlers performed an internal authenticated `fetch()` back into the same app
|
||||
- the route received the response payload directly instead of hydrating it from a queue-side snapshot
|
||||
- webhook failures were logged and did not block the rest of the pipeline
|
||||
- response-finished integrations, emails, follow-ups, and survey auto-complete ran in the same route execution
|
||||
- response-created metering was fire-and-forget while telemetry was awaited
|
||||
|
||||
### Current BullMQ Implementation
|
||||
|
||||
The current branch enqueues a typed snapshot-based BullMQ job and executes the pipeline inside the in-process worker registered from Next.js instrumentation.
|
||||
|
||||
Key characteristics of the current model:
|
||||
|
||||
- request handlers enqueue directly through `enqueueResponsePipelineEvents(...)`
|
||||
- handlers now pass the just-written `TResponse` snapshot when they already have it
|
||||
- callers that do not already have a response snapshot use an uncached pipeline-specific lookup
|
||||
- worker startup is non-blocking and retries after transient failures
|
||||
- webhook failures fail early attempts so BullMQ can retry them
|
||||
- on the final attempt, webhook failures are logged and the remaining side effects continue
|
||||
- response-created metering is awaited before the BullMQ job completes
|
||||
|
||||
### Net Result
|
||||
|
||||
Compared to the legacy route, the current branch is:
|
||||
|
||||
- architecturally stronger
|
||||
- safer to scale and operate
|
||||
- easier to observe through structured job logging
|
||||
- closer to legacy feature parity than the earlier BullMQ iterations on this branch
|
||||
|
||||
The main remaining semantic difference is timing:
|
||||
|
||||
- the legacy route continued past webhook failures immediately
|
||||
- the BullMQ worker now continues only after webhook retries are exhausted
|
||||
|
||||
That is an intentional trade-off in the current branch, not an accident.
|
||||
|
||||
## Current Queue Model
|
||||
|
||||
The queue remains intentionally small:
|
||||
|
||||
- queue name: `background-jobs`
|
||||
- prefix: `formbricks:jobs`
|
||||
- job names:
|
||||
- `system.test-log`
|
||||
- `response-pipeline.process`
|
||||
|
||||
The response pipeline is the first production workload on this queue.
|
||||
|
||||
## Local Development
|
||||
|
||||
Local development works end to end as long as Redis is available and the worker is enabled.
|
||||
|
||||
Required inputs:
|
||||
|
||||
- `REDIS_URL`
|
||||
- optionally `BULLMQ_WORKER_ENABLED`
|
||||
- optionally `BULLMQ_WORKER_COUNT`
|
||||
- optionally `BULLMQ_WORKER_CONCURRENCY`
|
||||
|
||||
Behavior:
|
||||
|
||||
- if `REDIS_URL` is missing, queueing is skipped
|
||||
- if `BULLMQ_WORKER_ENABLED=0`, the worker is not started, but request-side enqueueing can still stay enabled in deployments that point at a separate BullMQ worker
|
||||
- outside tests, the worker is enabled by default
|
||||
|
||||
This makes it possible to develop request flows without hard-failing when Redis is absent, while still supporting full local end-to-end verification when Redis is running.
|
||||
|
||||
## Operational Notes
|
||||
|
||||
### Logging
|
||||
|
||||
The current implementation logs:
|
||||
|
||||
- worker startup failures
|
||||
- Redis connection failures
|
||||
- enqueue failures
|
||||
- job failures
|
||||
- webhook delivery failures
|
||||
- integration failures
|
||||
- email delivery failures
|
||||
- follow-up failures
|
||||
- survey auto-complete update failures
|
||||
- metering failures
|
||||
- telemetry failures
|
||||
|
||||
### Shutdown
|
||||
|
||||
The worker runtime registers `SIGTERM` and `SIGINT` handlers, closes workers and queue handles, and then closes Redis connections. This keeps shutdown behavior predictable inside the web process.
|
||||
|
||||
## Current Limitations
|
||||
|
||||
The migration satisfies the ticket, but a few larger architectural limits remain by design.
|
||||
|
||||
### Dual-Write Boundary
|
||||
|
||||
Response writes happen in Postgres and background jobs are enqueued in Redis. Those are separate systems, so this remains a dual-write boundary.
|
||||
|
||||
This means Formbricks currently has:
|
||||
|
||||
- non-blocking enqueue semantics
|
||||
- at-least-once background execution
|
||||
- no transactional guarantee that the product write and Redis enqueue succeed together
|
||||
|
||||
That trade-off was accepted for this BullMQ phase.
|
||||
|
||||
### In-Process Workers
|
||||
|
||||
Workers run inside the Next.js app process.
|
||||
|
||||
That keeps self-hosting simple, but it also means:
|
||||
|
||||
- job capacity still shares resources with the web process
|
||||
- heavy background work is still Node.js-local
|
||||
- scaling job throughput also scales the app runtime
|
||||
|
||||
### Webhook-Gated Retries
|
||||
|
||||
Webhook delivery still happens before the rest of the `responseFinished` side effects.
|
||||
|
||||
That gives Formbricks job-level retries for webhook delivery, but it also means:
|
||||
|
||||
- `responseFinished` side effects do not run on the early retry attempts
|
||||
- the remaining side effects only continue after webhook retries are exhausted
|
||||
- this is closer to legacy behavior than failing forever, but it is still not immediate parity
|
||||
|
||||
This is the current behavior of the branch and should be evaluated explicitly if we want stricter feature parity with the legacy route.
|
||||
|
||||
### Logs-First Observability
|
||||
|
||||
The current system has strong structured logging, but it does not yet provide:
|
||||
|
||||
- queue dashboards
|
||||
- retry tooling
|
||||
- latency metrics
|
||||
- product-native workflow inspection
|
||||
|
||||
Those are future improvements, not blockers for the current migration.
|
||||
|
||||
## Recommended Next Steps
|
||||
|
||||
Now that the response pipeline is on BullMQ, the most useful next steps are:
|
||||
|
||||
1. migrate additional low-risk async workloads behind the same producer/runtime boundary
|
||||
2. add queue metrics and worker health visibility beyond logs
|
||||
3. define explicit idempotency rules for side-effect-heavy jobs
|
||||
4. decide which future workloads should remain Node-local and which should eventually move to a different runtime
|
||||
|
||||
## Practical Conclusion
|
||||
|
||||
Formbricks now has:
|
||||
|
||||
- a production-capable BullMQ foundation
|
||||
- a real migrated workload
|
||||
- a clean separation between request-time enqueueing and background execution
|
||||
|
||||
The response pipeline migration should be considered complete for the current ticket scope.
|
||||
@@ -32,16 +32,32 @@ export const delay = (ms: number): Promise<void> => {
|
||||
});
|
||||
};
|
||||
|
||||
// Module-level locks keyed by surveyId.
|
||||
// Survive ResponseQueue instance recreation (e.g. React useMemo recomputation)
|
||||
// so that only one sync/send runs at a time per survey, even across instances.
|
||||
const syncingBySurvey = new Map<string, boolean>();
|
||||
const requestInProgressBySurvey = new Map<string, boolean>();
|
||||
|
||||
/** @internal Exposed for tests only. */
|
||||
export const _syncLocks = {
|
||||
clear: () => {
|
||||
syncingBySurvey.clear();
|
||||
requestInProgressBySurvey.clear();
|
||||
},
|
||||
set: (surveyId: string, value: boolean) => syncingBySurvey.set(surveyId, value),
|
||||
get: (surveyId: string) => syncingBySurvey.get(surveyId) ?? false,
|
||||
setRequestInProgress: (surveyId: string, value: boolean) => requestInProgressBySurvey.set(surveyId, value),
|
||||
getRequestInProgress: (surveyId: string) => requestInProgressBySurvey.get(surveyId) ?? false,
|
||||
};
|
||||
|
||||
export class ResponseQueue {
|
||||
readonly queue: TResponseUpdate[] = [];
|
||||
readonly config: QueueConfig;
|
||||
private surveyState: SurveyState;
|
||||
private isRequestInProgress = false;
|
||||
readonly api: ApiClient;
|
||||
private responseRecaptchaToken?: string;
|
||||
// Maps in-memory queue index → IndexedDB id for cleanup after successful send
|
||||
private readonly pendingDbIds: Map<TResponseUpdate, number> = new Map();
|
||||
private isSyncing = false;
|
||||
|
||||
constructor(config: QueueConfig, surveyState: SurveyState) {
|
||||
this.config = config;
|
||||
@@ -52,6 +68,26 @@ export class ResponseQueue {
|
||||
});
|
||||
}
|
||||
|
||||
private get isSyncing(): boolean {
|
||||
return this.config.surveyId ? (syncingBySurvey.get(this.config.surveyId) ?? false) : false;
|
||||
}
|
||||
|
||||
private set isSyncing(value: boolean) {
|
||||
if (this.config.surveyId) {
|
||||
syncingBySurvey.set(this.config.surveyId, value);
|
||||
}
|
||||
}
|
||||
|
||||
private get isRequestInProgress(): boolean {
|
||||
return this.config.surveyId ? (requestInProgressBySurvey.get(this.config.surveyId) ?? false) : false;
|
||||
}
|
||||
|
||||
private set isRequestInProgress(value: boolean) {
|
||||
if (this.config.surveyId) {
|
||||
requestInProgressBySurvey.set(this.config.surveyId, value);
|
||||
}
|
||||
}
|
||||
|
||||
setResponseRecaptchaToken(token?: string) {
|
||||
this.responseRecaptchaToken = token;
|
||||
}
|
||||
@@ -111,8 +147,26 @@ export class ResponseQueue {
|
||||
return { success: false };
|
||||
}
|
||||
|
||||
this.isRequestInProgress = true;
|
||||
// When offline support is active and there are multiple pending entries in
|
||||
// IndexedDB, defer to syncPersistedResponses which drains them in order.
|
||||
// This prevents processQueue and syncPersistedResponses from racing to
|
||||
// create the same response concurrently (duplicate POSTs).
|
||||
if (this.config.persistOffline && this.config.surveyId) {
|
||||
const pendingCount = await countPendingResponses(this.config.surveyId);
|
||||
|
||||
// Re-check after await — another processQueue/sync may have started during the yield
|
||||
if (this.isSyncing || this.isRequestInProgress || this.queue.length === 0) {
|
||||
return { success: false };
|
||||
}
|
||||
|
||||
if (pendingCount > 1) {
|
||||
void this.syncPersistedResponses();
|
||||
return { success: false };
|
||||
}
|
||||
}
|
||||
|
||||
const responseUpdate = this.queue[0];
|
||||
this.isRequestInProgress = true;
|
||||
|
||||
const result = await this.sendResponseWithRetry(responseUpdate);
|
||||
|
||||
@@ -169,6 +223,11 @@ export class ResponseQueue {
|
||||
|
||||
// Concurrency guard: prevent duplicate syncs from online/offline flicker
|
||||
if (this.isSyncing) return { success: false, syncedCount: 0 };
|
||||
|
||||
// If processQueue already has a request in flight, don't start syncing —
|
||||
// let it finish first to avoid both paths creating the same response.
|
||||
if (this.isRequestInProgress) return { success: false, syncedCount: 0 };
|
||||
|
||||
this.isSyncing = true;
|
||||
|
||||
try {
|
||||
|
||||
@@ -3,7 +3,7 @@ import { afterAll, beforeAll, beforeEach, describe, expect, test, vi } from "vit
|
||||
import { err, ok } from "@formbricks/types/error-handlers";
|
||||
import { TResponseUpdate } from "@formbricks/types/responses";
|
||||
import { TResponseErrorCodesEnum } from "@/types/response-error-codes";
|
||||
import { ResponseQueue, delay } from "./response-queue";
|
||||
import { ResponseQueue, _syncLocks, delay } from "./response-queue";
|
||||
import { SurveyState } from "./survey-state";
|
||||
|
||||
// Suppress noisy console output from retry logic during tests
|
||||
@@ -86,6 +86,7 @@ describe("ResponseQueue", () => {
|
||||
queue = new ResponseQueue(config, surveyState);
|
||||
apiMock = queue.api;
|
||||
vi.clearAllMocks();
|
||||
_syncLocks.clear();
|
||||
});
|
||||
|
||||
test("constructor initializes properties", () => {
|
||||
@@ -110,26 +111,30 @@ describe("ResponseQueue", () => {
|
||||
});
|
||||
|
||||
test("processQueue does nothing if request in progress or queue empty", async () => {
|
||||
queue["isRequestInProgress"] = true;
|
||||
await queue.processQueue();
|
||||
queue["isRequestInProgress"] = false;
|
||||
queue.queue.length = 0;
|
||||
await queue.processQueue();
|
||||
const reqQueue = new ResponseQueue(getConfig({ surveyId: "s1" }), getSurveyState());
|
||||
_syncLocks.setRequestInProgress("s1", true);
|
||||
await reqQueue.processQueue();
|
||||
_syncLocks.setRequestInProgress("s1", false);
|
||||
reqQueue.queue.length = 0;
|
||||
await reqQueue.processQueue();
|
||||
expect(true).toBe(true); // just to ensure no errors
|
||||
});
|
||||
|
||||
test("processQueue sends response and removes from queue on success", async () => {
|
||||
queue.queue.push(responseUpdate);
|
||||
vi.spyOn(queue, "sendResponse").mockResolvedValue(ok(true));
|
||||
await queue.processQueue();
|
||||
expect(queue.queue.length).toBe(0);
|
||||
expect(queue["isRequestInProgress"]).toBe(false);
|
||||
const reqQueue = new ResponseQueue(getConfig({ surveyId: "s1" }), getSurveyState());
|
||||
reqQueue.queue.push(responseUpdate);
|
||||
vi.spyOn(reqQueue, "sendResponse").mockResolvedValue(ok(true));
|
||||
await reqQueue.processQueue();
|
||||
expect(reqQueue.queue.length).toBe(0);
|
||||
expect(_syncLocks.getRequestInProgress("s1")).toBe(false);
|
||||
});
|
||||
|
||||
test("processQueue retries and calls onResponseSendingFailed on recaptcha error", async () => {
|
||||
queue.queue.push(responseUpdate);
|
||||
const recaptchaConfig = getConfig({ surveyId: "s1" });
|
||||
const recaptchaQueue = new ResponseQueue(recaptchaConfig, getSurveyState());
|
||||
recaptchaQueue.queue.push(responseUpdate);
|
||||
|
||||
vi.spyOn(queue, "sendResponse").mockResolvedValue(
|
||||
vi.spyOn(recaptchaQueue, "sendResponse").mockResolvedValue(
|
||||
err({
|
||||
code: "internal_server_error",
|
||||
message: "An error occurred while sending the response.",
|
||||
@@ -139,29 +144,31 @@ describe("ResponseQueue", () => {
|
||||
},
|
||||
})
|
||||
);
|
||||
await queue.processQueue();
|
||||
expect(config.onResponseSendingFailed).toHaveBeenCalledWith(
|
||||
await recaptchaQueue.processQueue();
|
||||
expect(recaptchaConfig.onResponseSendingFailed).toHaveBeenCalledWith(
|
||||
responseUpdate,
|
||||
TResponseErrorCodesEnum.RecaptchaError
|
||||
);
|
||||
expect(queue["isRequestInProgress"]).toBe(false);
|
||||
expect(_syncLocks.getRequestInProgress("s1")).toBe(false);
|
||||
});
|
||||
|
||||
test("processQueue retries and calls onResponseSendingFailed after max attempts", async () => {
|
||||
queue.queue.push(responseUpdate);
|
||||
vi.spyOn(queue, "sendResponse").mockResolvedValue(
|
||||
const reqConfig = getConfig({ surveyId: "s1" });
|
||||
const reqQueue = new ResponseQueue(reqConfig, getSurveyState());
|
||||
reqQueue.queue.push(responseUpdate);
|
||||
vi.spyOn(reqQueue, "sendResponse").mockResolvedValue(
|
||||
err({
|
||||
code: "internal_server_error",
|
||||
message: "An error occurred while sending the response.",
|
||||
status: 500,
|
||||
})
|
||||
);
|
||||
await queue.processQueue();
|
||||
expect(config.onResponseSendingFailed).toHaveBeenCalledWith(
|
||||
await reqQueue.processQueue();
|
||||
expect(reqConfig.onResponseSendingFailed).toHaveBeenCalledWith(
|
||||
responseUpdate,
|
||||
TResponseErrorCodesEnum.ResponseSendingError
|
||||
);
|
||||
expect(queue["isRequestInProgress"]).toBe(false);
|
||||
expect(_syncLocks.getRequestInProgress("s1")).toBe(false);
|
||||
});
|
||||
|
||||
test("processQueue calls onResponseSendingFinished if finished", async () => {
|
||||
@@ -218,8 +225,9 @@ describe("ResponseQueue", () => {
|
||||
});
|
||||
|
||||
test("processQueueAsync returns success false if request in progress", async () => {
|
||||
queue["isRequestInProgress"] = true;
|
||||
const result = await queue.processQueue();
|
||||
const reqQueue = new ResponseQueue(getConfig({ surveyId: "s1" }), getSurveyState());
|
||||
_syncLocks.setRequestInProgress("s1", true);
|
||||
const result = await reqQueue.processQueue();
|
||||
expect(result.success).toBe(false);
|
||||
});
|
||||
|
||||
@@ -309,9 +317,13 @@ describe("ResponseQueue", () => {
|
||||
});
|
||||
|
||||
test("processQueue returns false when isSyncing is true", async () => {
|
||||
queue.queue.push(responseUpdate);
|
||||
queue["isSyncing"] = true;
|
||||
const result = await queue.processQueue();
|
||||
const offlineQueue = new ResponseQueue(
|
||||
getConfig({ persistOffline: true, surveyId: "s1" }),
|
||||
getSurveyState()
|
||||
);
|
||||
offlineQueue.queue.push(responseUpdate);
|
||||
_syncLocks.set("s1", true);
|
||||
const result = await offlineQueue.processQueue();
|
||||
expect(result.success).toBe(false);
|
||||
});
|
||||
|
||||
@@ -347,7 +359,7 @@ describe("ResponseQueue", () => {
|
||||
getConfig({ persistOffline: true, surveyId: "s1" }),
|
||||
getSurveyState()
|
||||
);
|
||||
offlineQueue["isSyncing"] = true;
|
||||
_syncLocks.set("s1", true);
|
||||
const result = await offlineQueue.syncPersistedResponses();
|
||||
expect(result).toEqual({ success: false, syncedCount: 0 });
|
||||
});
|
||||
@@ -382,7 +394,7 @@ describe("ResponseQueue", () => {
|
||||
expect(result).toEqual({ success: true, syncedCount: 1 });
|
||||
expect(removePendingResponse).toHaveBeenCalledWith(10);
|
||||
expect(offlineQueue.queue.length).toBe(0);
|
||||
expect(offlineQueue["isSyncing"]).toBe(false);
|
||||
expect(_syncLocks.get("s1")).toBe(false);
|
||||
});
|
||||
|
||||
test("syncPersistedResponses stops on server error", async () => {
|
||||
@@ -415,7 +427,7 @@ describe("ResponseQueue", () => {
|
||||
|
||||
const result = await offlineQueue.syncPersistedResponses();
|
||||
expect(result).toEqual({ success: false, syncedCount: 0 });
|
||||
expect(offlineQueue["isSyncing"]).toBe(false);
|
||||
expect(_syncLocks.get("s1")).toBe(false);
|
||||
});
|
||||
|
||||
test("syncPersistedResponses retries 404 as createResponse by resetting responseId", async () => {
|
||||
|
||||
Reference in New Issue
Block a user