Compare commits

..

2 Commits

Author SHA1 Message Date
Bhagya Amarasinghe
8204a5c652 fix: restore legacy SSO auto-linking hotfix (#7728) 2026-04-13 20:42:33 +05:30
Anshuman Pandey
e823e10f9a fix: backports missing posthog events fix (#7723) 2026-04-13 17:36:39 +05:30
12 changed files with 187 additions and 461 deletions

View File

@@ -0,0 +1,33 @@
import { capturePostHogEvent } from "@/lib/posthog";
interface SurveyResponsePostHogEventParams {
organizationId: string;
surveyId: string;
surveyType: string;
environmentId: string;
responseCount: number;
}
/**
* Captures a PostHog event for survey responses at milestones:
* 1st response, then every 100th (100, 200, 300, ...).
*/
export const captureSurveyResponsePostHogEvent = ({
organizationId,
surveyId,
surveyType,
environmentId,
responseCount,
}: SurveyResponsePostHogEventParams): void => {
if (responseCount !== 1 && responseCount % 100 !== 0) return;
capturePostHogEvent(organizationId, "survey_response_received", {
survey_id: surveyId,
survey_type: surveyType,
organization_id: organizationId,
environment_id: environmentId,
response_count: responseCount,
is_first_response: responseCount === 1,
milestone: responseCount === 1 ? "first" : String(responseCount),
});
};

View File

@@ -1,7 +1,6 @@
import { PipelineTriggers, Webhook } from "@prisma/client";
import { headers } from "next/headers";
import { v7 as uuidv7 } from "uuid";
import { createCacheKey } from "@formbricks/cache";
import { prisma } from "@formbricks/database";
import { logger } from "@formbricks/logger";
import { ResourceNotFoundError } from "@formbricks/types/errors";
@@ -9,12 +8,10 @@ import { sendTelemetryEvents } from "@/app/api/(internal)/pipeline/lib/telemetry
import { ZPipelineInput } from "@/app/api/(internal)/pipeline/types/pipelines";
import { responses } from "@/app/lib/api/response";
import { transformErrorToDetails } from "@/app/lib/api/validator";
import { cache } from "@/lib/cache";
import { CRON_SECRET, POSTHOG_KEY } from "@/lib/constants";
import { generateStandardWebhookSignature } from "@/lib/crypto";
import { getIntegrations } from "@/lib/integration/service";
import { getOrganizationByEnvironmentId } from "@/lib/organization/service";
import { capturePostHogEvent } from "@/lib/posthog";
import { getResponseCountBySurveyId } from "@/lib/response/service";
import { getSurvey, updateSurvey } from "@/lib/survey/service";
import { convertDatesInObject } from "@/lib/time";
@@ -27,6 +24,7 @@ import { resolveStorageUrlsInObject } from "@/modules/storage/utils";
import { sendFollowUpsForResponse } from "@/modules/survey/follow-ups/lib/follow-ups";
import { FollowUpSendError } from "@/modules/survey/follow-ups/types/follow-up";
import { handleIntegrations } from "./lib/handleIntegrations";
import { captureSurveyResponsePostHogEvent } from "./lib/posthog";
export const POST = async (request: Request) => {
const requestHeaders = await headers();
@@ -302,25 +300,16 @@ export const POST = async (request: Request) => {
logger.error({ error, responseId: response.id }, "Failed to record response meter event");
});
// Sampled PostHog tracking: first response + every 100th
if (POSTHOG_KEY) {
const responseCount = await cache.withCache(
() => getResponseCountBySurveyId(surveyId),
createCacheKey.response.countBySurveyId(surveyId),
60 * 1000
);
const responseCount = await getResponseCountBySurveyId(surveyId);
if (responseCount === 1 || responseCount % 100 === 0) {
capturePostHogEvent(organization.id, "survey_response_received", {
survey_id: surveyId,
survey_type: survey.type,
organization_id: organization.id,
environment_id: environmentId,
response_count: responseCount,
is_first_response: responseCount === 1,
milestone: responseCount === 1 ? "first" : String(responseCount),
});
}
captureSurveyResponsePostHogEvent({
organizationId: organization.id,
surveyId,
surveyType: survey.type,
environmentId,
responseCount,
});
}
// Send telemetry events

View File

@@ -1,5 +1,6 @@
import { google } from "googleapis";
import { getServerSession } from "next-auth";
import { logger } from "@formbricks/logger";
import { TIntegrationGoogleSheetsConfig } from "@formbricks/types/integration/google-sheet";
import { responses } from "@/app/lib/api/response";
import {
@@ -10,6 +11,8 @@ import {
} from "@/lib/constants";
import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
import { createOrUpdateIntegration, getIntegrationByType } from "@/lib/integration/service";
import { capturePostHogEvent } from "@/lib/posthog";
import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";
import { authOptions } from "@/modules/auth/lib/authOptions";
export const GET = async (req: Request) => {
@@ -82,6 +85,16 @@ export const GET = async (req: Request) => {
const result = await createOrUpdateIntegration(environmentId, googleSheetIntegration);
if (result) {
try {
const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
capturePostHogEvent(session.user.id, "integration_connected", {
integration_type: "googleSheets",
organization_id: organizationId,
});
} catch (err) {
logger.error({ error: err }, "Failed to capture PostHog integration_connected event for googleSheets");
}
return Response.redirect(
`${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/google-sheets`
);

View File

@@ -6,6 +6,8 @@ import { fetchAirtableAuthToken } from "@/lib/airtable/service";
import { AIRTABLE_CLIENT_ID, WEBAPP_URL } from "@/lib/constants";
import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
import { createOrUpdateIntegration } from "@/lib/integration/service";
import { capturePostHogEvent } from "@/lib/posthog";
import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";
const getEmail = async (token: string) => {
const req_ = await fetch("https://api.airtable.com/v0/meta/whoami", {
@@ -86,6 +88,17 @@ export const GET = withV1ApiWrapper({
},
};
await createOrUpdateIntegration(environmentId, airtableIntegrationInput);
try {
const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
capturePostHogEvent(authentication.user.id, "integration_connected", {
integration_type: "airtable",
organization_id: organizationId,
});
} catch (err) {
logger.error({ error: err }, "Failed to capture PostHog integration_connected event for airtable");
}
return {
response: Response.redirect(
`${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/airtable`

View File

@@ -1,3 +1,4 @@
import { logger } from "@formbricks/logger";
import { TIntegrationNotionConfigData, TIntegrationNotionInput } from "@formbricks/types/integration/notion";
import { responses } from "@/app/lib/api/response";
import { withV1ApiWrapper } from "@/app/lib/api/with-api-logging";
@@ -11,6 +12,8 @@ import {
import { symmetricEncrypt } from "@/lib/crypto";
import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
import { createOrUpdateIntegration, getIntegrationByType } from "@/lib/integration/service";
import { capturePostHogEvent } from "@/lib/posthog";
import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";
export const GET = withV1ApiWrapper({
handler: async ({ req, authentication }) => {
@@ -96,6 +99,16 @@ export const GET = withV1ApiWrapper({
const result = await createOrUpdateIntegration(environmentId, notionIntegration);
if (result) {
try {
const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
capturePostHogEvent(authentication.user.id, "integration_connected", {
integration_type: "notion",
organization_id: organizationId,
});
} catch (err) {
logger.error({ error: err }, "Failed to capture PostHog integration_connected event for notion");
}
return {
response: Response.redirect(
`${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/notion`

View File

@@ -1,3 +1,4 @@
import { logger } from "@formbricks/logger";
import {
TIntegrationSlackConfig,
TIntegrationSlackConfigData,
@@ -8,6 +9,8 @@ import { withV1ApiWrapper } from "@/app/lib/api/with-api-logging";
import { SLACK_CLIENT_ID, SLACK_CLIENT_SECRET, SLACK_REDIRECT_URI, WEBAPP_URL } from "@/lib/constants";
import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
import { createOrUpdateIntegration, getIntegrationByType } from "@/lib/integration/service";
import { capturePostHogEvent } from "@/lib/posthog";
import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";
export const GET = withV1ApiWrapper({
handler: async ({ req, authentication }) => {
@@ -104,6 +107,16 @@ export const GET = withV1ApiWrapper({
const result = await createOrUpdateIntegration(environmentId, integration);
if (result) {
try {
const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
capturePostHogEvent(authentication.user.id, "integration_connected", {
integration_type: "slack",
organization_id: organizationId,
});
} catch (err) {
logger.error({ error: err }, "Failed to capture PostHog integration_connected event for slack");
}
return {
response: Response.redirect(
`${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/slack`

View File

@@ -166,6 +166,11 @@ async function handleOrganizationCreation(ctx: ActionClientCtx, user: TCreatedUs
});
}
capturePostHogEvent(user.id, "organization_created", {
organization_id: organization.id,
is_first_org: true,
});
await updateUser(user.id, {
notificationSettings: {
...user.notificationSettings,

View File

@@ -51,7 +51,7 @@ describe("SSO Providers", () => {
expect((samlProvider as any).authorization?.url).toBe("https://test-app.com/api/auth/saml/authorize");
expect(samlProvider.token).toBe("https://test-app.com/api/auth/saml/token");
expect(samlProvider.userinfo).toBe("https://test-app.com/api/auth/saml/userinfo");
expect(googleProvider.allowDangerousEmailAccountLinking).toBeUndefined();
expect(samlProvider.allowDangerousEmailAccountLinking).toBeUndefined();
expect((googleProvider as any).options?.allowDangerousEmailAccountLinking).toBe(true);
expect(samlProvider.allowDangerousEmailAccountLinking).toBe(true);
});
});

View File

@@ -26,6 +26,7 @@ export const getSSOProviders = () => [
GoogleProvider({
clientId: GOOGLE_CLIENT_ID || "",
clientSecret: GOOGLE_CLIENT_SECRET || "",
allowDangerousEmailAccountLinking: true,
}),
AzureAD({
clientId: AZUREAD_CLIENT_ID || "",
@@ -80,6 +81,7 @@ export const getSSOProviders = () => [
clientId: "dummy",
clientSecret: "dummy",
},
allowDangerousEmailAccountLinking: true,
},
];

View File

@@ -34,8 +34,6 @@ const LINKED_SSO_LOOKUP_SELECT = {
identityProviderAccountId: true,
} as const;
const OAUTH_ACCOUNT_NOT_LINKED_ERROR = "OAuthAccountNotLinked";
const syncSsoAccount = async (userId: string, account: Account, tx?: Prisma.TransactionClient) => {
await upsertAccount(
{
@@ -219,7 +217,7 @@ export const handleSsoCallback = async ({
}
// There is no existing linked account for this identity provider / account id
// check if a user account with this email already exists and fail closed if so
// check if a user account with this email already exists and auto-link it
contextLogger.debug({ lookupType: "email" }, "No linked SSO account found, checking for user by email");
const existingUserWithEmail = await getUserByEmail(user.email);
@@ -230,9 +228,10 @@ export const handleSsoCallback = async ({
existingUserId: existingUserWithEmail.id,
existingIdentityProvider: existingUserWithEmail.identityProvider,
},
"SSO callback blocked: existing user found by email without linked provider account"
"SSO callback successful: existing user found by email"
);
throw new Error(OAUTH_ACCOUNT_NOT_LINKED_ERROR);
await syncSsoAccount(existingUserWithEmail.id, account);
return true;
}
contextLogger.debug(

View File

@@ -338,7 +338,7 @@ describe("handleSsoCallback", () => {
);
});
test("should reject verified email users whose SSO provider is not already linked", async () => {
test("should auto-link verified email users whose SSO provider is not already linked", async () => {
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
vi.mocked(getUserByEmail).mockResolvedValue({
id: "existing-user-id",
@@ -349,22 +349,26 @@ describe("handleSsoCallback", () => {
isActive: true,
});
await expect(
handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
})
).rejects.toThrow("OAuthAccountNotLinked");
expect(upsertAccount).not.toHaveBeenCalled();
const result = await handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
});
expect(result).toBe(true);
expect(upsertAccount).toHaveBeenCalledWith(
expect.objectContaining({
userId: "existing-user-id",
provider: mockAccount.provider,
providerAccountId: mockAccount.providerAccountId,
}),
undefined
);
expect(updateUser).not.toHaveBeenCalled();
expect(createUser).not.toHaveBeenCalled();
expect(createMembership).not.toHaveBeenCalled();
expect(createBrevoCustomer).not.toHaveBeenCalled();
expect(capturePostHogEvent).not.toHaveBeenCalled();
});
test("should reject unverified email users whose SSO provider is not already linked", async () => {
test("should auto-link unverified email users whose SSO provider is not already linked", async () => {
vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
vi.mocked(getUserByEmail).mockResolvedValue({
@@ -376,22 +380,26 @@ describe("handleSsoCallback", () => {
isActive: true,
});
await expect(
handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
})
).rejects.toThrow("OAuthAccountNotLinked");
expect(upsertAccount).not.toHaveBeenCalled();
const result = await handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
});
expect(result).toBe(true);
expect(upsertAccount).toHaveBeenCalledWith(
expect.objectContaining({
userId: "existing-user-id",
provider: mockAccount.provider,
providerAccountId: mockAccount.providerAccountId,
}),
undefined
);
expect(updateUser).not.toHaveBeenCalled();
expect(createUser).not.toHaveBeenCalled();
expect(createMembership).not.toHaveBeenCalled();
expect(createBrevoCustomer).not.toHaveBeenCalled();
expect(capturePostHogEvent).not.toHaveBeenCalled();
});
test("should reject existing users from a different SSO provider when no link exists", async () => {
test("should auto-link existing users from a different SSO provider when no link exists", async () => {
vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
vi.mocked(getUserByEmail).mockResolvedValue({
@@ -403,14 +411,53 @@ describe("handleSsoCallback", () => {
isActive: true,
});
await expect(
handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
})
).rejects.toThrow("OAuthAccountNotLinked");
expect(upsertAccount).not.toHaveBeenCalled();
const result = await handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
});
expect(result).toBe(true);
expect(upsertAccount).toHaveBeenCalledWith(
expect.objectContaining({
userId: "existing-user-id",
provider: mockAccount.provider,
providerAccountId: mockAccount.providerAccountId,
}),
undefined
);
expect(updateUser).not.toHaveBeenCalled();
expect(createUser).not.toHaveBeenCalled();
});
test("should auto-link same-email users even when the stored legacy provider account id is stale", async () => {
vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
vi.mocked(getUserByEmail).mockResolvedValue({
id: "existing-user-id",
email: mockUser.email,
emailVerified: new Date(),
identityProvider: "google",
identityProviderAccountId: "old-provider-id",
locale: mockUser.locale,
isActive: true,
} as any);
const result = await handleSsoCallback({
user: mockUser,
account: mockAccount,
callbackUrl: "http://localhost:3000",
});
expect(result).toBe(true);
expect(upsertAccount).toHaveBeenCalledWith(
expect.objectContaining({
userId: "existing-user-id",
provider: mockAccount.provider,
providerAccountId: mockAccount.providerAccountId,
}),
undefined
);
expect(updateUser).not.toHaveBeenCalled();
expect(createUser).not.toHaveBeenCalled();
});

View File

@@ -1,401 +0,0 @@
---
title: "Background Job Processing"
description: "How BullMQ works in Formbricks today, including the migrated response pipeline workload."
icon: "code"
---
This page documents the current BullMQ-based background job system in Formbricks and the first real workload that now runs on it: the response pipeline.
## Current State
Formbricks now uses BullMQ as an in-process background job system inside the Next.js web application.
The current implementation includes:
- a shared `@formbricks/jobs` package that owns queue creation, schemas, scheduling, and worker runtime concerns
- a Next.js startup hook that starts one BullMQ worker runtime per Node.js process without blocking app boot
- app-level enqueue helpers for request handlers
- an app-owned BullMQ response pipeline processor that replaces the legacy internal HTTP pipeline route
The first migrated workload is:
- `response-pipeline.process`
This means response-related side effects no longer depend on an internal `fetch()` back into the same app process.
## Why This Exists
The original response pipeline lived behind an internal Next.js route:
```text
apps/web/app/api/(internal)/pipeline
```
That model had a few problems:
- it was tightly coupled to the request lifecycle
- it relied on an internal HTTP hop instead of a typed background-job boundary
- it was harder to observe, retry, and scale safely
BullMQ addresses that by moving post-response work behind a queue while keeping the first version operationally simple for self-hosted users.
## High-Level Architecture
```mermaid
graph TD
A["API route or server code"] --> B["enqueueResponsePipelineEvents()"]
B --> C["getResponseSnapshotForPipeline()"]
B --> D["BackgroundJobProducer.enqueueResponsePipeline()"]
D --> E["BullMQ queue: background-jobs"]
F["instrumentation.ts"] --> G["registerJobsWorker()"]
G --> H["startJobsRuntime()"]
H --> I["BullMQ workers"]
I --> J["response-pipeline.process override"]
J --> K["processResponsePipelineJob()"]
E --> I
E --> L["Redis / Valkey"]
I --> L
```
## Responsibilities By Layer
### App Layer
- `apps/web/app/lib/pipelines.ts`
Owns enqueueing for response pipeline events. It gates queueing, hydrates the response snapshot once, logs failures, and never throws back into request handlers.
- `apps/web/modules/response-pipeline/lib/process-response-pipeline-job.ts`
Owns app-specific execution of response-pipeline jobs.
- `apps/web/modules/response-pipeline/lib/handle-integrations.ts`
Owns Slack, Notion, Airtable, and Google Sheets integration fan-out for the pipeline.
- `apps/web/modules/response-pipeline/lib/telemetry.ts`
Owns telemetry dispatch logic used by the response-created path.
- `apps/web/instrumentation-jobs.ts`
Registers the app-owned response-pipeline handler override with the shared BullMQ runtime and schedules retry after transient startup failures.
- `apps/web/lib/jobs/config.ts`
Turns environment configuration into queueing and worker-bootstrap decisions. Queue producers depend on `REDIS_URL`; worker startup additionally depends on `BULLMQ_WORKER_ENABLED`.
### Shared Jobs Layer
- `packages/jobs/src/types.ts`
Defines typed payload schemas such as `TResponsePipelineJobData`.
- `packages/jobs/src/definitions.ts`
Defines stable job names and payload validation.
- `packages/jobs/src/queue.ts`
Owns producer-side enqueueing and scheduling.
- `packages/jobs/src/runtime.ts`
Starts workers, connects Redis, and handles graceful shutdown.
- `packages/jobs/src/processors/registry.ts`
Validates payloads and dispatches named jobs, applying app-provided handler overrides when registered.
## Response Pipeline Flow
The response pipeline now runs fully in the background worker.
### Enqueueing
When a response is created or updated, the request path calls:
```ts
enqueueResponsePipelineEvents({
environmentId,
surveyId,
responseId,
events,
});
```
That helper:
1. deduplicates requested events
2. checks whether BullMQ queueing is enabled
3. uses the just-written response snapshot when the caller already has it
4. otherwise loads the latest response snapshot once via `getResponseSnapshotForPipeline(responseId)` using an uncached read
5. enqueues one BullMQ job per event with the shared snapshot payload
6. waits for the enqueue attempt to complete, then logs enqueue failures without failing the original request
### Execution
At worker startup, `apps/web/instrumentation-jobs.ts` registers an app-owned override for:
- `response-pipeline.process`
That override delegates to `processResponsePipelineJob(...)`, which performs:
- webhook delivery for all pipeline events
- integrations for `responseFinished`
- response-finished notification emails
- follow-up delivery
- survey auto-complete updates and audit logging
- response-created billing metering
- response-created telemetry dispatch
Current retry semantics are intentionally asymmetric:
- webhook delivery failures fail early BullMQ attempts so retries can happen at the job level
- if webhook delivery is still failing on the final BullMQ attempt, the worker logs that retries are exhausted and continues with the remaining event-specific side effects
- integration, email, telemetry, metering, follow-up, and survey auto-complete failures are logged inside the processor and do not fail the whole job
## Acceptance Criteria Review
### Pipeline Execution
Satisfied.
- New response create/update flows enqueue BullMQ jobs instead of calling an internal HTTP route.
- The job payload contains `environmentId`, `surveyId`, `event`, and an authoritative response snapshot.
- The response pipeline executes inside the BullMQ worker runtime.
### Feature Parity
Mostly satisfied for the legacy response pipeline behavior that existed in the old route.
The migrated BullMQ processor preserves:
- webhook delivery
- integrations
- response-finished emails
- follow-up execution
- survey auto-complete and audit logging
- response-created billing metering
- response-created telemetry
One important behavior change still exists today:
- webhook delivery failures delay the remaining side effects until the final BullMQ attempt
That is closer to the legacy route, because the pipeline eventually continues even if webhook delivery never succeeds. It is still not exact feature parity, though, because the legacy route continued immediately while the BullMQ worker waits until retries are exhausted before it degrades webhook failure into a logged condition.
### Architecture
Satisfied.
- Enqueueing lives in the app layer through `apps/web/app/lib/pipelines.ts`.
- Execution lives in the worker path under `apps/web/modules/response-pipeline/lib`.
- `@formbricks/jobs` stays responsible for queue/runtime concerns and typed job contracts.
### Cleanup
Satisfied.
The legacy internal route has been removed:
```text
apps/web/app/api/(internal)/pipeline/route.ts
```
The runtime path no longer depends on the old internal-route folder structure, and the remaining pipeline-only test mock under that deleted folder has been removed as part of the migration cleanup.
### Reliability
Satisfied at the current ticket scope.
BullMQ jobs use shared default retry behavior:
- `attempts: 3`
- exponential backoff starting at `1000ms`
Failures are logged with structured metadata such as:
- `jobId`
- `attempt`
- `jobName`
- `queueName`
- `environmentId`
- `surveyId`
- `responseId`
Request handlers remain non-blocking:
- if Redis is unavailable
- if queueing is disabled
- if snapshot hydration fails
- if enqueueing fails
the request still completes, and the failure is logged.
Worker startup is also non-blocking:
- Next.js boot does not await BullMQ readiness
- startup failures are logged
- the web app schedules a retry instead of requiring an immediate process restart
### Worker Integration
Satisfied.
The response pipeline is processed by the same BullMQ worker runtime started from Next.js instrumentation. No standalone worker service was introduced as part of this migration.
### Developer Experience
Satisfied.
The public app-level API for request handlers is intentionally small:
- `enqueueResponsePipelineEvents(...)`
This keeps queue names, Redis concerns, and BullMQ details out of response routes.
## Comparison With The Legacy Route
### Previous Implementation
The legacy internal route accepted a full response payload directly and then executed the entire pipeline synchronously inside the route handler.
Key characteristics of that model:
- request handlers performed an internal authenticated `fetch()` back into the same app
- the route received the response payload directly instead of hydrating it from a queue-side snapshot
- webhook failures were logged and did not block the rest of the pipeline
- response-finished integrations, emails, follow-ups, and survey auto-complete ran in the same route execution
- response-created metering was fire-and-forget while telemetry was awaited
### Current BullMQ Implementation
The current branch enqueues a typed snapshot-based BullMQ job and executes the pipeline inside the in-process worker registered from Next.js instrumentation.
Key characteristics of the current model:
- request handlers enqueue directly through `enqueueResponsePipelineEvents(...)`
- handlers now pass the just-written `TResponse` snapshot when they already have it
- callers that do not already have a response snapshot use an uncached pipeline-specific lookup
- worker startup is non-blocking and retries after transient failures
- webhook failures fail early attempts so BullMQ can retry them
- on the final attempt, webhook failures are logged and the remaining side effects continue
- response-created metering is awaited before the BullMQ job completes
### Net Result
Compared to the legacy route, the current branch is:
- architecturally stronger
- safer to scale and operate
- easier to observe through structured job logging
- closer to legacy feature parity than the earlier BullMQ iterations on this branch
The main remaining semantic difference is timing:
- the legacy route continued past webhook failures immediately
- the BullMQ worker now continues only after webhook retries are exhausted
That is an intentional trade-off in the current branch, not an accident.
## Current Queue Model
The queue remains intentionally small:
- queue name: `background-jobs`
- prefix: `formbricks:jobs`
- job names:
- `system.test-log`
- `response-pipeline.process`
The response pipeline is the first production workload on this queue.
## Local Development
Local development works end to end as long as Redis is available and the worker is enabled.
Required inputs:
- `REDIS_URL`
- optionally `BULLMQ_WORKER_ENABLED`
- optionally `BULLMQ_WORKER_COUNT`
- optionally `BULLMQ_WORKER_CONCURRENCY`
Behavior:
- if `REDIS_URL` is missing, queueing is skipped
- if `BULLMQ_WORKER_ENABLED=0`, the worker is not started, but request-side enqueueing can still stay enabled in deployments that point at a separate BullMQ worker
- outside tests, the worker is enabled by default
This makes it possible to develop request flows without hard-failing when Redis is absent, while still supporting full local end-to-end verification when Redis is running.
## Operational Notes
### Logging
The current implementation logs:
- worker startup failures
- Redis connection failures
- enqueue failures
- job failures
- webhook delivery failures
- integration failures
- email delivery failures
- follow-up failures
- survey auto-complete update failures
- metering failures
- telemetry failures
### Shutdown
The worker runtime registers `SIGTERM` and `SIGINT` handlers, closes workers and queue handles, and then closes Redis connections. This keeps shutdown behavior predictable inside the web process.
## Current Limitations
The migration satisfies the ticket, but a few larger architectural limits remain by design.
### Dual-Write Boundary
Response writes happen in Postgres and background jobs are enqueued in Redis. Those are separate systems, so this remains a dual-write boundary.
This means Formbricks currently has:
- non-blocking enqueue semantics
- at-least-once background execution
- no transactional guarantee that the product write and Redis enqueue succeed together
That trade-off was accepted for this BullMQ phase.
### In-Process Workers
Workers run inside the Next.js app process.
That keeps self-hosting simple, but it also means:
- job capacity still shares resources with the web process
- heavy background work is still Node.js-local
- scaling job throughput also scales the app runtime
### Webhook-Gated Retries
Webhook delivery still happens before the rest of the `responseFinished` side effects.
That gives Formbricks job-level retries for webhook delivery, but it also means:
- `responseFinished` side effects do not run on the early retry attempts
- the remaining side effects only continue after webhook retries are exhausted
- this is closer to legacy behavior than failing forever, but it is still not immediate parity
This is the current behavior of the branch and should be evaluated explicitly if we want stricter feature parity with the legacy route.
### Logs-First Observability
The current system has strong structured logging, but it does not yet provide:
- queue dashboards
- retry tooling
- latency metrics
- product-native workflow inspection
Those are future improvements, not blockers for the current migration.
## Recommended Next Steps
Now that the response pipeline is on BullMQ, the most useful next steps are:
1. migrate additional low-risk async workloads behind the same producer/runtime boundary
2. add queue metrics and worker health visibility beyond logs
3. define explicit idempotency rules for side-effect-heavy jobs
4. decide which future workloads should remain Node-local and which should eventually move to a different runtime
## Practical Conclusion
Formbricks now has:
- a production-capable BullMQ foundation
- a real migrated workload
- a clean separation between request-time enqueueing and background execution
The response pipeline migration should be considered complete for the current ticket scope.