fix: restore legacy SSO auto-linking hotfix (#7728 )

fix: backports missing posthog events fix (#7723 )
2026-04-14 12:31:00 -05:00 · 2026-04-13 20:42:33 +05:30 · 2026-04-13 17:36:39 +05:30
12 changed files with 187 additions and 461 deletions
--- a/apps/web/app/api/(internal)/pipeline/lib/posthog.ts
+++ b/apps/web/app/api/(internal)/pipeline/lib/posthog.ts
@@ -0,0 +1,33 @@
+import { capturePostHogEvent } from "@/lib/posthog";
+
+interface SurveyResponsePostHogEventParams {
+  organizationId: string;
+  surveyId: string;
+  surveyType: string;
+  environmentId: string;
+  responseCount: number;
+}
+
+/**
+ * Captures a PostHog event for survey responses at milestones:
+ * 1st response, then every 100th (100, 200, 300, ...).
+ */
+export const captureSurveyResponsePostHogEvent = ({
+  organizationId,
+  surveyId,
+  surveyType,
+  environmentId,
+  responseCount,
+}: SurveyResponsePostHogEventParams): void => {
+  if (responseCount !== 1 && responseCount % 100 !== 0) return;
+
+  capturePostHogEvent(organizationId, "survey_response_received", {
+    survey_id: surveyId,
+    survey_type: surveyType,
+    organization_id: organizationId,
+    environment_id: environmentId,
+    response_count: responseCount,
+    is_first_response: responseCount === 1,
+    milestone: responseCount === 1 ? "first" : String(responseCount),
+  });
+};
--- a/apps/web/app/api/(internal)/pipeline/route.ts
+++ b/apps/web/app/api/(internal)/pipeline/route.ts
@@ -1,7 +1,6 @@
 import { PipelineTriggers, Webhook } from "@prisma/client";
 import { headers } from "next/headers";
 import { v7 as uuidv7 } from "uuid";
-import { createCacheKey } from "@formbricks/cache";
 import { prisma } from "@formbricks/database";
 import { logger } from "@formbricks/logger";
 import { ResourceNotFoundError } from "@formbricks/types/errors";
@@ -9,12 +8,10 @@ import { sendTelemetryEvents } from "@/app/api/(internal)/pipeline/lib/telemetry
 import { ZPipelineInput } from "@/app/api/(internal)/pipeline/types/pipelines";
 import { responses } from "@/app/lib/api/response";
 import { transformErrorToDetails } from "@/app/lib/api/validator";
-import { cache } from "@/lib/cache";
 import { CRON_SECRET, POSTHOG_KEY } from "@/lib/constants";
 import { generateStandardWebhookSignature } from "@/lib/crypto";
 import { getIntegrations } from "@/lib/integration/service";
 import { getOrganizationByEnvironmentId } from "@/lib/organization/service";
-import { capturePostHogEvent } from "@/lib/posthog";
 import { getResponseCountBySurveyId } from "@/lib/response/service";
 import { getSurvey, updateSurvey } from "@/lib/survey/service";
 import { convertDatesInObject } from "@/lib/time";
@@ -27,6 +24,7 @@ import { resolveStorageUrlsInObject } from "@/modules/storage/utils";
 import { sendFollowUpsForResponse } from "@/modules/survey/follow-ups/lib/follow-ups";
 import { FollowUpSendError } from "@/modules/survey/follow-ups/types/follow-up";
 import { handleIntegrations } from "./lib/handleIntegrations";
+import { captureSurveyResponsePostHogEvent } from "./lib/posthog";

 export const POST = async (request: Request) => {
  const requestHeaders = await headers();
@@ -302,25 +300,16 @@ export const POST = async (request: Request) => {
      logger.error({ error, responseId: response.id }, "Failed to record response meter event");
    });

-    // Sampled PostHog tracking: first response + every 100th
    if (POSTHOG_KEY) {
-      const responseCount = await cache.withCache(
-        () => getResponseCountBySurveyId(surveyId),
-        createCacheKey.response.countBySurveyId(surveyId),
-        60 * 1000
-      );
+      const responseCount = await getResponseCountBySurveyId(surveyId);

-      if (responseCount === 1 || responseCount % 100 === 0) {
-        capturePostHogEvent(organization.id, "survey_response_received", {
-          survey_id: surveyId,
-          survey_type: survey.type,
-          organization_id: organization.id,
-          environment_id: environmentId,
-          response_count: responseCount,
-          is_first_response: responseCount === 1,
-          milestone: responseCount === 1 ? "first" : String(responseCount),
-        });
-      }
+      captureSurveyResponsePostHogEvent({
+        organizationId: organization.id,
+        surveyId,
+        surveyType: survey.type,
+        environmentId,
+        responseCount,
+      });
    }

    // Send telemetry events
--- a/apps/web/app/api/google-sheet/callback/route.ts
+++ b/apps/web/app/api/google-sheet/callback/route.ts
@@ -1,5 +1,6 @@
 import { google } from "googleapis";
 import { getServerSession } from "next-auth";
+import { logger } from "@formbricks/logger";
 import { TIntegrationGoogleSheetsConfig } from "@formbricks/types/integration/google-sheet";
 import { responses } from "@/app/lib/api/response";
 import {
@@ -10,6 +11,8 @@ import {
 } from "@/lib/constants";
 import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
 import { createOrUpdateIntegration, getIntegrationByType } from "@/lib/integration/service";
+import { capturePostHogEvent } from "@/lib/posthog";
+import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";
 import { authOptions } from "@/modules/auth/lib/authOptions";

 export const GET = async (req: Request) => {
@@ -82,6 +85,16 @@ export const GET = async (req: Request) => {

  const result = await createOrUpdateIntegration(environmentId, googleSheetIntegration);
  if (result) {
+    try {
+      const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
+      capturePostHogEvent(session.user.id, "integration_connected", {
+        integration_type: "googleSheets",
+        organization_id: organizationId,
+      });
+    } catch (err) {
+      logger.error({ error: err }, "Failed to capture PostHog integration_connected event for googleSheets");
+    }
+
    return Response.redirect(
      `${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/google-sheets`
    );
--- a/apps/web/app/api/v1/integrations/airtable/callback/route.ts
+++ b/apps/web/app/api/v1/integrations/airtable/callback/route.ts
@@ -6,6 +6,8 @@ import { fetchAirtableAuthToken } from "@/lib/airtable/service";
 import { AIRTABLE_CLIENT_ID, WEBAPP_URL } from "@/lib/constants";
 import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
 import { createOrUpdateIntegration } from "@/lib/integration/service";
+import { capturePostHogEvent } from "@/lib/posthog";
+import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";

 const getEmail = async (token: string) => {
  const req_ = await fetch("https://api.airtable.com/v0/meta/whoami", {
@@ -86,6 +88,17 @@ export const GET = withV1ApiWrapper({
        },
      };
      await createOrUpdateIntegration(environmentId, airtableIntegrationInput);
+
+      try {
+        const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
+        capturePostHogEvent(authentication.user.id, "integration_connected", {
+          integration_type: "airtable",
+          organization_id: organizationId,
+        });
+      } catch (err) {
+        logger.error({ error: err }, "Failed to capture PostHog integration_connected event for airtable");
+      }
+
      return {
        response: Response.redirect(
          `${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/airtable`
--- a/apps/web/app/api/v1/integrations/notion/callback/route.ts
+++ b/apps/web/app/api/v1/integrations/notion/callback/route.ts
@@ -1,3 +1,4 @@
+import { logger } from "@formbricks/logger";
 import { TIntegrationNotionConfigData, TIntegrationNotionInput } from "@formbricks/types/integration/notion";
 import { responses } from "@/app/lib/api/response";
 import { withV1ApiWrapper } from "@/app/lib/api/with-api-logging";
@@ -11,6 +12,8 @@ import {
 import { symmetricEncrypt } from "@/lib/crypto";
 import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
 import { createOrUpdateIntegration, getIntegrationByType } from "@/lib/integration/service";
+import { capturePostHogEvent } from "@/lib/posthog";
+import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";

 export const GET = withV1ApiWrapper({
  handler: async ({ req, authentication }) => {
@@ -96,6 +99,16 @@ export const GET = withV1ApiWrapper({
      const result = await createOrUpdateIntegration(environmentId, notionIntegration);

      if (result) {
+        try {
+          const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
+          capturePostHogEvent(authentication.user.id, "integration_connected", {
+            integration_type: "notion",
+            organization_id: organizationId,
+          });
+        } catch (err) {
+          logger.error({ error: err }, "Failed to capture PostHog integration_connected event for notion");
+        }
+
        return {
          response: Response.redirect(
            `${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/notion`
--- a/apps/web/app/api/v1/integrations/slack/callback/route.ts
+++ b/apps/web/app/api/v1/integrations/slack/callback/route.ts
@@ -1,3 +1,4 @@
+import { logger } from "@formbricks/logger";
 import {
  TIntegrationSlackConfig,
  TIntegrationSlackConfigData,
@@ -8,6 +9,8 @@ import { withV1ApiWrapper } from "@/app/lib/api/with-api-logging";
 import { SLACK_CLIENT_ID, SLACK_CLIENT_SECRET, SLACK_REDIRECT_URI, WEBAPP_URL } from "@/lib/constants";
 import { hasUserEnvironmentAccess } from "@/lib/environment/auth";
 import { createOrUpdateIntegration, getIntegrationByType } from "@/lib/integration/service";
+import { capturePostHogEvent } from "@/lib/posthog";
+import { getOrganizationIdFromEnvironmentId } from "@/lib/utils/helper";

 export const GET = withV1ApiWrapper({
  handler: async ({ req, authentication }) => {
@@ -104,6 +107,16 @@ export const GET = withV1ApiWrapper({
      const result = await createOrUpdateIntegration(environmentId, integration);

      if (result) {
+        try {
+          const organizationId = await getOrganizationIdFromEnvironmentId(environmentId);
+          capturePostHogEvent(authentication.user.id, "integration_connected", {
+            integration_type: "slack",
+            organization_id: organizationId,
+          });
+        } catch (err) {
+          logger.error({ error: err }, "Failed to capture PostHog integration_connected event for slack");
+        }
+
        return {
          response: Response.redirect(
            `${WEBAPP_URL}/environments/${environmentId}/workspace/integrations/slack`
--- a/apps/web/modules/auth/signup/actions.ts
+++ b/apps/web/modules/auth/signup/actions.ts
@@ -166,6 +166,11 @@ async function handleOrganizationCreation(ctx: ActionClientCtx, user: TCreatedUs
    });
  }

+  capturePostHogEvent(user.id, "organization_created", {
+    organization_id: organization.id,
+    is_first_org: true,
+  });
+
  await updateUser(user.id, {
    notificationSettings: {
      ...user.notificationSettings,
--- a/apps/web/modules/ee/sso/lib/providers.test.ts
+++ b/apps/web/modules/ee/sso/lib/providers.test.ts
@@ -51,7 +51,7 @@ describe("SSO Providers", () => {
    expect((samlProvider as any).authorization?.url).toBe("https://test-app.com/api/auth/saml/authorize");
    expect(samlProvider.token).toBe("https://test-app.com/api/auth/saml/token");
    expect(samlProvider.userinfo).toBe("https://test-app.com/api/auth/saml/userinfo");
-    expect(googleProvider.allowDangerousEmailAccountLinking).toBeUndefined();
-    expect(samlProvider.allowDangerousEmailAccountLinking).toBeUndefined();
+    expect((googleProvider as any).options?.allowDangerousEmailAccountLinking).toBe(true);
+    expect(samlProvider.allowDangerousEmailAccountLinking).toBe(true);
  });
 });
--- a/apps/web/modules/ee/sso/lib/providers.ts
+++ b/apps/web/modules/ee/sso/lib/providers.ts
@@ -26,6 +26,7 @@ export const getSSOProviders = () => [
  GoogleProvider({
    clientId: GOOGLE_CLIENT_ID || "",
    clientSecret: GOOGLE_CLIENT_SECRET || "",
+    allowDangerousEmailAccountLinking: true,
  }),
  AzureAD({
    clientId: AZUREAD_CLIENT_ID || "",
@@ -80,6 +81,7 @@ export const getSSOProviders = () => [
      clientId: "dummy",
      clientSecret: "dummy",
    },
+    allowDangerousEmailAccountLinking: true,
  },
 ];

--- a/apps/web/modules/ee/sso/lib/sso-handlers.ts
+++ b/apps/web/modules/ee/sso/lib/sso-handlers.ts
@@ -34,8 +34,6 @@ const LINKED_SSO_LOOKUP_SELECT = {
  identityProviderAccountId: true,
 } as const;

-const OAUTH_ACCOUNT_NOT_LINKED_ERROR = "OAuthAccountNotLinked";
-
 const syncSsoAccount = async (userId: string, account: Account, tx?: Prisma.TransactionClient) => {
  await upsertAccount(
    {
@@ -219,7 +217,7 @@ export const handleSsoCallback = async ({
    }

    // There is no existing linked account for this identity provider / account id
-    // check if a user account with this email already exists and fail closed if so
+    // check if a user account with this email already exists and auto-link it
    contextLogger.debug({ lookupType: "email" }, "No linked SSO account found, checking for user by email");

    const existingUserWithEmail = await getUserByEmail(user.email);
@@ -230,9 +228,10 @@ export const handleSsoCallback = async ({
          existingUserId: existingUserWithEmail.id,
          existingIdentityProvider: existingUserWithEmail.identityProvider,
        },
-        "SSO callback blocked: existing user found by email without linked provider account"
+        "SSO callback successful: existing user found by email"
      );
-      throw new Error(OAUTH_ACCOUNT_NOT_LINKED_ERROR);
+      await syncSsoAccount(existingUserWithEmail.id, account);
+      return true;
    }

    contextLogger.debug(
--- a/apps/web/modules/ee/sso/lib/tests/sso-handlers.test.ts
+++ b/apps/web/modules/ee/sso/lib/tests/sso-handlers.test.ts
@@ -338,7 +338,7 @@ describe("handleSsoCallback", () => {
      );
    });

-    test("should reject verified email users whose SSO provider is not already linked", async () => {
+    test("should auto-link verified email users whose SSO provider is not already linked", async () => {
      vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
      vi.mocked(getUserByEmail).mockResolvedValue({
        id: "existing-user-id",
@@ -349,22 +349,26 @@ describe("handleSsoCallback", () => {
        isActive: true,
      });

-      await expect(
-        handleSsoCallback({
-          user: mockUser,
-          account: mockAccount,
-          callbackUrl: "http://localhost:3000",
-        })
-      ).rejects.toThrow("OAuthAccountNotLinked");
-      expect(upsertAccount).not.toHaveBeenCalled();
+      const result = await handleSsoCallback({
+        user: mockUser,
+        account: mockAccount,
+        callbackUrl: "http://localhost:3000",
+      });
+
+      expect(result).toBe(true);
+      expect(upsertAccount).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userId: "existing-user-id",
+          provider: mockAccount.provider,
+          providerAccountId: mockAccount.providerAccountId,
+        }),
+        undefined
+      );
      expect(updateUser).not.toHaveBeenCalled();
      expect(createUser).not.toHaveBeenCalled();
-      expect(createMembership).not.toHaveBeenCalled();
-      expect(createBrevoCustomer).not.toHaveBeenCalled();
-      expect(capturePostHogEvent).not.toHaveBeenCalled();
    });

-    test("should reject unverified email users whose SSO provider is not already linked", async () => {
+    test("should auto-link unverified email users whose SSO provider is not already linked", async () => {
      vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
      vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
      vi.mocked(getUserByEmail).mockResolvedValue({
@@ -376,22 +380,26 @@ describe("handleSsoCallback", () => {
        isActive: true,
      });

-      await expect(
-        handleSsoCallback({
-          user: mockUser,
-          account: mockAccount,
-          callbackUrl: "http://localhost:3000",
-        })
-      ).rejects.toThrow("OAuthAccountNotLinked");
-      expect(upsertAccount).not.toHaveBeenCalled();
+      const result = await handleSsoCallback({
+        user: mockUser,
+        account: mockAccount,
+        callbackUrl: "http://localhost:3000",
+      });
+
+      expect(result).toBe(true);
+      expect(upsertAccount).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userId: "existing-user-id",
+          provider: mockAccount.provider,
+          providerAccountId: mockAccount.providerAccountId,
+        }),
+        undefined
+      );
      expect(updateUser).not.toHaveBeenCalled();
      expect(createUser).not.toHaveBeenCalled();
-      expect(createMembership).not.toHaveBeenCalled();
-      expect(createBrevoCustomer).not.toHaveBeenCalled();
-      expect(capturePostHogEvent).not.toHaveBeenCalled();
    });

-    test("should reject existing users from a different SSO provider when no link exists", async () => {
+    test("should auto-link existing users from a different SSO provider when no link exists", async () => {
      vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
      vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
      vi.mocked(getUserByEmail).mockResolvedValue({
@@ -403,14 +411,53 @@ describe("handleSsoCallback", () => {
        isActive: true,
      });

-      await expect(
-        handleSsoCallback({
-          user: mockUser,
-          account: mockAccount,
-          callbackUrl: "http://localhost:3000",
-        })
-      ).rejects.toThrow("OAuthAccountNotLinked");
-      expect(upsertAccount).not.toHaveBeenCalled();
+      const result = await handleSsoCallback({
+        user: mockUser,
+        account: mockAccount,
+        callbackUrl: "http://localhost:3000",
+      });
+
+      expect(result).toBe(true);
+      expect(upsertAccount).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userId: "existing-user-id",
+          provider: mockAccount.provider,
+          providerAccountId: mockAccount.providerAccountId,
+        }),
+        undefined
+      );
+      expect(updateUser).not.toHaveBeenCalled();
+      expect(createUser).not.toHaveBeenCalled();
+    });
+
+    test("should auto-link same-email users even when the stored legacy provider account id is stale", async () => {
+      vi.mocked(prisma.account.findUnique).mockResolvedValue(null);
+      vi.mocked(prisma.user.findFirst).mockResolvedValue(null);
+      vi.mocked(getUserByEmail).mockResolvedValue({
+        id: "existing-user-id",
+        email: mockUser.email,
+        emailVerified: new Date(),
+        identityProvider: "google",
+        identityProviderAccountId: "old-provider-id",
+        locale: mockUser.locale,
+        isActive: true,
+      } as any);
+
+      const result = await handleSsoCallback({
+        user: mockUser,
+        account: mockAccount,
+        callbackUrl: "http://localhost:3000",
+      });
+
+      expect(result).toBe(true);
+      expect(upsertAccount).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userId: "existing-user-id",
+          provider: mockAccount.provider,
+          providerAccountId: mockAccount.providerAccountId,
+        }),
+        undefined
+      );
      expect(updateUser).not.toHaveBeenCalled();
      expect(createUser).not.toHaveBeenCalled();
    });
--- a/docs/development/technical-handbook/background-job-processing.mdx
+++ b/docs/development/technical-handbook/background-job-processing.mdx
@@ -1,401 +0,0 @@
---
-title: "Background Job Processing"
-description: "How BullMQ works in Formbricks today, including the migrated response pipeline workload."
-icon: "code"
---
-
-This page documents the current BullMQ-based background job system in Formbricks and the first real workload that now runs on it: the response pipeline.
-
-## Current State
-
-Formbricks now uses BullMQ as an in-process background job system inside the Next.js web application.
-
-The current implementation includes:
-
- a shared `@formbricks/jobs` package that owns queue creation, schemas, scheduling, and worker runtime concerns
- a Next.js startup hook that starts one BullMQ worker runtime per Node.js process without blocking app boot
- app-level enqueue helpers for request handlers
- an app-owned BullMQ response pipeline processor that replaces the legacy internal HTTP pipeline route
-
-The first migrated workload is:
-
- `response-pipeline.process`
-
-This means response-related side effects no longer depend on an internal `fetch()` back into the same app process.
-
-## Why This Exists
-
-The original response pipeline lived behind an internal Next.js route:
-
-```text
-apps/web/app/api/(internal)/pipeline
-```
-
-That model had a few problems:
-
- it was tightly coupled to the request lifecycle
- it relied on an internal HTTP hop instead of a typed background-job boundary
- it was harder to observe, retry, and scale safely
-
-BullMQ addresses that by moving post-response work behind a queue while keeping the first version operationally simple for self-hosted users.
-
-## High-Level Architecture
-
-```mermaid
-graph TD
-    A["API route or server code"] --> B["enqueueResponsePipelineEvents()"]
-    B --> C["getResponseSnapshotForPipeline()"]
-    B --> D["BackgroundJobProducer.enqueueResponsePipeline()"]
-    D --> E["BullMQ queue: background-jobs"]
-    F["instrumentation.ts"] --> G["registerJobsWorker()"]
-    G --> H["startJobsRuntime()"]
-    H --> I["BullMQ workers"]
-    I --> J["response-pipeline.process override"]
-    J --> K["processResponsePipelineJob()"]
-    E --> I
-    E --> L["Redis / Valkey"]
-    I --> L
-```
-
-## Responsibilities By Layer
-
-### App Layer
-
- `apps/web/app/lib/pipelines.ts`
-  Owns enqueueing for response pipeline events. It gates queueing, hydrates the response snapshot once, logs failures, and never throws back into request handlers.
- `apps/web/modules/response-pipeline/lib/process-response-pipeline-job.ts`
-  Owns app-specific execution of response-pipeline jobs.
- `apps/web/modules/response-pipeline/lib/handle-integrations.ts`
-  Owns Slack, Notion, Airtable, and Google Sheets integration fan-out for the pipeline.
- `apps/web/modules/response-pipeline/lib/telemetry.ts`
-  Owns telemetry dispatch logic used by the response-created path.
- `apps/web/instrumentation-jobs.ts`
-  Registers the app-owned response-pipeline handler override with the shared BullMQ runtime and schedules retry after transient startup failures.
- `apps/web/lib/jobs/config.ts`
-  Turns environment configuration into queueing and worker-bootstrap decisions. Queue producers depend on `REDIS_URL`; worker startup additionally depends on `BULLMQ_WORKER_ENABLED`.
-
-### Shared Jobs Layer
-
- `packages/jobs/src/types.ts`
-  Defines typed payload schemas such as `TResponsePipelineJobData`.
- `packages/jobs/src/definitions.ts`
-  Defines stable job names and payload validation.
- `packages/jobs/src/queue.ts`
-  Owns producer-side enqueueing and scheduling.
- `packages/jobs/src/runtime.ts`
-  Starts workers, connects Redis, and handles graceful shutdown.
- `packages/jobs/src/processors/registry.ts`
-  Validates payloads and dispatches named jobs, applying app-provided handler overrides when registered.
-
-## Response Pipeline Flow
-
-The response pipeline now runs fully in the background worker.
-
-### Enqueueing
-
-When a response is created or updated, the request path calls:
-
-```ts
-enqueueResponsePipelineEvents({
-  environmentId,
-  surveyId,
-  responseId,
-  events,
-});
-```
-
-That helper:
-
-1. deduplicates requested events
-2. checks whether BullMQ queueing is enabled
-3. uses the just-written response snapshot when the caller already has it
-4. otherwise loads the latest response snapshot once via `getResponseSnapshotForPipeline(responseId)` using an uncached read
-5. enqueues one BullMQ job per event with the shared snapshot payload
-6. waits for the enqueue attempt to complete, then logs enqueue failures without failing the original request
-
-### Execution
-
-At worker startup, `apps/web/instrumentation-jobs.ts` registers an app-owned override for:
-
- `response-pipeline.process`
-
-That override delegates to `processResponsePipelineJob(...)`, which performs:
-
- webhook delivery for all pipeline events
- integrations for `responseFinished`
- response-finished notification emails
- follow-up delivery
- survey auto-complete updates and audit logging
- response-created billing metering
- response-created telemetry dispatch
-
-Current retry semantics are intentionally asymmetric:
-
- webhook delivery failures fail early BullMQ attempts so retries can happen at the job level
- if webhook delivery is still failing on the final BullMQ attempt, the worker logs that retries are exhausted and continues with the remaining event-specific side effects
- integration, email, telemetry, metering, follow-up, and survey auto-complete failures are logged inside the processor and do not fail the whole job
-
-## Acceptance Criteria Review
-
-### Pipeline Execution
-
-Satisfied.
-
- New response create/update flows enqueue BullMQ jobs instead of calling an internal HTTP route.
- The job payload contains `environmentId`, `surveyId`, `event`, and an authoritative response snapshot.
- The response pipeline executes inside the BullMQ worker runtime.
-
-### Feature Parity
-
-Mostly satisfied for the legacy response pipeline behavior that existed in the old route.
-
-The migrated BullMQ processor preserves:
-
- webhook delivery
- integrations
- response-finished emails
- follow-up execution
- survey auto-complete and audit logging
- response-created billing metering
- response-created telemetry
-
-One important behavior change still exists today:
-
- webhook delivery failures delay the remaining side effects until the final BullMQ attempt
-
-That is closer to the legacy route, because the pipeline eventually continues even if webhook delivery never succeeds. It is still not exact feature parity, though, because the legacy route continued immediately while the BullMQ worker waits until retries are exhausted before it degrades webhook failure into a logged condition.
-
-### Architecture
-
-Satisfied.
-
- Enqueueing lives in the app layer through `apps/web/app/lib/pipelines.ts`.
- Execution lives in the worker path under `apps/web/modules/response-pipeline/lib`.
- `@formbricks/jobs` stays responsible for queue/runtime concerns and typed job contracts.
-
-### Cleanup
-
-Satisfied.
-
-The legacy internal route has been removed:
-
-```text
-apps/web/app/api/(internal)/pipeline/route.ts
-```
-
-The runtime path no longer depends on the old internal-route folder structure, and the remaining pipeline-only test mock under that deleted folder has been removed as part of the migration cleanup.
-
-### Reliability
-
-Satisfied at the current ticket scope.
-
-BullMQ jobs use shared default retry behavior:
-
- `attempts: 3`
- exponential backoff starting at `1000ms`
-
-Failures are logged with structured metadata such as:
-
- `jobId`
- `attempt`
- `jobName`
- `queueName`
- `environmentId`
- `surveyId`
- `responseId`
-
-Request handlers remain non-blocking:
-
- if Redis is unavailable
- if queueing is disabled
- if snapshot hydration fails
- if enqueueing fails
-
-the request still completes, and the failure is logged.
-
-Worker startup is also non-blocking:
-
- Next.js boot does not await BullMQ readiness
- startup failures are logged
- the web app schedules a retry instead of requiring an immediate process restart
-
-### Worker Integration
-
-Satisfied.
-
-The response pipeline is processed by the same BullMQ worker runtime started from Next.js instrumentation. No standalone worker service was introduced as part of this migration.
-
-### Developer Experience
-
-Satisfied.
-
-The public app-level API for request handlers is intentionally small:
-
- `enqueueResponsePipelineEvents(...)`
-
-This keeps queue names, Redis concerns, and BullMQ details out of response routes.
-
-## Comparison With The Legacy Route
-
-### Previous Implementation
-
-The legacy internal route accepted a full response payload directly and then executed the entire pipeline synchronously inside the route handler.
-
-Key characteristics of that model:
-
- request handlers performed an internal authenticated `fetch()` back into the same app
- the route received the response payload directly instead of hydrating it from a queue-side snapshot
- webhook failures were logged and did not block the rest of the pipeline
- response-finished integrations, emails, follow-ups, and survey auto-complete ran in the same route execution
- response-created metering was fire-and-forget while telemetry was awaited
-
-### Current BullMQ Implementation
-
-The current branch enqueues a typed snapshot-based BullMQ job and executes the pipeline inside the in-process worker registered from Next.js instrumentation.
-
-Key characteristics of the current model:
-
- request handlers enqueue directly through `enqueueResponsePipelineEvents(...)`
- handlers now pass the just-written `TResponse` snapshot when they already have it
- callers that do not already have a response snapshot use an uncached pipeline-specific lookup
- worker startup is non-blocking and retries after transient failures
- webhook failures fail early attempts so BullMQ can retry them
- on the final attempt, webhook failures are logged and the remaining side effects continue
- response-created metering is awaited before the BullMQ job completes
-
-### Net Result
-
-Compared to the legacy route, the current branch is:
-
- architecturally stronger
- safer to scale and operate
- easier to observe through structured job logging
- closer to legacy feature parity than the earlier BullMQ iterations on this branch
-
-The main remaining semantic difference is timing:
-
- the legacy route continued past webhook failures immediately
- the BullMQ worker now continues only after webhook retries are exhausted
-
-That is an intentional trade-off in the current branch, not an accident.
-
-## Current Queue Model
-
-The queue remains intentionally small:
-
- queue name: `background-jobs`
- prefix: `formbricks:jobs`
- job names:
-  - `system.test-log`
-  - `response-pipeline.process`
-
-The response pipeline is the first production workload on this queue.
-
-## Local Development
-
-Local development works end to end as long as Redis is available and the worker is enabled.
-
-Required inputs:
-
- `REDIS_URL`
- optionally `BULLMQ_WORKER_ENABLED`
- optionally `BULLMQ_WORKER_COUNT`
- optionally `BULLMQ_WORKER_CONCURRENCY`
-
-Behavior:
-
- if `REDIS_URL` is missing, queueing is skipped
- if `BULLMQ_WORKER_ENABLED=0`, the worker is not started, but request-side enqueueing can still stay enabled in deployments that point at a separate BullMQ worker
- outside tests, the worker is enabled by default
-
-This makes it possible to develop request flows without hard-failing when Redis is absent, while still supporting full local end-to-end verification when Redis is running.
-
-## Operational Notes
-
-### Logging
-
-The current implementation logs:
-
- worker startup failures
- Redis connection failures
- enqueue failures
- job failures
- webhook delivery failures
- integration failures
- email delivery failures
- follow-up failures
- survey auto-complete update failures
- metering failures
- telemetry failures
-
-### Shutdown
-
-The worker runtime registers `SIGTERM` and `SIGINT` handlers, closes workers and queue handles, and then closes Redis connections. This keeps shutdown behavior predictable inside the web process.
-
-## Current Limitations
-
-The migration satisfies the ticket, but a few larger architectural limits remain by design.
-
-### Dual-Write Boundary
-
-Response writes happen in Postgres and background jobs are enqueued in Redis. Those are separate systems, so this remains a dual-write boundary.
-
-This means Formbricks currently has:
-
- non-blocking enqueue semantics
- at-least-once background execution
- no transactional guarantee that the product write and Redis enqueue succeed together
-
-That trade-off was accepted for this BullMQ phase.
-
-### In-Process Workers
-
-Workers run inside the Next.js app process.
-
-That keeps self-hosting simple, but it also means:
-
- job capacity still shares resources with the web process
- heavy background work is still Node.js-local
- scaling job throughput also scales the app runtime
-
-### Webhook-Gated Retries
-
-Webhook delivery still happens before the rest of the `responseFinished` side effects.
-
-That gives Formbricks job-level retries for webhook delivery, but it also means:
-
- `responseFinished` side effects do not run on the early retry attempts
- the remaining side effects only continue after webhook retries are exhausted
- this is closer to legacy behavior than failing forever, but it is still not immediate parity
-
-This is the current behavior of the branch and should be evaluated explicitly if we want stricter feature parity with the legacy route.
-
-### Logs-First Observability
-
-The current system has strong structured logging, but it does not yet provide:
-
- queue dashboards
- retry tooling
- latency metrics
- product-native workflow inspection
-
-Those are future improvements, not blockers for the current migration.
-
-## Recommended Next Steps
-
-Now that the response pipeline is on BullMQ, the most useful next steps are:
-
-1. migrate additional low-risk async workloads behind the same producer/runtime boundary
-2. add queue metrics and worker health visibility beyond logs
-3. define explicit idempotency rules for side-effect-heavy jobs
-4. decide which future workloads should remain Node-local and which should eventually move to a different runtime
-
-## Practical Conclusion
-
-Formbricks now has:
-
- a production-capable BullMQ foundation
- a real migrated workload
- a clean separation between request-time enqueueing and background execution
-
-The response pipeline migration should be considered complete for the current ticket scope.
Author	SHA1	Message	Date
Bhagya Amarasinghe	8204a5c652	fix: restore legacy SSO auto-linking hotfix (#7728 )	2026-04-13 20:42:33 +05:30
Anshuman Pandey	e823e10f9a	fix: backports missing posthog events fix (#7723 )	2026-04-13 17:36:39 +05:30