migration

This commit is contained in:
pandeymangg
2026-02-09 16:17:31 +05:30
parent 562ac70067
commit f4f6b240c0
4 changed files with 482 additions and 85 deletions

View File

@@ -4,6 +4,14 @@ import { TBaseFilters, TSegmentWithSurveyNames } from "@formbricks/types/segment
import { getSegment } from "../segments";
import { segmentFilterToPrismaQuery } from "./prisma-query";
const mockQueryRawUnsafe = vi.fn();
vi.mock("@formbricks/database", () => ({
prisma: {
$queryRawUnsafe: (...args: unknown[]) => mockQueryRawUnsafe(...args),
},
}));
vi.mock("../segments", () => ({
getSegment: vi.fn(),
}));
@@ -18,6 +26,8 @@ describe("segmentFilterToPrismaQuery", () => {
beforeEach(() => {
vi.clearAllMocks();
// Default mock: number filter raw SQL returns one matching contact
mockQueryRawUnsafe.mockResolvedValue([{ contactId: "mock-contact-1" }]);
});
afterEach(() => {
@@ -135,16 +145,7 @@ describe("segmentFilterToPrismaQuery", () => {
},
},
],
OR: [
{
attributes: {
some: {
attributeKey: { key: "age" },
valueNumber: { gt: 30 },
},
},
},
],
OR: [{ id: { in: ["mock-contact-1"] } }],
});
}
});
@@ -756,12 +757,7 @@ describe("segmentFilterToPrismaQuery", () => {
});
expect(subgroup.AND[0].AND[2]).toStrictEqual({
attributes: {
some: {
attributeKey: { key: "age" },
valueNumber: { gte: 18 },
},
},
id: { in: ["mock-contact-1"] },
});
// Segment inclusion
@@ -1162,24 +1158,10 @@ describe("segmentFilterToPrismaQuery", () => {
},
});
// Second subgroup (numeric operators)
// Second subgroup (numeric operators - now use raw SQL subquery returning contact IDs)
const secondSubgroup = whereClause.AND?.[0];
expect(secondSubgroup.AND[1].AND).toContainEqual({
attributes: {
some: {
attributeKey: { key: "loginCount" },
valueNumber: { gt: 5 },
},
},
});
expect(secondSubgroup.AND[1].AND).toContainEqual({
attributes: {
some: {
attributeKey: { key: "purchaseAmount" },
valueNumber: { lte: 1000 },
},
},
id: { in: ["mock-contact-1"] },
});
// Third subgroup (negation operators in OR clause)
@@ -1251,7 +1233,10 @@ describe("segmentFilterToPrismaQuery", () => {
attributes: {
some: {
attributeKey: { key: "purchaseDate" },
valueDate: { lt: new Date(targetDate) },
OR: [
{ valueDate: { lt: new Date(targetDate) } },
{ valueDate: null, value: { lt: new Date(targetDate).toISOString() } },
],
},
},
},
@@ -1292,7 +1277,10 @@ describe("segmentFilterToPrismaQuery", () => {
attributes: {
some: {
attributeKey: { key: "signupDate" },
valueDate: { gt: new Date(targetDate) },
OR: [
{ valueDate: { gt: new Date(targetDate) } },
{ valueDate: null, value: { gt: new Date(targetDate).toISOString() } },
],
},
},
},
@@ -1334,7 +1322,16 @@ describe("segmentFilterToPrismaQuery", () => {
attributes: {
some: {
attributeKey: { key: "lastActivityDate" },
valueDate: { gte: new Date(startDate), lte: new Date(endDate) },
OR: [
{ valueDate: { gte: new Date(startDate), lte: new Date(endDate) } },
{
valueDate: null,
value: {
gte: new Date(startDate).toISOString(),
lte: new Date(endDate).toISOString(),
},
},
],
},
},
},
@@ -1369,10 +1366,12 @@ describe("segmentFilterToPrismaQuery", () => {
if (result.ok) {
const whereClause = result.data.whereClause as Prisma.ContactWhereInput;
const filterClause = (whereClause.AND as Prisma.ContactWhereInput[])?.[1];
// isSameDay should generate gte: startOfDay and lte: endOfDay
// isSameDay should generate OR with valueDate and string fallback
const dateAttr = (filterClause as unknown as any)?.AND?.[0]?.attributes;
expect(dateAttr).toBeDefined();
const valueDate = dateAttr?.some?.valueDate;
const orConditions = dateAttr?.some?.OR;
expect(orConditions).toHaveLength(2);
const valueDate = orConditions?.[0]?.valueDate;
expect(valueDate).toHaveProperty("gte");
expect(valueDate).toHaveProperty("lte");
// Verify the date range is for the same day
@@ -1418,7 +1417,9 @@ describe("segmentFilterToPrismaQuery", () => {
const filterClause = (whereClause.AND as Prisma.ContactWhereInput[])?.[1];
const dateAttr = (filterClause as unknown as any)?.AND?.[0]?.attributes;
expect(dateAttr).toBeDefined();
const valueDate = dateAttr?.some?.valueDate;
const orConditions = dateAttr?.some?.OR;
expect(orConditions).toHaveLength(2);
const valueDate = orConditions?.[0]?.valueDate;
expect(valueDate).toHaveProperty("lt");
// The threshold should be approximately 30 days ago
const threshold = valueDate.lt as Date;
@@ -1458,7 +1459,9 @@ describe("segmentFilterToPrismaQuery", () => {
const filterClause = (whereClause.AND as Prisma.ContactWhereInput[])?.[1];
const dateAttr = (filterClause as unknown as any)?.AND?.[0]?.attributes;
expect(dateAttr).toBeDefined();
const valueDate = dateAttr?.some?.valueDate;
const orConditions = dateAttr?.some?.OR;
expect(orConditions).toHaveLength(2);
const valueDate = orConditions?.[0]?.valueDate;
expect(valueDate).toHaveProperty("gte");
// The threshold should be approximately 2 weeks (14 days) ago
const threshold = valueDate.gte as Date;
@@ -1497,7 +1500,9 @@ describe("segmentFilterToPrismaQuery", () => {
const filterClause = (whereClause.AND as Prisma.ContactWhereInput[])?.[1];
const dateAttr = (filterClause as unknown as any)?.AND?.[0]?.attributes;
expect(dateAttr).toBeDefined();
const valueDate = dateAttr?.some?.valueDate;
const orConditions = dateAttr?.some?.OR;
expect(orConditions).toHaveLength(2);
const valueDate = orConditions?.[0]?.valueDate;
expect(valueDate).toHaveProperty("lt");
// The threshold should be approximately 6 months ago
const threshold = valueDate.lt as Date;
@@ -1555,15 +1560,17 @@ describe("segmentFilterToPrismaQuery", () => {
const andConditions = (filterClause as unknown as any).AND as Prisma.ContactWhereInput[];
expect(andConditions).toHaveLength(2);
// First filter: isAfter
// First filter: isAfter (with OR fallback for transition)
const firstFilter = andConditions[0] as unknown as any;
expect(firstFilter.attributes.some.attributeKey.key).toBe("signupDate");
expect(firstFilter.attributes.some.valueDate.gt).toEqual(new Date("2024-01-01"));
expect(firstFilter.attributes.some.OR[0].valueDate.gt).toEqual(new Date("2024-01-01"));
expect(firstFilter.attributes.some.OR[1].valueDate).toBeNull();
expect(firstFilter.attributes.some.OR[1].value.gt).toBe(new Date("2024-01-01").toISOString());
// Second filter: isNewerThan
// Second filter: isNewerThan (with OR fallback for transition)
const secondFilter = andConditions[1] as unknown as any;
expect(secondFilter.attributes.some.attributeKey.key).toBe("lastActivityDate");
expect(secondFilter.attributes.some.valueDate).toHaveProperty("gte");
expect(secondFilter.attributes.some.OR[0].valueDate).toHaveProperty("gte");
}
});
@@ -1631,11 +1638,11 @@ describe("segmentFilterToPrismaQuery", () => {
mode: "insensitive",
});
// Number filter uses 'valueNumber'
expect((andConditions[1] as unknown as any).attributes.some.valueNumber).toEqual({ gt: 5 });
// Number filter uses raw SQL subquery (transition code) returning contact IDs
expect(andConditions[1]).toEqual({ id: { in: ["mock-contact-1"] } });
// Date filter uses 'valueDate'
expect((andConditions[2] as unknown as any).attributes.some.valueDate).toHaveProperty("gte");
// Date filter uses OR fallback with 'valueDate' and string 'value'
expect((andConditions[2] as unknown as any).attributes.some.OR[0].valueDate).toHaveProperty("gte");
}
});
});

View File

@@ -1,5 +1,6 @@
import { Prisma } from "@prisma/client";
import { cache as reactCache } from "react";
import { prisma } from "@formbricks/database";
import { logger } from "@formbricks/logger";
import { err, ok } from "@formbricks/types/error-handlers";
import {
@@ -19,6 +20,17 @@ import { isResourceFilter } from "@/modules/ee/contacts/segments/lib/utils";
import { endOfDay, startOfDay, subtractTimeUnit } from "../date-utils";
import { getSegment } from "../segments";
// Whitelist translating the numeric segment operators into SQL comparison symbols.
// Lookups for any other operator name yield undefined.
const SQL_OPERATORS: Record<string, string> = Object.fromEntries([
  ["greaterThan", ">"],
  ["greaterEqual", ">="],
  ["lessThan", "<"],
  ["lessEqual", "<="],
]);
// Regex pattern for validating numeric strings in SQL (optional sign, digits,
// optional fractional part, e.g. "123", "-45.67").
// The pattern is sent to Postgres as a bind parameter, so it needs exactly one level
// of escaping: the JS literal "\\." yields the regex `\.` (a literal dot). Doubling the
// backslashes again would produce `\\.` — a literal backslash followed by any
// character — and decimal strings would never match.
const NUMBER_PATTERN_SQL = "^-?[0-9]+(\\.[0-9]+)?$";
// Type for the result of the segment filter to prisma query generation
export type SegmentFilterQueryResult = {
whereClause: Prisma.ContactWhereInput;
@@ -29,8 +41,10 @@ const valueIsRelativeDateValue = (value: TSegmentFilterValue): value is TRelativ
};
/**
* Builds a Prisma where clause for date attribute filters
* Uses the native valueDate column for performant DateTime comparisons
* Builds a Prisma where clause for date attribute filters.
* Uses an OR fallback to handle both migrated rows (valueDate populated)
* and un-migrated rows (valueDate NULL, value contains ISO string).
* ISO 8601 strings sort lexicographically correctly, so string gt/lt works.
*/
const buildDateAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): Prisma.ContactWhereInput => {
const { root, qualifier, value } = filter;
@@ -39,37 +53,44 @@ const buildDateAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): P
const now = new Date();
let dateCondition: Prisma.DateTimeNullableFilter = {};
let stringDateCondition: Prisma.StringFilter = {};
switch (operator) {
case "isOlderThan": {
// value should be { amount, unit }
if (valueIsRelativeDateValue(value)) {
const threshold = subtractTimeUnit(now, value.amount, value.unit);
dateCondition = { lt: threshold };
stringDateCondition = { lt: threshold.toISOString() };
}
break;
}
case "isNewerThan": {
// value should be { amount, unit }
if (valueIsRelativeDateValue(value)) {
const threshold = subtractTimeUnit(now, value.amount, value.unit);
dateCondition = { gte: threshold };
stringDateCondition = { gte: threshold.toISOString() };
}
break;
}
case "isBefore":
if (typeof value === "string") {
dateCondition = { lt: new Date(value) };
stringDateCondition = { lt: new Date(value).toISOString() };
}
break;
case "isAfter":
if (typeof value === "string") {
dateCondition = { gt: new Date(value) };
stringDateCondition = { gt: new Date(value).toISOString() };
}
break;
case "isBetween":
if (Array.isArray(value) && value.length === 2) {
dateCondition = { gte: new Date(value[0]), lte: new Date(value[1]) };
stringDateCondition = {
gte: new Date(value[0]).toISOString(),
lte: new Date(value[1]).toISOString(),
};
}
break;
case "isSameDay": {
@@ -77,6 +98,7 @@ const buildDateAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): P
const dayStart = startOfDay(new Date(value));
const dayEnd = endOfDay(new Date(value));
dateCondition = { gte: dayStart, lte: dayEnd };
stringDateCondition = { gte: dayStart.toISOString(), lte: dayEnd.toISOString() };
}
break;
}
@@ -86,54 +108,68 @@ const buildDateAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): P
attributes: {
some: {
attributeKey: { key: contactAttributeKey },
valueDate: dateCondition,
OR: [{ valueDate: dateCondition }, { valueDate: null, value: stringDateCondition }],
},
},
};
};
/**
 * Builds a Prisma where clause for number attribute filters.
 * Uses a raw SQL subquery to handle both migrated rows (valueNumber populated)
 * and un-migrated rows (valueNumber NULL, value contains numeric string).
 * This is transition code for the deferred value backfill.
 *
 * The query resolves the matching contact ids up front and the returned clause
 * restricts contacts to those ids.
 *
 * TODO: After the backfill script has been run and all valueNumber columns are populated,
 * revert this to the clean Prisma-only version that queries valueNumber directly.
 *
 * @param filter - attribute filter whose operator is one of the numeric comparison operators
 * @returns `{}` for an operator without a SQL mapping, an impossible `id` condition when
 *          nothing can match, otherwise `{ id: { in: [...] } }` with the matching contact ids
 */
const buildNumberAttributeFilterWhereClause = async (
  filter: TSegmentAttributeFilter
): Promise<Prisma.ContactWhereInput> => {
  const { root, qualifier, value } = filter;
  const { contactAttributeKey } = root;
  const { operator } = qualifier;

  // Only symbols from the whitelist are ever interpolated into the SQL text below.
  const sqlOp = SQL_OPERATORS[operator];
  if (!sqlOp) {
    return {};
  }

  const numericValue = typeof value === "number" ? value : Number(value);
  if (!Number.isFinite(numericValue)) {
    // A non-numeric filter value cannot match anything. Without this guard NaN would be
    // bound as a float, and Postgres orders NaN above every other number, so "<" / "<="
    // filters would match every row.
    return { id: "__NUMBER_FILTER_NO_MATCH__" };
  }

  // The CASE expression guarantees the text -> double precision cast is only evaluated
  // for rows whose value passed the numeric pattern check. Postgres does not guarantee
  // left-to-right evaluation of AND operands, so guarding the cast with a plain
  // `value ~ $3 AND value::double precision ...` can still raise on non-numeric strings.
  const matchingContactIds = await prisma.$queryRawUnsafe<{ contactId: string }[]>(
    `
    SELECT DISTINCT ca."contactId"
    FROM "ContactAttribute" ca
    JOIN "ContactAttributeKey" cak ON ca."attributeKeyId" = cak.id
    WHERE cak.key = $1
      AND (
        CASE
          WHEN ca."valueNumber" IS NOT NULL THEN ca."valueNumber"
          WHEN ca.value ~ $3 THEN ca.value::double precision
          ELSE NULL
        END
      ) ${sqlOp} $2
    `,
    contactAttributeKey,
    numericValue,
    NUMBER_PATTERN_SQL
  );

  const contactIds = matchingContactIds.map((r) => r.contactId);
  if (contactIds.length === 0) {
    // Return an impossible condition so the filter correctly excludes all contacts
    return { id: "__NUMBER_FILTER_NO_MATCH__" };
  }

  return { id: { in: contactIds } };
};
/**
* Builds a Prisma where clause from a segment attribute filter
*/
const buildAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): Prisma.ContactWhereInput => {
const buildAttributeFilterWhereClause = async (
filter: TSegmentAttributeFilter
): Promise<Prisma.ContactWhereInput> => {
const { root, qualifier, value } = filter;
const { contactAttributeKey } = root;
const { operator } = qualifier;
@@ -179,7 +215,7 @@ const buildAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): Prism
// Handle number operators
if (["greaterThan", "greaterEqual", "lessThan", "lessEqual"].includes(operator)) {
return buildNumberAttributeFilterWhereClause(filter);
return await buildNumberAttributeFilterWhereClause(filter);
}
// For string operators, ensure value is a primitive (not an object or array)
@@ -216,7 +252,9 @@ const buildAttributeFilterWhereClause = (filter: TSegmentAttributeFilter): Prism
/**
* Builds a Prisma where clause from a person filter
*/
const buildPersonFilterWhereClause = (filter: TSegmentPersonFilter): Prisma.ContactWhereInput => {
const buildPersonFilterWhereClause = async (
filter: TSegmentPersonFilter
): Promise<Prisma.ContactWhereInput> => {
const { personIdentifier } = filter.root;
if (personIdentifier === "userId") {
@@ -227,7 +265,7 @@ const buildPersonFilterWhereClause = (filter: TSegmentPersonFilter): Prisma.Cont
contactAttributeKey: personIdentifier,
},
};
return buildAttributeFilterWhereClause(personFilter);
return await buildAttributeFilterWhereClause(personFilter);
}
return {};
@@ -314,9 +352,9 @@ const processSingleFilter = async (
switch (root.type) {
case "attribute":
return buildAttributeFilterWhereClause(filter as TSegmentAttributeFilter);
return await buildAttributeFilterWhereClause(filter as TSegmentAttributeFilter);
case "person":
return buildPersonFilterWhereClause(filter as TSegmentPersonFilter);
return await buildPersonFilterWhereClause(filter as TSegmentPersonFilter);
case "device":
return buildDeviceFilterWhereClause(filter as TSegmentDeviceFilter, deviceType);
case "segment":

View File

@@ -0,0 +1,201 @@
import { logger } from "@formbricks/logger";
import type { MigrationScript } from "../../src/scripts/migration-runner";
// Regex sources shared by the queries in this migration. They are written as raw
// template strings so each backslash appears exactly as the regex engine receives it.
// NUMBER_PATTERN: optional minus sign, digits, and — if a decimal point is present —
// at least one digit after it (e.g., "123", "-45.67")
const NUMBER_PATTERN = String.raw`^-?[0-9]+(\.[0-9]+)?$`;
// ISO_DATE_PATTERN: YYYY-MM-DD, optionally followed by THH:mm:ss, optional .sss and optional Z
const ISO_DATE_PATTERN = String.raw`^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{3})?Z?)?$`;
/** Row shape returned by the bulk key-analysis query in step 2 of the migration. */
type KeyTypeAnalysis = {
// id of the "ContactAttributeKey" row
id: string;
// key of the "ContactAttributeKey" row
key: string;
// Type chosen by the query's CASE expression
detected_type: "number" | "date" | "string";
// Number of attribute rows for this key whose trimmed value is not empty (SQL COUNT, hence bigint)
non_empty_count: bigint;
};
/** Counters collected while the migration runs; used for log output. */
type MigrationStats = {
// Count of all "ContactAttributeKey" rows
totalKeys: number;
// Count of keys with type = 'default'
defaultKeys: number;
// Count of keys with type = 'custom'
customKeys: number;
// Sum of the number, date and string counters below
processedKeys: number;
// Custom keys classified as number
numberTypeKeys: number;
// Custom keys classified as date
dateTypeKeys: number;
// Custom keys classified as string
stringTypeKeys: number;
// Custom keys that have no non-empty attribute value
skippedEmptyKeys: number;
};
/**
 * Data migration that infers a data type for every custom contact attribute key.
 *
 * It gathers key counts for logging, classifies all custom keys with one bulk query
 * (number, date or string, based on the stored string values) and then sets "dataType"
 * on the keys detected as number or date, in batches of 100 ids per UPDATE.
 * Only "ContactAttributeKey" rows are written here; valueNumber/valueDate are not
 * populated by this script (see the NOTE comments below).
 */
export const addedAttributesDataTypes: MigrationScript = {
type: "data",
id: "jdxclvxcwfh7031hmvwy3pe2",
name: "20260203033241_added_attributes_data_types",
run: async ({ tx }) => {
const stats: MigrationStats = {
totalKeys: 0,
defaultKeys: 0,
customKeys: 0,
processedKeys: 0,
numberTypeKeys: 0,
dateTypeKeys: 0,
stringTypeKeys: 0,
skippedEmptyKeys: 0,
};
// ============================================================
// STEP 1: Get overall counts for logging
// ============================================================
logger.info("Step 1: Gathering statistics...");
const countsResult = await tx.$queryRaw<{ total: bigint; default_keys: bigint; custom_keys: bigint }[]>`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE type = 'default') as default_keys,
COUNT(*) FILTER (WHERE type = 'custom') as custom_keys
FROM "ContactAttributeKey"
`;
// The counts are typed as bigint; convert them to plain numbers for the stats object
stats.totalKeys = Number(countsResult[0].total);
stats.defaultKeys = Number(countsResult[0].default_keys);
stats.customKeys = Number(countsResult[0].custom_keys);
logger.info(
`Found ${stats.totalKeys.toString()} total keys (${stats.defaultKeys.toString()} default, ${stats.customKeys.toString()} custom)`
);
// ============================================================
// STEP 2: Analyze ALL custom keys in ONE query to determine their types
// This replaces thousands of individual queries with a single bulk analysis
// ============================================================
logger.info("Step 2: Analyzing custom keys to detect data types (bulk query)...");
// $1 = NUMBER_PATTERN, $2 = ISO_DATE_PATTERN.
// The LEFT JOIN keeps keys that have no attribute rows; they end up with non_empty_count = 0.
// The CASE is evaluated in order: a key with no non-empty values is 'string'; otherwise it is
// 'number' when no non-empty value fails the number pattern, then 'date' when none fails the
// date pattern, and 'string' in every other case.
const keyTypeAnalysis = await tx.$queryRawUnsafe<KeyTypeAnalysis[]>(
`
WITH key_analysis AS (
SELECT
cak.id,
cak.key,
COUNT(*) FILTER (WHERE TRIM(ca.value) != '') as non_empty_count,
COUNT(*) FILTER (WHERE TRIM(ca.value) != '' AND ca.value !~ $1) as non_number_count,
COUNT(*) FILTER (WHERE TRIM(ca.value) != '' AND ca.value !~ $2) as non_date_count
FROM "ContactAttributeKey" cak
LEFT JOIN "ContactAttribute" ca ON ca."attributeKeyId" = cak.id
WHERE cak.type = 'custom'
GROUP BY cak.id, cak.key
)
SELECT
id,
key,
non_empty_count,
CASE
WHEN non_empty_count = 0 THEN 'string'
WHEN non_number_count = 0 THEN 'number'
WHEN non_date_count = 0 THEN 'date'
ELSE 'string'
END as detected_type
FROM key_analysis
`,
NUMBER_PATTERN,
ISO_DATE_PATTERN
);
// Categorize keys by detected type
// Only ids of number and date keys are collected; string and empty keys are just counted.
const numberKeys: string[] = [];
const dateKeys: string[] = [];
for (const analysis of keyTypeAnalysis) {
if (Number(analysis.non_empty_count) === 0) {
stats.skippedEmptyKeys++;
} else if (analysis.detected_type === "number") {
numberKeys.push(analysis.id);
stats.numberTypeKeys++;
} else if (analysis.detected_type === "date") {
dateKeys.push(analysis.id);
stats.dateTypeKeys++;
} else {
stats.stringTypeKeys++;
}
}
// processedKeys does not include the skipped empty keys
stats.processedKeys = stats.numberTypeKeys + stats.dateTypeKeys + stats.stringTypeKeys;
logger.info(
`Analysis complete: ${stats.numberTypeKeys.toString()} number, ${stats.dateTypeKeys.toString()} date, ${stats.stringTypeKeys.toString()} string, ${stats.skippedEmptyKeys.toString()} empty (skipped)`
);
// ============================================================
// STEP 3: Update dataType for number keys (in batches)
// ============================================================
if (numberKeys.length > 0) {
logger.info(`Step 3: Updating ${numberKeys.length.toString()} keys to 'number' type...`);
const KEY_BATCH_SIZE = 100;
for (let i = 0; i < numberKeys.length; i += KEY_BATCH_SIZE) {
const batch = numberKeys.slice(i, i + KEY_BATCH_SIZE);
// Batch numbers in the log are 1-based
logger.info(`Step 3: Updating batch ${Math.floor(i / KEY_BATCH_SIZE + 1).toString()}...`);
await tx.$executeRaw`
UPDATE "ContactAttributeKey"
SET "dataType" = 'number'::"ContactAttributeDataType"
WHERE id = ANY(${batch})
`;
}
logger.info("Step 3 complete: dataType updated for number keys");
} else {
logger.info("Step 3: No number keys to update, skipping");
}
// NOTE: Value backfill for number attributes (populating valueNumber) is handled
// by a separate post-deploy script: packages/database/src/scripts/backfill-attribute-values.ts
// The transition code in prisma-query.ts handles queries correctly during the backfill window.
// ============================================================
// STEP 4: Update dataType for date keys (in batches)
// ============================================================
if (dateKeys.length > 0) {
logger.info(`Step 4: Updating ${dateKeys.length.toString()} keys to 'date' type...`);
const DATE_KEY_BATCH_SIZE = 100;
for (let i = 0; i < dateKeys.length; i += DATE_KEY_BATCH_SIZE) {
const batch = dateKeys.slice(i, i + DATE_KEY_BATCH_SIZE);
// Batch numbers in the log are 1-based
logger.info(`Step 4: Updating batch ${Math.floor(i / DATE_KEY_BATCH_SIZE + 1).toString()}...`);
await tx.$executeRaw`
UPDATE "ContactAttributeKey"
SET "dataType" = 'date'::"ContactAttributeDataType"
WHERE id = ANY(${batch})
`;
}
logger.info("Step 4 complete: dataType updated for date keys");
} else {
logger.info("Step 4: No date keys to update, skipping");
}
// NOTE: Value backfill for date attributes (populating valueDate) is handled
// by a separate post-deploy script: packages/database/src/scripts/backfill-attribute-values.ts
// The transition code in prisma-query.ts handles queries correctly during the backfill window.
// ============================================================
// FINAL: Log summary
// ============================================================
logger.info(
`
========================================
Migration Complete (keys-only)!
========================================
Total attribute keys: ${stats.totalKeys.toString()}
- Default keys (skipped): ${stats.defaultKeys.toString()}
- Custom keys: ${stats.customKeys.toString()}
- Number type: ${stats.numberTypeKeys.toString()}
- Date type: ${stats.dateTypeKeys.toString()}
- String type: ${stats.stringTypeKeys.toString()}
- Empty (skipped): ${stats.skippedEmptyKeys.toString()}
NOTE: Value backfill (valueNumber/valueDate) is deferred.
Run the backfill script after deploy for large datasets:
npx tsx packages/database/src/scripts/backfill-attribute-values.ts
========================================`
);
},
};

View File

@@ -0,0 +1,151 @@
/**
* Standalone backfill script for populating valueNumber and valueDate columns
* on ContactAttribute rows where they are currently NULL.
*
* This script is intended to be run AFTER the keys-only data migration
* (20260203033241_added_attributes_data_types) has completed.
*
* - Required for Formbricks Cloud (~6M rows) to restore optimized query performance.
* - Optional for self-hosters (the transition code in prisma-query.ts handles
* un-migrated rows correctly for small datasets).
*
* Usage:
* npx tsx packages/database/src/scripts/backfill-attribute-values.ts
*
* Key characteristics:
* - Uses PrismaClient directly (no transaction wrapping, no 30-min timeout)
* - Processes in batches of keys (configurable via KEY_BATCH_SIZE)
* - Idempotent: only updates rows where valueNumber/valueDate IS NULL
* - Can be stopped and resumed safely (each batch commits independently)
* - Logs progress throughout
*/
import { PrismaClient } from "@prisma/client";
// Regex sources matching those used in the migration for consistency. Written as raw
// template strings so each backslash appears exactly as the regex engine receives it.
const NUMBER_PATTERN = String.raw`^-?[0-9]+(\.[0-9]+)?$`;
const ISO_DATE_PATTERN = String.raw`^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{3})?Z?)?$`;
// Number of attribute keys handled by a single UPDATE statement
const KEY_BATCH_SIZE = 10;
// Client shared by every step of this script; constructed directly with default options
const prisma = new PrismaClient();
/**
 * Fills "valueNumber" on attribute rows that belong to number-typed attribute keys.
 *
 * Keys are processed KEY_BATCH_SIZE at a time, one UPDATE per batch. Only rows whose
 * "valueNumber" is still NULL and whose value matches NUMBER_PATTERN are touched, so
 * rows that already carry a number are left alone on a re-run.
 *
 * @returns the total number of rows updated across all batches
 */
const backfillNumberAttributes = async (): Promise<number> => {
  console.log("Fetching number-type attribute keys...");
  const keyRows = await prisma.$queryRaw<{ id: string; key: string }[]>`
SELECT id, key FROM "ContactAttributeKey"
WHERE "dataType" = 'number'::"ContactAttributeDataType"
`;
  const keyCount = keyRows.length;
  if (keyCount === 0) {
    console.log("No number-type attribute keys found. Skipping.");
    return 0;
  }
  console.log(`Found ${keyCount.toString()} number-type keys. Backfilling valueNumber...`);

  // $1 = ids of the keys in the current batch, $2 = NUMBER_PATTERN
  const updateSql = `
UPDATE "ContactAttribute"
SET "valueNumber" = value::DOUBLE PRECISION
WHERE "attributeKeyId" = ANY($1)
AND "valueNumber" IS NULL
AND TRIM(value) != ''
AND value ~ $2
`;
  const allKeyIds = keyRows.map((row) => row.id);
  let rowsUpdated = 0;
  let offset = 0;
  while (offset < keyCount) {
    const nextOffset = offset + KEY_BATCH_SIZE;
    const affected = await prisma.$executeRawUnsafe(
      updateSql,
      allKeyIds.slice(offset, nextOffset),
      NUMBER_PATTERN
    );
    rowsUpdated += Number(affected);
    console.log(
      ` Number backfill progress: ${Math.min(nextOffset, keyCount).toString()}/${keyCount.toString()} keys (${rowsUpdated.toString()} rows updated)`
    );
    offset = nextOffset;
  }
  return rowsUpdated;
};
/**
 * Fills "valueDate" on attribute rows that belong to date-typed attribute keys.
 *
 * Keys are processed KEY_BATCH_SIZE at a time, one UPDATE per batch. Only rows whose
 * "valueDate" is still NULL and whose value matches ISO_DATE_PATTERN are touched.
 *
 * ISO_DATE_PATTERN only checks the shape of the string, so a value such as "2023-02-30"
 * passes the pattern but makes the TIMESTAMP cast fail, which aborts the whole UPDATE.
 * To keep one bad value from blocking every other key, a failed batch is retried one key
 * at a time; keys that still fail are reported and skipped, and the function rejects at
 * the end so the script still finishes with an error.
 *
 * @returns the total number of rows updated across all batches
 * @throws Error after all batches were attempted, if any key could not be backfilled
 */
const backfillDateAttributes = async (): Promise<number> => {
  console.log("Fetching date-type attribute keys...");
  const dateKeys = await prisma.$queryRaw<{ id: string; key: string }[]>`
SELECT id, key FROM "ContactAttributeKey"
WHERE "dataType" = 'date'::"ContactAttributeDataType"
`;
  if (dateKeys.length === 0) {
    console.log("No date-type attribute keys found. Skipping.");
    return 0;
  }
  console.log(`Found ${dateKeys.length.toString()} date-type keys. Backfilling valueDate...`);

  // $1 = ids of the keys to update, $2 = ISO_DATE_PATTERN
  const updateSql = `
UPDATE "ContactAttribute"
SET "valueDate" = value::TIMESTAMP
WHERE "attributeKeyId" = ANY($1)
AND "valueDate" IS NULL
AND TRIM(value) != ''
AND value ~ $2
`;
  const runUpdate = async (keyIds: string[]): Promise<number> =>
    Number(await prisma.$executeRawUnsafe(updateSql, keyIds, ISO_DATE_PATTERN));

  let totalUpdated = 0;
  const failedKeys: string[] = [];
  for (let i = 0; i < dateKeys.length; i += KEY_BATCH_SIZE) {
    const batch = dateKeys.slice(i, i + KEY_BATCH_SIZE);
    try {
      totalUpdated += await runUpdate(batch.map((k) => k.id));
    } catch (batchError: unknown) {
      console.error(" Date backfill batch failed, retrying its keys one at a time:", batchError);
      for (const attributeKey of batch) {
        try {
          totalUpdated += await runUpdate([attributeKey.id]);
        } catch (keyError: unknown) {
          failedKeys.push(`${attributeKey.key} (${attributeKey.id})`);
          console.error(
            ` Date backfill failed for key "${attributeKey.key}" (${attributeKey.id}):`,
            keyError
          );
        }
      }
    }
    console.log(
      ` Date backfill progress: ${Math.min(i + KEY_BATCH_SIZE, dateKeys.length).toString()}/${dateKeys.length.toString()} keys (${totalUpdated.toString()} rows updated)`
    );
  }

  if (failedKeys.length > 0) {
    // Surface the failure to the caller only after every other key had its chance to run
    throw new Error(
      `Date backfill could not process ${failedKeys.length.toString()} key(s) after updating ${totalUpdated.toString()} rows: ${failedKeys.join(", ")}`
    );
  }
  return totalUpdated;
};
/**
 * Script entry point: runs the number backfill, then the date backfill, and prints a
 * summary with the row counts and the elapsed time.
 */
const main = async (): Promise<void> => {
  const divider = "========================================";
  console.log(divider);
  console.log("Attribute Value Backfill Script");
  console.log(divider);
  console.log("");

  // The timer covers both backfill steps, not the banner output above
  const startedAt = Date.now();
  const numberRowsUpdated = await backfillNumberAttributes();
  console.log("");
  const dateRowsUpdated = await backfillDateAttributes();
  const durationSec = ((Date.now() - startedAt) / 1000).toFixed(1);

  console.log("");
  console.log(divider);
  console.log("Backfill Complete!");
  console.log(divider);
  console.log(` valueNumber rows updated: ${numberRowsUpdated.toString()}`);
  console.log(` valueDate rows updated: ${dateRowsUpdated.toString()}`);
  console.log(` Duration: ${durationSec}s`);
  console.log(divider);
};
// Run the backfill. On failure a non-zero exit code is recorded instead of calling
// process.exit(): exiting inside the catch handler would end the process before the
// finally handler gets to close the Prisma connection.
main()
  .catch((error: unknown) => {
    console.error("Backfill failed:", error);
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
  });