add new database model including documentGroups

This commit is contained in:
Matthias Nannt
2024-08-27 17:13:04 +02:00
parent 81738b77f5
commit 32d870b063
19 changed files with 323 additions and 131 deletions

View File

@@ -4,13 +4,12 @@ import { getEmailTemplateHtml } from "@/app/(app)/environments/[environmentId]/s
import { generateText } from "ai";
import { customAlphabet } from "nanoid";
import { z } from "zod";
import { clusterDocuments } from "@formbricks/ee/ai-analysis/lib/document/kmeans";
import { getQuestionResponseReferenceId } from "@formbricks/ee/ai-analysis/lib/document/utils";
import { llmModel } from "@formbricks/ee/ai/lib/utils";
import { sendEmbedSurveyPreviewEmail } from "@formbricks/email";
import { authenticatedActionClient } from "@formbricks/lib/actionClient";
import { checkAuthorization } from "@formbricks/lib/actionClient/utils";
import { llmModel } from "@formbricks/lib/ai";
import { clusterDocuments } from "@formbricks/lib/document/kmeans";
import { getDocumentsByTypeAndReferenceId } from "@formbricks/lib/document/service";
import { getQuestionResponseReferenceId } from "@formbricks/lib/document/utils";
import { getOrganizationIdFromSurveyId } from "@formbricks/lib/organization/utils";
import { getSurvey, updateSurvey } from "@formbricks/lib/survey/service";
import { ZId } from "@formbricks/types/environment";
@@ -166,10 +165,7 @@ export const getOpenTextSummaryAction = authenticatedActionClient
throw new ResourceNotFoundError("Survey", parsedInput.surveyId);
}
const documents = await getDocumentsByTypeAndReferenceId(
"questionResponse",
getQuestionResponseReferenceId(parsedInput.surveyId, parsedInput.questionId)
);
const documents = []; // TODO
const topics = await clusterDocuments(documents, 3);

View File

@@ -1,13 +1,10 @@
import { responses } from "@/app/lib/api/response";
import { transformErrorToDetails } from "@/app/lib/api/validator";
import { embed } from "ai";
import { headers } from "next/headers";
import { prisma } from "@formbricks/database";
import { createDocument } from "@formbricks/ee/ai-analysis/lib/document/service";
import { sendResponseFinishedEmail } from "@formbricks/email";
import { embeddingsModel } from "@formbricks/lib/ai";
import { CRON_SECRET, IS_AI_ENABLED, IS_FORMBRICKS_CLOUD } from "@formbricks/lib/constants";
import { createDocument } from "@formbricks/lib/document/service";
import { getQuestionResponseReferenceId } from "@formbricks/lib/document/utils";
import { getIntegrations } from "@formbricks/lib/integration/service";
import { getOrganizationByEnvironmentId } from "@formbricks/lib/organization/service";
import { getProductByEnvironmentId } from "@formbricks/lib/product/service";
@@ -180,17 +177,11 @@ export const POST = async (request: Request) => {
continue;
}
const text = `${question.headline.default} Answer: ${response.data[question.id]}`;
const { embedding } = await embed({
model: embeddingsModel,
value: text,
});
console.log("creating embedding for question response", question.id);
await createDocument({
environmentId,
referenceId: getQuestionResponseReferenceId(survey.id, question.id),
type: "questionResponse",
await createDocument(environmentId, {
responseId: response.id,
questionId: question.id,
text,
vector: embedding,
});
}
}

View File

@@ -14,7 +14,8 @@
"noUnusedParameters": true,
"preserveWatchOutput": true,
"skipLibCheck": true,
"strict": true
"strict": true,
"strictNullChecks": true
},
"exclude": ["node_modules", "dist"]
}

View File

@@ -1,25 +0,0 @@
-- Migration: creates the "DocumentType" enum and the "Document" table, with an
-- index on ("type", "referenceId") and a cascading link to "Environment".
-- CreateExtension
-- Needed for the vector(512) column type used below; a no-op if already installed.
CREATE EXTENSION IF NOT EXISTS "vector";
-- CreateEnum
-- Only one document type is defined here.
CREATE TYPE "DocumentType" AS ENUM ('questionResponse');
-- CreateTable
CREATE TABLE "Document" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- No database default: the writer must supply "updated_at".
"updated_at" TIMESTAMP(3) NOT NULL,
"environmentId" TEXT NOT NULL,
"type" "DocumentType" NOT NULL,
"referenceId" TEXT NOT NULL,
"text" TEXT NOT NULL,
-- Nullable: a row can exist before its 512-dimension vector is stored.
"vector" vector(512),
CONSTRAINT "Document_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
-- Non-unique: several documents may share the same ("type", "referenceId") pair.
CREATE INDEX "Document_type_referenceId_idx" ON "Document"("type", "referenceId");
-- AddForeignKey
-- Deleting an environment deletes its documents.
ALTER TABLE "Document" ADD CONSTRAINT "Document_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "Environment"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -0,0 +1,40 @@
-- Migration: creates "DocumentGroup" (owned by an Environment) and "Document"
-- (optionally linked to a DocumentGroup and to a Response), each with a
-- nullable 512-dimension vector column.
-- CreateExtension
-- Needed for the vector(512) column type used below; a no-op if already installed.
CREATE EXTENSION IF NOT EXISTS "vector";
-- CreateTable
CREATE TABLE "DocumentGroup" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- No database default: the writer must supply "updated_at".
"updated_at" TIMESTAMP(3) NOT NULL,
"environmentId" TEXT NOT NULL,
"text" TEXT NOT NULL,
-- Nullable: a row can exist before its vector is stored.
"vector" vector(512),
CONSTRAINT "DocumentGroup_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Document" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
-- All three link columns are nullable, so a document may exist without a
-- group, a response, or a question.
"documentGroupId" TEXT,
"responseId" TEXT,
-- "questionId" has no foreign key in this migration.
"questionId" TEXT,
"text" TEXT NOT NULL,
"vector" vector(512),
CONSTRAINT "Document_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
-- At most one document per (response, question) pair.
-- NOTE(review): both columns are nullable and PostgreSQL unique indexes treat
-- NULLs as distinct by default, so rows with a NULL in either column are not
-- constrained by this index; confirm that is intended.
-- NOTE(review): this migration creates no index on "DocumentGroup"."environmentId"
-- or "Document"."documentGroupId"; confirm whether lookups on them need one.
CREATE UNIQUE INDEX "Document_responseId_questionId_key" ON "Document"("responseId", "questionId");
-- AddForeignKey
-- Deleting an environment deletes its document groups.
ALTER TABLE "DocumentGroup" ADD CONSTRAINT "DocumentGroup_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "Environment"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
-- Deleting a document group deletes the documents assigned to it.
ALTER TABLE "Document" ADD CONSTRAINT "Document_documentGroupId_fkey" FOREIGN KEY ("documentGroupId") REFERENCES "DocumentGroup"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
-- Deleting a response deletes the documents derived from it.
ALTER TABLE "Document" ADD CONSTRAINT "Document_responseId_fkey" FOREIGN KEY ("responseId") REFERENCES "Response"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -134,6 +134,7 @@ model Response {
// singleUseId, used to prevent multiple responses
singleUseId String?
language String?
documents Document[]
@@unique([surveyId, singleUseId])
@@index([surveyId, createdAt]) // to determine monthly response count
@@ -415,7 +416,7 @@ model Environment {
tags Tag[]
segments Segment[]
integration Integration[]
documents Document[]
documentGroups DocumentGroup[]
@@index([productId])
}
@@ -661,20 +662,28 @@ model SurveyLanguage {
@@index([languageId])
}
enum DocumentType {
questionResponse
}
model Document {
model DocumentGroup {
id String @id @default(cuid())
createdAt DateTime @default(now()) @map(name: "created_at")
updatedAt DateTime @updatedAt @map(name: "updated_at")
environmentId String
environment Environment @relation(fields: [environmentId], references: [id], onDelete: Cascade)
type DocumentType
referenceId String
text String
vector Unsupported("vector(512)")?
@@index([type, referenceId])
documents Document[]
}
/// A text document with an optional 512-dimension vector. It may be linked to
/// a DocumentGroup and to the Response / question id it belongs to.
model Document {
id String @id @default(cuid())
createdAt DateTime @default(now()) @map(name: "created_at")
updatedAt DateTime @updatedAt @map(name: "updated_at")
// Optional group membership; deleting the group cascades to its documents.
documentGroupId String?
documentGroup DocumentGroup? @relation(fields: [documentGroupId], references: [id], onDelete: Cascade)
// Optional source response; deleting the response cascades to its documents.
responseId String?
response Response? @relation(fields: [responseId], references: [id], onDelete: Cascade)
// Plain string column: no relation is declared for the question.
questionId String?
text String
// Declared as Unsupported, so the service code in this change set reads and
// writes this column through raw SQL.
vector Unsupported("vector(512)")?
// At most one document per (response, question) pair.
// NOTE(review): both fields are optional; confirm how rows with a NULL in
// either column should be treated by this constraint.
@@unique([responseId, questionId])
}

View File

@@ -0,0 +1,25 @@
import { revalidateTag } from "next/cache";
/** Identifiers selecting which document-group cache tags to invalidate; each one is optional. */
interface RevalidateProps {
  id?: string;
  environmentId?: string;
}

/**
 * Cache-tag helpers for document groups.
 *
 * `tag` builds the tag strings, and `revalidate` invalidates the tag of every
 * identifier that was supplied (empty or missing identifiers are skipped).
 */
export const documentGroupCache = {
  tag: {
    /** Tag for a single document group. */
    byId: (id: string): string => `documentGroups-${id}`,
    /** Tag for all document groups of one environment. */
    byEnvironmentId: (environmentId: string): string => `environments-${environmentId}-documentGroups`,
  },
  revalidate(props: RevalidateProps): void {
    const { id, environmentId } = props;

    if (id) revalidateTag(this.tag.byId(id));
    if (environmentId) revalidateTag(this.tag.byEnvironmentId(environmentId));
  },
};

View File

@@ -0,0 +1,107 @@
import "server-only";
import { Prisma } from "@prisma/client";
import { embed } from "ai";
import { prisma } from "@formbricks/database";
import { validateInputs } from "@formbricks/lib/utils/validate";
import {
TDocumentGroup,
TDocumentGroupCreateInput,
ZDocumentGroupCreateInput,
} from "@formbricks/types/document-groups";
import { ZId } from "@formbricks/types/environment";
import { DatabaseError } from "@formbricks/types/errors";
import { embeddingsModel } from "../../../ai/lib/utils";
import { documentGroupCache } from "./cache";
/**
 * Row shape produced by the raw SQL reads in this file: a TDocumentGroup whose
 * `vector` is still the text form selected via `vector::text`, not yet parsed
 * into a number array.
 */
export type TPrismaDocumentGroup = { vector: string } & Omit<TDocumentGroup, "vector">;
/**
 * Creates a document group and stores the embedding of its text.
 *
 * The embedding is requested before anything is written, and the row insert
 * plus the vector update run in one transaction. A failure at any step
 * therefore leaves no group behind with a missing vector; such a row could
 * never be matched by the distance filter in `findNearestDocumentGroups`.
 *
 * @param documentGroupInput - Environment id and text of the new group.
 * @returns The created group with `vector` set to the generated embedding.
 * @throws DatabaseError when Prisma reports a known request error.
 */
export const createDocumentGroup = async (
  documentGroupInput: TDocumentGroupCreateInput
): Promise<TDocumentGroup> => {
  validateInputs([documentGroupInput, ZDocumentGroupCreateInput]);

  try {
    // Embed first: if the embedding request fails, no row has been written yet.
    const { embedding } = await embed({
      model: embeddingsModel,
      value: documentGroupInput.text,
    });

    // The vector column is not writable through the Prisma model API here, so
    // it is set with raw SQL using the bracketed text form "[v1,v2,...]".
    const vectorString = `[${embedding.join(",")}]`;

    // Insert the row and set its vector atomically.
    const prismaDocumentGroup = await prisma.$transaction(async (tx) => {
      const created = await tx.documentGroup.create({
        data: documentGroupInput,
      });

      await tx.$executeRaw`
        UPDATE "DocumentGroup"
        SET "vector" = ${vectorString}::vector(512)
        WHERE "id" = ${created.id};
      `;

      return created;
    });

    documentGroupCache.revalidate({
      id: prismaDocumentGroup.id,
      environmentId: prismaDocumentGroup.environmentId,
    });

    return {
      ...prismaDocumentGroup,
      vector: embedding,
    };
  } catch (error) {
    if (error instanceof Prisma.PrismaClientKnownRequestError) {
      throw new DatabaseError(error.message);
    }
    throw error;
  }
};
/**
 * Finds the document groups of an environment whose stored vector is closest
 * to the given vector.
 *
 * Only groups whose distance (pgvector `<=>`, cosine distance) is at most
 * `threshold` are returned, nearest first. Groups without a stored vector
 * never match.
 *
 * @param environmentId - Environment whose groups are searched.
 * @param vector - Query embedding; cast to `vector(512)` in the query.
 * @param limit - Maximum number of groups to return (default 5).
 * @param threshold - Maximum allowed distance (default 0.5).
 * @returns Matching groups with `vector` parsed back into a number array.
 * @throws DatabaseError when Prisma reports a known request error.
 */
export const findNearestDocumentGroups = async (
  environmentId: string,
  vector: number[],
  limit: number = 5,
  threshold: number = 0.5
): Promise<TDocumentGroup[]> => {
  validateInputs([environmentId, ZId]);

  // Bracketed, comma-separated text form of the query vector: "[v1,v2,...]".
  const vectorString = `[${vector.join(",")}]`;

  try {
    // The vector column is selected as text because it cannot be returned as a
    // native type; it is parsed back into numbers below.
    const rows: TPrismaDocumentGroup[] = await prisma.$queryRaw`
      SELECT
        id,
        created_at AS "createdAt",
        updated_at AS "updatedAt",
        text,
        "environmentId",
        vector::text
      FROM "DocumentGroup" d
      WHERE d."environmentId" = ${environmentId}
        AND d."vector" <=> ${vectorString}::vector(512) <= ${threshold}
      ORDER BY d."vector" <=> ${vectorString}::vector(512)
      LIMIT ${limit};
    `;

    return rows.map((row) => ({
      ...row,
      // "[1,2,3]" -> [1, 2, 3]: strip the brackets, split on commas, convert.
      vector: row.vector.slice(1, -1).split(",").map(Number),
    }));
  } catch (error) {
    // Same error mapping as createDocumentGroup.
    if (error instanceof Prisma.PrismaClientKnownRequestError) {
      throw new DatabaseError(error.message);
    }
    throw error;
  }
};

View File

@@ -0,0 +1,26 @@
import { revalidateTag } from "next/cache";
/** Identifiers selecting which document cache tags to invalidate; each one is optional. */
interface RevalidateProps {
  id?: string;
  responseId?: string;
  questionId?: string;
}

/**
 * Cache-tag helpers for documents.
 *
 * `tag` builds the tag strings. `revalidate` invalidates the by-id tag when
 * `id` is given, and the response/question tag only when both `responseId`
 * and `questionId` are given.
 */
export const documentCache = {
  tag: {
    /** Tag for a single document. */
    byId: (id: string): string => `documents-${id}`,
    /** Tag for the documents of one question within one response. */
    byResponseIdQuestionId: (responseId: string, questionId: string): string =>
      `responses-${responseId}-questions-${questionId}-documents`,
  },
  revalidate(props: RevalidateProps): void {
    const { id, responseId, questionId } = props;

    if (id) revalidateTag(this.tag.byId(id));
    if (responseId && questionId) {
      revalidateTag(this.tag.byResponseIdQuestionId(responseId, questionId));
    }
  },
};

View File

@@ -1,41 +1,70 @@
import "server-only";
import { Prisma } from "@prisma/client";
import { embed, generateText } from "ai";
import { cache as reactCache } from "react";
import { prisma } from "@formbricks/database";
import { ZString } from "@formbricks/types/common";
import {
TDocument,
TDocumentCreateInput,
ZDocumentCreateInput,
ZDocumentType,
} from "@formbricks/types/documents";
import { cache } from "@formbricks/lib/cache";
import { validateInputs } from "@formbricks/lib/utils/validate";
import { TDocument, TDocumentCreateInput, ZDocumentCreateInput } from "@formbricks/types/documents";
import { ZId } from "@formbricks/types/environment";
import { DatabaseError } from "@formbricks/types/errors";
import { cache } from "../cache";
import { validateInputs } from "../utils/validate";
import { embeddingsModel, llmModel } from "../../../ai/lib/utils";
import { createDocumentGroup, findNearestDocumentGroups } from "../document-group/service";
import { documentCache } from "./cache";
export type TPrismaDocument = Omit<TDocument, "vector"> & {
vector: string;
};
export const createDocument = async (documentInput: TDocumentCreateInput): Promise<TDocument> => {
export const createDocument = async (
environmentId: string,
documentInput: TDocumentCreateInput
): Promise<TDocument> => {
validateInputs([documentInput, ZDocumentCreateInput]);
try {
const { vector, ...data } = documentInput;
// Generate text embedding
const { embedding } = await embed({
model: embeddingsModel,
value: documentInput.text,
});
// find fitting documentGroup
let documentGroupId;
const nearestDocumentGroups = await findNearestDocumentGroups(environmentId, embedding, 1, 0.2);
if (nearestDocumentGroups.length > 0) {
documentGroupId = nearestDocumentGroups[0].id;
} else {
// create documentGroup
// generate name for documentGroup
const { text } = await generateText({
model: llmModel,
system: `You are a Customer Experience Management platform. You are asked to transform a user feedback into a well defined and consice insight (feature request, complaint, loved feature or bug) like "The dashboard is slow" or "The ability to export data from the app"`,
prompt: `The user feedback: "${documentInput.text}"`,
});
const documentGroup = await createDocumentGroup({
environmentId,
text,
});
documentGroupId = documentGroup.id;
}
// create document
const prismaDocument = await prisma.document.create({
data,
data: {
...documentInput,
documentGroupId,
},
});
const document = {
...prismaDocument,
vector,
vector: embedding,
};
// update vector
const vectorString = `[${vector.join(",")}]`;
// update document vector with the embedding
const vectorString = `[${embedding.join(",")}]`;
await prisma.$executeRaw`
UPDATE "Document"
SET "vector" = ${vectorString}::vector(512)
@@ -44,8 +73,12 @@ export const createDocument = async (documentInput: TDocumentCreateInput): Promi
documentCache.revalidate({
id: document.id,
type: document.type,
referenceId: document.referenceId,
});
// search for nearest documentGroup
await createDocumentGroup({
environmentId,
text: document.text,
});
return document;
@@ -57,11 +90,11 @@ export const createDocument = async (documentInput: TDocumentCreateInput): Promi
}
};
export const getDocumentsByTypeAndReferenceId = reactCache(
(type: string, referenceId: string): Promise<TDocument[]> =>
export const getDocumentsByResponseIdQuestionId = reactCache(
(responseId: string, questionId: string): Promise<TDocument[]> =>
cache(
async () => {
validateInputs([type, ZDocumentType], [referenceId, ZString]);
validateInputs([responseId, ZId], [questionId, ZId]);
try {
const prismaDocuments: TPrismaDocument[] = await prisma.$queryRaw`
@@ -69,13 +102,13 @@ export const getDocumentsByTypeAndReferenceId = reactCache(
id,
created_at AS "createdAt",
updated_at AS "updatedAt",
type,
"responseId",
"questionId",
text,
"referenceId",
vector::text
FROM "Document" d
WHERE d."type" = ${type}::"DocumentType"
AND d."referenceId" = ${referenceId}
WHERE d."responseId" = ${responseId}
AND d."questionId" = ${questionId}
`;
const documents = prismaDocuments.map((prismaDocument) => {
@@ -99,9 +132,9 @@ export const getDocumentsByTypeAndReferenceId = reactCache(
throw error;
}
},
[`getDocumentsByTypeAndReferenceId-${type}-${referenceId}`],
[`getDocumentsByResponseIdQuestionId-${responseId}-${questionId}`],
{
tags: [documentCache.tag.byTypeAndReferenceId(type, referenceId)],
tags: [documentCache.tag.byResponseIdQuestionId(responseId, questionId)],
}
)()
);
@@ -122,9 +155,9 @@ export const findNearestDocuments = async (
id,
created_at AS "createdAt",
updated_at AS "updatedAt",
type,
text,
"referenceId",
"responseId",
"questionId",
vector::text
FROM "Document" d
WHERE d."environmentId" = ${environmentId}

View File

@@ -1,5 +1,5 @@
import { createAzure } from "@ai-sdk/azure";
import { env } from "./env";
import { env } from "@formbricks/lib/env";
export const llmModel = createAzure({
resourceName: env.AI_AZURE_LLM_RESSOURCE_NAME, // Azure resource name

View File

@@ -20,10 +20,12 @@
"@types/react": "18.3.3"
},
"dependencies": {
"@ai-sdk/azure": "^0.0.17",
"@formbricks/database": "workspace:*",
"@formbricks/lib": "workspace:*",
"@paralleldrive/cuid2": "^2.2.2",
"@radix-ui/react-collapsible": "^1.1.0",
"ai": "^3.2.37",
"https-proxy-agent": "^7.0.5",
"lucide-react": "^0.427.0",
"next": "^14.2.5",

View File

@@ -1,26 +0,0 @@
import { revalidateTag } from "next/cache";
/** Identifiers selecting which document cache tags to invalidate; each one is optional. */
interface RevalidateProps {
  id?: string;
  type?: string;
  referenceId?: string;
}

/**
 * Cache-tag helpers for documents.
 *
 * `tag` builds the tag strings. `revalidate` invalidates the by-id tag when
 * `id` is given, and the type/reference tag only when both `type` and
 * `referenceId` are given.
 */
export const documentCache = {
  tag: {
    /** Tag for a single document. */
    byId(id: string) {
      return `documents-${id}`;
    },
    /** Tag for all documents of one type that share a reference id. */
    byTypeAndReferenceId(type: string, id: string) {
      return `documents-${type}-${id}`;
    },
  },
  revalidate({ id, type, referenceId }: RevalidateProps): void {
    if (id) {
      // Build the tag from the document id itself. Using referenceId here
      // would invalidate an unrelated tag, or "documents-undefined" when no
      // referenceId is passed.
      revalidateTag(this.tag.byId(id));
    }
    if (type && referenceId) {
      revalidateTag(this.tag.byTypeAndReferenceId(type, referenceId));
    }
  },
};

View File

@@ -14,7 +14,6 @@
"test": "dotenv -e ../../.env -- vitest run"
},
"dependencies": {
"@ai-sdk/azure": "^0.0.17",
"@aws-sdk/client-s3": "3.631.0",
"@aws-sdk/s3-presigned-post": "3.631.0",
"@aws-sdk/s3-request-presigner": "3.631.0",
@@ -24,7 +23,6 @@
"@paralleldrive/cuid2": "^2.2.2",
"@t3-oss/env-nextjs": "^0.11.0",
"@ungap/structured-clone": "^1.2.0",
"ai": "^3.2.37",
"aws-crt": "^1.21.3",
"date-fns": "^3.6.0",
"jsonwebtoken": "^9.0.2",

View File

@@ -0,0 +1,20 @@
import { z } from "zod";
import { ZId } from "./environment";
/**
 * Schema of a document group as returned by the service layer, with the
 * vector already parsed into an array of exactly 512 numbers.
 */
export const ZDocumentGroup = z.object({
  id: ZId,
  createdAt: z.date(),
  updatedAt: z.date(),
  environmentId: ZId,
  text: z.string(),
  vector: z.array(z.number()).length(512),
});

export type TDocumentGroup = z.infer<typeof ZDocumentGroup>;

/**
 * Input accepted when creating a document group: only the owning environment
 * and the text. The fields reuse the validators declared on ZDocumentGroup.
 */
export const ZDocumentGroupCreateInput = ZDocumentGroup.pick({
  environmentId: true,
  text: true,
});

export type TDocumentGroupCreateInput = z.infer<typeof ZDocumentGroupCreateInput>;

View File

@@ -1,16 +1,13 @@
import { z } from "zod";
import { ZId } from "./environment";
export const ZDocumentType = z.enum(["questionResponse"]);
export type TDocumentType = z.infer<typeof ZDocumentType>;
export const ZDocument = z.object({
environmentId: ZId,
referenceId: z.string(),
id: ZId,
createdAt: z.date(),
updatedAt: z.date(),
type: ZDocumentType,
documentGroupId: ZId.nullable(),
responseId: ZId.nullable(),
questionId: ZId.nullable(),
text: z.string(),
vector: z.array(z.number()).length(512),
});
@@ -18,11 +15,9 @@ export const ZDocument = z.object({
export type TDocument = z.infer<typeof ZDocument>;
export const ZDocumentCreateInput = z.object({
environmentId: ZId,
type: ZDocumentType,
referenceId: z.string(),
responseId: ZId.optional(),
questionId: ZId.optional(),
text: z.string(),
vector: z.array(z.number()).length(512),
});
export type TDocumentCreateInput = z.infer<typeof ZDocumentCreateInput>;

12
pnpm-lock.yaml generated
View File

@@ -635,6 +635,9 @@ importers:
packages/ee:
dependencies:
'@ai-sdk/azure':
specifier: ^0.0.17
version: 0.0.17(zod@3.23.8)
'@formbricks/database':
specifier: workspace:*
version: link:../database
@@ -647,6 +650,9 @@ importers:
'@radix-ui/react-collapsible':
specifier: ^1.1.0
version: 1.1.0(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@19.0.0-rc-935180c7e0-20240524(react@19.0.0-rc-935180c7e0-20240524))(react@19.0.0-rc-935180c7e0-20240524)
ai:
specifier: ^3.2.37
version: 3.3.13(react@19.0.0-rc-935180c7e0-20240524)(sswr@2.1.0(svelte@4.2.18))(svelte@4.2.18)(vue@3.4.38(typescript@5.5.4))(zod@3.23.8)
https-proxy-agent:
specifier: ^7.0.5
version: 7.0.5
@@ -791,9 +797,6 @@ importers:
packages/lib:
dependencies:
'@ai-sdk/azure':
specifier: ^0.0.17
version: 0.0.17(zod@3.23.8)
'@aws-sdk/client-s3':
specifier: 3.631.0
version: 3.631.0(aws-crt@1.21.3)
@@ -821,9 +824,6 @@ importers:
'@ungap/structured-clone':
specifier: ^1.2.0
version: 1.2.0
ai:
specifier: ^3.2.37
version: 3.3.13(react@19.0.0-rc-935180c7e0-20240524)(sswr@2.1.0(svelte@4.2.18))(svelte@4.2.18)(vue@3.4.38(typescript@5.5.4))(zod@3.23.8)
aws-crt:
specifier: ^1.21.3
version: 1.21.3