documents can have multiple insights

This commit is contained in:
Matthias Nannt
2024-08-28 17:47:44 +02:00
parent 32d870b063
commit 519f7838c6
12 changed files with 284 additions and 171 deletions

View File

@@ -3,6 +3,7 @@ import { transformErrorToDetails } from "@/app/lib/api/validator";
import { headers } from "next/headers";
import { prisma } from "@formbricks/database";
import { createDocument } from "@formbricks/ee/ai-analysis/lib/document/service";
import { getEnterpriseLicense } from "@formbricks/ee/lib/service";
import { sendResponseFinishedEmail } from "@formbricks/email";
import { CRON_SECRET, IS_AI_ENABLED, IS_FORMBRICKS_CLOUD } from "@formbricks/lib/constants";
import { getIntegrations } from "@formbricks/lib/integration/service";
@@ -163,26 +164,31 @@ export const POST = async (request: Request) => {
// generate embeddings for all open text question responses for enterprise and scale plans
const hasSurveyOpenTextQuestions = survey.questions.some((question) => question.type === "openText");
if (hasSurveyOpenTextQuestions && IS_FORMBRICKS_CLOUD && IS_AI_ENABLED) {
const organization = await getOrganizationByEnvironmentId(environmentId);
if (!organization) {
throw new Error("Organization not found");
}
if (organization.billing.plan === "enterprise" || organization.billing.plan === "scale") {
for (const question of survey.questions) {
if (question.type === "openText") {
const isQuestionAnswered = response.data[question.id] !== undefined;
console.log("isQuestionAnswered", isQuestionAnswered);
if (!isQuestionAnswered) {
continue;
if (hasSurveyOpenTextQuestions && IS_FORMBRICKS_CLOUD) {
const { active: isEnterpriseEdition } = await getEnterpriseLicense();
const isAiEnabled = isEnterpriseEdition && IS_AI_ENABLED;
if (hasSurveyOpenTextQuestions && isAiEnabled) {
const organization = await getOrganizationByEnvironmentId(environmentId);
if (!organization) {
throw new Error("Organization not found");
}
if (organization.billing.plan === "enterprise" || organization.billing.plan === "scale") {
for (const question of survey.questions) {
if (question.type === "openText") {
const isQuestionAnswered = response.data[question.id] !== undefined;
console.log("isQuestionAnswered", isQuestionAnswered);
if (!isQuestionAnswered) {
continue;
}
const text = `**${question.headline.default}**\n${response.data[question.id]}`;
console.log("creating embedding for question response", question.id);
await createDocument({
environmentId,
responseId: response.id,
questionId: question.id,
text,
});
}
const text = `${question.headline.default} Answer: ${response.data[question.id]}`;
console.log("creating embedding for question response", question.id);
await createDocument(environmentId, {
responseId: response.id,
questionId: question.id,
text,
});
}
}
}

View File

@@ -1,40 +0,0 @@
-- CreateExtension
CREATE EXTENSION IF NOT EXISTS "vector";
-- CreateTable
CREATE TABLE "DocumentGroup" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
"environmentId" TEXT NOT NULL,
"text" TEXT NOT NULL,
"vector" vector(512),
CONSTRAINT "DocumentGroup_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Document" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
"documentGroupId" TEXT,
"responseId" TEXT,
"questionId" TEXT,
"text" TEXT NOT NULL,
"vector" vector(512),
CONSTRAINT "Document_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "Document_responseId_questionId_key" ON "Document"("responseId", "questionId");
-- AddForeignKey
ALTER TABLE "DocumentGroup" ADD CONSTRAINT "DocumentGroup_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "Environment"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Document" ADD CONSTRAINT "Document_documentGroupId_fkey" FOREIGN KEY ("documentGroupId") REFERENCES "DocumentGroup"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Document" ADD CONSTRAINT "Document_responseId_fkey" FOREIGN KEY ("responseId") REFERENCES "Response"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -0,0 +1,66 @@
-- Migration summary: creates the "InsightCategory" and "Sentiment" enums, the "Insight" and
-- "Document" tables, and the "DocumentInsight" join table that links documents to insights
-- (many-to-many via a composite primary key).
-- CreateExtension
-- The vector(512) columns below rely on the "vector" type.
-- NOTE(review): presumably this is the pgvector extension; confirm it is installed on the target database.
CREATE EXTENSION IF NOT EXISTS "vector";
-- CreateEnum
CREATE TYPE "InsightCategory" AS ENUM ('enhancementRequest', 'complaint', 'praise');
-- CreateEnum
CREATE TYPE "Sentiment" AS ENUM ('positive', 'negative', 'neutral');
-- CreateTable
-- "vector" is nullable here: no NOT NULL constraint is declared on it.
CREATE TABLE "Insight" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
"environmentId" TEXT NOT NULL,
"category" "InsightCategory" NOT NULL,
"title" TEXT NOT NULL,
"description" TEXT NOT NULL,
"vector" vector(512),
CONSTRAINT "Insight_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Join table: one row per (document, insight) pair; the composite primary key prevents duplicate links.
CREATE TABLE "DocumentInsight" (
"documentId" TEXT NOT NULL,
"insightId" TEXT NOT NULL,
CONSTRAINT "DocumentInsight_pkey" PRIMARY KEY ("documentId","insightId")
);
-- CreateTable
-- "responseId", "questionId" and "vector" are nullable; "sentiment" is required.
CREATE TABLE "Document" (
"id" TEXT NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
"environmentId" TEXT NOT NULL,
"responseId" TEXT,
"questionId" TEXT,
"sentiment" "Sentiment" NOT NULL,
"text" TEXT NOT NULL,
"vector" vector(512),
CONSTRAINT "Document_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
-- The primary key leads with "documentId"; this index covers lookups by "insightId".
CREATE INDEX "DocumentInsight_insightId_idx" ON "DocumentInsight"("insightId");
-- CreateIndex
-- At most one document per (responseId, questionId) pair. Both columns are nullable, and by
-- default PostgreSQL does not treat rows containing NULL in either column as duplicates.
CREATE UNIQUE INDEX "Document_responseId_questionId_key" ON "Document"("responseId", "questionId");
-- AddForeignKey
-- All foreign keys below use ON DELETE CASCADE: deleting the referenced row deletes the dependent rows.
ALTER TABLE "Insight" ADD CONSTRAINT "Insight_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "Environment"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "DocumentInsight" ADD CONSTRAINT "DocumentInsight_documentId_fkey" FOREIGN KEY ("documentId") REFERENCES "Document"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "DocumentInsight" ADD CONSTRAINT "DocumentInsight_insightId_fkey" FOREIGN KEY ("insightId") REFERENCES "Insight"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Document" ADD CONSTRAINT "Document_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "Environment"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Document" ADD CONSTRAINT "Document_responseId_fkey" FOREIGN KEY ("responseId") REFERENCES "Response"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -416,7 +416,8 @@ model Environment {
tags Tag[]
segments Segment[]
integration Integration[]
documentGroups DocumentGroup[]
documents Document[]
insights Insight[]
@@index([productId])
}
@@ -662,28 +663,54 @@ model SurveyLanguage {
@@index([languageId])
}
model DocumentGroup {
enum InsightCategory {
enhancementRequest
complaint
praise
}
model Insight {
id String @id @default(cuid())
createdAt DateTime @default(now()) @map(name: "created_at")
updatedAt DateTime @updatedAt @map(name: "updated_at")
environmentId String
environment Environment @relation(fields: [environmentId], references: [id], onDelete: Cascade)
text String
category InsightCategory
title String
description String
vector Unsupported("vector(512)")?
documents Document[]
documents DocumentInsight[]
}
model DocumentInsight {
documentId String
document Document @relation(fields: [documentId], references: [id], onDelete: Cascade)
insightId String
insight Insight @relation(fields: [insightId], references: [id], onDelete: Cascade)
@@id([documentId, insightId])
@@index([insightId])
}
enum Sentiment {
positive
negative
neutral
}
model Document {
id String @id @default(cuid())
createdAt DateTime @default(now()) @map(name: "created_at")
updatedAt DateTime @updatedAt @map(name: "updated_at")
documentGroupId String?
documentGroup DocumentGroup? @relation(fields: [documentGroupId], references: [id], onDelete: Cascade)
responseId String?
response Response? @relation(fields: [responseId], references: [id], onDelete: Cascade)
questionId String?
text String
vector Unsupported("vector(512)")?
id String @id @default(cuid())
createdAt DateTime @default(now()) @map(name: "created_at")
updatedAt DateTime @updatedAt @map(name: "updated_at")
environmentId String
environment Environment @relation(fields: [environmentId], references: [id], onDelete: Cascade)
responseId String?
response Response? @relation(fields: [responseId], references: [id], onDelete: Cascade)
questionId String?
sentiment Sentiment
text String
vector Unsupported("vector(512)")?
insights DocumentInsight[]
@@unique([responseId, questionId])
}

View File

@@ -1,25 +1,30 @@
import "server-only";
import { Prisma } from "@prisma/client";
import { embed, generateText } from "ai";
import { embed, generateObject } from "ai";
import { cache as reactCache } from "react";
import { z } from "zod";
import { prisma } from "@formbricks/database";
import { cache } from "@formbricks/lib/cache";
import { validateInputs } from "@formbricks/lib/utils/validate";
import { TDocument, TDocumentCreateInput, ZDocumentCreateInput } from "@formbricks/types/documents";
import {
TDocument,
TDocumentCreateInput,
ZDocumentCreateInput,
ZDocumentSentiment,
} from "@formbricks/types/documents";
import { ZId } from "@formbricks/types/environment";
import { DatabaseError } from "@formbricks/types/errors";
import { ZInsightCategory } from "@formbricks/types/insights";
import { embeddingsModel, llmModel } from "../../../ai/lib/utils";
import { createDocumentGroup, findNearestDocumentGroups } from "../document-group/service";
import { createInsight, findNearestInsights } from "../insight/service";
import { getInsightVectorText } from "../insight/utils";
import { documentCache } from "./cache";
export type TPrismaDocument = Omit<TDocument, "vector"> & {
vector: string;
};
export const createDocument = async (
environmentId: string,
documentInput: TDocumentCreateInput
): Promise<TDocument> => {
export const createDocument = async (documentInput: TDocumentCreateInput): Promise<TDocument> => {
validateInputs([documentInput, ZDocumentCreateInput]);
try {
@@ -29,32 +34,33 @@ export const createDocument = async (
value: documentInput.text,
});
// find fitting documentGroup
let documentGroupId;
const nearestDocumentGroups = await findNearestDocumentGroups(environmentId, embedding, 1, 0.2);
if (nearestDocumentGroups.length > 0) {
documentGroupId = nearestDocumentGroups[0].id;
} else {
// create documentGroup
// generate name for documentGroup
const { text } = await generateText({
model: llmModel,
system: `You are a Customer Experience Management platform. You are asked to transform a user feedback into a well defined and consice insight (feature request, complaint, loved feature or bug) like "The dashboard is slow" or "The ability to export data from the app"`,
prompt: `The user feedback: "${documentInput.text}"`,
});
// generate sentiment and insights
const { object } = await generateObject({
model: llmModel,
schema: z.object({
sentiment: ZDocumentSentiment,
insights: z.array(
z.object({
title: z.string(),
description: z.string(),
category: ZInsightCategory,
})
),
}),
system: `You are an XM researcher. You analyse user feedback and extract insights and the sentiment from it. You are very objective, for the insights split the feedback in the smallest parts possible and only use the feedback itself to draw conclusions. An insight consist of a title and description (e.g. title: "Interactive charts and graphics", description: "Users would love to see a visualization of the analytics data") as well as tag it with the right category`,
prompt: `Analyze this feedback: "${documentInput.text}"`,
});
const documentGroup = await createDocumentGroup({
environmentId,
text,
});
documentGroupId = documentGroup.id;
}
console.log(JSON.stringify(object, null, 2));
const sentiment = object.sentiment;
const insights = object.insights;
// create document
const prismaDocument = await prisma.document.create({
data: {
...documentInput,
documentGroupId,
sentiment,
},
});
@@ -66,21 +72,56 @@ export const createDocument = async (
// update document vector with the embedding
const vectorString = `[${embedding.join(",")}]`;
await prisma.$executeRaw`
UPDATE "Document"
SET "vector" = ${vectorString}::vector(512)
WHERE "id" = ${document.id};
`;
UPDATE "Document"
SET "vector" = ${vectorString}::vector(512)
WHERE "id" = ${document.id};
`;
// connect or create the insights
for (const insight of insights) {
if (typeof insight.title !== "string" || typeof insight.description !== "string") {
throw new Error("Insight title and description must be a string");
}
// create embedding for insight
const { embedding } = await embed({
model: embeddingsModel,
value: getInsightVectorText(insight.title, insight.description),
});
// find close insight to merge it with
const nearestInsights = await findNearestInsights(documentInput.environmentId, embedding, 1, 0.2);
if (nearestInsights.length > 0) {
// create a documentInsight with this insight
console.log(`Merging ${insight.title} with existing insight: ${nearestInsights[0].id}`);
await prisma.documentInsight.create({
data: {
documentId: document.id,
insightId: nearestInsights[0].id,
},
});
} else {
console.log(`Creating new insight for ${insight.title}`);
// create new insight and documentInsight
const newInsight = await createInsight({
environmentId: documentInput.environmentId,
title: insight.title,
description: insight.description,
category: insight.category,
vector: embedding,
});
// create a documentInsight with this insight
await prisma.documentInsight.create({
data: {
documentId: document.id,
insightId: newInsight.id,
},
});
}
}
documentCache.revalidate({
id: document.id,
});
// search for nearest documentGroup
await createDocumentGroup({
environmentId,
text: document.text,
});
return document;
} catch (error) {
if (error instanceof Prisma.PrismaClientKnownRequestError) {
@@ -142,10 +183,10 @@ export const getDocumentsByResponseIdQuestionId = reactCache(
export const findNearestDocuments = async (
environmentId: string,
vector: number[],
limit: number = 5
limit: number = 5,
threshold: number = 0.5
): Promise<TDocument[]> => {
validateInputs([environmentId, ZId]);
const threshold = 0.8; //0.2;
// Convert the embedding array to a JSON-like string representation
const vectorString = `[${vector.join(",")}]`;
@@ -155,9 +196,11 @@ export const findNearestDocuments = async (
id,
created_at AS "createdAt",
updated_at AS "updatedAt",
"environmentId",
text,
"responseId",
"questionId",
"documentGroupId",
vector::text
FROM "Document" d
WHERE d."environmentId" = ${environmentId}

View File

@@ -5,7 +5,7 @@ interface RevalidateProps {
environmentId?: string;
}
export const documentGroupCache = {
export const insightCache = {
tag: {
byId(id: string) {
return `documentGroups-${id}`;

View File

@@ -1,57 +1,40 @@
import "server-only";
import { Prisma } from "@prisma/client";
import { embed } from "ai";
import { prisma } from "@formbricks/database";
import { validateInputs } from "@formbricks/lib/utils/validate";
import {
TDocumentGroup,
TDocumentGroupCreateInput,
ZDocumentGroupCreateInput,
} from "@formbricks/types/document-groups";
import { ZId } from "@formbricks/types/environment";
import { DatabaseError } from "@formbricks/types/errors";
import { embeddingsModel } from "../../../ai/lib/utils";
import { documentGroupCache } from "./cache";
import { TInsight, TInsightCreateInput, ZInsightCreateInput } from "@formbricks/types/insights";
import { insightCache } from "./cache";
export type TPrismaDocumentGroup = Omit<TDocumentGroup, "vector"> & {
export type TPrismaInsight = Omit<TInsight, "vector"> & {
vector: string;
};
export const createDocumentGroup = async (
documentGroupInput: TDocumentGroupCreateInput
): Promise<TDocumentGroup> => {
validateInputs([documentGroupInput, ZDocumentGroupCreateInput]);
export const createInsight = async (insightGroupInput: TInsightCreateInput): Promise<TInsight> => {
validateInputs([insightGroupInput, ZInsightCreateInput]);
try {
// Generate text embedding
const embeddingPromise = embed({
model: embeddingsModel,
value: documentGroupInput.text,
});
// create document
const prismaDocumentGroupPromise = prisma.documentGroup.create({
data: documentGroupInput,
const { vector, ...data } = insightGroupInput;
const prismaInsight = await prisma.insight.create({
data,
});
const [embeddingRes, prismaDocument] = await Promise.all([embeddingPromise, prismaDocumentGroupPromise]);
const { embedding } = embeddingRes;
const documentGroup = {
...prismaDocument,
vector: embedding,
...prismaInsight,
vector: insightGroupInput.vector,
};
// update document vector with the embedding
const vectorString = `[${embedding.join(",")}]`;
const vectorString = `[${insightGroupInput.vector.join(",")}]`;
await prisma.$executeRaw`
UPDATE "DocumentGroup"
UPDATE "Insight"
SET "vector" = ${vectorString}::vector(512)
WHERE "id" = ${documentGroup.id};
`;
documentGroupCache.revalidate({
insightCache.revalidate({
id: documentGroup.id,
environmentId: documentGroup.environmentId,
});
@@ -65,33 +48,35 @@ export const createDocumentGroup = async (
}
};
export const findNearestDocumentGroups = async (
export const findNearestInsights = async (
environmentId: string,
vector: number[],
limit: number = 5,
threshold: number = 0.5
): Promise<TDocumentGroup[]> => {
): Promise<TInsight[]> => {
validateInputs([environmentId, ZId]);
// Convert the embedding array to a JSON-like string representation
const vectorString = `[${vector.join(",")}]`;
// Execute raw SQL query to find nearest neighbors and exclude the vector column
const prismaDocumentGroups: TPrismaDocumentGroup[] = await prisma.$queryRaw`
const prismaInsights: TPrismaInsight[] = await prisma.$queryRaw`
SELECT
id,
created_at AS "createdAt",
updated_at AS "updatedAt",
text,
title,
description,
category,
"environmentId",
vector::text
FROM "DocumentGroup" d
FROM "Insight" d
WHERE d."environmentId" = ${environmentId}
AND d."vector" <=> ${vectorString}::vector(512) <= ${threshold}
ORDER BY d."vector" <=> ${vectorString}::vector(512)
LIMIT ${limit};
`;
const documentGroups = prismaDocumentGroups.map((prismaDocumentGroup) => {
const insights = prismaInsights.map((prismaDocumentGroup) => {
// Convert the string representation of the vector back to an array of numbers
const vector = prismaDocumentGroup.vector
.slice(1, -1) // Remove the surrounding square brackets
@@ -103,5 +88,5 @@ export const findNearestDocumentGroups = async (
};
});
return documentGroups;
return insights;
};

View File

@@ -0,0 +1,2 @@
/**
 * Combines an insight's title and description into a single string.
 *
 * @param title - The insight title.
 * @param description - The insight description.
 * @returns The two parts joined as `"<title>: <description>"`.
 */
export const getInsightVectorText = (title: string, description: string): string => {
  const parts = [title, description];
  return parts.join(": ");
};

View File

@@ -1,20 +0,0 @@
import { z } from "zod";
import { ZId } from "./environment";
export const ZDocumentGroup = z.object({
id: ZId,
createdAt: z.date(),
updatedAt: z.date(),
environmentId: ZId,
text: z.string(),
vector: z.array(z.number()).length(512),
});
export type TDocumentGroup = z.infer<typeof ZDocumentGroup>;
export const ZDocumentGroupCreateInput = z.object({
environmentId: ZId,
text: z.string(),
});
export type TDocumentGroupCreateInput = z.infer<typeof ZDocumentGroupCreateInput>;

View File

@@ -0,0 +1,9 @@
import { z } from "zod";
import { ZId } from "./environment";
/**
 * Zod schema for a link between one document and one insight.
 * Both ids are validated with `ZId`.
 */
export const ZDocumentInsight = z.object({
documentId: ZId,
insightId: ZId,
});
/** Static type inferred from `ZDocumentInsight`. */
export type TDocumentInsight = z.infer<typeof ZDocumentInsight>;

View File

@@ -1,13 +1,18 @@
import { z } from "zod";
import { ZId } from "./environment";
/** Zod enum of the sentiment values a document can carry: "positive", "negative" or "neutral". */
export const ZDocumentSentiment = z.enum(["positive", "negative", "neutral"]);
/** Union of the string literals accepted by `ZDocumentSentiment`. */
export type TDocumentSentiment = z.infer<typeof ZDocumentSentiment>;
export const ZDocument = z.object({
id: ZId,
createdAt: z.date(),
updatedAt: z.date(),
documentGroupId: ZId.nullable(),
environmentId: ZId,
responseId: ZId.nullable(),
questionId: ZId.nullable(),
sentiment: ZDocumentSentiment,
text: z.string(),
vector: z.array(z.number()).length(512),
});
@@ -15,6 +20,7 @@ export const ZDocument = z.object({
export type TDocument = z.infer<typeof ZDocument>;
export const ZDocumentCreateInput = z.object({
environmentId: ZId,
responseId: ZId.optional(),
questionId: ZId.optional(),
text: z.string(),

View File

@@ -0,0 +1,29 @@
import { z } from "zod";
import { ZId } from "./environment";
/** Zod enum of the categories an insight can be tagged with: "enhancementRequest", "complaint" or "praise". */
export const ZInsightCategory = z.enum(["enhancementRequest", "complaint", "praise"]);
/** Union of the string literals accepted by `ZInsightCategory`. */
export type TInsightCategory = z.infer<typeof ZInsightCategory>;
/**
 * Zod schema for a complete insight record: identity and timestamps, the owning environment,
 * the title/description/category, and the embedding vector.
 */
export const ZInsight = z.object({
id: ZId,
createdAt: z.date(),
updatedAt: z.date(),
environmentId: ZId,
title: z.string(),
description: z.string(),
// The vector must contain exactly 512 numbers.
vector: z.array(z.number()).length(512),
category: ZInsightCategory,
});
/** Static type inferred from `ZInsight`. */
export type TInsight = z.infer<typeof ZInsight>;
/**
 * Zod schema for the input needed to create an insight.
 * It has the same fields as `ZInsight` except `id`, `createdAt` and `updatedAt`;
 * the caller supplies the vector.
 */
export const ZInsightCreateInput = z.object({
environmentId: ZId,
title: z.string(),
description: z.string(),
category: ZInsightCategory,
// The vector must contain exactly 512 numbers.
vector: z.array(z.number()).length(512),
});
/** Static type inferred from `ZInsightCreateInput`. */
export type TInsightCreateInput = z.infer<typeof ZInsightCreateInput>;