feat(documents): use update usecase when content extraction (#678)

This commit is contained in:
Corentin Thomasset
2025-12-06 21:51:09 +01:00
committed by GitHub
parent 2cf86e5968
commit e3e0078673
5 changed files with 94 additions and 21 deletions

View File

@@ -565,6 +565,7 @@ describe('documents usecases', () => {
tagsRepository,
webhookRepository,
documentActivityRepository,
eventServices: createTestEventServices(),
});
const documentRecords = await db.select().from(documentsTable);
@@ -576,6 +577,71 @@ describe('documents usecases', () => {
content: 'hello world', // The content is extracted and saved in the db
});
});
test('a document.updated event is emitted when the document content is extracted and saved', async () => {
  // Fixture values shared by the database row, the stored file and the expected event.
  const organizationId = 'organization-1';
  const documentId = 'document-1';
  const storageKey = 'organization-1/originals/document-1.txt';
  const fileName = 'file-1.txt';
  const mimeType = 'text/plain';
  const fileContent = 'hello world';

  // Arrange: a database seeded with one user owning one organization.
  const { db } = await createInMemoryDatabase({
    users: [{ id: 'user-1', email: 'user-1@example.com' }],
    organizations: [{ id: organizationId, name: 'Organization 1' }],
    organizationMembers: [{ organizationId, userId: 'user-1', role: ORGANIZATION_ROLES.OWNER }],
  });

  // Documents are stored with the in-memory storage driver for this test.
  const { documentsStorage: documentStorageConfig } = overrideConfig({
    organizationPlans: { isFreePlanUnlimited: true },
    documentsStorage: { driver: 'in-memory' },
  });

  const documentsRepository = createDocumentsRepository({ db });
  const documentsStorageService = createDocumentStorageService({ documentStorageConfig });
  const taggingRulesRepository = createTaggingRulesRepository({ db });
  const tagsRepository = createTagsRepository({ db });

  // Arrange: a document row (inserted without any content) and its backing file.
  await db.insert(documentsTable).values({
    id: documentId,
    organizationId,
    originalStorageKey: storageKey,
    mimeType,
    name: fileName,
    originalName: fileName,
    originalSha256Hash: 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
  });

  await documentsStorageService.saveFile({
    fileStream: createReadableStream({ content: fileContent }),
    fileName,
    mimeType,
    storageKey,
  });

  const webhookRepository = createWebhookRepository({ db });
  const documentActivityRepository = createDocumentActivityRepository({ db });
  // Test event services expose the emitted events for inspection.
  const eventServices = createTestEventServices();

  const usecaseDependencies = {
    documentsRepository,
    documentsStorageService,
    taggingRulesRepository,
    tagsRepository,
    webhookRepository,
    documentActivityRepository,
    eventServices,
  };

  // Act: extract the text of the stored file and persist it on the document.
  await extractAndSaveDocumentFileContent({ documentId, organizationId, ...usecaseDependencies });

  // Assert: exactly one event was emitted, describing the content change.
  const expectedEvents = [{
    eventName: 'document.updated',
    payload: {
      changes: {
        content: fileContent,
      },
      documentId,
      organizationId,
      userId: undefined,
    },
  }];

  expect(eventServices.getEmittedEvents()).to.eql(expectedEvents);
});
});
describe('trashDocument', () => {

View File

@@ -468,6 +468,7 @@ export async function extractAndSaveDocumentFileContent({
tagsRepository,
webhookRepository,
documentActivityRepository,
eventServices,
}: {
documentId: string;
ocrLanguages?: string[];
@@ -478,6 +479,7 @@ export async function extractAndSaveDocumentFileContent({
tagsRepository: TagsRepository;
webhookRepository: WebhookRepository;
documentActivityRepository: DocumentActivityRepository;
eventServices: EventServices;
}) {
const { document } = await documentsRepository.getDocumentById({ documentId, organizationId });
@@ -496,7 +498,7 @@ export async function extractAndSaveDocumentFileContent({
const { text } = await extractDocumentText({ file, ocrLanguages });
const { document: updatedDocument } = await documentsRepository.updateDocument({ documentId, organizationId, content: text });
const { document: updatedDocument } = await updateDocument({ documentId, organizationId, changes: { content: text }, documentsRepository, eventServices });
if (isNil(updatedDocument)) {
// This should never happen, but for type safety

View File

@@ -1,4 +1,5 @@
import type { Database } from '../../app/database/database.types';
import type { EventServices } from '../../app/events/events.services';
import type { TaskServices } from '../../tasks/tasks.services';
import type { DocumentStorageService } from '../storage/documents.storage.services';
import { createTaggingRulesRepository } from '../../tagging-rules/tagging-rules.repository';
@@ -8,7 +9,17 @@ import { createDocumentActivityRepository } from '../document-activity/document-
import { createDocumentsRepository } from '../documents.repository';
import { extractAndSaveDocumentFileContent } from '../documents.usecases';
export async function registerExtractDocumentFileContentTask({ taskServices, db, documentsStorageService }: { taskServices: TaskServices; db: Database; documentsStorageService: DocumentStorageService }) {
export async function registerExtractDocumentFileContentTask({
taskServices,
db,
documentsStorageService,
eventServices,
}: {
taskServices: TaskServices;
db: Database;
documentsStorageService: DocumentStorageService;
eventServices: EventServices;
}) {
const taskName = 'extract-document-file-content';
taskServices.registerTask({
@@ -33,6 +44,7 @@ export async function registerExtractDocumentFileContentTask({ taskServices, db,
tagsRepository,
webhookRepository,
documentActivityRepository,
eventServices,
});
},
});

View File

@@ -1,17 +1,14 @@
import type { Database } from '../app/database/database.types';
import type { Config } from '../config/config.types';
import type { DocumentStorageService } from '../documents/storage/documents.storage.services';
import type { TaskServices } from './tasks.services';
import type { GlobalDependencies } from '../app/server.types';
import { registerExtractDocumentFileContentTask } from '../documents/tasks/extract-document-file-content.task';
import { registerHardDeleteExpiredDocumentsTask } from '../documents/tasks/hard-delete-expired-documents.task';
import { registerExpireInvitationsTask } from '../organizations/tasks/expire-invitations.task';
import { registerPurgeExpiredOrganizationsTask } from '../organizations/tasks/purge-expired-organizations.task';
import { registerApplyTaggingRuleToDocumentsTask } from '../tagging-rules/tasks/apply-tagging-rule-to-documents.task';
export async function registerTaskDefinitions({ taskServices, db, config, documentsStorageService }: { taskServices: TaskServices; db: Database; config: Config; documentsStorageService: DocumentStorageService }) {
await registerHardDeleteExpiredDocumentsTask({ taskServices, db, config, documentsStorageService });
await registerExpireInvitationsTask({ taskServices, db, config });
await registerPurgeExpiredOrganizationsTask({ taskServices, db, config, documentsStorageService });
await registerExtractDocumentFileContentTask({ taskServices, db, documentsStorageService });
await registerApplyTaggingRuleToDocumentsTask({ taskServices, db });
/**
 * Registers every background task definition listed below.
 *
 * The registrars are awaited one after another, in the order written, and
 * each one receives the same `deps` object unchanged.
 *
 * NOTE(review): presumably each registrar destructures only the dependencies
 * it needs from `deps` — confirm against the individual task modules.
 *
 * @param deps - The application-wide dependencies, forwarded as-is to each registrar.
 */
export async function registerTaskDefinitions(deps: GlobalDependencies) {
await registerHardDeleteExpiredDocumentsTask(deps);
await registerExpireInvitationsTask(deps);
await registerPurgeExpiredOrganizationsTask(deps);
await registerExtractDocumentFileContentTask(deps);
await registerApplyTaggingRuleToDocumentsTask(deps);
}

View File

@@ -30,20 +30,16 @@ async function startWebMode({ logger, ...dependencies }: { logger: Logger } & Gl
});
}
async function startWorkerMode({ logger, config, db, taskServices, documentsStorageService, eventServices }: { logger: Logger } & GlobalDependencies) {
async function startWorkerMode({ logger, ...deps }: { logger: Logger } & GlobalDependencies) {
const { taskServices, config } = deps;
if (config.ingestionFolder.isEnabled) {
const { startWatchingIngestionFolders } = createIngestionFolderWatcher({
taskServices,
config,
db,
documentsStorageService,
eventServices,
});
const { startWatchingIngestionFolders } = createIngestionFolderWatcher(deps);
await startWatchingIngestionFolders();
}
await registerTaskDefinitions({ taskServices, db, config, documentsStorageService });
await registerTaskDefinitions(deps);
taskServices.start();
logger.info('Worker started');