mirror of
https://github.com/papra-hq/papra.git
synced 2026-01-06 16:33:29 -06:00
feat(documents): use update usecase when content extraction (#678)
This commit is contained in:
committed by
GitHub
parent
2cf86e5968
commit
e3e0078673
@@ -565,6 +565,7 @@ describe('documents usecases', () => {
|
||||
tagsRepository,
|
||||
webhookRepository,
|
||||
documentActivityRepository,
|
||||
eventServices: createTestEventServices(),
|
||||
});
|
||||
|
||||
const documentRecords = await db.select().from(documentsTable);
|
||||
@@ -576,6 +577,71 @@ describe('documents usecases', () => {
|
||||
content: 'hello world', // The content is extracted and saved in the db
|
||||
});
|
||||
});
|
||||
|
||||
test('a document.updated event is emitted when the document content is extracted and saved', async () => {
|
||||
const { db } = await createInMemoryDatabase({
|
||||
users: [{ id: 'user-1', email: 'user-1@example.com' }],
|
||||
organizations: [{ id: 'organization-1', name: 'Organization 1' }],
|
||||
organizationMembers: [{ organizationId: 'organization-1', userId: 'user-1', role: ORGANIZATION_ROLES.OWNER }],
|
||||
});
|
||||
|
||||
const config = overrideConfig({
|
||||
organizationPlans: { isFreePlanUnlimited: true },
|
||||
documentsStorage: { driver: 'in-memory' },
|
||||
});
|
||||
|
||||
const documentsRepository = createDocumentsRepository({ db });
|
||||
const documentsStorageService = createDocumentStorageService({ documentStorageConfig: config.documentsStorage });
|
||||
const taggingRulesRepository = createTaggingRulesRepository({ db });
|
||||
const tagsRepository = createTagsRepository({ db });
|
||||
|
||||
await db.insert(documentsTable).values({
|
||||
id: 'document-1',
|
||||
organizationId: 'organization-1',
|
||||
originalStorageKey: 'organization-1/originals/document-1.txt',
|
||||
mimeType: 'text/plain',
|
||||
name: 'file-1.txt',
|
||||
originalName: 'file-1.txt',
|
||||
originalSha256Hash: 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9',
|
||||
});
|
||||
|
||||
await documentsStorageService.saveFile({
|
||||
fileStream: createReadableStream({ content: 'hello world' }),
|
||||
fileName: 'file-1.txt',
|
||||
mimeType: 'text/plain',
|
||||
storageKey: 'organization-1/originals/document-1.txt',
|
||||
});
|
||||
|
||||
const webhookRepository = createWebhookRepository({ db });
|
||||
const documentActivityRepository = createDocumentActivityRepository({ db });
|
||||
const eventServices = createTestEventServices();
|
||||
|
||||
await extractAndSaveDocumentFileContent({
|
||||
documentId: 'document-1',
|
||||
organizationId: 'organization-1',
|
||||
documentsRepository,
|
||||
documentsStorageService,
|
||||
taggingRulesRepository,
|
||||
tagsRepository,
|
||||
webhookRepository,
|
||||
documentActivityRepository,
|
||||
eventServices,
|
||||
});
|
||||
|
||||
expect(
|
||||
eventServices.getEmittedEvents(),
|
||||
).to.eql([{
|
||||
eventName: 'document.updated',
|
||||
payload: {
|
||||
changes: {
|
||||
content: 'hello world',
|
||||
},
|
||||
documentId: 'document-1',
|
||||
organizationId: 'organization-1',
|
||||
userId: undefined,
|
||||
},
|
||||
}]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('trashDocument', () => {
|
||||
|
||||
@@ -468,6 +468,7 @@ export async function extractAndSaveDocumentFileContent({
|
||||
tagsRepository,
|
||||
webhookRepository,
|
||||
documentActivityRepository,
|
||||
eventServices,
|
||||
}: {
|
||||
documentId: string;
|
||||
ocrLanguages?: string[];
|
||||
@@ -478,6 +479,7 @@ export async function extractAndSaveDocumentFileContent({
|
||||
tagsRepository: TagsRepository;
|
||||
webhookRepository: WebhookRepository;
|
||||
documentActivityRepository: DocumentActivityRepository;
|
||||
eventServices: EventServices;
|
||||
}) {
|
||||
const { document } = await documentsRepository.getDocumentById({ documentId, organizationId });
|
||||
|
||||
@@ -496,7 +498,7 @@ export async function extractAndSaveDocumentFileContent({
|
||||
|
||||
const { text } = await extractDocumentText({ file, ocrLanguages });
|
||||
|
||||
const { document: updatedDocument } = await documentsRepository.updateDocument({ documentId, organizationId, content: text });
|
||||
const { document: updatedDocument } = await updateDocument({ documentId, organizationId, changes: { content: text }, documentsRepository, eventServices });
|
||||
|
||||
if (isNil(updatedDocument)) {
|
||||
// This should never happen, but for type safety
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { Database } from '../../app/database/database.types';
|
||||
import type { EventServices } from '../../app/events/events.services';
|
||||
import type { TaskServices } from '../../tasks/tasks.services';
|
||||
import type { DocumentStorageService } from '../storage/documents.storage.services';
|
||||
import { createTaggingRulesRepository } from '../../tagging-rules/tagging-rules.repository';
|
||||
@@ -8,7 +9,17 @@ import { createDocumentActivityRepository } from '../document-activity/document-
|
||||
import { createDocumentsRepository } from '../documents.repository';
|
||||
import { extractAndSaveDocumentFileContent } from '../documents.usecases';
|
||||
|
||||
export async function registerExtractDocumentFileContentTask({ taskServices, db, documentsStorageService }: { taskServices: TaskServices; db: Database; documentsStorageService: DocumentStorageService }) {
|
||||
export async function registerExtractDocumentFileContentTask({
|
||||
taskServices,
|
||||
db,
|
||||
documentsStorageService,
|
||||
eventServices,
|
||||
}: {
|
||||
taskServices: TaskServices;
|
||||
db: Database;
|
||||
documentsStorageService: DocumentStorageService;
|
||||
eventServices: EventServices;
|
||||
}) {
|
||||
const taskName = 'extract-document-file-content';
|
||||
|
||||
taskServices.registerTask({
|
||||
@@ -33,6 +44,7 @@ export async function registerExtractDocumentFileContentTask({ taskServices, db,
|
||||
tagsRepository,
|
||||
webhookRepository,
|
||||
documentActivityRepository,
|
||||
eventServices,
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
import type { Database } from '../app/database/database.types';
|
||||
import type { Config } from '../config/config.types';
|
||||
import type { DocumentStorageService } from '../documents/storage/documents.storage.services';
|
||||
import type { TaskServices } from './tasks.services';
|
||||
import type { GlobalDependencies } from '../app/server.types';
|
||||
import { registerExtractDocumentFileContentTask } from '../documents/tasks/extract-document-file-content.task';
|
||||
import { registerHardDeleteExpiredDocumentsTask } from '../documents/tasks/hard-delete-expired-documents.task';
|
||||
import { registerExpireInvitationsTask } from '../organizations/tasks/expire-invitations.task';
|
||||
import { registerPurgeExpiredOrganizationsTask } from '../organizations/tasks/purge-expired-organizations.task';
|
||||
import { registerApplyTaggingRuleToDocumentsTask } from '../tagging-rules/tasks/apply-tagging-rule-to-documents.task';
|
||||
|
||||
export async function registerTaskDefinitions({ taskServices, db, config, documentsStorageService }: { taskServices: TaskServices; db: Database; config: Config; documentsStorageService: DocumentStorageService }) {
|
||||
await registerHardDeleteExpiredDocumentsTask({ taskServices, db, config, documentsStorageService });
|
||||
await registerExpireInvitationsTask({ taskServices, db, config });
|
||||
await registerPurgeExpiredOrganizationsTask({ taskServices, db, config, documentsStorageService });
|
||||
await registerExtractDocumentFileContentTask({ taskServices, db, documentsStorageService });
|
||||
await registerApplyTaggingRuleToDocumentsTask({ taskServices, db });
|
||||
export async function registerTaskDefinitions(deps: GlobalDependencies) {
|
||||
await registerHardDeleteExpiredDocumentsTask(deps);
|
||||
await registerExpireInvitationsTask(deps);
|
||||
await registerPurgeExpiredOrganizationsTask(deps);
|
||||
await registerExtractDocumentFileContentTask(deps);
|
||||
await registerApplyTaggingRuleToDocumentsTask(deps);
|
||||
}
|
||||
|
||||
@@ -30,20 +30,16 @@ async function startWebMode({ logger, ...dependencies }: { logger: Logger } & Gl
|
||||
});
|
||||
}
|
||||
|
||||
async function startWorkerMode({ logger, config, db, taskServices, documentsStorageService, eventServices }: { logger: Logger } & GlobalDependencies) {
|
||||
async function startWorkerMode({ logger, ...deps }: { logger: Logger } & GlobalDependencies) {
|
||||
const { taskServices, config } = deps;
|
||||
|
||||
if (config.ingestionFolder.isEnabled) {
|
||||
const { startWatchingIngestionFolders } = createIngestionFolderWatcher({
|
||||
taskServices,
|
||||
config,
|
||||
db,
|
||||
documentsStorageService,
|
||||
eventServices,
|
||||
});
|
||||
const { startWatchingIngestionFolders } = createIngestionFolderWatcher(deps);
|
||||
|
||||
await startWatchingIngestionFolders();
|
||||
}
|
||||
|
||||
await registerTaskDefinitions({ taskServices, db, config, documentsStorageService });
|
||||
await registerTaskDefinitions(deps);
|
||||
|
||||
taskServices.start();
|
||||
logger.info('Worker started');
|
||||
|
||||
Reference in New Issue
Block a user