Files
puter/src/backend/src/modules/puterai/AWSTextractService.js
T
2025-10-21 14:59:24 -07:00

248 lines
9.2 KiB
JavaScript

/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// METADATA // {"ai-commented":{"service":"claude"}}
const { TextractClient, AnalyzeDocumentCommand, InvalidS3ObjectException } = require('@aws-sdk/client-textract');
const BaseService = require('../../services/BaseService');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
/**
* AWSTextractService class - Provides OCR (Optical Character Recognition) functionality using AWS Textract
* Extends BaseService to integrate with AWS Textract for document analysis and text extraction.
* Implements driver capabilities and puter-ocr interface for document recognition.
* Handles both S3-stored and buffer-based document processing with automatic region management.
*/
class AWSTextractService extends BaseService {
    /**
     * Metering service used to check available credits and to record
     * Textract usage.
     * @type {import('../../services/MeteringService/MeteringService').MeteringService}
     */
    get meteringService() {
        return this.services.get('meteringService').meteringService;
    }

    /**
     * Initializes the per-region cache of Textract clients.
     * Clients are created lazily by `_get_client`.
     */
    _construct() {
        this.clients_ = {};
    }

    static IMPLEMENTS = {
        ['driver-capabilities']: {
            /**
             * Reports whether the given interface method supports test mode.
             * @param {string} iface - Interface name
             * @param {string} method_name - Method name on that interface
             * @returns {boolean} true only for `puter-ocr` / `recognize`
             */
            supports_test_mode(iface, method_name) {
                return iface === 'puter-ocr' && method_name === 'recognize';
            },
        },
        ['puter-ocr']: {
            /**
             * Performs OCR recognition on a document using AWS Textract.
             * Calls `this.analyze_document`, so it relies on `this` being the
             * service instance when invoked.
             * @param {Object} params - Recognition parameters
             * @param {Object} params.source - The document source to analyze
             * @param {boolean} params.test_mode - If true, returns sample test output instead of processing
             * @returns {Promise<Object>} `{ blocks }`, each block having `type`, `confidence` and `text`
             */
            async recognize({ source, test_mode }) {
                if ( test_mode ) {
                    return {
                        blocks: [
                            {
                                type: 'text/textract:WORD',
                                confidence: 0.9999998807907104,
                                text: 'Hello',
                            },
                            {
                                type: 'text/puter:sample-output',
                                confidence: 1,
                                text: 'The test_mode flag is set to true. This is a sample output.',
                            },
                        ],
                    };
                }

                const resp = await this.analyze_document(source);

                // Block types left out of the simplified response
                // (presumably because they are containers rather than text).
                const skipped_block_types = new Set([
                    'PAGE',
                    'CELL',
                    'TABLE',
                    'MERGED_CELL',
                    'LAYOUT_FIGURE',
                    'LAYOUT_TEXT',
                ]);

                // Simplify the response for the common interface.
                // `Blocks` is treated as optional, matching analyze_document,
                // so a response without it yields an empty block list
                // instead of a TypeError.
                // NOTE(review): Textract documents Confidence on a 0-100 scale
                // while the test_mode sample above uses a 0-1 value — confirm
                // which scale consumers expect.
                const blocks = [];
                for ( const block of resp.Blocks ?? [] ) {
                    if ( skipped_block_types.has(block.BlockType) ) continue;
                    blocks.push({
                        type: `text/textract:${block.BlockType}`,
                        confidence: block.Confidence,
                        text: block.Text,
                    });
                }
                return { blocks };
            },
        },
    };

    /**
     * Creates AWS credentials object for authentication
     * @private
     * @returns {Object} Object containing AWS access key ID and secret access key
     */
    _create_aws_credentials() {
        return {
            accessKeyId: this.config.aws.access_key,
            secretAccessKey: this.config.aws.secret_key,
        };
    }

    /**
     * Returns the Textract client for a region, creating and caching it on
     * first use.
     * @private
     * @param {string} [region] - AWS region. When falsy, falls back to
     *   `this.config.aws.region`, then `this.global_config.aws.region`,
     *   then `'us-west-2'`.
     * @returns {TextractClient} Cached or newly created client for the region
     */
    _get_client(region) {
        const resolved_region = region
            || (this.config.aws?.region ?? this.global_config.aws?.region ?? 'us-west-2');

        if ( ! this.clients_[resolved_region] ) {
            this.clients_[resolved_region] = new TextractClient({
                credentials: this._create_aws_credentials(),
                region: resolved_region,
            });
        }
        return this.clients_[resolved_region];
    }

    /**
     * Builds the AnalyzeDocument command used for every Textract request.
     * @private
     * @param {Object} document - Textract `Document` input (`S3Object` or `Bytes`)
     * @returns {AnalyzeDocumentCommand} Command requesting the LAYOUT feature
     */
    _create_analyze_command(document) {
        return new AnalyzeDocumentCommand({
            Document: document,
            FeatureTypes: ['LAYOUT'],
        });
    }

    /**
     * Analyzes a document using AWS Textract to extract text and layout information
     * @param {FileFacade} file_facade - Interface to access the document file
     * @returns {Promise<Object>} The raw Textract API response containing extracted text blocks
     * @throws {APIError} `insufficient_funds` if the actor lacks credits for one page
     * @throws {Error} If document analysis fails or no suitable input format is available
     * @description Processes document through Textract's AnalyzeDocument API with LAYOUT feature.
     * Will attempt to use S3 direct access first, falling back to buffer upload if
     * Textract rejects the S3 object.
     */
    async analyze_document(file_facade) {
        const { client, document, using_s3 } =
            await this._get_client_and_document(file_facade);

        const actor = Context.get('actor');
        const usageType = 'aws-textract:detect-document-text:page';

        // The page count is not known before the request, so only require
        // enough credits for a single page up front.
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageType, 1);
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const command = this._create_analyze_command(document);

        let textractResp;
        try {
            textractResp = await client.send(command);
        } catch (e) {
            // Only an S3 read failure on the S3 path is recoverable.
            if ( ! (using_s3 && e instanceof InvalidS3ObjectException) ) {
                throw e;
            }
            // Retry once, sending the document bytes instead of an S3 reference.
            const fallback = await this._get_client_and_document(file_facade, true);
            textractResp = await fallback.client.send(this._create_analyze_command(fallback.document));
        }

        // Metering: bill per PAGE block, charging at least one page.
        const pageCount = (textractResp.Blocks ?? [])
            .filter(block => block.BlockType === 'PAGE')
            .length;
        // Not awaited, as in the original implementation.
        // NOTE(review): confirm incrementUsage is safe to fire-and-forget.
        this.meteringService.incrementUsage(actor, usageType, pageCount || 1);

        return textractResp;
    }

    /**
     * Gets AWS client and document configuration for Textract processing
     * @param {Object} file_facade - File facade object containing document source info
     * @param {boolean} [force_buffer] - If true, forces using buffer instead of S3
     * @returns {Promise<Object>} Object containing:
     * - client: Configured AWS Textract client
     * - document: Document configuration for Textract
     * - using_s3: Boolean indicating if using S3 source
     * @throws {APIError} `subject_does_not_exist` if an fs-node is available and reports it does not exist
     * @throws {Error} If no suitable input format is available
     */
    async _get_client_and_document(file_facade, force_buffer) {
        const try_s3info = await file_facade.get('s3-info');
        if ( try_s3info && ! force_buffer ) {
            return {
                using_s3: true,
                // Use a client for the bucket's region.
                client: this._get_client(try_s3info.bucket_region),
                document: {
                    S3Object: {
                        Bucket: try_s3info.bucket,
                        Name: try_s3info.key,
                    },
                },
            };
        }

        const try_buffer = await file_facade.get('buffer');
        if ( try_buffer ) {
            return {
                using_s3: false,
                client: this._get_client(),
                document: {
                    Bytes: try_buffer,
                },
            };
        }

        // Neither input is available: report a missing file specifically
        // when that can be determined, otherwise fail generically.
        const fsNode = await file_facade.get('fs-node');
        if ( fsNode && ! await fsNode.exists() ) {
            throw APIError.create('subject_does_not_exist');
        }
        throw new Error('No suitable input for Textract');
    }
}
// Public export of this module: the Textract-backed OCR service class.
module.exports = { AWSTextractService };