Add Mistral OCR support and improve img2txt API (#1957)

Introduces Mistral OCR integration in backend and updates the `AIInterfaceService` and `MistralAIService` to support new OCR options and response normalization. Updates metering cost map for OCR and annotation usage. Refactors `DriverService` to support interface-specific service aliases. Expands the puter.js `AI.img2txt` API to support flexible options and provider selection, including Mistral OCR.
This commit is contained in:
Nariman Jelveh
2025-11-11 17:06:44 -08:00
committed by GitHub
parent fdc8582dde
commit 7b93d7c15b
8 changed files with 374 additions and 66 deletions
+1 -26
View File
@@ -904,7 +904,6 @@
"integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@babel/code-frame": "^7.27.1",
"@babel/generator": "^7.28.3",
@@ -3048,7 +3047,6 @@
"resolved": "https://registry.npmjs.org/@jimp/custom/-/custom-0.22.12.tgz",
"integrity": "sha512-xcmww1O/JFP2MrlGUMd3Q78S3Qu6W3mYTXYuIqFq33EorgYHV/HqymHfXy9GjiCJ7OI+7lWx6nYFOzU7M4rd1Q==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/core": "^0.22.12"
}
@@ -3085,7 +3083,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-blit/-/plugin-blit-0.22.12.tgz",
"integrity": "sha512-xslz2ZoFZOPLY8EZ4dC29m168BtDx95D6K80TzgUi8gqT7LY6CsajWO0FAxDwHz6h0eomHMfyGX0stspBrTKnQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12"
},
@@ -3098,7 +3095,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-blur/-/plugin-blur-0.22.12.tgz",
"integrity": "sha512-S0vJADTuh1Q9F+cXAwFPlrKWzDj2F9t/9JAbUvaaDuivpyWuImEKXVz5PUZw2NbpuSHjwssbTpOZ8F13iJX4uw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12"
},
@@ -3123,7 +3119,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-color/-/plugin-color-0.22.12.tgz",
"integrity": "sha512-xImhTE5BpS8xa+mAN6j4sMRWaUgUDLoaGHhJhpC+r7SKKErYDR0WQV4yCE4gP+N0gozD0F3Ka1LUSaMXrn7ZIA==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12",
"tinycolor2": "^1.6.0"
@@ -3167,7 +3162,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-crop/-/plugin-crop-0.22.12.tgz",
"integrity": "sha512-FNuUN0OVzRCozx8XSgP9MyLGMxNHHJMFt+LJuFjn1mu3k0VQxrzqbN06yIl46TVejhyAhcq5gLzqmSCHvlcBVw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12"
},
@@ -3291,7 +3285,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-resize/-/plugin-resize-0.22.12.tgz",
"integrity": "sha512-3NyTPlPbTnGKDIbaBgQ3HbE6wXbAlFfxHVERmrbqAi8R3r6fQPxpCauA8UVDnieg5eo04D0T8nnnNIX//i/sXg==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12"
},
@@ -3304,7 +3297,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-rotate/-/plugin-rotate-0.22.12.tgz",
"integrity": "sha512-9YNEt7BPAFfTls2FGfKBVgwwLUuKqy+E8bDGGEsOqHtbuhbshVGxN2WMZaD4gh5IDWvR+emmmPPWGgaYNYt1gA==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12"
},
@@ -3320,7 +3312,6 @@
"resolved": "https://registry.npmjs.org/@jimp/plugin-scale/-/plugin-scale-0.22.12.tgz",
"integrity": "sha512-dghs92qM6MhHj0HrV2qAwKPMklQtjNpoYgAB94ysYpsXslhRTiPisueSIELRwZGEr0J0VUxpUY7HgJwlSIgGZw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jimp/utils": "^0.22.12"
},
@@ -3628,7 +3619,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.4.1.tgz",
"integrity": "sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==",
"license": "Apache-2.0",
"peer": true,
"engines": {
"node": ">=8.0.0"
}
@@ -3638,7 +3628,6 @@
"resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.49.1.tgz",
"integrity": "sha512-kaNl/T7WzyMUQHQlVq7q0oV4Kev6+0xFwqzofryC66jgGMacd0QH5TwfpbUwSTby+SdAdprAe5UKMvBw4tKS5Q==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@opentelemetry/api": "^1.0.0"
},
@@ -7273,7 +7262,6 @@
"integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.46.1",
"@typescript-eslint/types": "8.46.1",
@@ -7816,8 +7804,7 @@
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-5.5.0.tgz",
"integrity": "sha512-hqJHYaQb5OptNunnyAnkHyM8aCjZ1MEIDTQu1iIbbTD/xops91NB5yq1ZK/dC2JDbVWtF23zUtl9JE2NqwT87A==",
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/@xtuc/ieee754": {
"version": "1.2.0",
@@ -7867,7 +7854,6 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -8557,7 +8543,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.8.9",
"caniuse-lite": "^1.0.30001746",
@@ -8812,7 +8797,6 @@
"resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz",
"integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==",
"license": "MIT",
"peer": true,
"dependencies": {
"assertion-error": "^2.0.1",
"check-error": "^2.1.1",
@@ -10549,7 +10533,6 @@
"resolved": "https://registry.npmjs.org/eslint/-/eslint-9.37.0.tgz",
"integrity": "sha512-XyLmROnACWqSxiGYArdef1fItQd47weqB7iwtfr9JHwRrqIXZdcFMvvEcL9xHCmL0SNsOvF0c42lWyM1U5dgig==",
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.8.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -16322,7 +16305,6 @@
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"license": "MIT",
"peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.3",
"fast-uri": "^3.0.1",
@@ -18003,7 +17985,6 @@
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -18230,7 +18211,6 @@
"integrity": "sha512-4nVGliEpxmhCL8DslSAUdxlB6+SMrhB0a1v5ijlh1xB1nEPuy1mxaHxysVucLHuWryAxLWg6a5ei+U4TLn/rFg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.5.0",
@@ -18496,7 +18476,6 @@
"resolved": "https://registry.npmjs.org/webpack/-/webpack-5.102.1.tgz",
"integrity": "sha512-7h/weGm9d/ywQ6qzJ+Xy+r9n/3qgp/thalBbpOi5i223dPXKi04IBtqPN9nTd+jBc7QKfvDbaBnFipYp4sJAUQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/eslint-scope": "^3.7.7",
"@types/estree": "^1.0.8",
@@ -18546,7 +18525,6 @@
"integrity": "sha512-pIDJHIEI9LR0yxHXQ+Qh95k2EvXpWzZ5l+d+jIo+RdSm9MiHfzazIxwwni/p7+x4eJZuvG1AJwgC4TNQ7NRgsg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@discoveryjs/json-ext": "^0.5.0",
"@webpack-cli/configtest": "^2.1.1",
@@ -18778,7 +18756,6 @@
"resolved": "https://registry.npmjs.org/winston/-/winston-3.18.3.tgz",
"integrity": "sha512-NoBZauFNNWENgsnC9YpgyYwOVrl2m58PpQ8lNHjV3kosGs7KJ7Npk9pCUE+WJlawVSe8mykWDKWFSVfs3QO9ww==",
"license": "MIT",
"peer": true,
"dependencies": {
"@colors/colors": "^1.6.0",
"@dabh/diagnostics": "^2.0.8",
@@ -18983,7 +18960,6 @@
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=10.0.0"
},
@@ -19317,7 +19293,6 @@
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
@@ -122,7 +122,8 @@ class AIChatService extends BaseService {
const svc_driver = this.services.get('driver');
for ( const provider of this.providers ) {
svc_driver.register_service_alias('ai-chat',
provider.service_name);
provider.service_name,
{ iface: 'puter-chat-completion' });
}
}
@@ -47,6 +47,35 @@ class AIInterfaceService extends BaseService {
source: {
type: 'file',
},
model: {
type: 'string',
optional: true,
},
pages: {
type: 'json',
subtype: 'array',
optional: true,
},
includeImageBase64: {
type: 'flag',
optional: true,
},
imageLimit: {
type: 'number',
optional: true,
},
imageMinSize: {
type: 'number',
optional: true,
},
bboxAnnotationFormat: {
type: 'json',
optional: true,
},
documentAnnotationFormat: {
type: 'json',
optional: true,
},
},
result: {
type: {
@@ -22,6 +22,9 @@ const BaseService = require('../../services/BaseService');
const axios = require('axios');
const OpenAIUtil = require('./lib/OpenAIUtil');
const { Context } = require('../../util/context');
const APIError = require('../../api/APIError');
const mime = require('mime-types');
const path = require('path');
/**
* MistralAIService class extends BaseService to provide integration with the Mistral AI API.
@@ -310,6 +313,69 @@ class MistralAIService extends BaseService {
return 'mistral-large-latest';
}
static IMPLEMENTS = {
'driver-capabilities': {
supports_test_mode(iface, method_name) {
return iface === 'puter-ocr' && method_name === 'recognize';
},
},
'puter-ocr': {
async recognize({
source,
model,
pages,
includeImageBase64,
imageLimit,
imageMinSize,
bboxAnnotationFormat,
documentAnnotationFormat,
test_mode,
}) {
if ( test_mode ) {
return this._sampleOcrResponse();
}
if ( ! source ) {
throw APIError.create('missing_required_argument', {
interface_name: 'puter-ocr',
method_name: 'recognize',
arg_name: 'source',
});
}
const document = await this._buildDocumentChunkFromSource(source);
const payload = {
model: model ?? 'mistral-ocr-latest',
document,
};
if ( Array.isArray(pages) ) {
payload.pages = pages;
}
if ( typeof includeImageBase64 === 'boolean' ) {
payload.includeImageBase64 = includeImageBase64;
}
if ( typeof imageLimit === 'number' ) {
payload.imageLimit = imageLimit;
}
if ( typeof imageMinSize === 'number' ) {
payload.imageMinSize = imageMinSize;
}
if ( bboxAnnotationFormat !== undefined ) {
payload.bboxAnnotationFormat = bboxAnnotationFormat;
}
if ( documentAnnotationFormat !== undefined ) {
payload.documentAnnotationFormat = documentAnnotationFormat;
}
const response = await this.client.ocr.process(payload);
const annotationsRequested = (
payload.documentAnnotationFormat !== undefined ||
payload.bboxAnnotationFormat !== undefined
);
this._recordOcrUsage(response, payload.model, {
annotationsRequested,
});
return this._normalizeOcrResponse(response);
},
},
'puter-chat-completion': {
/**
* Returns a list of available models and their details.
@@ -399,6 +465,157 @@ class MistralAIService extends BaseService {
},
},
};
/**
 * Converts an OCR `source` file facade into the document chunk that is
 * placed in the OCR request payload.
 *
 * Resolution order: a web URL wins, then a data URL, then the raw buffer
 * (which is re-encoded as a data URL). The file name, taken from the path's
 * basename or else the fs-node's `name`, is used to infer a MIME type and is
 * passed along to `_chunkFromUrl`.
 *
 * @param {object} fileFacade - File facade; values are read via `_safeFileValue`.
 * @returns {Promise<object>} Chunk produced by `_chunkFromUrl`.
 * @throws APIError `field_invalid` when no web URL, data URL, or buffer is available.
 */
async _buildDocumentChunkFromSource(fileFacade) {
// Every lookup goes through _safeFileValue, which yields undefined rather than throwing.
const dataUrl = await this._safeFileValue(fileFacade, 'data_url');
const webUrl = await this._safeFileValue(fileFacade, 'web_url');
const filePath = await this._safeFileValue(fileFacade, 'path');
const fsNode = await this._safeFileValue(fileFacade, 'fs-node');
// Prefer the path's basename; fall back to the fs-node's name when there is no path.
const fileName = filePath ? path.basename(filePath) : fsNode?.name;
const inferredMime = this._inferMimeFromName(fileName);
if ( webUrl ) {
return this._chunkFromUrl(webUrl, fileName, inferredMime);
}
if ( dataUrl ) {
// The MIME type embedded in the data URL takes precedence over the name-based guess.
const mimeFromUrl = this._extractMimeFromDataUrl(dataUrl) ?? inferredMime;
return this._chunkFromUrl(dataUrl, fileName, mimeFromUrl);
}
// The buffer is only requested once both URL forms are unavailable.
const buffer = await this._safeFileValue(fileFacade, 'buffer');
if ( ! buffer ) {
throw APIError.create('field_invalid', null, {
key: 'source',
expected: 'file, data URL, or web URL',
});
}
// With no inferable type, label the bytes as generic binary.
const mimeType = inferredMime ?? 'application/octet-stream';
const generatedDataUrl = this._createDataUrl(buffer, mimeType);
return this._chunkFromUrl(generatedDataUrl, fileName, mimeType);
}
async _safeFileValue(fileFacade, key) {
if ( ! fileFacade || typeof fileFacade.get !== 'function' ) return undefined;
const maybeCache = fileFacade.values?.values;
if ( maybeCache && Object.prototype.hasOwnProperty.call(maybeCache, key) ) {
return maybeCache[key];
}
try {
return await fileFacade.get(key);
} catch (e) {
return undefined;
}
}
_chunkFromUrl(url, fileName, mimeType) {
const lowerName = fileName?.toLowerCase();
const urlLooksPdf = /\.pdf($|\?)/i.test(url);
const mimeLooksPdf = mimeType?.includes('pdf');
const isPdf = mimeLooksPdf || urlLooksPdf || (lowerName ? lowerName.endsWith('.pdf') : false);
if ( isPdf ) {
const chunk = {
type: 'document_url',
documentUrl: url,
};
if ( fileName ) {
chunk.documentName = fileName;
}
return chunk;
}
return {
type: 'image_url',
imageUrl: {
url,
},
};
}
_inferMimeFromName(name) {
if ( ! name ) return undefined;
return mime.lookup(name) || undefined;
}
_extractMimeFromDataUrl(url) {
if ( typeof url !== 'string' ) return undefined;
const match = url.match(/^data:([^;,]+)[;,]/);
return match ? match[1] : undefined;
}
_createDataUrl(buffer, mimeType) {
return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
}
_normalizeOcrResponse(response) {
if ( ! response ) return {};
const normalized = {
model: response.model,
pages: response.pages ?? [],
usage_info: response.usageInfo,
};
const blocks = [];
if ( Array.isArray(response.pages) ) {
for ( const page of response.pages ) {
if ( typeof page?.markdown !== 'string' ) continue;
const lines = page.markdown.split('\n').map(line => line.trim()).filter(Boolean);
for ( const line of lines ) {
blocks.push({
type: 'text/mistral:LINE',
text: line,
page: page.index,
});
}
}
}
normalized.blocks = blocks;
if ( blocks.length ) {
normalized.text = blocks.map(block => block.text).join('\n');
} else if ( Array.isArray(response.pages) ) {
normalized.text = response.pages.map(page => page?.markdown || '').join('\n\n').trim();
}
return normalized;
}
_recordOcrUsage(response, model, { annotationsRequested } = {}) {
try {
if ( ! this.meteringService ) return;
const actor = Context.get('actor');
if ( ! actor ) return;
const pagesProcessed =
response?.usageInfo?.pagesProcessed ??
(Array.isArray(response?.pages) ? response.pages.length : 1);
this.meteringService.incrementUsage(actor, 'mistral-ocr:ocr:page', pagesProcessed);
if ( annotationsRequested ) {
this.meteringService.incrementUsage(actor, 'mistral-ocr:annotations:page', pagesProcessed);
}
} catch (e) {
// ignore metering failures to avoid blocking OCR results
}
}
_sampleOcrResponse() {
const markdown = 'Sample OCR output (test mode).';
return {
model: 'mistral-ocr-latest',
pages: [
{
index: 0,
markdown,
images: [],
dimensions: null,
},
],
blocks: [
{
type: 'text/mistral:LINE',
text: markdown,
page: 0,
},
],
text: markdown,
};
}
}
module.exports = { MistralAIService };
@@ -57,4 +57,9 @@ export const MISTRAL_COST_MAP = {
'mistral:open-mistral-nemo:completion_tokens': 10,
'mistral:mistral-ocr-latest:prompt_tokens': 100,
'mistral:mistral-ocr-latest:completion_tokens': 300,
};
// OCR page-based pricing (values in microcents/page)
// $1 / 1000 pages -> $0.001 per page -> 100000 microcents
'mistral-ocr:ocr:page': 100000,
// $3 / 1000 pages -> $0.003 per page -> 300000 microcents
'mistral-ocr:annotations:page': 300000,
};
@@ -86,6 +86,7 @@ class DriverService extends BaseService {
this.interface_to_implementation = {};
this.interface_to_test_service = {};
this.service_aliases = {};
this.interface_service_aliases = {};
}
_init () {
@@ -121,13 +122,12 @@ class DriverService extends BaseService {
},
'no_implementation_available': {
status: 502,
message: ({
iface,
interface_name,
driver
}) => `No implementation available for ` +
(iface ?? interface_name) ? 'interface' : 'driver' +
' ' + quot(iface ?? interface_name ?? driver) + '.',
message: ({ iface, interface_name, driver }) => {
const has_interface = (iface ?? interface_name) !== undefined;
const target_type = has_interface ? 'interface' : 'driver';
const target_name = quot(iface ?? interface_name ?? driver);
return `No implementation available for ${target_type} ${target_name}.`;
},
},
});
}
@@ -219,7 +219,15 @@ class DriverService extends BaseService {
this.interface_to_test_service[interface_name] = service_name;
}
register_service_alias (service_name, alias) {
register_service_alias (service_name, alias, options = {}) {
const iface = options.iface;
if ( iface ) {
if ( ! this.interface_service_aliases[iface] ) {
this.interface_service_aliases[iface] = {};
}
this.interface_service_aliases[iface][alias] = service_name;
return;
}
this.service_aliases[alias] = service_name;
}
@@ -323,7 +331,12 @@ class DriverService extends BaseService {
response_metadata: {},
test_mode,
};
driver = this.service_aliases[driver] ?? driver;
const iface_aliases = this.interface_service_aliases[iface];
if ( iface_aliases && iface_aliases[driver] ) {
driver = iface_aliases[driver];
} else {
driver = this.service_aliases[driver] ?? driver;
}
const service = this.get_service_or_throw_(driver, iface);
+16
View File
@@ -47,6 +47,9 @@ interface AI {
chat(messages: ChatMessage[], testMode?: boolean, options?: NonStreamingChatOptions): Promise<ChatResponse>;
img2txt(image: string | File | Blob, testMode?: boolean): Promise<string>;
img2txt(image: string | File | Blob, options?: Img2TxtOptions): Promise<string>;
img2txt(image: string | File | Blob, testMode?: boolean, options?: Img2TxtOptions): Promise<string>;
img2txt(options: Img2TxtOptions): Promise<string>;
txt2img(prompt: string, testMode?: boolean): Promise<HTMLImageElement>;
txt2img(prompt: string, options?: Txt2ImgOptions): Promise<HTMLImageElement>;
@@ -148,6 +151,19 @@ interface Txt2VidOptions {
test_mode?: boolean;
}
/**
 * Options accepted by `AI.img2txt`, either as the sole argument or alongside
 * a positional image argument.
 */
interface Img2TxtOptions {
/** Image or document to recognize; used when the source is not passed positionally. */
source?: string | File | Blob;
/** OCR backend to use. */
provider?: 'aws-textract' | 'mistral';
/** Model identifier forwarded to the provider. */
model?: string;
/** Page selection forwarded to the provider. */
pages?: number[];
/** Forwarded to the provider as-is. */
includeImageBase64?: boolean;
/** Forwarded to the provider as-is. */
imageLimit?: number;
/** Forwarded to the provider as-is. */
imageMinSize?: number;
/** Annotation format forwarded to the provider. */
bboxAnnotationFormat?: Record<string, unknown>;
/** Annotation format forwarded to the provider. */
documentAnnotationFormat?: Record<string, unknown>;
/** Enables test mode for the call. */
testMode?: boolean;
}
interface Txt2SpeechOptions {
language?: string;
voice?: string;
+81 -29
View File
@@ -118,48 +118,100 @@ class AI{
}
img2txt = async (...args) => {
let MAX_INPUT_SIZE = 10 * 1024 * 1024;
let options = {};
let testMode = false;
// Check that the argument is not undefined or null
if(!args){
throw({message: 'Arguments are required', code: 'arguments_required'});
const MAX_INPUT_SIZE = 10 * 1024 * 1024;
if (!args || args.length === 0) {
throw { message: 'Arguments are required', code: 'arguments_required' };
}
// if argument is string transform it to the object that the API expects
if (typeof args[0] === 'string' || args[0] instanceof Blob) {
const isBlobLike = (value) => {
if (typeof Blob === 'undefined') return false;
return value instanceof Blob || (typeof File !== 'undefined' && value instanceof File);
};
const isPlainObject = (value) => value && typeof value === 'object' && !Array.isArray(value) && !isBlobLike(value);
const normalizeProvider = (value) => {
if (!value) return 'aws-textract';
const normalized = String(value).toLowerCase();
if (['aws', 'textract', 'aws-textract'].includes(normalized)) return 'aws-textract';
if (['mistral', 'mistral-ocr'].includes(normalized)) return 'mistral';
return 'aws-textract';
};
let options = {};
if (isPlainObject(args[0])) {
options = { ...args[0] };
} else {
options.source = args[0];
}
// if input is a blob, transform it to a data URI
if (args[0].source instanceof Blob) {
options.source = await utils.blobToDataUri(args[0].source);
let testMode = false;
for (let i = 1; i < args.length; i++) {
const value = args[i];
if (typeof value === 'boolean') {
testMode = testMode || value;
} else if (isPlainObject(value)) {
options = { ...options, ...value };
}
}
// check input size
if (options.source.length > this.MAX_INPUT_SIZE) {
if (typeof options.testMode === 'boolean') {
testMode = options.testMode;
}
const provider = normalizeProvider(options.provider);
delete options.provider;
delete options.testMode;
if (!options.source) {
throw { message: 'Source is required', code: 'source_required' };
}
if (isBlobLike(options.source)) {
options.source = await utils.blobToDataUri(options.source);
} else if (options.source?.source && isBlobLike(options.source.source)) {
// Support shape { source: Blob }
options.source = await utils.blobToDataUri(options.source.source);
}
if (typeof options.source === 'string' &&
options.source.startsWith('data:') &&
options.source.length > MAX_INPUT_SIZE) {
throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
}
// determine if test mode is enabled
if (typeof args[1] === 'boolean' && args[1] === true ||
typeof args[2] === 'boolean' && args[2] === true ||
typeof args[3] === 'boolean' && args[3] === true) {
testMode = true;
}
return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
test_mode: testMode ?? false,
transform: async (result) => {
const toText = (result) => {
if (!result) return '';
if (Array.isArray(result.blocks) && result.blocks.length) {
let str = '';
for (let i = 0; i < result?.blocks?.length; i++) {
if("text/textract:LINE" === result.blocks[i].type)
str += result.blocks[i].text + "\n";
for (const block of result.blocks) {
if (typeof block?.text !== 'string') continue;
if (!block.type || block.type === 'text/textract:LINE' || block.type.startsWith('text/')) {
str += block.text + '\n';
}
}
return str;
if (str.trim()) return str;
}
}).call(this, options);
if (Array.isArray(result.pages) && result.pages.length) {
const markdown = result.pages
.map(page => (page?.markdown || '').trim())
.filter(Boolean)
.join('\n\n');
if (markdown.trim()) return markdown;
}
if (typeof result.document_annotation === 'string') {
return result.document_annotation;
}
if (typeof result.text === 'string') {
return result.text;
}
return '';
};
const driverCall = utils.make_driver_method(['source'], 'puter-ocr', provider, 'recognize', {
test_mode: testMode ?? false,
transform: async (result) => toText(result),
});
return await driverCall.call(this, options);
}
txt2speech = async (...args) => {