mirror of
https://github.com/HeyPuter/puter.git
synced 2026-04-30 03:59:33 -05:00
Add Mistral OCR support and improve img2txt API (#1957)
Introduces Mistral OCR integration in backend and updates the `AIInterfaceService` and `MistralAIService` to support new OCR options and response normalization. Updates metering cost map for OCR and annotation usage. Refactors `DriverService` to support interface-specific service aliases. Expands the puter.js `AI.img2txt` API to support flexible options and provider selection, including Mistral OCR.
This commit is contained in:
Generated
+1
-26
@@ -904,7 +904,6 @@
|
||||
"integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.27.1",
|
||||
"@babel/generator": "^7.28.3",
|
||||
@@ -3048,7 +3047,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/custom/-/custom-0.22.12.tgz",
|
||||
"integrity": "sha512-xcmww1O/JFP2MrlGUMd3Q78S3Qu6W3mYTXYuIqFq33EorgYHV/HqymHfXy9GjiCJ7OI+7lWx6nYFOzU7M4rd1Q==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/core": "^0.22.12"
|
||||
}
|
||||
@@ -3085,7 +3083,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-blit/-/plugin-blit-0.22.12.tgz",
|
||||
"integrity": "sha512-xslz2ZoFZOPLY8EZ4dC29m168BtDx95D6K80TzgUi8gqT7LY6CsajWO0FAxDwHz6h0eomHMfyGX0stspBrTKnQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12"
|
||||
},
|
||||
@@ -3098,7 +3095,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-blur/-/plugin-blur-0.22.12.tgz",
|
||||
"integrity": "sha512-S0vJADTuh1Q9F+cXAwFPlrKWzDj2F9t/9JAbUvaaDuivpyWuImEKXVz5PUZw2NbpuSHjwssbTpOZ8F13iJX4uw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12"
|
||||
},
|
||||
@@ -3123,7 +3119,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-color/-/plugin-color-0.22.12.tgz",
|
||||
"integrity": "sha512-xImhTE5BpS8xa+mAN6j4sMRWaUgUDLoaGHhJhpC+r7SKKErYDR0WQV4yCE4gP+N0gozD0F3Ka1LUSaMXrn7ZIA==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12",
|
||||
"tinycolor2": "^1.6.0"
|
||||
@@ -3167,7 +3162,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-crop/-/plugin-crop-0.22.12.tgz",
|
||||
"integrity": "sha512-FNuUN0OVzRCozx8XSgP9MyLGMxNHHJMFt+LJuFjn1mu3k0VQxrzqbN06yIl46TVejhyAhcq5gLzqmSCHvlcBVw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12"
|
||||
},
|
||||
@@ -3291,7 +3285,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-resize/-/plugin-resize-0.22.12.tgz",
|
||||
"integrity": "sha512-3NyTPlPbTnGKDIbaBgQ3HbE6wXbAlFfxHVERmrbqAi8R3r6fQPxpCauA8UVDnieg5eo04D0T8nnnNIX//i/sXg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12"
|
||||
},
|
||||
@@ -3304,7 +3297,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-rotate/-/plugin-rotate-0.22.12.tgz",
|
||||
"integrity": "sha512-9YNEt7BPAFfTls2FGfKBVgwwLUuKqy+E8bDGGEsOqHtbuhbshVGxN2WMZaD4gh5IDWvR+emmmPPWGgaYNYt1gA==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12"
|
||||
},
|
||||
@@ -3320,7 +3312,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@jimp/plugin-scale/-/plugin-scale-0.22.12.tgz",
|
||||
"integrity": "sha512-dghs92qM6MhHj0HrV2qAwKPMklQtjNpoYgAB94ysYpsXslhRTiPisueSIELRwZGEr0J0VUxpUY7HgJwlSIgGZw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jimp/utils": "^0.22.12"
|
||||
},
|
||||
@@ -3628,7 +3619,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.4.1.tgz",
|
||||
"integrity": "sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
@@ -3638,7 +3628,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.49.1.tgz",
|
||||
"integrity": "sha512-kaNl/T7WzyMUQHQlVq7q0oV4Kev6+0xFwqzofryC66jgGMacd0QH5TwfpbUwSTby+SdAdprAe5UKMvBw4tKS5Q==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/api": "^1.0.0"
|
||||
},
|
||||
@@ -7273,7 +7262,6 @@
|
||||
"integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.46.1",
|
||||
"@typescript-eslint/types": "8.46.1",
|
||||
@@ -7816,8 +7804,7 @@
|
||||
"version": "5.5.0",
|
||||
"resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-5.5.0.tgz",
|
||||
"integrity": "sha512-hqJHYaQb5OptNunnyAnkHyM8aCjZ1MEIDTQu1iIbbTD/xops91NB5yq1ZK/dC2JDbVWtF23zUtl9JE2NqwT87A==",
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@xtuc/ieee754": {
|
||||
"version": "1.2.0",
|
||||
@@ -7867,7 +7854,6 @@
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -8557,7 +8543,6 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"baseline-browser-mapping": "^2.8.9",
|
||||
"caniuse-lite": "^1.0.30001746",
|
||||
@@ -8812,7 +8797,6 @@
|
||||
"resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz",
|
||||
"integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"assertion-error": "^2.0.1",
|
||||
"check-error": "^2.1.1",
|
||||
@@ -10549,7 +10533,6 @@
|
||||
"resolved": "https://registry.npmjs.org/eslint/-/eslint-9.37.0.tgz",
|
||||
"integrity": "sha512-XyLmROnACWqSxiGYArdef1fItQd47weqB7iwtfr9JHwRrqIXZdcFMvvEcL9xHCmL0SNsOvF0c42lWyM1U5dgig==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.8.0",
|
||||
"@eslint-community/regexpp": "^4.12.1",
|
||||
@@ -16322,7 +16305,6 @@
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
|
||||
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"fast-deep-equal": "^3.1.3",
|
||||
"fast-uri": "^3.0.1",
|
||||
@@ -18003,7 +17985,6 @@
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
@@ -18230,7 +18211,6 @@
|
||||
"integrity": "sha512-4nVGliEpxmhCL8DslSAUdxlB6+SMrhB0a1v5ijlh1xB1nEPuy1mxaHxysVucLHuWryAxLWg6a5ei+U4TLn/rFg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.5.0",
|
||||
@@ -18496,7 +18476,6 @@
|
||||
"resolved": "https://registry.npmjs.org/webpack/-/webpack-5.102.1.tgz",
|
||||
"integrity": "sha512-7h/weGm9d/ywQ6qzJ+Xy+r9n/3qgp/thalBbpOi5i223dPXKi04IBtqPN9nTd+jBc7QKfvDbaBnFipYp4sJAUQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/eslint-scope": "^3.7.7",
|
||||
"@types/estree": "^1.0.8",
|
||||
@@ -18546,7 +18525,6 @@
|
||||
"integrity": "sha512-pIDJHIEI9LR0yxHXQ+Qh95k2EvXpWzZ5l+d+jIo+RdSm9MiHfzazIxwwni/p7+x4eJZuvG1AJwgC4TNQ7NRgsg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@discoveryjs/json-ext": "^0.5.0",
|
||||
"@webpack-cli/configtest": "^2.1.1",
|
||||
@@ -18778,7 +18756,6 @@
|
||||
"resolved": "https://registry.npmjs.org/winston/-/winston-3.18.3.tgz",
|
||||
"integrity": "sha512-NoBZauFNNWENgsnC9YpgyYwOVrl2m58PpQ8lNHjV3kosGs7KJ7Npk9pCUE+WJlawVSe8mykWDKWFSVfs3QO9ww==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@colors/colors": "^1.6.0",
|
||||
"@dabh/diagnostics": "^2.0.8",
|
||||
@@ -18983,7 +18960,6 @@
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
@@ -19317,7 +19293,6 @@
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
|
||||
@@ -122,7 +122,8 @@ class AIChatService extends BaseService {
|
||||
const svc_driver = this.services.get('driver');
|
||||
for ( const provider of this.providers ) {
|
||||
svc_driver.register_service_alias('ai-chat',
|
||||
provider.service_name);
|
||||
provider.service_name,
|
||||
{ iface: 'puter-chat-completion' });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,35 @@ class AIInterfaceService extends BaseService {
|
||||
source: {
|
||||
type: 'file',
|
||||
},
|
||||
model: {
|
||||
type: 'string',
|
||||
optional: true,
|
||||
},
|
||||
pages: {
|
||||
type: 'json',
|
||||
subtype: 'array',
|
||||
optional: true,
|
||||
},
|
||||
includeImageBase64: {
|
||||
type: 'flag',
|
||||
optional: true,
|
||||
},
|
||||
imageLimit: {
|
||||
type: 'number',
|
||||
optional: true,
|
||||
},
|
||||
imageMinSize: {
|
||||
type: 'number',
|
||||
optional: true,
|
||||
},
|
||||
bboxAnnotationFormat: {
|
||||
type: 'json',
|
||||
optional: true,
|
||||
},
|
||||
documentAnnotationFormat: {
|
||||
type: 'json',
|
||||
optional: true,
|
||||
},
|
||||
},
|
||||
result: {
|
||||
type: {
|
||||
|
||||
@@ -22,6 +22,9 @@ const BaseService = require('../../services/BaseService');
|
||||
const axios = require('axios');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const { Context } = require('../../util/context');
|
||||
const APIError = require('../../api/APIError');
|
||||
const mime = require('mime-types');
|
||||
const path = require('path');
|
||||
|
||||
/**
|
||||
* MistralAIService class extends BaseService to provide integration with the Mistral AI API.
|
||||
@@ -310,6 +313,69 @@ class MistralAIService extends BaseService {
|
||||
return 'mistral-large-latest';
|
||||
}
|
||||
static IMPLEMENTS = {
|
||||
'driver-capabilities': {
|
||||
supports_test_mode(iface, method_name) {
|
||||
return iface === 'puter-ocr' && method_name === 'recognize';
|
||||
},
|
||||
},
|
||||
'puter-ocr': {
|
||||
async recognize({
|
||||
source,
|
||||
model,
|
||||
pages,
|
||||
includeImageBase64,
|
||||
imageLimit,
|
||||
imageMinSize,
|
||||
bboxAnnotationFormat,
|
||||
documentAnnotationFormat,
|
||||
test_mode,
|
||||
}) {
|
||||
if ( test_mode ) {
|
||||
return this._sampleOcrResponse();
|
||||
}
|
||||
if ( ! source ) {
|
||||
throw APIError.create('missing_required_argument', {
|
||||
interface_name: 'puter-ocr',
|
||||
method_name: 'recognize',
|
||||
arg_name: 'source',
|
||||
});
|
||||
}
|
||||
|
||||
const document = await this._buildDocumentChunkFromSource(source);
|
||||
const payload = {
|
||||
model: model ?? 'mistral-ocr-latest',
|
||||
document,
|
||||
};
|
||||
if ( Array.isArray(pages) ) {
|
||||
payload.pages = pages;
|
||||
}
|
||||
if ( typeof includeImageBase64 === 'boolean' ) {
|
||||
payload.includeImageBase64 = includeImageBase64;
|
||||
}
|
||||
if ( typeof imageLimit === 'number' ) {
|
||||
payload.imageLimit = imageLimit;
|
||||
}
|
||||
if ( typeof imageMinSize === 'number' ) {
|
||||
payload.imageMinSize = imageMinSize;
|
||||
}
|
||||
if ( bboxAnnotationFormat !== undefined ) {
|
||||
payload.bboxAnnotationFormat = bboxAnnotationFormat;
|
||||
}
|
||||
if ( documentAnnotationFormat !== undefined ) {
|
||||
payload.documentAnnotationFormat = documentAnnotationFormat;
|
||||
}
|
||||
|
||||
const response = await this.client.ocr.process(payload);
|
||||
const annotationsRequested = (
|
||||
payload.documentAnnotationFormat !== undefined ||
|
||||
payload.bboxAnnotationFormat !== undefined
|
||||
);
|
||||
this._recordOcrUsage(response, payload.model, {
|
||||
annotationsRequested,
|
||||
});
|
||||
return this._normalizeOcrResponse(response);
|
||||
},
|
||||
},
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
@@ -399,6 +465,157 @@ class MistralAIService extends BaseService {
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
async _buildDocumentChunkFromSource(fileFacade) {
|
||||
const dataUrl = await this._safeFileValue(fileFacade, 'data_url');
|
||||
const webUrl = await this._safeFileValue(fileFacade, 'web_url');
|
||||
const filePath = await this._safeFileValue(fileFacade, 'path');
|
||||
const fsNode = await this._safeFileValue(fileFacade, 'fs-node');
|
||||
const fileName = filePath ? path.basename(filePath) : fsNode?.name;
|
||||
const inferredMime = this._inferMimeFromName(fileName);
|
||||
|
||||
if ( webUrl ) {
|
||||
return this._chunkFromUrl(webUrl, fileName, inferredMime);
|
||||
}
|
||||
if ( dataUrl ) {
|
||||
const mimeFromUrl = this._extractMimeFromDataUrl(dataUrl) ?? inferredMime;
|
||||
return this._chunkFromUrl(dataUrl, fileName, mimeFromUrl);
|
||||
}
|
||||
|
||||
const buffer = await this._safeFileValue(fileFacade, 'buffer');
|
||||
if ( ! buffer ) {
|
||||
throw APIError.create('field_invalid', null, {
|
||||
key: 'source',
|
||||
expected: 'file, data URL, or web URL',
|
||||
});
|
||||
}
|
||||
const mimeType = inferredMime ?? 'application/octet-stream';
|
||||
const generatedDataUrl = this._createDataUrl(buffer, mimeType);
|
||||
return this._chunkFromUrl(generatedDataUrl, fileName, mimeType);
|
||||
}
|
||||
|
||||
async _safeFileValue(fileFacade, key) {
|
||||
if ( ! fileFacade || typeof fileFacade.get !== 'function' ) return undefined;
|
||||
const maybeCache = fileFacade.values?.values;
|
||||
if ( maybeCache && Object.prototype.hasOwnProperty.call(maybeCache, key) ) {
|
||||
return maybeCache[key];
|
||||
}
|
||||
try {
|
||||
return await fileFacade.get(key);
|
||||
} catch (e) {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
_chunkFromUrl(url, fileName, mimeType) {
|
||||
const lowerName = fileName?.toLowerCase();
|
||||
const urlLooksPdf = /\.pdf($|\?)/i.test(url);
|
||||
const mimeLooksPdf = mimeType?.includes('pdf');
|
||||
const isPdf = mimeLooksPdf || urlLooksPdf || (lowerName ? lowerName.endsWith('.pdf') : false);
|
||||
|
||||
if ( isPdf ) {
|
||||
const chunk = {
|
||||
type: 'document_url',
|
||||
documentUrl: url,
|
||||
};
|
||||
if ( fileName ) {
|
||||
chunk.documentName = fileName;
|
||||
}
|
||||
return chunk;
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'image_url',
|
||||
imageUrl: {
|
||||
url,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
_inferMimeFromName(name) {
|
||||
if ( ! name ) return undefined;
|
||||
return mime.lookup(name) || undefined;
|
||||
}
|
||||
|
||||
_extractMimeFromDataUrl(url) {
|
||||
if ( typeof url !== 'string' ) return undefined;
|
||||
const match = url.match(/^data:([^;,]+)[;,]/);
|
||||
return match ? match[1] : undefined;
|
||||
}
|
||||
|
||||
_createDataUrl(buffer, mimeType) {
|
||||
return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
|
||||
}
|
||||
|
||||
_normalizeOcrResponse(response) {
|
||||
if ( ! response ) return {};
|
||||
const normalized = {
|
||||
model: response.model,
|
||||
pages: response.pages ?? [],
|
||||
usage_info: response.usageInfo,
|
||||
};
|
||||
const blocks = [];
|
||||
if ( Array.isArray(response.pages) ) {
|
||||
for ( const page of response.pages ) {
|
||||
if ( typeof page?.markdown !== 'string' ) continue;
|
||||
const lines = page.markdown.split('\n').map(line => line.trim()).filter(Boolean);
|
||||
for ( const line of lines ) {
|
||||
blocks.push({
|
||||
type: 'text/mistral:LINE',
|
||||
text: line,
|
||||
page: page.index,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
normalized.blocks = blocks;
|
||||
if ( blocks.length ) {
|
||||
normalized.text = blocks.map(block => block.text).join('\n');
|
||||
} else if ( Array.isArray(response.pages) ) {
|
||||
normalized.text = response.pages.map(page => page?.markdown || '').join('\n\n').trim();
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
_recordOcrUsage(response, model, { annotationsRequested } = {}) {
|
||||
try {
|
||||
if ( ! this.meteringService ) return;
|
||||
const actor = Context.get('actor');
|
||||
if ( ! actor ) return;
|
||||
const pagesProcessed =
|
||||
response?.usageInfo?.pagesProcessed ??
|
||||
(Array.isArray(response?.pages) ? response.pages.length : 1);
|
||||
this.meteringService.incrementUsage(actor, 'mistral-ocr:ocr:page', pagesProcessed);
|
||||
if ( annotationsRequested ) {
|
||||
this.meteringService.incrementUsage(actor, 'mistral-ocr:annotations:page', pagesProcessed);
|
||||
}
|
||||
} catch (e) {
|
||||
// ignore metering failures to avoid blocking OCR results
|
||||
}
|
||||
}
|
||||
|
||||
_sampleOcrResponse() {
|
||||
const markdown = 'Sample OCR output (test mode).';
|
||||
return {
|
||||
model: 'mistral-ocr-latest',
|
||||
pages: [
|
||||
{
|
||||
index: 0,
|
||||
markdown,
|
||||
images: [],
|
||||
dimensions: null,
|
||||
},
|
||||
],
|
||||
blocks: [
|
||||
{
|
||||
type: 'text/mistral:LINE',
|
||||
text: markdown,
|
||||
page: 0,
|
||||
},
|
||||
],
|
||||
text: markdown,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { MistralAIService };
|
||||
|
||||
@@ -57,4 +57,9 @@ export const MISTRAL_COST_MAP = {
|
||||
'mistral:open-mistral-nemo:completion_tokens': 10,
|
||||
'mistral:mistral-ocr-latest:prompt_tokens': 100,
|
||||
'mistral:mistral-ocr-latest:completion_tokens': 300,
|
||||
};
|
||||
// OCR page-based pricing (values in microcents/page)
|
||||
// $1 / 1000 pages -> $0.001 per page -> 100000 microcents
|
||||
'mistral-ocr:ocr:page': 100000,
|
||||
// $3 / 1000 pages -> $0.003 per page -> 300000 microcents
|
||||
'mistral-ocr:annotations:page': 300000,
|
||||
};
|
||||
|
||||
@@ -86,6 +86,7 @@ class DriverService extends BaseService {
|
||||
this.interface_to_implementation = {};
|
||||
this.interface_to_test_service = {};
|
||||
this.service_aliases = {};
|
||||
this.interface_service_aliases = {};
|
||||
}
|
||||
|
||||
_init () {
|
||||
@@ -121,13 +122,12 @@ class DriverService extends BaseService {
|
||||
},
|
||||
'no_implementation_available': {
|
||||
status: 502,
|
||||
message: ({
|
||||
iface,
|
||||
interface_name,
|
||||
driver
|
||||
}) => `No implementation available for ` +
|
||||
(iface ?? interface_name) ? 'interface' : 'driver' +
|
||||
' ' + quot(iface ?? interface_name ?? driver) + '.',
|
||||
message: ({ iface, interface_name, driver }) => {
|
||||
const has_interface = (iface ?? interface_name) !== undefined;
|
||||
const target_type = has_interface ? 'interface' : 'driver';
|
||||
const target_name = quot(iface ?? interface_name ?? driver);
|
||||
return `No implementation available for ${target_type} ${target_name}.`;
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -219,7 +219,15 @@ class DriverService extends BaseService {
|
||||
this.interface_to_test_service[interface_name] = service_name;
|
||||
}
|
||||
|
||||
register_service_alias (service_name, alias) {
|
||||
register_service_alias (service_name, alias, options = {}) {
|
||||
const iface = options.iface;
|
||||
if ( iface ) {
|
||||
if ( ! this.interface_service_aliases[iface] ) {
|
||||
this.interface_service_aliases[iface] = {};
|
||||
}
|
||||
this.interface_service_aliases[iface][alias] = service_name;
|
||||
return;
|
||||
}
|
||||
this.service_aliases[alias] = service_name;
|
||||
}
|
||||
|
||||
@@ -323,7 +331,12 @@ class DriverService extends BaseService {
|
||||
response_metadata: {},
|
||||
test_mode,
|
||||
};
|
||||
driver = this.service_aliases[driver] ?? driver;
|
||||
const iface_aliases = this.interface_service_aliases[iface];
|
||||
if ( iface_aliases && iface_aliases[driver] ) {
|
||||
driver = iface_aliases[driver];
|
||||
} else {
|
||||
driver = this.service_aliases[driver] ?? driver;
|
||||
}
|
||||
|
||||
const service = this.get_service_or_throw_(driver, iface);
|
||||
|
||||
|
||||
Vendored
+16
@@ -47,6 +47,9 @@ interface AI {
|
||||
chat(messages: ChatMessage[], testMode?: boolean, options?: NonStreamingChatOptions): Promise<ChatResponse>;
|
||||
|
||||
img2txt(image: string | File | Blob, testMode?: boolean): Promise<string>;
|
||||
img2txt(image: string | File | Blob, options?: Img2TxtOptions): Promise<string>;
|
||||
img2txt(image: string | File | Blob, testMode?: boolean, options?: Img2TxtOptions): Promise<string>;
|
||||
img2txt(options: Img2TxtOptions): Promise<string>;
|
||||
|
||||
txt2img(prompt: string, testMode?: boolean): Promise<HTMLImageElement>;
|
||||
txt2img(prompt: string, options?: Txt2ImgOptions): Promise<HTMLImageElement>;
|
||||
@@ -148,6 +151,19 @@ interface Txt2VidOptions {
|
||||
test_mode?: boolean;
|
||||
}
|
||||
|
||||
interface Img2TxtOptions {
|
||||
source?: string | File | Blob;
|
||||
provider?: 'aws-textract' | 'mistral';
|
||||
model?: string;
|
||||
pages?: number[];
|
||||
includeImageBase64?: boolean;
|
||||
imageLimit?: number;
|
||||
imageMinSize?: number;
|
||||
bboxAnnotationFormat?: Record<string, unknown>;
|
||||
documentAnnotationFormat?: Record<string, unknown>;
|
||||
testMode?: boolean;
|
||||
}
|
||||
|
||||
interface Txt2SpeechOptions {
|
||||
language?: string;
|
||||
voice?: string;
|
||||
|
||||
@@ -118,48 +118,100 @@ class AI{
|
||||
}
|
||||
|
||||
img2txt = async (...args) => {
|
||||
let MAX_INPUT_SIZE = 10 * 1024 * 1024;
|
||||
let options = {};
|
||||
let testMode = false;
|
||||
|
||||
// Check that the argument is not undefined or null
|
||||
if(!args){
|
||||
throw({message: 'Arguments are required', code: 'arguments_required'});
|
||||
const MAX_INPUT_SIZE = 10 * 1024 * 1024;
|
||||
if (!args || args.length === 0) {
|
||||
throw { message: 'Arguments are required', code: 'arguments_required' };
|
||||
}
|
||||
|
||||
// if argument is string transform it to the object that the API expects
|
||||
if (typeof args[0] === 'string' || args[0] instanceof Blob) {
|
||||
const isBlobLike = (value) => {
|
||||
if (typeof Blob === 'undefined') return false;
|
||||
return value instanceof Blob || (typeof File !== 'undefined' && value instanceof File);
|
||||
};
|
||||
const isPlainObject = (value) => value && typeof value === 'object' && !Array.isArray(value) && !isBlobLike(value);
|
||||
const normalizeProvider = (value) => {
|
||||
if (!value) return 'aws-textract';
|
||||
const normalized = String(value).toLowerCase();
|
||||
if (['aws', 'textract', 'aws-textract'].includes(normalized)) return 'aws-textract';
|
||||
if (['mistral', 'mistral-ocr'].includes(normalized)) return 'mistral';
|
||||
return 'aws-textract';
|
||||
};
|
||||
|
||||
let options = {};
|
||||
if (isPlainObject(args[0])) {
|
||||
options = { ...args[0] };
|
||||
} else {
|
||||
options.source = args[0];
|
||||
}
|
||||
|
||||
// if input is a blob, transform it to a data URI
|
||||
if (args[0].source instanceof Blob) {
|
||||
options.source = await utils.blobToDataUri(args[0].source);
|
||||
let testMode = false;
|
||||
for (let i = 1; i < args.length; i++) {
|
||||
const value = args[i];
|
||||
if (typeof value === 'boolean') {
|
||||
testMode = testMode || value;
|
||||
} else if (isPlainObject(value)) {
|
||||
options = { ...options, ...value };
|
||||
}
|
||||
}
|
||||
|
||||
// check input size
|
||||
if (options.source.length > this.MAX_INPUT_SIZE) {
|
||||
if (typeof options.testMode === 'boolean') {
|
||||
testMode = options.testMode;
|
||||
}
|
||||
|
||||
const provider = normalizeProvider(options.provider);
|
||||
delete options.provider;
|
||||
delete options.testMode;
|
||||
|
||||
if (!options.source) {
|
||||
throw { message: 'Source is required', code: 'source_required' };
|
||||
}
|
||||
|
||||
if (isBlobLike(options.source)) {
|
||||
options.source = await utils.blobToDataUri(options.source);
|
||||
} else if (options.source?.source && isBlobLike(options.source.source)) {
|
||||
// Support shape { source: Blob }
|
||||
options.source = await utils.blobToDataUri(options.source.source);
|
||||
}
|
||||
|
||||
if (typeof options.source === 'string' &&
|
||||
options.source.startsWith('data:') &&
|
||||
options.source.length > MAX_INPUT_SIZE) {
|
||||
throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
|
||||
}
|
||||
|
||||
// determine if test mode is enabled
|
||||
if (typeof args[1] === 'boolean' && args[1] === true ||
|
||||
typeof args[2] === 'boolean' && args[2] === true ||
|
||||
typeof args[3] === 'boolean' && args[3] === true) {
|
||||
testMode = true;
|
||||
}
|
||||
|
||||
return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
|
||||
test_mode: testMode ?? false,
|
||||
transform: async (result) => {
|
||||
const toText = (result) => {
|
||||
if (!result) return '';
|
||||
if (Array.isArray(result.blocks) && result.blocks.length) {
|
||||
let str = '';
|
||||
for (let i = 0; i < result?.blocks?.length; i++) {
|
||||
if("text/textract:LINE" === result.blocks[i].type)
|
||||
str += result.blocks[i].text + "\n";
|
||||
for (const block of result.blocks) {
|
||||
if (typeof block?.text !== 'string') continue;
|
||||
if (!block.type || block.type === 'text/textract:LINE' || block.type.startsWith('text/')) {
|
||||
str += block.text + '\n';
|
||||
}
|
||||
}
|
||||
return str;
|
||||
if (str.trim()) return str;
|
||||
}
|
||||
}).call(this, options);
|
||||
if (Array.isArray(result.pages) && result.pages.length) {
|
||||
const markdown = result.pages
|
||||
.map(page => (page?.markdown || '').trim())
|
||||
.filter(Boolean)
|
||||
.join('\n\n');
|
||||
if (markdown.trim()) return markdown;
|
||||
}
|
||||
if (typeof result.document_annotation === 'string') {
|
||||
return result.document_annotation;
|
||||
}
|
||||
if (typeof result.text === 'string') {
|
||||
return result.text;
|
||||
}
|
||||
return '';
|
||||
};
|
||||
|
||||
const driverCall = utils.make_driver_method(['source'], 'puter-ocr', provider, 'recognize', {
|
||||
test_mode: testMode ?? false,
|
||||
transform: async (result) => toText(result),
|
||||
});
|
||||
|
||||
return await driverCall.call(this, options);
|
||||
}
|
||||
|
||||
txt2speech = async (...args) => {
|
||||
|
||||
Reference in New Issue
Block a user