mirror of
https://github.com/HeyPuter/puter.git
synced 2026-01-04 04:00:27 -06:00
metering: rest of ai (#1736)
* metering: rest of ai * fix: wrong cost name
This commit is contained in:
@@ -1,18 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
@@ -22,11 +22,12 @@ const { PollyClient, SynthesizeSpeechCommand, DescribeVoicesCommand } = require(
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { TypedValue } = require("../../services/drivers/meta/Runtime");
|
||||
const APIError = require("../../api/APIError");
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
// Polly price calculation per engine
|
||||
const ENGINE_PRICING = {
|
||||
'standard': 400, // $4.00 per 1M characters
|
||||
'neural': 1600, // $16.00 per 1M characters
|
||||
'neural': 1600, // $16.00 per 1M characters
|
||||
'long-form': 10000, // $100.00 per 1M characters
|
||||
'generative': 3000, // $30.00 per 1M characters
|
||||
};
|
||||
@@ -43,10 +44,12 @@ const VALID_ENGINES = ['standard', 'neural', 'long-form', 'generative'];
|
||||
* @extends BaseService
|
||||
*/
|
||||
class AWSPollyService extends BaseService {
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
|
||||
static MODULES = {
|
||||
kv: globalThis.kv,
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Initializes the service by creating an empty clients object.
|
||||
@@ -54,15 +57,19 @@ class AWSPollyService extends BaseService {
|
||||
* the internal state needed for AWS Polly client management.
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async _construct () {
|
||||
async _construct() {
|
||||
this.clients_ = {};
|
||||
}
|
||||
|
||||
async _init() {
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['driver-capabilities']: {
|
||||
supports_test_mode (iface, method_name) {
|
||||
supports_test_mode(iface, method_name) {
|
||||
return iface === 'puter-tts' && method_name === 'synthesize';
|
||||
}
|
||||
},
|
||||
},
|
||||
['puter-tts']: {
|
||||
/**
|
||||
@@ -73,16 +80,14 @@ class AWSPollyService extends BaseService {
|
||||
* @property {Object} synthesize - Converts text to speech using specified voice/language
|
||||
* @property {Function} supports_test_mode - Indicates test mode support for methods
|
||||
*/
|
||||
async list_voices ({ engine } = {}) {
|
||||
async list_voices({ engine } = {}) {
|
||||
const polly_voices = await this.describe_voices();
|
||||
|
||||
let voices = polly_voices.Voices;
|
||||
|
||||
if (engine) {
|
||||
if (VALID_ENGINES.includes(engine)) {
|
||||
voices = voices.filter(
|
||||
(voice) => voice.SupportedEngines?.includes(engine)
|
||||
);
|
||||
if ( engine ) {
|
||||
if ( VALID_ENGINES.includes(engine) ) {
|
||||
voices = voices.filter((voice) => voice.SupportedEngines?.includes(engine));
|
||||
} else {
|
||||
throw APIError.create('invalid_engine', null, { engine, valid_engines: VALID_ENGINES });
|
||||
}
|
||||
@@ -96,25 +101,25 @@ class AWSPollyService extends BaseService {
|
||||
code: voice.LanguageCode,
|
||||
},
|
||||
supported_engines: voice.SupportedEngines || ['standard'],
|
||||
}))
|
||||
}));
|
||||
|
||||
return voices;
|
||||
},
|
||||
async list_engines () {
|
||||
async list_engines() {
|
||||
return VALID_ENGINES.map(engine => ({
|
||||
id: engine,
|
||||
name: engine.charAt(0).toUpperCase() + engine.slice(1),
|
||||
pricing_per_million_chars: ENGINE_PRICING[engine] / 100, // Convert microcents to dollars
|
||||
}));
|
||||
},
|
||||
async synthesize ({
|
||||
async synthesize({
|
||||
text, voice,
|
||||
ssml, language,
|
||||
engine = 'standard',
|
||||
test_mode,
|
||||
}) {
|
||||
if ( test_mode ) {
|
||||
const url = 'https://puter-sample-data.puter.site/tts_example.mp3'
|
||||
const url = 'https://puter-sample-data.puter.site/tts_example.mp3';
|
||||
return new TypedValue({
|
||||
$: 'string:url:web',
|
||||
content_type: 'audio',
|
||||
@@ -122,13 +127,13 @@ class AWSPollyService extends BaseService {
|
||||
}
|
||||
|
||||
// Validate engine
|
||||
if (!VALID_ENGINES.includes(engine)) {
|
||||
if ( !VALID_ENGINES.includes(engine) ) {
|
||||
throw APIError.create('invalid_engine', null, { engine, valid_engines: VALID_ENGINES });
|
||||
}
|
||||
|
||||
|
||||
const microcents_per_character = ENGINE_PRICING[engine];
|
||||
const exact_cost = microcents_per_character * text.length;
|
||||
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: exact_cost,
|
||||
@@ -139,7 +144,7 @@ class AWSPollyService extends BaseService {
|
||||
}
|
||||
// We can charge immediately
|
||||
await svc_cost.record_cost({ cost: exact_cost });
|
||||
|
||||
|
||||
const polly_speech = await this.synthesize_speech(text, {
|
||||
format: 'mp3',
|
||||
voice_id: voice,
|
||||
@@ -147,31 +152,38 @@ class AWSPollyService extends BaseService {
|
||||
language,
|
||||
engine,
|
||||
});
|
||||
|
||||
|
||||
// Metering integration for TTS usage
|
||||
const actor = Context.get('actor');
|
||||
// AWS Polly TTS metering: track the character count, keyed by engine
|
||||
const trackedUsage = {
|
||||
character: text.length,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `aws-polly:${engine}`);
|
||||
|
||||
const speech = new TypedValue({
|
||||
$: 'stream',
|
||||
content_type: 'audio/mpeg',
|
||||
}, polly_speech.AudioStream);
|
||||
|
||||
return speech;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return speech;
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates AWS credentials object for authentication
|
||||
* @private
|
||||
* @returns {Object} Object containing AWS access key ID and secret access key
|
||||
*/
|
||||
_create_aws_credentials () {
|
||||
_create_aws_credentials() {
|
||||
return {
|
||||
accessKeyId: this.config.aws.access_key,
|
||||
secretAccessKey: this.config.aws.secret_key,
|
||||
};
|
||||
}
|
||||
|
||||
_get_client (region) {
|
||||
_get_client(region) {
|
||||
if ( ! region ) {
|
||||
region = this.config.aws?.region ?? this.global_config.aws?.region
|
||||
?? 'us-west-2';
|
||||
@@ -186,14 +198,13 @@ class AWSPollyService extends BaseService {
|
||||
return this.clients_[region];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Describes available AWS Polly voices and caches the results
|
||||
* @returns {Promise<Object>} Response containing array of voice details in Voices property
|
||||
* @description Fetches voice information from AWS Polly API and caches it for 10 minutes
|
||||
* Uses KV store for caching to avoid repeated API calls
|
||||
*/
|
||||
async describe_voices () {
|
||||
async describe_voices() {
|
||||
let voices = this.modules.kv.get('svc:polly:voices');
|
||||
if ( voices ) {
|
||||
this.log.debug('voices cache hit');
|
||||
@@ -216,7 +227,6 @@ class AWSPollyService extends BaseService {
|
||||
return response;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Synthesizes speech from text using AWS Polly
|
||||
* @param {string} text - The text to synthesize
|
||||
@@ -228,10 +238,10 @@ class AWSPollyService extends BaseService {
|
||||
* @param {string} [options.engine] - TTS engine to use ('standard', 'neural', 'long-form', 'generative')
|
||||
* @returns {Promise<AWS.Polly.SynthesizeSpeechOutput>} The synthesized speech response
|
||||
*/
|
||||
async synthesize_speech (text, { format, voice_id, language, text_type, engine = 'standard' }) {
|
||||
async synthesize_speech(text, { format, voice_id, language, text_type, engine = 'standard' }) {
|
||||
const client = this._get_client(this.config.aws.region);
|
||||
|
||||
let voice = voice_id ?? undefined
|
||||
let voice = voice_id ?? undefined;
|
||||
|
||||
if ( ! voice && language ) {
|
||||
this.log.debug('getting language appropriate voice', { language, engine });
|
||||
@@ -261,7 +271,6 @@ class AWSPollyService extends BaseService {
|
||||
return response;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Attempts to find an appropriate voice for the given language code and engine
|
||||
* @param {string} language - The language code to find a voice for (e.g. 'en-US')
|
||||
@@ -269,13 +278,13 @@ class AWSPollyService extends BaseService {
|
||||
* @returns {Promise<?string>} The voice ID if found, null if no matching voice exists
|
||||
* @private
|
||||
*/
|
||||
async maybe_get_language_appropriate_voice_ (language, engine = 'standard') {
|
||||
async maybe_get_language_appropriate_voice_(language, engine = 'standard') {
|
||||
const voices = await this.describe_voices();
|
||||
|
||||
const voice = voices.Voices.find((voice) => {
|
||||
return voice.LanguageCode === language &&
|
||||
voice.SupportedEngines &&
|
||||
voice.SupportedEngines.includes(engine);
|
||||
return voice.LanguageCode === language &&
|
||||
voice.SupportedEngines &&
|
||||
voice.SupportedEngines.includes(engine);
|
||||
});
|
||||
|
||||
if ( ! voice ) return null;
|
||||
@@ -289,9 +298,9 @@ class AWSPollyService extends BaseService {
|
||||
* @returns {Promise<string>} The default voice ID for the engine
|
||||
* @private
|
||||
*/
|
||||
async get_default_voice_for_engine_ (engine = 'standard') {
|
||||
async get_default_voice_for_engine_(engine = 'standard') {
|
||||
const voices = await this.describe_voices();
|
||||
|
||||
|
||||
// Common default voices for each engine
|
||||
const default_voices = {
|
||||
'standard': ['Salli', 'Joanna', 'Matthew'],
|
||||
@@ -301,23 +310,21 @@ class AWSPollyService extends BaseService {
|
||||
};
|
||||
|
||||
const preferred_voices = default_voices[engine] || ['Salli'];
|
||||
|
||||
for (const voice_name of preferred_voices) {
|
||||
const voice = voices.Voices.find((v) =>
|
||||
v.Id === voice_name &&
|
||||
v.SupportedEngines &&
|
||||
v.SupportedEngines.includes(engine)
|
||||
);
|
||||
if (voice) {
|
||||
|
||||
for ( const voice_name of preferred_voices ) {
|
||||
const voice = voices.Voices.find((v) =>
|
||||
v.Id === voice_name &&
|
||||
v.SupportedEngines &&
|
||||
v.SupportedEngines.includes(engine));
|
||||
if ( voice ) {
|
||||
return voice.Id;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: find any voice that supports the engine
|
||||
const fallback_voice = voices.Voices.find((voice) =>
|
||||
voice.SupportedEngines &&
|
||||
voice.SupportedEngines.includes(engine)
|
||||
);
|
||||
const fallback_voice = voices.Voices.find((voice) =>
|
||||
voice.SupportedEngines &&
|
||||
voice.SupportedEngines.includes(engine));
|
||||
|
||||
return fallback_voice ? fallback_voice.Id : 'Salli';
|
||||
}
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
@@ -22,7 +22,7 @@ const { TextractClient, AnalyzeDocumentCommand, InvalidS3ObjectException } = req
|
||||
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const APIError = require("../../api/APIError");
|
||||
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
/**
|
||||
* AWSTextractService class - Provides OCR (Optical Character Recognition) functionality using AWS Textract
|
||||
@@ -31,21 +31,23 @@ const APIError = require("../../api/APIError");
|
||||
* Handles both S3-stored and buffer-based document processing with automatic region management.
|
||||
*/
|
||||
class AWSTextractService extends BaseService {
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
/**
|
||||
* AWS Textract service for OCR functionality
|
||||
* Provides document analysis capabilities using AWS Textract API
|
||||
* Implements interfaces for OCR recognition and driver capabilities
|
||||
* @extends BaseService
|
||||
*/
|
||||
_construct () {
|
||||
_construct() {
|
||||
this.clients_ = {};
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['driver-capabilities']: {
|
||||
supports_test_mode (iface, method_name) {
|
||||
supports_test_mode(iface, method_name) {
|
||||
return iface === 'puter-ocr' && method_name === 'recognize';
|
||||
}
|
||||
},
|
||||
},
|
||||
['puter-ocr']: {
|
||||
/**
|
||||
@@ -55,7 +57,7 @@ class AWSTextractService extends BaseService {
|
||||
* @param {boolean} params.test_mode - If true, returns sample test output instead of processing
|
||||
* @returns {Promise<Object>} Recognition results containing blocks of text with confidence scores
|
||||
*/
|
||||
async recognize ({ source, test_mode }) {
|
||||
async recognize({ source, test_mode }) {
|
||||
if ( test_mode ) {
|
||||
return {
|
||||
blocks: [
|
||||
@@ -69,7 +71,7 @@ class AWSTextractService extends BaseService {
|
||||
confidence: 1,
|
||||
text: 'The test_mode flag is set to true. This is a sample output.',
|
||||
},
|
||||
]
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
@@ -77,9 +79,9 @@ class AWSTextractService extends BaseService {
|
||||
|
||||
// Simplify the response for common interface
|
||||
const puter_response = {
|
||||
blocks: []
|
||||
blocks: [],
|
||||
};
|
||||
|
||||
|
||||
for ( const block of resp.Blocks ) {
|
||||
if ( block.BlockType === 'PAGE' ) continue;
|
||||
if ( block.BlockType === 'CELL' ) continue;
|
||||
@@ -87,7 +89,7 @@ class AWSTextractService extends BaseService {
|
||||
if ( block.BlockType === 'MERGED_CELL' ) continue;
|
||||
if ( block.BlockType === 'LAYOUT_FIGURE' ) continue;
|
||||
if ( block.BlockType === 'LAYOUT_TEXT' ) continue;
|
||||
|
||||
|
||||
const puter_block = {
|
||||
type: `text/textract:${block.BlockType}`,
|
||||
confidence: block.Confidence,
|
||||
@@ -95,26 +97,32 @@ class AWSTextractService extends BaseService {
|
||||
};
|
||||
puter_response.blocks.push(puter_block);
|
||||
}
|
||||
|
||||
|
||||
return puter_response;
|
||||
}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Service initialization: set up metering service
|
||||
*/
|
||||
async _init() {
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates AWS credentials object for authentication
|
||||
* @private
|
||||
* @returns {Object} Object containing AWS access key ID and secret access key
|
||||
*/
|
||||
_create_aws_credentials () {
|
||||
_create_aws_credentials() {
|
||||
return {
|
||||
accessKeyId: this.config.aws.access_key,
|
||||
secretAccessKey: this.config.aws.secret_key,
|
||||
};
|
||||
}
|
||||
|
||||
_get_client (region) {
|
||||
_get_client(region) {
|
||||
if ( ! region ) {
|
||||
region = this.config.aws?.region ?? this.global_config.aws?.region
|
||||
?? 'us-west-2';
|
||||
@@ -129,7 +137,6 @@ class AWSTextractService extends BaseService {
|
||||
return this.clients_[region];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Analyzes a document using AWS Textract to extract text and layout information
|
||||
* @param {FileFacade} file_facade - Interface to access the document file
|
||||
@@ -138,25 +145,25 @@ class AWSTextractService extends BaseService {
|
||||
* @description Processes document through Textract's AnalyzeDocument API with LAYOUT feature.
|
||||
* Will attempt to use S3 direct access first, falling back to buffer upload if needed.
|
||||
*/
|
||||
async analyze_document (file_facade) {
|
||||
async analyze_document(file_facade) {
|
||||
const {
|
||||
client, document, using_s3
|
||||
client, document, using_s3,
|
||||
} = await this._get_client_and_document(file_facade);
|
||||
|
||||
|
||||
const min_cost = 150 // cents per 1000 pages
|
||||
* Math.pow(10,6) // microcents per cent
|
||||
* Math.pow(10, 6) // microcents per cent
|
||||
/ 1000 // pages
|
||||
; // works out to 150,000 microcents per page
|
||||
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: min_cost,
|
||||
});
|
||||
|
||||
|
||||
if ( ! usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
|
||||
|
||||
// Note: we are using the synchronous command, so cost
|
||||
// should always be the same (only 1 page allowed)
|
||||
await svc_cost.record_cost({ cost: min_cost });
|
||||
@@ -167,12 +174,13 @@ class AWSTextractService extends BaseService {
|
||||
// 'TABLES',
|
||||
// 'FORMS',
|
||||
// 'SIGNATURES',
|
||||
'LAYOUT'
|
||||
'LAYOUT',
|
||||
],
|
||||
});
|
||||
|
||||
let textractResp;
|
||||
try {
|
||||
return await client.send(command);
|
||||
textractResp = await client.send(command);
|
||||
} catch (e) {
|
||||
if ( using_s3 && e instanceof InvalidS3ObjectException ) {
|
||||
const { client, document } =
|
||||
@@ -182,16 +190,29 @@ class AWSTextractService extends BaseService {
|
||||
FeatureTypes: [
|
||||
'LAYOUT',
|
||||
],
|
||||
})
|
||||
return await client.send(command);
|
||||
});
|
||||
textractResp = await client.send(command);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
|
||||
throw e;
|
||||
}
|
||||
|
||||
throw new Error('expected to be unreachable');
|
||||
}
|
||||
// Metering integration for Textract OCR usage
|
||||
const actor = Context.get('actor');
|
||||
// AWS Textract metering: track the page count (number of PAGE blocks, at least 1)
|
||||
let pageCount = 0;
|
||||
if ( textractResp.Blocks ) {
|
||||
for ( const block of textractResp.Blocks ) {
|
||||
if ( block.BlockType === 'PAGE' ) pageCount += 1;
|
||||
}
|
||||
}
|
||||
const trackedUsage = {
|
||||
page: pageCount || 1,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, "aws-textract:detect-document-text");
|
||||
|
||||
return textractResp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets AWS client and document configuration for Textract processing
|
||||
@@ -204,10 +225,10 @@ class AWSTextractService extends BaseService {
|
||||
* @throws {APIError} If file does not exist
|
||||
* @throws {Error} If no suitable input format is available
|
||||
*/
|
||||
async _get_client_and_document (file_facade, force_buffer) {
|
||||
async _get_client_and_document(file_facade, force_buffer) {
|
||||
const try_s3info = await file_facade.get('s3-info');
|
||||
if ( try_s3info && ! force_buffer ) {
|
||||
console.log('S3 INFO', try_s3info)
|
||||
console.log('S3 INFO', try_s3info);
|
||||
return {
|
||||
using_s3: true,
|
||||
client: this._get_client(try_s3info.bucket_region),
|
||||
|
||||
@@ -1,55 +1,58 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { Context } = require("../../util/context");
|
||||
const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
const dedent = require('dedent');
|
||||
|
||||
/**
|
||||
* DeepSeekService class - Provides integration with X.AI's API for chat completions
|
||||
* DeepSeekService class - Provides integration with DeepSeek's API for chat completions
|
||||
* Extends BaseService to implement the puter-chat-completion interface.
|
||||
* Handles model management, message adaptation, streaming responses,
|
||||
* and usage tracking for X.AI's language models like Grok.
|
||||
* and usage tracking for DeepSeek's language models like DeepSeek Chat and Reasoner.
|
||||
* @extends BaseService
|
||||
*/
|
||||
class DeepSeekService extends BaseService {
|
||||
static MODULES = {
|
||||
openai: require('openai'),
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService}
|
||||
*/
|
||||
meteringAndBillingService;
|
||||
/**
|
||||
* Adapts a requested model identifier for this service; currently returns it unchanged
|
||||
* @returns {string} The same model identifier that was passed in
|
||||
*/
|
||||
adapt_model (model) {
|
||||
adapt_model(model) {
|
||||
return model;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the DeepSeek service by setting up the OpenAI client and registering with the AI chat provider
|
||||
* @private
|
||||
* @returns {Promise<void>} Resolves when initialization is complete
|
||||
*/
|
||||
async _init () {
|
||||
async _init() {
|
||||
this.openai = new this.modules.openai.OpenAI({
|
||||
apiKey: this.global_config.services.deepseek.apiKey,
|
||||
baseURL: 'https://api.deepseek.com',
|
||||
@@ -60,15 +63,15 @@ class DeepSeekService extends BaseService {
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the default model identifier for the XAI service
|
||||
* @returns {string} The default model ID 'grok-beta'
|
||||
* Returns the default model identifier for the DeepSeek service
|
||||
* @returns {string} The default model ID 'deepseek-chat'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'grok-beta';
|
||||
get_default_model() {
|
||||
return 'deepseek-chat';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
@@ -76,10 +79,10 @@ class DeepSeekService extends BaseService {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
async models() {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
@@ -88,7 +91,7 @@ class DeepSeekService extends BaseService {
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
async list() {
|
||||
const models = await this.models_();
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
@@ -104,7 +107,7 @@ class DeepSeekService extends BaseService {
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
model = this.adapt_model(model);
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
@@ -114,7 +117,7 @@ class DeepSeekService extends BaseService {
|
||||
message.content = "";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Function calling is just broken on DeepSeek - it never acknowledges
|
||||
// the tool results and instead keeps calling the function over and over.
|
||||
// (see https://github.com/deepseek-ai/DeepSeek-V3/issues/15)
|
||||
@@ -130,17 +133,17 @@ class DeepSeekService extends BaseService {
|
||||
|
||||
Tool call ${message.tool_call_id} returned: ${message.content}.
|
||||
`);
|
||||
for ( let i=messages.length-1; i >= 0 ; i-- ) {
|
||||
for ( let i = messages.length - 1; i >= 0 ; i-- ) {
|
||||
const message = messages[i];
|
||||
if ( message.role === 'tool' ) {
|
||||
messages.splice(i+1, 0, {
|
||||
messages.splice(i + 1, 0, {
|
||||
role: 'system',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: TOOL_TEXT(message),
|
||||
}
|
||||
]
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -156,14 +159,26 @@ class DeepSeekService extends BaseService {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
});
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
stream, completion,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Metering integration now handled via usage_calculator in OpenAIUtil.handle_completion_output
|
||||
const actor = Context.get('actor');
|
||||
const modelDetails = (await this.models_()).find(m => m.id === (model ?? this.get_default_model()));
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `deepseek:${modelDetails.id}`);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves available AI models and their specifications
|
||||
@@ -174,7 +189,7 @@ class DeepSeekService extends BaseService {
|
||||
* - cost: Pricing information object with currency and rates
|
||||
* @private
|
||||
*/
|
||||
async models_ () {
|
||||
async models_() {
|
||||
return [
|
||||
{
|
||||
id: 'deepseek-chat',
|
||||
@@ -199,7 +214,7 @@ class DeepSeekService extends BaseService {
|
||||
output: 168,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
}
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
@@ -207,4 +222,3 @@ class DeepSeekService extends BaseService {
|
||||
module.exports = {
|
||||
DeepSeekService,
|
||||
};
|
||||
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
@@ -30,14 +30,16 @@ const { GoogleGenAI } = require('@google/genai');
|
||||
* the puter-image-generation interface.
|
||||
*/
|
||||
class GeminiImageGenerationService extends BaseService {
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
static MODULES = {
|
||||
}
|
||||
};
|
||||
|
||||
_construct() {
|
||||
this.models_ = {
|
||||
'gemini-2.5-flash-image-preview': {
|
||||
"1024x1024": 0.039,
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -48,7 +50,8 @@ class GeminiImageGenerationService extends BaseService {
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async _init() {
|
||||
this.genAI = new GoogleGenAI({apiKey: this.global_config.services.gemini.apiKey});
|
||||
this.genAI = new GoogleGenAI({ apiKey: this.global_config.services.gemini.apiKey });
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
@@ -56,7 +59,7 @@ class GeminiImageGenerationService extends BaseService {
|
||||
supports_test_mode(iface, method_name) {
|
||||
return iface === 'puter-image-generation' &&
|
||||
method_name === 'generate';
|
||||
}
|
||||
},
|
||||
},
|
||||
['puter-image-generation']: {
|
||||
/**
|
||||
@@ -72,8 +75,8 @@ class GeminiImageGenerationService extends BaseService {
|
||||
*/
|
||||
async generate(params) {
|
||||
const { prompt, quality, test_mode, model, ratio, input_image, input_image_mime_type } = params;
|
||||
|
||||
if (test_mode) {
|
||||
|
||||
if ( test_mode ) {
|
||||
return new TypedValue({
|
||||
$: 'string:url:web',
|
||||
content_type: 'image',
|
||||
@@ -85,19 +88,19 @@ class GeminiImageGenerationService extends BaseService {
|
||||
ratio: ratio || this.constructor.RATIO_SQUARE,
|
||||
model,
|
||||
input_image,
|
||||
input_image_mime_type
|
||||
input_image_mime_type,
|
||||
});
|
||||
|
||||
// Determine if this is a data URL or web URL
|
||||
const isDataUrl = url.startsWith('data:');
|
||||
const image = new TypedValue({
|
||||
$: isDataUrl ? 'string:url:data' : 'string:url:web',
|
||||
content_type: 'image'
|
||||
content_type: 'image',
|
||||
}, url);
|
||||
|
||||
return image;
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static RATIO_SQUARE = { w: 1024, h: 1024 };
|
||||
@@ -108,31 +111,31 @@ class GeminiImageGenerationService extends BaseService {
|
||||
input_image,
|
||||
input_image_mime_type,
|
||||
}) {
|
||||
if (typeof prompt !== 'string') {
|
||||
if ( typeof prompt !== 'string' ) {
|
||||
throw new Error('`prompt` must be a string');
|
||||
}
|
||||
|
||||
if (!ratio || !this._validate_ratio(ratio, model)) {
|
||||
if ( !ratio || !this._validate_ratio(ratio, model) ) {
|
||||
throw new Error('`ratio` must be a valid ratio for model ' + model);
|
||||
}
|
||||
|
||||
// Validate input image if provided
|
||||
if (input_image && !input_image_mime_type) {
|
||||
if ( input_image && !input_image_mime_type ) {
|
||||
throw new Error('`input_image_mime_type` is required when `input_image` is provided');
|
||||
}
|
||||
|
||||
if (input_image_mime_type && !input_image) {
|
||||
if ( input_image_mime_type && !input_image ) {
|
||||
throw new Error('`input_image` is required when `input_image_mime_type` is provided');
|
||||
}
|
||||
|
||||
if (input_image_mime_type && !this._validate_image_mime_type(input_image_mime_type)) {
|
||||
if ( input_image_mime_type && !this._validate_image_mime_type(input_image_mime_type) ) {
|
||||
throw new Error('`input_image_mime_type` must be a valid image MIME type (image/png, image/jpeg, image/webp)');
|
||||
}
|
||||
|
||||
// Somewhat sane defaults
|
||||
model = model ?? 'gemini-2.5-flash-image-preview';
|
||||
|
||||
if (!this.models_[model]) {
|
||||
if ( !this.models_[model] ) {
|
||||
throw APIError.create('field_invalid', null, {
|
||||
key: 'model',
|
||||
expected: 'one of: ' +
|
||||
@@ -142,7 +145,7 @@ class GeminiImageGenerationService extends BaseService {
|
||||
}
|
||||
|
||||
const price_key = `${ratio.w}x${ratio.h}`;
|
||||
if (!this.models_[model][price_key]) {
|
||||
if ( !this.models_[model][price_key] ) {
|
||||
const availableSizes = Object.keys(this.models_[model]);
|
||||
throw APIError.create('field_invalid', null, {
|
||||
key: 'size/quality combination',
|
||||
@@ -152,7 +155,7 @@ class GeminiImageGenerationService extends BaseService {
|
||||
}
|
||||
|
||||
const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
|
||||
if (user_private_uid === 'UNKNOWN') {
|
||||
if ( user_private_uid === 'UNKNOWN' ) {
|
||||
this.errors.report('chat-completion-service:unknown-user', {
|
||||
message: 'failed to get a user ID for a Gemini request',
|
||||
alarm: true,
|
||||
@@ -162,14 +165,14 @@ class GeminiImageGenerationService extends BaseService {
|
||||
|
||||
const exact_cost = this.models_[model][price_key]
|
||||
* 100 // $ USD to cents USD
|
||||
* Math.pow(10, 6) // cents to microcents
|
||||
* Math.pow(10, 6); // cents to microcents
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: exact_cost,
|
||||
});
|
||||
|
||||
if (!usageAllowed) {
|
||||
if ( !usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
|
||||
@@ -178,7 +181,7 @@ class GeminiImageGenerationService extends BaseService {
|
||||
|
||||
// Construct the prompt based on whether we have an input image
|
||||
let contents;
|
||||
if (input_image && input_image_mime_type) {
|
||||
if ( input_image && input_image_mime_type ) {
|
||||
// Image-to-image generation
|
||||
contents = [
|
||||
{ text: `Generate a picture of dimensions ${parseInt(ratio.w)}x${parseInt(ratio.h)} with the prompt: ${prompt}` },
|
||||
@@ -199,23 +202,33 @@ class GeminiImageGenerationService extends BaseService {
|
||||
contents: contents,
|
||||
});
|
||||
let url = undefined;
|
||||
for (const part of response.candidates[0].content.parts) {
|
||||
if (part.text) {
|
||||
} else if (part.inlineData) {
|
||||
for ( const part of response.candidates[0].content.parts ) {
|
||||
if ( part.text ) {
|
||||
// do nothing here
|
||||
} else if ( part.inlineData ) {
|
||||
const imageData = part.inlineData.data;
|
||||
url = "data:image/png;base64," + imageData
|
||||
url = "data:image/png;base64," + imageData;
|
||||
}
|
||||
}
|
||||
|
||||
if (!url) {
|
||||
if ( !url ) {
|
||||
throw new Error('Failed to extract image URL from Gemini response');
|
||||
}
|
||||
|
||||
// Metering usage tracking
|
||||
const actor = Context.get('actor');
|
||||
// Gemini usage: always 1 image, resolution, cost, model
|
||||
const trackedUsage = {
|
||||
[price_key]: 1,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `gemini:${model}`);
|
||||
|
||||
const spending_meta = {
|
||||
model,
|
||||
size: `${ratio.w}x${ratio.h}`,
|
||||
};
|
||||
|
||||
// Legacy spending record for analytics
|
||||
const svc_spending = Context.get('services').get('spending');
|
||||
svc_spending.record_spending('gemini', 'image-generation', spending_meta);
|
||||
|
||||
@@ -229,7 +242,7 @@ class GeminiImageGenerationService extends BaseService {
|
||||
* @private
|
||||
*/
|
||||
_getValidRatios(model) {
|
||||
if (model === 'gemini-2.5-flash-image-preview') {
|
||||
if ( model === 'gemini-2.5-flash-image-preview' ) {
|
||||
return [this.constructor.RATIO_SQUARE];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,14 +3,21 @@ const { GoogleGenerativeAI } = require('@google/generative-ai');
|
||||
const GeminiSquareHole = require("./lib/GeminiSquareHole");
|
||||
const putility = require("@heyputer/putility");
|
||||
const FunctionCalling = require("./lib/FunctionCalling");
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
class GeminiService extends BaseService {
|
||||
/**
|
||||
* @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService}
|
||||
*/
|
||||
meteringAndBillingService = undefined;
|
||||
|
||||
async _init() {
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
@@ -33,9 +40,10 @@ class GeminiService extends BaseService {
|
||||
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
tools = FunctionCalling.make_gemini_tools(tools);
|
||||
|
||||
model = model ?? 'gemini-2.0-flash';
|
||||
const genAI = new GoogleGenerativeAI(this.config.apiKey);
|
||||
const genModel = genAI.getGenerativeModel({
|
||||
model: model ?? 'gemini-2.0-flash',
|
||||
model,
|
||||
tools,
|
||||
generationConfig: {
|
||||
temperature: temperature, // Set temperature (0.0 to 1.0). Defaults to 0.7
|
||||
@@ -59,6 +67,9 @@ class GeminiService extends BaseService {
|
||||
model_details: (await this.models_()).find(m => m.id === model),
|
||||
});
|
||||
|
||||
// Metering integration
|
||||
const actor = Context.get('actor');
|
||||
const meteringPrefix = `gemini:${model}`;
|
||||
if ( stream ) {
|
||||
const genResult = await chat.sendMessageStream(last_message_parts);
|
||||
const stream = genResult.stream;
|
||||
@@ -68,9 +79,16 @@ class GeminiService extends BaseService {
|
||||
stream: true,
|
||||
init_chat_stream:
|
||||
GeminiSquareHole.create_chat_stream_handler({
|
||||
stream, usage_promise,
|
||||
stream,
|
||||
usage_promise,
|
||||
}),
|
||||
usage_promise: usage_promise.then(usageMetadata => {
|
||||
const trackedUsage = {
|
||||
prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
|
||||
completion_tokens: usageMetadata.candidatesTokenCount,
|
||||
cached_tokens: usageMetadata.cachedContentTokenCount || 0,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
|
||||
return usage_calculator({ usageMetadata });
|
||||
}),
|
||||
};
|
||||
@@ -83,6 +101,13 @@ class GeminiService extends BaseService {
|
||||
|
||||
const result = { message };
|
||||
result.usage = usage_calculator(genResult.response);
|
||||
// TODO DS: dedup this logic
|
||||
const trackedUsage = {
|
||||
prompt_tokens: genResult.response.usageMetadata.promptTokenCount - (genResult.cachedContentTokenCount || 0),
|
||||
completion_tokens: genResult.response.usageMetadata.candidatesTokenCount,
|
||||
cached_tokens: genResult.response.usageMetadata.cachedContentTokenCount || 0,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
|
||||
return result;
|
||||
}
|
||||
},
|
||||
|
||||
@@ -1,27 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { TypedValue } = require("../../services/drivers/meta/Runtime");
|
||||
const { Context } = require("../../util/context");
|
||||
const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
|
||||
/**
|
||||
* Service class for integrating with Groq AI's language models.
|
||||
@@ -33,17 +34,18 @@ const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
* @extends BaseService
|
||||
*/
|
||||
class GroqAIService extends BaseService {
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
static MODULES = {
|
||||
Groq: require('groq-sdk'),
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Initializes the GroqAI service by setting up the Groq client and registering with the AI chat provider
|
||||
* @returns {Promise<void>}
|
||||
* @private
|
||||
*/
|
||||
async _init () {
|
||||
async _init() {
|
||||
const Groq = require('groq-sdk');
|
||||
this.client = new Groq({
|
||||
apiKey: this.config.apiKey,
|
||||
@@ -54,26 +56,26 @@ class GroqAIService extends BaseService {
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService; // TODO DS: move to proper extensions
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the default model ID for the Groq AI service
|
||||
* @returns {string} The default model ID 'llama-3.1-8b-instant'
|
||||
*/
|
||||
get_default_model () {
|
||||
get_default_model() {
|
||||
return 'llama-3.1-8b-instant';
|
||||
}
|
||||
|
||||
|
||||
static IMPLEMENTS = {
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
async models() {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
@@ -82,7 +84,7 @@ class GroqAIService extends BaseService {
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
async list() {
|
||||
// They send: { "object": "list", data }
|
||||
const funny_wrapper = await this.client.models.list();
|
||||
return funny_wrapper.data;
|
||||
@@ -95,7 +97,7 @@ class GroqAIService extends BaseService {
|
||||
* @param {boolean} [options.stream] - Whether to stream the response
|
||||
* @returns {TypedValue|Object} Returns either a TypedValue with streaming response or completion object with usage stats
|
||||
*/
|
||||
async complete ({ messages, model, stream, tools, max_tokens, temperature }) {
|
||||
async complete({ messages, model, stream, tools, max_tokens, temperature }) {
|
||||
model = model ?? this.get_default_model();
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
@@ -106,42 +108,52 @@ class GroqAIService extends BaseService {
|
||||
}
|
||||
}
|
||||
|
||||
const actor = Context.get('actor');
|
||||
|
||||
const completion = await this.client.chat.completions.create({
|
||||
messages,
|
||||
model,
|
||||
stream,
|
||||
tools,
|
||||
max_completion_tokens: max_tokens, // max_tokens has been deprecated
|
||||
temperature
|
||||
temperature,
|
||||
});
|
||||
|
||||
const modelDetails = (await this.models_()).find(m => m.id === model);
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
deviations: {
|
||||
index_usage_from_stream_chunk: chunk =>
|
||||
chunk.x_groq?.usage,
|
||||
},
|
||||
usage_calculator: OpenAIUtil.create_usage_calculator({
|
||||
model_details: (await this.models_()).find(m => m.id === model),
|
||||
}),
|
||||
stream, completion,
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `groq:${modelDetails.id}`);
|
||||
// Still return legacy cost calculation for compatibility
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Returns an array of available AI models with their specifications
|
||||
*
|
||||
*
|
||||
* Each model object contains:
|
||||
* - id: Unique identifier for the model
|
||||
* - name: Human-readable name
|
||||
* - context: Maximum context window size in tokens
|
||||
* - cost: Pricing details including currency and token rates
|
||||
*
|
||||
*
|
||||
* @returns {Array<Object>} Array of model specification objects
|
||||
*/
|
||||
models_ () {
|
||||
models_() {
|
||||
return [
|
||||
{
|
||||
id: 'gemma2-9b-it',
|
||||
@@ -164,7 +176,7 @@ class GroqAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 7,
|
||||
output: 7,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama3-groq-70b-8192-tool-use-preview',
|
||||
@@ -196,8 +208,8 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 59,
|
||||
"output": 79
|
||||
}
|
||||
"output": 79,
|
||||
},
|
||||
},
|
||||
{
|
||||
// This was only available on their Discord, not
|
||||
@@ -209,8 +221,8 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 59,
|
||||
"output": 99
|
||||
}
|
||||
"output": 99,
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "llama-3.1-8b-instant",
|
||||
@@ -220,7 +232,7 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 5,
|
||||
"output": 8
|
||||
"output": 8,
|
||||
},
|
||||
max_tokens: 131072,
|
||||
},
|
||||
@@ -234,7 +246,7 @@ class GroqAIService extends BaseService {
|
||||
input: 20,
|
||||
output: 20,
|
||||
},
|
||||
max_tokens:1024,
|
||||
max_tokens: 1024,
|
||||
},
|
||||
{
|
||||
id: 'meta-llama/llama-prompt-guard-2-86m',
|
||||
@@ -246,7 +258,7 @@ class GroqAIService extends BaseService {
|
||||
input: 4,
|
||||
output: 4,
|
||||
},
|
||||
max_tokens:512,
|
||||
max_tokens: 512,
|
||||
},
|
||||
{
|
||||
"id": "llama-3.2-1b-preview",
|
||||
@@ -256,7 +268,7 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 4,
|
||||
"output": 4
|
||||
"output": 4,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -267,8 +279,8 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 6,
|
||||
"output": 6
|
||||
}
|
||||
"output": 6,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama-3.2-11b-vision-preview',
|
||||
@@ -278,7 +290,7 @@ class GroqAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 18,
|
||||
output: 18,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama-3.2-90b-vision-preview',
|
||||
@@ -298,8 +310,8 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 59,
|
||||
"output": 79
|
||||
}
|
||||
"output": 79,
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "llama3-8b-8192",
|
||||
@@ -309,8 +321,8 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 5,
|
||||
"output": 8
|
||||
}
|
||||
"output": 8,
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "mixtral-8x7b-32768",
|
||||
@@ -320,8 +332,8 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 24,
|
||||
"output": 24
|
||||
}
|
||||
"output": 24,
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "llama-guard-3-8b",
|
||||
@@ -331,9 +343,9 @@ class GroqAIService extends BaseService {
|
||||
"currency": "usd-cents",
|
||||
"tokens": 1000000,
|
||||
"input": 20,
|
||||
"output": 20
|
||||
}
|
||||
}
|
||||
"output": 20,
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
@@ -21,6 +21,7 @@
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const axios = require('axios');
|
||||
const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
/**
|
||||
* MistralAIService class extends BaseService to provide integration with the Mistral AI API.
|
||||
@@ -30,20 +31,22 @@ const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
* for different models and implements the puter-chat-completion interface.
|
||||
*/
|
||||
class MistralAIService extends BaseService {
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
static MODULES = {
|
||||
'@mistralai/mistralai': require('@mistralai/mistralai'),
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Initializes the service's cost structure for different Mistral AI models.
|
||||
* Sets up pricing information for various models including token costs for input/output.
|
||||
* Each model entry specifies currency (usd-cents) and costs per million tokens.
|
||||
* @private
|
||||
*/
|
||||
_construct () {
|
||||
_construct() {
|
||||
this.costs_ = {
|
||||
'mistral-large-latest': {
|
||||
aliases: ['mistral-large-2411'],
|
||||
cost:{
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
@@ -233,7 +236,7 @@ class MistralAIService extends BaseService {
|
||||
* Each model entry specifies currency (USD cents) and costs per million tokens.
|
||||
* @private
|
||||
*/
|
||||
async _init () {
|
||||
async _init() {
|
||||
const require = this.require;
|
||||
const { Mistral } = require('@mistralai/mistralai');
|
||||
this.api_base_url = 'https://api.mistral.ai/v1';
|
||||
@@ -247,6 +250,8 @@ class MistralAIService extends BaseService {
|
||||
alias: true,
|
||||
});
|
||||
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
|
||||
// TODO: make this event-driven so it doesn't hold up boot
|
||||
await this.populate_models_();
|
||||
}
|
||||
@@ -257,24 +262,26 @@ class MistralAIService extends BaseService {
|
||||
* @private
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async populate_models_ () {
|
||||
async populate_models_() {
|
||||
const resp = await axios({
|
||||
method: 'get',
|
||||
url: this.api_base_url + '/models',
|
||||
headers: {
|
||||
Authorization: `Bearer ${this.config.apiKey}`
|
||||
}
|
||||
})
|
||||
Authorization: `Bearer ${this.config.apiKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
const response_json = resp.data;
|
||||
const models = response_json.data;
|
||||
this.models_array_ = [];
|
||||
for ( const api_model of models ) {
|
||||
|
||||
|
||||
let cost = this.costs_[api_model.id];
|
||||
if ( ! cost ) for ( const alias of api_model.aliases ) {
|
||||
cost = this.costs_[alias];
|
||||
if ( cost ) break;
|
||||
if ( ! cost ) {
|
||||
for ( const alias of api_model.aliases ) {
|
||||
cost = this.costs_[alias];
|
||||
if ( cost ) break;
|
||||
}
|
||||
}
|
||||
if ( ! cost ) continue;
|
||||
const model = {
|
||||
@@ -299,7 +306,7 @@ class MistralAIService extends BaseService {
|
||||
* @async
|
||||
* @returns {void}
|
||||
*/
|
||||
get_default_model () {
|
||||
get_default_model() {
|
||||
return 'mistral-large-latest';
|
||||
}
|
||||
static IMPLEMENTS = {
|
||||
@@ -307,10 +314,10 @@ class MistralAIService extends BaseService {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
async models() {
|
||||
return this.models_array_;
|
||||
},
|
||||
|
||||
@@ -320,7 +327,7 @@ class MistralAIService extends BaseService {
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
async list() {
|
||||
return this.models_array_.map(m => m.id);
|
||||
},
|
||||
|
||||
@@ -328,7 +335,7 @@ class MistralAIService extends BaseService {
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
@@ -344,6 +351,7 @@ class MistralAIService extends BaseService {
|
||||
|
||||
console.log('MESSAGES TO MISTRAL', messages);
|
||||
|
||||
const actor = Context.get('actor');
|
||||
const completion = await this.client.chat[
|
||||
stream ? 'stream' : 'complete'
|
||||
]({
|
||||
@@ -351,9 +359,11 @@ class MistralAIService extends BaseService {
|
||||
...(tools ? { tools } : {}),
|
||||
messages,
|
||||
max_tokens: max_tokens,
|
||||
temperature
|
||||
temperature,
|
||||
});
|
||||
|
||||
|
||||
const modelDetails = this.models_array_.find(m => m.id === (model ?? this.get_default_model()));
|
||||
|
||||
return await OpenAIUtil.handle_completion_output({
|
||||
deviations: {
|
||||
index_usage_from_stream_chunk: chunk => {
|
||||
@@ -374,14 +384,23 @@ class MistralAIService extends BaseService {
|
||||
completion_tokens: completion.usage.completionTokens,
|
||||
}),
|
||||
},
|
||||
completion, stream,
|
||||
usage_calculator: OpenAIUtil.create_usage_calculator({
|
||||
model_details: this.models_array_.find(m => m.id === model),
|
||||
}),
|
||||
completion,
|
||||
stream,
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
if ( this.meteringAndBillingService ) {
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `mistral:${modelDetails.id}`);
|
||||
}
|
||||
// Still return legacy cost calculation for compatibility
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = { MistralAIService };
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
@@ -23,7 +23,6 @@ const BaseService = require("../../services/BaseService");
|
||||
const { TypedValue } = require("../../services/drivers/meta/Runtime");
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
|
||||
/**
|
||||
* Service class for generating images using OpenAI's DALL-E API.
|
||||
* Extends BaseService to provide image generation capabilities through
|
||||
@@ -32,11 +31,14 @@ const { Context } = require("../../util/context");
|
||||
* validation, and spending tracking.
|
||||
*/
|
||||
class OpenAIImageGenerationService extends BaseService {
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
|
||||
static MODULES = {
|
||||
openai: require('openai'),
|
||||
}
|
||||
|
||||
_construct () {
|
||||
};
|
||||
|
||||
_construct() {
|
||||
this.models_ = {
|
||||
'gpt-image-1': {
|
||||
"low:1024x1024": 0.011,
|
||||
@@ -47,7 +49,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
"medium:1536x1024": 0.063,
|
||||
"high:1024x1024": 0.167,
|
||||
"high:1024x1536": 0.25,
|
||||
"high:1536x1024": 0.25
|
||||
"high:1536x1024": 0.25,
|
||||
},
|
||||
'dall-e-3': {
|
||||
'1024x1024': 0.04,
|
||||
@@ -64,29 +66,31 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the OpenAI client with API credentials from config
|
||||
* @private
|
||||
* @async
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async _init () {
|
||||
async _init() {
|
||||
const sk_key =
|
||||
this.config?.openai?.secret_key ??
|
||||
this.global_config.openai?.secret_key;
|
||||
|
||||
this.openai = new this.modules.openai.OpenAI({
|
||||
apiKey: sk_key
|
||||
apiKey: sk_key,
|
||||
});
|
||||
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['driver-capabilities']: {
|
||||
supports_test_mode (iface, method_name) {
|
||||
supports_test_mode(iface, method_name) {
|
||||
return iface === 'puter-image-generation' &&
|
||||
method_name === 'generate';
|
||||
}
|
||||
},
|
||||
},
|
||||
['puter-image-generation']: {
|
||||
/**
|
||||
@@ -98,9 +102,9 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
* @returns {Promise<string>} URL of the generated image
|
||||
* @throws {Error} If prompt is not a string or ratio is invalid
|
||||
*/
|
||||
async generate (params) {
|
||||
async generate(params) {
|
||||
const { prompt, quality, test_mode, model, ratio } = params;
|
||||
|
||||
|
||||
if ( test_mode ) {
|
||||
return new TypedValue({
|
||||
$: 'string:url:web',
|
||||
@@ -110,28 +114,28 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
const url = await this.generate(prompt, {
|
||||
quality,
|
||||
ratio: ratio || this.constructor.RATIO_SQUARE,
|
||||
model
|
||||
model,
|
||||
});
|
||||
|
||||
const image = new TypedValue({
|
||||
$: 'string:url:web',
|
||||
content_type: 'image'
|
||||
content_type: 'image',
|
||||
}, url);
|
||||
|
||||
return image;
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static RATIO_SQUARE = { w: 1024, h: 1024 };
|
||||
static RATIO_PORTRAIT = { w: 1024, h: 1792 };
|
||||
static RATIO_LANDSCAPE = { w: 1792, h: 1024 };
|
||||
|
||||
|
||||
// GPT-Image-1 specific ratios
|
||||
static RATIO_GPT_PORTRAIT = { w: 1024, h: 1536 };
|
||||
static RATIO_GPT_LANDSCAPE = { w: 1536, h: 1024 };
|
||||
|
||||
async generate (prompt, {
|
||||
async generate(prompt, {
|
||||
ratio,
|
||||
model,
|
||||
quality,
|
||||
@@ -146,8 +150,8 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
|
||||
// Somewhat sane defaults
|
||||
model = model ?? 'gpt-image-1';
|
||||
quality = quality ?? 'low'
|
||||
|
||||
quality = quality ?? 'low';
|
||||
|
||||
if ( ! this.models_[model] ) {
|
||||
throw APIError.create('field_invalid', null, {
|
||||
key: 'model',
|
||||
@@ -156,7 +160,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
got: model,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Validate quality based on the model
|
||||
const validQualities = this._getValidQualities(model);
|
||||
if ( quality !== undefined && !validQualities.includes(quality) ) {
|
||||
@@ -166,7 +170,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
got: quality,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
const size = `${ratio.w}x${ratio.h}`;
|
||||
const price_key = this._buildPriceKey(model, quality, size);
|
||||
if ( ! this.models_[model][price_key] ) {
|
||||
@@ -177,7 +181,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
got: price_key,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
|
||||
if ( user_private_uid === 'UNKNOWN' ) {
|
||||
this.errors.report('chat-completion-service:unknown-user', {
|
||||
@@ -186,20 +190,20 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
trace: true,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
const exact_cost = this.models_[model][price_key]
|
||||
* 100 // $ USD to cents USD
|
||||
* Math.pow(10,6) // cents to microcents
|
||||
|
||||
* Math.pow(10, 6); // cents to microcents
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: exact_cost,
|
||||
});
|
||||
|
||||
|
||||
if ( ! usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
|
||||
|
||||
// We can charge immediately
|
||||
await svc_cost.record_cost({ cost: exact_cost });
|
||||
|
||||
@@ -208,11 +212,18 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
user: user_private_uid,
|
||||
prompt,
|
||||
size,
|
||||
quality
|
||||
quality,
|
||||
});
|
||||
|
||||
|
||||
const result = await this.openai.images.generate(apiParams);
|
||||
|
||||
|
||||
const actor = Context.get('actor');
|
||||
// For image generation, usage is typically image count and resolution
|
||||
const trackedUsage = {
|
||||
[price_key]: 1,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `openai:${model}`);
|
||||
|
||||
// Tiny base64 result for testing
|
||||
// const result = {
|
||||
// data: [
|
||||
@@ -235,18 +246,18 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
size: `${ratio.w}x${ratio.h}`,
|
||||
};
|
||||
|
||||
if (quality) {
|
||||
if ( quality ) {
|
||||
spending_meta.size = quality + ":" + spending_meta.size;
|
||||
}
|
||||
|
||||
const svc_spending = Context.get('services').get('spending');
|
||||
svc_spending.record_spending('openai', 'image-generation', spending_meta);
|
||||
const url = result.data?.[0]?.url || (result.data?.[0]?.b64_json ? "data:image/png;base64," + result.data[0].b64_json : null);
|
||||
|
||||
if (!url) {
|
||||
|
||||
if ( !url ) {
|
||||
throw new Error('Failed to extract image URL from OpenAI response');
|
||||
}
|
||||
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
@@ -257,13 +268,13 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
* @private
|
||||
*/
|
||||
_getValidQualities(model) {
|
||||
if (model === 'gpt-image-1') {
|
||||
if ( model === 'gpt-image-1' ) {
|
||||
return ['low', 'medium', 'high'];
|
||||
}
|
||||
if (model === 'dall-e-2') {
|
||||
if ( model === 'dall-e-2' ) {
|
||||
return [''];
|
||||
}
|
||||
if (model === 'dall-e-3') {
|
||||
}
|
||||
if ( model === 'dall-e-3' ) {
|
||||
return ['', 'hd'];
|
||||
}
|
||||
// Fallback for unknown models - assume no quality tiers
|
||||
@@ -279,7 +290,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
* @private
|
||||
*/
|
||||
_buildPriceKey(model, quality, size) {
|
||||
if (model === 'gpt-image-1') {
|
||||
if ( model === 'gpt-image-1' ) {
|
||||
// gpt-image-1 uses format: "quality:size" - default to low if not specified
|
||||
const qualityLevel = quality || 'low';
|
||||
return `${qualityLevel}:${size}`;
|
||||
@@ -303,7 +314,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
size: baseParams.size,
|
||||
};
|
||||
|
||||
if (model === 'gpt-image-1') {
|
||||
if ( model === 'gpt-image-1' ) {
|
||||
// gpt-image-1 requires the model parameter and uses different quality mapping
|
||||
apiParams.model = model;
|
||||
// Default to low quality if not specified, consistent with _buildPriceKey
|
||||
@@ -311,7 +322,7 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
} else {
|
||||
// dall-e models
|
||||
apiParams.model = model;
|
||||
if (baseParams.quality === 'hd') {
|
||||
if ( baseParams.quality === 'hd' ) {
|
||||
apiParams.quality = 'hd';
|
||||
}
|
||||
}
|
||||
@@ -327,24 +338,24 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
*/
|
||||
_getValidRatios(model) {
|
||||
const commonRatios = [this.constructor.RATIO_SQUARE];
|
||||
|
||||
if (model === 'gpt-image-1') {
|
||||
|
||||
if ( model === 'gpt-image-1' ) {
|
||||
return [
|
||||
...commonRatios,
|
||||
this.constructor.RATIO_GPT_PORTRAIT,
|
||||
this.constructor.RATIO_GPT_LANDSCAPE
|
||||
this.constructor.RATIO_GPT_LANDSCAPE,
|
||||
];
|
||||
} else {
|
||||
// DALL-E models
|
||||
return [
|
||||
...commonRatios,
|
||||
this.constructor.RATIO_PORTRAIT,
|
||||
this.constructor.RATIO_LANDSCAPE
|
||||
this.constructor.RATIO_LANDSCAPE,
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
_validate_ratio (ratio, model) {
|
||||
_validate_ratio(ratio, model) {
|
||||
const validRatios = this._getValidRatios(model);
|
||||
return validRatios.includes(ratio);
|
||||
}
|
||||
|
||||
@@ -142,7 +142,12 @@ class OpenRouterService extends BaseService {
|
||||
const modelDetails = (await this.models_()).find(m => m.id === 'openrouter:' + model);
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
// custom OpenRouter logic because their pricing is weird
|
||||
const trackedUsage = {
|
||||
prompt: usage.prompt_tokens ?? 0,
|
||||
completion: usage.completion_tokens ?? 0,
|
||||
input_cache_read: usage.prompt_tokens_details.cached_tokens ?? 0,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
|
||||
@@ -23,6 +23,9 @@ const BaseService = require("../../services/BaseService");
|
||||
const { TypedValue } = require("../../services/drivers/meta/Runtime");
|
||||
const { nou } = require("../../util/langutil");
|
||||
const { TeePromise } = require('@heyputer/putility').libs.promise;
|
||||
const { Together } = require('together-ai');
|
||||
const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
/**
|
||||
* TogetherAIService class provides integration with Together AI's language models.
|
||||
@@ -32,8 +35,11 @@ const { TeePromise } = require('@heyputer/putility').libs.promise;
|
||||
* @extends BaseService
|
||||
*/
|
||||
class TogetherAIService extends BaseService {
|
||||
/**
|
||||
* @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService}
|
||||
*/
|
||||
meteringAndBillingService;
|
||||
static MODULES = {
|
||||
['together-ai']: require('together-ai'),
|
||||
kv: globalThis.kv,
|
||||
uuidv4: require('uuid').v4,
|
||||
};
|
||||
@@ -45,8 +51,6 @@ class TogetherAIService extends BaseService {
|
||||
* @private
|
||||
*/
|
||||
async _init() {
|
||||
const require = this.require;
|
||||
const Together = require('together-ai');
|
||||
this.together = new Together({
|
||||
apiKey: this.config.apiKey,
|
||||
});
|
||||
@@ -57,6 +61,7 @@ class TogetherAIService extends BaseService {
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -99,12 +104,17 @@ class TogetherAIService extends BaseService {
|
||||
throw new Error('Model Fallback Test 1');
|
||||
}
|
||||
|
||||
/** @type {import('together-ai/streaming.mjs').Stream<import("together-ai/resources/chat/completions.mjs").ChatCompletionChunk>} */
|
||||
const completion = await this.together.chat.completions.create({
|
||||
model: model ?? this.get_default_model(),
|
||||
messages: messages,
|
||||
stream,
|
||||
});
|
||||
|
||||
// Metering integration
|
||||
const actor = Context.get('actor');
|
||||
const modelId = model ?? this.get_default_model();
|
||||
|
||||
if ( stream ) {
|
||||
let usage_promise = new TeePromise();
|
||||
|
||||
@@ -118,10 +128,14 @@ class TogetherAIService extends BaseService {
|
||||
for await ( const chunk of completion ) {
|
||||
// DRY: same as openai
|
||||
if ( chunk.usage ) {
|
||||
// TODO DS: get rid of legacy usage
|
||||
usage_promise.resolve({
|
||||
input_tokens: chunk.usage.prompt_tokens,
|
||||
output_tokens: chunk.usage.completion_tokens,
|
||||
});
|
||||
// Metering: record usage for streamed chunks
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(chunk.usage);
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, modelId);
|
||||
}
|
||||
|
||||
if ( chunk.choices.length < 1 ) continue;
|
||||
@@ -151,6 +165,8 @@ class TogetherAIService extends BaseService {
|
||||
input_tokens: completion.usage.prompt_tokens,
|
||||
output_tokens: completion.usage.completion_tokens,
|
||||
};
|
||||
// Metering: record usage for non-streamed completion
|
||||
this.meteringAndBillingService.utilRecordUsageObject(completion.usage, actor, modelId);
|
||||
return ret;
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1,24 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { Context } = require("../../util/context");
|
||||
const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
|
||||
/**
|
||||
@@ -31,20 +32,20 @@ const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
class XAIService extends BaseService {
|
||||
static MODULES = {
|
||||
openai: require('openai'),
|
||||
}
|
||||
};
|
||||
/** @type {import('../../services/abuse-prevention/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
|
||||
|
||||
adapt_model (model) {
|
||||
adapt_model(model) {
|
||||
return model;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the XAI service by setting up the OpenAI client and registering with the AI chat provider
|
||||
* @private
|
||||
* @returns {Promise<void>} Resolves when initialization is complete
|
||||
*/
|
||||
async _init () {
|
||||
async _init() {
|
||||
this.openai = new this.modules.openai.OpenAI({
|
||||
apiKey: this.global_config.services.xai.apiKey,
|
||||
baseURL: "https://api.x.ai/v1",
|
||||
@@ -55,14 +56,14 @@ class XAIService extends BaseService {
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService; // TODO DS: move to proper extensions
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the default model identifier for the XAI service
|
||||
* @returns {string} The default model ID 'grok-beta'
|
||||
*/
|
||||
get_default_model () {
|
||||
get_default_model() {
|
||||
return 'grok-beta';
|
||||
}
|
||||
|
||||
@@ -71,11 +72,11 @@ class XAIService extends BaseService {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*
|
||||
* @returns Array<Object> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
models() {
|
||||
return this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
@@ -83,7 +84,7 @@ class XAIService extends BaseService {
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
async list() {
|
||||
const models = await this.models_();
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
@@ -99,11 +100,11 @@ class XAIService extends BaseService {
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools }) {
|
||||
async complete({ messages, stream, model, tools }) {
|
||||
model = this.adapt_model(model);
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
|
||||
|
||||
const completion = await this.openai.chat.completions.create({
|
||||
messages,
|
||||
model: model ?? this.get_default_model(),
|
||||
@@ -115,27 +116,42 @@ class XAIService extends BaseService {
|
||||
} : {}),
|
||||
});
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: OpenAIUtil.create_usage_calculator({
|
||||
model_details: (await this.models_()).find(m => m.id === model),
|
||||
}),
|
||||
stream, completion,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
// Metering integration
|
||||
const actor = Context.get('actor');
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const modelDetails = this.models().find(m => m.id === model || m.aliases?.includes(model));
|
||||
const trackedUsage = {
|
||||
prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
|
||||
completion_tokens: usage.completion_tokens ?? 0,
|
||||
cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
||||
};
|
||||
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelDetails.id}`);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves available AI models and their specifications
|
||||
* @returns {Promise<Array>} Array of model objects containing:
|
||||
* @returns Array of model objects containing:
|
||||
* - id: Model identifier string
|
||||
* - name: Human readable model name
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing information object with currency and rates
|
||||
* @private
|
||||
*/
|
||||
async models_ () {
|
||||
models_() {
|
||||
return [
|
||||
{
|
||||
id: 'grok-beta',
|
||||
@@ -169,7 +185,7 @@ class XAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'grok-3-fast',
|
||||
@@ -180,7 +196,7 @@ class XAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 500,
|
||||
output: 2500,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'grok-3-mini',
|
||||
@@ -191,7 +207,7 @@ class XAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 30,
|
||||
output: 50,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'grok-3-mini-fast',
|
||||
@@ -202,7 +218,7 @@ class XAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 60,
|
||||
output: 400,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'grok-2-vision',
|
||||
@@ -213,7 +229,7 @@ class XAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 1000,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'grok-2',
|
||||
@@ -224,7 +240,7 @@ class XAIService extends BaseService {
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 1000,
|
||||
}
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
// Pattern: "aws-textract:{api}:page"
|
||||
// Example: "aws-textract:detect-document-text:page" → 150 microcents per page
|
||||
//
|
||||
// Note: 1,000,000 microcents = $0.01 USD. $1.50 per 1,000 pages = 150 microcents per page.
|
||||
// Note: 1,000,000 microcents = $0.01 USD. $1.50 per 1,000 pages = $0.0015 per page = 0.15 cents per page = 150000 microcents per page.
|
||||
//
|
||||
export const AWS_TEXTRACT_COST_MAP = {
|
||||
// Detect Document Text API: $1.50 per 1,000 pages (150 microcents per page)
|
||||
"aws-textract:detect-document-text:page": 150,
|
||||
// Detect Document Text API: $1.50 per 1,000 pages (150000 microcents per page)
|
||||
"aws-textract:detect-document-text:page": 150000,
|
||||
};
|
||||
@@ -19,10 +19,10 @@
|
||||
|
||||
export const DEEPSEEK_COST_MAP = {
|
||||
// DeepSeek Chat
|
||||
"deepseek:deepseek-chat:input": 56,
|
||||
"deepseek:deepseek-chat:output": 168,
|
||||
"deepseek:deepseek-chat:prompt_tokens": 56,
|
||||
"deepseek:deepseek-chat:completion_tokens": 168,
|
||||
|
||||
// DeepSeek Reasoner
|
||||
"deepseek:deepseek-reasoner:input": 56,
|
||||
"deepseek:deepseek-reasoner:output": 168,
|
||||
"deepseek:deepseek-reasoner:prompt_tokens": 56,
|
||||
"deepseek:deepseek-reasoner:completion_tokens": 168,
|
||||
};
|
||||
@@ -9,9 +9,9 @@
|
||||
*/
|
||||
export const GEMINI_COST_MAP = {
|
||||
// Gemini api usage types (costs per token in microcents)
|
||||
"gemini:gemini-2.0-flash:input": 10,
|
||||
"gemini:gemini-2.0-flash:output": 40,
|
||||
"gemini:gemini-1.5-flash:input": 7.5,
|
||||
"gemini:gemini-1.5-flash:output": 30,
|
||||
'gemini-2.5-flash-image-preview1024x1024': 3_900_000
|
||||
"gemini:gemini-2.0-flash:promptTokenCount": 10,
|
||||
"gemini:gemini-2.0-flash:candidatesTokenCount": 40,
|
||||
"gemini:gemini-1.5-flash:promptTokenCount": 3,
|
||||
"gemini:gemini-1.5-flash:candidatesTokenCount": 2,
|
||||
"gemini:gemini-2.5-flash-image-preview:1024x1024": 3_900_000
|
||||
}
|
||||
|
||||
@@ -19,52 +19,52 @@
|
||||
|
||||
export const GROQ_COST_MAP = {
|
||||
// Gemma models
|
||||
"groq:gemma2-9b-it:input": 20,
|
||||
"groq:gemma2-9b-it:output": 20,
|
||||
"groq:gemma-7b-it:input": 7,
|
||||
"groq:gemma-7b-it:output": 7,
|
||||
"groq:gemma2-9b-it:prompt_tokens": 20,
|
||||
"groq:gemma2-9b-it:completion_tokens": 20,
|
||||
"groq:gemma-7b-it:prompt_tokens": 7,
|
||||
"groq:gemma-7b-it:completion_tokens": 7,
|
||||
|
||||
// Llama 3 Groq Tool Use Preview
|
||||
"groq:llama3-groq-70b-8192-tool-use-preview:input": 89,
|
||||
"groq:llama3-groq-70b-8192-tool-use-preview:output": 89,
|
||||
"groq:llama3-groq-8b-8192-tool-use-preview:input": 19,
|
||||
"groq:llama3-groq-8b-8192-tool-use-preview:output": 19,
|
||||
"groq:llama3-groq-70b-8192-tool-use-preview:prompt_tokens": 89,
|
||||
"groq:llama3-groq-70b-8192-tool-use-preview:completion_tokens": 89,
|
||||
"groq:llama3-groq-8b-8192-tool-use-preview:prompt_tokens": 19,
|
||||
"groq:llama3-groq-8b-8192-tool-use-preview:completion_tokens": 19,
|
||||
|
||||
// Llama 3.1
|
||||
"groq:llama-3.1-70b-versatile:input": 59,
|
||||
"groq:llama-3.1-70b-versatile:output": 79,
|
||||
"groq:llama-3.1-70b-specdec:input": 59,
|
||||
"groq:llama-3.1-70b-specdec:output": 99,
|
||||
"groq:llama-3.1-8b-instant:input": 5,
|
||||
"groq:llama-3.1-8b-instant:output": 8,
|
||||
"groq:llama-3.1-70b-versatile:prompt_tokens": 59,
|
||||
"groq:llama-3.1-70b-versatile:completion_tokens": 79,
|
||||
"groq:llama-3.1-70b-specdec:prompt_tokens": 59,
|
||||
"groq:llama-3.1-70b-specdec:completion_tokens": 99,
|
||||
"groq:llama-3.1-8b-instant:prompt_tokens": 5,
|
||||
"groq:llama-3.1-8b-instant:completion_tokens": 8,
|
||||
|
||||
// Llama Guard
|
||||
"groq:meta-llama/llama-guard-4-12b:input": 20,
|
||||
"groq:meta-llama/llama-guard-4-12b:output": 20,
|
||||
"groq:llama-guard-3-8b:input": 20,
|
||||
"groq:llama-guard-3-8b:output": 20,
|
||||
"groq:meta-llama/llama-guard-4-12b:prompt_tokens": 20,
|
||||
"groq:meta-llama/llama-guard-4-12b:completion_tokens": 20,
|
||||
"groq:llama-guard-3-8b:prompt_tokens": 20,
|
||||
"groq:llama-guard-3-8b:completion_tokens": 20,
|
||||
|
||||
// Prompt Guard
|
||||
"groq:meta-llama/llama-prompt-guard-2-86m:input": 4,
|
||||
"groq:meta-llama/llama-prompt-guard-2-86m:output": 4,
|
||||
"groq:meta-llama/llama-prompt-guard-2-86m:prompt_tokens": 4,
|
||||
"groq:meta-llama/llama-prompt-guard-2-86m:completion_tokens": 4,
|
||||
|
||||
// Llama 3.2 Preview
|
||||
"groq:llama-3.2-1b-preview:input": 4,
|
||||
"groq:llama-3.2-1b-preview:output": 4,
|
||||
"groq:llama-3.2-3b-preview:input": 6,
|
||||
"groq:llama-3.2-3b-preview:output": 6,
|
||||
"groq:llama-3.2-11b-vision-preview:input": 18,
|
||||
"groq:llama-3.2-11b-vision-preview:output": 18,
|
||||
"groq:llama-3.2-90b-vision-preview:input": 90,
|
||||
"groq:llama-3.2-90b-vision-preview:output": 90,
|
||||
"groq:llama-3.2-1b-preview:prompt_tokens": 4,
|
||||
"groq:llama-3.2-1b-preview:completion_tokens": 4,
|
||||
"groq:llama-3.2-3b-preview:prompt_tokens": 6,
|
||||
"groq:llama-3.2-3b-preview:completion_tokens": 6,
|
||||
"groq:llama-3.2-11b-vision-preview:prompt_tokens": 18,
|
||||
"groq:llama-3.2-11b-vision-preview:completion_tokens": 18,
|
||||
"groq:llama-3.2-90b-vision-preview:prompt_tokens": 90,
|
||||
"groq:llama-3.2-90b-vision-preview:completion_tokens": 90,
|
||||
|
||||
// Llama 3 8k/70B
|
||||
"groq:llama3-70b-8192:input": 59,
|
||||
"groq:llama3-70b-8192:output": 79,
|
||||
"groq:llama3-8b-8192:input": 5,
|
||||
"groq:llama3-8b-8192:output": 8,
|
||||
"groq:llama3-70b-8192:prompt_tokens": 59,
|
||||
"groq:llama3-70b-8192:completion_tokens": 79,
|
||||
"groq:llama3-8b-8192:prompt_tokens": 5,
|
||||
"groq:llama3-8b-8192:completion_tokens": 8,
|
||||
|
||||
// Mixtral
|
||||
"groq:mixtral-8x7b-32768:input": 24,
|
||||
"groq:mixtral-8x7b-32768:output": 24,
|
||||
"groq:mixtral-8x7b-32768:prompt_tokens": 24,
|
||||
"groq:mixtral-8x7b-32768:completion_tokens": 24,
|
||||
};
|
||||
@@ -2,6 +2,7 @@ import { AWS_POLLY_COST_MAP } from "./awsPollyCostMap";
|
||||
import { AWS_TEXTRACT_COST_MAP } from "./awsTextractCostMap";
|
||||
import { CLAUDE_COST_MAP } from "./claudeCostMap";
|
||||
import { DEEPSEEK_COST_MAP } from "./deepSeekCostMap";
|
||||
import { GEMINI_COST_MAP } from "./geminiCostMap";
|
||||
import { GROQ_COST_MAP } from "./groqCostMap";
|
||||
import { KV_COST_MAP } from "./kvCostMap";
|
||||
import { MISTRAL_COST_MAP } from "./mistralCostMap";
|
||||
@@ -12,16 +13,17 @@ import { TOGETHER_COST_MAP } from "./togetherCostMap";
|
||||
import { XAI_COST_MAP } from "./xaiCostMap";
|
||||
|
||||
export const COST_MAPS = {
|
||||
...OPENAI_COST_MAP,
|
||||
...KV_COST_MAP,
|
||||
...OPENROUTER_COST_MAP,
|
||||
...MISTRAL_COST_MAP,
|
||||
...GROQ_COST_MAP,
|
||||
...OPENAI_IMAGE_COST_MAP,
|
||||
...XAI_COST_MAP,
|
||||
...DEEPSEEK_COST_MAP,
|
||||
...TOGETHER_COST_MAP,
|
||||
...CLAUDE_COST_MAP,
|
||||
...AWS_POLLY_COST_MAP,
|
||||
...AWS_TEXTRACT_COST_MAP
|
||||
...AWS_TEXTRACT_COST_MAP,
|
||||
...CLAUDE_COST_MAP,
|
||||
...DEEPSEEK_COST_MAP,
|
||||
...GEMINI_COST_MAP,
|
||||
...GROQ_COST_MAP,
|
||||
...KV_COST_MAP,
|
||||
...MISTRAL_COST_MAP,
|
||||
...OPENAI_COST_MAP,
|
||||
...OPENAI_IMAGE_COST_MAP,
|
||||
...OPENROUTER_COST_MAP,
|
||||
...TOGETHER_COST_MAP,
|
||||
...XAI_COST_MAP
|
||||
}
|
||||
@@ -19,42 +19,42 @@
|
||||
|
||||
export const MISTRAL_COST_MAP = {
|
||||
// Mistral models (values in microcents/token, from MistralAIService.js)
|
||||
"mistral:mistral-large-latest:input": 200,
|
||||
"mistral:mistral-large-latest:output": 600,
|
||||
"mistral:pixtral-large-latest:input": 200,
|
||||
"mistral:pixtral-large-latest:output": 600,
|
||||
"mistral:mistral-small-latest:input": 20,
|
||||
"mistral:mistral-small-latest:output": 60,
|
||||
"mistral:codestral-latest:input": 30,
|
||||
"mistral:codestral-latest:output": 90,
|
||||
"mistral:ministral-8b-latest:input": 10,
|
||||
"mistral:ministral-8b-latest:output": 10,
|
||||
"mistral:ministral-3b-latest:input": 4,
|
||||
"mistral:ministral-3b-latest:output": 4,
|
||||
"mistral:pixtral-12b:input": 15,
|
||||
"mistral:pixtral-12b:output": 15,
|
||||
"mistral:mistral-nemo:input": 15,
|
||||
"mistral:mistral-nemo:output": 15,
|
||||
"mistral:open-mistral-7b:input": 25,
|
||||
"mistral:open-mistral-7b:output": 25,
|
||||
"mistral:open-mixtral-8x7b:input": 7,
|
||||
"mistral:open-mixtral-8x7b:output": 7,
|
||||
"mistral:open-mixtral-8x22b:input": 2,
|
||||
"mistral:open-mixtral-8x22b:output": 6,
|
||||
"mistral:magistral-medium-latest:input": 200,
|
||||
"mistral:magistral-medium-latest:output": 500,
|
||||
"mistral:magistral-small-latest:input": 10,
|
||||
"mistral:magistral-small-latest:output": 10,
|
||||
"mistral:mistral-medium-latest:input": 40,
|
||||
"mistral:mistral-medium-latest:output": 200,
|
||||
"mistral:mistral-moderation-latest:input": 10,
|
||||
"mistral:mistral-moderation-latest:output": 10,
|
||||
"mistral:devstral-small-latest:input": 10,
|
||||
"mistral:devstral-small-latest:output": 10,
|
||||
"mistral:mistral-saba-latest:input": 20,
|
||||
"mistral:mistral-saba-latest:output": 60,
|
||||
"mistral:open-mistral-nemo:input": 10,
|
||||
"mistral:open-mistral-nemo:output": 10,
|
||||
"mistral:mistral-ocr-latest:input": 100,
|
||||
"mistral:mistral-ocr-latest:output": 300,
|
||||
"mistral:mistral-large-latest:prompt_tokens": 200,
|
||||
"mistral:mistral-large-latest:completion_tokens": 600,
|
||||
"mistral:pixtral-large-latest:prompt_tokens": 200,
|
||||
"mistral:pixtral-large-latest:completion_tokens": 600,
|
||||
"mistral:mistral-small-latest:prompt_tokens": 20,
|
||||
"mistral:mistral-small-latest:completion_tokens": 60,
|
||||
"mistral:codestral-latest:prompt_tokens": 30,
|
||||
"mistral:codestral-latest:completion_tokens": 90,
|
||||
"mistral:ministral-8b-latest:prompt_tokens": 10,
|
||||
"mistral:ministral-8b-latest:completion_tokens": 10,
|
||||
"mistral:ministral-3b-latest:prompt_tokens": 4,
|
||||
"mistral:ministral-3b-latest:completion_tokens": 4,
|
||||
"mistral:pixtral-12b:prompt_tokens": 15,
|
||||
"mistral:pixtral-12b:completion_tokens": 15,
|
||||
"mistral:mistral-nemo:prompt_tokens": 15,
|
||||
"mistral:mistral-nemo:completion_tokens": 15,
|
||||
"mistral:open-mistral-7b:prompt_tokens": 25,
|
||||
"mistral:open-mistral-7b:completion_tokens": 25,
|
||||
"mistral:open-mixtral-8x7b:prompt_tokens": 7,
|
||||
"mistral:open-mixtral-8x7b:completion_tokens": 7,
|
||||
"mistral:open-mixtral-8x22b:prompt_tokens": 2,
|
||||
"mistral:open-mixtral-8x22b:completion_tokens": 6,
|
||||
"mistral:magistral-medium-latest:prompt_tokens": 200,
|
||||
"mistral:magistral-medium-latest:completion_tokens": 500,
|
||||
"mistral:magistral-small-latest:prompt_tokens": 10,
|
||||
"mistral:magistral-small-latest:completion_tokens": 10,
|
||||
"mistral:mistral-medium-latest:prompt_tokens": 40,
|
||||
"mistral:mistral-medium-latest:completion_tokens": 200,
|
||||
"mistral:mistral-moderation-latest:prompt_tokens": 10,
|
||||
"mistral:mistral-moderation-latest:completion_tokens": 10,
|
||||
"mistral:devstral-small-latest:prompt_tokens": 10,
|
||||
"mistral:devstral-small-latest:completion_tokens": 10,
|
||||
"mistral:mistral-saba-latest:prompt_tokens": 20,
|
||||
"mistral:mistral-saba-latest:completion_tokens": 60,
|
||||
"mistral:open-mistral-nemo:prompt_tokens": 10,
|
||||
"mistral:open-mistral-nemo:completion_tokens": 10,
|
||||
"mistral:mistral-ocr-latest:prompt_tokens": 100,
|
||||
"mistral:mistral-ocr-latest:completion_tokens": 300,
|
||||
};
|
||||
@@ -6,17 +6,32 @@
|
||||
//
|
||||
// Naming pattern: "openai:{model}:{size}", or "openai:{model}:{quality}:{size}" when a quality tier applies ("hd" for DALL-E 3 HD images; "low", "medium" or "high" for gpt-image-1)
|
||||
|
||||
import { toMicroCents } from "../utils";
|
||||
|
||||
|
||||
|
||||
export const OPENAI_IMAGE_COST_MAP = {
|
||||
// DALL-E 3
|
||||
"openai:dall-e-3:1024x1024": 40000, // $0.04
|
||||
"openai:dall-e-3:1024x1792": 80000, // $0.08
|
||||
"openai:dall-e-3:1792x1024": 80000, // $0.08
|
||||
"openai:dall-e-3:hd:1024x1024": 80000, // $0.08
|
||||
"openai:dall-e-3:hd:1024x1792": 120000, // $0.12
|
||||
"openai:dall-e-3:hd:1792x1024": 120000, // $0.12
|
||||
"openai:dall-e-3:1024x1024": toMicroCents(0.04), // $0.04
|
||||
"openai:dall-e-3:1024x1792": toMicroCents(0.08), // $0.08
|
||||
"openai:dall-e-3:1792x1024": toMicroCents(0.08), // $0.08
|
||||
"openai:dall-e-3:hd:1024x1024": toMicroCents(0.08), // $0.08
|
||||
"openai:dall-e-3:hd:1024x1792": toMicroCents(0.12), // $0.12
|
||||
"openai:dall-e-3:hd:1792x1024": toMicroCents(0.12), // $0.12
|
||||
|
||||
// DALL-E 2
|
||||
"openai:dall-e-2:1024x1024": 20000, // $0.02
|
||||
"openai:dall-e-2:512x512": 18000, // $0.018
|
||||
"openai:dall-e-2:256x256": 16000, // $0.016
|
||||
"openai:dall-e-2:1024x1024": toMicroCents(0.02), // $0.02
|
||||
"openai:dall-e-2:512x512": toMicroCents(0.018), // $0.018
|
||||
"openai:dall-e-2:256x256": toMicroCents(0.016), // $0.016
|
||||
|
||||
// gpt-image-1
|
||||
"openai:gpt-image-1:low:1024x1024": toMicroCents(0.011),
|
||||
"openai:gpt-image-1:low:1024x1536": toMicroCents(0.016),
|
||||
"openai:gpt-image-1:low:1536x1024": toMicroCents(0.016),
|
||||
"openai:gpt-image-1:medium:1024x1024": toMicroCents(0.042),
|
||||
"openai:gpt-image-1:medium:1024x1536": toMicroCents(0.063),
|
||||
"openai:gpt-image-1:medium:1536x1024": toMicroCents(0.063),
|
||||
"openai:gpt-image-1:high:1024x1024": toMicroCents(0.167),
|
||||
"openai:gpt-image-1:high:1024x1536": toMicroCents(0.25),
|
||||
"openai:gpt-image-1:high:1536x1024": toMicroCents(0.25),
|
||||
};
|
||||
@@ -19,35 +19,35 @@
|
||||
|
||||
export const XAI_COST_MAP = {
|
||||
// Grok Beta
|
||||
"xai:grok-beta:input": 500,
|
||||
"xai:grok-beta:output": 1500,
|
||||
"xai:grok-beta:prompt_tokens": 500,
|
||||
"xai:grok-beta:completion-tokens": 1500,
|
||||
|
||||
// Grok Vision Beta
|
||||
"xai:grok-vision-beta:input": 500,
|
||||
"xai:grok-vision-beta:output": 1500,
|
||||
"xai:grok-vision-beta:prompt_tokens": 500,
|
||||
"xai:grok-vision-beta:completion-tokens": 1500,
|
||||
"xai:grok-vision-beta:image": 1000,
|
||||
|
||||
// Grok 3
|
||||
"xai:grok-3:input": 300,
|
||||
"xai:grok-3:output": 1500,
|
||||
"xai:grok-3:prompt_tokens": 300,
|
||||
"xai:grok-3:completion-tokens": 1500,
|
||||
|
||||
// Grok 3 Fast
|
||||
"xai:grok-3-fast:input": 500,
|
||||
"xai:grok-3-fast:output": 2500,
|
||||
"xai:grok-3-fast:prompt_tokens": 500,
|
||||
"xai:grok-3-fast:completion-tokens": 2500,
|
||||
|
||||
// Grok 3 Mini
|
||||
"xai:grok-3-mini:input": 30,
|
||||
"xai:grok-3-mini:output": 50,
|
||||
"xai:grok-3-mini:prompt_tokens": 30,
|
||||
"xai:grok-3-mini:completion-tokens": 50,
|
||||
|
||||
// Grok 3 Mini Fast
|
||||
"xai:grok-3-mini-fast:input": 60,
|
||||
"xai:grok-3-mini-fast:output": 400,
|
||||
"xai:grok-3-mini-fast:prompt_tokens": 60,
|
||||
"xai:grok-3-mini-fast:completion-tokens": 400,
|
||||
|
||||
// Grok 2 Vision
|
||||
"xai:grok-2-vision:input": 200,
|
||||
"xai:grok-2-vision:output": 1000,
|
||||
"xai:grok-2-vision:prompt_tokens": 200,
|
||||
"xai:grok-2-vision:completion-tokens": 1000,
|
||||
|
||||
// Grok 2
|
||||
"xai:grok-2:input": 200,
|
||||
"xai:grok-2:output": 1000,
|
||||
"xai:grok-2:prompt_tokens": 200,
|
||||
"xai:grok-2:completion-tokens": 1000,
|
||||
};
|
||||
@@ -0,0 +1 @@
|
||||
/**
 * Converts an amount in US dollars to microcents.
 *
 * One cent is 1,000,000 microcents and one dollar is 100 cents. The product
 * is rounded to the nearest integer so that fractional-dollar prices
 * (e.g. 0.018) do not leave floating-point noise in the result.
 *
 * @param dollars - Amount in US dollars.
 * @returns The amount in microcents, rounded to the nearest integer.
 */
export const toMicroCents = (dollars: number): number => {
    const microCentsPerCent = 1_000_000;
    const centsPerDollar = 100;
    // Keep the multiplication order (microcents first, then cents) so the
    // intermediate floating-point value is the same before rounding.
    const unrounded = dollars * microCentsPerCent * centsPerDollar;
    return Math.round(unrounded);
};
|
||||
Reference in New Issue
Block a user