feat: refactor ai to have single entry point and follow defined model structure (#2114)
* feat: refactor ai to have single entry point and follow defined model structure
* fix: missing openrouter registration
* fix: dedupe models
* feat: provide usage in stream mode 🚀
3 .gitignore vendored
@@ -66,4 +66,5 @@ AGENTS.md
*.map
coverage/
*.log
2927 package-lock.json generated
File diff suppressed because it is too large
@@ -14,6 +14,7 @@
"@eslint/js": "^9.35.0",
"@playwright/test": "^1.56.1",
"@stylistic/eslint-plugin": "^5.3.1",
"@types/mime-types": "^3.0.1",
"@types/uuid": "^10.0.0",
"@typescript-eslint/eslint-plugin": "^8.46.1",
"@typescript-eslint/parser": "^8.46.1",
@@ -53,7 +54,7 @@
"build": "npx eslint --quiet -c eslint/mandatory.eslint.config.js src/backend/src extensions && npm run build:ts && cd src/gui && node ./build.js",
"check-translations": "node tools/check-translations.js",
"prepare": "husky",
"build:ts": "tsc",
"build:ts": "tsc -p tsconfig.build.json",
"gen": "./scripts/gen.sh"
},
"workspaces": [
@@ -16,35 +16,34 @@
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
const CoreModule = require('./src/CoreModule.js');
const { Kernel } = require('./src/Kernel.js');
const DatabaseModule = require('./src/DatabaseModule.js');
const LocalDiskStorageModule = require('./src/LocalDiskStorageModule.js');
const MemoryStorageModule = require('./src/MemoryStorageModule.js');
const SelfHostedModule = require('./src/modules/selfhosted/SelfHostedModule.js');
const { testlaunch } = require('./src/index.js');
const BaseService = require('./src/services/BaseService.js');
const { Context } = require('./src/util/context.js');
const { TestDriversModule } = require('./src/modules/test-drivers/TestDriversModule.js');
const { PuterAIModule } = require('./src/modules/puterai/PuterAIModule.js');
const { BroadcastModule } = require('./src/modules/broadcast/BroadcastModule.js');
const { WebModule } = require('./src/modules/web/WebModule.js');
const { Core2Module } = require('./src/modules/core/Core2Module.js');
const { TemplateModule } = require('./src/modules/template/TemplateModule.js');
const { PuterFSModule } = require('./src/modules/puterfs/PuterFSModule.js');
const { PerfMonModule } = require('./src/modules/perfmon/PerfMonModule.js');
const { AppsModule } = require('./src/modules/apps/AppsModule.js');
const { DevelopmentModule } = require('./src/modules/development/DevelopmentModule.js');
const { HostOSModule } = require('./src/modules/hostos/HostOSModule.js');
const { InternetModule } = require('./src/modules/internet/InternetModule.js');
const { CaptchaModule } = require('./src/modules/captcha/CaptchaModule.js');
const { EntityStoreModule } = require('./src/modules/entitystore/EntityStoreModule.js');
const { KVStoreModule } = require('./src/modules/kvstore/KVStoreModule.js');
const { DomainModule } = require('./src/modules/domain/DomainModule.js');
const { DNSModule } = require('./src/modules/dns/DNSModule.js');
const { TestConfigModule } = require('./src/modules/test-config/TestConfigModule.js');
import CoreModule from './src/CoreModule.js';
import DatabaseModule from './src/DatabaseModule.js';
import { testlaunch } from './src/index.js';
import { Kernel } from './src/Kernel.js';
import LocalDiskStorageModule from './src/LocalDiskStorageModule.js';
import MemoryStorageModule from './src/MemoryStorageModule.js';
import { PuterAIModule } from './src/modules/ai/PuterAIChatModule.js';
import { AppsModule } from './src/modules/apps/AppsModule.js';
import { BroadcastModule } from './src/modules/broadcast/BroadcastModule.js';
import { CaptchaModule } from './src/modules/captcha/CaptchaModule.js';
import { Core2Module } from './src/modules/core/Core2Module.js';
import { DevelopmentModule } from './src/modules/development/DevelopmentModule.js';
import { DNSModule } from './src/modules/dns/DNSModule.js';
import { DomainModule } from './src/modules/domain/DomainModule.js';
import { EntityStoreModule } from './src/modules/entitystore/EntityStoreModule.js';
import { HostOSModule } from './src/modules/hostos/HostOSModule.js';
import { InternetModule } from './src/modules/internet/InternetModule.js';
import { KVStoreModule } from './src/modules/kvstore/KVStoreModule.js';
import { PerfMonModule } from './src/modules/perfmon/PerfMonModule.js';
import { PuterFSModule } from './src/modules/puterfs/PuterFSModule.js';
import SelfHostedModule from './src/modules/selfhosted/SelfHostedModule.js';
import { TestConfigModule } from './src/modules/test-config/TestConfigModule.js';
import { TestDriversModule } from './src/modules/test-drivers/TestDriversModule.js';
import { WebModule } from './src/modules/web/WebModule.js';
import BaseService from './src/services/BaseService.js';
import { Context } from './src/util/context.js';

module.exports = {
export default {
    helloworld: () => {
        console.log('Hello, World!');
        process.exit(0);
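The hunk above migrates the self-hosted entry point from CommonJS requires to ES module imports, and swaps `module.exports = { ... }` for `export default { ... }`. A minimal sketch of what changes for a consumer (file name illustrative, not from the diff):

// Before (CommonJS):
//     const entry = require('./exports.js');
//     entry.helloworld();
// After (ESM):
//     import entry from './exports.js';
//     entry.helloworld();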
@@ -82,7 +82,7 @@
"svg-captcha": "^1.4.0",
"svgo": "^3.0.2",
"tiktoken": "^1.0.16",
"together-ai": "^0.29.0",
"together-ai": "^0.32.0",
"tweetnacl": "^1.0.3",
"ua-parser-js": "^1.0.38",
"uglify-js": "^3.17.4",
@@ -35,6 +35,7 @@ const readline = require('node:readline/promises');
const { RuntimeModuleRegistry } = require('./extension/RuntimeModuleRegistry');
const { RuntimeModule } = require('./extension/RuntimeModule');
const deep_proto_merge = require('./config/deep_proto_merge');
const { kv } = require('./util/kvSingleton');

const { quot } = libs.string;

@@ -63,8 +64,6 @@ class Kernel extends AdvancedBase {
    }

    _runtime_init (boot_parameters) {
        const kvjs = require('@heyputer/kv.js');
        const kv = new kvjs();
        global.kv = kv;
        global.cl = console.log;
@@ -26,7 +26,7 @@ const { quot } = require('@heyputer/putility').libs.string;
 * @property {string} message the error message
 * @property {object} source the source of the error
 */
module.exports = class APIError {
class APIError {
    static codes = {
        // General
        'unknown_error': {
@@ -560,14 +560,14 @@ module.exports = class APIError {
     *
     * @static
     * @param {number|string} status
     * @param {object} source
     * @param {Error} source
     * @param {string|Error|object} fields one of the following:
     * - a string to use as the error message
     * - an Error object to use as the source of the error
     * - an object with a message property to use as the error message
     * @returns
     */
    static create (status, source, fields = {}) {
    static create (status, source = {}, fields = {}) {
        // Just the error code
        if ( typeof status === 'string' ) {
            const code = this.codes[status];
@@ -669,3 +669,6 @@ module.exports = class APIError {
        return `APIError(${this.status}, ${this.message})`;
    }
};

module.exports = APIError;
module.exports.APIError = APIError;
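The two assignments at the end keep both import styles working while the codebase migrates away from `module.exports = class ...`. A minimal sketch, assuming the file is required from a sibling module:

const APIError = require('./APIError');              // legacy default-style require
const { APIError: Named } = require('./APIError');   // new named-destructuring style
console.log(APIError === Named); // true — both resolve to the same class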
@@ -21,9 +21,8 @@ const { is_valid_uuid4 } = require('../../helpers');
const { Context } = require('../../util/context');
const { PathBuilder } = require('../../util/pathutil');
const APIError = require('../APIError');
const _path = require('path');

module.exports = class FSNodeParam {
class FSNodeParam {
    constructor (srckey, options) {
        this.srckey = srckey;
        this.options = options ?? {};
@@ -77,4 +76,7 @@ module.exports = class FSNodeParam {
        const resolved_path = PathBuilder.resolve(uidOrPath, { puterfs: true });
        return await fs.node({ path: resolved_path });
    }
};
};

module.exports = FSNodeParam;
module.exports.FSNodeParam = FSNodeParam;
@@ -1,8 +1,5 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.FSEntry = exports.protobufPackage = void 0;
const wire_1 = require("@bufbuild/protobuf/wire");
exports.protobufPackage = "";
import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire";
export const protobufPackage = "";
function createBaseFSEntry() {
    return {
        uuid: "",
@@ -19,8 +16,8 @@ function createBaseFSEntry() {
        size: 0,
    };
}
exports.FSEntry = {
    encode(message, writer = new wire_1.BinaryWriter()) {
export const FSEntry = {
    encode(message, writer = new BinaryWriter()) {
        if (message.uuid !== "") {
            writer.uint32(10).string(message.uuid);
        }
@@ -60,7 +57,7 @@ exports.FSEntry = {
        return writer;
    },
    decode(input, length) {
        const reader = input instanceof wire_1.BinaryReader ? input : new wire_1.BinaryReader(input);
        const reader = input instanceof BinaryReader ? input : new BinaryReader(input);
        const end = length === undefined ? reader.len : reader.pos + length;
        const message = createBaseFSEntry();
        while (reader.pos < end) {
@@ -215,7 +212,7 @@ exports.FSEntry = {
        return obj;
    },
    create(base) {
        return exports.FSEntry.fromPartial(base ?? {});
        return FSEntry.fromPartial(base ?? {});
    },
    fromPartial(object) {
        const message = createBaseFSEntry();
102 src/backend/src/modules/ai/PuterAIChatModule.js Normal file
@@ -0,0 +1,102 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import { AdvancedBase } from '@heyputer/putility';
import config from '../../config.js';
import { AIInterfaceService } from '../../services/ai/AIInterfaceService.js';
import { AIChatService } from '../../services/ai/chat/AIChatService.js';
import { GeminiImageGenerationService } from '../../services/ai/image/GeminiImageGenerationService.js';
import { OpenAIImageGenerationService } from '../../services/ai/image/OpenAIImageGenerationService.js';
import { TogetherImageGenerationService } from '../../services/ai/image/TogetherImageGenerationService.js';
import { AWSTextractService } from '../../services/ai/ocr/AWSTextractService.js';
import { ElevenLabsVoiceChangerService } from '../../services/ai/sts/ElevenLabsVoiceChangerService.js';
import { OpenAISpeechToTextService } from '../../services/ai/stt/OpenAISpeechToTextService.js';
import { AWSPollyService } from '../../services/ai/tts/AWSPollyService.js';
import { ElevenLabsTTSService } from '../../services/ai/tts/ElevenLabsTTSService.js';
import { OpenAITTSService } from '../../services/ai/tts/OpenAITTSService.js';
import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService.js';
import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService.js';

/**
 * PuterAIModule class extends AdvancedBase to manage and register various AI services.
 * This module handles the initialization and registration of multiple AI-related services
 * including text processing, speech synthesis, chat completion, and image generation.
 * Services are conditionally registered based on configuration settings, allowing for
 * flexible deployment with different AI providers like AWS, OpenAI, Claude, Together AI,
 * Mistral, Groq, and XAI.
 * @extends AdvancedBase
 */
export class PuterAIModule extends AdvancedBase {
    /**
     * Module for managing AI-related services in the Puter platform
     * Extends AdvancedBase to provide core functionality
     * Handles registration and configuration of various AI services like OpenAI, Claude, AWS services etc.
     */
    async install (context) {
        const services = context.get('services');

        services.registerService('__ai-interfaces', AIInterfaceService);

        // completion ai service
        services.registerService('ai-chat', AIChatService);

        // TODO DS: centralize other service types too

        // TODO: services should govern their own availability instead of the module deciding what to register
        if ( config?.services?.['aws-textract']?.aws ) {
            services.registerService('aws-textract', AWSTextractService);
        }

        if ( config?.services?.['aws-polly']?.aws ) {
            services.registerService('aws-polly', AWSPollyService);
        }

        if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
            services.registerService('elevenlabs-tts', ElevenLabsTTSService);
            services.registerService('elevenlabs-voice-changer', ElevenLabsVoiceChangerService);
        }

        if ( config?.services?.openai || config?.openai ) {
            services.registerService('openai-image-generation', OpenAIImageGenerationService);
            services.registerService('openai-video-generation', OpenAIVideoGenerationService);
            services.registerService('openai-tts', OpenAITTSService);
            services.registerService('openai-speech2txt', OpenAISpeechToTextService);
        }

        if ( config?.services?.['together-ai'] ) {
            services.registerService('together-image-generation', TogetherImageGenerationService);
            services.registerService('together-video-generation', TogetherVideoGenerationService);
        }

        if ( config?.services?.['gemini'] ) {
            services.registerService('gemini-image-generation', GeminiImageGenerationService);
        }
    }
}
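Each provider block in install() gates registration on configuration. A minimal sketch of a config object that would enable only the Gemini image-generation service (keys illustrative, not a documented schema):

const config = {
    services: {
        // Presence of this entry causes install() to call
        // services.registerService('gemini-image-generation', ...)
        gemini: { apiKey: '...' },
    },
};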
@@ -1,792 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { PassThrough } = require('stream');
const APIError = require('../../api/APIError');
const config = require('../../config');
const BaseService = require('../../services/BaseService');
const { DB_WRITE } = require('../../services/database/consts');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const { AsModeration } = require('./lib/AsModeration');
const FunctionCalling = require('./lib/FunctionCalling');
const Messages = require('./lib/Messages');
const Streaming = require('./lib/Streaming');

// Maximum number of fallback attempts when a model fails, including the first attempt
const MAX_FALLBACKS = 3 + 1;

/**
 * AIChatService class extends BaseService to provide AI chat completion functionality.
 * Manages multiple AI providers, models, and fallback mechanisms for chat interactions.
 * Handles model registration, usage tracking, cost calculation, content moderation,
 * and implements the puter-chat-completion driver interface. Supports streaming responses
 * and maintains detailed model information including pricing and capabilities.
 */
class AIChatService extends BaseService {
    static MODULES = {
        kv: globalThis.kv,
        uuidv4: require('uuid').v4,
        cuid2: require('@paralleldrive/cuid2').createId,
    };

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    /**
     * Initializes the service by setting up core properties.
     * Creates empty arrays for providers and model lists,
     * and initializes an empty object for the model map.
     * Called during service instantiation.
     * @private
     */
    _construct () {
        this.providers = [];
        this.simple_model_list = [];
        this.detail_model_list = [];
        this.detail_model_map = {};
    }

    get_model_details (model_name, context) {
        let model_details = this.detail_model_map[model_name];
        if ( Array.isArray(model_details) && context ) {
            for ( const model of model_details ) {
                if ( model.provider === context.service_used ) {
                    model_details = model;
                    break;
                }
            }
        }
        if ( Array.isArray(model_details) ) {
            model_details = model_details[0];
        }
        return model_details;
    }

    /**
     * Initializes the service: generates a KV namespace key, obtains a
     * database handle for AI usage tracking, and registers the
     * `max_tokens_exceeded` error with the api-error service.
     * @private
     */
    _init () {
        this.kvkey = this.modules.uuidv4();

        this.db = this.services.get('database').get(DB_WRITE, 'ai-usage');

        const svc_apiError = this.services.get('api-error');
        svc_apiError.register({
            max_tokens_exceeded: {
                status: 400,
                message: ({ input_tokens, max_tokens }) =>
                    'Input exceeds maximum token count. ' +
                    `Input has ${input_tokens} tokens, ` +
                    `but the maximum is ${max_tokens}.`,
            },
        });
    }

    /**
     * Handles consolidation during service boot by registering service aliases
     * and populating model lists/maps from providers.
     *
     * Registers each provider as an 'ai-chat' service alias and fetches their
     * available models and pricing information. Populates:
     * - simple_model_list: Basic list of supported models
     * - detail_model_list: Detailed model info including costs
     * - detail_model_map: Maps model IDs/aliases to their details
     *
     * @returns {Promise<void>}
     */
    async ['__on_boot.consolidation'] () {
        {
            const svc_driver = this.services.get('driver');
            for ( const provider of this.providers ) {
                svc_driver.register_service_alias('ai-chat',
                    provider.service_name,
                    { iface: 'puter-chat-completion' });
            }
        }

        for ( const provider of this.providers ) {
            const delegate = this.services.get(provider.service_name)
                .as('puter-chat-completion');

            // Populate simple model list
            {
                /**
                 * Populates the simple model list by fetching available models from the delegate service.
                 * Wraps the delegate.list() call in a try-catch block to handle potential errors gracefully.
                 * If the call fails, logs the error and returns an empty array to avoid breaking the service.
                 * The fetched models are added to this.simple_model_list.
                 *
                 * @private
                 * @returns {Promise<void>}
                 */
                const models = await (async () => {
                    try {
                        return await delegate.list() ?? [];
                    } catch (e) {
                        this.log.error(e);
                        return [];
                    }
                })();
                this.simple_model_list.push(...models);
            }

            // Populate detail model list and map
            {
                /**
                 * Populates the detail model list and map with model information from the provider.
                 * Fetches detailed model data including pricing and capabilities.
                 * Handles model aliases and potential conflicts by storing multiple models in arrays.
                 * Annotates models with their provider service name.
                 * Catches and logs any errors during model fetching.
                 * @private
                 */
                const models = await (async () => {
                    try {
                        return await delegate.models() ?? [];
                    } catch (e) {
                        this.log.error(e);
                        return [];
                    }
                })();
                const annotated_models = [];
                for ( const model of models ) {
                    annotated_models.push({
                        ...model,
                        provider: provider.service_name,
                    });
                }
                this.detail_model_list.push(...annotated_models);
                /**
                 * Helper function to set or push a model into the detail_model_map.
                 * If there's no existing entry for the key, sets it directly.
                 * If there's a conflict, converts the entry to an array and pushes the new model.
                 * @param {string} key - The model ID or alias
                 * @param {Object} model - The model details to add
                 */
                const set_or_push = (key, model) => {
                    // Typical case: no conflict
                    if ( ! this.detail_model_map[key] ) {
                        this.detail_model_map[key] = model;
                        return;
                    }

                    // Conflict: model name will map to an array
                    let array = this.detail_model_map[key];
                    if ( ! Array.isArray(array) ) {
                        array = [array];
                        this.detail_model_map[key] = array;
                    }

                    array.push(model);
                };
                for ( const model of annotated_models ) {
                    set_or_push(model.id, model);

                    if ( ! model.aliases ) continue;

                    for ( const alias of model.aliases ) {
                        set_or_push(alias, model);
                    }
                }
            }
        }
    }
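    // A minimal standalone sketch of the set_or_push conflict behavior above
    // (model objects are illustrative, not real registry entries):
    //
    //     const map = {};
    //     const demo = (key, model) => {
    //         if ( ! map[key] ) { map[key] = model; return; }
    //         if ( ! Array.isArray(map[key]) ) map[key] = [map[key]];
    //         map[key].push(model);
    //     };
    //     demo('gpt-4o', { id: 'gpt-4o', provider: 'openai-completion' });
    //     demo('gpt-4o', { id: 'openrouter:openai/gpt-4o', provider: 'openrouter' });
    //     console.log(Array.isArray(map['gpt-4o'])); // true: conflicting name became an array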
    register_provider (spec) {
        this.providers.push(spec);
    }

    static IMPLEMENTS = {
        ['driver-capabilities']: {
            supports_test_mode (iface, method_name) {
                return iface === 'puter-chat-completion' &&
                    method_name === 'complete';
            },
        },
        /**
         * Implements the 'puter-chat-completion' interface methods for AI chat functionality.
         * Handles model selection, fallbacks, usage tracking, and moderation.
         * Contains methods for listing available models, completing chat prompts,
         * and managing provider interactions.
         *
         * @property {Object} models - Available AI models with details like costs
         * @property {Object} list - Simplified list of available models
         * @property {Object} complete - Main method for chat completion requests
         * @param {Object} parameters - Chat completion parameters including model and messages
         * @returns {Promise<Object>} Chat completion response with usage stats
         * @throws {Error} If service is called directly or no fallback models available
         */
        ['puter-chat-completion']: {
            /**
             * Returns list of available AI models with detailed information
             *
             * Delegates to the intended service's models() method if a delegate exists,
             * otherwise returns the internal detail_model_list containing all available models
             * across providers with their capabilities and pricing information.
             *
             * For an example of the expected model object structure, see the `async models_`
             * private method at the bottom of any service with hard-coded model details such
             * as ClaudeService or GroqAIService.
             *
             * @returns {Promise<Array<Object>>} Array of model objects with details like id, provider, cost, etc.
             */
            async models () {
                const delegate = this.get_delegate();
                if ( ! delegate ) return await this.models_();
                return await delegate.models();
            },

            /**
             * Reports model names (including aliased names) only, with no
             * additional detail.
             * @returns {Promise<Array<string>>} Array of model name strings
             */
            async list () {
                const delegate = this.get_delegate();
                if ( ! delegate ) return await this.list_();
                return await delegate.list();
            },

            /**
             * Completes a chat interaction using one of the available AI models
             *
             * This service registers itself under an alias for each other AI
             * chat service, which results in DriverService always calling this
             * `complete` implementation first, which delegates to the intended
             * service.
             *
             * The return value may be anything that DriverService knows how to
             * coerce to the intended result. When `options.stream` is FALSE,
             * this is typically a raw object for the JSON response. When
             * `options.stream` is TRUE, the result is an object with this
             * structure:
             *
             *     {
             *         stream: true,
             *         response: stream {
             *             content_type: 'application/x-ndjson',
             *         }
             *     }
             *
             * @param {Object} options - The completion options
             * @param {Array} options.messages - Array of chat messages to process
             * @param {boolean} options.stream - Whether to stream the response
             * @param {string} options.model - The name of a model to use
             * @returns {{stream: boolean, [k:string]: unknown}} Returns either an object with stream:true property or a completion object
             */
            async complete (parameters) {
                const client_driver_call = Context.get('client_driver_call');
                let { test_mode, intended_service, response_metadata } = client_driver_call;

                const completionId = this.modules.cuid2();
                this.log.noticeme('AIChatService.complete', { intended_service, test_mode });
                const svc_event = this.services.get('event');
                const event = {
                    actor: Context.get('actor'),
                    completionId,
                    allow: true,
                    intended_service,
                    parameters,
                };
                await svc_event.emit('ai.prompt.validate', event);
                if ( ! event.allow ) {
                    test_mode = true;
                    if ( event.custom ) parameters.custom = event.custom;
                }

                if ( parameters.messages ) {
                    parameters.messages =
                        Messages.normalize_messages(parameters.messages);
                }

                // Skip moderation for Ollama (local service) and other local services
                const should_moderate = !test_mode &&
                    intended_service !== 'ollama' &&
                    !parameters.model?.startsWith('ollama:');

                if ( should_moderate && !await this.moderate(parameters) ) {
                    test_mode = true;
                    throw APIError.create('moderation_failed');
                }

                // Only set moderated flag if we actually ran moderation
                if ( !test_mode && should_moderate ) {
                    Context.set('moderated', true);
                }

                if ( test_mode ) {
                    intended_service = 'fake-chat';
                    if ( event.abuse ) {
                        parameters.model = 'abuse';
                    }
                }

                if ( parameters.tools ) {
                    FunctionCalling.normalize_tools_object(parameters.tools);
                }

                if ( intended_service === this.service_name ) {
                    throw new Error('Calling ai-chat directly is not yet supported');
                }

                const svc_driver = this.services.get('driver');
                let ret, error;
                let service_used = intended_service;
                let model_used = this.get_model_from_request(parameters, {
                    intended_service,
                });

                // Updated: Check usage and get a boolean result instead of throwing error
                const actor = Context.get('actor');
                const model_details = this.get_model_details(model_used, {
                    service_used,
                });

                if ( ! model_details ) {
                    // TODO (xiaochen): replace with a standard link
                    const available_models_url = `${this.global_config.origin}/puterai/chat/models`;

                    throw APIError.create('field_invalid', null, {
                        key: 'model',
                        expected: `a valid model name from ${available_models_url}`,
                        got: model_used,
                    });
                }

                const model_input_cost = model_details.cost.input;
                const model_output_cost = model_details.cost.output;
                const model_max_tokens = model_details.max_tokens;
                const text = Messages.extract_text(parameters.messages);
                const approximate_input_cost = text.length / 4 * model_input_cost; // TODO DS: guesstimate tokens better
                const usageAllowed = await this.meteringService.hasEnoughCredits(actor, approximate_input_cost);

                // Handle usage limits reached case
                if ( ! usageAllowed ) {
                    // Reroute the request to the usage-limited placeholder service
                    service_used = 'usage-limited-chat';
                    model_used = 'usage-limited';
                    // Update intended_service to match service_used
                    intended_service = service_used;
                }

                // available is no longer defined, so use meteringService to get available credits
                const availableCredits = await this.meteringService.getRemainingUsage(actor);
                const max_allowed_output_amount =
                    availableCredits - approximate_input_cost;

                const max_allowed_output_tokens =
                    max_allowed_output_amount / model_output_cost;

                if ( model_max_tokens ) {
                    parameters.max_tokens = Math.floor(Math.min(parameters.max_tokens ?? Number.POSITIVE_INFINITY,
                        max_allowed_output_tokens,
                        model_max_tokens - (Math.ceil(text.length / 4))));
                    if ( parameters.max_tokens < 1 ) {
                        parameters.max_tokens = undefined;
                    }
                }
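                // Worked example of the clamp above (illustrative numbers):
                // with availableCredits = 1000, approximate_input_cost = 200,
                // and model_output_cost = 0.5 per token, the credit budget
                // allows (1000 - 200) / 0.5 = 1600 output tokens, so
                // max_tokens becomes min(requested, 1600,
                // model_max_tokens - estimated input tokens).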
                try {
                    ret = await svc_driver.call_new_({
                        actor: Context.get('actor'),
                        service_name: intended_service,
                        skip_usage: true,
                        iface: 'puter-chat-completion',
                        method: 'complete',
                        args: parameters,
                    });
                } catch (e) {
                    const tried = [];
                    let model = model_used;

                    // TODO: if conflict models exist, add service name
                    tried.push(model);

                    error = e;

                    // Distinguishing between user errors and service errors
                    // is very messy because of different conventions between
                    // services. This is a best-effort attempt to catch user
                    // errors and throw them as 400s.
                    const is_request_error = (() => {
                        if ( e instanceof APIError ) {
                            return true;
                        }
                        if ( e.type === 'invalid_request_error' ) {
                            return true;
                        }
                        let some_error = e;
                        while ( some_error ) {
                            if ( some_error.type === 'invalid_request_error' ) {
                                return true;
                            }
                            some_error = some_error.error ?? some_error.cause;
                        }
                        return false;
                    })();

                    if ( is_request_error ) {
                        console.log(e.stack);
                        throw APIError.create('error_400_from_delegate', e, {
                            delegate: intended_service,
                            message: e.message,
                        });
                    }
                    console.error(e);

                    if ( config.disable_fallback_mechanisms ) {
                        throw e;
                    }

                    this.log.error('error calling service', {
                        intended_service,
                        model,
                        error: e,
                    });
                    while ( error ) {
                        // No fallbacks for pseudo-models
                        if ( intended_service === 'fake-chat' ) {
                            break;
                        }

                        const fallback = this.get_fallback_model({
                            model, tried,
                        });

                        if ( ! fallback ) {
                            throw new Error('no fallback model available');
                        }

                        const {
                            fallback_service_name,
                            fallback_model_name,
                        } = fallback;

                        this.log.warn('model fallback', {
                            intended_service,
                            fallback_service_name,
                            fallback_model_name,
                        });

                        // Check usage for fallback model too (with updated method)
                        const actor = Context.get('actor');
                        const fallbackUsageAllowed = await this.meteringService.hasEnoughCredits(actor, 1);

                        // If usage not allowed for fallback, use usage-limited-chat instead
                        if ( ! fallbackUsageAllowed ) {
                            // Reroute to the usage-limited placeholder service
                            service_used = 'usage-limited-chat';
                            model_used = 'usage-limited';
                            // Clear the error to exit the fallback loop
                            error = null;

                            // Call the usage-limited service
                            ret = await svc_driver.call_new_({
                                actor: Context.get('actor'),
                                service_name: 'usage-limited-chat',
                                skip_usage: true,
                                iface: 'puter-chat-completion',
                                method: 'complete',
                                args: parameters,
                            });
                        } else {
                            // Normal fallback flow continues
                            try {
                                ret = await svc_driver.call_new_({
                                    actor: Context.get('actor'),
                                    service_name: fallback_service_name,
                                    skip_usage: true,
                                    iface: 'puter-chat-completion',
                                    method: 'complete',
                                    args: {
                                        ...parameters,
                                        model: fallback_model_name,
                                    },
                                });
                                error = null;
                                service_used = fallback_service_name;
                                model_used = fallback_model_name;
                                response_metadata.fallback = {
                                    service: fallback_service_name,
                                    model: fallback_model_name,
                                    tried: tried,
                                };
                            } catch (e) {
                                error = e;
                                tried.push(fallback_model_name);
                                this.log.error('error calling fallback', {
                                    intended_service,
                                    model,
                                    error: e,
                                });
                            }
                        }
                    }
                }

                ret.result.via_ai_chat_service = true;
                response_metadata.service_used = service_used;

                // Add flag if we're using the usage-limited service
                if ( service_used === 'usage-limited-chat' ) {
                    response_metadata.usage_limited = true;
                }

                const username = Context.get('actor').type?.user?.username;

                if ( ret.result.stream ) {
                    if ( ret.result.init_chat_stream ) {
                        const stream = new PassThrough();
                        const retval = new TypedValue({
                            $: 'stream',
                            content_type: 'application/x-ndjson',
                            chunked: true,
                        }, stream);

                        const chatStream = new Streaming.AIChatStream({
                            stream,
                        });

                        (async () => {
                            try {
                                await ret.result.init_chat_stream({ chatStream });
                            } catch (e) {
                                this.errors.report('error during stream response', {
                                    source: e,
                                });
                                stream.write(`${JSON.stringify({
                                    type: 'error',
                                    message: e.message,
                                })}\n`);
                                stream.end();
                            } finally {
                                if ( ret.result.finally_fn ) {
                                    await ret.result.finally_fn();
                                }
                            }
                        })();

                        return retval;
                    }

                    return ret.result.response;
                }

                await svc_event.emit('ai.prompt.complete', {
                    username,
                    intended_service,
                    parameters,
                    result: ret.result,
                    model_used,
                    service_used,
                });

                if ( parameters.response?.normalize ) {
                    ret.result.message =
                        Messages.normalize_single_message(ret.result.message);
                    ret.result = {
                        message: ret.result.message,
                        via_ai_chat_service: true,
                        normalized: true,
                    };
                }

                return ret.result;
            },
        },
    };
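    // A minimal sketch of consuming the ndjson stream produced above, assuming
    // a Node Readable of 'application/x-ndjson' lines (transport details are
    // illustrative; each line is one JSON event, e.g. {"type":"error",...}):
    //
    //     const readline = require('node:readline');
    //     async function readChatStream (stream) {
    //         const rl = readline.createInterface({ input: stream });
    //         for await ( const line of rl ) {
    //             if ( ! line.trim() ) continue;
    //             const event = JSON.parse(line);
    //             if ( event.type === 'error' ) throw new Error(event.message);
    //             console.log(event); // text deltas, tool calls, usage, etc.
    //         }
    //     }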
    /**
     * Moderates chat messages for inappropriate content using OpenAI's moderation service
     *
     * @param {Object} params - The parameters object
     * @param {Array} params.messages - Array of chat messages to moderate
     * @returns {Promise<boolean>} Returns true if content is appropriate, false if flagged
     *
     * @description
     * Extracts text content from messages and checks each against OpenAI's moderation.
     * Handles both string content and structured message objects.
     * Returns false immediately if any message is flagged as inappropriate.
     * Falls back to a Claude-based moderation check if the OpenAI service is
     * unavailable, and throws if no moderation service works.
     */
    async moderate ({ messages }) {
        if ( process.env.TEST_MODERATION_FAILURE ) return false;
        const fulltext = Messages.extract_text(messages);
        let mod_last_error = null;
        let mod_result = null;
        try {
            const svc_openai = this.services.get('openai-completion');
            mod_result = await svc_openai.check_moderation(fulltext);
            if ( mod_result.flagged ) return false;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }
        try {
            const svc_claude = this.services.get('claude');
            const chat = svc_claude.as('puter-chat-completion');
            const mod = new AsModeration({
                chat,
                model: 'claude-3-haiku-20240307',
            });
            if ( ! await mod.moderate(fulltext) ) {
                return false;
            }
            mod_last_error = null;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }

        if ( mod_last_error ) {
            this.log.error('moderation error', {
                fulltext,
                mod_last_error,
            });
            throw new Error('no working moderation service');
        }
        return true;
    }

    async models_ () {
        return this.detail_model_list;
    }

    /**
     * Returns a list of available AI models with basic details
     * @returns {Promise<Array>} Array of simple model objects containing basic model information
     */
    async list_ () {
        return this.simple_model_list;
    }

    /**
     * Gets the appropriate delegate service for handling chat completion requests.
     * If the intended service is this service (ai-chat), returns undefined.
     * Otherwise returns the intended service wrapped as a puter-chat-completion interface.
     *
     * @returns {Object|undefined} The delegate service or undefined if intended service is ai-chat
     */
    get_delegate () {
        const client_driver_call = Context.get('client_driver_call');
        if ( client_driver_call.intended_service === this.service_name ) {
            return undefined;
        }
        console.log('getting service', client_driver_call.intended_service);
        const service = this.services.get(client_driver_call.intended_service);
        return service.as('puter-chat-completion');
    }

    /**
     * Find an appropriate fallback model by matching the target model's
     * provider and name against equivalent models on the aggregate providers
     * (openrouter, togetherai), and selecting the first candidate that is not
     * in the tried list. The candidate list is cached in KV per model.
     *
     * @param {*} param0
     * @returns
     */
    get_fallback_model ({ model, tried }) {
        let target_model = this.detail_model_map[model];

        if ( ! target_model ) {
            this.log.error('could not find model', { model });
            throw new Error('could not find model');
        }
        if ( Array.isArray(target_model) ) {
            // TODO: better conflict resolution
            this.log.noticeme('conflict exists', { model, target_model });
            target_model = target_model[0];
        }

        // First check KV for the cached candidate list
        let potentialFallbacks = this.modules.kv.get(`${this.kvkey}:fallbacks:${model}`);

        if ( ! potentialFallbacks ) {
            // Calculate the candidate list
            const models = this.detail_model_list;

            let aiProvider, modelToSearch;
            if ( target_model.id.startsWith('openrouter:') || target_model.id.startsWith('togetherai:') ) {
                [aiProvider, modelToSearch] = target_model.id.replace('openrouter:', '').replace('togetherai:', '').toLowerCase().split('/');
            } else {
                [aiProvider, modelToSearch] = [
                    target_model.provider.toLowerCase().replace('gemini', 'google').replace('openai-completion', 'openai'),
                    target_model.id.toLowerCase(),
                ];
            }

            const potentialMatches = models.filter(model => {
                const possibleModelNames = [`openrouter:${aiProvider}/${modelToSearch}`,
                    `togetherai:${aiProvider}/${modelToSearch}`, ...(target_model.aliases?.map((alias) => [`openrouter:${aiProvider}/${alias}`,
                        `togetherai:${aiProvider}/${alias}`])?.flat() ?? [])];

                return !!possibleModelNames.find(possibleName => model.id.toLowerCase() === possibleName);
            }).slice(0, MAX_FALLBACKS);

            this.modules.kv.set(`${this.kvkey}:fallbacks:${model}`, potentialMatches);
            potentialFallbacks = potentialMatches;
        }

        for ( const model of potentialFallbacks ) {
            if ( tried.includes(model.id) ) continue;
            if ( model.provider === 'fake-chat' ) continue;

            return {
                fallback_service_name: model.provider,
                fallback_model_name: model.id,
            };
        }

        // No fallbacks available
        this.log.error('no fallbacks', {
            potentialFallbacks,
            tried,
        });
    }
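    // Example of the candidate matching above (names illustrative): for a
    // target model with provider 'openai-completion' and id 'gpt-4o',
    // aiProvider becomes 'openai' and modelToSearch 'gpt-4o', so fallback
    // candidates are models whose id equals 'openrouter:openai/gpt-4o' or
    // 'togetherai:openai/gpt-4o' (plus the same forms for each alias),
    // excluding anything already in `tried`.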
    get_model_from_request (parameters, modified_context = {}) {
        const client_driver_call = Context.get('client_driver_call');
        let { intended_service } = client_driver_call;

        if ( modified_context.intended_service ) {
            intended_service = modified_context.intended_service;
        }

        let model = parameters.model;
        if ( ! model ) {
            const service = this.services.get(intended_service);
            if ( ! service.get_default_model ) {
                throw new Error('could not infer model from service');
            }
            model = service.get_default_model();
            if ( ! model ) {
                throw new Error('could not infer model from service');
            }
        }

        return model;
    }
}

module.exports = { AIChatService };
@@ -1,42 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const BaseService = require('../../services/BaseService');

/**
 * Service class that handles AI test mode functionality.
 * Extends BaseService to register test services for AI chat completions.
 * Used for testing and development of AI-related features by providing
 * a mock implementation of the chat completion service.
 */
class AITestModeService extends BaseService {
    /**
     * Service for managing AI test mode functionality
     * @extends BaseService
     */
    async _init () {
        const svc_driver = this.services.get('driver');
        svc_driver.register_test_service('puter-chat-completion', 'ai-chat');
    }
}

module.exports = {
    AITestModeService,
};
@@ -1,495 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { default: Anthropic, toFile } = require('@anthropic-ai/sdk');
const BaseService = require('../../services/BaseService');
const FunctionCalling = require('./lib/FunctionCalling');
const Messages = require('./lib/Messages');
const FSNodeParam = require('../../api/filesystem/FSNodeParam');
const { LLRead } = require('../../filesystem/ll_operations/ll_read');
const { Context } = require('../../util/context');
const mime = require('mime-types');

/**
 * ClaudeService class extends BaseService to provide integration with Anthropic's Claude AI models.
 * Implements the puter-chat-completion interface for handling AI chat interactions.
 * Manages message streaming, token limits, model selection, and API communication with Claude.
 * Supports system prompts, message adaptation, and usage tracking.
 * @extends BaseService
 */
class ClaudeService extends BaseService {

    // Traits definitions
    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            async models () {
                return this.models();
            },
            async list () {
                return this.list();
            },
            async complete (...args) {
                return this.complete(...args);
            },
        },
    };

    /**
     * @type {import('@anthropic-ai/sdk').Anthropic}
     */
    anthropic;

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    #meteringService;

    async _init () {
        this.anthropic = new Anthropic({
            apiKey: this.config.apiKey,
            // 10 minutes is the default; we need to override the timeout to
            // disable an "aggressive" preemptive error that's thrown
            // erroneously by the SDK.
            // (https://github.com/anthropics/anthropic-sdk-typescript/issues/822)
            timeout: 10 * 60 * 1001,
        });

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.#meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
    }

    /**
     * Returns the default model identifier for Claude API interactions
     * @returns {string} The default model ID 'claude-3-5-sonnet-latest'
     */
    get_default_model () {
        return 'claude-3-5-sonnet-latest';
    }

    async list () {
        const models = this.models();
        const model_names = [];
        for ( const model of models ) {
            model_names.push(model.id);
            if ( model.aliases ) {
                model_names.push(...model.aliases);
            }
        }
        return model_names;
    }

    /**
     *
     * @param {object} arg
     * @param {Array} arg.messages
     * @param {boolean} [arg.stream]
     * @param {string} arg.model
     * @param {Array} [arg.tools]
     * @param {number} [arg.max_tokens]
     * @param {number} [arg.temperature]
     * @returns
     */
    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
        tools = FunctionCalling.make_claude_tools(tools);

        let system_prompts;
        // Unsure why system_prompts is an array, but it always seems to have
        // exactly one element, and the real array of system prompts seems to
        // be its [0].content -- NS
        [system_prompts, messages] = Messages.extract_and_remove_system_messages(messages);

        // Apply the cache control tag to all content blocks
        if (
            system_prompts.length > 0 &&
            system_prompts[0].cache_control &&
            system_prompts[0]?.content
        ) {
            system_prompts[0].content = system_prompts[0].content.map(prompt => {
                prompt.cache_control = system_prompts[0].cache_control;
                return prompt;
            });
        }

        messages = messages.map(message => {
            if ( message.cache_control ) {
                message.content[0].cache_control = message.cache_control;
            }
            delete message.cache_control;
            return message;
        });

        const sdk_params = {
            model: model ?? this.get_default_model(),
            max_tokens: Math.floor(max_tokens) ||
                ((
                    model === 'claude-3-5-sonnet-20241022'
                    || model === 'claude-3-5-sonnet-20240620'
                ) ? 8192 : this.models().filter(e => (e.name === model || e.aliases?.includes(model)))[0]?.max_tokens || 4096), // required
            temperature: temperature || 0, // required
            ...( (system_prompts && system_prompts[0]?.content) ? {
                system: system_prompts[0]?.content,
            } : {}),
            tool_choice: {
                type: 'auto',
                disable_parallel_tool_use: true,
            },
            messages,
            ...(tools ? { tools } : {}),
        };
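        // Example of the max_tokens default above (illustrative): with no
        // explicit max_tokens and model 'claude-opus-4-5', the lookup in
        // models() yields max_tokens: 64000; the two Claude 3.5 Sonnet
        // snapshots are special-cased to 8192, and an unrecognized model
        // falls back to 4096.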
console.log(sdk_params.max_tokens);
|
||||
|
||||
let beta_mode = false;
|
||||
|
||||
// Perform file uploads
|
||||
const file_delete_tasks = [];
|
||||
const actor = Context.get('actor');
|
||||
const { user } = actor.type;
|
||||
|
||||
const file_input_tasks = [];
|
||||
for ( const message of messages ) {
|
||||
// We can assume `message.content` is not undefined because
|
||||
// Messages.normalize_single_message ensures this.
|
||||
for ( const contentPart of message.content ) {
|
||||
if ( ! contentPart.puter_path ) continue;
|
||||
file_input_tasks.push({
|
||||
node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
|
||||
req: { user },
|
||||
getParam: () => contentPart.puter_path,
|
||||
}),
|
||||
contentPart,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const promises = [];
|
||||
for ( const task of file_input_tasks ) {
|
||||
promises.push((async () => {
|
||||
const ll_read = new LLRead();
|
||||
const stream = await ll_read.run({
|
||||
actor: Context.get('actor'),
|
||||
fsNode: task.node,
|
||||
});
|
||||
|
||||
const mimeType = mime.contentType(await task.node.get('name'));
|
||||
|
||||
beta_mode = true;
|
||||
const fileUpload = await this.anthropic.beta.files.upload({
|
||||
file: await toFile(stream, undefined, { type: mimeType }),
|
||||
}, {
|
||||
betas: ['files-api-2025-04-14'],
|
||||
});
|
||||
|
||||
file_delete_tasks.push({ file_id: fileUpload.id });
|
||||
// We have to copy a table from the documentation here:
|
||||
// https://docs.anthropic.com/en/docs/build-with-claude/files
|
||||
const contentBlockTypeForFileBasedOnMime = (() => {
|
||||
if ( mimeType.startsWith('image/') ) {
|
||||
return 'image';
|
||||
}
|
||||
if ( mimeType.startsWith('text/') ) {
|
||||
return 'document';
|
||||
}
|
||||
if ( mimeType === 'application/pdf' || mimeType === 'application/x-pdf' ) {
|
||||
return 'document';
|
||||
}
|
||||
return 'container_upload';
|
||||
})();
|
||||
|
||||
delete task.contentPart.puter_path,
|
||||
task.contentPart.type = contentBlockTypeForFileBasedOnMime;
|
||||
task.contentPart.source = {
|
||||
type: 'file',
|
||||
file_id: fileUpload.id,
|
||||
};
|
||||
})());
|
||||
}
|
||||
await Promise.all(promises);
|
||||
|
||||
const cleanup_files = async () => {
|
||||
const promises = [];
|
||||
for ( const task of file_delete_tasks ) {
|
||||
promises.push((async () => {
|
||||
try {
|
||||
await this.anthropic.beta.files.delete(task.file_id,
|
||||
{ betas: ['files-api-2025-04-14'] });
|
||||
} catch (e) {
|
||||
this.errors.report('claude:file-delete-task', {
|
||||
source: e,
|
||||
trace: true,
|
||||
alarm: true,
|
||||
extra: { file_id: task.file_id },
|
||||
});
|
||||
}
|
||||
})());
|
||||
}
|
||||
await Promise.all(promises);
|
||||
};
|
||||
|
||||
if ( beta_mode ) {
|
||||
Object.assign(sdk_params, { betas: ['files-api-2025-04-14'] });
|
||||
}
|
||||
const anthropic = beta_mode ? this.anthropic.beta : this.anthropic;
|
||||
|
||||
if ( stream ) {
|
||||
const init_chat_stream = async ({ chatStream }) => {
|
||||
const completion = await anthropic.messages.stream(sdk_params);
|
||||
const usageSum = {};
|
||||
|
||||
let message, contentBlock;
|
||||
for await ( const event of completion ) {
|
||||
|
||||
const usageObject = (event?.usage ?? event?.message?.usage ?? {});
|
||||
const meteredData = this.usageFormatterUtil(usageObject);
|
||||
Object.keys(meteredData).forEach((key) => {
|
||||
if ( ! usageSum[key] ) usageSum[key] = 0;
|
||||
usageSum[key] += meteredData[key];
|
||||
});
|
||||
|
||||
if ( event.type === 'message_start' ) {
|
||||
message = chatStream.message();
|
||||
continue;
|
||||
}
|
||||
if ( event.type === 'message_stop' ) {
|
||||
message.end();
|
||||
message = null;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( event.type === 'content_block_start' ) {
|
||||
if ( event.content_block.type === 'tool_use' ) {
|
||||
contentBlock = message.contentBlock({
|
||||
type: event.content_block.type,
|
||||
id: event.content_block.id,
|
||||
name: event.content_block.name,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
contentBlock = message.contentBlock({
|
||||
type: event.content_block.type,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( event.type === 'content_block_stop' ) {
|
||||
contentBlock.end();
|
||||
contentBlock = null;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( event.type === 'content_block_delta' ) {
|
||||
if ( event.delta.type === 'input_json_delta' ) {
|
||||
contentBlock.addPartialJSON(event.delta.partial_json);
|
||||
continue;
|
||||
}
|
||||
if ( event.delta.type === 'text_delta' ) {
|
||||
contentBlock.addText(event.delta.text);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
chatStream.end();
|
||||
|
||||
this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${this.models().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`);
|
||||
};
|
||||
|
||||
return {
|
||||
init_chat_stream,
|
||||
stream: true,
|
||||
finally_fn: cleanup_files,
|
||||
};
|
||||
}
|
||||
|
||||
const msg = await anthropic.messages.create(sdk_params);
|
||||
await cleanup_files();
|
||||
|
||||
const usage = this.usageFormatterUtil(msg.usage);
|
||||
this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${this.models().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`);
|
||||
|
||||
// TODO DS: cleanup old usage tracking
|
||||
return {
|
||||
message: msg,
|
||||
usage: msg.usage,
|
||||
finish_reason: 'stop',
|
||||
};
|
||||
}
|
||||
|
||||
/** @type {(usage: import("@anthropic-ai/sdk/resources/messages.js").Usage | import("@anthropic-ai/sdk/resources/beta/messages/messages.js").BetaUsage) => {}}) */
|
||||
usageFormatterUtil (usage) {
|
||||
return {
|
||||
input_tokens: usage?.input_tokens || 0,
|
||||
ephemeral_5m_input_tokens: usage?.cache_creation?.ephemeral_5m_input_tokens || usage?.cache_creation_input_tokens || 0, // their API reports this field in two places, so fall back
|
||||
ephemeral_1h_input_tokens: usage?.cache_creation?.ephemeral_1h_input_tokens || 0,
|
||||
cache_read_input_tokens: usage?.cache_read_input_tokens || 0,
|
||||
output_tokens: usage?.output_tokens || 0,
|
||||
};
|
||||
};
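|
||||
// The normalization above maps, for example (hypothetical numbers),
|
||||
//   { input_tokens: 10, output_tokens: 5, cache_read_input_tokens: 2 }
|
||||
// to
|
||||
//   { input_tokens: 10, ephemeral_5m_input_tokens: 0, ephemeral_1h_input_tokens: 0,
|
||||
//     cache_read_input_tokens: 2, output_tokens: 5 }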
|
||||
|
||||
/**
|
||||
* Retrieves available Claude AI models and their specifications
|
||||
* @returns Array of model objects containing:
|
||||
* - id: Model identifier
|
||||
* - name: Display name
|
||||
* - aliases: Alternative names for the model
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing details (currency, token counts, input/output costs)
|
||||
* - qualitative_speed: Relative speed rating
|
||||
* - max_output: Maximum output tokens
|
||||
* - training_cutoff: Training data cutoff date
|
||||
*/
|
||||
models () {
|
||||
return [
|
||||
{
|
||||
id: 'claude-opus-4-5-20251101',
|
||||
aliases: ['claude-opus-4-5-latest', 'claude-opus-4-5', 'claude-opus-4.5'],
|
||||
name: 'Claude Opus 4.5',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 500,
|
||||
output: 2500,
|
||||
},
|
||||
context: 200000,
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-haiku-4-5-20251001',
|
||||
aliases: ['claude-haiku-4.5', 'claude-haiku-4-5'],
|
||||
name: 'Claude Haiku 4.5',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 100,
|
||||
output: 500,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-sonnet-4-5-20250929',
|
||||
aliases: ['claude-sonnet-4.5', 'claude-sonnet-4-5'],
|
||||
name: 'Claude Sonnet 4.5',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-opus-4-1-20250805',
|
||||
aliases: ['claude-opus-4-1'],
|
||||
name: 'Claude Opus 4.1',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 1500,
|
||||
output: 7500,
|
||||
},
|
||||
max_tokens: 32000,
|
||||
},
|
||||
{
|
||||
id: 'claude-opus-4-20250514',
|
||||
aliases: ['claude-opus-4', 'claude-opus-4-latest'],
|
||||
name: 'Claude Opus 4',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 1500,
|
||||
output: 7500,
|
||||
},
|
||||
max_tokens: 32000,
|
||||
},
|
||||
{
|
||||
id: 'claude-sonnet-4-20250514',
|
||||
aliases: ['claude-sonnet-4', 'claude-sonnet-4-latest'],
|
||||
name: 'Claude Sonnet 4',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-7-sonnet-20250219',
|
||||
aliases: ['claude-3-7-sonnet-latest'],
|
||||
succeeded_by: 'claude-sonnet-4-20250514',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-5-sonnet-20241022',
|
||||
name: 'Claude 3.5 Sonnet',
|
||||
aliases: ['claude-3-5-sonnet-latest'],
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
qualitative_speed: 'fast',
|
||||
training_cutoff: '2024-04',
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-5-sonnet-20240620',
|
||||
succeeded_by: 'claude-3-5-sonnet-20241022',
|
||||
context: 200000, // might be wrong
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-haiku-20240307',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 25,
|
||||
output: 125,
|
||||
},
|
||||
qualitative_speed: 'fastest',
|
||||
max_tokens: 4096,
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
ClaudeService,
|
||||
};
|
||||
@@ -1,224 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const { Context } = require('../../util/context');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const dedent = require('dedent');
|
||||
|
||||
/**
|
||||
* DeepSeekService class - Provides integration with DeepSeek's API for chat completions
|
||||
* Extends BaseService to implement the puter-chat-completion interface.
|
||||
* Handles model management, message adaptation, streaming responses,
|
||||
* and usage tracking for DeepSeek's language models like DeepSeek Chat and Reasoner.
|
||||
* @extends BaseService
|
||||
*/
|
||||
class DeepSeekService extends BaseService {
|
||||
static MODULES = {
|
||||
openai: require('openai'),
|
||||
};
|
||||
|
||||
/**
|
||||
* @type {import('../../services/MeteringService/MeteringService').MeteringService}
|
||||
*/
|
||||
meteringService;
|
||||
/**
|
||||
* Returns the model identifier unchanged; a hook for model name adaptation.
|
||||
* @param {string} model - The requested model identifier
|
||||
* @returns {string} The model identifier to use with the DeepSeek API
|
||||
*/
|
||||
adapt_model (model) {
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the DeepSeek service by setting up the OpenAI client and registering with the AI chat provider
|
||||
* @private
|
||||
* @returns {Promise<void>} Resolves when initialization is complete
|
||||
*/
|
||||
async _init () {
|
||||
this.openai = new this.modules.openai.OpenAI({
|
||||
apiKey: this.global_config.services.deepseek.apiKey,
|
||||
baseURL: 'https://api.deepseek.com',
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringService = this.services.get('meteringService').meteringService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default model identifier for the DeepSeek service
|
||||
* @returns {string} The default model ID 'deepseek-chat'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'deepseek-chat';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['puter-chat-completion']: {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
const models = await this.models_();
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
model_names.push(model.id);
|
||||
if ( model.aliases ) {
|
||||
model_names.push(...model.aliases);
|
||||
}
|
||||
}
|
||||
return model_names;
|
||||
},
|
||||
|
||||
/**
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
model = this.adapt_model(model);
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
// DeepSeek doesn't appreciate arrays here
|
||||
if ( message.tool_calls && Array.isArray(message.content) ) {
|
||||
message.content = '';
|
||||
}
|
||||
}
|
||||
|
||||
// Function calling is just broken on DeepSeek - it never acknowledges
|
||||
// the tool results and instead keeps calling the function over and over.
|
||||
// (see https://github.com/deepseek-ai/DeepSeek-V3/issues/15)
|
||||
// To fix this, we inject a message that tells DeepSeek what happened.
|
||||
const TOOL_TEXT = message => dedent(`
|
||||
Hi DeepSeek V3, your tool calling is broken and you are not able to
|
||||
obtain tool results in the expected way. That's okay, we can work
|
||||
around this.
|
||||
|
||||
Please do not repeat this tool call.
|
||||
|
||||
We have provided the tool call results below:
|
||||
|
||||
Tool call ${message.tool_call_id} returned: ${message.content}.
|
||||
`);
|
||||
for ( let i = messages.length - 1; i >= 0 ; i-- ) {
|
||||
const message = messages[i];
|
||||
if ( message.role === 'tool' ) {
|
||||
messages.splice(i + 1, 0, {
|
||||
role: 'system',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: TOOL_TEXT(message),
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
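|
||||
// After the splice above, a tool exchange reads, illustratively:
|
||||
//   { role: 'tool', tool_call_id: 'call_1', content: '42' },
|
||||
//   { role: 'system', content: [{ type: 'text', text: TOOL_TEXT(...) }] }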
|
||||
|
||||
const completion = await this.openai.chat.completions.create({
|
||||
messages,
|
||||
model: model ?? this.get_default_model(),
|
||||
...(tools ? { tools } : {}),
|
||||
max_tokens: max_tokens || 1000,
|
||||
temperature, // the default temperature is 1.0. suggested 0 for math/coding and 1.5 for creative poetry
|
||||
stream,
|
||||
...(stream ? {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
});
|
||||
|
||||
// Metering integration now handled via usage_calculator in OpenAIUtil.handle_completion_output
|
||||
const actor = Context.get('actor');
|
||||
const modelDetails = (await this.models_()).find(m => m.id === (model ?? this.get_default_model()));
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `deepseek:${modelDetails.id}`);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves available AI models and their specifications
|
||||
* @returns {Promise<Array>} Array of model objects containing:
|
||||
* - id: Model identifier string
|
||||
* - name: Human readable model name
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing information object with currency and rates
|
||||
* @private
|
||||
*/
|
||||
async models_ () {
|
||||
return [
|
||||
{
|
||||
id: 'deepseek-chat',
|
||||
name: 'DeepSeek Chat',
|
||||
context: 128000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 56,
|
||||
output: 168,
|
||||
},
|
||||
max_tokens: 8000,
|
||||
},
|
||||
{
|
||||
id: 'deepseek-reasoner',
|
||||
name: 'DeepSeek Reasoner',
|
||||
context: 128000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 56,
|
||||
output: 168,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
DeepSeekService,
|
||||
};
|
||||
@@ -1,217 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const { default: dedent } = require('dedent');
|
||||
const BaseService = require('../../services/BaseService');
|
||||
/**
|
||||
* FakeChatService - A mock implementation of a chat service that extends BaseService.
|
||||
* Provides fake chat completion responses using Lorem Ipsum text generation.
|
||||
* Used for testing and development purposes when a real chat service is not needed.
|
||||
* Implements the 'puter-chat-completion' interface with list() and complete() methods.
|
||||
*/
|
||||
class FakeChatService extends BaseService {
|
||||
/**
|
||||
* Initializes the service and registers it as a provider with AIChatService
|
||||
* @private
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async _init () {
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
}
|
||||
|
||||
get_default_model () {
|
||||
return 'fake';
|
||||
}
|
||||
static IMPLEMENTS = {
|
||||
['puter-chat-completion']: {
|
||||
/**
|
||||
* Returns a list of available models with their details
|
||||
* @returns {Promise<Object[]>} Array of model details including costs
|
||||
* @description Returns detailed information about available models including
|
||||
* their costs for input and output tokens
|
||||
*/
|
||||
async models () {
|
||||
return [
|
||||
{
|
||||
id: 'fake',
|
||||
aliases: [],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'costly',
|
||||
aliases: [],
|
||||
cost: {
|
||||
input: 1000, // 1000 microcents per million tokens (0.001 cents per million tokens)
|
||||
output: 2000, // 2000 microcents per million tokens (0.002 cents per million tokens)
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'abuse',
|
||||
aliases: [],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
},
|
||||
},
|
||||
];
|
||||
},
|
||||
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
return ['fake', 'costly', 'abuse'];
|
||||
},
|
||||
|
||||
/**
|
||||
* Simulates a chat completion request by generating random Lorem Ipsum text
|
||||
* @param {Object} params - The completion parameters
|
||||
* @param {Array} params.messages - Array of chat messages
|
||||
* @param {boolean} params.stream - Whether to stream the response (unused in fake implementation)
|
||||
* @param {string} params.model - The model to use ('fake', 'costly', or 'abuse')
|
||||
* @returns {Object} A simulated chat completion response with Lorem Ipsum content
|
||||
*/
|
||||
async complete ({ messages, stream, model, max_tokens, custom }) {
|
||||
const { LoremIpsum } = require('lorem-ipsum');
|
||||
const li = new LoremIpsum({
|
||||
sentencesPerParagraph: {
|
||||
max: 8,
|
||||
min: 4,
|
||||
},
|
||||
wordsPerSentence: {
|
||||
max: 20,
|
||||
min: 12,
|
||||
},
|
||||
});
|
||||
|
||||
// Determine token counts based on messages and model
|
||||
const usedModel = model || this.get_default_model();
|
||||
|
||||
// For the costly model, simulate actual token counting
|
||||
const resp = this.get_response({ li, usedModel, custom, max_tokens, messages });
|
||||
|
||||
if ( stream ) {
|
||||
return {
|
||||
stream: true,
|
||||
init_chat_stream: async ({ chatStream }) => {
|
||||
await new Promise(rslv => setTimeout(rslv, 500));
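|
||||
// The fake stream speaks newline-delimited JSON; the write below emits,
|
||||
// e.g., {"type":"text","text":"Lorem ipsum ..."} followed by a newline.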
|
||||
chatStream.stream.write(`${JSON.stringify({
|
||||
type: 'text',
|
||||
text: resp.message.content[0].text,
|
||||
})}\n`);
|
||||
chatStream.end();
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return resp;
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
get_response ({ li, usedModel, messages, custom, max_tokens }) {
|
||||
let inputTokens = 0;
|
||||
let outputTokens = 0;
|
||||
|
||||
if ( usedModel === 'costly' ) {
|
||||
// Simple token estimation: roughly 4 chars per token for input
|
||||
if ( messages && messages.length > 0 ) {
|
||||
for ( const message of messages ) {
|
||||
if ( typeof message.content === 'string' ) {
|
||||
inputTokens += Math.ceil(message.content.length / 4);
|
||||
} else if ( Array.isArray(message.content) ) {
|
||||
for ( const content of message.content ) {
|
||||
if ( content.type === 'text' ) {
|
||||
inputTokens += Math.ceil(content.text.length / 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
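|
||||
// Illustrative arithmetic: a 100-character text message estimates to
|
||||
// Math.ceil(100 / 4) === 25 input tokens under this heuristic.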
|
||||
|
||||
// Generate a random output token count between 50 and 200, capped at max_tokens when provided
|
||||
outputTokens = Math.floor(Math.min((Math.random() * 150) + 50, max_tokens ?? Infinity));
|
||||
// outputTokens = Math.floor(Math.random() * 150) + 50;
|
||||
}
|
||||
|
||||
// Generate the response text
|
||||
let responseText;
|
||||
if ( usedModel === 'abuse' ) {
|
||||
// responseText = dedent(`
|
||||
// This is a message from ${
|
||||
// this.global_config.origin}. We have detected abuse of our services.
|
||||
|
||||
// If you are seeing this on another website, please report it to ${
|
||||
// this.global_config.abuse_email ?? 'hi@puter.com'}
|
||||
// `);
|
||||
responseText = dedent(`
|
||||
<h2>Free AI and Cloud for everyone!</h2><br />
|
||||
Come on down to <a href="https://puter.com">puter.com</a> and try it out!
|
||||
${custom ?? ''}
|
||||
`);
|
||||
} else {
|
||||
// Generate 1-3 paragraphs for both fake and costly models
|
||||
responseText = li.generateParagraphs(Math.floor(Math.random() * 3) + 1);
|
||||
}
|
||||
|
||||
// Report usage based on model
|
||||
const usage = {
|
||||
'input_tokens': usedModel === 'costly' ? inputTokens : 0,
|
||||
'output_tokens': usedModel === 'costly' ? outputTokens : 1,
|
||||
};
|
||||
|
||||
return {
|
||||
'index': 0,
|
||||
message: {
|
||||
'id': '00000000-0000-0000-0000-000000000000',
|
||||
'type': 'message',
|
||||
'role': 'assistant',
|
||||
'model': usedModel,
|
||||
'content': [
|
||||
{
|
||||
'type': 'text',
|
||||
'text': responseText,
|
||||
},
|
||||
],
|
||||
'stop_reason': 'end_turn',
|
||||
'stop_sequence': null,
|
||||
'usage': usage,
|
||||
},
|
||||
'usage': usage,
|
||||
'logprobs': null,
|
||||
'finish_reason': 'stop',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
FakeChatService,
|
||||
};
|
||||
@@ -1,114 +0,0 @@
|
||||
// Preamble: Before this we used Gemini's SDK directly and, as we found out,
|
||||
// it's actually kind of terrible. So we use the OpenAI SDK now.
|
||||
import BaseService from '../../../services/BaseService.js';
|
||||
import openai from 'openai';
|
||||
import OpenAIUtil from '../lib/OpenAIUtil.js';
|
||||
import { Context } from '../../../util/context.js';
|
||||
import { models } from './models.mjs';
|
||||
|
||||
|
||||
export class GeminiService extends BaseService {
|
||||
/**
|
||||
* @type {import('../../../services/MeteringService/MeteringService').MeteringService}
|
||||
*/
|
||||
meteringService = undefined;
|
||||
|
||||
defaultModel = 'gemini-2.5-flash';
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['puter-chat-completion']: {
|
||||
async models () {
|
||||
return await this.models();
|
||||
},
|
||||
async complete (...args) {
|
||||
return await this.complete(...args);
|
||||
},
|
||||
async list () {
|
||||
return await this.list();
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
async _init () {
|
||||
this.openai = new openai.OpenAI({
|
||||
apiKey: this.config.apiKey,
|
||||
baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringService = this.services.get('meteringService').meteringService;
|
||||
}
|
||||
|
||||
get_default_model () {
|
||||
return this.defaultModel;
|
||||
}
|
||||
|
||||
async models () {
|
||||
return models;
|
||||
}
|
||||
async list () {
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
model_names.push(model.id);
|
||||
if ( model.aliases ) {
|
||||
model_names.push(...model.aliases);
|
||||
}
|
||||
}
|
||||
return model_names;
|
||||
}
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
const actor = Context.get('actor');
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
|
||||
// delete cache_control
|
||||
messages = messages.map(m => {
|
||||
delete m.cache_control;
|
||||
return m;
|
||||
});
|
||||
|
||||
const sdk_params = {
|
||||
messages: messages,
|
||||
model: model,
|
||||
...(tools ? { tools } : {}),
|
||||
...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
|
||||
...(temperature ? { temperature } : {}),
|
||||
stream,
|
||||
...(stream ? {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
};
|
||||
|
||||
let completion;
|
||||
try {
|
||||
completion = await this.openai.chat.completions.create(sdk_params);
|
||||
} catch (e) {
|
||||
console.error('Gemini completion error: ', e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
const modelDetails = (await this.models()).find(m => m.id === model);
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = {
|
||||
prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
|
||||
completion_tokens: usage.completion_tokens ?? 0,
|
||||
cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
||||
};
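|
||||
// Illustrative example (hypothetical numbers):
|
||||
//   { prompt_tokens: 120, completion_tokens: 30, prompt_tokens_details: { cached_tokens: 20 } }
|
||||
// meters as { prompt_tokens: 100, completion_tokens: 30, cached_tokens: 20 },
|
||||
// so cached prompt tokens are not double-billed as fresh input.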
|
||||
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelDetails.id}`);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
export const models = [
|
||||
{
|
||||
id: 'gemini-1.5-flash',
|
||||
name: 'Gemini 1.5 Flash',
|
||||
context: 131072,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 7.5,
|
||||
output: 30,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.0-flash',
|
||||
name: 'Gemini 2.0 Flash',
|
||||
context: 131072,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 40,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.0-flash-lite',
|
||||
name: 'Gemini 2.0 Flash-Lite',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 8,
|
||||
output: 32,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.5-flash',
|
||||
name: 'Gemini 2.5 Flash',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 12,
|
||||
output: 48,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.5-flash-lite',
|
||||
name: 'Gemini 2.5 Flash-Lite',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 40,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.5-pro',
|
||||
name: 'Gemini 2.5 Pro',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 15,
|
||||
output: 60,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
id: 'gemini-3-pro-preview',
|
||||
name: 'Gemini 3 Pro',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 25,
|
||||
output: 100,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
];
|
||||
@@ -1,355 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const { Context } = require('../../util/context');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Service class for integrating with Groq AI's language models.
|
||||
* Extends BaseService to provide chat completion capabilities through the Groq API.
|
||||
* Implements the puter-chat-completion interface for model management and text generation.
|
||||
* Supports both streaming and non-streaming responses, handles multiple models including
|
||||
* various versions of Llama, Mixtral, and Gemma, and manages usage tracking.
|
||||
* @class GroqAIService
|
||||
* @extends BaseService
|
||||
*/
|
||||
class GroqAIService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
|
||||
meteringService;
|
||||
static MODULES = {
|
||||
Groq: require('groq-sdk'),
|
||||
};
|
||||
|
||||
/**
|
||||
* Initializes the GroqAI service by setting up the Groq client and registering with the AI chat provider
|
||||
* @returns {Promise<void>}
|
||||
* @private
|
||||
*/
|
||||
async _init () {
|
||||
const Groq = require('groq-sdk');
|
||||
this.client = new Groq({
|
||||
apiKey: this.config.apiKey,
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default model ID for the Groq AI service
|
||||
* @returns {string} The default model ID 'llama-3.1-8b-instant'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'llama-3.1-8b-instant';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
// They send: { "object": "list", data }
|
||||
const funny_wrapper = await this.client.models.list();
|
||||
return funny_wrapper.data;
|
||||
},
|
||||
/**
|
||||
* Completes a chat interaction using the Groq API
|
||||
* @param {Object} options - The completion options
|
||||
* @param {Array<Object>} options.messages - Array of message objects containing the conversation history
|
||||
* @param {string} [options.model] - The model ID to use for completion. Defaults to service's default model
|
||||
* @param {boolean} [options.stream] - Whether to stream the response
|
||||
* @returns {TypedValue|Object} Returns either a TypedValue with streaming response or completion object with usage stats
|
||||
*/
|
||||
async complete ({ messages, model, stream, tools, max_tokens, temperature }) {
|
||||
model = model ?? this.get_default_model();
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
// Curiously, DeepSeek has the exact same deviation
|
||||
if ( message.tool_calls && Array.isArray(message.content) ) {
|
||||
message.content = '';
|
||||
}
|
||||
}
|
||||
|
||||
const actor = Context.get('actor');
|
||||
|
||||
const completion = await this.client.chat.completions.create({
|
||||
messages,
|
||||
model,
|
||||
stream,
|
||||
tools,
|
||||
max_completion_tokens: max_tokens, // max_tokens has been deprecated
|
||||
temperature,
|
||||
});
|
||||
|
||||
const modelDetails = (await this.models_()).find(m => m.id === model);
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
deviations: {
|
||||
index_usage_from_stream_chunk: chunk =>
|
||||
chunk.x_groq?.usage,
|
||||
},
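|
||||
// Groq's stream chunks nest usage under `x_groq` rather than a top-level
|
||||
// `usage` field; an illustrative chunk shape (hypothetical numbers):
|
||||
//   { x_groq: { usage: { prompt_tokens: 10, completion_tokens: 5 } } }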
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `groq:${modelDetails.id}`);
|
||||
// Still return legacy cost calculation for compatibility
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an array of available AI models with their specifications
|
||||
*
|
||||
* Each model object contains:
|
||||
* - id: Unique identifier for the model
|
||||
* - name: Human-readable name
|
||||
* - context: Maximum context window size in tokens
|
||||
* - cost: Pricing details including currency and token rates
|
||||
*
|
||||
* @returns {Array<Object>} Array of model specification objects
|
||||
*/
|
||||
models_ () {
|
||||
return [
|
||||
{
|
||||
id: 'gemma2-9b-it',
|
||||
name: 'Gemma 2 9B 8k',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 20,
|
||||
output: 20,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemma-7b-it',
|
||||
name: 'Gemma 7B 8k Instruct',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 7,
|
||||
output: 7,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama3-groq-70b-8192-tool-use-preview',
|
||||
name: 'Llama 3 Groq 70B Tool Use Preview 8k',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 89,
|
||||
output: 89,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama3-groq-8b-8192-tool-use-preview',
|
||||
name: 'Llama 3 Groq 8B Tool Use Preview 8k',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 19,
|
||||
output: 19,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.1-70b-versatile',
|
||||
'name': 'Llama 3.1 70B Versatile 128k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 59,
|
||||
'output': 79,
|
||||
},
|
||||
},
|
||||
{
|
||||
// This was only available on their Discord, not
|
||||
// on the pricing page.
|
||||
'id': 'llama-3.1-70b-specdec',
|
||||
'name': 'Llama 3.1 70B SpecDec 128k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 59,
|
||||
'output': 99,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.1-8b-instant',
|
||||
'name': 'Llama 3.1 8B Instant 128k',
|
||||
'context': 131072,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 5,
|
||||
'output': 8,
|
||||
},
|
||||
max_tokens: 131072,
|
||||
},
|
||||
{
|
||||
id: 'meta-llama/llama-guard-4-12b',
|
||||
name: 'Llama Guard 4 12B',
|
||||
context: 131072,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1000000,
|
||||
input: 20,
|
||||
output: 20,
|
||||
},
|
||||
max_tokens: 1024,
|
||||
},
|
||||
{
|
||||
id: 'meta-llama/llama-prompt-guard-2-86m',
|
||||
name: 'Prompt Guard 2 86M',
|
||||
context: 512,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1000000,
|
||||
input: 4,
|
||||
output: 4,
|
||||
},
|
||||
max_tokens: 512,
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.2-1b-preview',
|
||||
'name': 'Llama 3.2 1B (Preview) 8k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 4,
|
||||
'output': 4,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.2-3b-preview',
|
||||
'name': 'Llama 3.2 3B (Preview) 8k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 6,
|
||||
'output': 6,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama-3.2-11b-vision-preview',
|
||||
name: 'Llama 3.2 11B Vision 8k (Preview)',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 18,
|
||||
output: 18,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama-3.2-90b-vision-preview',
|
||||
name: 'Llama 3.2 90B Vision 8k (Preview)',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 90,
|
||||
output: 90,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama3-70b-8192',
|
||||
'name': 'Llama 3 70B 8k',
|
||||
'context': 8192,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 59,
|
||||
'output': 79,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama3-8b-8192',
|
||||
'name': 'Llama 3 8B 8k',
|
||||
'context': 8192,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 5,
|
||||
'output': 8,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'mixtral-8x7b-32768',
|
||||
'name': 'Mixtral 8x7B Instruct 32k',
|
||||
'context': 32768,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 24,
|
||||
'output': 24,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-guard-3-8b',
|
||||
'name': 'Llama Guard 3 8B 8k',
|
||||
'context': 8192,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 20,
|
||||
'output': 20,
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
GroqAIService,
|
||||
};
|
||||
@@ -1,621 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const axios = require('axios');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const { Context } = require('../../util/context');
|
||||
const APIError = require('../../api/APIError');
|
||||
const mime = require('mime-types');
|
||||
const path = require('path');
|
||||
|
||||
/**
|
||||
* MistralAIService class extends BaseService to provide integration with the Mistral AI API.
|
||||
* Implements chat completion functionality with support for various Mistral models including
|
||||
* mistral-large, pixtral, codestral, and ministral variants. Handles both streaming and
|
||||
* non-streaming responses, token usage tracking, and model management. Provides cost information
|
||||
* for different models and implements the puter-chat-completion interface.
|
||||
*/
|
||||
class MistralAIService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
|
||||
meteringService;
|
||||
static MODULES = {
|
||||
'@mistralai/mistralai': require('@mistralai/mistralai'),
|
||||
};
|
||||
/**
|
||||
* Initializes the service's cost structure for different Mistral AI models.
|
||||
* Sets up pricing information for various models including token costs for input/output.
|
||||
* Each model entry specifies currency (usd-cents) and costs per million tokens.
|
||||
* @private
|
||||
*/
|
||||
_construct () {
|
||||
this.costs_ = {
|
||||
'mistral-large-latest': {
|
||||
aliases: ['mistral-large-2411'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 600,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'pixtral-large-latest': {
|
||||
aliases: ['pixtral-large-2411'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 600,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-small-latest': {
|
||||
aliases: ['mistral-small-2506'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 20,
|
||||
output: 60,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'codestral-latest': {
|
||||
aliases: ['codestral-2501'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 30,
|
||||
output: 90,
|
||||
},
|
||||
max_tokens: 256000,
|
||||
},
|
||||
'ministral-8b-latest': {
|
||||
aliases: ['ministral-8b-2410'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'ministral-3b-latest': {
|
||||
aliases: ['ministral-3b-2410'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 4,
|
||||
output: 4,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'pixtral-12b': {
|
||||
aliases: ['pixtral-12b-2409'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 15,
|
||||
output: 15,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-nemo': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 15,
|
||||
output: 15,
|
||||
},
|
||||
},
|
||||
'open-mistral-7b': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 25,
|
||||
output: 25,
|
||||
},
|
||||
},
|
||||
'open-mixtral-8x7b': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 7,
|
||||
output: 7,
|
||||
},
|
||||
},
|
||||
'open-mixtral-8x22b': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 2,
|
||||
output: 6,
|
||||
},
|
||||
},
|
||||
'magistral-medium-latest': {
|
||||
aliases: ['magistral-medium-2506'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 500,
|
||||
},
|
||||
max_tokens: 40000,
|
||||
},
|
||||
'magistral-small-latest': {
|
||||
aliases: ['magistral-small-2506'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 40000,
|
||||
},
|
||||
'mistral-medium-latest': {
|
||||
aliases: ['mistral-medium-2505'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 40,
|
||||
output: 200,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-moderation-latest': {
|
||||
aliases: ['mistral-moderation-2411'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 8000,
|
||||
},
|
||||
'devstral-small-latest': {
|
||||
aliases: ['devstral-small-2505'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-saba-latest': {
|
||||
aliases: ['mistral-saba-2502'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 20,
|
||||
output: 60,
|
||||
},
|
||||
},
|
||||
'open-mistral-nemo': {
|
||||
aliases: ['open-mistral-nemo-2407'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
},
|
||||
'mistral-ocr-latest': {
|
||||
aliases: ['mistral-ocr-2505'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 100,
|
||||
output: 300,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Initializes the Mistral client, registers this service with the AI chat
|
||||
* provider, wires up the metering service, and populates available models.
|
||||
* @private
|
||||
*/
|
||||
async _init () {
|
||||
const require = this.require;
|
||||
const { Mistral } = require('@mistralai/mistralai');
|
||||
this.api_base_url = 'https://api.mistral.ai/v1';
|
||||
this.client = new Mistral({
|
||||
apiKey: this.config.apiKey,
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
|
||||
this.meteringService = this.services.get('meteringService').meteringService;
|
||||
|
||||
// TODO: make this event-driven so it doesn't hold up boot
|
||||
await this.populate_models_();
|
||||
}
|
||||
/**
|
||||
* Populates the internal models array with available Mistral AI models and their configurations.
|
||||
* Makes an API call to fetch model data, then processes and filters models based on cost information.
|
||||
* Each model entry includes id, name, aliases, context window size, capabilities, and pricing.
|
||||
* @private
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async populate_models_ () {
|
||||
const resp = await axios({
|
||||
method: 'get',
|
||||
url: `${this.api_base_url}/models`,
|
||||
headers: {
|
||||
Authorization: `Bearer ${this.config.apiKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
const response_json = resp.data;
|
||||
const models = response_json.data;
|
||||
this.models_array_ = [];
|
||||
for ( const api_model of models ) {
|
||||
|
||||
let cost = this.costs_[api_model.id];
|
||||
if ( ! cost ) {
|
||||
for ( const alias of api_model.aliases ) {
|
||||
cost = this.costs_[alias];
|
||||
if ( cost ) break;
|
||||
}
|
||||
}
|
||||
if ( ! cost ) continue;
|
||||
const model = {
|
||||
...cost,
|
||||
id: api_model.id,
|
||||
name: api_model.description,
|
||||
aliases: api_model.aliases,
|
||||
context: api_model.max_context_length,
|
||||
capabilities: api_model.capabilities,
|
||||
vision: api_model.capabilities.vision,
|
||||
};
|
||||
|
||||
this.models_array_.push(model);
|
||||
}
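|
||||
// Note: models without a costs_ entry (matched by id or alias) never reach
|
||||
// models_array_; e.g. a hypothetical new API model with no pricing entry is
|
||||
// skipped by the `continue` above rather than being exposed without a cost.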
|
||||
// return resp.data;
|
||||
}
|
||||
/**
|
||||
* Returns the default model identifier for the Mistral AI service
|
||||
* @returns {string} The default model ID 'mistral-large-latest'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'mistral-large-latest';
|
||||
}
|
||||
static IMPLEMENTS = {
|
||||
'driver-capabilities': {
|
||||
supports_test_mode (iface, method_name) {
|
||||
return iface === 'puter-ocr' && method_name === 'recognize';
|
||||
},
|
||||
},
|
||||
'puter-ocr': {
|
||||
async recognize ({
|
||||
source,
|
||||
model,
|
||||
pages,
|
||||
includeImageBase64,
|
||||
imageLimit,
|
||||
imageMinSize,
|
||||
bboxAnnotationFormat,
|
||||
documentAnnotationFormat,
|
||||
test_mode,
|
||||
}) {
|
||||
if ( test_mode ) {
|
||||
return this._sampleOcrResponse();
|
||||
}
|
||||
if ( ! source ) {
|
||||
throw APIError.create('missing_required_argument', {
|
||||
interface_name: 'puter-ocr',
|
||||
method_name: 'recognize',
|
||||
arg_name: 'source',
|
||||
});
|
||||
}
|
||||
|
||||
const document = await this._buildDocumentChunkFromSource(source);
|
||||
const payload = {
|
||||
model: model ?? 'mistral-ocr-latest',
|
||||
document,
|
||||
};
|
||||
if ( Array.isArray(pages) ) {
|
||||
payload.pages = pages;
|
||||
}
|
||||
if ( typeof includeImageBase64 === 'boolean' ) {
|
||||
payload.includeImageBase64 = includeImageBase64;
|
||||
}
|
||||
if ( typeof imageLimit === 'number' ) {
|
||||
payload.imageLimit = imageLimit;
|
||||
}
|
||||
if ( typeof imageMinSize === 'number' ) {
|
||||
payload.imageMinSize = imageMinSize;
|
||||
}
|
||||
if ( bboxAnnotationFormat !== undefined ) {
|
||||
payload.bboxAnnotationFormat = bboxAnnotationFormat;
|
||||
}
|
||||
if ( documentAnnotationFormat !== undefined ) {
|
||||
payload.documentAnnotationFormat = documentAnnotationFormat;
|
||||
}
|
||||
|
||||
const response = await this.client.ocr.process(payload);
|
||||
const annotationsRequested = (
|
||||
payload.documentAnnotationFormat !== undefined ||
|
||||
payload.bboxAnnotationFormat !== undefined
|
||||
);
|
||||
this._recordOcrUsage(response, payload.model, {
|
||||
annotationsRequested,
|
||||
});
|
||||
return this._normalizeOcrResponse(response);
|
||||
},
|
||||
},
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return this.models_array_;
|
||||
},
|
||||
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
return this.models_array_.map(m => m.id);
|
||||
},
|
||||
|
||||
/**
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
if ( message.tool_calls ) {
|
||||
message.toolCalls = message.tool_calls;
|
||||
delete message.tool_calls;
|
||||
}
|
||||
if ( message.tool_call_id ) {
|
||||
message.toolCallId = message.tool_call_id;
|
||||
delete message.tool_call_id;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('MESSAGES TO MISTRAL', messages);
|
||||
|
||||
const actor = Context.get('actor');
|
||||
const completion = await this.client.chat[
|
||||
stream ? 'stream' : 'complete'
|
||||
]({
|
||||
model: model ?? this.get_default_model(),
|
||||
...(tools ? { tools } : {}),
|
||||
messages,
|
||||
max_tokens: max_tokens,
|
||||
temperature,
|
||||
});
|
||||
|
||||
const modelDetails = this.models_array_.find(m => m.id === (model ?? this.get_default_model()));
|
||||
|
||||
return await OpenAIUtil.handle_completion_output({
|
||||
deviations: {
|
||||
index_usage_from_stream_chunk: chunk => {
|
||||
if ( ! chunk.usage ) return;
|
||||
|
||||
const snake_usage = {};
|
||||
for ( const key in chunk.usage ) {
|
||||
const snakeKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
|
||||
snake_usage[snakeKey] = chunk.usage[key];
|
||||
}
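|
||||
// e.g. 'promptTokens' -> 'prompt_tokens', 'completionTokens' -> 'completion_tokens'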
|
||||
|
||||
return snake_usage;
|
||||
},
|
||||
chunk_but_like_actually: chunk => chunk.data,
|
||||
index_tool_calls_from_stream_choice: choice => choice.delta.toolCalls,
|
||||
coerce_completion_usage: completion => ({
|
||||
prompt_tokens: completion.usage.promptTokens,
|
||||
completion_tokens: completion.usage.completionTokens,
|
||||
}),
|
||||
},
|
||||
completion,
|
||||
stream,
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `mistral:${modelDetails.id}`);
|
||||
// Still return legacy cost calculation for compatibility
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
async _buildDocumentChunkFromSource (fileFacade) {
|
||||
const dataUrl = await this._safeFileValue(fileFacade, 'data_url');
|
||||
const webUrl = await this._safeFileValue(fileFacade, 'web_url');
|
||||
const filePath = await this._safeFileValue(fileFacade, 'path');
|
||||
const fsNode = await this._safeFileValue(fileFacade, 'fs-node');
|
||||
const fileName = filePath ? path.basename(filePath) : fsNode?.name;
|
||||
const inferredMime = this._inferMimeFromName(fileName);
|
||||
|
||||
if ( webUrl ) {
|
||||
return this._chunkFromUrl(webUrl, fileName, inferredMime);
|
||||
}
|
||||
if ( dataUrl ) {
|
||||
const mimeFromUrl = this._extractMimeFromDataUrl(dataUrl) ?? inferredMime;
|
||||
return this._chunkFromUrl(dataUrl, fileName, mimeFromUrl);
|
||||
}
|
||||
|
||||
const buffer = await this._safeFileValue(fileFacade, 'buffer');
|
||||
if ( ! buffer ) {
|
||||
throw APIError.create('field_invalid', null, {
|
||||
key: 'source',
|
||||
expected: 'file, data URL, or web URL',
|
||||
});
|
||||
}
|
||||
const mimeType = inferredMime ?? 'application/octet-stream';
|
||||
const generatedDataUrl = this._createDataUrl(buffer, mimeType);
|
||||
return this._chunkFromUrl(generatedDataUrl, fileName, mimeType);
|
||||
}
|
||||
|
||||
async _safeFileValue (fileFacade, key) {
|
||||
if ( !fileFacade || typeof fileFacade.get !== 'function' ) return undefined;
|
||||
const maybeCache = fileFacade.values?.values;
|
||||
if ( maybeCache && Object.prototype.hasOwnProperty.call(maybeCache, key) ) {
|
||||
return maybeCache[key];
|
||||
}
|
||||
try {
|
||||
return await fileFacade.get(key);
|
||||
} catch (e) {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
_chunkFromUrl (url, fileName, mimeType) {
|
||||
const lowerName = fileName?.toLowerCase();
|
||||
const urlLooksPdf = /\.pdf($|\?)/i.test(url);
|
||||
const mimeLooksPdf = mimeType?.includes('pdf');
|
||||
const isPdf = mimeLooksPdf || urlLooksPdf || (lowerName ? lowerName.endsWith('.pdf') : false);
|
||||
|
||||
if ( isPdf ) {
|
||||
const chunk = {
|
||||
type: 'document_url',
|
||||
documentUrl: url,
|
||||
};
|
||||
if ( fileName ) {
|
||||
chunk.documentName = fileName;
|
||||
}
|
||||
return chunk;
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'image_url',
|
||||
imageUrl: {
|
||||
url,
|
||||
},
|
||||
};
|
||||
}
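|
||||
// Illustrative behaviour (hypothetical inputs):
|
||||
//   _chunkFromUrl('https://example.test/a.pdf', 'a.pdf', 'application/pdf')
|
||||
//     -> { type: 'document_url', documentUrl: '...', documentName: 'a.pdf' }
|
||||
// while a PNG data URL falls through to { type: 'image_url', imageUrl: { url } }.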
|
||||
|
||||
_inferMimeFromName (name) {
|
||||
if ( ! name ) return undefined;
|
||||
return mime.lookup(name) || undefined;
|
||||
}
|
||||
|
||||
_extractMimeFromDataUrl (url) {
|
||||
if ( typeof url !== 'string' ) return undefined;
|
||||
const match = url.match(/^data:([^;,]+)[;,]/);
|
||||
return match ? match[1] : undefined;
|
||||
}
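|
||||
// e.g. _extractMimeFromDataUrl('data:application/pdf;base64,AAAA') === 'application/pdf'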
|
||||
|
||||
_createDataUrl (buffer, mimeType) {
|
||||
return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
|
||||
}
|
||||
|
||||
_normalizeOcrResponse (response) {
|
||||
if ( ! response ) return {};
|
||||
const normalized = {
|
||||
model: response.model,
|
||||
pages: response.pages ?? [],
|
||||
usage_info: response.usageInfo,
|
||||
};
|
||||
const blocks = [];
|
||||
if ( Array.isArray(response.pages) ) {
|
||||
for ( const page of response.pages ) {
|
||||
if ( typeof page?.markdown !== 'string' ) continue;
|
||||
const lines = page.markdown.split('\n').map(line => line.trim()).filter(Boolean);
|
||||
for ( const line of lines ) {
|
||||
blocks.push({
|
||||
type: 'text/mistral:LINE',
|
||||
text: line,
|
||||
page: page.index,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
normalized.blocks = blocks;
|
||||
if ( blocks.length ) {
|
||||
normalized.text = blocks.map(block => block.text).join('\n');
|
||||
} else if ( Array.isArray(response.pages) ) {
|
||||
normalized.text = response.pages.map(page => page?.markdown || '').join('\n\n').trim();
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
_recordOcrUsage (response, model, { annotationsRequested } = {}) {
|
||||
try {
|
||||
if ( ! this.meteringService ) return;
|
||||
const actor = Context.get('actor');
|
||||
if ( ! actor ) return;
|
||||
const pagesProcessed =
|
||||
response?.usageInfo?.pagesProcessed ??
|
||||
(Array.isArray(response?.pages) ? response.pages.length : 1);
|
||||
this.meteringService.incrementUsage(actor, 'mistral-ocr:ocr:page', pagesProcessed);
|
||||
if ( annotationsRequested ) {
|
||||
this.meteringService.incrementUsage(actor, 'mistral-ocr:annotations:page', pagesProcessed);
|
||||
}
|
||||
} catch (e) {
|
||||
// ignore metering failures to avoid blocking OCR results
|
||||
}
|
||||
}
|
||||
|
||||
_sampleOcrResponse () {
|
||||
const markdown = 'Sample OCR output (test mode).';
|
||||
return {
|
||||
model: 'mistral-ocr-latest',
|
||||
pages: [
|
||||
{
|
||||
index: 0,
|
||||
markdown,
|
||||
images: [],
|
||||
dimensions: null,
|
||||
},
|
||||
],
|
||||
blocks: [
|
||||
{
|
||||
type: 'text/mistral:LINE',
|
||||
text: markdown,
|
||||
page: 0,
|
||||
},
|
||||
],
|
||||
text: markdown,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { MistralAIService };
|
||||
@@ -1,217 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const { Context } = require('../../util/context');
|
||||
const openai = require('openai');
|
||||
const uuidv4 = require('uuid').v4;
|
||||
const axios = require('axios');
|
||||
/**
|
||||
* OllamaService class - Provides integration with Ollama's API for chat completions
|
||||
* Extends BaseService to implement the puter-chat-completion interface.
|
||||
* Handles model management, message adaptation, streaming responses,
|
||||
* and usage tracking for Ollama's language models.
|
||||
* @extends BaseService
|
||||
*/
|
||||
class OllamaService extends BaseService {
|
||||
static MODULES = {
|
||||
kv: globalThis.kv,
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the model identifier unchanged; a hook for model name adaptation.
|
||||
* @param {string} model - The requested model identifier
|
||||
* @returns {string} The model identifier to use with the Ollama API
|
||||
*/
|
||||
adapt_model (model) {
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the Ollama service by setting up the Ollama client and registering with the AI chat provider
|
||||
* @private
|
||||
* @returns {Promise<void>} Resolves when initialization is complete
|
||||
*/
|
||||
async _init () {
|
||||
// Ollama typically runs on HTTP, not HTTPS
|
||||
this.api_base_url = this.config?.api_base_url || 'http://localhost:11434';
|
||||
|
||||
// OpenAI SDK is used to interact with the Ollama API
|
||||
this.openai = new openai.OpenAI({
|
||||
apiKey: 'ollama', // Ollama doesn't use an API key, it uses the "ollama" string
|
||||
baseURL: `${this.api_base_url }/v1`,
|
||||
});
|
||||
this.kvkey = uuidv4();
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
// We don't need to meter usage for Ollama because it's a local service
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default model identifier for the Ollama service
|
||||
* @returns {string} The default model ID 'gpt-oss:20b'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'gpt-oss:20b';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
const models = await this.models_();
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
model_names.push(model.id);
|
||||
}
|
||||
return model_names;
|
||||
},
|
||||
|
||||
/**
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
model = this.adapt_model(model);
|
||||
|
||||
if ( model.startsWith('ollama:') ) {
|
||||
model = model.slice('ollama:'.length);
|
||||
}
|
||||
|
||||
const actor = Context.get('actor');
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
const sdk_params = {
|
||||
messages,
|
||||
model: model ?? this.get_default_model(),
|
||||
...(tools ? { tools } : {}),
|
||||
max_tokens,
|
||||
temperature: temperature, // default to 1.0
|
||||
stream,
|
||||
...(stream ? {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
};
|
||||
|
||||
const completion = await this.openai.chat.completions.create(sdk_params);
|
||||
|
||||
const modelDetails = (await this.models_()).find(m => m.id === `ollama:${model}`);
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
|
||||
const trackedUsage = {
|
||||
prompt: (usage.prompt_tokens ?? 1 ) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
|
||||
completion: usage.completion_tokens ?? 1,
|
||||
input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
||||
};
|
||||
const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
|
||||
return [k, 0]; // override to 0 since local is free
|
||||
}));
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, { costOverwrites });
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves available AI models and their specifications
|
||||
* @returns Array of model objects containing:
|
||||
* - id: Model identifier string
|
||||
* - name: Human readable model name
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing information object with currency and rates
|
||||
* @private
|
||||
*/
|
||||
async models_ (_rawPriceKeys = false) {
|
||||
|
||||
let models = this.modules.kv.get(`${this.kvkey}:models`);
|
||||
if ( ! models ) {
|
||||
try {
|
||||
const resp = await axios.request({
|
||||
method: 'GET',
|
||||
url: `${this.api_base_url}/api/tags`,
|
||||
});
|
||||
models = resp.data.models || [];
|
||||
if ( models.length > 0 ) {
|
||||
this.modules.kv.set(`${this.kvkey}:models`, models);
|
||||
}
|
||||
} catch ( error ) {
|
||||
this.log.error('Failed to fetch models from Ollama:', error.message);
|
||||
// Return empty array if Ollama is not available
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
if ( !models || models.length === 0 ) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const coerced_models = [];
|
||||
for ( const model of models ) {
|
||||
// Ollama API returns models with 'name' property, not 'model'
|
||||
const modelName = model.name || model.model || 'unknown';
|
||||
const microcentCosts = {
|
||||
input: 0,
|
||||
output: 0,
|
||||
};
|
||||
coerced_models.push({
|
||||
id: `ollama:${ modelName}`,
|
||||
name: `${modelName} (Ollama)`,
|
||||
max_tokens: model.size || model.max_context || 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
...microcentCosts,
|
||||
},
|
||||
});
|
||||
}
|
||||
console.log('coerced_models', coerced_models);
|
||||
return coerced_models;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
OllamaService,
|
||||
};
|
||||
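
Standalone sketch of the client setup this service relies on, assuming a local Ollama instance on its default port, the `openai` npm package installed, and the model already pulled; the prompt is illustrative.

// Minimal sketch: point the OpenAI SDK at Ollama's OpenAI-compatible endpoint,
// exactly as OllamaService does above. Requires a running local Ollama.
const { OpenAI } = require('openai');

const client = new OpenAI({
    apiKey: 'ollama',                     // Ollama ignores the key; any string works
    baseURL: 'http://localhost:11434/v1', // Ollama's OpenAI-compatible endpoint
});

async function demo () {
    const completion = await client.chat.completions.create({
        model: 'gpt-oss:20b', // must already be pulled into Ollama
        messages: [{ role: 'user', content: 'Say hello.' }],
    });
    console.log(completion.choices[0].message.content);
}

demo().catch(console.error);
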
@@ -1,57 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}

import BaseService from '../../../services/BaseService.js';
import { OpenAICompletionService } from './OpenAICompletionService.mjs';

export class OpenAICompletionServiceWrapper extends BaseService {
    /** @type {OpenAICompletionService} */
    openAICompletionService;

    _init () {
        this.openAICompletionService = new OpenAICompletionService({
            serviceName: this.service_name,
            config: this.config,
            globalConfig: this.global_config,
            aiChatService: this.services.get('ai-chat'),
            meteringService: this.services.get('meteringService').meteringService,
        });
    }

    async check_moderation (text) {
        return await this.openAICompletionService.checkModeration(text);
    }

    get_default_model () {
        return this.openAICompletionService.get_default_model();
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: Object.getOwnPropertyNames(OpenAICompletionService.prototype)
            .filter(n => n !== 'constructor')
            .reduce((acc, fn) => ({
                ...acc,
                [fn]: async function (...a) {
                    return await this.openAICompletionService[fn](...a);
                },
            }), {}),
    };
}
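
The wrapper's IMPLEMENTS block uses prototype reflection to forward every method of the inner service. A self-contained sketch of that pattern, with invented class names:

// Illustrative prototype-reflection delegation: every method on an inner
// class is re-exposed as an async forwarder, as the wrapper above does.
class Inner {
    greet (name) { return `hello ${name}`; }
    add (a, b) { return a + b; }
}

const forwarders = Object.getOwnPropertyNames(Inner.prototype)
    .filter(n => n !== 'constructor')
    .reduce((acc, fn) => ({
        ...acc,
        [fn]: async function (...args) {
            return await this.inner[fn](...args);
        },
    }), {});

const wrapper = Object.assign({ inner: new Inner() }, forwarders);
wrapper.greet('world').then(console.log); // "hello world"
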
@@ -1,206 +0,0 @@
// TODO DS: centralize somewhere

export const OPEN_AI_MODELS = [
    {
        id: 'gpt-5.1',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 25,
            output: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-chat-latest',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-5-2025-08-07',
        aliases: ['gpt-5'],
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-mini-2025-08-07',
        aliases: ['gpt-5-mini'],
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 25,
            output: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-nano-2025-08-07',
        aliases: ['gpt-5-nano'],
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 5,
            output: 40,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-chat-latest',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 250,
            output: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o-mini',
        max_tokens: 16384,
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 15,
            output: 60,
        },
    },
    {
        id: 'o1',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 1500,
            output: 6000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o1-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 300,
            output: 1200,
        },
        max_tokens: 65536,
    },
    {
        id: 'o1-pro',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 15000,
            output: 60000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 1000,
            output: 4000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 110,
            output: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'o4-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 110,
            output: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'gpt-4.1',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 200,
            output: 800,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 40,
            output: 160,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-nano',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 10,
            output: 40,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.5-preview',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 7500,
            output: 15000,
        },
    },
];
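
A worked example of reading this cost table: with `currency: 'usd-cents'` and `tokens: 1_000_000`, the `input`/`output` fields are cents per million tokens. The helper function below is illustrative, not part of the codebase.

// Hypothetical cost helper for the model entries above.
function costUsdCents (model, usage) {
    const per = model.cost.tokens; // 1_000_000
    return (usage.prompt_tokens * model.cost.input +
            usage.completion_tokens * model.cost.output) / per;
}

const gpt5 = { cost: { currency: 'usd-cents', tokens: 1_000_000, input: 125, output: 1000 } };
// 10k prompt tokens + 2k completion tokens:
console.log(costUsdCents(gpt5, { prompt_tokens: 10_000, completion_tokens: 2_000 }));
// => 3.25 cents (1.25 for input + 2 for output)
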
@@ -1,214 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const OpenAIUtil = require('./lib/OpenAIUtil');
const { Context } = require('../../util/context');
const openai = require('openai');
const uuidv4 = require('uuid').v4;
const axios = require('axios');

/**
 * OpenRouterService class - Provides integration with OpenRouter's API for chat completions
 * Extends BaseService to implement the puter-chat-completion interface.
 * Handles model management, message adaptation, streaming responses,
 * and usage tracking for the many upstream models proxied by OpenRouter.
 * @extends BaseService
 */
class OpenRouterService extends BaseService {
    static MODULES = {
        kv: globalThis.kv,
    };

    // TODO DS: extract this into driver wrapper like openAiService
    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            async models () {
                return await this.models();
            },
            async list () {
                return await this.list();
            },
            async complete (...params) {
                return await this.complete(...params);
            },
        },
    };

    /**
     * Adapts a requested model identifier before use.
     * OpenRouter model names are passed through unchanged.
     * @returns {string} The (unchanged) model identifier
     */
    adapt_model (model) {
        return model;
    }

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    meteringService;

    /**
     * Initializes the OpenRouter service by setting up the OpenAI client and registering with the AI chat provider
     * @private
     * @returns {Promise<void>} Resolves when initialization is complete
     */
    async _init () {
        this.api_base_url = 'https://openrouter.ai/api/v1';
        this.openai = new openai.OpenAI({
            apiKey: this.config.apiKey,
            baseURL: this.api_base_url,
        });
        this.kvkey = uuidv4();

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
    }

    /**
     * Returns the default model identifier for the OpenRouter service
     * @returns {string|undefined} No default model is currently configured
     */
    get_default_model () {
    }
    /**
     * Returns a list of available model names including their aliases
     * @returns {Promise<string[]>} Array of model identifiers and their aliases
     * @description Retrieves all available model IDs and their aliases,
     * flattening them into a single array of strings that can be used for model selection
     */
    async list () {
        const models = await this.models();
        const model_names = [];
        for ( const model of models ) {
            model_names.push(model.id);
        }
        return model_names;
    }

    /**
     * AI Chat completion method.
     * See AIChatService for more details.
     */
    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
        model = this.adapt_model(model);

        if ( model.startsWith('openrouter:') ) {
            model = model.slice('openrouter:'.length);
        }

        if ( model === 'openrouter/auto' ) {
            throw APIError.create('field_invalid', null, {
                key: 'model',
                expected: 'allowed model',
                got: 'disallowed model',
            });
        }

        const actor = Context.get('actor');

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.openai.chat.completions.create({
            messages,
            model: model ?? this.get_default_model(),
            ...(tools ? { tools } : {}),
            max_tokens,
            temperature: temperature, // default to 1.0
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
            usage: { include: true },
        });

        const modelDetails = (await this.models()).find(m => m.id === `openrouter:${model}`);
        const rawPriceModelDetails = (await this.models(true)).find(m => m.id === `openrouter:${model}`);
        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                // custom OpenRouter logic because their pricing is unusual
                const trackedUsage = {
                    prompt: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion: usage.completion_tokens ?? 0,
                    input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };
                const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
                    return [k, rawPriceModelDetails.cost[k] * trackedUsage[k]];
                }));
                this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, costOverwrites);
                const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
                    model_details: modelDetails,
                });
                return legacyCostCalculator({ usage });
            },
            stream,
            completion,
        });
    }

    /**
     * Retrieves available AI models and their specifications
     * @returns Array of model objects containing:
     *  - id: Model identifier string
     *  - name: Human readable model name
     *  - context: Maximum context window size
     *  - cost: Pricing information object with currency and rates
     */
    async models (rawPriceKeys = false) {
        let models = this.modules.kv.get(`${this.kvkey}:models`);
        if ( ! models ) {
            try {
                const resp = await axios.request({
                    method: 'GET',
                    url: `${this.api_base_url}/models`,
                });

                models = resp.data.data;
                this.modules.kv.set(`${this.kvkey}:models`, models);
            } catch (e) {
                console.log(e);
            }
        }
        // If the fetch above failed, `models` is still undefined
        if ( ! models ) return [];
        const coerced_models = [];
        for ( const model of models ) {
            const microcentCosts = rawPriceKeys
                ? Object.fromEntries(Object.entries(model.pricing).map(([k, v]) => [k, Math.round(v * 1_000_000 * 100)]))
                : {
                    input: Math.round(model.pricing.prompt * 1_000_000 * 100),
                    output: Math.round(model.pricing.completion * 1_000_000 * 100),
                };
            coerced_models.push({
                id: `openrouter:${model.id}`,
                name: `${model.name} (OpenRouter)`,
                max_tokens: model.top_provider.max_completion_tokens,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    ...microcentCosts,
                },
            });
        }
        return coerced_models;
    }
}

module.exports = {
    OpenRouterService,
};
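
A quick illustration of the price normalization in `models()` above: OpenRouter reports per-token USD prices, and the service converts them to usd-cents per million tokens. The sample pricing values are invented.

// Sample OpenRouter pricing (per-token USD, as strings) converted the same way.
const pricing = { prompt: '0.0000025', completion: '0.00001' };

const input = Math.round(pricing.prompt * 1_000_000 * 100);      // 250 usd-cents per 1M tokens
const output = Math.round(pricing.completion * 1_000_000 * 100); // 1000 usd-cents per 1M tokens
console.log({ input, output }); // { input: 250, output: 1000 }
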
@@ -1,164 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { AdvancedBase } = require('@heyputer/putility');
const config = require('../../config');

/**
 * PuterAIModule class extends AdvancedBase to manage and register various AI services.
 * This module handles the initialization and registration of multiple AI-related services
 * including text processing, speech synthesis, chat completion, and image generation.
 * Services are conditionally registered based on configuration settings, allowing for
 * flexible deployment with different AI providers like AWS, OpenAI, Claude, Together AI,
 * Mistral, Groq, and XAI.
 * @extends AdvancedBase
 */
class PuterAIModule extends AdvancedBase {
    /**
     * Module for managing AI-related services in the Puter platform
     * Extends AdvancedBase to provide core functionality
     * Handles registration and configuration of various AI services like OpenAI, Claude, AWS services etc.
     */
    async install (context) {
        const services = context.get('services');

        const { AIInterfaceService } = require('./AIInterfaceService');
        services.registerService('__ai-interfaces', AIInterfaceService);

        // TODO: services should govern their own availability instead of
        //       the module deciding what to register

        if ( config?.services?.['aws-textract']?.aws ) {
            const { AWSTextractService } = require('./AWSTextractService');
            services.registerService('aws-textract', AWSTextractService);
        }

        if ( config?.services?.['aws-polly']?.aws ) {
            const { AWSPollyService } = require('./AWSPollyService');
            services.registerService('aws-polly', AWSPollyService);
        }

        if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
            const { ElevenLabsTTSService } = require('./ElevenLabsTTSService');
            services.registerService('elevenlabs-tts', ElevenLabsTTSService);

            const { ElevenLabsVoiceChangerService } = require('./ElevenLabsVoiceChangerService');
            services.registerService('elevenlabs-voice-changer', ElevenLabsVoiceChangerService);
        }

        if ( config?.services?.openai || config?.openai ) {
            const { OpenAICompletionServiceWrapper } = require('./OpenAiCompletionService/index.mjs');
            services.registerService('openai-completion', OpenAICompletionServiceWrapper);

            const { OpenAIImageGenerationService } = require('./OpenAIImageGenerationService');
            services.registerService('openai-image-generation', OpenAIImageGenerationService);

            const { OpenAIVideoGenerationService } = require('./OpenAIVideoGenerationService');
            services.registerService('openai-video-generation', OpenAIVideoGenerationService);

            const { OpenAITTSService } = require('./OpenAITTSService');
            services.registerService('openai-tts', OpenAITTSService);

            const { OpenAISpeechToTextService } = require('./OpenAISpeechToTextService');
            services.registerService('openai-speech2txt', OpenAISpeechToTextService);
        }

        if ( config?.services?.claude ) {
            const { ClaudeService } = require('./ClaudeService');
            services.registerService('claude', ClaudeService);
        }

        if ( config?.services?.['together-ai'] ) {
            const { TogetherAIService } = require('./TogetherAIService');
            services.registerService('together-ai', TogetherAIService);

            const { TogetherImageGenerationService } = require('./TogetherImageGenerationService');
            services.registerService('together-image-generation', TogetherImageGenerationService);

            const { TogetherVideoGenerationService } = require('./TogetherVideoGenerationService');
            services.registerService('together-video-generation', TogetherVideoGenerationService);
        }

        if ( config?.services?.['mistral'] ) {
            const { MistralAIService } = require('./MistralAIService');
            services.registerService('mistral', MistralAIService);
        }

        if ( config?.services?.['groq'] ) {
            const { GroqAIService } = require('./GroqAIService');
            services.registerService('groq', GroqAIService);
        }

        if ( config?.services?.['xai'] ) {
            const { XAIService } = require('./XAIService');
            services.registerService('xai', XAIService);
        }

        if ( config?.services?.['deepseek'] ) {
            const { DeepSeekService } = require('./DeepSeekService');
            services.registerService('deepseek', DeepSeekService);
        }
        if ( config?.services?.['gemini'] ) {
            const { GeminiService } = require('./GeminiService/GeminiService.mjs');
            const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');

            services.registerService('gemini', GeminiService);
            services.registerService('gemini-image-generation', GeminiImageGenerationService);
        }
        if ( config?.services?.['openrouter'] ) {
            const { OpenRouterService } = require('./OpenRouterService');
            services.registerService('openrouter', OpenRouterService);
        }

        // Autodiscover the Ollama service, then check whether it's disabled in the config.
        // If config.services.ollama.enabled is undefined, the user hasn't set it, so default to true.
        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
            const ollama_enabled = config?.services?.['ollama']?.enabled;
            if ( ollama_enabled === undefined ) {
                return true;
            }
            return ollama_enabled;
        }).catch(_err => {
            return false;
        });
        // The user can disable Ollama in the config, but by default it's enabled when discovery succeeds
        if ( ollama_available || config?.services?.['ollama']?.enabled ) {
            console.log('Local AI support detected! Registering Ollama');
            const { OllamaService } = require('./OllamaService');
            services.registerService('ollama', OllamaService);
        }

        const { AIChatService } = require('./AIChatService');
        services.registerService('ai-chat', AIChatService);

        const { FakeChatService } = require('./FakeChatService');
        services.registerService('fake-chat', FakeChatService);

        const { AITestModeService } = require('./AITestModeService');
        services.registerService('ai-test-mode', AITestModeService);

        const { UsageLimitedChatService } = require('./UsageLimitedChatService');
        services.registerService('usage-limited-chat', UsageLimitedChatService);
    }
}

module.exports = {
    PuterAIModule,
};
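
For orientation, an illustrative config shape that would drive the registration checks in `install()` above; the keys mirror the `config?.services?.[...]` lookups, but the values are placeholders.

// Placeholder values; only the key names come from the checks above.
const exampleConfig = {
    services: {
        'claude':     { apiKey: 'sk-ant-placeholder' },
        'mistral':    { apiKey: 'placeholder' },
        'openrouter': { apiKey: 'placeholder' },
        'ollama':     { enabled: true }, // set false to opt out even when autodiscovered
    },
};
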
@@ -1,224 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { PassThrough } = require('stream');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { nou } = require('../../util/langutil');
const { Together } = require('together-ai');
const OpenAIUtil = require('./lib/OpenAIUtil');
const { Context } = require('../../util/context');

/**
 * TogetherAIService class provides integration with Together AI's language models.
 * Extends BaseService to implement chat completion functionality through the
 * puter-chat-completion interface. Manages model listings, chat completions,
 * and streaming responses while handling usage tracking and model fallback testing.
 * @extends BaseService
 */
class TogetherAIService extends BaseService {
    /**
     * @type {import('../../services/MeteringService/MeteringService').MeteringService}
     */
    meteringService;
    static MODULES = {
        kv: globalThis.kv,
        uuidv4: require('uuid').v4,
    };

    /**
     * Initializes the TogetherAI service by setting up the API client and registering as a chat provider
     * @async
     * @returns {Promise<void>}
     * @private
     */
    async _init () {
        this.together = new Together({
            apiKey: this.config.apiKey,
        });
        this.kvkey = this.modules.uuidv4();

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.meteringService = this.services.get('meteringService').meteringService;
    }

    /**
     * Returns the default model ID for the Together AI service
     * @returns {string} The ID of the default model (meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo)
     */
    get_default_model () {
        return 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo';
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            /**
             * Returns a list of available models and their details.
             * See AIChatService for more information.
             *
             * @returns Promise<Array<Object>> Array of model details
             */
            async models () {
                return await this.models_();
            },

            /**
             * Returns a list of available model names including their aliases
             * @returns {Promise<string[]>} Array of model identifiers and their aliases
             * @description Retrieves all available model IDs and their aliases,
             * flattening them into a single array of strings that can be used for model selection
             */
            async list () {
                let models = this.modules.kv.get(`${this.kvkey}:models`);
                if ( ! models ) models = await this.models_();
                return models.map(model => model.id);
            },
            /**
             * AI Chat completion method.
             * See AIChatService for more details.
             */
            async complete ({ messages, stream, model }) {
                if ( model === 'model-fallback-test-1' ) {
                    throw new Error('Model Fallback Test 1');
                }

                /** @type {import('together-ai/streaming.mjs').Stream<import("together-ai/resources/chat/completions.mjs").ChatCompletionChunk>} */
                const completion = await this.together.chat.completions.create({
                    model: model ?? this.get_default_model(),
                    messages: messages,
                    stream,
                });

                // Metering integration
                const actor = Context.get('actor');

                const modelDetails = (await this.models_()).find(m => m.id === model || m.aliases?.includes(model));
                const modelId = modelDetails?.id ?? this.get_default_model();

                if ( stream ) {
                    const stream = new PassThrough();
                    const retval = new TypedValue({
                        $: 'stream',
                        content_type: 'application/x-ndjson',
                        chunked: true,
                    }, stream);
                    (async () => {
                        for await ( const chunk of completion ) {
                            // DRY: same as openai
                            if ( chunk.usage ) {
                                // Metering: record usage for streamed chunks
                                const trackedUsage = OpenAIUtil.extractMeteredUsage(chunk.usage);
                                const costOverrides = {
                                    prompt_tokens: trackedUsage.prompt_tokens * (modelDetails?.cost?.input ?? 0),
                                    completion_tokens: trackedUsage.completion_tokens * (modelDetails?.cost?.output ?? 0),
                                };
                                this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelId, costOverrides);
                            }

                            if ( chunk.choices.length < 1 ) continue;
                            if ( chunk.choices[0].finish_reason ) {
                                stream.end();
                                break;
                            }
                            if ( nou(chunk.choices[0].delta.content) ) continue;
                            const str = JSON.stringify({
                                text: chunk.choices[0].delta.content,
                            });
                            stream.write(`${str}\n`);
                        }
                        stream.end();
                    })();

                    return {
                        stream: true,
                        response: retval,
                    };
                }

                const ret = completion.choices[0];

                ret.usage = {
                    input_tokens: completion.usage.prompt_tokens,
                    output_tokens: completion.usage.completion_tokens,
                };

                const trackedUsage = OpenAIUtil.extractMeteredUsage(completion.usage);
                const costOverrides = {
                    prompt_tokens: trackedUsage.prompt_tokens * (modelDetails?.cost?.input ?? 0),
                    completion_tokens: trackedUsage.completion_tokens * (modelDetails?.cost?.output ?? 0),
                };
                // Metering: record usage for non-streamed completion
                this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelId, costOverrides);

                return ret;
            },
        },
    };

    /**
     * Fetches and caches available AI models from Together API
     * @private
     * @returns Array of model objects containing id, name, context length,
     * description and pricing information
     * @remarks Models are cached for 5 minutes in KV store
     */
    async models_ () {
        let models = this.modules.kv.get(`${this.kvkey}:models`);
        if ( models ) return models;
        const api_models = await this.together.models.list();
        models = [];
        for ( const model of api_models ) {
            models.push({
                id: `togetherai:${model.id}`,
                aliases: [model.id],
                name: model.display_name,
                context: model.context_length,
                description: model.description,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: model.pricing.input,
                    output: model.pricing.output,
                },
            });
        }
        models.push({
            id: 'model-fallback-test-1',
            name: 'Model Fallback Test 1',
            context: 1000,
            cost: {
                currency: 'usd-cents',
                tokens: 1_000_000,
                input: 10,
                output: 10,
            },
        });
        this.modules.kv.set(`${this.kvkey}:models`, models, { EX: 5 * 60 });
        return models;
    }
}

module.exports = {
    TogetherAIService,
};
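
A minimal sketch of consuming the `application/x-ndjson` stream emitted by the streaming branch above, where each line is a JSON object with a `text` delta. The `PassThrough` here stands in for the stream the service returns.

// Each NDJSON line carries one { text } delta; print them as they arrive.
const { PassThrough } = require('stream');
const readline = require('readline');

async function printDeltas (readable) {
    const rl = readline.createInterface({ input: readable });
    for await ( const line of rl ) {
        if ( ! line.trim() ) continue;
        const { text } = JSON.parse(line);
        process.stdout.write(text);
    }
}

const demo = new PassThrough();
printDeltas(demo);
demo.write('{"text":"Hello, "}\n');
demo.write('{"text":"world"}\n');
demo.end();
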
@@ -1,159 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { default: dedent } = require('dedent');
const BaseService = require('../../services/BaseService');
const { PassThrough } = require('stream');
const Streaming = require('./lib/Streaming');

/**
 * UsageLimitedChatService - A specialized chat service that returns resource exhaustion messages.
 * Extends BaseService to provide responses indicating the user has exceeded their usage limits.
 * Follows the same response format as real AI providers but with a custom message about upgrading.
 * Can handle both streaming and non-streaming requests consistently.
 */
class UsageLimitedChatService extends BaseService {
    get_default_model () {
        return 'usage-limited';
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            /**
             * Returns a list of available model names
             * @returns {Promise<string[]>} Array containing the single model identifier
             */
            async list () {
                return ['usage-limited'];
            },

            /**
             * Returns model details for the usage-limited model
             * @returns {Promise<Object[]>} Array containing the model details
             */
            async models () {
                return [{
                    id: 'usage-limited',
                    name: 'Usage Limited',
                    context: 16384,
                    cost: {
                        currency: 'usd-cents',
                        tokens: 1_000_000,
                        input: 0,
                        output: 0,
                    },
                }];
            },

            /**
             * Simulates a chat completion request with a usage limit message
             * @param {Object} params - The completion parameters
             * @param {Array} params.messages - Array of chat messages (unused)
             * @param {boolean} params.stream - Whether to stream the response
             * @param {string} params.model - The model to use (unused)
             * @returns {Object|TypedValue} A chat completion response or streamed response
             */
            async complete ({ stream, customLimitMessage }) {
                const limitMessage = customLimitMessage || dedent(`
                    You have reached your AI usage limit for this account.
                `);

                // If streaming is requested, return a streaming response
                if ( stream ) {
                    const streamObj = new PassThrough();

                    const chatStream = new Streaming.AIChatStream({
                        stream: streamObj,
                    });

                    // Schedule the streaming response
                    setTimeout(() => {
                        chatStream.write({
                            type: 'content_block_start',
                            index: 0,
                        });

                        chatStream.write({
                            type: 'content_block_delta',
                            index: 0,
                            delta: {
                                type: 'text',
                                text: limitMessage,
                            },
                        });

                        chatStream.write({
                            type: 'content_block_stop',
                            index: 0,
                        });

                        chatStream.write({
                            type: 'message_stop',
                            stop_reason: 'end_turn',
                        });

                        chatStream.end();
                    }, 10);

                    return {
                        stream: true,
                        init_chat_stream: async ({ chatStream: cs }) => {
                            // Copy contents from our stream to the provided one
                            chatStream.stream.pipe(cs.stream);
                        },
                    };
                }

                // Non-streaming response
                return {
                    'index': 0,
                    message: {
                        'id': '00000000-0000-0000-0000-000000000000',
                        'type': 'message',
                        'role': 'assistant',
                        'model': 'usage-limited',
                        'content': [
                            {
                                'type': 'text',
                                'text': limitMessage,
                            },
                        ],
                        'stop_reason': 'end_turn',
                        'stop_sequence': null,
                        'usage': {
                            'input_tokens': 0,
                            'output_tokens': 1,
                        },
                    },
                    'usage': {
                        'input_tokens': 0,
                        'output_tokens': 1,
                    },
                    'logprobs': null,
                    'finish_reason': 'stop',
                };
            },
        },
    };
}

module.exports = {
    UsageLimitedChatService,
};
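
The streaming branch above always emits the same Claude-style event sequence; shown here as plain data for reference (the message text matches the default limit message):

// Reference data only; mirrors the four chatStream.write() calls above.
const limitMessage = 'You have reached your AI usage limit for this account.';
const events = [
    { type: 'content_block_start', index: 0 },
    { type: 'content_block_delta', index: 0, delta: { type: 'text', text: limitMessage } },
    { type: 'content_block_stop',  index: 0 },
    { type: 'message_stop', stop_reason: 'end_turn' },
];
console.log(events.map(e => e.type).join(' -> '));
// content_block_start -> content_block_delta -> content_block_stop -> message_stop
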
@@ -1,251 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const BaseService = require('../../services/BaseService');
const { Context } = require('../../util/context');
const OpenAIUtil = require('./lib/OpenAIUtil');

/**
 * XAIService class - Provides integration with X.AI's API for chat completions
 * Extends BaseService to implement the puter-chat-completion interface.
 * Handles model management, message adaptation, streaming responses,
 * and usage tracking for X.AI's language models like Grok.
 * @extends BaseService
 */
class XAIService extends BaseService {
    static MODULES = {
        openai: require('openai'),
    };
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    meteringService;

    adapt_model (model) {
        return model;
    }

    /**
     * Initializes the XAI service by setting up the OpenAI client and registering with the AI chat provider
     * @private
     * @returns {Promise<void>} Resolves when initialization is complete
     */
    async _init () {
        this.openai = new this.modules.openai.OpenAI({
            apiKey: this.global_config.services.xai.apiKey,
            baseURL: 'https://api.x.ai/v1',
        });

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
    }

    /**
     * Returns the default model identifier for the XAI service
     * @returns {string} The default model ID 'grok-beta'
     */
    get_default_model () {
        return 'grok-beta';
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            /**
             * Returns a list of available models and their details.
             * See AIChatService for more information.
             *
             * @returns Array<Object> Array of model details
             */
            models () {
                return this.models_();
            },
            /**
             * Returns a list of available model names including their aliases
             * @returns {Promise<string[]>} Array of model identifiers and their aliases
             * @description Retrieves all available model IDs and their aliases,
             * flattening them into a single array of strings that can be used for model selection
             */
            async list () {
                const models = await this.models_();
                const model_names = [];
                for ( const model of models ) {
                    model_names.push(model.id);
                    if ( model.aliases ) {
                        model_names.push(...model.aliases);
                    }
                }
                return model_names;
            },

            /**
             * AI Chat completion method.
             * See AIChatService for more details.
             */
            async complete ({ messages, stream, model, tools }) {
                model = this.adapt_model(model);

                messages = await OpenAIUtil.process_input_messages(messages);

                const completion = await this.openai.chat.completions.create({
                    messages,
                    model: model ?? this.get_default_model(),
                    ...(tools ? { tools } : {}),
                    max_tokens: 1000,
                    stream,
                    ...(stream ? {
                        stream_options: { include_usage: true },
                    } : {}),
                });

                // Metering integration
                const actor = Context.get('actor');

                return OpenAIUtil.handle_completion_output({
                    usage_calculator: ({ usage }) => {
                        const modelDetails = this.models_().find(m => m.id === model || m.aliases?.includes(model));
                        const trackedUsage = {
                            prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                            completion_tokens: usage.completion_tokens ?? 0,
                            cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                        };

                        this.meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelDetails.id}`);
                        const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
                            model_details: modelDetails,
                        });

                        return legacyCostCalculator({ usage });
                    },
                    stream,
                    completion,
                });
            },
        },
    };

    /**
     * Retrieves available AI models and their specifications
     * @returns Array of model objects containing:
     *  - id: Model identifier string
     *  - name: Human readable model name
     *  - context: Maximum context window size
     *  - cost: Pricing information object with currency and rates
     * @private
     */
    models_ () {
        return [
            {
                id: 'grok-beta',
                name: 'Grok Beta',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 500,
                    output: 1500,
                },
            },
            {
                id: 'grok-vision-beta',
                name: 'Grok Vision Beta',
                context: 8192,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 500,
                    output: 1500,
                    image: 1000,
                },
            },
            {
                id: 'grok-3',
                name: 'Grok 3',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 300,
                    output: 1500,
                },
            },
            {
                id: 'grok-3-fast',
                name: 'Grok 3 Fast',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 500,
                    output: 2500,
                },
            },
            {
                id: 'grok-3-mini',
                name: 'Grok 3 Mini',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 30,
                    output: 50,
                },
            },
            {
                id: 'grok-3-mini-fast',
                name: 'Grok 3 Mini Fast',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 60,
                    output: 400,
                },
            },
            {
                id: 'grok-2-vision',
                name: 'Grok 2 Vision',
                context: 8192,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 200,
                    output: 1000,
                },
            },
            {
                id: 'grok-2',
                name: 'Grok 2',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 200,
                    output: 1000,
                },
            },
        ];
    }
}

module.exports = {
    XAIService,
};
@@ -1,58 +0,0 @@
const { nou } = require('../../../util/langutil');
const Streaming = require('../lib/Streaming');
// const claude_sample = require('../samples/claude-1');
const claude_sample = require('../samples/claude-tools-1');

const echo_stream = {
    write: data => {
        console.log(data);
    },
};

const chatStream = new Streaming.AIChatStream({ stream: echo_stream });

let message;
let contentBlock;
for ( const event of claude_sample ) {
    if ( event.type === 'message_start' ) {
        message = chatStream.message();
        continue;
    }
    if ( event.type === 'message_stop' ) {
        message.end();
        message = null;
        continue;
    }

    if ( event.type === 'content_block_start' ) {
        if ( event.content_block.type === 'tool_use' ) {
            contentBlock = message.contentBlock({
                type: event.content_block.type,
                id: event.content_block.id,
                name: event.content_block.name,
            });
            continue;
        }
        contentBlock = message.contentBlock({
            type: event.content_block.type,
        });
        continue;
    }

    if ( event.type === 'content_block_stop' ) {
        contentBlock.end();
        contentBlock = null;
        continue;
    }

    if ( event.type === 'content_block_delta' ) {
        if ( event.delta.type === 'input_json_delta' ) {
            contentBlock.addPartialJSON(event.delta.partial_json);
            continue;
        }
        if ( event.delta.type === 'text_delta' ) {
            contentBlock.addText(event.delta.text);
            continue;
        }
    }
}
@@ -1,61 +0,0 @@
const { nou } = require('../../../util/langutil');
const FunctionCalling = require('../lib/FunctionCalling');
const Streaming = require('../lib/Streaming');
const openai_fish = require('../samples/openai-tools-1');

const echo_stream = {
    write: data => {
        console.log(data);
    },
};

const chatStream = new Streaming.AIChatStream({
    stream: echo_stream,
});

const message = chatStream.message();
let textblock = message.contentBlock({ type: 'text' });
let toolblock = null;
let mode = 'text';

const tool_call_blocks = [];

for ( const chunk of openai_fish ) {
    if ( chunk.usage ) continue;
    if ( chunk.choices.length < 1 ) continue;

    const choice = chunk.choices[0];

    if ( ! nou(choice.delta.content) ) {
        if ( mode === 'tool' ) {
            toolblock.end();
            mode = 'text';
            textblock = message.contentBlock({ type: 'text' });
        }
        textblock.addText(choice.delta.content);
        continue;
    }

    if ( ! nou(choice.delta.tool_calls) ) {
        if ( mode === 'text' ) {
            mode = 'tool';
            textblock.end();
        }
        for ( const tool_call of choice.delta.tool_calls ) {
            if ( ! tool_call_blocks[tool_call.index] ) {
                toolblock = message.contentBlock({
                    type: 'tool_use',
                    id: tool_call.function.name,
                });
                tool_call_blocks[tool_call.index] = toolblock;
            } else {
                toolblock = tool_call_blocks[tool_call.index];
            }
            toolblock.addPartialJSON(tool_call.function.arguments);
        }
    }
}

if ( mode === 'text' ) textblock.end();
if ( mode === 'tool' ) toolblock.end();
message.end();
@@ -1,122 +0,0 @@
module.exports = class FunctionCalling {
    /**
     * Normalizes the 'tools' object in-place.
     *
     * This function will accept an array of tools provided by the
     * user, and produce a normalized object that can then be
     * converted to the appropriate representation for another
     * service.
     *
     * We will accept conventions from either service that a user
     * might expect to work, prioritizing the OpenAI convention
     * when conflicting conventions are present.
     *
     * @param {*} tools
     */
    static normalize_tools_object (tools) {
        for ( let i = 0 ; i < tools.length ; i++ ) {
            const tool = tools[i];
            let normalized_tool = {};

            const normalize_function = fn => {
                const normal_fn = {};
                let parameters =
                    fn.parameters ||
                    fn.input_schema;

                normal_fn.parameters = parameters ?? {
                    type: 'object',
                };

                if ( parameters?.properties ) {
                    parameters = this.normalize_json_schema(parameters);
                }

                if ( fn.name ) {
                    normal_fn.name = fn.name;
                }

                if ( fn.description ) {
                    normal_fn.description = fn.description;
                }

                return normal_fn;
            };

            if ( tool.input_schema ) {
                normalized_tool = {
                    type: 'function',
                    function: normalize_function(tool),
                };
            } else if ( tool.type === 'function' ) {
                normalized_tool = {
                    type: 'function',
                    function: normalize_function(tool.function),
                };
            } else {
                normalized_tool = {
                    type: 'function',
                    function: normalize_function(tool),
                };
            }

            tools[i] = normalized_tool;
        }
        return tools;
    }

    static normalize_json_schema (schema) {
        if ( ! schema ) return schema;

        if ( schema.type === 'object' ) {
            if ( ! schema.properties ) {
                return schema;
            }

            const keys = Object.keys(schema.properties);
            for ( const key of keys ) {
                schema.properties[key] = this.normalize_json_schema(schema.properties[key]);
            }
        }

        if ( schema.type === 'array' ) {
            if ( ! schema.items ) {
                schema.items = {};
            } else {
                schema.items = this.normalize_json_schema(schema.items);
            }
        }

        return schema;
    }

    /**
     * This function will convert a normalized tools object to the
     * format expected by OpenAI.
     *
     * @param {*} tools
     * @returns
     */
    static make_openai_tools (tools) {
        return tools;
    }

    /**
     * This function will convert a normalized tools object to the
     * format expected by Claude.
     *
     * @param {*} tools
     * @returns
     */
    static make_claude_tools (tools) {
        if ( ! tools ) return undefined;
        return tools.map(tool => {
            const { name, description, parameters } = tool.function;
            return {
                name,
                description,
                input_schema: parameters,
            };
        });
    }
};
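
A small demonstration of the convergence described above, assuming the class is available via require (the path is illustrative): an OpenAI-style tool and a Claude-style tool normalize to the same shape, which `make_claude_tools` can convert back.

const FunctionCalling = require('./FunctionCalling'); // path illustrative

const openaiStyle = {
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Get the weather',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
    },
};
const claudeStyle = {
    name: 'get_weather',
    description: 'Get the weather',
    input_schema: { type: 'object', properties: { city: { type: 'string' } } },
};

const normalized = FunctionCalling.normalize_tools_object([openaiStyle, claudeStyle]);
// Both entries now look like { type: 'function', function: { parameters, name, description } }
console.log(FunctionCalling.make_claude_tools(normalized)[0].input_schema.type); // 'object'
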
@@ -1,186 +0,0 @@
const { whatis } = require('../../../util/langutil');

module.exports = class Messages {
    /**
     * Normalizes a single message into a standardized format with role and content array.
     * Converts string messages to objects, ensures content is an array of content blocks,
     * transforms tool_calls into tool_use content blocks, and coerces content items into objects.
     *
     * @param {string|Object} message - The message to normalize, either a string or message object
     * @param {Object} params - Optional parameters including default role
     * @returns {Object} Normalized message with role and content array
     * @throws {Error} If message is not a string or object
     * @throws {Error} If message has no content property and no tool_calls
     * @throws {Error} If any content item is not a string or object
     */
    static normalize_single_message (message, params = {}) {
        params = Object.assign({
            role: 'user',
        }, params);

        if ( typeof message === 'string' ) {
            message = {
                content: [message],
            };
        }
        if ( whatis(message) !== 'object' ) {
            throw new Error('each message must be a string or object');
        }
        if ( ! message.role ) {
            message.role = params.role;
        }
        if ( ! message.content ) {
            if ( message.tool_calls ) {
                message.content = [];
                for ( let i = 0 ; i < message.tool_calls.length ; i++ ) {
                    const tool_call = message.tool_calls[i];
                    message.content.push({
                        type: 'tool_use',
                        id: tool_call.id,
                        name: tool_call.function.name,
                        input: tool_call.function.arguments,
                    });
                }
                delete message.tool_calls;
            } else {
                throw new Error('each message must have a \'content\' property');
            }
        }
        if ( whatis(message.content) !== 'array' ) {
            message.content = [message.content];
        }
        // Coerce each content block into an object
        for ( let i = 0 ; i < message.content.length ; i++ ) {
            if ( whatis(message.content[i]) === 'string' ) {
                message.content[i] = {
                    type: 'text',
                    text: message.content[i],
                };
            }
            if ( whatis(message.content[i]) !== 'object' ) {
                throw new Error('each message content item must be a string or object');
            }
            if ( typeof message.content[i].text === 'string' && !message.content[i].type ) {
                message.content[i].type = 'text';
            }
        }

        // Remove "text" properties from content blocks with type=tool_use
        for ( let i = 0 ; i < message.content.length ; i++ ) {
            if ( message.content[i].type !== 'tool_use' ) {
                continue;
            }
            if ( message.content[i].hasOwnProperty('text') ) {
                delete message.content[i].text;
            }
        }

        return message;
    }

    /**
     * Normalizes an array of messages by applying normalize_single_message to each,
     * then splits messages with multiple content blocks into separate messages,
     * and finally merges consecutive messages from the same role.
     *
     * @param {Array} messages - Array of messages to normalize
     * @param {Object} params - Optional parameters passed to normalize_single_message
     * @returns {Array} Normalized and merged array of messages
     */
    static normalize_messages (messages, params = {}) {
        for ( let i = 0 ; i < messages.length ; i++ ) {
            messages[i] = this.normalize_single_message(messages[i], params);
        }

        // Split multi-block messages into one message per content block
        // (tool_result blocks and all other block types split the same way)
        // TODO: unit test this
        messages = [...messages];
        for ( let i = 0 ; i < messages.length ; i++ ) {
            let message = messages[i];
            let separated_messages = [];
            for ( let j = 0 ; j < message.content.length ; j++ ) {
                separated_messages.push({
                    ...message,
                    content: [message.content[j]],
                });
            }
            messages.splice(i, 1, ...separated_messages);
        }

        // If multiple messages are from the same role, merge them
        let merged_messages = [];
        let current_role = null;
        for ( let i = 0 ; i < messages.length ; i++ ) {
            if ( current_role === messages[i].role ) {
                merged_messages[merged_messages.length - 1].content.push(...messages[i].content);
            } else {
                merged_messages.push(messages[i]);
                current_role = messages[i].role;
            }
        }

        return merged_messages;
    }
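
    // Illustrative walkthrough (sample inputs are made up): strings default to
    // the 'user' role, adjacent same-role messages merge, and tool_calls become
    // tool_use content blocks.
    //
    //   Messages.normalize_messages([
    //       'hi',
    //       'there',
    //       { role: 'assistant', tool_calls: [{ id: 't1',
    //           function: { name: 'get_weather', arguments: { location: 'Vancouver' } } }] },
    //   ]);
    //   // => [
    //   //     { role: 'user', content: [
    //   //         { type: 'text', text: 'hi' },
    //   //         { type: 'text', text: 'there' } ] },
    //   //     { role: 'assistant', content: [
    //   //         { type: 'tool_use', id: 't1', name: 'get_weather',
    //   //           input: { location: 'Vancouver' } } ] },
    //   // ]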

    /**
     * Separates system messages from other messages in the array.
     *
     * @param {Array} messages - Array of messages to process
     * @returns {Array} Tuple containing [system_messages, non_system_messages]
     */
    static extract_and_remove_system_messages (messages) {
        let system_messages = [];
        let new_messages = [];
        for ( let i = 0 ; i < messages.length ; i++ ) {
            if ( messages[i].role === 'system' ) {
                system_messages.push(messages[i]);
            } else {
                new_messages.push(messages[i]);
            }
        }
        return [system_messages, new_messages];
    }

    /**
     * Extracts all text content from messages, handling various message formats.
     * Processes strings, objects with content arrays, and nested content structures,
     * joining all text with spaces.
     *
     * @param {Array} messages - Array of messages to extract text from
     * @returns {string} Concatenated text content from all messages
     * @throws {Error} If text content is not a string
     */
    static extract_text (messages) {
        return messages.map(m => {
            if ( whatis(m) === 'string' ) {
                return m;
            }
            if ( whatis(m) !== 'object' ) {
                return '';
            }
            if ( whatis(m.content) === 'array' ) {
                return m.content.map(c => c.text).join(' ');
            }
            if ( whatis(m.content) === 'string' ) {
                return m.content;
            } else {
                const is_text_type = m.content.type === 'text' ||
                    !m.content.hasOwnProperty('type');
                if ( is_text_type ) {
                    if ( whatis(m.content.text) !== 'string' ) {
                        throw new Error('text content must be a string');
                    }
                    return m.content.text;
                }
                return '';
            }
        }).join(' ');
    }
};
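
// Illustrative (sample messages are made up): extract_text flattens the
// supported message shapes into a single space-joined string.
//
//   Messages.extract_text([
//       'plain string',
//       { role: 'user', content: [{ type: 'text', text: 'in an array' }] },
//       { role: 'user', content: { type: 'text', text: 'single block' } },
//   ]);
//   // => 'plain string in an array single block'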
@@ -1,65 +0,0 @@
module.exports = [
    {
        type: 'message_start',
        message: {
            id: 'msg_01KKQeaUDpMzNovH9utP5qJc',
            type: 'message',
            role: 'assistant',
            model: 'claude-3-5-sonnet-20241022',
            content: [],
            stop_reason: null,
            stop_sequence: null,
            usage: {
                input_tokens: 82,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
                output_tokens: 1,
            },
        },
    },
    {
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'Some' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: ' species of fish, like the electric' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: {
            type: 'text_delta',
            text: ' eel, can generate powerful electrical',
        },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: ' charges of up to 860 ' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'volts to stun prey an' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'd defend themselves.' },
    },
    { type: 'content_block_stop', index: 0 },
    {
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: 35 },
    },
    { type: 'message_stop' },
];
@@ -1,76 +0,0 @@
module.exports = [
    {
        type: 'message_start',
        message: {
            id: 'msg_01GAy4THpFyFJcpxqWXBMrvx',
            type: 'message',
            role: 'assistant',
            model: 'claude-3-5-sonnet-20241022',
            content: [],
            stop_reason: null,
            stop_sequence: null,
            usage: {
                input_tokens: 458,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
                output_tokens: 1,
            },
        },
    },
    {
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'I' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: {
            type: 'text_delta',
            text: "'ll check the weather in Vancouver for you.",
        },
    },
    { type: 'content_block_stop', index: 0 },
    {
        type: 'content_block_start',
        index: 1,
        content_block: {
            type: 'tool_use',
            id: 'toolu_01E12jeyCenTtntPBk1j7rgc',
            name: 'get_weather',
            input: {},
        },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: '' },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: '{"location"' },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: ': "Van' },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: 'couver"}' },
    },
    { type: 'content_block_stop', index: 1 },
    {
        type: 'message_delta',
        delta: { stop_reason: 'tool_use', stop_sequence: null },
        usage: { output_tokens: 64 },
    },
    { type: 'message_stop' },
];
@@ -1,46 +0,0 @@
module.exports = [
    {
        id: 'chatcmpl-AvspmQTvFBBjKsFhHYhyiphFmKMY8',
        object: 'chat.completion.chunk',
        created: 1738358842,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_bd83329f63',
        choices: [
            {
                index: 0,
                delta: {
                    role: 'assistant',
                    content: '',
                    refusal: null,
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    },
    ...[
        'Fish', ' are', ' diverse', ' aquatic', ' creatures', ' that', ' play',
        ' a', ' crucial', ' role', ' in', ' marine', ' ecosystems', ' and',
        ' human', ' diets', '.',
    ].map(str => ({
        id: 'chatcmpl-AvspmQTvFBBjKsFhHYhyiphFmKMY8',
        object: 'chat.completion.chunk',
        created: 1738358842,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_bd83329f63',
        choices: [
            {
                index: 0,
                delta: {
                    content: str,
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    })),
];
@@ -1,102 +0,0 @@
module.exports = [
    {
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [
            {
                index: 0,
                delta: {
                    role: 'assistant',
                    content: null,
                    tool_calls: [
                        {
                            index: 0,
                            id: 'call_ULl8cRKFQbYeJSIZ3giLAg6r',
                            type: 'function',
                            function: {
                                name: 'get_weather',
                                arguments: '',
                            },
                        },
                    ],
                    refusal: null,
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    },
    ...[
        '{"', 'location', '":"',
        'V', 'ancouver',
        '"}',
    ].map(str => ({
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [
            {
                index: 0,
                delta: {
                    tool_calls: [
                        {
                            index: 0,
                            function: {
                                arguments: str,
                            },
                        },
                    ],
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    })),
    {
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [
            {
                index: 0,
                delta: {},
                logprobs: null,
                finish_reason: 'tool_calls',
            },
        ],
        usage: null,
    },
    {
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [],
        usage: {
            prompt_tokens: 62,
            completion_tokens: 16,
            total_tokens: 78,
            prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0 },
            completion_tokens_details: {
                reasoning_tokens: 0,
                audio_tokens: 0,
                accepted_prediction_tokens: 0,
                rejected_prediction_tokens: 0,
            },
        },
    },
];
@@ -1,19 +1,30 @@
const { AnomalyService } = require('../../services/AnomalyService');
const { GroupService } = require('../../services/auth/GroupService');
const { PermissionService } = require('../../services/auth/PermissionService');
const { CommandService } = require('../../services/CommandService');
const { SqliteDatabaseAccessService } = require('../../services/database/SqliteDatabaseAccessService');
const { DetailProviderService } = require('../../services/DetailProviderService');
const { EventService } = require('../../services/EventService');
const { GetUserService } = require('../../services/GetUserService');
const { MeteringServiceWrapper } = require('../../services/MeteringService/MeteringServiceWrapper.mjs');
const { DBKVServiceWrapper } = require('../../services/repositories/DBKVStore/index.mjs');
const { SUService } = require('../../services/SUService');
const { TraceService } = require('../../services/TraceService');
const { AlarmService } = require('../core/AlarmService');
const APIErrorService = require('../web/APIErrorService');
import { FilesystemService } from '../../filesystem/FilesystemService';
import { AnomalyService } from '../../services/AnomalyService';
import { AuthService } from '../../services/auth/AuthService';
import { GroupService } from '../../services/auth/GroupService';
import { PermissionService } from '../../services/auth/PermissionService';
import { TokenService } from '../../services/auth/TokenService';
import { CommandService } from '../../services/CommandService';
import { SqliteDatabaseAccessService } from '../../services/database/SqliteDatabaseAccessService';
import { DetailProviderService } from '../../services/DetailProviderService';
import { EventService } from '../../services/EventService';
import { FeatureFlagService } from '../../services/FeatureFlagService';
import { GetUserService } from '../../services/GetUserService';
import { InformationService } from '../../services/information/InformationService';
import { MeteringServiceWrapper } from '../../services/MeteringService/MeteringServiceWrapper.mjs';
import { NotificationService } from '../../services/NotificationService';
import { RegistrantService } from '../../services/RegistrantService';
import { RegistryService } from '../../services/RegistryService';
import { DBKVServiceWrapper } from '../../services/repositories/DBKVStore/index.mjs';
import { ScriptService } from '../../services/ScriptService';
import { SessionService } from '../../services/SessionService';
import { SUService } from '../../services/SUService';
import { SystemValidationService } from '../../services/SystemValidationService';
import { TraceService } from '../../services/TraceService';
import { AlarmService } from '../core/AlarmService';
import APIErrorService from '../web/APIErrorService';

class TestCoreModule {
export class TestCoreModule {
    async install (context) {
        const services = context.get('services');
        services.registerService('whoami', DetailProviderService);
@@ -30,9 +41,16 @@ class TestCoreModule {
        services.registerService('group', GroupService);
        services.registerService('anomaly', AnomalyService);
        services.registerService('api-error', APIErrorService);
        services.registerService('system-validation', SystemValidationService);
        services.registerService('registry', RegistryService);
        services.registerService('__registrant', RegistrantService);
        services.registerService('feature-flag', FeatureFlagService);
        services.registerService('token', TokenService);
        services.registerService('information', InformationService);
        services.registerService('auth', AuthService);
        services.registerService('session', SessionService);
        services.registerService('notification', NotificationService);
        services.registerService('script', ScriptService);
        services.registerService('filesystem', FilesystemService);
    }
}

module.exports = {
    TestCoreModule,
};

@@ -18,7 +18,6 @@
 */
const { AdvancedBase } = require('@heyputer/putility');
const { WeakConstructorFeature } = require('../../traits/WeakConstructorFeature');
const { Context } = require('../../util/context');

/**
 * BaseES is a base class for Entity Store classes.
@@ -82,11 +81,6 @@ class BaseES extends AdvancedBase {

            this[k] = this.impl_methods[k];
        }

        this.log = Context.get('services').get('log-service')
            .create(`ES:${this.entity_name}:${this.constructor.name}`, {
                concern: 'es',
            });
    }

    async provide_context ( args ) {
@@ -97,9 +91,6 @@ class BaseES extends AdvancedBase {
        if ( this._on_context_provided ) {
            await this._on_context_provided(args);
        }

        this.log = Context.get('services').get('log-service')
            .create(`ES:${this.entity_name}:${this.constructor.name}`);
    }
    async read (uid) {
        let entity = await this.call_on_impl_('read', uid);

@@ -27,59 +27,57 @@ const { BaseES } = require('./BaseES');
const PERM_READ_ALL_SUBDOMAINS = 'read-all-subdomains';

class SubdomainES extends BaseES {
        static METHODS = {
            async _on_context_provided () {
                const services = this.context.get('services');
                this.db = services.get('database').get(DB_READ, 'subdomains');
            },
            async create_predicate (id) {
                if ( id === 'user-can-edit' ) {
                    return new Eq({
                        key: 'owner',
                        value: Context.get('user').id,
                    });
                }
            },
            async upsert (entity, extra) {
                if ( ! extra.old_entity ) {
                    await this._check_max_subdomains();
                }
    async _on_context_provided () {
        const services = this.context.get('services');
        this.db = services.get('database').get(DB_READ, 'subdomains');
    }
    async create_predicate (id) {
        if ( id === 'user-can-edit' ) {
            return new Eq({
                key: 'owner',
                value: Context.get('user').id,
            });
        }
    }
    async upsert (entity, extra) {
        if ( ! extra.old_entity ) {
            await this._check_max_subdomains();
        }

                return await this.upstream.upsert(entity, extra);
            },
            async select (options) {
                const actor = Context.get('actor');
                const user = actor.type.user;
        return await this.upstream.upsert(entity, extra);
    }
    async select (options) {
        const actor = Context.get('actor');
        const user = actor.type.user;

                // Note: we don't need to worry about read;
                // non-owner users don't have permission to list
                // but they still have permission to read.
                const svc_permission = this.context.get('services').get('permission');
                const has_permission_to_read_all = await svc_permission.check(Context.get('actor'), PERM_READ_ALL_SUBDOMAINS);
        // Note: we don't need to worry about read;
        // non-owner users don't have permission to list
        // but they still have permission to read.
        const svc_permission = this.context.get('services').get('permission');
        const has_permission_to_read_all = await svc_permission.check(Context.get('actor'), PERM_READ_ALL_SUBDOMAINS);

                if ( ! has_permission_to_read_all ) {
                    options.predicate = options.predicate.and(new Eq({
                        key: 'owner',
                        value: user.id,
                    }));
                }
        if ( ! has_permission_to_read_all ) {
            options.predicate = options.predicate.and(new Eq({
                key: 'owner',
                value: user.id,
            }));
        }

                return await this.upstream.select(options);
            },
            async _check_max_subdomains () {
                const user = Context.get('user');
        return await this.upstream.select(options);
    }
    async _check_max_subdomains () {
        const user = Context.get('user');

                let cnt = await this.db.read('SELECT COUNT(id) AS subdomain_count FROM subdomains WHERE user_id = ?',
                    [user.id]);
        let cnt = await this.db.read('SELECT COUNT(id) AS subdomain_count FROM subdomains WHERE user_id = ?',
            [user.id]);

                const max_subdomains = user.max_subdomains ?? config.max_subdomains_per_user;
        const max_subdomains = user.max_subdomains ?? config.max_subdomains_per_user;

                if ( max_subdomains && cnt[0].subdomain_count >= max_subdomains ) {
                    throw APIError.create('subdomain_limit_reached', null, {
                        limit: max_subdomains,
                    });
                }
            },
        if ( max_subdomains && cnt[0].subdomain_count >= max_subdomains ) {
            throw APIError.create('subdomain_limit_reached', null, {
                limit: max_subdomains,
            });
        }
        };
}

@@ -18,7 +18,7 @@
 */

import { describe, it, expect, beforeEach, vi } from 'vitest';
const kvjs = require('@heyputer/kv.js');
import { kv } from '../util/kvSingleton';
const uuid = require('uuid');
const proxyquire = require('proxyquire');

@@ -156,7 +156,7 @@ const get_mock_context = () => {
};

describe('GET /launch-apps', () => {
    globalThis.kv = new kvjs();
    globalThis.kv = kv;

    it('should return expected format', async () => {
        // First call
@@ -165,52 +165,6 @@ describe('GET /launch-apps', () => {
        req_mock.query = {};
        await get_launch_apps(req_mock, res_mock);

        // TODO: bring this back, figure out what it's testing,
        // document why it needs to be here (if it does)
        // or remove it.
        if ( false ) {
            expect(res_mock.send).toHaveBeenCalledOnce();

            const call = res_mock.send.mock.calls[0];
            const response = call[0];
            console.log('response', response);

            expect(response).toBeTypeOf('object');

            expect(response).toHaveProperty('recommended');
            expect(response.recommended).toBeInstanceOf(Array);
            expect(response.recommended).toHaveLength(apps_names_expected_to_exist.length);
            expect(response.recommended).toEqual(
                data_mockapps
                    .filter(app => apps_names_expected_to_exist.includes(app.name))
                    .map(app => ({
                        uuid: app.uid,
                        name: app.name,
                        title: app.title,
                        icon: app.icon,
                        godmode: app.godmode,
                        maximize_on_start: app.maximize_on_start,
                        index_url: app.index_url,
                    })));

            expect(response).toHaveProperty('recent');
            expect(response.recent).toBeInstanceOf(Array);
            expect(response.recent).toHaveLength(data_appopens.length);
            expect(response.recent).toEqual(
                data_mockapps
                    .filter(app => data_appopens.map(app_open => app_open.app_uid).includes(app.uid))
                    .map(app => ({
                        uuid: app.uid,
                        name: app.name,
                        title: app.title,
                        icon: app.icon,
                        godmode: app.godmode,
                        maximize_on_start: app.maximize_on_start,
                        index_url: app.index_url,
                    })));
        }

        // << HOW TO FIX >>
        // If you updated the list of recommended apps,
        // you can simply update this number to match the new length

src/backend/src/services/BaseService.d.ts
@@ -8,11 +8,11 @@ export interface ServiceResources {

export type EventHandler = (id: string, ...args: any[]) => any;

export type Logger = {
export interface Logger {
    debug: (...args: any[]) => any;
    info: (...args: any[]) => any;
    [key: string]: any;
};
}

export class BaseService {
    constructor (service_resources: ServiceResources, ...a: any[]);
@@ -26,7 +26,7 @@ export class BaseService {
    log: Logger;
    errors: any;

    as(interfaceName: string): Record<string, unknown>;
    as (interfaceName: string): Record<string, unknown>;

    run_as_early_as_possible (): Promise<void>;
    construct (): Promise<void>;

@@ -75,7 +75,7 @@ class ChatAPIService extends BaseService {
        const models = await svc_su.sudo(async () => {
            const svc_aiChat = this.services.get('ai-chat');
            // Return the simple model list which contains basic model information
            return svc_aiChat.simple_model_list;
            return svc_aiChat.list();
        });

        // Return the list of models
@@ -98,7 +98,7 @@ class ChatAPIService extends BaseService {
        const models = await svc_su.sudo(async () => {
            const svc_aiChat = this.services.get('ai-chat');
            // Return the detailed model list which includes cost and capability information
            return svc_aiChat.detail_model_list;
            return svc_aiChat.models();
        });

        // Return the detailed list of models

@@ -51,8 +51,8 @@ describe('ChatAPIService', () => {
    beforeEach(() => {
        // Mock AIChatService
        mockAIChatService = {
            simple_model_list: ['model1', 'model2'],
            detail_model_list: [
            list: () => ['model1', 'model2'],
            models: () => [
                { id: 'model1', name: 'Model 1', cost: { input: 1, output: 2 } },
                { id: 'model2', name: 'Model 2', cost: { input: 3, output: 4 } },
            ],
@@ -159,7 +159,7 @@ describe('ChatAPIService', () => {
        // Verify
        expect(mockSUService.sudo).toHaveBeenCalled();
        expect(mockRes.json).toHaveBeenCalledWith({
            models: mockAIChatService.simple_model_list,
            models: mockAIChatService.list(),
        });
    });
});
@@ -179,7 +179,7 @@ describe('ChatAPIService', () => {
        // Verify
        expect(mockSUService.sudo).toHaveBeenCalled();
        expect(mockRes.json).toHaveBeenCalledWith({
            models: mockAIChatService.detail_model_list,
            models: mockAIChatService.models(),
        });
    });
});

@@ -514,7 +514,7 @@ export class MeteringService {
        const currentMonth = this.#getMonthYearString();
        const keyPrefix = `${METRICS_PREFIX}:puter:`;
        return this.#superUserService.sudo(async () => {
            const keys = [];
            const keys: string[] = [];
            for ( let shard = 0; shard < MeteringService.GLOBAL_SHARD_COUNT; shard++ ) {
                keys.push(`${keyPrefix}${shard}:${currentMonth}`);
            }

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const FILE_SYSTEM_COST_MAP = {
    'filesystem:ingress:bytes': 0,

@@ -1,19 +1,19 @@
import { AWS_POLLY_COST_MAP } from './awsPollyCostMap';
import { AWS_TEXTRACT_COST_MAP } from './awsTextractCostMap';
import { CLAUDE_COST_MAP } from './claudeCostMap';
import { DEEPSEEK_COST_MAP } from './deepSeekCostMap';
import { FILE_SYSTEM_COST_MAP } from './fileSystemCostMap';
import { GEMINI_COST_MAP } from './geminiCostMap';
import { GROQ_COST_MAP } from './groqCostMap';
import { KV_COST_MAP } from './kvCostMap';
import { MISTRAL_COST_MAP } from './mistralCostMap';
import { OPENAI_COST_MAP } from './openAiCostMap';
import { OPENAI_IMAGE_COST_MAP } from './openaiImageCostMap';
import { OPENROUTER_COST_MAP } from './openrouterCostMap';
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap';
import { TOGETHER_COST_MAP } from './togetherCostMap';
import { XAI_COST_MAP } from './xaiCostMap';
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap';
import { AWS_POLLY_COST_MAP } from './awsPollyCostMap.js';
import { AWS_TEXTRACT_COST_MAP } from './awsTextractCostMap.js';
import { CLAUDE_COST_MAP } from './claudeCostMap.js';
import { DEEPSEEK_COST_MAP } from './deepSeekCostMap.js';
import { FILE_SYSTEM_COST_MAP } from './fileSystemCostMap.js';
import { GEMINI_COST_MAP } from './geminiCostMap.js';
import { GROQ_COST_MAP } from './groqCostMap.js';
import { KV_COST_MAP } from './kvCostMap.js';
import { MISTRAL_COST_MAP } from './mistralCostMap.js';
import { OPENAI_COST_MAP } from './openAiCostMap.js';
import { OPENAI_IMAGE_COST_MAP } from './openaiImageCostMap.js';
import { OPENROUTER_COST_MAP } from './openrouterCostMap.js';
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap.js';
import { TOGETHER_COST_MAP } from './togetherCostMap.js';
import { XAI_COST_MAP } from './xaiCostMap.js';
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap.js';

export const COST_MAPS = {
    ...AWS_POLLY_COST_MAP,

@@ -3,7 +3,7 @@
// All costs are in microcents (1/1,000,000th of a cent). Example: 1,000,000 microcents = $0.01 USD.
// Naming pattern: "openai:{model}:{size}" or "openai:{model}:hd:{size}" for HD images

import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const OPENAI_IMAGE_COST_MAP = {
    // DALL-E 3

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

// Prices are per generated video-second.
export const OPENAI_VIDEO_COST_MAP = {

@@ -1,5 +1,5 @@
import { REGISTERED_USER_FREE } from './registeredUserFreePolicy';
import { TEMP_USER_FREE } from './tempUserFreePolicy';
import { REGISTERED_USER_FREE } from './registeredUserFreePolicy.js';
import { TEMP_USER_FREE } from './tempUserFreePolicy.js';

export const SUB_POLICIES = [
    TEMP_USER_FREE,

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const REGISTERED_USER_FREE = {
    id: 'user_free',

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const TEMP_USER_FREE = {
    id: 'temp_free',

@@ -19,6 +19,8 @@
 */
const { AdvancedBase } = require('@heyputer/putility');
const BaseService = require('./BaseService');
const { kv } = require('../util/kvSingleton');
const uuidv4 = require('uuid').v4;

/**
 * @class MapCollection
@@ -29,10 +31,6 @@ const BaseService = require('./BaseService');
 * This class provides methods for basic CRUD operations (create, read, update, delete) on the key-value pairs, as well as methods for checking the existence of a key and retrieving all keys in the collection.
 */
class MapCollection extends AdvancedBase {
    static MODULES = {
        kv: globalThis.kv,
        uuidv4: require('uuid').v4,
    };
    /**
     * @method MapCollection#_mk_key
     * @description Creates a unique key for the map collection.
@@ -43,7 +41,7 @@ class MapCollection extends AdvancedBase {
        super();
        // We use kvjs instead of a plain object because it doesn't
        // have a limit on the number of keys it can store.
        this.map_id = this.modules.uuidv4();
        this.map_id = uuidv4();
        this.kv = kv;
    }

@@ -5,21 +5,21 @@ import { RegistryService } from './RegistryService';
describe('RegistryService', async () => {
    // Initialize globalThis.kv for testing
    beforeAll(() => {
        if (!globalThis.kv) {
        if ( ! globalThis.kv ) {
            globalThis.kv = new Map();
            globalThis.kv.set = function(key, value) {
            globalThis.kv.set = function (key, value) {
                return Map.prototype.set.call(this, key, value);
            };
            globalThis.kv.get = function(key) {
            globalThis.kv.get = function (key) {
                return Map.prototype.get.call(this, key);
            };
            globalThis.kv.exists = function(key) {
            globalThis.kv.exists = function (key) {
                return this.has(key);
            };
            globalThis.kv.del = function(key) {
            globalThis.kv.del = function (key) {
                return this.delete(key);
            };
            globalThis.kv.keys = function(pattern) {
            globalThis.kv.keys = function (pattern) {
                const prefix = pattern.replace('*', '');
                return Array.from(this.keys()).filter(k => k.startsWith(prefix));
            };
@@ -72,27 +72,27 @@ describe('RegistryService', async () => {
    it('should allow checking existence in collection', () => {
        const collection = registryService.register_collection('exists-collection');
        collection.set('existing-key', 'value');
        expect(collection.exists('existing-key')).toBe(true);
        expect(collection.exists('non-existing-key')).toBe(false);
        expect(collection.exists('existing-key')).toBeTruthy();
        expect(collection.exists('non-existing-key')).toBeFalsy();
    });

    it('should allow deleting from collection', () => {
    it('should allow deleting from collection', async () => {
        const collection = registryService.register_collection('delete-collection');
        collection.set('delete-key', 'value');
        expect(collection.exists('delete-key')).toBe(true);
        const res = collection.exists('delete-key');
        expect(collection.exists('delete-key')).toBeTruthy();
        collection.del('delete-key');
        expect(collection.exists('delete-key')).toBe(false);
        expect(collection.exists('delete-key')).toBeFalsy();
    });

    it('should support multiple independent collections', () => {
        const collection1 = registryService.register_collection('coll1');
        const collection2 = registryService.register_collection('coll2');

        collection1.set('key', 'value1');
        collection2.set('key', 'value2');

        expect(collection1.get('key')).toBe('value1');
        expect(collection2.get('key')).toBe('value2');
    });
});

src/backend/src/services/SUService.d.ts
@@ -1,8 +0,0 @@
import type { Actor } from './auth/Actor';

export class SUService {
    _construct (): void;
    get_system_actor (): Promise<Actor>;
    sudo<T>(callback: () => Promise<T>): Promise<T>;
    sudo<T>(actorOrCallback: Actor, callback: () => Promise<T>): Promise<T>;
}
@@ -18,10 +18,10 @@
 */

// METADATA // {"ai-commented":{"service":"openai-completion","model":"gpt-4o-mini"}}
const { Context } = require('../util/context');
const { TeePromise } = require('@heyputer/putility').libs.promise;
const { Actor, UserActorType } = require('./auth/Actor');
const BaseService = require('./BaseService');
import { TeePromise } from '@heyputer/putility/src/libs/promise.js';
import { Context } from '../util/context.js';
import { Actor, UserActorType } from './auth/Actor.js';
import BaseService from './BaseService.js';

/**
 * "SUS"-Service (Super-User Service)
@@ -33,7 +33,7 @@ const BaseService = require('./BaseService');
 * instances, providing methods to retrieve the system actor
 * and perform actions with elevated privileges.
 */
class SUService extends BaseService {
export class SUService extends BaseService {
    /**
     * Initializes the SUService instance, creating promises for system user
     * and system actor. This method does not take any parameters and does
@@ -110,8 +110,4 @@ class SUService extends BaseService {
            user: actor.type.user,
        }).arun(callback);
    }
}

module.exports = {
    SUService,
};
}
src/backend/src/services/User.d.ts
@@ -4,7 +4,7 @@ export interface IUser {
    id: number,
    uuid: string,
    username: string,
    email: string,
    email?: string,
    subscription?: (typeof SUB_POLICIES)[number]['id'],
    metadata?: Record<string, unknown> & { hasDevAccountAccess?: boolean }
}
@@ -18,7 +18,7 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const BaseService = require('../../services/BaseService');
const BaseService = require('../BaseService');

/**
 * Service class that manages AI interface registrations and configurations.
src/backend/src/services/ai/chat/.gitignore (new file)
@@ -0,0 +1,2 @@
*.js
*.js.map
src/backend/src/services/ai/chat/AIChatService.ts (new file)
@@ -0,0 +1,652 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import { createId as cuid2 } from '@paralleldrive/cuid2';
import { PassThrough } from 'stream';
import { APIError } from '../../../api/APIError.js';
import { ErrorService } from '../../../modules/core/ErrorService.js';
import { Context } from '../../../util/context.js';
import { kv } from '../../../util/kvSingleton.js';
import BaseService from '../../BaseService.js';
import { BaseDatabaseAccessService } from '../../database/BaseDatabaseAccessService.js';
import { DB_WRITE } from '../../database/consts.js';
import { DriverService } from '../../drivers/DriverService.js';
import { TypedValue } from '../../drivers/meta/Runtime.js';
import { EventService } from '../../EventService.js';
import { MeteringService } from '../../MeteringService/MeteringService.js';
import { AsModeration } from '../moderation/AsModeration.js';
import { normalize_tools_object } from '../utils/FunctionCalling.js';
import { extract_text, normalize_messages, normalize_single_message } from '../utils/Messages.js';
import Streaming from '../utils/Streaming.js';
import { ClaudeProvider } from './providers/ClaudeProvider/ClaudeProvider.js';
import { FakeChatProvider } from './providers/FakeChatProvider.js';
import { GeminiChatProvider } from './providers/GeminiProvider/GeminiChatProvider.js';
import { GroqAIProvider } from './providers/GroqAiProvider/GroqAIProvider.js';
import { MistralAIProvider } from './providers/MistralAiProvider/MistralAiProvider.js';
import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatProvider.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js';
import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js';
import { OllamaChatProvider } from './providers/OllamaProvider.js';
import { DeepSeekProvider } from './providers/DeepSeekProvider/DeepSeekProvider.js';
import { XAIProvider } from './providers/XAIProvider/XAIProvider.js';
import { TogetherAIProvider } from './providers/TogetherAiProvider/TogetherAIProvider.js';
import { OpenRouterProvider } from './providers/OpenRouterProvider/OpenRouterProvider.js';

// Maximum number of fallback attempts when a model fails, including the first attempt
const MAX_FALLBACKS = 3 + 1; // includes first attempt

export class AIChatService extends BaseService {

    static SERVICE_NAME = 'ai-chat';

    static DEFAULT_PROVIDER = 'openai-completion';

    get meteringService (): MeteringService {
        return this.services.get('meteringService').meteringService;
    }

    get db (): BaseDatabaseAccessService {
        return this.services.get('database').get(DB_WRITE, 'ai-service');
    }

    get errorService (): ErrorService {
        return this.services.get('error-service');
    }

    get eventService (): EventService {
        return this.services.get('event');
    }

    get driverService (): DriverService {
        return this.services.get('driver');
    }

    getProvider (name: string): IChatProvider | undefined {
        return this.#providers[name];
    }

    #providers: Record<string, IChatProvider> = {};
    #modelIdMap: Record<string, IChatModel[]> = {};

    /** Driver interfaces */
    static IMPLEMENTS = {
        ['driver-capabilities']: {
            supports_test_mode (iface: string, method_name: string) {
                return iface === 'puter-chat-completion' &&
                    method_name === 'complete';
            },
        },
        ['puter-chat-completion']: {

            async models () {
                return await (this as unknown as AIChatService).models();
            },

            async list () {
                return await (this as unknown as AIChatService).list();
            },

            async complete (...parameters: Parameters<AIChatService['complete']>) {
                return await (this as unknown as AIChatService).complete(...parameters);
            },
        },
    };

    getModel ({ modelId, provider }: { modelId: string, provider?: string }) {
        const models = this.#modelIdMap[modelId];

        if ( ! models ) {
            throw new Error(`Model not found, please try one of the following models: ${ Object.keys(this.#modelIdMap).join(', ')}`);
        }
        if ( ! provider ) {
            return models[0];
        }
        const model = models.find(m => m.provider === provider);
        return model ?? models[0];
    }
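
    // Illustrative usage (the ids below appear in this commit's fixtures):
    // getModel resolves an id or alias via #modelIdMap, preferring the
    // requested provider and otherwise returning the first (cheapest) entry.
    //
    //   svc.getModel({ modelId: 'gpt-4o-mini-2024-07-18', provider: 'openai-completion' });
    //   // => the openai-completion entry if registered, else the first entry
    //   svc.getModel({ modelId: 'claude-3-5-sonnet-20241022' });
    //   // => the lowest-input-cost entry for that id across providers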

    private async registerProviders () {
        const claudeConfig = this.config.providers?.['claude'] || this.global_config?.services?.['claude'];
        if ( claudeConfig && claudeConfig.apiKey ) {
            this.#providers['claude'] = new ClaudeProvider(this.meteringService, claudeConfig, this.errorService);
        }
        const openAiConfig = this.config.providers?.['openai-completion'] || this.global_config?.services?.['openai-completion'] || this.global_config?.openai;
        if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) {
            this.#providers['openai-completion'] = new OpenAiChatProvider(this.meteringService, openAiConfig);
        }
        const geminiConfig = this.config.providers?.['gemini'] || this.global_config?.services?.['gemini'];
        if ( geminiConfig && geminiConfig.apiKey ) {
            this.#providers['gemini'] = new GeminiChatProvider(this.meteringService, geminiConfig);
        }
        const groqConfig = this.config.providers?.['groq'] || this.global_config?.services?.['groq'];
        if ( groqConfig && groqConfig.apiKey ) {
            this.#providers['groq'] = new GroqAIProvider(groqConfig, this.meteringService);
        }
        const deepSeekConfig = this.config.providers?.['deepseek'] || this.global_config?.services?.['deepseek'];
        if ( deepSeekConfig && deepSeekConfig.apiKey ) {
            this.#providers['deepseek'] = new DeepSeekProvider(deepSeekConfig, this.meteringService);
        }
        const mistralConfig = this.config.providers?.['mistral'] || this.global_config?.services?.['mistral'];
        if ( mistralConfig && mistralConfig.apiKey ) {
            this.#providers['mistral'] = new MistralAIProvider(mistralConfig, this.meteringService);
        }
        const xaiConfig = this.config.providers?.['xai'] || this.global_config?.services?.['xai'];
        if ( xaiConfig && xaiConfig.apiKey ) {
            this.#providers['xai'] = new XAIProvider(xaiConfig, this.meteringService);
        }
        const togetherConfig = this.config.providers?.['together-ai'] || this.global_config?.services?.['together-ai'];
        if ( togetherConfig && togetherConfig.apiKey ) {
            this.#providers['together-ai'] = new TogetherAIProvider(togetherConfig, this.meteringService);
        }
        const openrouterConfig = this.config.providers?.['openrouter'] || this.global_config?.services?.['openrouter'];
        if ( openrouterConfig && openrouterConfig.apiKey ) {
            this.#providers['openrouter'] = new OpenRouterProvider(openrouterConfig, this.meteringService);
        }

        // ollama if local instance detected

        // Autodiscover Ollama service and then check if it's disabled in the config.
        // If config.services.ollama.enabled is undefined, it means the user hasn't set it, so we should default to true.
        const ollamaConfig = this.config.providers?.['ollama'] || this.global_config?.services?.ollama;
        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
            if ( ollamaConfig?.enabled === undefined ) {
                return true;
            }
            return ollamaConfig?.enabled;
        }).catch(_err => {
            return false;
        });
        // User can disable ollama in the config, but by default it should be enabled if discovery is successful
        if ( ollama_available || ollamaConfig?.enabled ) {
            console.log('Local AI support detected! Registering Ollama');
            this.#providers['ollama'] = new OllamaChatProvider(ollamaConfig, this.meteringService);
        }

        // fake and usage-limited providers last
        this.#providers['fake-chat'] = new FakeChatProvider();
        this.#providers['usage-limited-chat'] = new UsageLimitedChatProvider();

        // emit event for extensions to add providers
        const extensionProviders = {} as Record<string, IChatProvider>;
        await this.eventService.emit('ai.chat.registerProviders', extensionProviders);
        for ( const providerName in extensionProviders ) {
            if ( this.#providers[providerName] ) {
                console.warn('AIChatService: provider name conflict for ', providerName, ' registering with -extension suffix');
                this.#providers[`${providerName}-extension`] = extensionProviders[providerName];
                continue;
            }
            this.#providers[providerName] = extensionProviders[providerName];
        }
    }
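
    // A sketch of the configuration registerProviders reads (key names match
    // the lookups above; values are placeholders, not real credentials):
    //
    //   {
    //       "services": {
    //           "claude":            { "apiKey": "..." },
    //           "openai-completion": { "apiKey": "..." },  // or legacy "secret_key"
    //           "gemini":            { "apiKey": "..." },
    //           "openrouter":        { "apiKey": "..." },
    //           "ollama":            { "enabled": true }
    //       }
    //   }
    //
    // The same keys may instead live under this service's own config as
    // config.providers["<name>"], which takes precedence.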

    protected async '__on_boot.consolidation' () {
        // register chat providers here
        await this.registerProviders();

        // build model id map
        for ( const providerName in this.#providers ) {
            const provider = this.#providers[providerName];

            // alias all driver requests to go here to support legacy routing
            this.driverService.register_service_alias(AIChatService.SERVICE_NAME,
                providerName,
                { iface: 'puter-chat-completion' });

            // build model id map
            for ( const model of await provider.models() ) {
                model.id = model.id.trim().toLowerCase();
                if ( ! this.#modelIdMap[model.id] ) {
                    this.#modelIdMap[model.id] = [];
                }
                this.#modelIdMap[model.id].push({ ...model, provider: providerName });
                if ( model.aliases ) {
                    for ( let alias of model.aliases ) {
                        alias = alias.trim().toLowerCase();
                        // join arrays which are aliased the same
                        if ( ! this.#modelIdMap[alias] ) {
                            this.#modelIdMap[alias] = this.#modelIdMap[model.id];
                            continue;
                        }
                        if ( this.#modelIdMap[alias] !== this.#modelIdMap[model.id] ) {
                            this.#modelIdMap[alias].push({ ...model, provider: providerName });
                            this.#modelIdMap[model.id] = this.#modelIdMap[alias];
                            continue;
                        }
                    }
                }
                this.#modelIdMap[model.id].sort((a, b) => {
                    if ( a.costs[a.input_cost_key || 'input_tokens'] === b.costs[b.input_cost_key || 'input_tokens'] ) {
                        return a.id.length - b.id.length; // use shorter id since it's likely the official one
                    }
                    return a.costs[a.input_cost_key || 'input_tokens'] - b.costs[b.input_cost_key || 'input_tokens'];
                });
            }
        }
    }
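
    // Illustrative result (model ids are examples): after boot, every alias
    // shares the same array instance as its canonical id, and each array is
    // sorted by input-token cost ascending, so index 0 is the cheapest source
    // for that model.
    //
    //   #modelIdMap['claude-3-5-sonnet-20241022']
    //   // => [ { id: 'claude-3-5-sonnet-20241022', provider: 'claude', ... },
    //   //      { id: 'claude-3-5-sonnet-20241022', provider: 'openrouter', ... } ]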

    models () {
        const seen = new Set<string>();
        return Object.entries(this.#modelIdMap)
            .map(([_, models]) => models)
            .flat()
            .filter(model => {
                if ( seen.has(model.id) ) {
                    return false;
                }
                seen.add(model.id);
                return true;
            })
            .sort((a, b) => {
                if ( a.provider === b.provider ) {
                    return a.id.localeCompare(b.id);
                }
                return a.provider!.localeCompare(b.provider!);
            });
    }

    list () {
        return this.models().map(m => m.id).sort();
    }

    async complete (parameters: ICompleteArguments) {
        const clientDriverCall = Context.get('client_driver_call');
        let { test_mode: testMode, response_metadata: resMetadata, intended_service: legacyProviderName } = clientDriverCall as { test_mode?: boolean; response_metadata: Record<string, unknown>; intended_service?: string };
        const actor = Context.get('actor');

        // Parenthesized so an explicit provider wins, and the legacy field is
        // only used when it names something other than this service.
        let intendedProvider = parameters.provider ||
            (legacyProviderName === AIChatService.SERVICE_NAME ? '' : legacyProviderName); // should now all go through here

        if ( !parameters.model && !intendedProvider ) {
            intendedProvider = AIChatService.DEFAULT_PROVIDER;
        }
        if ( !parameters.model && intendedProvider ) {
            parameters.model = this.#providers[intendedProvider].getDefaultModel();
        }
        let model = this.getModel({ modelId: parameters.model, provider: intendedProvider }) || this.getFallbackModel(parameters.model, [], []);
        const abuseModel = this.getModel({ modelId: 'abuse' });
        const usageLimitedModel = this.getModel({ modelId: 'usage-limited' });

        const completionId = cuid2();
        const event = {
            actor,
            completionId,
            allow: true,
            intended_service: intendedProvider || '',
            parameters,
        } as Record<string, unknown>;
        await this.eventService.emit('ai.prompt.validate', event);
        if ( ! event.allow ) {
            testMode = true;
            if ( event.custom ) parameters.custom = event.custom;
        }

        if ( parameters.messages ) {
            parameters.messages =
                normalize_messages(parameters.messages);
        }

        // Skip moderation for Ollama (local service) and other local services
        const should_moderate = !testMode &&
            parameters.provider !== 'ollama';

        if ( should_moderate && !await this.moderate(parameters) ) {
            testMode = true;
            throw APIError.create('moderation_failed');
        }

        // Only set moderated flag if we actually ran moderation
        if ( !testMode && should_moderate ) {
            Context.set('moderated', true);
        }

        if ( testMode ) {
            if ( event.abuse ) {
                model = abuseModel;
            }
        }

        if ( parameters.tools ) {
            normalize_tools_object(parameters.tools);
        }

        if ( ! model ) {
            // TODO DS: route them to new endpoints once ready
            const availableModelsUrl = `${this.global_config.origin}/puterai/chat/models`;

            throw APIError.create('field_invalid', undefined, {
                key: 'model',
                expected: `a valid model name from ${availableModelsUrl}`,
                got: model,
            });
        }

        const inputTokenCost = model.costs[model.input_cost_key || 'input_tokens'] as number;
        const outputTokenCost = model.costs[model.output_cost_key || 'output_tokens'] as number;
        const maxTokens = model.max_tokens;
        const text = extract_text(parameters.messages);
        const approximateTokenCount = Math.floor(((text.length / 4) + (text.split(/\s+/).length * (4 / 3))) / 2); // see https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
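        // Worked example of the estimate above (illustrative numbers): for a
        // 400-character, 80-word prompt it averages two common heuristics:
        //   chars / 4      = 400 / 4       = 100
        //   words * 4 / 3  = 80 * 1.333... ≈ 106.67
        //   average        = (100 + 106.67) / 2 ≈ 103 tokens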
|
||||
const approximateInputCost = approximateTokenCount * inputTokenCost;
|
||||
const usageAllowed = await this.meteringService.hasEnoughCredits(actor, approximateInputCost);
|
||||
|
||||
// Handle usage limits reached case
|
||||
if ( ! usageAllowed ) {
|
||||
model = usageLimitedModel;
|
||||
}
|
||||
|
||||
const availableCredits = await this.meteringService.getRemainingUsage(actor);
|
||||
const maxAllowedOutput =
|
||||
availableCredits - approximateInputCost;
|
||||
|
||||
const maxAllowedOutputTokens =
|
||||
maxAllowedOutput / outputTokenCost;
|
||||
|
||||
if ( maxAllowedOutputTokens ) {
|
||||
parameters.max_tokens = Math.floor(Math.min(parameters.max_tokens ?? Number.POSITIVE_INFINITY,
|
||||
maxAllowedOutputTokens,
|
||||
maxTokens - approximateTokenCount));
|
||||
if ( parameters.max_tokens < 1 ) {
|
||||
parameters.max_tokens = undefined;
|
||||
}
|
||||
}

        // call model provider
        let res: Awaited<ReturnType<IChatProvider['complete']>>;
        const provider = this.#providers[model.provider!];
        if ( ! provider ) {
            throw new Error(`no provider found for model ${model.id}`);
        }
        try {
            res = await provider.complete({
                ...parameters,
                model: model.id,
                provider: model.provider,
            });
        } catch (e) {
            const tried: string[] = [];
            const triedProviders: string[] = [];

            tried.push(model.id);
            triedProviders.push(model.provider!);

            let error = e as Error;

            while ( error ) {
                // TODO: simplify our error handling
                // Distinguishing between user errors and service errors
                // is very messy because of different conventions between
                // services. This is a best-effort attempt to catch user
                // errors and throw them as 400s.
                const isRequestError = (() => {
                    if ( error instanceof APIError ) {
                        return true;
                    }
                    if ( (error as unknown as { type: string }).type === 'invalid_request_error' ) {
                        return true;
                    }
                    return false;
                })();

                if ( isRequestError ) {
                    console.error((error as Error));
                    throw APIError.create('error_400_from_delegate', error as Error, {
                        delegate: model.provider,
                        message: (error as Error).message,
                    });
                }

                if ( this.config.disable_fallback_mechanisms ) {
                    console.error((error as Error));
                    throw error;
                }

                console.error('error calling ai chat provider for model: ', model, '\n trying fallbacks...');

                // No fallbacks for pseudo-models
                if ( model.provider === 'fake-chat' ) {
                    break;
                }

                const fallback = this.getFallbackModel(model.id, tried, triedProviders);

                if ( ! fallback ) {
                    throw new Error('no fallback model available');
                }

                const {
                    fallbackModelId,
                    fallbackProvider,
                } = fallback;

                console.warn('model fallback', {
                    fallbackModelId,
                    fallbackProvider,
                });

                let fallBackModel = this.getModel({ modelId: fallbackModelId, provider: fallbackProvider });

                const fallbackUsageAllowed = await this.meteringService.hasEnoughCredits(actor, 1); // we checked earlier; assume the same costs

                if ( ! fallbackUsageAllowed ) {
                    fallBackModel = usageLimitedModel;
                }

                const provider = this.#providers[fallBackModel.provider!];
                if ( ! provider ) {
                    throw new Error(`no provider found for model ${fallBackModel.id}`);
                }
                try {
                    res = await provider.complete({
                        ...parameters,
                        model: fallBackModel.id,
                        provider: fallBackModel.provider,
                    });
                    model = fallBackModel;
                    break; // success
                } catch (e) {
                    console.error('error during fallback attempt: ', e);
                    error = e as Error;
                }
            }
        }

        resMetadata.service_used = model.provider; // legacy field
        resMetadata.providerUsed = model.id;

        // Add flag if we're using the usage-limited service
        if ( model.provider === 'usage-limited-chat' ) {
            resMetadata.usage_limited = true;
        }

        const username = actor.type?.user?.username;

        if ( ! res! ) {
            throw new Error('No response from AI chat provider');
        }

        res.via_ai_chat_service = true; // legacy field; always true now
        if ( res.stream ) {
            if ( res.init_chat_stream ) {
                const stream = new PassThrough();
                // TODO DS: simplify how we handle streaming responses and remove custom runtime types
                const retval = new TypedValue({
                    $: 'stream',
                    content_type: 'application/x-ndjson',
                    chunked: true,
                }, stream);

                const chatStream = new Streaming.AIChatStream({
                    stream,
                });

                (async () => {
                    try {
                        await res.init_chat_stream({ chatStream });
                    } catch (e) {
                        this.errors.report('error during stream response', {
                            source: e,
                        });
                        stream.write(`${JSON.stringify({
                            type: 'error',
                            message: (e as Error).message,
                        }) }\n`);
                        stream.end();
                    } finally {
                        if ( res.finally_fn ) {
                            await res.finally_fn();
                        }
                    }
                })();

                return retval;
            }

            return res;
        }
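        // The streamed value above is newline-delimited JSON: each line is one
        // event object. A minimal consumer sketch (readLines is a hypothetical
        // helper; the real transport depends on the caller):
        //
        //     for await ( const line of readLines(stream) ) {
        //         const event = JSON.parse(line);
        //         if ( event.type === 'error' ) throw new Error(event.message);
        //         if ( event.type === 'text' ) process.stdout.write(event.text);
        //     }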

        await this.eventService.emit('ai.prompt.complete', {
            username,
            intended_service: intendedProvider,
            parameters,
            result: res,
            model_used: model.id,
            service_used: model.provider,
        });

        if ( parameters.response?.normalize ) {
            res = {
                ...res,
                message: normalize_single_message(res.message),
                normalized: true,
            };
        }
        return res;
    }

    async moderate ({ messages }: { messages: Array<unknown>; }) {
        if ( process.env.TEST_MODERATION_FAILURE ) return false;
        const fulltext = extract_text(messages);
        let mod_last_error;
        let mod_result: Awaited<ReturnType<IChatProvider['checkModeration']>>;
        try {
            const openaiProvider = this.#providers['openai-completion'];
            mod_result = await openaiProvider.checkModeration(fulltext);
            if ( mod_result.flagged ) return false;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }
        try {
            const claudeChatProvider = this.#providers['claude'];
            const mod = new AsModeration({
                chatProvider: claudeChatProvider,
                model: 'claude-3-haiku-20240307',
            });
            if ( ! await mod.moderate(fulltext) ) {
                return false;
            }
            mod_last_error = null;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }

        if ( mod_last_error ) {
            this.log.error('moderation error', {
                fulltext,
                mod_last_error,
            });
            throw new Error('no working moderation service');
        }
        return true;
    }
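    // Moderation cascade: OpenAI's moderation endpoint is tried first; if it is
    // unavailable, a Claude model is prompted to act as a moderator via
    // AsModeration. Returning true means "safe to proceed". Example outcomes
    // (hypothetical): flagged by OpenAI -> false; OpenAI down but Claude
    // approves -> true; both providers throw -> an error is raised instead of
    // silently letting content through.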

    /**
     * Find an appropriate fallback model. Models with the same id but a
     * different provider are preferred; otherwise equivalent models hosted
     * on openrouter/togetherai are looked up (cached in KV) and the first
     * one not already tried is selected.
     *
     * @param modelId - id of the model that failed
     * @param triedIds - model ids already attempted
     * @param triedProviders - providers already attempted
     * @returns the fallback model id and provider, or undefined if none remain
     */
    getFallbackModel (modelId: string, triedIds: string[], triedProviders: string[]) {
        const models = this.#modelIdMap[modelId];

        if ( ! models ) {
            this.log.error('could not find model', { modelId });
            throw new Error('could not find model');
        }

        const targetModel = models[0];

        // First see if any models with the same id but a different provider exist
        for ( const model of models ) {
            if ( triedProviders.includes(model.provider!) ) continue;
            if ( model.provider === 'fake-chat' ) continue;
            return {
                fallbackProvider: model.provider,
                fallbackModelId: model.id,
            };
        }

        // Then check KV for the cached list
        let potentialFallbacks = kv.get(`aichat:fallbacks:${targetModel.id}`);

        if ( ! potentialFallbacks ) {
            // Calculate the list
            const models = this.models();

            let aiProvider, modelToSearch;
            if ( targetModel.id.startsWith('openrouter:') || targetModel.id.startsWith('togetherai:') ) {
                [aiProvider, modelToSearch] = targetModel.id.replace('openrouter:', '').replace('togetherai:', '').toLowerCase().split('/');
            } else {
                [aiProvider, modelToSearch] = [
                    targetModel.provider!.toLowerCase().replace('gemini', 'google').replace('openai-completion', 'openai'),
                    targetModel.id.toLowerCase(),
                ];
            }

            const potentialMatches = models.filter(model => {
                const possibleModelNames = [
                    `openrouter:${aiProvider}/${modelToSearch}`,
                    `togetherai:${aiProvider}/${modelToSearch}`,
                    ...(targetModel.aliases?.map(alias => [
                        `openrouter:${aiProvider}/${alias}`,
                        `togetherai:${aiProvider}/${alias}`,
                    ])?.flat() ?? []),
                ];

                return !!possibleModelNames.find(possibleName => model.id.toLowerCase() === possibleName);
            }).slice(0, MAX_FALLBACKS);

            kv.set(`aichat:fallbacks:${targetModel.id}`, potentialMatches);
            potentialFallbacks = potentialMatches;
        }

        for ( const model of potentialFallbacks ) {
            if ( triedIds.includes(model.id) ) continue;
            if ( model.provider === 'fake-chat' ) continue;

            return {
                fallbackProvider: model.provider,
                fallbackModelId: model.id,
            };
        }

        // No fallbacks available
        console.error('no fallbacks', {
            potentialFallbacks,
            triedIds,
            triedProviders,
        });
    }
}
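A sketch of how a caller drives the fallback selection (hypothetical `service` instance and example ids; the return shape comes from the method above):

// Hypothetical driver for getFallbackModel; the real loop lives in complete().
const tried: string[] = ['claude-sonnet-4-5-20250929'];
const triedProviders: string[] = ['claude'];
const fallback = service.getFallbackModel('claude-sonnet-4-5-20250929', tried, triedProviders);
if ( fallback ) {
    // e.g. { fallbackProvider: 'openrouter',
    //        fallbackModelId: 'openrouter:anthropic/claude-sonnet-4-5' }
    tried.push(fallback.fallbackModelId);
    triedProviders.push(fallback.fallbackProvider!);
}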
26 src/backend/src/services/ai/chat/providers/ChatProvider.ts Normal file
@@ -0,0 +1,26 @@
import { ModerationCreateResponse } from 'openai/resources/moderations.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './types';

/**
 * Abstract base class for AI chat providers, and default hollow implementation.
 */
export class ChatProvider implements IChatProvider {
    getDefaultModel (): string {
        return '';
    }
    models (): IChatModel[] | Promise<IChatModel[]> {
        return [];
    }
    list (): string[] | Promise<string[]> {
        return [];
    }
    async checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        return {
            flagged: false,
            results: {} as ModerationCreateResponse,
        };
    }
    async complete (_arg: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        throw new Error('Method not implemented.');
    }
}
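A minimal sketch of extending the base class (EchoProvider is hypothetical; it only shows which members a concrete provider typically overrides, and the object shapes are assumptions cast to the interface types):

import { ChatProvider } from './ChatProvider';
import { IChatModel, IChatProvider, ICompleteArguments } from './types';

class EchoProvider extends ChatProvider {
    getDefaultModel (): string {
        return 'echo-1';
    }
    models (): IChatModel[] {
        // One zero-cost model; the costs shape follows the other model files.
        return [{
            id: 'echo-1',
            costs_currency: 'usd-cents',
            costs: { tokens: 1_000_000, input_tokens: 0, output_tokens: 0 },
            max_tokens: 8192,
        } as IChatModel];
    }
    async complete ({ messages }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        // Echo the last message back; real providers call their SDK here.
        const last = messages[messages.length - 1];
        return {
            message: { role: 'assistant', content: last?.content ?? '' },
            usage: { input_tokens: 0, output_tokens: 0 },
            finish_reason: 'stop',
        } as Awaited<ReturnType<IChatProvider['complete']>>;
    }
}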
@@ -1,33 +1,25 @@
 import { describe, expect, it, test } from 'vitest';
-import { createTestKernel } from '../../../tools/test.mjs';
-import { COST_MAPS } from '../../services/MeteringService/costMaps';
-import { SUService } from '../../services/SUService';
-import { AIChatService } from './AIChatService';
-import { ClaudeService } from './ClaudeService';
+import { createTestKernel } from '../../../../../../tools/test.mjs';
+import { COST_MAPS } from '../../../../MeteringService/costMaps/index.js';
+import { SUService } from '../../../../SUService.js';
+import { ClaudeProvider } from './ClaudeProvider.js';

-describe('ClaudeService ', async () => {
+describe('ClaudeProvider ', async () => {
     const testKernel = await createTestKernel({
         serviceMap: {
-            'claude': ClaudeService,
-            'ai-chat': AIChatService,
         },
         initLevelString: 'init',
         testCore: true,
         serviceConfigOverrideMap: {
             'database': {
                 path: ':memory:',
             },
             'claude': {
                 apiKey: process.env.PUTER_CLAUDE_API_KEY,
             },
         },
     });

-    const target = testKernel.services!.get('claude') as ClaudeService;
+    const target = new ClaudeProvider(testKernel.services!.get('meteringService'), { apiKey: process.env.PUTER_CLAUDE_API_KEY || '' }, testKernel.services?.get('error-service'));
     const su = testKernel.services!.get('su') as SUService;

     it('should have all models mapped in cost maps', async () => {
-        const models = await target.models();
+        const models = target.models();

         for ( const model of models ) {
             const entry = Object.entries(COST_MAPS).find(([key, _value]) => key.startsWith('claude') && key.includes(model.id));
@@ -0,0 +1,321 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import Anthropic, { toFile } from '@anthropic-ai/sdk';
import { Message } from '@anthropic-ai/sdk/resources';
import { BetaUsage } from '@anthropic-ai/sdk/resources/beta.js';
import { MessageCreateParams as BetaMessageCreateParams } from '@anthropic-ai/sdk/resources/beta/messages/messages.js';
import { MessageCreateParams, Usage } from '@anthropic-ai/sdk/resources/messages.js';
import mime from 'mime-types';
import FSNodeParam from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { ErrorService } from '../../../../../modules/core/ErrorService.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { make_claude_tools } from '../../../utils/FunctionCalling.js';
import { extract_and_remove_system_messages } from '../../../utils/Messages.js';
import { AIChatStream, AIChatTextStream, AIChatToolUseStream } from '../../../utils/Streaming.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { CLAUDE_MODELS } from './models.js';
export class ClaudeProvider implements IChatProvider {
    anthropic: Anthropic;

    #meteringService: MeteringService;

    errorService: ErrorService;

    constructor (meteringService: MeteringService, config: { apiKey: string }, errorService: ErrorService) {
        this.#meteringService = meteringService;
        this.errorService = errorService;
        this.anthropic = new Anthropic({
            apiKey: config.apiKey,
            // 10 minutes is the default; we need to override the timeout to
            // disable an "aggressive" preemptive error that's thrown
            // erroneously by the SDK.
            // (https://github.com/anthropics/anthropic-sdk-typescript/issues/822)
            timeout: 10 * 60 * 1001,
        });
    }
    getDefaultModel () {
        return 'claude-haiku-4-5-20251001';
    }

    async list () {
        const models = this.models();
        const model_names: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
            if ( model.aliases ) {
                model_names.push(...model.aliases);
            }
        }
        return model_names;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        tools = make_claude_tools(tools);

        let system_prompts: string | any[];
        // unsure why system_prompts is an array but it always seems to only have exactly one element,
        // and the real array of system_prompts seems to be the [0].content -- NS
        [system_prompts, messages] = extract_and_remove_system_messages(messages);

        // Apply the cache control tag to all content blocks
        if (
            system_prompts.length > 0 &&
            system_prompts[0].cache_control &&
            system_prompts[0]?.content
        ) {
            system_prompts[0].content = system_prompts[0].content.map((prompt: { cache_control: unknown }) => {
                prompt.cache_control = system_prompts[0].cache_control;
                return prompt;
            });
        }

        messages = messages.map(message => {
            if ( message.cache_control ) {
                message.content[0].cache_control = message.cache_control;
            }
            delete message.cache_control;
            return message;
        });
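        // Shape sketch (hypothetical message): an incoming message like
        //     { role: 'user', cache_control: { type: 'ephemeral' },
        //       content: [{ type: 'text', text: '...' }] }
        // leaves this map as
        //     { role: 'user',
        //       content: [{ type: 'text', text: '...', cache_control: { type: 'ephemeral' } }] }
        // i.e. the message-level tag is pushed down onto the first content
        // block, which is where Anthropic's API expects it.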

        const modelUsed = this.models().find(m => [m.id, ...(m.aliases || [])].includes(model)) || this.models().find(m => m.id === this.getDefaultModel())!;
        const sdkParams: MessageCreateParams = {
            model: modelUsed.id,
            max_tokens: Math.floor(max_tokens ||
                ((
                    model === 'claude-3-5-sonnet-20241022'
                    || model === 'claude-3-5-sonnet-20240620'
                ) ? 8192 : this.models().filter(e => (e.id === model || e.aliases?.includes(model)))[0]?.max_tokens || 4096)), // required
            temperature: temperature || 0, // required
            ...( (system_prompts && system_prompts[0]?.content) ? {
                system: system_prompts[0]?.content,
            } : {}),
            tool_choice: {
                type: 'auto',
                disable_parallel_tool_use: true,
            },
            messages,
            ...(tools ? { tools } : {}),
        } as MessageCreateParams;

        let beta_mode = false;

        // Perform file uploads
        const file_delete_tasks: { file_id: string }[] = [];
        const actor = Context.get('actor');
        const { user } = actor.type;

        const file_input_tasks: any[] = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises: Promise<unknown>[] = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });

                const mimeType = mime.contentType(await task.node.get('name'));

                beta_mode = true;
                const fileUpload = await this.anthropic.beta.files.upload({
                    file: await toFile(stream, undefined, { type: mimeType as string }),
                }, {
                    betas: ['files-api-2025-04-14'],
                } as Parameters<typeof this.anthropic.beta.files.upload>[1]);

                file_delete_tasks.push({ file_id: fileUpload.id });
                // We have to copy a table from the documentation here:
                // https://docs.anthropic.com/en/docs/build-with-claude/files
                const contentBlockTypeForFileBasedOnMime = (() => {
                    if ( mimeType && mimeType.startsWith('image/') ) {
                        return 'image';
                    }
                    if ( mimeType && mimeType.startsWith('text/') ) {
                        return 'document';
                    }
                    if ( mimeType && (mimeType === 'application/pdf' || mimeType === 'application/x-pdf') ) {
                        return 'document';
                    }
                    return 'container_upload';
                })();

                delete task.contentPart.puter_path;
                task.contentPart.type = contentBlockTypeForFileBasedOnMime;
                task.contentPart.source = {
                    type: 'file',
                    file_id: fileUpload.id,
                };
            })());
        }
        await Promise.all(promises);

        const cleanup_files = async () => {
            const promises: Promise<unknown>[] = [];
            for ( const task of file_delete_tasks ) {
                promises.push((async () => {
                    try {
                        await this.anthropic.beta.files.delete(task.file_id,
                            { betas: ['files-api-2025-04-14'] });
                    } catch (e) {
                        this.errorService.report('claude:file-delete-task', {
                            source: e,
                            trace: true,
                            alarm: true,
                            extra: { file_id: task.file_id },
                        });
                    }
                })());
            }
            await Promise.all(promises);
        };

        if ( beta_mode ) {
            (sdkParams as BetaMessageCreateParams).betas = ['files-api-2025-04-14'];
        }
        const anthropic = (beta_mode ? this.anthropic.beta : this.anthropic) as Anthropic;

        if ( stream ) {
            const init_chat_stream = async ({ chatStream }: { chatStream: AIChatStream }) => {
                const completion = await anthropic.messages.stream(sdkParams as MessageCreateParams);
                const usageSum: Record<string, number> = {};

                let message, contentBlock;
                for await ( const event of completion ) {
                    if ( event.type === 'message_delta' ) {
                        const usageObject = (event?.usage ?? {});
                        const meteredData = this.#usageFormatterUtil(usageObject as Usage | BetaUsage);

                        for ( const key in meteredData ) {
                            if ( ! usageSum[key] ) usageSum[key] = 0;
                            usageSum[key] += meteredData[key as keyof typeof meteredData];
                        }
                    }

                    if ( event.type === 'message_start' ) {
                        message = chatStream.message();
                        continue;
                    }
                    if ( event.type === 'message_stop' ) {
                        message!.end();
                        message = null;
                        continue;
                    }

                    if ( event.type === 'content_block_start' ) {
                        if ( event.content_block.type === 'tool_use' ) {
                            contentBlock = message!.contentBlock({
                                type: event.content_block.type,
                                id: event.content_block.id,
                                name: event.content_block.name,
                            });
                            continue;
                        }
                        contentBlock = message!.contentBlock({
                            type: event.content_block.type,
                        });
                        continue;
                    }

                    if ( event.type === 'content_block_stop' ) {
                        contentBlock!.end();
                        contentBlock = null;
                        continue;
                    }

                    if ( event.type === 'content_block_delta' ) {
                        if ( event.delta.type === 'input_json_delta' ) {
                            (contentBlock as AIChatToolUseStream)!.addPartialJSON(event.delta.partial_json);
                            continue;
                        }
                        if ( event.delta.type === 'text_delta' ) {
                            (contentBlock as AIChatTextStream)!.addText(event.delta.text);
                            continue;
                        }
                    }
                }
                chatStream.end(usageSum);
                const costsOverrideFromModel = Object.fromEntries(Object.entries(usageSum).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${modelUsed.id}`, costsOverrideFromModel);
            };

            return {
                init_chat_stream,
                stream: true,
                finally_fn: cleanup_files,
            };
        }

        const msg = await anthropic.messages.create(sdkParams);
        await cleanup_files();

        const usage = this.#usageFormatterUtil((msg as Message).usage as Usage | BetaUsage);
        const costsOverrideFromModel = Object.fromEntries(Object.entries(usage).map(([k, v]) => {
            return [k, v * (modelUsed.costs[k] || 0)];
        }));
        this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${modelUsed.id}`, costsOverrideFromModel);

        // TODO DS: cleanup old usage tracking
        return {
            message: msg,
            usage: usage,
            finish_reason: 'stop',
        };
    }

    #usageFormatterUtil (usage: Usage | BetaUsage) {
        return {
            input_tokens: usage?.input_tokens || 0,
            ephemeral_5m_input_tokens: usage?.cache_creation?.ephemeral_5m_input_tokens || usage.cache_creation_input_tokens || 0, // their API reports cache-creation tokens inconsistently
            ephemeral_1h_input_tokens: usage?.cache_creation?.ephemeral_1h_input_tokens || 0,
            cache_read_input_tokens: usage?.cache_read_input_tokens || 0,
            output_tokens: usage?.output_tokens || 0,
        };
    };
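    // Cost sketch (hypothetical numbers): costs in the model table are
    // usd-cents per costs.tokens (1,000,000) tokens. For claude-haiku-4-5
    // (input_tokens: 100, output_tokens: 500), a call with
    // usage = { input_tokens: 2_000, output_tokens: 1_000 } produces override
    // entries input_tokens: 2_000 * 100 and output_tokens: 1_000 * 500, which
    // the metering service is assumed to scale by the per-million denominator
    // when recording the charge.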

    models () {
        return CLAUDE_MODELS;
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('CheckModeration Not provided.');
    }
}
@@ -0,0 +1,184 @@
import { IChatModel } from '../types';

export const CLAUDE_MODELS: IChatModel[] = [
    {
        id: 'claude-opus-4-5-20251101',
        aliases: ['claude-opus-4-5-latest', 'claude-opus-4-5', 'claude-opus-4.5'],
        name: 'Claude Opus 4.5',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 500,
            ephemeral_5m_input_tokens: 500 * 1.25,
            ephemeral_1h_input_tokens: 500 * 2,
            cache_read_input_tokens: 500 * 0.1,
            output_tokens: 2500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-haiku-4-5-20251001',
        aliases: ['claude-haiku-4.5', 'claude-haiku-4-5', 'claude-4-5-haiku'],
        name: 'Claude Haiku 4.5',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 100,
            ephemeral_5m_input_tokens: 100 * 1.25,
            ephemeral_1h_input_tokens: 100 * 2,
            cache_read_input_tokens: 100 * 0.1,
            output_tokens: 500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-sonnet-4-5-20250929',
        aliases: ['claude-sonnet-4.5', 'claude-sonnet-4-5'],
        name: 'Claude Sonnet 4.5',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-opus-4-1-20250805',
        aliases: ['claude-opus-4-1'],
        name: 'Claude Opus 4.1',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 1500,
            ephemeral_5m_input_tokens: 1500 * 1.25,
            ephemeral_1h_input_tokens: 1500 * 2,
            cache_read_input_tokens: 1500 * 0.1,
            output_tokens: 7500,
        },
        context: 200000,
        max_tokens: 32000,
    },
    {
        id: 'claude-opus-4-20250514',
        aliases: ['claude-opus-4', 'claude-opus-4-latest'],
        name: 'Claude Opus 4',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 1500,
            ephemeral_5m_input_tokens: 1500 * 1.25,
            ephemeral_1h_input_tokens: 1500 * 2,
            cache_read_input_tokens: 1500 * 0.1,
            output_tokens: 7500,
        },
        context: 200000,
        max_tokens: 32000,
    },
    {
        id: 'claude-sonnet-4-20250514',
        aliases: ['claude-sonnet-4', 'claude-sonnet-4-latest'],
        name: 'Claude Sonnet 4',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-3-7-sonnet-20250219',
        aliases: ['claude-3-7-sonnet-latest'],
        succeeded_by: 'claude-sonnet-4-20250514',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000,
        max_tokens: 8192,
    },
    {
        id: 'claude-3-5-sonnet-20241022',
        name: 'Claude 3.5 Sonnet',
        aliases: ['claude-3-5-sonnet-latest'],
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        qualitative_speed: 'fast',
        training_cutoff: '2024-04',
        context: 200000,
        max_tokens: 8192,
    },
    {
        id: 'claude-3-5-sonnet-20240620',
        succeeded_by: 'claude-3-5-sonnet-20241022',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000, // might be wrong
        max_tokens: 8192,
    },
    {
        id: 'claude-3-haiku-20240307',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 25,
            ephemeral_5m_input_tokens: 25 * 1.25,
            ephemeral_1h_input_tokens: 25 * 2,
            cache_read_input_tokens: 25 * 0.1,
            output_tokens: 125,
        },
        qualitative_speed: 'fastest',
        context: 200000,
        max_tokens: 4096,
    },
];
@@ -0,0 +1,131 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import dedent from 'dedent';
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { DEEPSEEK_MODELS } from './models.js';

export class DeepSeekProvider implements IChatProvider {
    #openai: OpenAI;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: 'https://api.deepseek.com',
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'deepseek-chat';
    }

    models () {
        return DEEPSEEK_MODELS;
    }

    async list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const availableModels = this.models();
        const modelUsed = availableModels.find(m => [m.id, ...(m.aliases || [])].includes(model)) || availableModels.find(m => m.id === this.getDefaultModel())!;

        messages = await OpenAIUtil.process_input_messages(messages);
        for ( const message of messages ) {
            // DeepSeek doesn't accept string arrays alongside tool calls
            if ( message.tool_calls && Array.isArray(message.content) ) {
                message.content = '';
            }
        }

        // Function calling currently loops unless we inject the tool result as a system message.
        const TOOL_TEXT = (message: { tool_call_id: string; content: string }) => dedent(`
            Hi DeepSeek V3, your tool calling is broken and you are not able to
            obtain tool results in the expected way. That's okay, we can work
            around this.

            Please do not repeat this tool call.

            We have provided the tool call results below:

            Tool call ${message.tool_call_id} returned: ${message.content}.
        `);
        for ( let i = messages.length - 1; i >= 0; i-- ) {
            const message = messages[i];
            if ( message.role === 'tool' ) {
                messages.splice(i + 1, 0, {
                    role: 'system',
                    content: [
                        {
                            type: 'text',
                            text: TOOL_TEXT(message),
                        },
                    ],
                });
            }
        }
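        // Splice sketch (hypothetical transcript): given
        //     [user, assistant(tool_calls), tool(id='call_1', content='42')]
        // the loop above inserts a system message right after the tool
        // message, yielding
        //     [user, assistant(tool_calls), tool, system(TOOL_TEXT)]
        // Iterating backwards keeps earlier indices valid while splicing.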

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            max_tokens: max_tokens || 1000,
            temperature,
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `deepseek:${modelUsed.id}`, costsOverrideFromModel);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
@@ -0,0 +1,36 @@
import { IChatModel } from '../types.js';

export const DEEPSEEK_MODELS: IChatModel[] = [
    {
        id: 'deepseek-chat',
        name: 'DeepSeek Chat',
        aliases: [],
        context: 128000,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 56,
            completion_tokens: 168,
            cached_tokens: 0,
        },
        max_tokens: 8000,
    },
    {
        id: 'deepseek-reasoner',
        name: 'DeepSeek Reasoner',
        aliases: [],
        context: 128000,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 56,
            completion_tokens: 168,
            cached_tokens: 0,
        },
        max_tokens: 64000,
    },
];
173 src/backend/src/services/ai/chat/providers/FakeChatProvider.ts Normal file
@@ -0,0 +1,173 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
// METADATA // {"ai-commented":{"service":"claude"}}

import dedent from 'dedent';
import { LoremIpsum } from 'lorem-ipsum';
import { AIChatStream } from '../../utils/Streaming';
import { IChatProvider, ICompleteArguments } from './types';

export class FakeChatProvider implements IChatProvider {
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    getDefaultModel () {
        return 'fake';
    }

    async models () {
        return [
            {
                id: 'fake',
                aliases: [],
                costs_currency: 'usd-cents',
                costs: {
                    'input-tokens': 0,
                    'output-tokens': 0,
                },
                max_tokens: 8192,
            },
            {
                id: 'costly',
                aliases: [],
                costs_currency: 'usd-cents',
                costs: {
                    'input-tokens': 1000, // 1000 microcents per million tokens (0.001 cents per 1000 tokens)
                    'output-tokens': 2000, // 2000 microcents per million tokens (0.002 cents per 1000 tokens)
                },
                max_tokens: 8192,
            },
            {
                id: 'abuse',
                aliases: [],
                costs_currency: 'usd-cents',
                costs: {
                    'input-tokens': 0,
                    'output-tokens': 0,
                },
                max_tokens: 8192,
            },
        ];
    }
    async list () {
        return ['fake', 'costly', 'abuse'];
    }
    async complete ({ messages, stream, model, max_tokens, custom }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        // Determine token counts based on messages and model
        const usedModel = model || this.getDefaultModel();

        // For the costly model, simulate actual token counting
        const resp = this.getFakeResponse(usedModel, custom, messages, max_tokens);

        if ( stream ) {
            return {
                init_chat_stream: async ({ chatStream }: { chatStream: AIChatStream }) => {
                    await new Promise(rslv => setTimeout(rslv, 500));
                    chatStream.stream.write(`${JSON.stringify({
                        type: 'text',
                        text: (await resp).message.content[0].text,
                    }) }\n`);
                    chatStream.end();
                },
                stream: true,
                finally_fn: async () => {
                    // no op
                },
            };
        }

        return resp;
    }
    async getFakeResponse (modelId: string, custom: unknown, messages: any[], maxTokens: number = 8192): ReturnType<IChatProvider['complete']> {
        let inputTokens = 0;
        let outputTokens = 0;

        if ( modelId === 'costly' ) {
            // Simple token estimation: roughly 4 chars per token for input
            if ( messages && messages.length > 0 ) {
                for ( const message of messages ) {
                    if ( typeof message.content === 'string' ) {
                        inputTokens += Math.ceil(message.content.length / 4);
                    } else if ( Array.isArray(message.content) ) {
                        for ( const content of message.content ) {
                            if ( content.type === 'text' ) {
                                inputTokens += Math.ceil(content.text.length / 4);
                            }
                        }
                    }
                }
            }

            // Generate a random output token count between 50 and 200
            outputTokens = Math.floor(Math.min((Math.random() * 150) + 50, maxTokens));
        }
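        // Worked example (hypothetical input): a single user message of
        // 'hello world, how are you today?' (31 chars) estimates to
        // ceil(31 / 4) = 8 input tokens, and the output count lands somewhere
        // in [50, 200] unless capped by maxTokens.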

        // Generate the response text
        let responseText;
        if ( modelId === 'abuse' ) {
            responseText = dedent(`
                <h2>Free AI and Cloud for everyone!</h2><br />
                Come on down to <a href="https://puter.com">puter.com</a> and try it out!
                ${custom ?? ''}
            `);
        } else {
            // Generate 1-3 paragraphs for both fake and costly models
            responseText = new LoremIpsum({
                sentencesPerParagraph: {
                    max: 8,
                    min: 4,
                },
                wordsPerSentence: {
                    max: 20,
                    min: 12,
                },
            }).generateParagraphs(Math.floor(Math.random() * 3) + 1);
        }

        // Report usage based on model
        const usage = {
            'input_tokens': modelId === 'costly' ? inputTokens : 0,
            'output_tokens': modelId === 'costly' ? outputTokens : 1,
        };

        return {
            message: {
                'id': '00000000-0000-0000-0000-000000000000',
                'type': 'message',
                'role': 'assistant',
                'model': modelId,
                'content': [
                    {
                        'type': 'text',
                        'text': responseText,
                    },
                ],
                'stop_reason': 'end_turn',
                'stop_sequence': null,
                'usage': usage,
            },
            'usage': usage,
            'finish_reason': 'stop',
        };
    }
}
@@ -0,0 +1,94 @@
// Preamble: we previously used Gemini's SDK directly and, as we found out,
// it's actually kind of terrible. So we use the OpenAI SDK now.
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { handle_completion_output, process_input_messages } from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { GEMINI_MODELS } from './models.js';

export class GeminiChatProvider implements IChatProvider {

    meteringService: MeteringService;
    openai: OpenAI;

    defaultModel = 'gemini-2.5-flash';

    constructor (meteringService: MeteringService, config: { apiKey: string }) {
        this.meteringService = meteringService;
        this.openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
        });
    }

    getDefaultModel () {
        return this.defaultModel;
    }

    async models () {
        return GEMINI_MODELS;
    }
    async list () {
        return (await this.models()).map(m => [m.id, ...(m.aliases || [])]).flat();
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        messages = await process_input_messages(messages);

        // delete cache_control
        messages = messages.map(m => {
            delete m.cache_control;
            return m;
        });

        const modelUsed = (await this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (await this.models()).find(m => m.id === this.getDefaultModel())!;
        const sdk_params: ChatCompletionCreateParams = {
            messages: messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams;

        let completion;
        try {
            completion = await this.openai.chat.completions.create(sdk_params);
        } catch (e) {
            console.error('Gemini completion error: ', e);
            throw e;
        }

        return handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: usage.completion_tokens ?? 0,
                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };

                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelUsed.id}`, costsOverrideFromModel);

                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('No moderation logic.');
    }
}
@@ -0,0 +1,94 @@
import { IChatModel } from '../types';

export const GEMINI_MODELS: IChatModel[] = [
    {
        id: 'gemini-2.0-flash',
        name: 'Gemini 2.0 Flash',
        context: 131072,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 10,
            completion_tokens: 40,
            cached_tokens: 3,
        },
        max_tokens: 8192,
    },
    {
        id: 'gemini-2.0-flash-lite',
        name: 'Gemini 2.0 Flash-Lite',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 8,
            completion_tokens: 30,
        },
        max_tokens: 8192,
    },
    {
        id: 'gemini-2.5-flash',
        name: 'Gemini 2.5 Flash',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 30,
            completion_tokens: 250,
            cached_tokens: 3,
        },
        max_tokens: 65536,
    },
    {
        id: 'gemini-2.5-flash-lite',
        name: 'Gemini 2.5 Flash-Lite',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 10,
            completion_tokens: 40,
            cached_tokens: 1,
        },
        max_tokens: 65536,
    },
    {
        id: 'gemini-2.5-pro',
        name: 'Gemini 2.5 Pro',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            completion_tokens: 1000,
            cached_tokens: 13,
        },
        max_tokens: 200_000,
    },
    {
        id: 'gemini-3-pro-preview',
        name: 'Gemini 3 Pro',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 200,
            completion_tokens: 1200,
            cached_tokens: 20,
        },
        max_tokens: 200_000,
    },
];
@@ -0,0 +1,104 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import Groq from 'groq-sdk';
import { ChatCompletionCreateParams } from 'groq-sdk/resources/chat/completions.mjs';
import { CompletionUsage } from 'openai/resources';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { GROQ_MODELS } from './models.js';

export class GroqAIProvider implements IChatProvider {
    #client: Groq;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#client = new Groq({
            apiKey: config.apiKey,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'llama-3.1-8b-instant';
    }

    models () {
        return GROQ_MODELS;
    }

    async list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    async complete ({ messages, model, stream, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const availableModels = this.models();
        const modelUsed = availableModels.find(m => [m.id, ...(m.aliases || [])].includes(model)) || availableModels.find(m => m.id === this.getDefaultModel())!;

        messages = await OpenAIUtil.process_input_messages(messages);
        for ( const message of messages ) {
            if ( message.tool_calls && Array.isArray(message.content) ) {
                message.content = '';
            }
        }

        const completion = await this.#client.chat.completions.create({
            messages,
            model: modelUsed.id,
            stream,
            tools,
            max_completion_tokens: max_tokens,
            temperature,
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            deviations: {
                index_usage_from_stream_chunk: chunk =>
                    // x_groq contains usage details for streamed responses
                    (chunk as { x_groq?: { usage?: CompletionUsage } }).x_groq?.usage,
            },
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverride = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `groq:${modelUsed.id}`, costsOverride);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }
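    // Stream-usage sketch: for streamed responses Groq reports usage on a
    // vendor field rather than the OpenAI-style usage property. A final chunk
    // looks roughly like (hypothetical values):
    //     { choices: [...], x_groq: { usage: { prompt_tokens: 12,
    //       completion_tokens: 34, total_tokens: 46 } } }
    // which is why index_usage_from_stream_chunk reads chunk.x_groq?.usage.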

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
@@ -0,0 +1,156 @@
import { IChatModel } from '../types.js';

const makeModel = ({
    id,
    name,
    context,
    input,
    output,
    max_tokens,
}: {
    id: string;
    name: string;
    context?: number;
    input: number;
    output: number;
    max_tokens?: number;
}): IChatModel => ({
    id,
    name,
    context,
    costs_currency: 'usd-cents',
    input_cost_key: 'prompt_tokens',
    output_cost_key: 'completion_tokens',
    costs: {
        tokens: 1_000_000,
        prompt_tokens: input,
        completion_tokens: output,
        cached_tokens: 0,
    },
    max_tokens: max_tokens ?? context ?? 8192,
});

export const GROQ_MODELS: IChatModel[] = [
    makeModel({ id: 'gemma2-9b-it', name: 'Gemma 2 9B 8k', context: 8192, input: 20, output: 20 }),
    makeModel({ id: 'gemma-7b-it', name: 'Gemma 7B 8k Instruct', context: 8192, input: 7, output: 7 }),
    makeModel({ id: 'llama3-groq-70b-8192-tool-use-preview', name: 'Llama 3 Groq 70B Tool Use Preview 8k', context: 8192, input: 89, output: 89 }),
    makeModel({ id: 'llama3-groq-8b-8192-tool-use-preview', name: 'Llama 3 Groq 8B Tool Use Preview 8k', context: 8192, input: 19, output: 19 }),
    makeModel({ id: 'llama-3.1-70b-versatile', name: 'Llama 3.1 70B Versatile 128k', context: 128000, input: 59, output: 79 }),
    makeModel({ id: 'llama-3.1-70b-specdec', name: 'Llama 3.1 70B SpecDec 128k', context: 128000, input: 59, output: 99 }),
    makeModel({ id: 'llama-3.1-8b-instant', name: 'Llama 3.1 8B Instant 128k', context: 131072, input: 5, output: 8, max_tokens: 131072 }),
    makeModel({ id: 'meta-llama/llama-guard-4-12b', name: 'Llama Guard 4 12B', context: 131072, input: 20, output: 20, max_tokens: 1024 }),
    makeModel({ id: 'meta-llama/llama-prompt-guard-2-86m', name: 'Prompt Guard 2 86M', context: 512, input: 4, output: 4, max_tokens: 512 }),
    makeModel({ id: 'llama-3.2-1b-preview', name: 'Llama 3.2 1B (Preview) 8k', context: 128000, input: 4, output: 4 }),
    makeModel({ id: 'llama-3.2-3b-preview', name: 'Llama 3.2 3B (Preview) 8k', context: 128000, input: 6, output: 6 }),
    makeModel({ id: 'llama-3.2-11b-vision-preview', name: 'Llama 3.2 11B Vision 8k (Preview)', context: 8000, input: 18, output: 18 }),
    makeModel({ id: 'llama-3.2-90b-vision-preview', name: 'Llama 3.2 90B Vision 8k (Preview)', context: 8000, input: 90, output: 90 }),
    makeModel({ id: 'llama3-70b-8192', name: 'Llama 3 70B 8k', context: 8192, input: 59, output: 79 }),
    makeModel({ id: 'llama3-8b-8192', name: 'Llama 3 8B 8k', context: 8192, input: 5, output: 8 }),
    makeModel({ id: 'mixtral-8x7b-32768', name: 'Mixtral 8x7B Instruct 32k', context: 32768, input: 24, output: 24 }),
    makeModel({ id: 'llama-guard-3-8b', name: 'Llama Guard 3 8B 8k', context: 8192, input: 20, output: 20 }),
];
@@ -0,0 +1,121 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import { Mistral } from '@mistralai/mistralai';
import { ChatCompletionResponse } from '@mistralai/mistralai/models/components/chatcompletionresponse.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { MISTRAL_MODELS } from './models.js';

export class MistralAIProvider implements IChatProvider {
    #client: Mistral;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#client = new Mistral({
            apiKey: config.apiKey,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'mistral-small-2506';
    }

    async models () {
        return MISTRAL_MODELS;
    }

    async list () {
        const models = await this.models();
        const ids: string[] = [];
        for ( const model of models ) {
            ids.push(model.id);
            if ( model.aliases ) {
                ids.push(...model.aliases);
            }
        }
        return ids;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        messages = await OpenAIUtil.process_input_messages(messages);
        for ( const message of messages ) {
            if ( message.tool_calls ) {
                message.toolCalls = message.tool_calls;
                delete message.tool_calls;
            }
            if ( message.tool_call_id ) {
                message.toolCallId = message.tool_call_id;
                delete message.tool_call_id;
            }
        }

        const selectedModel = (await this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (await this.models()).find(m => m.id === this.getDefaultModel())!;
        const actor = Context.get('actor');
        const completion = await this.#client.chat[
            stream ? 'stream' : 'complete'
        ]({
            model: selectedModel.id,
            ...(tools ? { tools: tools as any[] } : {}),
            messages,
            maxTokens: max_tokens,
            temperature,
        });

        return await OpenAIUtil.handle_completion_output({
            deviations: {
                index_usage_from_stream_chunk: chunk => {
                    if ( ! chunk.usage ) return;

                    const snake_usage: Record<string, unknown> = {};
                    for ( const key in chunk.usage ) {
                        const snakeKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
                        snake_usage[snakeKey] = chunk.usage[key];
                    }

                    return snake_usage;
                },
                chunk_but_like_actually: chunk => (chunk as any).data,
                index_tool_calls_from_stream_choice: choice => (choice.delta as any).toolCalls,
                coerce_completion_usage: (completion: ChatCompletionResponse) => ({
                    prompt_tokens: completion.usage.promptTokens,
                    completion_tokens: completion.usage.completionTokens,
                }),
            },
            completion: completion as ChatCompletionResponse,
            stream,
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `mistral:${selectedModel.id}`);
                // Still return legacy cost calculation for compatibility
                return trackedUsage;
            },
        });
    }
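    // Conversion sketch: Mistral's SDK reports usage in camelCase, e.g.
    //     { promptTokens: 12, completionTokens: 34 }
    // The regex replace above turns each key into snake_case
    // ('promptTokens' -> 'prompt_tokens'), matching the OpenAI-style shape
    // that handle_completion_output and the metering utilities expect.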
|
||||
|
||||
checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
|
||||
throw new Error('Method not implemented.');
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,299 @@
import { IChatModel } from '../types';

export const MISTRAL_MODELS: IChatModel[] = [
    {
        id: 'mistral-medium-2508',
        name: 'mistral-medium-2508',
        aliases: [
            'mistral-medium-latest',
            'mistral-medium',
        ],
        max_tokens: 131072,
        description: 'Update on Mistral Medium 3 with improved capabilities.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 40,
            completion_tokens: 200,
        },
    },
    {
        id: 'open-mistral-7b',
        name: 'open-mistral-7b',
        aliases: [
            'mistral-tiny',
            'mistral-tiny-2312',
        ],
        max_tokens: 32768,
        description: 'Our first dense model released September 2023.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 25,
            completion_tokens: 25,
        },
    },
    {
        id: 'open-mistral-nemo',
        name: 'open-mistral-nemo',
        aliases: [
            'open-mistral-nemo-2407',
            'mistral-tiny-2407',
            'mistral-tiny-latest',
        ],
        max_tokens: 131072,
        description: 'Our best multilingual open source model released July 2024.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 15,
            completion_tokens: 15,
        },
    },
    {
        id: 'pixtral-large-2411',
        name: 'pixtral-large-2411',
        aliases: [
            'pixtral-large-latest',
            'mistral-large-pixtral-2411',
        ],
        max_tokens: 131072,
        description: 'Official pixtral-large-2411 Mistral AI model',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 200,
            completion_tokens: 600,
        },
    },
    {
        id: 'codestral-2508',
        name: 'codestral-2508',
        aliases: [
            'codestral-latest',
        ],
        max_tokens: 256000,
        description: 'Our cutting-edge language model for coding released August 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 30,
            completion_tokens: 90,
        },
    },
    {
        id: 'devstral-small-2507',
        name: 'devstral-small-2507',
        aliases: [
            'devstral-small-latest',
        ],
        max_tokens: 131072,
        description: 'Our small open-source code-agentic model.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 30,
            cached_tokens: 0,
        },
    },
    {
        id: 'devstral-medium-2507',
        name: 'devstral-medium-2507',
        aliases: [
            'devstral-medium-latest',
        ],
        max_tokens: 131072,
        description: 'Our medium code-agentic model.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 40,
            completion_tokens: 200,
            cached_tokens: 0,
        },
    },
    {
        id: 'mistral-small-2506',
        name: 'mistral-small-2506',
        aliases: [
            'mistral-small-latest',
        ],
        max_tokens: 131072,
        description: 'Our latest enterprise-grade small model with the latest version released June 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 30,
        },
    },
    {
        id: 'magistral-medium-2509',
        name: 'magistral-medium-2509',
        aliases: [
            'magistral-medium-latest',
        ],
        max_tokens: 131072,
        description: 'Our frontier-class reasoning model release candidate September 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 200,
            completion_tokens: 500,
        },
    },
    {
        id: 'magistral-small-2509',
        name: 'magistral-small-2509',
        aliases: [
            'magistral-small-latest',
        ],
        max_tokens: 131072,
        description: 'Our efficient reasoning model released September 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 50,
            completion_tokens: 150,
        },
    },
    {
        id: 'voxtral-mini-2507',
        name: 'voxtral-mini-2507',
        aliases: [
            'voxtral-mini-latest',
        ],
        max_tokens: 32768,
        description: 'A mini audio understanding model released in July 2025',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 4,
            completion_tokens: 4,
        },
    },
    {
        id: 'voxtral-small-2507',
        name: 'voxtral-small-2507',
        aliases: [
            'voxtral-small-latest',
        ],
        max_tokens: 32768,
        description: 'A small audio understanding model released in July 2025',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 30,
        },
    },
    {
        id: 'mistral-large-latest',
        name: 'mistral-large-2512',
        aliases: [
            'mistral-large-2512',
        ],
        max_tokens: 262144,
        description: 'Official mistral-large-2512 Mistral AI model',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 50,
            completion_tokens: 150,
        },
    },
    {
        id: 'ministral-3b-2512',
        name: 'ministral-3b-2512',
        aliases: [
            'ministral-3b-latest',
        ],
        max_tokens: 131072,
        description: 'Ministral 3 (a.k.a. Tinystral) 3B Instruct.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 10,
        },
    },
    {
        id: 'ministral-8b-2512',
        name: 'ministral-8b-2512',
        aliases: [
            'ministral-8b-latest',
        ],
        max_tokens: 262144,
        description: 'Ministral 3 (a.k.a. Tinystral) 8B Instruct.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 15,
            completion_tokens: 15,
        },
    },
    {
        id: 'ministral-14b-2512',
        name: 'ministral-14b-2512',
        aliases: [
            'ministral-14b-latest',
        ],
        max_tokens: 262144,
        description: 'Ministral 3 (a.k.a. Tinystral) 14B Instruct.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 20,
            completion_tokens: 20,
        },
    },
];
159
src/backend/src/services/ai/chat/providers/OllamaProvider.ts
Normal file
@@ -0,0 +1,159 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import axios from 'axios';
import { default as openai, default as OpenAI } from 'openai';
import { Context } from '../../../../util/context.js';
import { kv } from '../../../../util/kvSingleton.js';
import * as OpenAIUtil from '../../utils/OpenAIUtil.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './types';
import { MeteringService } from '../../../MeteringService/MeteringService';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
/**
 * OllamaChatProvider - Provides integration with Ollama's API for chat completions.
 * Implements the IChatProvider interface for the puter-chat-completion driver.
 * Handles model management, message adaptation, streaming responses,
 * and usage tracking for Ollama's language models.
 */
export class OllamaChatProvider implements IChatProvider {

    #apiBaseUrl: string;

    #openai: OpenAI;

    #meteringService: MeteringService;

    constructor (config: { api_base_url?: string } | undefined, meteringService: MeteringService) {
        // Ollama typically runs on HTTP, not HTTPS
        this.#apiBaseUrl = config?.api_base_url || 'http://localhost:11434';

        // OpenAI SDK is used to interact with the Ollama API
        this.#openai = new openai.OpenAI({
            apiKey: 'ollama', // Ollama doesn't use an API key, it uses the "ollama" string
            baseURL: `${this.#apiBaseUrl}/v1`,
        });

        this.#meteringService = meteringService;
    }

    async models () {
        let models = kv.get('ollamaChat:models');
        if ( ! models ) {
            try {
                const resp = await axios.request({
                    method: 'GET',
                    url: `${this.#apiBaseUrl}/api/tags`,
                });
                models = resp.data.models || [];
                if ( models.length > 0 ) {
                    kv.set('ollamaChat:models', models);
                }
            } catch ( error ) {
                console.error('Failed to fetch models from Ollama:', (error as Error).message);
                // Return empty array if Ollama is not available
                return [];
            }
        }

        if ( !models || models.length === 0 ) {
            return [];
        }

        const coerced_models: IChatModel[] = [];
        for ( const model of models ) {
            // Ollama API returns models with 'name' property, not 'model'
            const modelName = model.name || model.model || 'unknown';
            coerced_models.push({
                id: `ollama:${modelName}`,
                name: `${modelName} (Ollama)`,
                max_tokens: model.size || model.max_context || 8192,
                costs_currency: 'usd-cents',
                costs: {
                    tokens: 1_000_000,
                    input_token: 0,
                    output_token: 0,
                },
            });
        }
        console.log('coerced_models', coerced_models);
        return coerced_models;
    }
    async list () {
        const models = await this.models();
        const model_names: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
        }
        return model_names;
    }
    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        if ( model.startsWith('ollama:') ) {
            model = model.slice('ollama:'.length);
        }

        const actor = Context.get('actor');

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: model ?? this.getDefaultModel(),
            ...(tools ? { tools } : {}),
            max_tokens,
            temperature, // defaults to 1.0
            stream: !!stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams);

        const modelDetails = (await this.models()).find(m => m.id === `ollama:${model}`);
        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt: (usage.prompt_tokens ?? 1) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion: usage.completion_tokens ?? 1,
                    input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };
                const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
                    return [k, 0]; // override to 0 since local is free
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails!.id, costOverwrites);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    /**
     * Returns the default model identifier for the Ollama service
     * @returns {string} The default model ID 'gpt-oss:20b'
     */
    getDefaultModel () {
        return 'gpt-oss:20b';
    }
}
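Editor's aside: a minimal sketch of how this provider might be exercised on its own, assuming the surrounding module wiring supplies a MeteringService; the model name is hypothetical and depends on what has been pulled locally.

// Sketch only; `meteringService` comes from the module wiring, and the model
// id reflects whatever `ollama pull` has fetched on the host.
const provider = new OllamaChatProvider({ api_base_url: 'http://localhost:11434' }, meteringService);
const available = await provider.list();   // e.g. ['ollama:llama3.2']
const result = await provider.complete({
    messages: [{ role: 'user', content: 'Say hello.' }],
    model: available[0] ?? provider.getDefaultModel(),
    stream: false,
});

Note that the metering call in complete() still records token counts, but every cost is overridden to zero since local inference is free.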
@@ -19,12 +19,18 @@

import mime from 'mime-types';
import { OpenAI } from 'openai';
import FSNodeParam from '../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../util/context.js';
import { stream_to_buffer } from '../../../util/streamutil.js';
import OpenAIUtil from '../lib/OpenAIUtil.js';
import { OPEN_AI_MODELS } from './models.mjs';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { FSNodeParam } from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../../../util/context.js';
import { stream_to_buffer } from '../../../../../util/streamutil.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAiUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { OPEN_AI_MODELS } from './models.js';

// METADATA // {"ai-commented":{"service":"claude"}}

// We're capping at 5MB, which sucks, but Chat Completions doesn't support
@@ -38,100 +44,200 @@ const MAX_FILE_SIZE = 5 * 1_000_000;
 * OpenAI API interactions with support for multiple models including GPT-4 variants.
 * Handles usage tracking, spending records, and content moderation.
 */
export class OpenAICompletionService {
export class OpenAiChatProvider implements IChatProvider {
    /**
     * @type {import('openai').OpenAI}
     */
    #openAi;
    #openAi: OpenAI;

    #defaultModel;
    #defaultModel = 'gpt-5-nano';

    #models;
    #meteringService: MeteringService;

    /** @type {import('../../../services/MeteringService/MeteringService.js').MeteringService} */
    #meteringService;
    constructor (
        meteringService: MeteringService,
        config: { apiKey?: string, secret_key?: string }) {

    constructor ({ serviceName, config, globalConfig, aiChatService, meteringService, models = OPEN_AI_MODELS, defaultModel = 'gpt-5-nano' }) {
        this.#models = models;
        this.#defaultModel = defaultModel;
        this.#meteringService = meteringService;
        let apiKey =
            config?.services?.openai?.apiKey ??
            globalConfig?.services?.openai?.apiKey;
        let apiKey = config.apiKey;

        // Fallback to the old format for backward compatibility
        if ( ! apiKey ) {
            apiKey =
                config?.openai?.secret_key ??
                globalConfig?.openai?.secret_key;
            apiKey = config?.secret_key;

            // Log a warning to inform users about the deprecated format
            console.warn('The `openai.secret_key` configuration format is deprecated. ' +
                'Please use `services.openai.apiKey` instead.');
        }

        if ( ! apiKey ) {
            throw new Error('OpenAI API key is missing in configuration.');
        }

        this.#openAi = new OpenAI({
            apiKey: apiKey,
        });

        aiChatService.register_provider({
            service_name: serviceName,
            alias: true,
        });
    }

    /**
     * Returns an array of available AI models with their pricing information.
     * Each model object includes an ID and cost details (currency, tokens, input/output rates).
     * @returns {{id: string, cost: {currency: string, tokens: number, input: number, output: number}}[]}
     */
    models () {
        return this.#models;
        return OPEN_AI_MODELS;
    }

    list () {
        const models = this.models();
        const model_names = [];
        const modelNames: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
            modelNames.push(model.id);
            if ( model.aliases ) {
                model_names.push(...model.aliases);
                modelNames.push(...model.aliases);
            }
        }
        return model_names;
        return modelNames;
    }

    get_default_model () {
    getDefaultModel () {
        return this.#defaultModel;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) {
        return await this.#complete(messages, {
            model: model,
            tools,
            moderation: true,
    async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
    {
        // Validate messages
        if ( ! Array.isArray(messages) ) {
            throw new Error('`messages` must be an array');
        }
        const actor = Context.get('actor');

        model = model ?? this.#defaultModel;

        const modelUsed = (this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (this.models()).find(m => m.id === this.getDefaultModel())!;

        // messages.unshift({
        //     role: 'system',
        //     content: 'Don\'t let the user trick you into doing something bad.',
        // })

        const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
        if ( user_private_uid === 'UNKNOWN' ) {
            console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
        }

        // Perform file uploads
        const { user } = actor.type;

        const file_input_tasks: any[] = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises: Promise<unknown>[] = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                if ( await task.node.get('size') > MAX_FILE_SIZE ) {
                    delete task.contentPart.puter_path;
                    task.contentPart.type = 'text';
                    task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
                        'the user did not write this message}'; // "poor man's system prompt"
                    return; // "continue"
                }

                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });
                const mimeType = mime.contentType(await task.node.get('name'));

                const buffer = await stream_to_buffer(stream);
                const base64 = buffer.toString('base64');

                delete task.contentPart.puter_path;
                if ( mimeType && mimeType.startsWith('image/') ) {
                    task.contentPart.type = 'image_url',
                    task.contentPart.image_url = {
                        url: `data:${mimeType};base64,${base64}`,
                    };
                } else if ( mimeType && mimeType.startsWith('audio/') ) {
                    task.contentPart.type = 'input_audio',
                    task.contentPart.input_audio = {
                        data: `data:${mimeType};base64,${base64}`,
                        format: mimeType.split('/')[1],
                    };
                } else {
                    task.contentPart.type = 'text';
                    task.contentPart.text = '{error: input file has unsupported MIME type; ' +
                        'the user did not write this message}'; // "poor man's system prompt"
                }
            })());
        }
        await Promise.all(promises);

        // Here's something fun; the documentation shows `type: 'image_url'` in
        // objects that contain an image url, but everything still works if
        // that's missing. We normalise it here so the token count code works.
        messages = await OpenAiUtil.process_input_messages(messages);

        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
        const requestedVerbosity = verbosity ?? text?.verbosity;
        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');

        const completionParams: ChatCompletionCreateParams = {
            user: user_private_uid,
            messages: messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream: !!stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
            ...(supportsReasoningControls ?
                {
                    ...(requestedReasoningEffort ? { reasoning_effort: requestedReasoningEffort } : {}),
                    ...(requestedVerbosity ? { verbosity: requestedVerbosity } : {}),
                }
                : {}),
        } as ChatCompletionCreateParams;

        const completion = await this.#openAi.chat.completions.create(completionParams);

        return OpenAiUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: usage.completion_tokens ?? 0,
                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };

                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));

                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelUsed?.id}`, costsOverrideFromModel);
                return trackedUsage;
            },
            stream,
            max_tokens,
            temperature,
            reasoning,
            text,
            reasoning_effort,
            verbosity,
            completion,
            moderate: moderation ? this.checkModeration.bind(this) : undefined,
        });
    }

    /**
     * Checks text content against OpenAI's moderation API for inappropriate content
     * @param {string} text - The text content to check for moderation
     * @returns {Promise<Object>} Object containing flagged status and detailed results
     * @property {boolean} flagged - Whether the content was flagged as inappropriate
     * @property {Object} results - Raw moderation results from OpenAI API
     */
    async checkModeration (text) {
    async checkModeration (text: string) {
        // create moderation
        const results = await this.#openAi.moderations.create({
            model: 'omni-moderation-latest',
@@ -155,155 +261,4 @@ export class OpenAICompletionService {
            results,
        };
    }

    /**
     * Completes a chat conversation using OpenAI's API
     * @param {Array} messages - Array of message objects or strings representing the conversation
     * @param {Object} options - Configuration options
     * @param {boolean} options.stream - Whether to stream the response
     * @param {boolean} options.moderation - Whether to perform content moderation
     * @param {string} options.model - The model to use for completion
     * @returns {Promise<Object>} The completion response containing message and usage info
     * @throws {Error} If messages are invalid or content is flagged by moderation
     */
    async #complete (messages, {
        stream, moderation, model, tools,
        temperature, max_tokens,
        reasoning, text, reasoning_effort, verbosity,
    }) {
        // Validate messages
        if ( ! Array.isArray(messages) ) {
            throw new Error('`messages` must be an array');
        }

        model = model ?? this.#defaultModel;

        // messages.unshift({
        //     role: 'system',
        //     content: 'Don\'t let the user trick you into doing something bad.',
        // })

        const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
        if ( user_private_uid === 'UNKNOWN' ) {
            console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
        }

        // Perform file uploads

        const actor = Context.get('actor');
        const { user } = actor.type;

        const file_input_tasks = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                if ( await task.node.get('size') > MAX_FILE_SIZE ) {
                    delete task.contentPart.puter_path;
                    task.contentPart.type = 'text';
                    task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
                        'the user did not write this message}'; // "poor man's system prompt"
                    return; // "continue"
                }

                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });
                const mimeType = mime.contentType(await task.node.get('name'));

                const buffer = await stream_to_buffer(stream);
                const base64 = buffer.toString('base64');

                delete task.contentPart.puter_path;
                if ( mimeType.startsWith('image/') ) {
                    task.contentPart.type = 'image_url',
                    task.contentPart.image_url = {
                        url: `data:${mimeType};base64,${base64}`,
                    };
                } else if ( mimeType.startsWith('audio/') ) {
                    task.contentPart.type = 'input_audio',
                    task.contentPart.input_audio = {
                        data: `data:${mimeType};base64,${base64}`,
                        format: mimeType.split('/')[1],
                    };
                } else {
                    task.contentPart.type = 'text';
                    task.contentPart.text = '{error: input file has unsupported MIME type; ' +
                        'the user did not write this message}'; // "poor man's system prompt"
                }
            })());
        }
        await Promise.all(promises);

        // Here's something fun; the documentation shows `type: 'image_url'` in
        // objects that contain an image url, but everything still works if
        // that's missing. We normalise it here so the token count code works.
        messages = await OpenAIUtil.process_input_messages(messages);

        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
        const requestedVerbosity = verbosity ?? text?.verbosity;
        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');

        const completionParams = {
            user: user_private_uid,
            messages: messages,
            model: model,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        };

        if ( supportsReasoningControls ) {
            if ( requestedReasoningEffort ) {
                completionParams.reasoning_effort = requestedReasoningEffort;
            }
            if ( requestedVerbosity ) {
                completionParams.verbosity = requestedVerbosity;
            }
        }

        const completion = await this.#openAi.chat.completions.create(completionParams);
        // TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service
        // for now I'm overloading this usage calculator to handle the future promise resolution...
        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const modelDetails = this.models().find(m => m.id === model || m.aliases?.includes(model));
                const trackedUsage = {
                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: usage.completion_tokens ?? 0,
                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };

                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelDetails.id}`);
                const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
                    model_details: modelDetails,
                });

                return legacyCostCalculator({ usage });
            },
            stream,
            completion,
            moderate: moderation && this.checkModeration.bind(this),
        });
    }
}
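Editor's aside: the reasoning-controls spread in completionParams above gates reasoning_effort and verbosity to gpt-5-family models, which are the ones that accept those fields. A minimal sketch of the same gating as a helper; the helper name is hypothetical:

// Hypothetical helper (not in the diff) expressing the same gating.
const gpt5OnlyParams = (model: string, params: Record<string, unknown>): Record<string, unknown> =>
    model.startsWith('gpt-5') ? params : {};

// usage inside the params object:
// ...gpt5OnlyParams(model, { reasoning_effort: 'low', verbosity: 'medium' }),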
@@ -0,0 +1,265 @@
// TODO DS: centralize somewhere

import { IChatModel } from '../types';

export const OPEN_AI_MODELS: IChatModel[] = [
    {
        id: 'gpt-5.1',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 25,
            cached_tokens: 3,
            completion_tokens: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-chat-latest',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-5-2025-08-07',
        aliases: ['gpt-5'],
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-mini-2025-08-07',
        aliases: ['gpt-5-mini'],
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 25,
            cached_tokens: 3,
            completion_tokens: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-nano-2025-08-07',
        aliases: ['gpt-5-nano'],
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 5,
            cached_tokens: 1,
            completion_tokens: 40,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-chat-latest',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 250,
            cached_tokens: 125,
            completion_tokens: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o-mini',
        max_tokens: 16384,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 15,
            cached_tokens: 8,
            completion_tokens: 60,
        },
    },
    {
        id: 'o1',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 1500,
            cached_tokens: 750,
            completion_tokens: 6000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o1-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 110,
            completion_tokens: 440,
        },
        max_tokens: 65536,
    },
    {
        id: 'o1-pro',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 15000,
            completion_tokens: 60000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 200,
            cached_tokens: 50,
            completion_tokens: 800,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 110,
            cached_tokens: 55,
            completion_tokens: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'o4-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 110,
            completion_tokens: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'gpt-4.1',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 200,
            cached_tokens: 50,
            completion_tokens: 800,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 40,
            cached_tokens: 10,
            completion_tokens: 160,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-nano',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 10,
            cached_tokens: 2,
            completion_tokens: 40,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.5-preview',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 7500,
            completion_tokens: 15000,
        },
        max_tokens: 32768,
    },
];
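Editor's aside: a minimal sketch (the helper name is hypothetical) of how these tables are meant to be read. Each entry in costs is USD cents per costs.tokens (1,000,000) tokens, so 200,000 prompt tokens on gpt-4o-mini come out to 200_000 * 15 / 1_000_000 = 3 cents.

// Hypothetical helper: sum the cost of a usage record against a model's table.
const costInCents = (model: IChatModel, usage: Record<string, number>): number =>
    Object.entries(usage).reduce(
        (sum, [key, count]) => sum + (count * (model.costs[key] ?? 0)) / model.costs.tokens,
        0);

// costInCents(gpt4oMini, { prompt_tokens: 200_000 }) === 3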
@@ -0,0 +1,157 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import axios from 'axios';
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources';
import APIError from '../../../../../api/APIError.js';
import { Context } from '../../../../../util/context.js';
import { kv } from '../../../../../util/kvSingleton.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatModel, IChatProvider } from '../types.js';

export class OpenRouterProvider implements IChatProvider {

    #meteringService: MeteringService;

    #openai: OpenAI;

    #apiBaseUrl: string = 'https://openrouter.ai/api/v1';

    constructor (config: { apiBaseUrl?: string, apiKey: string }, meteringService: MeteringService) {
        this.#apiBaseUrl = config.apiBaseUrl || 'https://openrouter.ai/api/v1';
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: this.#apiBaseUrl,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'openrouter:openai/gpt-5-nano';
    }
    /**
     * Returns a list of available model names including their aliases
     * @returns {Promise<string[]>} Array of model identifiers and their aliases
     * @description Retrieves all available model IDs and their aliases,
     * flattening them into a single array of strings that can be used for model selection
     */
    async list () {
        const models = await this.models();
        const model_names: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
        }
        return model_names;
    }

    /**
     * AI Chat completion method.
     * See AIChatService for more details.
     */
    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
        const modelUsed = (await this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (await this.models()).find(m => m.id === this.getDefaultModel())!;

        const modelIdForParams = modelUsed.id.startsWith('openrouter:') ? modelUsed.id.slice('openrouter:'.length) : modelUsed.id;

        if ( model === 'openrouter/auto' ) {
            throw APIError.create('field_invalid', undefined, {
                key: 'model',
                expected: 'allowed model',
                got: 'disallowed model',
            });
        }

        const actor = Context.get('actor');

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: modelIdForParams,
            ...(tools ? { tools } : {}),
            max_tokens,
            temperature, // defaults to 1.0
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
            usage: { include: true },
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                // custom OpenRouter logic because their pricing is unusual
                const trackedUsage = {
                    prompt: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion: usage.completion_tokens ?? 0,
                    input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };
                const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
                    return [k, (modelUsed.costs[k] || 0) * trackedUsage[k]];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, modelUsed.id, costOverwrites);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    async models () {
        let models = kv.get('openrouterChat:models');
        if ( ! models ) {
            try {
                const resp = await axios.request({
                    method: 'GET',
                    url: `${this.#apiBaseUrl}/models`,
                });

                models = resp.data.data;
                kv.set('openrouterChat:models', models);
            } catch ( e ) {
                console.log(e);
            }
        }
        const coerced_models: IChatModel[] = [];
        for ( const model of models ) {
            const microcentCosts = Object.fromEntries(Object.entries(model.pricing).map(([k, v]) => [k, Math.round((v as number) * 1_000_000 * 100)]));
            coerced_models.push({
                id: `openrouter:${model.id}`,
                name: `${model.name} (OpenRouter)`,
                aliases: [model.id, model.name, `openrouter/${model.id}`, model.id.split('/').slice(1).join('/')],
                max_tokens: model.top_provider.max_completion_tokens,
                costs_currency: 'usd-cents',
                input_cost_key: 'prompt',
                output_cost_key: 'completion',
                costs: {
                    tokens: 1_000_000,
                    ...microcentCosts,
                },
            });
        }
        return coerced_models;
    }
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
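Editor's aside: OpenRouter's /models endpoint reports pricing in USD per single token, while IChatModel stores cents per costs.tokens (1,000,000) tokens; that is what the `v * 1_000_000 * 100` conversion in models() above does. A worked sketch of the same conversion, assuming the USD-per-token convention holds:

// Hypothetical helper mirroring the conversion in models().
const usdPerTokenToCentsPerMillion = (usdPerToken: number): number =>
    Math.round(usdPerToken * 1_000_000 * 100);

usdPerTokenToCentsPerMillion(0.0000025); // => 250, i.e. $2.50 per 1M tokens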
@@ -0,0 +1,140 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import { Together } from 'together-ai';
import { Context } from '../../../../../util/context.js';
import { kv } from '../../../../../util/kvSingleton.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatModel, IChatProvider, ICompleteArguments } from '../types.js';

export class TogetherAIProvider implements IChatProvider {
    #together: Together;

    #meteringService: MeteringService;

    #kvKey = 'togetherai:models';

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#together = new Together({
            apiKey: config.apiKey,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'togetherai:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo';
    }

    async models () {
        let models: IChatModel[] | undefined = kv.get(this.#kvKey);
        if ( models ) return models;

        const api_models = await this.#together.models.list();
        models = [];
        for ( const model of api_models ) {
            if ( model.type === 'chat' || model.type === 'code' || model.type === 'language' || model.type === 'moderation' ) {
                models.push({
                    id: `togetherai:${model.id}`,
                    aliases: [model.id, `togetherai/${model.id}`, model.id.split('/').slice(1).join('/')],
                    name: model.display_name,
                    context: model.context_length,
                    description: model.display_name,
                    costs_currency: 'usd-cents',
                    input_cost_key: 'prompt_tokens',
                    output_cost_key: 'completion_tokens',
                    costs: {
                        tokens: 1_000_000,
                        ...model.pricing,
                    },
                    max_tokens: model.context_length ?? 8000,
                });
            }
        }

        models.push({
            id: 'model-fallback-test-1',
            name: 'Model Fallback Test 1',
            context: 1000,
            costs_currency: 'usd-cents',
            input_cost_key: 'prompt_tokens',
            output_cost_key: 'completion_tokens',
            costs: {
                tokens: 1_000_000,
                prompt_tokens: 10,
                completion_tokens: 10,
            },
            max_tokens: 1000,
        });
        kv.set(this.#kvKey, models, { EX: 5 * 60 });
        return models;
    }

    async list () {
        const models = await this.models();
        const modelIds: string[] = [];
        for ( const model of models ) {
            modelIds.push(model.id);
            if ( model.aliases ) {
                modelIds.push(...model.aliases);
            }
        }
        return modelIds;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        if ( model === 'model-fallback-test-1' ) {
            throw new Error('Model Fallback Test 1');
        }

        const actor = Context.get('actor');
        const models = await this.models();
        const modelUsed = models.find(m => [m.id, ...(m.aliases || [])].includes(model)) || models.find(m => m.id === this.getDefaultModel())!;
        const modelIdForParams = modelUsed.id.startsWith('togetherai:') ? modelUsed.id.slice('togetherai:'.length) : modelUsed.id;

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#together.chat.completions.create({
            model: modelIdForParams,
            messages,
            stream,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            ...(stream ? { stream_options: { include_usage: true } } : {}),
        } as Together.Chat.Completions.CompletionCreateParamsNonStreaming);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverride = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `togetherai:${modelIdForParams}`, costsOverride);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
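Editor's aside: models() above uses a read-through cache with a 5-minute TTL (kv.set's EX option is in seconds) so repeated list()/complete() calls don't hit the Together API each time. The same pattern, generalized as a hypothetical helper:

// Sketch only; assumes kv.get/kv.set behave as used by the provider above.
async function cached<T>(key: string, ttlSeconds: number, load: () => Promise<T>): Promise<T> {
    const hit: T | undefined = kv.get(key);
    if ( hit ) return hit;
    const value = await load();
    kv.set(key, value, { EX: ttlSeconds });
    return value;
}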
@@ -0,0 +1,141 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
// METADATA // {"ai-commented":{"service":"claude"}}

import dedent from 'dedent';
import { PassThrough } from 'stream';
import Streaming from '../../utils/Streaming.js';
import { IChatProvider, ICompleteArguments } from './types.js';

/**
 * UsageLimitedChatProvider - A specialized chat provider that returns resource exhaustion messages.
 * Implements IChatProvider to provide responses indicating the user has exceeded their usage limits.
 * Follows the same response format as real AI providers but with a custom message about upgrading.
 * Can handle both streaming and non-streaming requests consistently.
 */
export class UsageLimitedChatProvider implements IChatProvider {

    models (): ReturnType<IChatProvider['models']> {
        return [{
            id: 'usage-limited',
            name: 'Usage Limited',
            context: 16384,
            costs_currency: 'usd-cents',
            input_cost_key: 'input',
            output_cost_key: 'output',
            max_tokens: 16384,
            costs: {
                tokens: 1_000_000,
                input: 0,
                output: 0,
            },
        }];
    }
    list () {
        return ['usage-limited'];
    }
    async complete ({ stream, customLimitMessage }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const limitMessage = customLimitMessage || dedent(`
            You have reached your AI usage limit for this account.
        `);

        // If streaming is requested, return a streaming response
        if ( stream ) {
            const streamObj = new PassThrough();

            const chatStream = new Streaming.AIChatStream({
                stream: streamObj,
            });

            // Schedule the streaming response
            setTimeout(() => {
                chatStream.write({
                    type: 'content_block_start',
                    index: 0,
                });

                chatStream.write({
                    type: 'content_block_delta',
                    index: 0,
                    delta: {
                        type: 'text',
                        text: limitMessage,
                    },
                });

                chatStream.write({
                    type: 'content_block_stop',
                    index: 0,
                });

                chatStream.write({
                    type: 'message_stop',
                    stop_reason: 'end_turn',
                });

                chatStream.end();
            }, 10);

            return {
                stream: true,
                init_chat_stream: async ({ chatStream: cs }) => {
                    // Copy contents from our stream to the provided one
                    chatStream.stream.pipe(cs.stream);
                },
                finally_fn: async () => {
                    // No-op
                },
            };
        }

        // Non-streaming response
        return {
            message: {
                id: '00000000-0000-0000-0000-000000000000',
                type: 'message',
                role: 'assistant',
                model: 'usage-limited',
                content: [
                    {
                        'type': 'text',
                        'text': limitMessage,
                    },
                ],
                stop_reason: 'end_turn',
                stop_sequence: null,
                usage: {
                    'input_tokens': 0,
                    'output_tokens': 1,
                },
            },
            usage: {
                'input_tokens': 0,
                'output_tokens': 1,
            },
            finish_reason: 'stop',
        };
    }
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    getDefaultModel () {
        return 'usage-limited';
    }
}
@@ -0,0 +1,96 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { XAI_MODELS } from './models.js';

export class XAIProvider implements IChatProvider {
    #openai: OpenAI;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: 'https://api.x.ai/v1',
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'grok-beta';
    }

    models () {
        return XAI_MODELS;
    }

    async list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    async complete ({ messages, stream, model, tools }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const availableModels = this.models();
        const modelUsed = availableModels.find(m => [m.id, ...(m.aliases || [])].includes(model)) || availableModels.find(m => m.id === this.getDefaultModel())!;

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            max_tokens: 1000,
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverride = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `xai:${modelUsed.id}`, costsOverride);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
@@ -0,0 +1,87 @@
import { IChatModel } from '../types.js';

const makeModel = ({
    id,
    name,
    context,
    input,
    output,
}: {
    id: string;
    name: string;
    context: number;
    input: number;
    output: number;
}): IChatModel => ({
    id,
    name,
    context,
    costs_currency: 'usd-cents',
    input_cost_key: 'prompt_tokens',
    output_cost_key: 'completion_tokens',
    costs: {
        tokens: 1_000_000,
        prompt_tokens: input,
        completion_tokens: output,
    },
    max_tokens: context,
});

export const XAI_MODELS: IChatModel[] = [
    makeModel({
        id: 'grok-beta',
        name: 'Grok Beta',
        context: 131072,
        input: 500,
        output: 1500,
    }),
    makeModel({
        id: 'grok-vision-beta',
        name: 'Grok Vision Beta',
        context: 8192,
        input: 500,
        output: 1500,
    }),
    makeModel({
        id: 'grok-3',
        name: 'Grok 3',
        context: 131072,
        input: 300,
        output: 1500,
    }),
    makeModel({
        id: 'grok-3-fast',
        name: 'Grok 3 Fast',
        context: 131072,
        input: 500,
        output: 2500,
    }),
    makeModel({
        id: 'grok-3-mini',
        name: 'Grok 3 Mini',
        context: 131072,
        input: 30,
        output: 50,
    }),
    makeModel({
        id: 'grok-3-mini-fast',
        name: 'Grok 3 Mini Fast',
        context: 131072,
        input: 60,
        output: 400,
    }),
    makeModel({
        id: 'grok-2-vision',
        name: 'Grok 2 Vision',
        context: 8192,
        input: 200,
        output: 1000,
    }),
    makeModel({
        id: 'grok-2',
        name: 'Grok 2',
        context: 131072,
        input: 200,
        output: 1000,
    }),
];
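Under the cost convention above, `costs.tokens` is the denominator (one million) and the per-token-kind entries are USD cents per that many tokens, so grok-beta's `input: 500` reads as 500 cents ($5) per million prompt tokens. A minimal sketch of the arithmetic (editor's illustration, not part of the commit; the usage numbers are made up and the division by `costs.tokens` is assumed to happen in the metering layer):

// Hypothetical usage against grok-beta's cost entry
const grokBetaCosts = { tokens: 1_000_000, prompt_tokens: 500, completion_tokens: 1500 };
const usage = { prompt_tokens: 2_000, completion_tokens: 500 };

// usd-cents = tokens used * (cents per million tokens) / 1,000,000
const promptCents = usage.prompt_tokens * grokBetaCosts.prompt_tokens / grokBetaCosts.tokens;             // 1
const completionCents = usage.completion_tokens * grokBetaCosts.completion_tokens / grokBetaCosts.tokens; // 0.75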
70 src/backend/src/services/ai/chat/providers/types.ts Normal file
@@ -0,0 +1,70 @@
import { Message } from 'openai/resources/conversations/conversations.js';
import { ModerationCreateResponse } from 'openai/resources/moderations.js';
import { AIChatStream } from '../../utils/Streaming';

type ModelCost = Record<string, number>;

export interface IChatModel<T extends ModelCost = ModelCost> extends Record<string, unknown> {
    id: string,
    provider?: string,
    aliases?: string[],
    costs_currency: string,
    input_cost_key?: keyof T,
    output_cost_key?: keyof T,
    costs: T,
    context?: number,
    max_tokens: number,
}

export type PuterMessage = Message | any; // TODO DS: type this more strictly
export interface ICompleteArguments {
    messages: PuterMessage[];
    provider?: string;
    stream?: boolean;
    model: string;
    tools?: unknown[];
    max_tokens?: number;
    temperature?: number;
    reasoning?: { effort: 'low' | 'medium' | 'high' } | undefined;
    text?: string & { verbosity?: 'concise' | 'detailed' | undefined };
    reasoning_effort?: 'low' | 'medium' | 'high' | undefined;
    verbosity?: 'concise' | 'detailed' | undefined;
    moderation?: boolean;
    custom?: unknown;
    response?: {
        normalize?: boolean;
    };
    customLimitMessage?: string;
}

export interface IChatProvider {
    models(): IChatModel[] | Promise<IChatModel[]>
    list(): string[] | Promise<string[]>
    checkModeration (text: string): Promise<{
        flagged: boolean;
        results: ModerationCreateResponse & {
            _request_id?: string | null;
        };
    }>
    getDefaultModel(): string;
    complete (arg: ICompleteArguments): Promise<{
        init_chat_stream: ({ chatStream }: {
            chatStream: AIChatStream;
        }) => Promise<void>;
        stream: true;
        finally_fn: () => Promise<void>;
        message?: never;
        usage?: never;
        finish_reason?: never;
        via_ai_chat_service?: true, // legacy field always true now
    } | {
        message: PuterMessage;
        usage: Record<string, number>;
        finish_reason: string;
        init_chat_stream?: never;
        stream?: never;
        finally_fn?: never;
        normalized?: boolean;
        via_ai_chat_service?: true, // legacy field always true now
    }>
}
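The `complete()` return type is a discriminated union: callers get exactly one of the streaming shape (`init_chat_stream` present, `message` never) or the buffered shape (`message`, `usage`, `finish_reason` present). A sketch of consuming it (editor's illustration, not part of the commit; `provider` and `chatStream` are hypothetical stand-ins for an IChatProvider and an AIChatStream):

const result = await provider.complete({
    messages: [{ role: 'user', content: 'hi' }],
    model: 'grok-beta',
    stream: true,
});
if ( result.init_chat_stream ) {
    // Streaming shape: drive the stream, then run the cleanup hook.
    await result.init_chat_stream({ chatStream });
    await result.finally_fn();
} else {
    // Buffered shape: message, usage, and finish_reason are all present.
    console.log(result.message, result.usage, result.finish_reason);
}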
@@ -18,10 +18,10 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');
const { GoogleGenAI } = require('@google/genai');

/**
@@ -30,7 +30,7 @@ const { GoogleGenAI } = require('@google/genai');
 * the puter-image-generation interface.
 */
class GeminiImageGenerationService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -225,7 +225,7 @@ class GeminiImageGenerationService extends BaseService {
     * @returns {Array<Object>} Array of valid ratio objects
     * @private
     */
    _getValidRatios(model) {
    _getValidRatios (model) {
        if (
            model === 'gemini-2.5-flash-image-preview' ||
            model === 'gemini-3-pro-image-preview'
@@ -18,10 +18,10 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');

/**
 * Service class for generating images using OpenAI's DALL-E API.
@@ -31,7 +31,7 @@ const { Context } = require('../../util/context');
 * validation, and spending tracking.
 */
class OpenAIImageGenerationService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -18,11 +18,11 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const { Together } = require('together-ai');
import { Context } from '../../../util/context.js';
import { Together } from 'together-ai';
import APIError from '../../../api/APIError.js';
import BaseService from '../../BaseService.js';
import { TypedValue } from '../../drivers/meta/Runtime.js';

/**
 * Service class for generating images using Together AI models.
@@ -30,14 +30,21 @@ const { Together } = require('together-ai');
 * puter-image-generation interface. Handles authentication, request validation,
 * and metering integration.
 */
class TogetherImageGenerationService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */

export class TogetherImageGenerationService extends BaseService {
    DEFAULT_MODEL = 'black-forest-labs/FLUX.1-schnell';
    DEFAULT_RATIO = { w: 1024, h: 1024 };
    CONDITION_IMAGE_MODELS = [
        'black-forest-labs/flux.1-kontext-dev',
        'black-forest-labs/flux.1-kontext-pro',
        'black-forest-labs/flux.1-kontext-max',
    ];

    /** @type {import('../../MeteringService/MeteringService.js').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    static MODULES = {};

    async _init () {
        const apiKey =
            this.config?.apiKey ??
@@ -58,103 +65,98 @@ class TogetherImageGenerationService extends BaseService {
            },
        },
        ['puter-image-generation']: {
            /**
             * Generates an image using Together AI image models
             * @param {object} params - Generation parameters
             * @param {string} params.prompt - Prompt describing the desired image
             * @param {string} [params.model] - Together AI model identifier
             * @param {object} [params.ratio] - Width/height ratio object (e.g., { w: 1024, h: 1024 })
             * @param {number} [params.width] - Explicit width override
             * @param {number} [params.height] - Explicit height override
             * @param {string} [params.aspect_ratio] - Aspect ratio string (e.g., "16:9")
             * @param {number} [params.steps] - Diffusion step count
             * @param {number} [params.seed] - Seed for reproducibility
             * @param {string} [params.negative_prompt] - Negative prompt text
             * @param {number} [params.n] - Number of images to generate (default 1)
             * @param {string} [params.image_url] - Reference image URL for image-to-image
             * @param {string} [params.image_base64] - Base64 encoded reference image
             * @param {boolean} [params.disable_safety_checker] - Disable Together AI safety checker
             * @param {boolean} [params.test_mode] - Enable Puter test mode shortcut
             * @returns {Promise<TypedValue>} TypedValue containing the generated image URL or data URI
             */
            async generate (params) {
                const {
                    prompt,
                    test_mode,
                    ratio,
                    model,
                    width,
                    height,
                    aspect_ratio,
                    steps,
                    seed,
                    negative_prompt,
                    n,
                    image_url,
                    image_base64,
                    mask_image_url,
                    mask_image_base64,
                    prompt_strength,
                    disable_safety_checker,
                    response_format,
                } = params;

                const svc_event = this.services.get('event');
                svc_event.emit('ai.log.image', { actor: Context.get('actor'), parameters: params, completionId: '0', intended_service: params.model });

                if ( test_mode ) {
                    return new TypedValue({
                        $: 'string:url:web',
                        content_type: 'image',
                    }, 'https://puter-sample-data.puter.site/image_example.png');
                }

                const url = await this.generate(prompt, {
                    ratio,
                    model,
                    width,
                    height,
                    aspect_ratio,
                    steps,
                    seed,
                    negative_prompt,
                    n,
                    image_url,
                    image_base64,
                    mask_image_url,
                    mask_image_base64,
                    prompt_strength,
                    disable_safety_checker,
                    response_format,
                });

                const isDataUrl = url.startsWith('data:');
                return new TypedValue({
                    $: isDataUrl ? 'string:url:data' : 'string:url:web',
                    content_type: 'image',
                }, url);
            async generate (...args) {
                return this.generate(...args);
            },
        },
        ['models']: {

        },
    };

    static DEFAULT_MODEL = 'black-forest-labs/FLUX.1-schnell';
    static DEFAULT_RATIO = { w: 1024, h: 1024 };
    static CONDITION_IMAGE_MODELS = [
        'black-forest-labs/flux.1-kontext-dev',
        'black-forest-labs/flux.1-kontext-pro',
        'black-forest-labs/flux.1-kontext-max',
    ];

    /**
     * Generates an image using Together AI client
     * @private
     * Generates an image using Together AI image models
     * @param {object} params - Generation parameters
     * @param {string} params.prompt - Prompt describing the desired image
     * @param {string} [params.model] - Together AI model identifier
     * @param {object} [params.ratio] - Width/height ratio object (e.g., { w: 1024, h: 1024 })
     * @param {number} [params.width] - Explicit width override
     * @param {number} [params.height] - Explicit height override
     * @param {string} [params.aspect_ratio] - Aspect ratio string (e.g., "16:9")
     * @param {number} [params.steps] - Diffusion step count
     * @param {number} [params.seed] - Seed for reproducibility
     * @param {string} [params.negative_prompt] - Negative prompt text
     * @param {number} [params.n] - Number of images to generate (default 1)
     * @param {string} [params.image_url] - Reference image URL for image-to-image
     * @param {string} [params.image_base64] - Base64 encoded reference image
     * @param {boolean} [params.disable_safety_checker] - Disable Together AI safety checker
     * @param {boolean} [params.test_mode] - Enable Puter test mode shortcut
     * @returns {Promise<TypedValue>} TypedValue containing the generated image URL or data URI
     */
    async generate (prompt, options) {
    async generate (params) {
        const {
            prompt,
            test_mode,
            ratio,
            model,
            width,
            height,
            aspect_ratio,
            steps,
            seed,
            negative_prompt,
            n,
            image_url,
            image_base64,
            mask_image_url,
            mask_image_base64,
            prompt_strength,
            disable_safety_checker,
            response_format,
        } = params;

        const svc_event = this.services.get('event');
        svc_event.emit('ai.log.image', { actor: Context.get('actor'), parameters: params, completionId: '0', intended_service: params.model });

        if ( test_mode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'image',
            }, 'https://puter-sample-data.puter.site/image_example.png');
        }

        const url = await this.#generate(prompt, {
            ratio,
            model,
            width,
            height,
            aspect_ratio,
            steps,
            seed,
            negative_prompt,
            n,
            image_url,
            image_base64,
            mask_image_url,
            mask_image_base64,
            prompt_strength,
            disable_safety_checker,
            response_format,
        });

        const isDataUrl = url.startsWith('data:');
        return new TypedValue({
            $: isDataUrl ? 'string:url:data' : 'string:url:web',
            content_type: 'image',
        }, url);
    }

    async #generate (prompt, options) {
        if ( typeof prompt !== 'string' || prompt.trim().length === 0 ) {
            throw new Error('`prompt` must be a non-empty string');
        }

        const request = this._buildRequest(prompt, options);
        const request = this.#buildRequest(prompt, options);

        const actor = Context.get('actor');
        if ( ! actor ) {
@@ -187,9 +189,8 @@ class TogetherImageGenerationService extends BaseService {

    /**
     * Normalizes Together AI image generation request parameters
     * @private
     */
    _buildRequest (prompt, options = {}) {
    #buildRequest (prompt, options = {}) {
        const {
            ratio,
            model,
@@ -215,13 +216,13 @@ class TogetherImageGenerationService extends BaseService {
            model: model ?? this.constructor.DEFAULT_MODEL,
        };
        const requiresConditionImage =
            this.constructor._modelRequiresConditionImage(request.model);
            this.#modelRequiresConditionImage(request.model);

        const ratioWidth = (ratio && ratio.w !== undefined) ? Number(ratio.w) : undefined;
        const ratioHeight = (ratio && ratio.h !== undefined) ? Number(ratio.h) : undefined;

        const normalizedWidth = this._normalizeDimension(width !== undefined ? Number(width) : (ratioWidth ?? this.constructor.DEFAULT_RATIO.w));
        const normalizedHeight = this._normalizeDimension(height !== undefined ? Number(height) : (ratioHeight ?? this.constructor.DEFAULT_RATIO.h));
        const normalizedWidth = this.#normalizeDimension(width !== undefined ? Number(width) : (ratioWidth ?? this.constructor.DEFAULT_RATIO.w));
        const normalizedHeight = this.#normalizeDimension(height !== undefined ? Number(height) : (ratioHeight ?? this.constructor.DEFAULT_RATIO.h));

        if ( aspect_ratio ) {
            request.aspect_ratio = aspect_ratio;
@@ -269,14 +270,14 @@ class TogetherImageGenerationService extends BaseService {
        return request;
    }

    _normalizeDimension (value) {
    #normalizeDimension (value) {
        if ( typeof value !== 'number' ) return undefined;
        const rounded = Math.max(64, Math.round(value));
        // Flux models expect multiples of 8. Snap to the nearest multiple without going below 64.
        return Math.max(64, Math.round(rounded / 8) * 8);
    }

    static _modelRequiresConditionImage (model) {
    #modelRequiresConditionImage (model) {
        if ( typeof model !== 'string' || model.trim() === '' ) {
            return false;
        }
@@ -284,8 +285,4 @@ class TogetherImageGenerationService extends BaseService {
        const normalized = model.toLowerCase();
        return this.CONDITION_IMAGE_MODELS.some(required => normalized === required);
    }
}

module.exports = {
    TogetherImageGenerationService,
};
}
@@ -20,9 +20,21 @@
const { default: dedent } = require('dedent');

class AsModeration {
    constructor ({ chat, model }) {
        this.chat = chat;
        this.model = model;

    /** @type {import('../chat/providers/ChatProvider').IChatProvider} */
    #chatProvider;

    /** @type {string} */
    #model;

    /**
     * @param {object} args
     * @param {import('../chat/providers/ChatProvider').IChatProvider} args.chatProvider
     * @param {string} args.model
     */
    constructor ({ chatProvider, model }) {
        this.#chatProvider = chatProvider;
        this.#model = model;
    }

    async moderate (text) {
@@ -51,10 +63,7 @@ class AsModeration {

        Message:
        <message>
        ${text
            .replace('<', '&lt;')
            .replace('>', '&gt;')
        }
        ${text.replace('<', '&lt;').replace('>', '&gt;')}
        </message>

        Unsafe Categories:
@@ -70,17 +79,15 @@ class AsModeration {
        }}
        `);

        const result = await this.chat.complete({
        const result = await this.#chatProvider.complete({
            messages: [
                {
                    role: 'user',
                    content: assessment_prompt,
                },
            ],
            model: this.#model,
        });

        console.log('result???', require('util').inspect(result, { depth: null }));

        const str = result.message?.content?.[0]?.text ??
            result.messages?.[0]?.content?.[0]?.text ??
            '{ "violation": true }';
@@ -20,9 +20,9 @@
// METADATA // {"ai-commented":{"service":"claude"}}
const { TextractClient, AnalyzeDocumentCommand, InvalidS3ObjectException } = require('@aws-sdk/client-textract');

const BaseService = require('../../services/BaseService');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
const BaseService = require('../../BaseService');
const APIError = require('../../../api/APIError');
const { Context } = require('../../../util/context');

/**
 * AWSTextractService class - Provides OCR (Optical Character Recognition) functionality using AWS Textract
@@ -31,7 +31,7 @@ const { Context } = require('../../util/context');
 * Handles both S3-stored and buffer-based document processing with automatic region management.
 */
class AWSTextractService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
298 src/backend/src/services/ai/ocr/MistralOCRService.js Normal file
@@ -0,0 +1,298 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import { Context } from '@heyputer/putility/src/libs/context.js';
import { Mistral } from '@mistralai/mistralai';
import mime from 'mime-types';
import APIError from '../../../api/APIError.js';
import path from 'path';
import BaseService from '../../BaseService.js';

/**
 * MistralOCRService class extends BaseService to provide OCR (document text
 * extraction) via the Mistral AI OCR API. Accepts documents as web URLs,
 * data URLs, or raw buffers, normalizes the OCR response into text blocks,
 * records per-page usage with the metering service, and implements the
 * puter-ocr driver interface.
 */
export class MistralOCRService extends BaseService {
    /** @type {import('../../MeteringService/MeteringService.js').MeteringService} */
    meteringService;
    /**
     * Initializes the service's cost structure for different Mistral AI models.
     * Sets up pricing information for various models including token costs for input/output.
     * Each model entry specifies currency (usd-cents) and costs per million tokens.
     * @private
     */

    models = [
        {
            id: 'mistral-ocr-latest',
            aliases: ['mistral-ocr-2505', 'mistral-ocr'],
            cost: {
                currency: 'usd-cents',
                pages: 1000,
                input: 100,
                output: 300,
            },
        },
    ];

    static IMPLEMENTS = {
        'driver-capabilities': {
            supports_test_mode (iface, method_name) {
                return iface === 'puter-ocr' && method_name === 'recognize';
            },
        },
        'puter-ocr': {
            async recognize (...params) {
                return this.recognize(...params);
            },
        },
    };

    /**
     * Initializes the service's cost structure for different Mistral AI models.
     * Sets up pricing information for various models including token costs for input/output.
     * Each model entry specifies currency (USD cents) and costs per million tokens.
     * @private
     */
    async _init () {
        this.api_base_url = 'https://api.mistral.ai/v1';
        this.client = new Mistral({
            apiKey: this.config.apiKey,
        });

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });

        this.meteringService = this.services.get('meteringService').meteringService;
    }

    async recognize ({
        source,
        model,
        pages,
        includeImageBase64,
        imageLimit,
        imageMinSize,
        bboxAnnotationFormat,
        documentAnnotationFormat,
        test_mode,
    }) {
        if ( test_mode ) {
            return this.#sampleOcrResponse();
        }
        if ( ! source ) {
            throw APIError.create('missing_required_argument', {
                interface_name: 'puter-ocr',
                method_name: 'recognize',
                arg_name: 'source',
            });
        }

        const document = await this._buildDocumentChunkFromSource(source);
        const payload = {
            model: model ?? 'mistral-ocr-latest',
            document,
        };
        if ( Array.isArray(pages) ) {
            payload.pages = pages;
        }
        if ( typeof includeImageBase64 === 'boolean' ) {
            payload.includeImageBase64 = includeImageBase64;
        }
        if ( typeof imageLimit === 'number' ) {
            payload.imageLimit = imageLimit;
        }
        if ( typeof imageMinSize === 'number' ) {
            payload.imageMinSize = imageMinSize;
        }
        if ( bboxAnnotationFormat !== undefined ) {
            payload.bboxAnnotationFormat = bboxAnnotationFormat;
        }
        if ( documentAnnotationFormat !== undefined ) {
            payload.documentAnnotationFormat = documentAnnotationFormat;
        }

        const response = await this.client.ocr.process(payload);
        const annotationsRequested = (
            payload.documentAnnotationFormat !== undefined ||
            payload.bboxAnnotationFormat !== undefined
        );
        this.#recordOcrUsage(response, payload.model, {
            annotationsRequested,
        });
        return this.#normalizeOcrResponse(response);
    }

    async _buildDocumentChunkFromSource (fileFacade) {
        const dataUrl = await this._safeFileValue(fileFacade, 'data_url');
        const webUrl = await this._safeFileValue(fileFacade, 'web_url');
        const filePath = await this._safeFileValue(fileFacade, 'path');
        const fsNode = await this._safeFileValue(fileFacade, 'fs-node');
        const fileName = filePath ? path.basename(filePath) : fsNode?.name;
        const inferredMime = this._inferMimeFromName(fileName);

        if ( webUrl ) {
            return this._chunkFromUrl(webUrl, fileName, inferredMime);
        }
        if ( dataUrl ) {
            const mimeFromUrl = this._extractMimeFromDataUrl(dataUrl) ?? inferredMime;
            return this._chunkFromUrl(dataUrl, fileName, mimeFromUrl);
        }

        const buffer = await this._safeFileValue(fileFacade, 'buffer');
        if ( ! buffer ) {
            throw APIError.create('field_invalid', null, {
                key: 'source',
                expected: 'file, data URL, or web URL',
            });
        }
        const mimeType = inferredMime ?? 'application/octet-stream';
        const generatedDataUrl = this._createDataUrl(buffer, mimeType);
        return this._chunkFromUrl(generatedDataUrl, fileName, mimeType);
    }

    async _safeFileValue (fileFacade, key) {
        if ( !fileFacade || typeof fileFacade.get !== 'function' ) return undefined;
        const maybeCache = fileFacade.values?.values;
        if ( maybeCache && Object.prototype.hasOwnProperty.call(maybeCache, key) ) {
            return maybeCache[key];
        }
        try {
            return await fileFacade.get(key);
        } catch (e) {
            return undefined;
        }
    }

    _chunkFromUrl (url, fileName, mimeType) {
        const lowerName = fileName?.toLowerCase();
        const urlLooksPdf = /\.pdf($|\?)/i.test(url);
        const mimeLooksPdf = mimeType?.includes('pdf');
        const isPdf = mimeLooksPdf || urlLooksPdf || (lowerName ? lowerName.endsWith('.pdf') : false);

        if ( isPdf ) {
            const chunk = {
                type: 'document_url',
                documentUrl: url,
            };
            if ( fileName ) {
                chunk.documentName = fileName;
            }
            return chunk;
        }

        return {
            type: 'image_url',
            imageUrl: {
                url,
            },
        };
    }

    _inferMimeFromName (name) {
        if ( ! name ) return undefined;
        return mime.lookup(name) || undefined;
    }

    _extractMimeFromDataUrl (url) {
        if ( typeof url !== 'string' ) return undefined;
        const match = url.match(/^data:([^;,]+)[;,]/);
        return match ? match[1] : undefined;
    }

    _createDataUrl (buffer, mimeType) {
        return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
    }

    #normalizeOcrResponse (response) {
        if ( ! response ) return {};
        const normalized = {
            model: response.model,
            pages: response.pages ?? [],
            usage_info: response.usageInfo,
        };
        const blocks = [];
        if ( Array.isArray(response.pages) ) {
            for ( const page of response.pages ) {
                if ( typeof page?.markdown !== 'string' ) continue;
                const lines = page.markdown.split('\n').map(line => line.trim()).filter(Boolean);
                for ( const line of lines ) {
                    blocks.push({
                        type: 'text/mistral:LINE',
                        text: line,
                        page: page.index,
                    });
                }
            }
        }
        normalized.blocks = blocks;
        if ( blocks.length ) {
            normalized.text = blocks.map(block => block.text).join('\n');
        } else if ( Array.isArray(response.pages) ) {
            normalized.text = response.pages.map(page => page?.markdown || '').join('\n\n').trim();
        }
        return normalized;
    }

    #recordOcrUsage (response, model, { annotationsRequested } = {}) {
        try {
            if ( ! this.meteringService ) return;
            const actor = Context.get('actor');
            if ( ! actor ) return;
            const pagesProcessed =
                response?.usageInfo?.pagesProcessed ??
                (Array.isArray(response?.pages) ? response.pages.length : 1);
            this.meteringService.incrementUsage(actor, 'mistral-ocr:ocr:page', pagesProcessed);
            if ( annotationsRequested ) {
                this.meteringService.incrementUsage(actor, 'mistral-ocr:annotations:page', pagesProcessed);
            }
        } catch (e) {
            // ignore metering failures to avoid blocking OCR results
        }
    }

    #sampleOcrResponse () {
        const markdown = 'Sample OCR output (test mode).';
        return {
            model: 'mistral-ocr-latest',
            pages: [
                {
                    index: 0,
                    markdown,
                    images: [],
                    dimensions: null,
                },
            ],
            blocks: [
                {
                    type: 'text/mistral:LINE',
                    text: markdown,
                    page: 0,
                },
            ],
            text: markdown,
        };
    }
}
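For orientation, a sketch of how a driver call flows through `recognize` (editor's illustration, not part of the commit; `svc` is a registered MistralOCRService and `fileFacade` a driver-provided FileFacade):

// test_mode short-circuits to #sampleOcrResponse() without touching the API
const sample = await svc.recognize({ source: fileFacade, test_mode: true });
// sample.text === 'Sample OCR output (test mode).'

// A real call builds a document chunk (web URL, else data URL, else buffer),
// calls client.ocr.process, meters 'mistral-ocr:ocr:page' per page processed,
// and returns the normalized { model, pages, blocks, text, usage_info } shape.
const real = await svc.recognize({ source: fileFacade, model: 'mistral-ocr' });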
@@ -18,11 +18,11 @@
 */

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { FileFacade } = require('../../services/drivers/FileFacade');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { FileFacade } = require('../../drivers/FileFacade');
const { Context } = require('../../../util/context');

const DEFAULT_MODEL = 'eleven_multilingual_sts_v2';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM';
@@ -34,7 +34,7 @@ const DEFAULT_OUTPUT_FORMAT = 'mp3_44100_128';
 * ElevenLabs voice changer (speech-to-speech).
 */
class ElevenLabsVoiceChangerService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -68,7 +68,7 @@ class ElevenLabsVoiceChangerService extends BaseService {
        this.defaultVoiceId = svcConfig?.defaultVoiceId ?? svcConfig?.voiceId ?? DEFAULT_VOICE_ID;
        this.defaultModelId = svcConfig?.speechToSpeechModelId ?? svcConfig?.stsModelId ?? DEFAULT_MODEL;

        if ( !this.apiKey ) {
        if ( ! this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }
@@ -99,11 +99,11 @@ class ElevenLabsVoiceChangerService extends BaseService {
            }, SAMPLE_AUDIO_URL);
        }

        if ( !audio ) {
        if ( ! audio ) {
            throw APIError.create('field_required', null, { key: 'audio' });
        }

        if ( !(audio instanceof FileFacade) ) {
        if ( ! (audio instanceof FileFacade) ) {
            throw APIError.create('field_invalid', null, {
                key: 'audio',
                expected: 'file reference',
@@ -120,14 +120,14 @@ class ElevenLabsVoiceChangerService extends BaseService {
        const modelId = model_id || model || this.defaultModelId || DEFAULT_MODEL;
        const selectedVoiceId = voice_id || voiceId || voice || this.defaultVoiceId;

        if ( !selectedVoiceId ) {
        if ( ! selectedVoiceId ) {
            throw APIError.create('field_required', null, { key: 'voice' });
        }

        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:second`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedSeconds);
        if ( !usageAllowed ) {
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

@@ -182,7 +182,7 @@ class ElevenLabsVoiceChangerService extends BaseService {
            body: formData,
        });

        if ( !response.ok ) {
        if ( ! response.ok ) {
            let detail = null;
            try {
                detail = await response.json();
@@ -17,10 +17,10 @@
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

const BaseService = require('../../services/BaseService');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
const { FileFacade } = require('../../services/drivers/FileFacade');
const BaseService = require('../../BaseService');
const APIError = require('../../../api/APIError');
const { Context } = require('../../../util/context');
const { FileFacade } = require('../../drivers/FileFacade');

const MAX_AUDIO_FILE_SIZE = 25 * 1024 * 1024; // 25 MB per OpenAI limits
const DEFAULT_TRANSCRIBE_MODEL = 'gpt-4o-mini-transcribe';
@@ -63,7 +63,7 @@ const TRANSCRIPTION_MODEL_CAPABILITIES = {
};

class OpenAISpeechToTextService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -19,10 +19,10 @@

// METADATA // {"ai-commented":{"service":"claude"}}
const { PollyClient, SynthesizeSpeechCommand, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const APIError = require('../../../api/APIError');
const { Context } = require('../../../util/context');

// Polly price calculation per engine
const ENGINE_PRICING = {
@@ -45,7 +45,7 @@ const VALID_ENGINES = ['standard', 'neural', 'long-form', 'generative'];
 */
class AWSPollyService extends BaseService {

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -18,10 +18,10 @@
 */

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');

const DEFAULT_MODEL = 'eleven_multilingual_v2';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Common public "Rachel" sample voice
@@ -41,7 +41,7 @@ const ELEVENLABS_TTS_MODELS = [
 * using ElevenLabs voices.
 */
class ElevenLabsTTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -72,7 +72,7 @@ class ElevenLabsTTSService extends BaseService {
        this.baseUrl = svcThere?.baseUrl ?? 'https://api.elevenlabs.io';
        this.defaultVoiceId = svcThere?.defaultVoiceId ?? svcThere?.voiceId ?? DEFAULT_VOICE_ID;

        if ( !this.apiKey ) {
        if ( ! this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }
@@ -158,7 +158,7 @@ class ElevenLabsTTSService extends BaseService {
        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:character`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, text.length);
        if ( !usageAllowed ) {
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

@@ -18,10 +18,10 @@
 */

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');

const DEFAULT_MODEL = 'gpt-4o-mini-tts';
const DEFAULT_VOICE = 'alloy';
@@ -73,7 +73,7 @@ const OPENAI_TTS_MODELS = [
 * the AWS Polly implementation.
 */
class OpenAITTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
120 src/backend/src/services/ai/utils/FunctionCalling.js Normal file
@@ -0,0 +1,120 @@
/**
 * Normalizes the 'tools' object in-place.
 *
 * This function will accept an array of tools provided by the
 * user, and produce a normalized object that can then be
 * converted to the appropriate representation for another
 * service.
 *
 * We will accept conventions from either service that a user
 * might expect to work, prioritizing the OpenAI convention
 * when conflicting conventions are present.
 *
 * @param {*} tools
 */
export const normalize_tools_object = (tools) => {
    for ( let i = 0 ; i < tools.length ; i++ ) {
        const tool = tools[i];
        let normalized_tool = {};

        const normalize_function = fn => {
            const normal_fn = {};
            let parameters =
                fn.parameters ||
                fn.input_schema;

            normal_fn.parameters = parameters ?? {
                type: 'object',
            };

            // normalize_json_schema mutates the schema in place, so the
            // reference already assigned to normal_fn.parameters stays valid
            if ( parameters?.properties ) {
                parameters = normalize_json_schema(parameters);
            }

            if ( fn.name ) {
                normal_fn.name = fn.name;
            }

            if ( fn.description ) {
                normal_fn.description = fn.description;
            }

            return normal_fn;
        };

        if ( tool.input_schema ) {
            normalized_tool = {
                type: 'function',
                function: normalize_function(tool),
            };
        } else if ( tool.type === 'function' ) {
            normalized_tool = {
                type: 'function',
                function: normalize_function(tool.function),
            };
        } else {
            normalized_tool = {
                type: 'function',
                function: normalize_function(tool),
            };
        }

        tools[i] = normalized_tool;
    }
    return tools;
};
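Concretely, both an Anthropic-style tool (top-level `input_schema`) and an OpenAI-style tool (`type: 'function'`) normalize to the same wrapper. A short sketch (editor's illustration, not part of the commit):

import { normalize_tools_object } from './FunctionCalling.js';

const anthropicStyle = [{ name: 'get_weather', description: 'Look up weather', input_schema: { type: 'object', properties: { city: { type: 'string' } } } }];
const openaiStyle = [{ type: 'function', function: { name: 'get_weather', description: 'Look up weather', parameters: { type: 'object', properties: { city: { type: 'string' } } } } }];

normalize_tools_object(anthropicStyle);
normalize_tools_object(openaiStyle);
// Both arrays now contain:
// { type: 'function', function: { name: 'get_weather', description: 'Look up weather',
//   parameters: { type: 'object', properties: { city: { type: 'string' } } } } }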

export const normalize_json_schema = (schema) => {
    if ( ! schema ) return schema;

    if ( schema.type === 'object' ) {
        if ( ! schema.properties ) {
            return schema;
        }

        const keys = Object.keys(schema.properties);
        for ( const key of keys ) {
            schema.properties[key] = normalize_json_schema(schema.properties[key]);
        }
    }

    if ( schema.type === 'array' ) {
        if ( ! schema.items ) {
            schema.items = {};
        } else {
            schema.items = normalize_json_schema(schema.items);
        }
    }

    return schema;
};

/**
 * This function will convert a normalized tools object to the
 * format expected by OpenAI.
 *
 * @param {*} tools
 * @returns
 */
export const make_openai_tools = (tools) => {
    return tools;
};

/**
 * This function will convert a normalized tools object to the
 * format expected by Claude.
 *
 * @param {*} tools
 * @returns
 */
export const make_claude_tools = (tools) => {
    if ( ! tools ) return undefined;
    return tools.map(tool => {
        const { name, description, parameters } = tool.function;
        return {
            name,
            description,
            input_schema: parameters,
        };
    });
};
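`make_openai_tools` is the identity because the normalized form already is the OpenAI form; `make_claude_tools` unwraps it back to a top-level `input_schema`. For example (editor's illustration, not part of the commit):

import { make_claude_tools } from './FunctionCalling.js';

const normalized = [{
    type: 'function',
    function: { name: 'get_weather', description: 'Look up weather', parameters: { type: 'object' } },
}];
make_claude_tools(normalized);
// => [{ name: 'get_weather', description: 'Look up weather', input_schema: { type: 'object' } }]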
184 src/backend/src/services/ai/utils/Messages.js Normal file
@@ -0,0 +1,184 @@
import { whatis } from '../../../util/langutil.js';

/**
 * Normalizes a single message into a standardized format with role and content array.
 * Converts string messages to objects, ensures content is an array of content blocks,
 * transforms tool_calls into tool_use content blocks, and coerces content items into objects.
 *
 * @param {string|Object} message - The message to normalize, either a string or message object
 * @param {Object} params - Optional parameters including default role
 * @returns {Object} Normalized message with role and content array
 * @throws {Error} If message is not a string or object
 * @throws {Error} If message has no content property and no tool_calls
 * @throws {Error} If any content item is not a string or object
 */
export const normalize_single_message = (message, params = {}) => {
    params = Object.assign({
        role: 'user',
    }, params);

    if ( typeof message === 'string' ) {
        message = {
            content: [message],
        };
    }
    if ( whatis(message) !== 'object' ) {
        throw new Error('each message must be a string or object');
    }
    if ( ! message.role ) {
        message.role = params.role;
    }
    if ( ! message.content ) {
        if ( message.tool_calls ) {
            message.content = [];
            for ( let i = 0 ; i < message.tool_calls.length ; i++ ) {
                const tool_call = message.tool_calls[i];
                message.content.push({
                    type: 'tool_use',
                    id: tool_call.id,
                    name: tool_call.function.name,
                    input: tool_call.function.arguments,
                });
            }
            delete message.tool_calls;
        } else {
            throw new Error('each message must have a \'content\' property');
        }
    }
    if ( whatis(message.content) !== 'array' ) {
        message.content = [message.content];
    }
    // Coerce each content block into an object
    for ( let i = 0 ; i < message.content.length ; i++ ) {
        if ( whatis(message.content[i]) === 'string' ) {
            message.content[i] = {
                type: 'text',
                text: message.content[i],
            };
        }
        if ( whatis(message.content[i]) !== 'object' ) {
            throw new Error('each message content item must be a string or object');
        }
        if ( typeof message.content[i].text === 'string' && !message.content[i].type ) {
            message.content[i].type = 'text';
        }
    }

    // Remove "text" properties from content blocks with type=tool_use
    for ( let i = 0 ; i < message.content.length ; i++ ) {
        if ( message.content[i].type !== 'tool_use' ) {
            continue;
        }
        if ( Object.prototype.hasOwnProperty.call(message.content[i], 'text') ) {
            delete message.content[i].text;
        }
    }

    return message;
};
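A quick sketch of the normalizer's effect (editor's illustration, not part of the commit):

import { normalize_single_message } from './Messages.js';

normalize_single_message('hello');
// => { role: 'user', content: [{ type: 'text', text: 'hello' }] }

normalize_single_message({
    role: 'assistant',
    tool_calls: [{ id: 't1', function: { name: 'f', arguments: '{}' } }],
});
// => { role: 'assistant', content: [{ type: 'tool_use', id: 't1', name: 'f', input: '{}' }] }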

/**
 * Normalizes an array of messages by applying normalize_single_message to each,
 * then splits messages with multiple content blocks into separate messages,
 * and finally merges consecutive messages from the same role.
 *
 * @param {Array} messages - Array of messages to normalize
 * @param {Object} params - Optional parameters passed to normalize_single_message
 * @returns {Array} Normalized and merged array of messages
 */
export const normalize_messages = (messages, params = {}) => {
    for ( let i = 0 ; i < messages.length ; i++ ) {
        messages[i] = normalize_single_message(messages[i], params);
    }

    // Split messages with tool_use content into separate messages
    // TODO: unit test this
    messages = [...messages];
    for ( let i = 0 ; i < messages.length ; i++ ) {
        let message = messages[i];
        let separated_messages = [];
        for ( let j = 0 ; j < message.content.length ; j++ ) {
            // Each content block becomes its own message; tool_result blocks
            // must stand alone, and other blocks are split the same way.
            separated_messages.push({
                ...message,
                content: [message.content[j]],
            });
        }
        messages.splice(i, 1, ...separated_messages);
    }

    // If multiple messages are from the same role, merge them
    let merged_messages = [];
    let current_role = null;
    for ( let i = 0 ; i < messages.length ; i++ ) {
        if ( current_role === messages[i].role ) {
            merged_messages[merged_messages.length - 1].content.push(...messages[i].content);
        } else {
            merged_messages.push(messages[i]);
            current_role = messages[i].role;
        }
    }

    return merged_messages;
};
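So two consecutive user messages collapse into one multi-block message (editor's illustration, not part of the commit):

import { normalize_messages } from './Messages.js';

normalize_messages(['first', 'second']);
// => [{ role: 'user', content: [
//      { type: 'text', text: 'first' },
//      { type: 'text', text: 'second' },
//    ] }]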

/**
 * Separates system messages from other messages in the array.
 *
 * @param {Array} messages - Array of messages to process
 * @returns {Array} Tuple containing [system_messages, non_system_messages]
 */
export const extract_and_remove_system_messages = (messages) => {
    let system_messages = [];
    let new_messages = [];
    for ( let i = 0 ; i < messages.length ; i++ ) {
        if ( messages[i].role === 'system' ) {
            system_messages.push(messages[i]);
        } else {
            new_messages.push(messages[i]);
        }
    }
    return [system_messages, new_messages];
};

/**
 * Extracts all text content from messages, handling various message formats.
 * Processes strings, objects with content arrays, and nested content structures,
 * joining all text with spaces.
 *
 * @param {Array} messages - Array of messages to extract text from
 * @returns {string} Concatenated text content from all messages
 * @throws {Error} If text content is not a string
 */
export const extract_text = (messages) => {
    return messages.map(m => {
        if ( whatis(m) === 'string' ) {
            return m;
        }
        if ( whatis(m) !== 'object' ) {
            return '';
        }
        if ( whatis(m.content) === 'array' ) {
            return m.content.map(c => c.text).join(' ');
        }
        if ( whatis(m.content) === 'string' ) {
            return m.content;
        } else {
            const is_text_type = m.content.type === 'text' ||
                !Object.prototype.hasOwnProperty.call(m.content, 'type');
            if ( is_text_type ) {
                if ( whatis(m.content.text) !== 'string' ) {
                    throw new Error('text content must be a string');
                }
                return m.content.text;
            }
            return '';
        }
    }).join(' ');
};
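Example behavior (editor's illustration, not part of the commit):

import { extract_text } from './Messages.js';

extract_text([
    'plain string',
    { role: 'user', content: [{ type: 'text', text: 'from array' }] },
    { role: 'user', content: 'bare string content' },
]);
// => 'plain string from array bare string content'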
116 src/backend/src/services/ai/utils/OpenAIUtil.d.ts vendored Normal file
@@ -0,0 +1,116 @@
import type {
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionContentPart,
    ChatCompletionMessageParam,
    ChatCompletionMessageToolCall,
} from 'openai/resources/chat/completions';
import type { CompletionUsage } from 'openai/resources/completions';
import { IChatModel, IChatProvider } from '../chat/providers/types';

export interface ToolUseContent {
    type: 'tool_use';
    id: string;
    name: string;
    input: unknown;
    extra_content?: unknown;
}

export interface ToolResultContent {
    type: 'tool_result';
    tool_use_id: string;
    content: unknown;
}

export type NormalizedContent =
    | ChatCompletionContentPart
    | ToolUseContent
    | ToolResultContent
    | ({ type?: 'image_url'; image_url: unknown; [key: string]: unknown });

export interface NormalizedMessage extends Partial<ChatCompletionMessageParam> {
    role?: ChatCompletionMessageParam['role'] | string;
    content?: NormalizedContent[] | null;
    tool_calls?: ChatCompletionMessageToolCall[];
    tool_call_id?: string;
    [key: string]: unknown;
}

export type UsageCalculator = (args: { usage: CompletionUsage }) => Record<string, number>;

export interface ChatStream {
    message(): {
        contentBlock: (params: { type: 'text' } | { type: 'tool_use'; id: string; name: string; extra_content?: unknown }) => {
            addText?(text: string): void;
            addReasoning?(reasoning: string): void;
            addExtraContent?(extra_content: unknown): void;
            addPartialJSON?(partial_json: string): void;
            end(): void;
        };
        end(): void;
    };
    end(): void;
}

export type StreamingToolCall = ChatCompletionChunk.Choice.Delta.ToolCall & { extra_content?: unknown };

export type CompletionChunk = Omit<ChatCompletionChunk, 'choices' | 'usage'> & {
    choices: Array<
        Omit<ChatCompletionChunk['choices'][number], 'delta'> & {
            delta: ChatCompletionChunk['choices'][number]['delta'] & {
                reasoning_content?: string | null;
                reasoning?: string | null;
                extra_content?: unknown;
                tool_calls?: StreamingToolCall[];
            };
        }
    >;
    usage?: CompletionUsage | null;
};

export interface StreamDeviations {
    index_usage_from_stream_chunk?: (chunk: CompletionChunk) => Partial<CompletionUsage> | null | undefined;
    chunk_but_like_actually?: (chunk: CompletionChunk) => Partial<CompletionChunk>;
    index_tool_calls_from_stream_choice?: (choice: CompletionChunk['choices'][number]) => StreamingToolCall[] | undefined;
}

export interface CompletionDeviations<TCompletion = ChatCompletion> {
    coerce_completion_usage?: (completion: TCompletion) => Partial<CompletionUsage>;
    chunk_but_like_actually?: (chunk: CompletionChunk) => Partial<CompletionChunk>;
    index_tool_calls_from_stream_choice?: (choice: CompletionChunk['choices'][number]) => StreamingToolCall[] | undefined;
    index_usage_from_stream_chunk?: (chunk: CompletionChunk) => Partial<CompletionUsage> | null | undefined;
}

export function process_input_messages<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;

export function create_usage_calculator (params: { model_details: IChatModel }): UsageCalculator;

export function extractMeteredUsage (usage: {
    prompt_tokens?: number | null;
    completion_tokens?: number | null;
    prompt_tokens_details?: { cached_tokens?: number | null } | null;
}): {
    prompt_tokens: number;
    completion_tokens: number;
    cached_tokens: number;
};
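Per the declaration, `extractMeteredUsage` coerces the nullable OpenAI usage fields into plain numbers. A sketch of expected behavior (editor's illustration, not part of the commit; only the shape is guaranteed by the .d.ts, and whether cached tokens are also counted inside prompt_tokens is up to the implementation):

extractMeteredUsage({
    prompt_tokens: 120,
    completion_tokens: 40,
    prompt_tokens_details: { cached_tokens: 100 },
});
// => { prompt_tokens: <number>, completion_tokens: 40, cached_tokens: 100 } — numbers, never null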

export function create_chat_stream_handler (params: {
    deviations?: StreamDeviations;
    completion: AsyncIterable<CompletionChunk>;
    usage_calculator?: UsageCalculator;
}): (args: { chatStream: ChatStream }) => Promise<void>;

type CompletionChoice<TCompletion> = TCompletion extends { choices: Array<infer Choice> }
    ? Choice
    : ChatCompletion['choices'][number];

export function handle_completion_output<TCompletion = ChatCompletion> (params: {
    deviations?: CompletionDeviations<TCompletion>;
    stream?: boolean;
    completion: AsyncIterable<CompletionChunk> | TCompletion;
    moderate?: (text: string) => Promise<{ flagged: boolean }>;
    usage_calculator?: UsageCalculator;
    finally_fn?: () => Promise<void>;
}): ReturnType<IChatProvider['complete']>;
Some files were not shown because too many files have changed in this diff.