// File: puter/src/puter-js/src/modules/AI.js
// (JavaScript source; the file viewer this was copied from reported 473 lines, 17 KiB)

import * as utils from '../lib/utils.js'
// Longest `source` string (URL or data URI), in characters, that img2txt will send.
const IMG2TXT_MAX_INPUT_SIZE = 10 * 1024 * 1024;

// Longest text, in characters, that txt2speech will send.
const TXT2SPEECH_MAX_INPUT_SIZE = 3000;

// Language codes accepted by txt2speech: "en", "es", "en-US", "fr-FR", ...
const TXT2SPEECH_LANGUAGE_PATTERN = /^[a-z]{2}(-[A-Z]{2})?$/;

// Short model names accepted by chat() and the concrete model id each one stands for.
const CHAT_MODEL_ALIASES = new Map([
    ['claude-3-5-sonnet', 'claude-3-5-sonnet-latest'],
    ['claude-3-7-sonnet', 'claude-3-7-sonnet-latest'],
    ['claude', 'claude-3-7-sonnet-latest'],
    ['claude-sonnet-4', 'claude-sonnet-4-20250514'],
    ['claude-sonnet-4-latest', 'claude-sonnet-4-20250514'],
    ['claude-opus-4', 'claude-opus-4-20250514'],
    ['claude-opus-4-latest', 'claude-opus-4-20250514'],
    ['mistral', 'mistral-large-latest'],
    ['groq', 'llama3-8b-8192'],
    ['deepseek', 'deepseek-chat'],
    ['o1-mini', 'openrouter:openai/o1-mini'],
]);

// Vendor prefixes whose models are routed through the openrouter driver.
const CHAT_OPENROUTER_VENDOR_PREFIXES = ['meta-llama/', 'google/', 'deepseek/', 'x-ai/'];

// Exact model ids handled by dedicated drivers.
const CHAT_MISTRAL_MODELS = new Set(['mistral-large-latest', 'codestral-latest']);
const CHAT_GROQ_MODELS = new Set([
    'distil-whisper-large-v3-en',
    'gemma2-9b-it',
    'gemma-7b-it',
    'llama-3.1-70b-versatile',
    'llama-3.1-8b-instant',
    'llama3-70b-8192',
    'llama3-8b-8192',
    'llama3-groq-70b-8192-tool-use-preview',
    'llama3-groq-8b-8192-tool-use-preview',
    'llama-guard-3-8b',
    'mixtral-8x7b-32768',
    'whisper-large-v3',
]);
const CHAT_DEEPSEEK_MODELS = new Set(['deepseek-chat', 'deepseek-reasoner']);
const CHAT_GEMINI_MODELS = new Set(['gemini-1.5-flash', 'gemini-2.0-flash']);

// Builds the plain `{ message, code }` object this module throws on invalid input.
const makeAIError = (message, code) => ({ message, code });

// True for any non-null, non-array object.
const isNonArrayObject = (value) =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

// True when `value` is a Blob (or File); false in environments without a global Blob.
const isBlobValue = (value) => typeof Blob !== 'undefined' && value instanceof Blob;

// True when `value` is a File; false in environments without a global File.
const isFileValue = (value) => typeof File !== 'undefined' && value instanceof File;

// Test mode is requested by passing a literal `true` as the 2nd, 3rd or 4th argument.
const hasTestModeFlag = (args) => args.slice(1, 4).includes(true);

// Fetches the raw model list from the chat-completion driver; [] when the response is malformed.
async function fetchChatModelList() {
    const response = await puter.drivers.call('puter-chat-completion', 'ai-chat', 'models');
    return Array.isArray(response?.result) ? response.result : [];
}

// Strips vendor prefixes, expands aliases and adds the "openrouter:" prefix where required.
function normalizeChatModel(requestedModel) {
    let model = requestedModel;

    // "anthropic/<model>" is accepted but sent without the vendor prefix.
    if (model.startsWith('anthropic/')) {
        model = model.slice('anthropic/'.length);
    }

    model = CHAT_MODEL_ALIASES.get(model) ?? model;

    // "openai/<model>" is likewise sent without the vendor prefix.
    if (model.startsWith('openai/')) {
        model = model.slice('openai/'.length);
    }

    if (CHAT_OPENROUTER_VENDOR_PREFIXES.some((prefix) => model.startsWith(prefix))) {
        model = `openrouter:${model}`;
    }

    return model;
}

// Picks the backend driver for an already-normalized model id.
function driverForChatModel(model) {
    if (model === '' || model.startsWith('gpt-')) return 'openai-completion';
    if (model.startsWith('claude-')) return 'claude';
    // Note: "meta-llama/..." and "google/..." ids never reach this point unprefixed;
    // normalizeChatModel() has already turned them into "openrouter:..." ids.
    if (CHAT_MISTRAL_MODELS.has(model)) return 'mistral';
    if (CHAT_GROQ_MODELS.has(model)) return 'groq';
    if (model === 'grok-beta') return 'xai';
    if (CHAT_DEEPSEEK_MODELS.has(model)) return 'deepseek';
    if (CHAT_GEMINI_MODELS.has(model)) return 'gemini';
    if (model.startsWith('openrouter:')) return 'openrouter';
    // Unknown models fall back to the default driver.
    return 'openai-completion';
}

class AI{
    /**
     * Creates a new instance from a context object.
     *
     * @class
     * @param {Object} context - Connection settings.
     * @param {string} context.authToken - Token used to authenticate the user.
     * @param {string} context.APIOrigin - Origin of the API server.
     * @param {string} context.appID - ID of the app to use.
     */
    constructor (context) {
        this.authToken = context.authToken;
        this.APIOrigin = context.APIOrigin;
        this.appID = context.appID;
    }

    /**
     * Replaces the stored authentication token.
     *
     * @param {string} authToken - The new authentication token.
     * @memberof [AI]
     * @returns {void}
     */
    setAuthToken (authToken) {
        this.authToken = authToken;
    }

    /**
     * Sets the API origin.
     *
     * @param {string} APIOrigin - The new API origin.
     * @memberof [AI]
     * @returns {void}
     */
    setAPIOrigin (APIOrigin) {
        this.APIOrigin = APIOrigin;
    }

    /**
     * Returns the available AI models grouped by provider.
     *
     * Entries without a `provider` or an `id` are skipped.
     *
     * @param {string} [provider] - When given, only models of this provider are returned.
     * @returns {Promise<Object>} Map of provider name to an array of model ids;
     *   empty when the driver response is malformed.
     */
    async listModels(provider) {
        const modelsByProvider = {};
        for (const item of await fetchChatModelList()) {
            if (!item?.provider || !item.id) continue;
            if (provider && item.provider !== provider) continue;
            (modelsByProvider[item.provider] ??= []).push(item.id);
        }
        return modelsByProvider;
    }

    /**
     * Returns the distinct providers of all available AI models.
     *
     * @returns {Promise<Array>} Provider names in first-seen order; empty when the
     *   driver response is malformed.
     */
    async listModelProviders() {
        const providers = new Set();
        for (const item of await fetchChatModelList()) {
            if (item?.provider) providers.add(item.provider);
        }
        return [...providers];
    }

    /**
     * Extracts text from an image via the `puter-ocr` / `aws-textract` driver.
     *
     * Accepted first argument: a string (URL or data URI), a Blob/File, or an
     * object whose `source` is one of those. Blobs are converted to data URIs
     * before sending. Passing `true` as the 2nd-4th argument enables test mode.
     *
     * @param {...*} args - `(source[, testMode])`.
     * @returns {Promise<string>} The recognized LINE blocks, each followed by "\n".
     * @throws {{message: string, code: string}} `arguments_required` when no usable
     *   source is given; `input_too_large` when the source string exceeds the limit.
     */
    img2txt = async (...args) => {
        const [input] = args;

        // Unwrap `{ source }` objects; strings and Blobs are the source themselves.
        let source = input;
        if (isNonArrayObject(input) && !isBlobValue(input)) {
            source = input.source;
        }

        if (typeof source !== 'string' && !isBlobValue(source)) {
            throw makeAIError('Arguments are required', 'arguments_required');
        }

        // The driver call is made with a string source, so Blobs travel as data URIs.
        if (isBlobValue(source)) {
            source = await utils.blobToDataUri(source);
        }

        if (source.length > IMG2TXT_MAX_INPUT_SIZE) {
            throw makeAIError(`Input size cannot be larger than ${IMG2TXT_MAX_INPUT_SIZE}`, 'input_too_large');
        }

        return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
            test_mode: hasTestModeFlag(args),
            transform: async (result) => {
                // Keep only LINE blocks and join their text, one line each.
                const blocks = result?.blocks ?? [];
                return blocks
                    .filter((block) => block.type === 'text/textract:LINE')
                    .map((block) => `${block.text}\n`)
                    .join('');
            },
        }).call(this, { source });
    }

    /**
     * Synthesizes speech via the `puter-tts` / `aws-polly` driver.
     *
     * Call shapes:
     *   ai.txt2speech('Hello, world!')
     *   ai.txt2speech('Hello, world!', 'en-US')
     *   ai.txt2speech('Hello, world!', 'en-US', 'Brian')
     * Passing `true` as the 2nd-4th argument enables test mode.
     *
     * @param {...*} args - `(text[, language[, voice]][, testMode])`.
     * @returns {Promise<HTMLAudioElement>} Audio element whose `toString()` and
     *   `valueOf()` return the audio URL.
     * @throws {{message: string, code: string}} `arguments_required` when the text is
     *   not a string, `invalid_language_code` for a malformed language, and
     *   `input_too_large` when the text exceeds the limit.
     */
    txt2speech = async (...args) => {
        const [text, language, voice] = args;

        if (typeof text !== 'string') {
            throw makeAIError('Arguments are required', 'arguments_required');
        }

        const options = { text };

        if (language && typeof language === 'string') {
            if (!TXT2SPEECH_LANGUAGE_PATTERN.test(language)) {
                throw makeAIError('Invalid language code', 'invalid_language_code');
            }
            options.language = language;

            // A voice is only honored together with a language, since voices are
            // language-specific; without one the language's default voice is used.
            if (voice && typeof voice === 'string') {
                options.voice = voice;
            }
        }

        if (options.text.length > TXT2SPEECH_MAX_INPUT_SIZE) {
            throw makeAIError(`Input size cannot be larger than ${TXT2SPEECH_MAX_INPUT_SIZE}`, 'input_too_large');
        }

        return await utils.make_driver_method(['source'], 'puter-tts', 'aws-polly', 'synthesize', {
            responseType: 'blob',
            test_mode: hasTestModeFlag(args),
            transform: async (result) => {
                const url = await utils.blob_to_url(result);
                const audio = new Audio(url);
                audio.toString = () => url;
                audio.valueOf = () => url;
                return audio;
            },
        }).call(this, options);
    }

    /**
     * Sends a chat completion request via the `puter-chat-completion` interface.
     *
     * Call shapes that produce `messages`:
     *   ai.chat(prompt)
     *   ai.chat(prompt, imageURL | File)
     *   ai.chat(prompt, [imageURLs])
     *   ai.chat([messages])
     * Any of them may additionally carry an options object and/or a literal `true`
     * (2nd-4th argument) to enable test mode. When the first argument is neither a
     * string nor an array, no `messages` are added to the request.
     *
     * Options read from the first non-array, non-Blob object argument: `model`,
     * `temperature`, `max_tokens`, `stream` (boolean only), `driver` (overrides the
     * driver derived from the model), `tools`, `response`.
     *
     * @param {...*} args - See the call shapes above.
     * @returns {Promise<Object>} The driver result, with `toString()` and `valueOf()`
     *   returning `result.message?.content`.
     */
    chat = async (...args) => {
        const [prompt, second] = args;

        // requestParams: parameters that will be sent to the backend driver
        let requestParams = {};

        if (typeof prompt === 'string') {
            if (Array.isArray(second)) {
                // chat(prompt, [imageURLs]) - build a new array so the caller's list is untouched
                const images = second.map((url) => ({ image_url: { url } }));
                requestParams = {
                    vision: true,
                    messages: [{ content: [prompt, ...images] }],
                };
            } else if ((typeof second === 'string' && second !== '') || isFileValue(second)) {
                // chat(prompt, imageURL/File) - files are sent as data URIs
                const url = isFileValue(second) ? await utils.blobToDataUri(second) : second;
                requestParams = {
                    vision: true,
                    messages: [{ content: [prompt, { image_url: { url } }] }],
                };
            } else {
                // chat(prompt), chat(prompt, testMode), chat(prompt, options)
                requestParams = { messages: [{ content: prompt }] };
            }
        } else if (Array.isArray(prompt)) {
            // chat([messages])
            requestParams = { messages: prompt };
        }

        // userParams: the first object argument; Blobs/Files are image inputs, not options.
        const userParams = args.find((arg) => isNonArrayObject(arg) && !isBlobValue(arg)) ?? {};

        // A temperature of 0 is a legitimate value and must be forwarded.
        if (userParams.temperature !== undefined && userParams.temperature !== null) {
            requestParams.temperature = userParams.temperature;
        }
        if (userParams.max_tokens) {
            requestParams.max_tokens = userParams.max_tokens;
        }
        if (typeof userParams.stream === 'boolean') {
            requestParams.stream = userParams.stream;
        }
        for (const name of ['tools', 'response']) {
            if (userParams[name]) {
                requestParams[name] = userParams[name];
            }
        }

        // An absent model is normalized as '' and then left out of the request entirely.
        const model = normalizeChatModel(userParams.model || '');
        if (model !== '') {
            requestParams.model = model;
        }

        // An explicit driver wins over the one derived from the model.
        const driver = userParams.driver || driverForChatModel(model);

        return await utils.make_driver_method(['messages'], 'puter-chat-completion', driver, 'complete', {
            test_mode: hasTestModeFlag(args),
            transform: async (result) => {
                result.toString = () => result.message?.content;
                result.valueOf = () => result.message?.content;
                return result;
            },
        }).call(this, requestParams);
    }

    /**
     * Generates an image via the `puter-image-generation` interface.
     *
     * @param {...*} args - `(prompt[, testMode])`. A non-string prompt results in an
     *   empty argument object being sent. Test mode is enabled only by a literal
     *   `true` as the second argument.
     * @returns {Promise<HTMLImageElement>} Image element whose `toString()` and
     *   `valueOf()` return its `src`.
     */
    txt2img = async (...args) => {
        const [prompt, testFlag] = args;
        const options = typeof prompt === 'string' ? { prompt } : {};

        return await utils.make_driver_method(['prompt'], 'puter-image-generation', undefined, 'generate', {
            responseType: 'blob',
            test_mode: testFlag === true,
            transform: async (blob) => {
                const img = new Image();
                img.src = await utils.blob_to_url(blob);
                img.toString = () => img.src;
                img.valueOf = () => img.src;
                return img;
            },
        }).call(this, options);
    }
}
export default AI;