// mirror of https://github.com/HeyPuter/puter.git
// synced 2026-04-24 16:28:29 -05:00
import * as utils from '../lib/utils.js'
class AI{
|
|
/**
|
|
* Creates a new instance with the given authentication token, API origin, and app ID,
|
|
*
|
|
* @class
|
|
* @param {string} authToken - Token used to authenticate the user.
|
|
* @param {string} APIOrigin - Origin of the API server. Used to build the API endpoint URLs.
|
|
* @param {string} appID - ID of the app to use.
|
|
*/
|
|
constructor (context) {
|
|
this.authToken = context.authToken;
|
|
this.APIOrigin = context.APIOrigin;
|
|
this.appID = context.appID;
|
|
}
|
|
|
|
/**
|
|
* Sets a new authentication token and resets the socket connection with the updated token, if applicable.
|
|
*
|
|
* @param {string} authToken - The new authentication token.
|
|
* @memberof [AI]
|
|
* @returns {void}
|
|
*/
|
|
setAuthToken (authToken) {
|
|
this.authToken = authToken;
|
|
}
|
|
|
|
/**
|
|
* Sets the API origin.
|
|
*
|
|
* @param {string} APIOrigin - The new API origin.
|
|
* @memberof [AI]
|
|
* @returns {void}
|
|
*/
|
|
setAPIOrigin (APIOrigin) {
|
|
this.APIOrigin = APIOrigin;
|
|
}
|
|
|
|
/**
|
|
* Returns a list of available AI models.
|
|
* @param {string} provider - The provider to filter the models returned.
|
|
* @returns {Object} Object containing lists of available models by provider
|
|
*/
|
|
async listModels(provider) {
|
|
const modelsByProvider = {};
|
|
|
|
const models = await puter.drivers.call('puter-chat-completion','ai-chat','models');
|
|
|
|
if (!models || !models.result || !Array.isArray(models.result)) {
|
|
return modelsByProvider;
|
|
}
|
|
models.result.forEach(item => {
|
|
if (!item.provider || !item.id) return;
|
|
if (provider && item.provider !== provider) return;
|
|
if (!modelsByProvider[item.provider]) modelsByProvider[item.provider] = [];
|
|
modelsByProvider[item.provider].push(item.id);
|
|
});
|
|
|
|
return modelsByProvider;
|
|
}
|
|
|
|
/**
|
|
* Returns a list of all available AI providers
|
|
* @returns {Array} Array containing providers
|
|
*/
|
|
async listModelProviders() {
|
|
let providers = [];
|
|
const models = await puter.drivers.call('puter-chat-completion','ai-chat','models');
|
|
|
|
if (!models || !models.result || !Array.isArray(models.result)) return providers; // if models is invalid then return empty array
|
|
providers = new Set(); // Use a Set to store unique providers
|
|
models.result.forEach(item => {
|
|
if (item.provider) providers.add(item.provider);
|
|
});
|
|
providers = Array.from(providers); // Convert Set to an array
|
|
return providers;
|
|
}
|
|
|
|
img2txt = async (...args) => {
|
|
let MAX_INPUT_SIZE = 10 * 1024 * 1024;
|
|
let options = {};
|
|
let testMode = false;
|
|
|
|
// Check that the argument is not undefined or null
|
|
if(!args){
|
|
throw({message: 'Arguments are required', code: 'arguments_required'});
|
|
}
|
|
|
|
// if argument is string transform it to the object that the API expects
|
|
if (typeof args[0] === 'string' || args[0] instanceof Blob) {
|
|
options.source = args[0];
|
|
}
|
|
|
|
// if input is a blob, transform it to a data URI
|
|
if (args[0].source instanceof Blob) {
|
|
options.source = await utils.blobToDataUri(args[0].source);
|
|
}
|
|
|
|
// check input size
|
|
if (options.source.length > this.MAX_INPUT_SIZE) {
|
|
throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
|
|
}
|
|
|
|
// determine if test mode is enabled
|
|
if (typeof args[1] === 'boolean' && args[1] === true ||
|
|
typeof args[2] === 'boolean' && args[2] === true ||
|
|
typeof args[3] === 'boolean' && args[3] === true) {
|
|
testMode = true;
|
|
}
|
|
|
|
return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
|
|
test_mode: testMode ?? false,
|
|
transform: async (result) => {
|
|
let str = '';
|
|
for (let i = 0; i < result?.blocks?.length; i++) {
|
|
if("text/textract:LINE" === result.blocks[i].type)
|
|
str += result.blocks[i].text + "\n";
|
|
}
|
|
return str;
|
|
}
|
|
}).call(this, options);
|
|
}
|
|
|
|
txt2speech = async (...args) => {
|
|
let MAX_INPUT_SIZE = 3000;
|
|
let options = {};
|
|
let testMode = false;
|
|
|
|
if(!args){
|
|
throw({message: 'Arguments are required', code: 'arguments_required'});
|
|
}
|
|
|
|
// if argument is string transform it to the object that the API expects
|
|
if (typeof args[0] === 'string') {
|
|
options = { text: args[0] };
|
|
}
|
|
|
|
// * ai.txt2speech('Hello, world!', 'en-US')
|
|
// * ai.txt2speech('Hello, world!', 'en-US', 'Brian')
|
|
if (args[1] && typeof args[1] === 'string') {
|
|
|
|
// Determine language
|
|
if (args[1] && typeof args[1] === 'string') {
|
|
// Check if it's a language code (ISO 639-1 or with region)
|
|
// Pattern matches: en, es, fr, de, en-US, es-ES, fr-FR, etc.
|
|
const languageCodePattern = /^[a-z]{2}(-[A-Z]{2})?$/;
|
|
|
|
// if language code is invalid, throw an error
|
|
if(!languageCodePattern.test(args[1])){
|
|
throw { message: 'Invalid language code', code: 'invalid_language_code' };
|
|
}
|
|
|
|
// set language
|
|
options.language = args[1];
|
|
}
|
|
// Determine voice
|
|
// Note that voice is optional, and if not provided, the default voice for the language will be used
|
|
// Also, it is important that a language is set before a voice is set since voices are language-specific
|
|
if (options.language && args[2] && typeof args[2] === 'string') {
|
|
// set voice
|
|
options.voice = args[2];
|
|
}
|
|
}
|
|
|
|
// check input size
|
|
if (options.text.length > this.MAX_INPUT_SIZE) {
|
|
throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
|
|
}
|
|
|
|
// determine if test mode is enabled
|
|
if (typeof args[1] === 'boolean' && args[1] === true ||
|
|
typeof args[2] === 'boolean' && args[2] === true ||
|
|
typeof args[3] === 'boolean' && args[3] === true) {
|
|
testMode = true;
|
|
}
|
|
|
|
return await utils.make_driver_method(['source'], 'puter-tts', 'aws-polly', 'synthesize', {
|
|
responseType: 'blob',
|
|
test_mode: testMode ?? false,
|
|
transform: async (result) => {
|
|
const url = await utils.blob_to_url(result);
|
|
const audio = new Audio(url);
|
|
audio.toString = () => url;
|
|
audio.valueOf = () => url;
|
|
return audio;
|
|
}
|
|
}).call(this, options);
|
|
}
|
|
|
|
|
|
// accepts either a string or an array of message objects
|
|
// if string, it's treated as the prompt which is a shorthand for { messages: [{ content: prompt }] }
|
|
// if object, it's treated as the full argument object that the API expects
|
|
chat = async (...args) => {
|
|
// requestParams: parameters that will be sent to the backend driver
|
|
let requestParams = {};
|
|
// userParams: parameters provided by the user in the function call
|
|
let userParams = {};
|
|
let testMode = false;
|
|
|
|
// default driver is openai-completion
|
|
let driver = 'openai-completion';
|
|
|
|
// Check that the argument is not undefined or null
|
|
if(!args){
|
|
throw({message: 'Arguments are required', code: 'arguments_required'});
|
|
}
|
|
|
|
// ai.chat(prompt)
|
|
if(typeof args[0] === 'string'){
|
|
requestParams = { messages: [{ content: args[0] }] };
|
|
}
|
|
|
|
// ai.chat(prompt, testMode)
|
|
if (typeof args[0] === 'string' && (!args[1] || typeof args[1] === 'boolean')) {
|
|
requestParams = { messages: [{ content: args[0] }] };
|
|
}
|
|
|
|
// ai.chat(prompt, imageURL/File)
|
|
// ai.chat(prompt, imageURL/File, testMode)
|
|
else if (typeof args[0] === 'string' && (typeof args[1] === 'string' || args[1] instanceof File)) {
|
|
// if imageURL is a File, transform it to a data URI
|
|
if(args[1] instanceof File){
|
|
args[1] = await utils.blobToDataUri(args[1]);
|
|
}
|
|
|
|
// parse args[1] as an image_url object
|
|
requestParams = {
|
|
vision: true,
|
|
messages: [
|
|
{
|
|
content: [
|
|
args[0],
|
|
{
|
|
image_url: {
|
|
url: args[1]
|
|
}
|
|
}
|
|
],
|
|
}
|
|
]
|
|
};
|
|
}
|
|
// chat(prompt, [imageURLs])
|
|
else if (typeof args[0] === 'string' && Array.isArray(args[1])) {
|
|
// parse args[1] as an array of image_url objects
|
|
for (let i = 0; i < args[1].length; i++) {
|
|
args[1][i] = { image_url: { url: args[1][i] } };
|
|
}
|
|
requestParams = {
|
|
vision: true,
|
|
messages: [
|
|
{
|
|
content: [
|
|
args[0],
|
|
...args[1]
|
|
],
|
|
}
|
|
]
|
|
};
|
|
}
|
|
// chat([messages])
|
|
else if (Array.isArray(args[0])) {
|
|
requestParams = { messages: args[0] };
|
|
}
|
|
|
|
// determine if testMode is enabled
|
|
if (typeof args[1] === 'boolean' && args[1] === true ||
|
|
typeof args[2] === 'boolean' && args[2] === true ||
|
|
typeof args[3] === 'boolean' && args[3] === true) {
|
|
testMode = true;
|
|
}
|
|
|
|
// if any of the args is an object, assume it's the user parameters object
|
|
const is_object = v => {
|
|
return typeof v === 'object' &&
|
|
!Array.isArray(v) &&
|
|
v !== null;
|
|
};
|
|
for (let i = 0; i < args.length; i++) {
|
|
if (is_object(args[i])) {
|
|
userParams = args[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
// Copy relevant parameters from userParams to requestParams
|
|
if (userParams.model) {
|
|
requestParams.model = userParams.model;
|
|
}
|
|
if (userParams.temperature) {
|
|
requestParams.temperature = userParams.temperature;
|
|
}
|
|
if (userParams.max_tokens) {
|
|
requestParams.max_tokens = userParams.max_tokens;
|
|
}
|
|
|
|
// convert undefined to empty string so that .startsWith works
|
|
requestParams.model = requestParams.model ?? '';
|
|
|
|
// If model starts with "anthropic/", remove it
|
|
// later on we should standardize the model names to [vendor]/[model]
|
|
// for example: "claude-3-5-sonnet" should become "anthropic/claude-3-5-sonnet"
|
|
// but for now, we want to keep the old behavior
|
|
// so we remove the "anthropic/" prefix if it exists
|
|
if (requestParams.model && requestParams.model.startsWith('anthropic/')) {
|
|
requestParams.model = requestParams.model.replace('anthropic/', '');
|
|
}
|
|
|
|
// convert to the correct model name if necessary
|
|
if( requestParams.model === 'claude-3-5-sonnet'){
|
|
requestParams.model = 'claude-3-5-sonnet-latest';
|
|
}
|
|
if( requestParams.model === 'claude-3-7-sonnet' || requestParams.model === 'claude'){
|
|
requestParams.model = 'claude-3-7-sonnet-latest';
|
|
}
|
|
if( requestParams.model === 'claude-sonnet-4' || requestParams.model === 'claude-sonnet-4-latest'){
|
|
requestParams.model = 'claude-sonnet-4-20250514';
|
|
}
|
|
if( requestParams.model === 'claude-opus-4' || requestParams.model === 'claude-opus-4-latest') {
|
|
requestParams.model = 'claude-opus-4-20250514';
|
|
}
|
|
if ( requestParams.model === 'mistral' ) {
|
|
requestParams.model = 'mistral-large-latest';
|
|
}
|
|
if ( requestParams.model === 'groq' ) {
|
|
requestParams.model = 'llama3-8b-8192';
|
|
}
|
|
if ( requestParams.model === 'deepseek' ) {
|
|
requestParams.model = 'deepseek-chat';
|
|
}
|
|
|
|
// o1-mini to openrouter:openai/o1-mini
|
|
if ( requestParams.model === 'o1-mini') {
|
|
requestParams.model = 'openrouter:openai/o1-mini';
|
|
}
|
|
|
|
// if a model is prepended with "openai/", remove it
|
|
if (requestParams.model && requestParams.model.startsWith('openai/')) {
|
|
requestParams.model = requestParams.model.replace('openai/', '');
|
|
driver = 'openai-completion';
|
|
}
|
|
|
|
// if model starts with:
|
|
// meta-llama/
|
|
// google/
|
|
// deepseek/
|
|
// x-ai/
|
|
// prepend it with openrouter:
|
|
if ( requestParams.model.startsWith('meta-llama/') || requestParams.model.startsWith('google/') || requestParams.model.startsWith('deepseek/') || requestParams.model.startsWith('x-ai/') ) {
|
|
requestParams.model = 'openrouter:' + requestParams.model;
|
|
}
|
|
|
|
// map model to the appropriate driver
|
|
if (!requestParams.model || requestParams.model.startsWith('gpt-')) {
|
|
driver = 'openai-completion';
|
|
}else if(
|
|
requestParams.model.startsWith('claude-')
|
|
){
|
|
driver = 'claude';
|
|
}else if(requestParams.model === 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' || requestParams.model === 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' || requestParams.model === 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo' || requestParams.model === `google/gemma-2-27b-it`){
|
|
driver = 'together-ai';
|
|
}else if(requestParams.model === 'mistral-large-latest' || requestParams.model === 'codestral-latest'){
|
|
driver = 'mistral';
|
|
}else if([
|
|
"distil-whisper-large-v3-en",
|
|
"gemma2-9b-it",
|
|
"gemma-7b-it",
|
|
"llama-3.1-70b-versatile",
|
|
"llama-3.1-8b-instant",
|
|
"llama3-70b-8192",
|
|
"llama3-8b-8192",
|
|
"llama3-groq-70b-8192-tool-use-preview",
|
|
"llama3-groq-8b-8192-tool-use-preview",
|
|
"llama-guard-3-8b",
|
|
"mixtral-8x7b-32768",
|
|
"whisper-large-v3"
|
|
].includes(requestParams.model)) {
|
|
driver = 'groq';
|
|
}else if(requestParams.model === 'grok-beta') {
|
|
driver = 'xai';
|
|
}
|
|
else if(
|
|
requestParams.model === 'deepseek-chat' ||
|
|
requestParams.model === 'deepseek-reasoner'
|
|
){
|
|
driver = 'deepseek';
|
|
}
|
|
else if(
|
|
requestParams.model === 'gemini-1.5-flash' ||
|
|
requestParams.model === 'gemini-2.0-flash'
|
|
){
|
|
driver = 'gemini';
|
|
}
|
|
else if ( requestParams.model.startsWith('openrouter:') ) {
|
|
driver = 'openrouter';
|
|
}
|
|
|
|
// stream flag from userParams
|
|
if(userParams.stream !== undefined && typeof userParams.stream === 'boolean'){
|
|
requestParams.stream = userParams.stream;
|
|
}
|
|
|
|
if ( userParams.driver ) {
|
|
driver = userParams.driver;
|
|
}
|
|
|
|
// Additional parameters to pass from userParams to requestParams
|
|
const PARAMS_TO_PASS = ['tools', 'response'];
|
|
for ( const name of PARAMS_TO_PASS ) {
|
|
if ( userParams[name] ) {
|
|
requestParams[name] = userParams[name];
|
|
}
|
|
}
|
|
|
|
if ( requestParams.model === '' ) {
|
|
delete requestParams.model;
|
|
}
|
|
|
|
// Call the original chat.complete method
|
|
return await utils.make_driver_method(['messages'], 'puter-chat-completion', driver, 'complete', {
|
|
test_mode: testMode ?? false,
|
|
transform: async (result) => {
|
|
result.toString = () => {
|
|
return result.message?.content;
|
|
};
|
|
|
|
result.valueOf = () => {
|
|
return result.message?.content;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
}).call(this, requestParams);
|
|
}
|
|
|
|
txt2img = async (...args) => {
|
|
let options = {};
|
|
let testMode = false;
|
|
|
|
if(!args){
|
|
throw({message: 'Arguments are required', code: 'arguments_required'});
|
|
}
|
|
|
|
// if argument is string transform it to the object that the API expects
|
|
if (typeof args[0] === 'string') {
|
|
options = { prompt: args[0] };
|
|
}
|
|
|
|
// if second argument is string, it's the `testMode`
|
|
if (typeof args[1] === 'boolean' && args[1] === true) {
|
|
testMode = true;
|
|
}
|
|
|
|
// Call the original chat.complete method
|
|
return await utils.make_driver_method(['prompt'], 'puter-image-generation', undefined, 'generate', {
|
|
responseType: 'blob',
|
|
test_mode: testMode ?? false,
|
|
transform: async blob => {
|
|
let img = new Image();
|
|
img.src = await utils.blob_to_url(blob);
|
|
img.toString = () => img.src;
|
|
img.valueOf = () => img.src;
|
|
return img;
|
|
}
|
|
}).call(this, options);
|
|
}
|
|
}
|
|
|
|
export default AI;
|