diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs
index 41c952bb..159d3ef4 100644
--- a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs
+++ b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs
@@ -109,7 +109,7 @@ export class OpenAICompletionService {
         return this.#defaultModel;
     }
 
-    async complete({ messages, stream, model, tools, max_tokens, temperature }) {
+    async complete({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) {
         return await this.#complete(messages, {
             model: model,
             tools,
@@ -117,7 +117,10 @@ export class OpenAICompletionService {
             stream,
             max_tokens,
             temperature,
-
+            reasoning,
+            text,
+            reasoning_effort,
+            verbosity,
         });
     }
 
@@ -166,6 +169,7 @@ export class OpenAICompletionService {
 
     async #complete(messages, {
         stream, moderation, model, tools, temperature, max_tokens,
+        reasoning, text, reasoning_effort, verbosity,
     }) {
         // Validate messages
         if ( ! Array.isArray(messages) ) {
@@ -252,7 +256,11 @@ export class OpenAICompletionService {
         // that's missing. We normalise it here so the token count code works.
         messages = await OpenAIUtil.process_input_messages(messages);
 
-        const completion = await this.#openAi.chat.completions.create({
+        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
+        const requestedVerbosity = verbosity ?? text?.verbosity;
+        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
+
+        const completionParams = {
             user: user_private_uid,
             messages: messages,
             model: model,
@@ -263,7 +271,18 @@ export class OpenAICompletionService {
             ...(stream ? {
                 stream_options: { include_usage: true },
             } : {}),
-        });
+        };
+
+        if ( supportsReasoningControls ) {
+            if ( requestedReasoningEffort ) {
+                completionParams.reasoning_effort = requestedReasoningEffort;
+            }
+            if ( requestedVerbosity ) {
+                completionParams.verbosity = requestedVerbosity;
+            }
+        }
+
+        const completion = await this.#openAi.chat.completions.create(completionParams);
         // TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service
         // for now I'm overloading this usage calculator to handle the future promise resolution...
         return OpenAIUtil.handle_completion_output({
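In short, the backend change above resolves the flat `reasoning_effort`/`verbosity` fields first, falls back to the nested `reasoning.effort`/`text.verbosity` shapes, and only forwards the result when the model id starts with `gpt-5`, so older models never receive parameters they may reject. A minimal standalone sketch of that behaviour (the `normalise` helper is illustrative, not a function added by this PR):

```js
// Sketch of the normalisation the diff above implements; `normalise`
// is an illustrative helper, not code from this PR.
const normalise = ({ reasoning, text, reasoning_effort, verbosity } = {}, model) => {
    const params = {};
    // Flat fields take precedence; nested shapes are fallbacks.
    const effort = reasoning_effort ?? reasoning?.effort;
    const verb = verbosity ?? text?.verbosity;
    // Gated on the model id so non-gpt-5 models get neither parameter.
    if ( typeof model === 'string' && model.startsWith('gpt-5') ) {
        if ( effort ) params.reasoning_effort = effort;
        if ( verb ) params.verbosity = verb;
    }
    return params;
};

// Both spellings produce { reasoning_effort: 'low', verbosity: 'low' }:
normalise({ reasoning: { effort: 'low' }, text: { verbosity: 'low' } }, 'gpt-5.1');
normalise({ reasoning_effort: 'low', verbosity: 'low' }, 'gpt-5.1');
// Any other model gets neither: {}
normalise({ reasoning_effort: 'low' }, 'gpt-4o');
```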
diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs
index 12fda6e3..5aea77f8 100644
--- a/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs
+++ b/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs
@@ -1,6 +1,16 @@
 // TODO DS: centralize somewhere
 
 export const OPEN_AI_MODELS = [
+    {
+        id: 'gpt-5.1',
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 125,
+            output: 1000,
+        },
+        max_tokens: 128000,
+    },
     {
         id: 'gpt-5-2025-08-07',
         aliases: ['gpt-5'],
@@ -163,4 +173,4 @@ export const OPEN_AI_MODELS = [
             output: 15000,
         },
     },
-];
\ No newline at end of file
+];
diff --git a/src/backend/src/services/MeteringService/costMaps/openAiCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openAiCostMap.ts
index 3ac34f1a..509b60d2 100644
--- a/src/backend/src/services/MeteringService/costMaps/openAiCostMap.ts
+++ b/src/backend/src/services/MeteringService/costMaps/openAiCostMap.ts
@@ -20,6 +20,9 @@ export const OPENAI_COST_MAP = {
 
     // GPT-5 models
+    'openai:gpt-5.1:prompt_tokens': 125,
+    'openai:gpt-5.1:cached_tokens': 13,
+    'openai:gpt-5.1:completion_tokens': 1000,
     'openai:gpt-5-2025-08-07:prompt_tokens': 125,
     'openai:gpt-5-2025-08-07:cached_tokens': 13,
     'openai:gpt-5-2025-08-07:completion_tokens': 1000,
diff --git a/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts
index 469e3d68..ffbafcd3 100644
--- a/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts
+++ b/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts
@@ -130,6 +130,10 @@
     'openrouter:openai/gpt-5-chat:prompt': 125,
     'openrouter:openai/gpt-5-chat:completion': 1000,
     'openrouter:openai/gpt-5-chat:input_cache_read': 12,
+    'openrouter:openai/gpt-5.1:prompt': 125,
+    'openrouter:openai/gpt-5.1:completion': 1000,
+    'openrouter:openai/gpt-5.1:web_search': 1000000,
+    'openrouter:openai/gpt-5.1:input_cache_read': 12,
     'openrouter:openai/gpt-5:prompt': 125,
     'openrouter:openai/gpt-5:completion': 1000,
     'openrouter:openai/gpt-5:web_search': 1000000,
diff --git a/src/puter-js/index.d.ts b/src/puter-js/index.d.ts
index 4d9ed1ab..0237bd34 100644
--- a/src/puter-js/index.d.ts
+++ b/src/puter-js/index.d.ts
@@ -78,6 +78,16 @@ interface ChatOptions {
     stream?: boolean;
     max_tokens?: number;
     temperature?: number;
+    reasoning?: {
+        effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
+        [key: string]: unknown;
+    };
+    reasoning_effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
+    text?: {
+        verbosity?: 'low' | 'medium' | 'high';
+        [key: string]: unknown;
+    };
+    verbosity?: 'low' | 'medium' | 'high';
     tools?: ToolDefinition[];
 }
 
diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js
index d8ec4ad8..4263a446 100644
--- a/src/puter-js/src/modules/AI.js
+++ b/src/puter-js/src/modules/AI.js
@@ -809,7 +809,7 @@ class AI{
         }
 
         // Additional parameters to pass from userParams to requestParams
-        const PARAMS_TO_PASS = ['tools', 'response'];
+        const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
         for ( const name of PARAMS_TO_PASS ) {
             if ( userParams[name] ) {
                 requestParams[name] = userParams[name];
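Caller-side, the `AI.js` change above forwards all four new keys untouched, so either spelling reaches the backend. A hypothetical usage sketch (the prompt and model id are placeholders), matching the pattern the test change below exercises:

```js
// Hypothetical usage; either the nested or the flat spelling works,
// since AI.js forwards all four keys and the backend normalises them.
const reply = await puter.ai.chat('What is 2+2?', {
    model: 'gpt-5.1',
    reasoning: { effort: 'low' },   // equivalent flat form: reasoning_effort: 'low'
    text: { verbosity: 'low' },     // equivalent flat form: verbosity: 'low'
});
```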
diff --git a/src/puter-js/test/ai.test.js b/src/puter-js/test/ai.test.js
index f5b9a5ab..eab1d306 100644
--- a/src/puter-js/test/ai.test.js
+++ b/src/puter-js/test/ai.test.js
@@ -7,6 +7,7 @@ const TEST_MODELS = [
     "openrouter:anthropic/claude-sonnet-4",
     "google/gemini-2.5-pro",
     "deepseek-chat",
+    "gpt-5.1",
     "gpt-5-nano",
     "openai/gpt-5-nano",
     "claude-sonnet-4-latest",
@@ -48,7 +49,9 @@ const testChatWithParametersCore = async function(model) {
     const result = await puter.ai.chat("What is 2+2?", {
         model: model,
         temperature: 0.7,
-        max_tokens: 50
+        max_tokens: 50,
+        reasoning: { effort: 'low' },
+        text: { verbosity: 'low' },
     });
 
     // Check basic result structure
@@ -212,4 +215,4 @@
 };
 
 // Export the generated tests
-window.aiTests = generateAllTests();
\ No newline at end of file
+window.aiTests = generateAllTests();
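On pricing: taking the `models.mjs` entry at face value, `input` and `output` are usd-cents per `tokens` (1,000,000), so the new gpt-5.1 entry prices input at $1.25 and output at $10.00 per million tokens; the metering values (125 / 13 / 1000) agree with that reading if interpreted as microcents per token, which puts cached input near $0.13 per million. A sketch of the arithmetic (the `centsFor` helper is illustrative, not part of this PR):

```js
// Illustrative arithmetic only; `centsFor` is not code from this PR.
// models.mjs reading: cost.input / cost.output are usd-cents per cost.tokens tokens.
const GPT51_COST = { currency: 'usd-cents', tokens: 1_000_000, input: 125, output: 1000 };

const centsFor = ({ prompt_tokens, completion_tokens }, cost) =>
    (prompt_tokens * cost.input + completion_tokens * cost.output) / cost.tokens;

// 10k prompt + 2k completion tokens:
// (10_000 * 125 + 2_000 * 1_000) / 1_000_000 = 3.25 cents
console.log(centsFor({ prompt_tokens: 10_000, completion_tokens: 2_000 }, GPT51_COST));
```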