diff --git a/src/backend/src/modules/puterai/AIChatService.js b/src/backend/src/modules/puterai/AIChatService.js index e995947d..0009ab39 100644 --- a/src/backend/src/modules/puterai/AIChatService.js +++ b/src/backend/src/modules/puterai/AIChatService.js @@ -409,6 +409,7 @@ class AIChatService extends BaseService { const available = await svc_cost.get_available_amount(); const model_input_cost = this.detail_model_map[model_used].cost.input; const model_output_cost = this.detail_model_map[model_used].cost.output; + const model_max_tokens = this.detail_model_map[model_used].max_tokens; const text = Messages.extract_text(parameters.messages); const approximate_input_cost = text.length / 4 * model_input_cost; const usageAllowed = await svc_cost.get_funding_allowed({ @@ -440,10 +441,13 @@ class AIChatService extends BaseService { const max_allowed_output_tokens = max_allowed_output_amount / model_output_cost; - parameters.max_tokens = Math.min( - parameters.max_tokens ?? Number.POSITIVE_INFINITY, - max_allowed_output_tokens, - ); + if ( model_max_tokens ) { + parameters.max_tokens = Math.floor(Math.min( + parameters.max_tokens ?? Number.POSITIVE_INFINITY, + max_allowed_output_tokens, + model_max_tokens, + )); + } this.log.noticeme('AI PARAMETERS', parameters); diff --git a/src/backend/src/modules/puterai/ClaudeService.js b/src/backend/src/modules/puterai/ClaudeService.js index d8fbe49b..39b78b4b 100644 --- a/src/backend/src/modules/puterai/ClaudeService.js +++ b/src/backend/src/modules/puterai/ClaudeService.js @@ -111,7 +111,11 @@ class ClaudeService extends BaseService { const sdk_params = { model: model ?? this.get_default_model(), - max_tokens: max_tokens || (model === 'claude-3-5-sonnet-20241022' || model === 'claude-3-5-sonnet-20240620') ? 8192 : 4096, //required + max_tokens: Math.floor(max_tokens) || + (( + model === 'claude-3-5-sonnet-20241022' + || model === 'claude-3-5-sonnet-20240620' + ) ? 8192 : 4096), //required temperature: temperature || 0, // required ...(system_prompts ? { system: system_prompts.length > 1 diff --git a/src/backend/src/modules/puterai/OpenAICompletionService.js b/src/backend/src/modules/puterai/OpenAICompletionService.js index 02e2aa7a..7155eca5 100644 --- a/src/backend/src/modules/puterai/OpenAICompletionService.js +++ b/src/backend/src/modules/puterai/OpenAICompletionService.js @@ -106,6 +106,7 @@ class OpenAICompletionService extends BaseService { }, { id: 'gpt-4o-mini', + max_tokens: 16384, cost: { currency: 'usd-cents', tokens: 1_000_000, @@ -169,6 +170,7 @@ class OpenAICompletionService extends BaseService { }, { id: 'o4-mini', + max_tokens: 100000, cost: { currency: 'usd-cents', tokens: 1_000_000, @@ -335,7 +337,7 @@ class OpenAICompletionService extends BaseService { messages: messages, model: model, ...(tools ? { tools } : {}), - ...(max_tokens ? { max_tokens } : {}), + ...(max_tokens ? { max_completion_tokens: max_tokens } : {}), ...(temperature ? { temperature } : {}), stream, ...(stream ? { diff --git a/src/backend/src/modules/puterai/OpenRouterService.js b/src/backend/src/modules/puterai/OpenRouterService.js index 1e7f8094..62791f7d 100644 --- a/src/backend/src/modules/puterai/OpenRouterService.js +++ b/src/backend/src/modules/puterai/OpenRouterService.js @@ -172,6 +172,7 @@ class OpenRouterService extends BaseService { coerced_models.push({ id: 'openrouter:' + model.id, name: model.name + ' (OpenRouter)', + max_tokens: model.max_completion_tokens, cost: { currency: 'usd-cents', tokens: 1_000_000,