From aa8860b3ea7f763edf7e51690d37460fb3e7ca96 Mon Sep 17 00:00:00 2001 From: Neal Shah <30693865+ProgrammerIn-wonderland@users.noreply.github.com> Date: Mon, 3 Nov 2025 17:52:03 -0500 Subject: [PATCH] Add cache_control support and haiku 4.5 (#1897) * Add cache_control support and haiku 4.5 --- package.json | 1 + src/backend/package.json | 1 - .../src/modules/puterai/ClaudeService.js | 87 +++++++++++++++++-- 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 285ba218..23dee41b 100644 --- a/package.json +++ b/package.json @@ -65,6 +65,7 @@ "@aws-sdk/client-secrets-manager": "^3.879.0", "@aws-sdk/client-sns": "^3.907.0", "@google/genai": "^1.19.0", + "@anthropic-ai/sdk": "^0.68.0", "@heyputer/putility": "^1.0.2", "@paralleldrive/cuid2": "^2.2.2", "@stylistic/eslint-plugin-js": "^4.4.1", diff --git a/src/backend/package.json b/src/backend/package.json index eae43a92..42a2eaad 100644 --- a/src/backend/package.json +++ b/src/backend/package.json @@ -8,7 +8,6 @@ "build:worker": "cd src/services/worker && npm run build" }, "dependencies": { - "@anthropic-ai/sdk": "^0.56.0", "@aws-sdk/client-polly": "^3.622.0", "@aws-sdk/client-textract": "^3.621.0", "@google/generative-ai": "^0.21.0", diff --git a/src/backend/src/modules/puterai/ClaudeService.js b/src/backend/src/modules/puterai/ClaudeService.js index c8b83f52..b5d11e7f 100644 --- a/src/backend/src/modules/puterai/ClaudeService.js +++ b/src/backend/src/modules/puterai/ClaudeService.js @@ -120,10 +120,29 @@ class ClaudeService extends BaseService { */ async complete({ messages, stream, model, tools, max_tokens, temperature }) { tools = FunctionCalling.make_claude_tools(tools); - + // console.log("here are the messages: ", messages) + let system_prompts; + // unsure why system_prompts is an array but it always seems to only have exactly one element, + // and the real array of system_prompts seems to be the [0].content -- NS [system_prompts, messages] = Messages.extract_and_remove_system_messages(messages); + // Apply the cache control tag to all content blocks + if (system_prompts[0].cache_control && system_prompts[0]?.content) { + system_prompts[0].content = system_prompts[0].content.map(prompt => { + prompt.cache_control = system_prompts[0].cache_control; + return prompt; + }); + } + + messages = messages.map(message => { + if (message.cache_control) { + message.content[0].cache_control = message.cache_control; + } + delete message.cache_control; + return message + }) + const sdk_params = { model: model ?? this.get_default_model(), max_tokens: Math.floor(max_tokens) || @@ -132,16 +151,18 @@ class ClaudeService extends BaseService { || model === 'claude-3-5-sonnet-20240620' ) ? 8192 : 4096), //required temperature: temperature || 0, // required - ...(system_prompts ? { - system: system_prompts.length > 1 - ? JSON.stringify(system_prompts) - : JSON.stringify(system_prompts[0]), + ...( (system_prompts && system_prompts[0]?.content) ? { + system: system_prompts[0]?.content } : {}), + tool_choice: { + type: "auto", + disable_parallel_tool_use: true + }, messages, ...(tools ? { tools } : {}), }; - console.log('\x1B[26;1m ===== SDK PARAMETERS', require('util').inspect(sdk_params, undefined, Infinity)); + // console.log('\x1B[26;1m ===== SDK PARAMETERS', require('util').inspect(sdk_params, undefined, Infinity)); let beta_mode = false; @@ -303,6 +324,25 @@ class ClaudeService extends BaseService { chatStream.end(); this.billForUsage(actor, model || this.get_default_model(), usageSum); + + // Log token usage statistics + const totalTokens = usageSum.input_tokens + usageSum.output_tokens; + const cachedTokens = usageSum.ephemeral_5m_input_tokens + usageSum.ephemeral_1h_input_tokens; + const cacheHits = usageSum.cache_read_input_tokens; + const uncachedTokens = usageSum.input_tokens - cacheHits - cachedTokens; + +// console.log(` +// ╔══════════════════════════════════════════════════════════════╗ +// ║ 🎯 Token Usage Statistics 🎯 ║ +// ╠══════════════════════════════════════════════════════════════╣ +// ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║ +// ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║ +// ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║ +// ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║ +// ║ 📥 Input Tokens: ${String(usageSum.input_tokens).padStart(10)} 📥 ║ +// ║ 📤 Output Tokens: ${String(usageSum.output_tokens).padStart(10)} 📤 ║ +// ╚══════════════════════════════════════════════════════════════╝ +// `); }; return { @@ -315,7 +355,27 @@ class ClaudeService extends BaseService { const msg = await anthropic.messages.create(sdk_params); await cleanup_files(); - this.billForUsage(actor, model || this.get_default_model(), this.usageFormatterUtil(msg.usage)); + const usage = this.usageFormatterUtil(msg.usage); + this.billForUsage(actor, model || this.get_default_model(), usage); + + // Log token usage statistics + const totalTokens = usage.input_tokens + usage.output_tokens; + const cachedTokens = usage.ephemeral_5m_input_tokens + usage.ephemeral_1h_input_tokens; + const cacheHits = usage.cache_read_input_tokens; + const uncachedTokens = usage.input_tokens - cacheHits - cachedTokens; + +// console.log(` +// ╔══════════════════════════════════════════════════════════════╗ +// ║ 🎯 Token Usage Statistics 🎯 ║ +// ╠══════════════════════════════════════════════════════════════╣ +// ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║ +// ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║ +// ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║ +// ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║ +// ║ 📥 Input Tokens: ${String(usage.input_tokens).padStart(10)} 📥 ║ +// ║ 📤 Output Tokens: ${String(usage.output_tokens).padStart(10)} 📤 ║ +// ╚══════════════════════════════════════════════════════════════╝ +// `); // TODO DS: cleanup old usage tracking return { @@ -358,6 +418,19 @@ class ClaudeService extends BaseService { */ models_() { return [ + { + id: 'claude-haiku-4-5-20251001', + aliases: ['claude-haiku-4.5', 'claude-haiku-4-5'], + name: 'Claude Haiku 4.5', + context: 200000, + cost: { + currency: 'usd-cents', + tokens: 1_000_000, + input: 100, + output: 500, + }, + max_tokens: 64000, + }, { id: 'claude-sonnet-4-5-20250929', aliases: ['claude-sonnet-4.5', 'claude-sonnet-4-5'],