Add cache_control support and haiku 4.5 (#1897)

* Add cache_control support and haiku 4.5
Neal Shah
2025-11-03 17:52:03 -05:00
committed by GitHub
parent 9ae4e7227b
commit aa8860b3ea
3 changed files with 81 additions and 8 deletions


@@ -65,6 +65,7 @@
     "@aws-sdk/client-secrets-manager": "^3.879.0",
     "@aws-sdk/client-sns": "^3.907.0",
     "@google/genai": "^1.19.0",
+    "@anthropic-ai/sdk": "^0.68.0",
     "@heyputer/putility": "^1.0.2",
     "@paralleldrive/cuid2": "^2.2.2",
     "@stylistic/eslint-plugin-js": "^4.4.1",


@@ -8,7 +8,6 @@
     "build:worker": "cd src/services/worker && npm run build"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.56.0",
     "@aws-sdk/client-polly": "^3.622.0",
     "@aws-sdk/client-textract": "^3.621.0",
     "@google/generative-ai": "^0.21.0",


@@ -120,10 +120,29 @@ class ClaudeService extends BaseService {
      */
     async complete({ messages, stream, model, tools, max_tokens, temperature }) {
         tools = FunctionCalling.make_claude_tools(tools);
+        // console.log("here are the messages: ", messages)
         let system_prompts;
+        // Unsure why system_prompts is an array; it always seems to have exactly one
+        // element, and the real array of system prompts is that element's .content -- NS
         [system_prompts, messages] = Messages.extract_and_remove_system_messages(messages);
+        // Apply the cache control tag to all of the system prompt's content blocks
+        if (system_prompts[0]?.cache_control && system_prompts[0]?.content) {
+            system_prompts[0].content = system_prompts[0].content.map(prompt => {
+                prompt.cache_control = system_prompts[0].cache_control;
+                return prompt;
+            });
+        }
+        // Move any message-level cache_control tag down onto the first content block
+        messages = messages.map(message => {
+            if (message.cache_control) {
+                message.content[0].cache_control = message.cache_control;
+            }
+            delete message.cache_control;
+            return message;
+        });
         const sdk_params = {
             model: model ?? this.get_default_model(),
             max_tokens: Math.floor(max_tokens) ||
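An aside on what the hunk above does: callers can now attach a cache_control tag at the message level, and complete() relocates it to the place the Anthropic SDK actually reads it. A minimal sketch of the before/after shapes, assuming a driver-level format where cache_control rides on the message itself (the content text is illustrative):

// Hypothetical input: a message carrying a message-level cache_control tag.
const incoming = {
    role: 'user',
    cache_control: { type: 'ephemeral' },
    content: [{ type: 'text', text: 'A long document worth caching...' }],
};

// After the mapping above, the tag sits on the first content block instead,
// which is the granularity Anthropic's Messages API caches at:
const outgoing = {
    role: 'user',
    content: [{
        type: 'text',
        text: 'A long document worth caching...',
        cache_control: { type: 'ephemeral' },
    }],
};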
@@ -132,16 +151,18 @@ class ClaudeService extends BaseService {
                 || model === 'claude-3-5-sonnet-20240620'
             ) ? 8192 : 4096), // required
             temperature: temperature || 0, // required
-            ...(system_prompts ? {
-                system: system_prompts.length > 1
-                    ? JSON.stringify(system_prompts)
-                    : JSON.stringify(system_prompts[0]),
+            ...( (system_prompts && system_prompts[0]?.content) ? {
+                system: system_prompts[0]?.content
             } : {}),
             tool_choice: {
                 type: "auto",
                 disable_parallel_tool_use: true
             },
             messages,
             ...(tools ? { tools } : {}),
         };
-        console.log('\x1B[26;1m ===== SDK PARAMETERS', require('util').inspect(sdk_params, undefined, Infinity));
+        // console.log('\x1B[26;1m ===== SDK PARAMETERS', require('util').inspect(sdk_params, undefined, Infinity));
         let beta_mode = false;
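The system-parameter change above is the other half of the caching support: the Messages API accepts system either as a plain string or as an array of text blocks, and block-level cache_control only applies in the array form, so passing system_prompts[0].content through unchanged (instead of JSON.stringify-ing a wrapper object) is what lets the tags applied earlier reach the API. A sketch of the resulting parameter, with illustrative values:

// Illustrative shape of sdk_params.system after this change:
const system = [{
    type: 'text',
    text: 'You are a helpful assistant.',
    cache_control: { type: 'ephemeral' },
}];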
@@ -303,6 +324,25 @@ class ClaudeService extends BaseService {
                 chatStream.end();
                 this.billForUsage(actor, model || this.get_default_model(), usageSum);
+                // Log token usage statistics
+                const totalTokens = usageSum.input_tokens + usageSum.output_tokens;
+                const cachedTokens = usageSum.ephemeral_5m_input_tokens + usageSum.ephemeral_1h_input_tokens;
+                const cacheHits = usageSum.cache_read_input_tokens;
+                const uncachedTokens = usageSum.input_tokens - cacheHits - cachedTokens;
+                // console.log(`
+                // ╔══════════════════════════════════════════════════════════════╗
+                // ║ 🎯 Token Usage Statistics 🎯 ║
+                // ╠══════════════════════════════════════════════════════════════╣
+                // ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║
+                // ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║
+                // ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║
+                // ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║
+                // ║ 📥 Input Tokens: ${String(usageSum.input_tokens).padStart(10)} 📥 ║
+                // ║ 📤 Output Tokens: ${String(usageSum.output_tokens).padStart(10)} 📤 ║
+                // ╚══════════════════════════════════════════════════════════════╝
+                // `);
             };
             return {
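The token bookkeeping above, restated as a standalone function for clarity. The field names mirror what the hunk reads off usageSum; the ?? 0 guards are an addition here, on the assumption that the ephemeral_* fields may be absent when no cache write occurred:

// Sketch: split input-side tokens into cache writes, cache reads, and uncached.
function summarizeUsage(u) {
    const cacheWrites = (u.ephemeral_5m_input_tokens ?? 0) + (u.ephemeral_1h_input_tokens ?? 0);
    const cacheHits = u.cache_read_input_tokens ?? 0;
    return {
        totalTokens: u.input_tokens + u.output_tokens,
        cachedTokens: cacheWrites,
        cacheHits,
        uncachedTokens: u.input_tokens - cacheHits - cacheWrites,
    };
}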
@@ -315,7 +355,27 @@ class ClaudeService extends BaseService {
             const msg = await anthropic.messages.create(sdk_params);
             await cleanup_files();
-            this.billForUsage(actor, model || this.get_default_model(), this.usageFormatterUtil(msg.usage));
+            const usage = this.usageFormatterUtil(msg.usage);
+            this.billForUsage(actor, model || this.get_default_model(), usage);
+            // Log token usage statistics
+            const totalTokens = usage.input_tokens + usage.output_tokens;
+            const cachedTokens = usage.ephemeral_5m_input_tokens + usage.ephemeral_1h_input_tokens;
+            const cacheHits = usage.cache_read_input_tokens;
+            const uncachedTokens = usage.input_tokens - cacheHits - cachedTokens;
+            // console.log(`
+            // ╔══════════════════════════════════════════════════════════════╗
+            // ║ 🎯 Token Usage Statistics 🎯 ║
+            // ╠══════════════════════════════════════════════════════════════╣
+            // ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║
+            // ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║
+            // ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║
+            // ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║
+            // ║ 📥 Input Tokens: ${String(usage.input_tokens).padStart(10)} 📥 ║
+            // ║ 📤 Output Tokens: ${String(usage.output_tokens).padStart(10)} 📤 ║
+            // ╚══════════════════════════════════════════════════════════════╝
+            // `);
             // TODO DS: cleanup old usage tracking
             return {
@@ -358,6 +418,19 @@ class ClaudeService extends BaseService {
      */
     models_() {
         return [
+            {
+                id: 'claude-haiku-4-5-20251001',
+                aliases: ['claude-haiku-4.5', 'claude-haiku-4-5'],
+                name: 'Claude Haiku 4.5',
+                context: 200000,
+                cost: {
+                    currency: 'usd-cents',
+                    tokens: 1_000_000,
+                    input: 100,
+                    output: 500,
+                },
+                max_tokens: 64000,
+            },
             {
                 id: 'claude-sonnet-4-5-20250929',
                 aliases: ['claude-sonnet-4.5', 'claude-sonnet-4-5'],
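Reading the new Haiku 4.5 entry: cost.currency is usd-cents and cost.tokens is 1,000,000, so input is priced at 100 cents ($1.00) per million tokens and output at 500 cents ($5.00) per million. A sketch of how an entry in this shape translates token counts into a charge (the helper is hypothetical; actual billing goes through billForUsage, which this diff doesn't show):

// Hypothetical helper: price a request from a models_()-style entry.
function costInCents(entry, inputTokens, outputTokens) {
    const per = rate => rate / entry.cost.tokens; // cents per single token
    return inputTokens * per(entry.cost.input)
         + outputTokens * per(entry.cost.output);
}

// e.g. 200,000 input + 10,000 output tokens on Haiku 4.5:
// 200000 * (100 / 1e6) + 10000 * (500 / 1e6) = 20 + 5 = 25 cents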