feat: refactor ai to have single entry point and follow defined model structure (#2114)
* feat: refactor ai to have single entry point and follow defined model structure
* fix: missing openrouter registration
* fix: dedupe models
* feat: provide usage in stream mode 🚀
3 .gitignore vendored
@@ -66,4 +66,5 @@ AGENTS.md
*.map
coverage/
*.log
2927 package-lock.json generated
File diff suppressed because it is too large
@@ -14,6 +14,7 @@
"@eslint/js": "^9.35.0",
"@playwright/test": "^1.56.1",
"@stylistic/eslint-plugin": "^5.3.1",
"@types/mime-types": "^3.0.1",
"@types/uuid": "^10.0.0",
"@typescript-eslint/eslint-plugin": "^8.46.1",
"@typescript-eslint/parser": "^8.46.1",
@@ -53,7 +54,7 @@
"build": "npx eslint --quiet -c eslint/mandatory.eslint.config.js src/backend/src extensions && npm run build:ts && cd src/gui && node ./build.js",
"check-translations": "node tools/check-translations.js",
"prepare": "husky",
"build:ts": "tsc",
"build:ts": "tsc -p tsconfig.build.json",
"gen": "./scripts/gen.sh"
},
"workspaces": [
@@ -16,35 +16,34 @@
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
const CoreModule = require('./src/CoreModule.js');
const { Kernel } = require('./src/Kernel.js');
const DatabaseModule = require('./src/DatabaseModule.js');
const LocalDiskStorageModule = require('./src/LocalDiskStorageModule.js');
const MemoryStorageModule = require('./src/MemoryStorageModule.js');
const SelfHostedModule = require('./src/modules/selfhosted/SelfHostedModule.js');
const { testlaunch } = require('./src/index.js');
const BaseService = require('./src/services/BaseService.js');
const { Context } = require('./src/util/context.js');
const { TestDriversModule } = require('./src/modules/test-drivers/TestDriversModule.js');
const { PuterAIModule } = require('./src/modules/puterai/PuterAIModule.js');
const { BroadcastModule } = require('./src/modules/broadcast/BroadcastModule.js');
const { WebModule } = require('./src/modules/web/WebModule.js');
const { Core2Module } = require('./src/modules/core/Core2Module.js');
const { TemplateModule } = require('./src/modules/template/TemplateModule.js');
const { PuterFSModule } = require('./src/modules/puterfs/PuterFSModule.js');
const { PerfMonModule } = require('./src/modules/perfmon/PerfMonModule.js');
const { AppsModule } = require('./src/modules/apps/AppsModule.js');
const { DevelopmentModule } = require('./src/modules/development/DevelopmentModule.js');
const { HostOSModule } = require('./src/modules/hostos/HostOSModule.js');
const { InternetModule } = require('./src/modules/internet/InternetModule.js');
const { CaptchaModule } = require('./src/modules/captcha/CaptchaModule.js');
const { EntityStoreModule } = require('./src/modules/entitystore/EntityStoreModule.js');
const { KVStoreModule } = require('./src/modules/kvstore/KVStoreModule.js');
const { DomainModule } = require('./src/modules/domain/DomainModule.js');
const { DNSModule } = require('./src/modules/dns/DNSModule.js');
const { TestConfigModule } = require('./src/modules/test-config/TestConfigModule.js');
import CoreModule from './src/CoreModule.js';
import DatabaseModule from './src/DatabaseModule.js';
import { testlaunch } from './src/index.js';
import { Kernel } from './src/Kernel.js';
import LocalDiskStorageModule from './src/LocalDiskStorageModule.js';
import MemoryStorageModule from './src/MemoryStorageModule.js';
import { PuterAIModule } from './src/modules/ai/PuterAIChatModule.js';
import { AppsModule } from './src/modules/apps/AppsModule.js';
import { BroadcastModule } from './src/modules/broadcast/BroadcastModule.js';
import { CaptchaModule } from './src/modules/captcha/CaptchaModule.js';
import { Core2Module } from './src/modules/core/Core2Module.js';
import { DevelopmentModule } from './src/modules/development/DevelopmentModule.js';
import { DNSModule } from './src/modules/dns/DNSModule.js';
import { DomainModule } from './src/modules/domain/DomainModule.js';
import { EntityStoreModule } from './src/modules/entitystore/EntityStoreModule.js';
import { HostOSModule } from './src/modules/hostos/HostOSModule.js';
import { InternetModule } from './src/modules/internet/InternetModule.js';
import { KVStoreModule } from './src/modules/kvstore/KVStoreModule.js';
import { PerfMonModule } from './src/modules/perfmon/PerfMonModule.js';
import { PuterFSModule } from './src/modules/puterfs/PuterFSModule.js';
import SelfHostedModule from './src/modules/selfhosted/SelfHostedModule.js';
import { TestConfigModule } from './src/modules/test-config/TestConfigModule.js';
import { TestDriversModule } from './src/modules/test-drivers/TestDriversModule.js';
import { WebModule } from './src/modules/web/WebModule.js';
import BaseService from './src/services/BaseService.js';
import { Context } from './src/util/context.js';

module.exports = {
export default {
    helloworld: () => {
        console.log('Hello, World!');
        process.exit(0);
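The hunk above migrates the self-hosted entry point from CommonJS requires to ES module imports, and swaps `module.exports = { ... }` for `export default { ... }`. A minimal sketch of what changes for a consumer (file name illustrative, not from the diff):

// Before (CommonJS):
//     const entry = require('./exports.js');
//     entry.helloworld();
// After (ESM):
//     import entry from './exports.js';
//     entry.helloworld();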
@@ -82,7 +82,7 @@
"svg-captcha": "^1.4.0",
"svgo": "^3.0.2",
"tiktoken": "^1.0.16",
"together-ai": "^0.29.0",
"together-ai": "^0.32.0",
"tweetnacl": "^1.0.3",
"ua-parser-js": "^1.0.38",
"uglify-js": "^3.17.4",
@@ -35,6 +35,7 @@ const readline = require('node:readline/promises');
const { RuntimeModuleRegistry } = require('./extension/RuntimeModuleRegistry');
const { RuntimeModule } = require('./extension/RuntimeModule');
const deep_proto_merge = require('./config/deep_proto_merge');
const { kv } = require('./util/kvSingleton');

const { quot } = libs.string;

@@ -63,8 +64,6 @@ class Kernel extends AdvancedBase {
    }

    _runtime_init (boot_parameters) {
        const kvjs = require('@heyputer/kv.js');
        const kv = new kvjs();
        global.kv = kv;
        global.cl = console.log;
@@ -26,7 +26,7 @@ const { quot } = require('@heyputer/putility').libs.string;
 * @property {string} message the error message
 * @property {object} source the source of the error
 */
module.exports = class APIError {
class APIError {
    static codes = {
        // General
        'unknown_error': {
@@ -560,14 +560,14 @@ module.exports = class APIError {
     *
     * @static
     * @param {number|string} status
     * @param {object} source
     * @param {Error} source
     * @param {string|Error|object} fields one of the following:
     * - a string to use as the error message
     * - an Error object to use as the source of the error
     * - an object with a message property to use as the error message
     * @returns
     */
    static create (status, source, fields = {}) {
    static create (status, source = {}, fields = {}) {
        // Just the error code
        if ( typeof status === 'string' ) {
            const code = this.codes[status];
@@ -669,3 +669,6 @@ module.exports = class APIError {
        return `APIError(${this.status}, ${this.message})`;
    }
};

module.exports = APIError;
module.exports.APIError = APIError;
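The two assignments at the end keep both import styles working while the codebase migrates away from `module.exports = class ...`. A minimal sketch, assuming the file is required from a sibling module:

const APIError = require('./APIError');              // legacy default-style require
const { APIError: Named } = require('./APIError');   // new named-destructuring style
console.log(APIError === Named); // true — both resolve to the same class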
@@ -21,9 +21,8 @@ const { is_valid_uuid4 } = require('../../helpers');
const { Context } = require('../../util/context');
const { PathBuilder } = require('../../util/pathutil');
const APIError = require('../APIError');
const _path = require('path');

module.exports = class FSNodeParam {
class FSNodeParam {
    constructor (srckey, options) {
        this.srckey = srckey;
        this.options = options ?? {};
@@ -77,4 +76,7 @@ module.exports = class FSNodeParam {
        const resolved_path = PathBuilder.resolve(uidOrPath, { puterfs: true });
        return await fs.node({ path: resolved_path });
    }
};
};

module.exports = FSNodeParam;
module.exports.FSNodeParam = FSNodeParam;
@@ -1,8 +1,5 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.FSEntry = exports.protobufPackage = void 0;
const wire_1 = require("@bufbuild/protobuf/wire");
exports.protobufPackage = "";
import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire";
export const protobufPackage = "";
function createBaseFSEntry() {
    return {
        uuid: "",
@@ -19,8 +16,8 @@ function createBaseFSEntry() {
        size: 0,
    };
}
exports.FSEntry = {
    encode(message, writer = new wire_1.BinaryWriter()) {
export const FSEntry = {
    encode(message, writer = new BinaryWriter()) {
        if (message.uuid !== "") {
            writer.uint32(10).string(message.uuid);
        }
@@ -60,7 +57,7 @@ exports.FSEntry = {
        return writer;
    },
    decode(input, length) {
        const reader = input instanceof wire_1.BinaryReader ? input : new wire_1.BinaryReader(input);
        const reader = input instanceof BinaryReader ? input : new BinaryReader(input);
        const end = length === undefined ? reader.len : reader.pos + length;
        const message = createBaseFSEntry();
        while (reader.pos < end) {
@@ -215,7 +212,7 @@ exports.FSEntry = {
        return obj;
    },
    create(base) {
        return exports.FSEntry.fromPartial(base ?? {});
        return FSEntry.fromPartial(base ?? {});
    },
    fromPartial(object) {
        const message = createBaseFSEntry();
102 src/backend/src/modules/ai/PuterAIChatModule.js Normal file
@@ -0,0 +1,102 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import { AdvancedBase } from '@heyputer/putility';
import config from '../../config.js';
import { AIInterfaceService } from '../../services/ai/AIInterfaceService.js';
import { AIChatService } from '../../services/ai/chat/AIChatService.js';
import { GeminiImageGenerationService } from '../../services/ai/image/GeminiImageGenerationService.js';
import { OpenAIImageGenerationService } from '../../services/ai/image/OpenAIImageGenerationService.js';
import { TogetherImageGenerationService } from '../../services/ai/image/TogetherImageGenerationService.js';
import { AWSTextractService } from '../../services/ai/ocr/AWSTextractService.js';
import { ElevenLabsVoiceChangerService } from '../../services/ai/sts/ElevenLabsVoiceChangerService.js';
import { OpenAISpeechToTextService } from '../../services/ai/stt/OpenAISpeechToTextService.js';
import { AWSPollyService } from '../../services/ai/tts/AWSPollyService.js';
import { ElevenLabsTTSService } from '../../services/ai/tts/ElevenLabsTTSService.js';
import { OpenAITTSService } from '../../services/ai/tts/OpenAITTSService.js';
import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService.js';
import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService.js';

/**
 * PuterAIModule class extends AdvancedBase to manage and register various AI services.
 * This module handles the initialization and registration of multiple AI-related services
 * including text processing, speech synthesis, chat completion, and image generation.
 * Services are conditionally registered based on configuration settings, allowing for
 * flexible deployment with different AI providers like AWS, OpenAI, Claude, Together AI,
 * Mistral, Groq, and XAI.
 * @extends AdvancedBase
 */
export class PuterAIModule extends AdvancedBase {
    /**
     * Module for managing AI-related services in the Puter platform
     * Extends AdvancedBase to provide core functionality
     * Handles registration and configuration of various AI services like OpenAI, Claude, AWS services etc.
     */
    async install (context) {
        const services = context.get('services');

        services.registerService('__ai-interfaces', AIInterfaceService);

        // completion ai service
        services.registerService('ai-chat', AIChatService);

        // TODO DS: centralize other service types too

        // TODO: services should govern their own availability instead of the module deciding what to register
        if ( config?.services?.['aws-textract']?.aws ) {
            services.registerService('aws-textract', AWSTextractService);
        }

        if ( config?.services?.['aws-polly']?.aws ) {
            services.registerService('aws-polly', AWSPollyService);
        }

        if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
            services.registerService('elevenlabs-tts', ElevenLabsTTSService);
            services.registerService('elevenlabs-voice-changer', ElevenLabsVoiceChangerService);
        }

        if ( config?.services?.openai || config?.openai ) {
            services.registerService('openai-image-generation', OpenAIImageGenerationService);
            services.registerService('openai-video-generation', OpenAIVideoGenerationService);
            services.registerService('openai-tts', OpenAITTSService);
            services.registerService('openai-speech2txt', OpenAISpeechToTextService);
        }

        if ( config?.services?.['together-ai'] ) {
            services.registerService('together-image-generation', TogetherImageGenerationService);
            services.registerService('together-video-generation', TogetherVideoGenerationService);
        }

        if ( config?.services?.['gemini'] ) {
            services.registerService('gemini-image-generation', GeminiImageGenerationService);
        }
    }
}
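Each provider block in install() gates registration on configuration. A minimal sketch of a config object that would enable only the Gemini image-generation service (keys illustrative, not a documented schema):

const config = {
    services: {
        // Presence of this entry causes install() to call
        // services.registerService('gemini-image-generation', ...)
        gemini: { apiKey: '...' },
    },
};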
@@ -1,792 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { PassThrough } = require('stream');
const APIError = require('../../api/APIError');
const config = require('../../config');
const BaseService = require('../../services/BaseService');
const { DB_WRITE } = require('../../services/database/consts');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const { AsModeration } = require('./lib/AsModeration');
const FunctionCalling = require('./lib/FunctionCalling');
const Messages = require('./lib/Messages');
const Streaming = require('./lib/Streaming');

// Maximum number of fallback attempts when a model fails, including the first attempt
const MAX_FALLBACKS = 3 + 1;

/**
 * AIChatService class extends BaseService to provide AI chat completion functionality.
 * Manages multiple AI providers, models, and fallback mechanisms for chat interactions.
 * Handles model registration, usage tracking, cost calculation, content moderation,
 * and implements the puter-chat-completion driver interface. Supports streaming responses
 * and maintains detailed model information including pricing and capabilities.
 */
class AIChatService extends BaseService {
    static MODULES = {
        kv: globalThis.kv,
        uuidv4: require('uuid').v4,
        cuid2: require('@paralleldrive/cuid2').createId,
    };

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    /**
     * Initializes the service by setting up core properties.
     * Creates empty arrays for providers and model lists,
     * and initializes an empty object for the model map.
     * Called during service instantiation.
     * @private
     */
    _construct () {
        this.providers = [];
        this.simple_model_list = [];
        this.detail_model_list = [];
        this.detail_model_map = {};
    }

    get_model_details (model_name, context) {
        let model_details = this.detail_model_map[model_name];
        if ( Array.isArray(model_details) && context ) {
            for ( const model of model_details ) {
                if ( model.provider === context.service_used ) {
                    model_details = model;
                    break;
                }
            }
        }
        if ( Array.isArray(model_details) ) {
            model_details = model_details[0];
        }
        return model_details;
    }

    /**
     * Initializes the service: generates a KV namespace key, obtains a
     * database handle for AI usage tracking, and registers the
     * `max_tokens_exceeded` error with the api-error service.
     * @private
     */
    _init () {
        this.kvkey = this.modules.uuidv4();

        this.db = this.services.get('database').get(DB_WRITE, 'ai-usage');

        const svc_apiError = this.services.get('api-error');
        svc_apiError.register({
            max_tokens_exceeded: {
                status: 400,
                message: ({ input_tokens, max_tokens }) =>
                    'Input exceeds maximum token count. ' +
                    `Input has ${input_tokens} tokens, ` +
                    `but the maximum is ${max_tokens}.`,
            },
        });
    }

    /**
     * Handles consolidation during service boot by registering service aliases
     * and populating model lists/maps from providers.
     *
     * Registers each provider as an 'ai-chat' service alias and fetches their
     * available models and pricing information. Populates:
     * - simple_model_list: Basic list of supported models
     * - detail_model_list: Detailed model info including costs
     * - detail_model_map: Maps model IDs/aliases to their details
     *
     * @returns {Promise<void>}
     */
    async ['__on_boot.consolidation'] () {
        {
            const svc_driver = this.services.get('driver');
            for ( const provider of this.providers ) {
                svc_driver.register_service_alias('ai-chat',
                    provider.service_name,
                    { iface: 'puter-chat-completion' });
            }
        }

        for ( const provider of this.providers ) {
            const delegate = this.services.get(provider.service_name)
                .as('puter-chat-completion');

            // Populate simple model list
            {
                /**
                 * Populates the simple model list by fetching available models from the delegate service.
                 * Wraps the delegate.list() call in a try-catch block to handle potential errors gracefully.
                 * If the call fails, logs the error and returns an empty array to avoid breaking the service.
                 * The fetched models are added to this.simple_model_list.
                 *
                 * @private
                 * @returns {Promise<void>}
                 */
                const models = await (async () => {
                    try {
                        return await delegate.list() ?? [];
                    } catch (e) {
                        this.log.error(e);
                        return [];
                    }
                })();
                this.simple_model_list.push(...models);
            }

            // Populate detail model list and map
            {
                /**
                 * Populates the detail model list and map with model information from the provider.
                 * Fetches detailed model data including pricing and capabilities.
                 * Handles model aliases and potential conflicts by storing multiple models in arrays.
                 * Annotates models with their provider service name.
                 * Catches and logs any errors during model fetching.
                 * @private
                 */
                const models = await (async () => {
                    try {
                        return await delegate.models() ?? [];
                    } catch (e) {
                        this.log.error(e);
                        return [];
                    }
                })();
                const annotated_models = [];
                for ( const model of models ) {
                    annotated_models.push({
                        ...model,
                        provider: provider.service_name,
                    });
                }
                this.detail_model_list.push(...annotated_models);
                /**
                 * Helper function to set or push a model into the detail_model_map.
                 * If there's no existing entry for the key, sets it directly.
                 * If there's a conflict, converts the entry to an array and pushes the new model.
                 * @param {string} key - The model ID or alias
                 * @param {Object} model - The model details to add
                 */
                const set_or_push = (key, model) => {
                    // Typical case: no conflict
                    if ( ! this.detail_model_map[key] ) {
                        this.detail_model_map[key] = model;
                        return;
                    }

                    // Conflict: model name will map to an array
                    let array = this.detail_model_map[key];
                    if ( ! Array.isArray(array) ) {
                        array = [array];
                        this.detail_model_map[key] = array;
                    }

                    array.push(model);
                };
                for ( const model of annotated_models ) {
                    set_or_push(model.id, model);

                    if ( ! model.aliases ) continue;

                    for ( const alias of model.aliases ) {
                        set_or_push(alias, model);
                    }
                }
            }
        }
    }
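    // A minimal standalone sketch of the set_or_push conflict behavior above
    // (model objects are illustrative, not real registry entries):
    //
    //     const map = {};
    //     const demo = (key, model) => {
    //         if ( ! map[key] ) { map[key] = model; return; }
    //         if ( ! Array.isArray(map[key]) ) map[key] = [map[key]];
    //         map[key].push(model);
    //     };
    //     demo('gpt-4o', { id: 'gpt-4o', provider: 'openai-completion' });
    //     demo('gpt-4o', { id: 'openrouter:openai/gpt-4o', provider: 'openrouter' });
    //     console.log(Array.isArray(map['gpt-4o'])); // true: conflicting name became an array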
    register_provider (spec) {
        this.providers.push(spec);
    }

    static IMPLEMENTS = {
        ['driver-capabilities']: {
            supports_test_mode (iface, method_name) {
                return iface === 'puter-chat-completion' &&
                    method_name === 'complete';
            },
        },
        /**
         * Implements the 'puter-chat-completion' interface methods for AI chat functionality.
         * Handles model selection, fallbacks, usage tracking, and moderation.
         * Contains methods for listing available models, completing chat prompts,
         * and managing provider interactions.
         *
         * @property {Object} models - Available AI models with details like costs
         * @property {Object} list - Simplified list of available models
         * @property {Object} complete - Main method for chat completion requests
         * @param {Object} parameters - Chat completion parameters including model and messages
         * @returns {Promise<Object>} Chat completion response with usage stats
         * @throws {Error} If service is called directly or no fallback models available
         */
        ['puter-chat-completion']: {
            /**
             * Returns list of available AI models with detailed information
             *
             * Delegates to the intended service's models() method if a delegate exists,
             * otherwise returns the internal detail_model_list containing all available models
             * across providers with their capabilities and pricing information.
             *
             * For an example of the expected model object structure, see the `async models_`
             * private method at the bottom of any service with hard-coded model details such
             * as ClaudeService or GroqAIService.
             *
             * @returns {Promise<Array<Object>>} Array of model objects with details like id, provider, cost, etc.
             */
            async models () {
                const delegate = this.get_delegate();
                if ( ! delegate ) return await this.models_();
                return await delegate.models();
            },

            /**
             * Reports model names (including aliased names) only, with no
             * additional detail.
             * @returns {Promise<Array<string>>} Array of model name strings
             */
            async list () {
                const delegate = this.get_delegate();
                if ( ! delegate ) return await this.list_();
                return await delegate.list();
            },

            /**
             * Completes a chat interaction using one of the available AI models
             *
             * This service registers itself under an alias for each other AI
             * chat service, which results in DriverService always calling this
             * `complete` implementation first, which delegates to the intended
             * service.
             *
             * The return value may be anything that DriverService knows how to
             * coerce to the intended result. When `options.stream` is FALSE,
             * this is typically a raw object for the JSON response. When
             * `options.stream` is TRUE, the result is an object with this
             * structure:
             *
             *     {
             *         stream: true,
             *         response: stream {
             *             content_type: 'application/x-ndjson',
             *         }
             *     }
             *
             * @param {Object} options - The completion options
             * @param {Array} options.messages - Array of chat messages to process
             * @param {boolean} options.stream - Whether to stream the response
             * @param {string} options.model - The name of a model to use
             * @returns {{stream: boolean, [k:string]: unknown}} Returns either an object with stream:true property or a completion object
             */
            async complete (parameters) {
                const client_driver_call = Context.get('client_driver_call');
                let { test_mode, intended_service, response_metadata } = client_driver_call;

                const completionId = this.modules.cuid2();
                this.log.noticeme('AIChatService.complete', { intended_service, test_mode });
                const svc_event = this.services.get('event');
                const event = {
                    actor: Context.get('actor'),
                    completionId,
                    allow: true,
                    intended_service,
                    parameters,
                };
                await svc_event.emit('ai.prompt.validate', event);
                if ( ! event.allow ) {
                    test_mode = true;
                    if ( event.custom ) parameters.custom = event.custom;
                }

                if ( parameters.messages ) {
                    parameters.messages =
                        Messages.normalize_messages(parameters.messages);
                }

                // Skip moderation for Ollama (local service) and other local services
                const should_moderate = !test_mode &&
                    intended_service !== 'ollama' &&
                    !parameters.model?.startsWith('ollama:');

                if ( should_moderate && !await this.moderate(parameters) ) {
                    test_mode = true;
                    throw APIError.create('moderation_failed');
                }

                // Only set moderated flag if we actually ran moderation
                if ( !test_mode && should_moderate ) {
                    Context.set('moderated', true);
                }

                if ( test_mode ) {
                    intended_service = 'fake-chat';
                    if ( event.abuse ) {
                        parameters.model = 'abuse';
                    }
                }

                if ( parameters.tools ) {
                    FunctionCalling.normalize_tools_object(parameters.tools);
                }

                if ( intended_service === this.service_name ) {
                    throw new Error('Calling ai-chat directly is not yet supported');
                }

                const svc_driver = this.services.get('driver');
                let ret, error;
                let service_used = intended_service;
                let model_used = this.get_model_from_request(parameters, {
                    intended_service,
                });

                // Updated: Check usage and get a boolean result instead of throwing error
                const actor = Context.get('actor');
                const model_details = this.get_model_details(model_used, {
                    service_used,
                });

                if ( ! model_details ) {
                    // TODO (xiaochen): replace with a standard link
                    const available_models_url = `${this.global_config.origin}/puterai/chat/models`;

                    throw APIError.create('field_invalid', null, {
                        key: 'model',
                        expected: `a valid model name from ${available_models_url}`,
                        got: model_used,
                    });
                }

                const model_input_cost = model_details.cost.input;
                const model_output_cost = model_details.cost.output;
                const model_max_tokens = model_details.max_tokens;
                const text = Messages.extract_text(parameters.messages);
                const approximate_input_cost = text.length / 4 * model_input_cost; // TODO DS: guesstimate tokens better
                const usageAllowed = await this.meteringService.hasEnoughCredits(actor, approximate_input_cost);

                // Handle usage limits reached case
                if ( ! usageAllowed ) {
                    // Reroute the request to the usage-limited placeholder service
                    service_used = 'usage-limited-chat';
                    model_used = 'usage-limited';
                    // Update intended_service to match service_used
                    intended_service = service_used;
                }

                // available is no longer defined, so use meteringService to get available credits
                const availableCredits = await this.meteringService.getRemainingUsage(actor);
                const max_allowed_output_amount =
                    availableCredits - approximate_input_cost;

                const max_allowed_output_tokens =
                    max_allowed_output_amount / model_output_cost;

                if ( model_max_tokens ) {
                    parameters.max_tokens = Math.floor(Math.min(parameters.max_tokens ?? Number.POSITIVE_INFINITY,
                        max_allowed_output_tokens,
                        model_max_tokens - (Math.ceil(text.length / 4))));
                    if ( parameters.max_tokens < 1 ) {
                        parameters.max_tokens = undefined;
                    }
                }
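                // Worked example of the clamp above (illustrative numbers):
                // with availableCredits = 1000, approximate_input_cost = 200,
                // and model_output_cost = 0.5 per token, the credit budget
                // allows (1000 - 200) / 0.5 = 1600 output tokens, so
                // max_tokens becomes min(requested, 1600,
                // model_max_tokens - estimated input tokens).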
                try {
                    ret = await svc_driver.call_new_({
                        actor: Context.get('actor'),
                        service_name: intended_service,
                        skip_usage: true,
                        iface: 'puter-chat-completion',
                        method: 'complete',
                        args: parameters,
                    });
                } catch (e) {
                    const tried = [];
                    let model = model_used;

                    // TODO: if conflict models exist, add service name
                    tried.push(model);

                    error = e;

                    // Distinguishing between user errors and service errors
                    // is very messy because of different conventions between
                    // services. This is a best-effort attempt to catch user
                    // errors and throw them as 400s.
                    const is_request_error = (() => {
                        if ( e instanceof APIError ) {
                            return true;
                        }
                        if ( e.type === 'invalid_request_error' ) {
                            return true;
                        }
                        let some_error = e;
                        while ( some_error ) {
                            if ( some_error.type === 'invalid_request_error' ) {
                                return true;
                            }
                            some_error = some_error.error ?? some_error.cause;
                        }
                        return false;
                    })();

                    if ( is_request_error ) {
                        console.log(e.stack);
                        throw APIError.create('error_400_from_delegate', e, {
                            delegate: intended_service,
                            message: e.message,
                        });
                    }
                    console.error(e);

                    if ( config.disable_fallback_mechanisms ) {
                        throw e;
                    }

                    this.log.error('error calling service', {
                        intended_service,
                        model,
                        error: e,
                    });
                    while ( error ) {
                        // No fallbacks for pseudo-models
                        if ( intended_service === 'fake-chat' ) {
                            break;
                        }

                        const fallback = this.get_fallback_model({
                            model, tried,
                        });

                        if ( ! fallback ) {
                            throw new Error('no fallback model available');
                        }

                        const {
                            fallback_service_name,
                            fallback_model_name,
                        } = fallback;

                        this.log.warn('model fallback', {
                            intended_service,
                            fallback_service_name,
                            fallback_model_name,
                        });

                        // Check usage for fallback model too (with updated method)
                        const actor = Context.get('actor');
                        const fallbackUsageAllowed = await this.meteringService.hasEnoughCredits(actor, 1);

                        // If usage not allowed for fallback, use usage-limited-chat instead
                        if ( ! fallbackUsageAllowed ) {
                            // Reroute to the usage-limited placeholder service
                            service_used = 'usage-limited-chat';
                            model_used = 'usage-limited';
                            // Clear the error to exit the fallback loop
                            error = null;

                            // Call the usage-limited service
                            ret = await svc_driver.call_new_({
                                actor: Context.get('actor'),
                                service_name: 'usage-limited-chat',
                                skip_usage: true,
                                iface: 'puter-chat-completion',
                                method: 'complete',
                                args: parameters,
                            });
                        } else {
                            // Normal fallback flow continues
                            try {
                                ret = await svc_driver.call_new_({
                                    actor: Context.get('actor'),
                                    service_name: fallback_service_name,
                                    skip_usage: true,
                                    iface: 'puter-chat-completion',
                                    method: 'complete',
                                    args: {
                                        ...parameters,
                                        model: fallback_model_name,
                                    },
                                });
                                error = null;
                                service_used = fallback_service_name;
                                model_used = fallback_model_name;
                                response_metadata.fallback = {
                                    service: fallback_service_name,
                                    model: fallback_model_name,
                                    tried: tried,
                                };
                            } catch (e) {
                                error = e;
                                tried.push(fallback_model_name);
                                this.log.error('error calling fallback', {
                                    intended_service,
                                    model,
                                    error: e,
                                });
                            }
                        }
                    }
                }

                ret.result.via_ai_chat_service = true;
                response_metadata.service_used = service_used;

                // Add flag if we're using the usage-limited service
                if ( service_used === 'usage-limited-chat' ) {
                    response_metadata.usage_limited = true;
                }

                const username = Context.get('actor').type?.user?.username;

                if ( ret.result.stream ) {
                    if ( ret.result.init_chat_stream ) {
                        const stream = new PassThrough();
                        const retval = new TypedValue({
                            $: 'stream',
                            content_type: 'application/x-ndjson',
                            chunked: true,
                        }, stream);

                        const chatStream = new Streaming.AIChatStream({
                            stream,
                        });

                        (async () => {
                            try {
                                await ret.result.init_chat_stream({ chatStream });
                            } catch (e) {
                                this.errors.report('error during stream response', {
                                    source: e,
                                });
                                stream.write(`${JSON.stringify({
                                    type: 'error',
                                    message: e.message,
                                })}\n`);
                                stream.end();
                            } finally {
                                if ( ret.result.finally_fn ) {
                                    await ret.result.finally_fn();
                                }
                            }
                        })();

                        return retval;
                    }

                    return ret.result.response;
                }

                await svc_event.emit('ai.prompt.complete', {
                    username,
                    intended_service,
                    parameters,
                    result: ret.result,
                    model_used,
                    service_used,
                });

                if ( parameters.response?.normalize ) {
                    ret.result.message =
                        Messages.normalize_single_message(ret.result.message);
                    ret.result = {
                        message: ret.result.message,
                        via_ai_chat_service: true,
                        normalized: true,
                    };
                }

                return ret.result;
            },
        },
    };
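    // A minimal sketch of consuming the ndjson stream produced above, assuming
    // a Node Readable of 'application/x-ndjson' lines (transport details are
    // illustrative; each line is one JSON event, e.g. {"type":"error",...}):
    //
    //     const readline = require('node:readline');
    //     async function readChatStream (stream) {
    //         const rl = readline.createInterface({ input: stream });
    //         for await ( const line of rl ) {
    //             if ( ! line.trim() ) continue;
    //             const event = JSON.parse(line);
    //             if ( event.type === 'error' ) throw new Error(event.message);
    //             console.log(event); // text deltas, tool calls, usage, etc.
    //         }
    //     }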
    /**
     * Moderates chat messages for inappropriate content using OpenAI's moderation service
     *
     * @param {Object} params - The parameters object
     * @param {Array} params.messages - Array of chat messages to moderate
     * @returns {Promise<boolean>} Returns true if content is appropriate, false if flagged
     *
     * @description
     * Extracts text content from messages and checks each against OpenAI's moderation.
     * Handles both string content and structured message objects.
     * Returns false immediately if any message is flagged as inappropriate.
     * Falls back to a Claude-based moderation check if the OpenAI service is
     * unavailable, and throws if no moderation service works.
     */
    async moderate ({ messages }) {
        if ( process.env.TEST_MODERATION_FAILURE ) return false;
        const fulltext = Messages.extract_text(messages);
        let mod_last_error = null;
        let mod_result = null;
        try {
            const svc_openai = this.services.get('openai-completion');
            mod_result = await svc_openai.check_moderation(fulltext);
            if ( mod_result.flagged ) return false;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }
        try {
            const svc_claude = this.services.get('claude');
            const chat = svc_claude.as('puter-chat-completion');
            const mod = new AsModeration({
                chat,
                model: 'claude-3-haiku-20240307',
            });
            if ( ! await mod.moderate(fulltext) ) {
                return false;
            }
            mod_last_error = null;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }

        if ( mod_last_error ) {
            this.log.error('moderation error', {
                fulltext,
                mod_last_error,
            });
            throw new Error('no working moderation service');
        }
        return true;
    }

    async models_ () {
        return this.detail_model_list;
    }

    /**
     * Returns a list of available AI models with basic details
     * @returns {Promise<Array>} Array of simple model objects containing basic model information
     */
    async list_ () {
        return this.simple_model_list;
    }

    /**
     * Gets the appropriate delegate service for handling chat completion requests.
     * If the intended service is this service (ai-chat), returns undefined.
     * Otherwise returns the intended service wrapped as a puter-chat-completion interface.
     *
     * @returns {Object|undefined} The delegate service or undefined if intended service is ai-chat
     */
    get_delegate () {
        const client_driver_call = Context.get('client_driver_call');
        if ( client_driver_call.intended_service === this.service_name ) {
            return undefined;
        }
        console.log('getting service', client_driver_call.intended_service);
        const service = this.services.get(client_driver_call.intended_service);
        return service.as('puter-chat-completion');
    }

    /**
     * Find an appropriate fallback model by matching the target model's
     * provider and name against equivalent models on the aggregate providers
     * (openrouter, togetherai), and selecting the first candidate that is not
     * in the tried list. The candidate list is cached in KV per model.
     *
     * @param {*} param0
     * @returns
     */
    get_fallback_model ({ model, tried }) {
        let target_model = this.detail_model_map[model];

        if ( ! target_model ) {
            this.log.error('could not find model', { model });
            throw new Error('could not find model');
        }
        if ( Array.isArray(target_model) ) {
            // TODO: better conflict resolution
            this.log.noticeme('conflict exists', { model, target_model });
            target_model = target_model[0];
        }

        // First check KV for the cached candidate list
        let potentialFallbacks = this.modules.kv.get(`${this.kvkey}:fallbacks:${model}`);

        if ( ! potentialFallbacks ) {
            // Calculate the candidate list
            const models = this.detail_model_list;

            let aiProvider, modelToSearch;
            if ( target_model.id.startsWith('openrouter:') || target_model.id.startsWith('togetherai:') ) {
                [aiProvider, modelToSearch] = target_model.id.replace('openrouter:', '').replace('togetherai:', '').toLowerCase().split('/');
            } else {
                [aiProvider, modelToSearch] = [
                    target_model.provider.toLowerCase().replace('gemini', 'google').replace('openai-completion', 'openai'),
                    target_model.id.toLowerCase(),
                ];
            }

            const potentialMatches = models.filter(model => {
                const possibleModelNames = [`openrouter:${aiProvider}/${modelToSearch}`,
                    `togetherai:${aiProvider}/${modelToSearch}`, ...(target_model.aliases?.map((alias) => [`openrouter:${aiProvider}/${alias}`,
                        `togetherai:${aiProvider}/${alias}`])?.flat() ?? [])];

                return !!possibleModelNames.find(possibleName => model.id.toLowerCase() === possibleName);
            }).slice(0, MAX_FALLBACKS);

            this.modules.kv.set(`${this.kvkey}:fallbacks:${model}`, potentialMatches);
            potentialFallbacks = potentialMatches;
        }

        for ( const model of potentialFallbacks ) {
            if ( tried.includes(model.id) ) continue;
            if ( model.provider === 'fake-chat' ) continue;

            return {
                fallback_service_name: model.provider,
                fallback_model_name: model.id,
            };
        }

        // No fallbacks available
        this.log.error('no fallbacks', {
            potentialFallbacks,
            tried,
        });
    }
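    // Example of the candidate matching above (names illustrative): for a
    // target model with provider 'openai-completion' and id 'gpt-4o',
    // aiProvider becomes 'openai' and modelToSearch 'gpt-4o', so fallback
    // candidates are models whose id equals 'openrouter:openai/gpt-4o' or
    // 'togetherai:openai/gpt-4o' (plus the same forms for each alias),
    // excluding anything already in `tried`.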
    get_model_from_request (parameters, modified_context = {}) {
        const client_driver_call = Context.get('client_driver_call');
        let { intended_service } = client_driver_call;

        if ( modified_context.intended_service ) {
            intended_service = modified_context.intended_service;
        }

        let model = parameters.model;
        if ( ! model ) {
            const service = this.services.get(intended_service);
            if ( ! service.get_default_model ) {
                throw new Error('could not infer model from service');
            }
            model = service.get_default_model();
            if ( ! model ) {
                throw new Error('could not infer model from service');
            }
        }

        return model;
    }
}

module.exports = { AIChatService };
@@ -1,42 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const BaseService = require('../../services/BaseService');

/**
 * Service class that handles AI test mode functionality.
 * Extends BaseService to register test services for AI chat completions.
 * Used for testing and development of AI-related features by providing
 * a mock implementation of the chat completion service.
 */
class AITestModeService extends BaseService {
    /**
     * Service for managing AI test mode functionality
     * @extends BaseService
     */
    async _init () {
        const svc_driver = this.services.get('driver');
        svc_driver.register_test_service('puter-chat-completion', 'ai-chat');
    }
}

module.exports = {
    AITestModeService,
};
@@ -1,495 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { default: Anthropic, toFile } = require('@anthropic-ai/sdk');
const BaseService = require('../../services/BaseService');
const FunctionCalling = require('./lib/FunctionCalling');
const Messages = require('./lib/Messages');
const FSNodeParam = require('../../api/filesystem/FSNodeParam');
const { LLRead } = require('../../filesystem/ll_operations/ll_read');
const { Context } = require('../../util/context');
const mime = require('mime-types');

/**
 * ClaudeService class extends BaseService to provide integration with Anthropic's Claude AI models.
 * Implements the puter-chat-completion interface for handling AI chat interactions.
 * Manages message streaming, token limits, model selection, and API communication with Claude.
 * Supports system prompts, message adaptation, and usage tracking.
 * @extends BaseService
 */
class ClaudeService extends BaseService {

    // Traits definitions
    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            async models () {
                return this.models();
            },
            async list () {
                return this.list();
            },
            async complete (...args) {
                return this.complete(...args);
            },
        },
    };

    /**
     * @type {import('@anthropic-ai/sdk').Anthropic}
     */
    anthropic;

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    #meteringService;

    async _init () {
        this.anthropic = new Anthropic({
            apiKey: this.config.apiKey,
            // 10 minutes is the default; we need to override the timeout to
            // disable an "aggressive" preemptive error that's thrown
            // erroneously by the SDK.
            // (https://github.com/anthropics/anthropic-sdk-typescript/issues/822)
            timeout: 10 * 60 * 1001,
        });

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.#meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
    }

    /**
     * Returns the default model identifier for Claude API interactions
     * @returns {string} The default model ID 'claude-3-5-sonnet-latest'
     */
    get_default_model () {
        return 'claude-3-5-sonnet-latest';
    }

    async list () {
        const models = this.models();
        const model_names = [];
        for ( const model of models ) {
            model_names.push(model.id);
            if ( model.aliases ) {
                model_names.push(...model.aliases);
            }
        }
        return model_names;
    }

    /**
     *
     * @param {object} arg
     * @param {Array} arg.messages
     * @param {boolean} [arg.stream]
     * @param {string} arg.model
     * @param {Array} [arg.tools]
     * @param {number} [arg.max_tokens]
     * @param {number} [arg.temperature]
     * @returns
     */
    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
        tools = FunctionCalling.make_claude_tools(tools);

        let system_prompts;
        // Unsure why system_prompts is an array, but it always seems to have
        // exactly one element, and the real array of system prompts seems to
        // be its [0].content -- NS
        [system_prompts, messages] = Messages.extract_and_remove_system_messages(messages);

        // Apply the cache control tag to all content blocks
        if (
            system_prompts.length > 0 &&
            system_prompts[0].cache_control &&
            system_prompts[0]?.content
        ) {
            system_prompts[0].content = system_prompts[0].content.map(prompt => {
                prompt.cache_control = system_prompts[0].cache_control;
                return prompt;
            });
        }

        messages = messages.map(message => {
            if ( message.cache_control ) {
                message.content[0].cache_control = message.cache_control;
            }
            delete message.cache_control;
            return message;
        });

        const sdk_params = {
            model: model ?? this.get_default_model(),
            max_tokens: Math.floor(max_tokens) ||
                ((
                    model === 'claude-3-5-sonnet-20241022'
                    || model === 'claude-3-5-sonnet-20240620'
                ) ? 8192 : this.models().filter(e => (e.name === model || e.aliases?.includes(model)))[0]?.max_tokens || 4096), // required
            temperature: temperature || 0, // required
            ...( (system_prompts && system_prompts[0]?.content) ? {
                system: system_prompts[0]?.content,
            } : {}),
            tool_choice: {
                type: 'auto',
                disable_parallel_tool_use: true,
            },
            messages,
            ...(tools ? { tools } : {}),
        };
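        // Example of the max_tokens default above (illustrative): with no
        // explicit max_tokens and model 'claude-opus-4-5', the lookup in
        // models() yields max_tokens: 64000; the two Claude 3.5 Sonnet
        // snapshots are special-cased to 8192, and an unrecognized model
        // falls back to 4096.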
console.log(sdk_params.max_tokens);
|
||||
|
||||
let beta_mode = false;
|
||||
|
||||
// Perform file uploads
|
||||
const file_delete_tasks = [];
|
||||
const actor = Context.get('actor');
|
||||
const { user } = actor.type;
|
||||
|
||||
const file_input_tasks = [];
|
||||
for ( const message of messages ) {
|
||||
// We can assume `message.content` is not undefined because
|
||||
// Messages.normalize_single_message ensures this.
|
||||
for ( const contentPart of message.content ) {
|
||||
if ( ! contentPart.puter_path ) continue;
|
||||
file_input_tasks.push({
|
||||
node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
|
||||
req: { user },
|
||||
getParam: () => contentPart.puter_path,
|
||||
}),
|
||||
contentPart,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const promises = [];
|
||||
for ( const task of file_input_tasks ) {
|
||||
promises.push((async () => {
|
||||
const ll_read = new LLRead();
|
||||
const stream = await ll_read.run({
|
||||
actor: Context.get('actor'),
|
||||
fsNode: task.node,
|
||||
});
|
||||
|
||||
const mimeType = mime.contentType(await task.node.get('name'));
|
||||
|
||||
beta_mode = true;
|
||||
const fileUpload = await this.anthropic.beta.files.upload({
|
||||
file: await toFile(stream, undefined, { type: mimeType }),
|
||||
}, {
|
||||
betas: ['files-api-2025-04-14'],
|
||||
});
|
||||
|
||||
file_delete_tasks.push({ file_id: fileUpload.id });
|
||||
// We have to copy a table from the documentation here:
|
||||
// https://docs.anthropic.com/en/docs/build-with-claude/files
|
||||
const contentBlockTypeForFileBasedOnMime = (() => {
|
||||
if ( mimeType.startsWith('image/') ) {
|
||||
return 'image';
|
||||
}
|
||||
if ( mimeType.startsWith('text/') ) {
|
||||
return 'document';
|
||||
}
|
||||
if ( mimeType === 'application/pdf' || mimeType === 'application/x-pdf' ) {
|
||||
return 'document';
|
||||
}
|
||||
return 'container_upload';
|
||||
})();
|
||||
|
||||
delete task.contentPart.puter_path,
|
||||
task.contentPart.type = contentBlockTypeForFileBasedOnMime;
|
||||
task.contentPart.source = {
|
||||
type: 'file',
|
||||
file_id: fileUpload.id,
|
||||
};
|
||||
})());
|
||||
}
|
||||
await Promise.all(promises);
|
||||
|
||||
const cleanup_files = async () => {
|
||||
const promises = [];
|
||||
for ( const task of file_delete_tasks ) {
|
||||
promises.push((async () => {
|
||||
try {
|
||||
await this.anthropic.beta.files.delete(task.file_id,
|
||||
{ betas: ['files-api-2025-04-14'] });
|
||||
} catch (e) {
|
||||
this.errors.report('claude:file-delete-task', {
|
||||
source: e,
|
||||
trace: true,
|
||||
alarm: true,
|
||||
extra: { file_id: task.file_id },
|
||||
});
|
||||
}
|
||||
})());
|
||||
}
|
||||
await Promise.all(promises);
|
||||
};
|
||||
|
||||
if ( beta_mode ) {
|
||||
Object.assign(sdk_params, { betas: ['files-api-2025-04-14'] });
|
||||
}
|
||||
const anthropic = beta_mode ? this.anthropic.beta : this.anthropic;
|
||||
|
||||
if ( stream ) {
|
||||
const init_chat_stream = async ({ chatStream }) => {
|
||||
const completion = await anthropic.messages.stream(sdk_params);
|
||||
const usageSum = {};
|
||||
|
||||
let message, contentBlock;
|
||||
for await ( const event of completion ) {
|
||||
|
||||
const usageObject = (event?.usage ?? event?.message?.usage ?? {});
|
||||
const meteredData = this.usageFormatterUtil(usageObject);
|
||||
Object.keys(meteredData).forEach((key) => {
|
||||
if ( ! usageSum[key] ) usageSum[key] = 0;
|
||||
usageSum[key] += meteredData[key];
|
||||
});
|
||||
|
||||
if ( event.type === 'message_start' ) {
|
||||
message = chatStream.message();
|
||||
continue;
|
||||
}
|
||||
if ( event.type === 'message_stop' ) {
|
||||
message.end();
|
||||
message = null;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( event.type === 'content_block_start' ) {
|
||||
if ( event.content_block.type === 'tool_use' ) {
|
||||
contentBlock = message.contentBlock({
|
||||
type: event.content_block.type,
|
||||
id: event.content_block.id,
|
||||
name: event.content_block.name,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
contentBlock = message.contentBlock({
|
||||
type: event.content_block.type,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( event.type === 'content_block_stop' ) {
|
||||
contentBlock.end();
|
||||
contentBlock = null;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( event.type === 'content_block_delta' ) {
|
||||
if ( event.delta.type === 'input_json_delta' ) {
|
||||
contentBlock.addPartialJSON(event.delta.partial_json);
|
||||
continue;
|
||||
}
|
||||
if ( event.delta.type === 'text_delta' ) {
|
||||
contentBlock.addText(event.delta.text);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
chatStream.end();
|
||||
|
||||
this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${this.models().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`);
|
||||
};
|
||||
|
||||
return {
|
||||
init_chat_stream,
|
||||
stream: true,
|
||||
finally_fn: cleanup_files,
|
||||
};
|
||||
}
|
||||
|
||||
const msg = await anthropic.messages.create(sdk_params);
|
||||
await cleanup_files();
|
||||
|
||||
const usage = this.usageFormatterUtil(msg.usage);
|
||||
this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${this.models().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`);
|
||||
|
||||
// TODO DS: cleanup old usage tracking
|
||||
return {
|
||||
message: msg,
|
||||
usage: msg.usage,
|
||||
finish_reason: 'stop',
|
||||
};
|
||||
}
|
||||
|
||||
/** @type {(usage: import("@anthropic-ai/sdk/resources/messages.js").Usage | import("@anthropic-ai/sdk/resources/beta/messages/messages.js").BetaUsage) => {}}) */
|
||||
usageFormatterUtil (usage) {
|
||||
return {
|
||||
input_tokens: usage?.input_tokens || 0,
|
||||
ephemeral_5m_input_tokens: usage?.cache_creation?.ephemeral_5m_input_tokens || usage?.cache_creation_input_tokens || 0, // their API reports this field in two places, so fall back
|
||||
ephemeral_1h_input_tokens: usage?.cache_creation?.ephemeral_1h_input_tokens || 0,
|
||||
cache_read_input_tokens: usage?.cache_read_input_tokens || 0,
|
||||
output_tokens: usage?.output_tokens || 0,
|
||||
};
|
||||
};
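|
||||
// The normalization above maps, for example (hypothetical numbers),
|
||||
//   { input_tokens: 10, output_tokens: 5, cache_read_input_tokens: 2 }
|
||||
// to
|
||||
//   { input_tokens: 10, ephemeral_5m_input_tokens: 0, ephemeral_1h_input_tokens: 0,
|
||||
//     cache_read_input_tokens: 2, output_tokens: 5 }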
|
||||
|
||||
/**
|
||||
* Retrieves available Claude AI models and their specifications
|
||||
* @returns Array of model objects containing:
|
||||
* - id: Model identifier
|
||||
* - name: Display name
|
||||
* - aliases: Alternative names for the model
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing details (currency, token counts, input/output costs)
|
||||
* - qualitative_speed: Relative speed rating
|
||||
* - max_output: Maximum output tokens
|
||||
* - training_cutoff: Training data cutoff date
|
||||
*/
|
||||
models () {
|
||||
return [
|
||||
{
|
||||
id: 'claude-opus-4-5-20251101',
|
||||
aliases: ['claude-opus-4-5-latest', 'claude-opus-4-5', 'claude-opus-4.5'],
|
||||
name: 'Claude Opus 4.5',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 500,
|
||||
output: 2500,
|
||||
},
|
||||
context: 200000,
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-haiku-4-5-20251001',
|
||||
aliases: ['claude-haiku-4.5', 'claude-haiku-4-5'],
|
||||
name: 'Claude Haiku 4.5',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 100,
|
||||
output: 500,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-sonnet-4-5-20250929',
|
||||
aliases: ['claude-sonnet-4.5', 'claude-sonnet-4-5'],
|
||||
name: 'Claude Sonnet 4.5',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-opus-4-1-20250805',
|
||||
aliases: ['claude-opus-4-1'],
|
||||
name: 'Claude Opus 4.1',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 1500,
|
||||
output: 7500,
|
||||
},
|
||||
max_tokens: 32000,
|
||||
},
|
||||
{
|
||||
id: 'claude-opus-4-20250514',
|
||||
aliases: ['claude-opus-4', 'claude-opus-4-latest'],
|
||||
name: 'Claude Opus 4',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 1500,
|
||||
output: 7500,
|
||||
},
|
||||
max_tokens: 32000,
|
||||
},
|
||||
{
|
||||
id: 'claude-sonnet-4-20250514',
|
||||
aliases: ['claude-sonnet-4', 'claude-sonnet-4-latest'],
|
||||
name: 'Claude Sonnet 4',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-7-sonnet-20250219',
|
||||
aliases: ['claude-3-7-sonnet-latest'],
|
||||
succeeded_by: 'claude-sonnet-4-20250514',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-5-sonnet-20241022',
|
||||
name: 'Claude 3.5 Sonnet',
|
||||
aliases: ['claude-3-5-sonnet-latest'],
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
qualitative_speed: 'fast',
|
||||
training_cutoff: '2024-04',
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-5-sonnet-20240620',
|
||||
succeeded_by: 'claude-3-5-sonnet-20241022',
|
||||
context: 200000, // might be wrong
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 300,
|
||||
output: 1500,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'claude-3-haiku-20240307',
|
||||
context: 200000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 25,
|
||||
output: 125,
|
||||
},
|
||||
qualitative_speed: 'fastest',
|
||||
max_tokens: 4096,
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
ClaudeService,
|
||||
};
|
||||
@@ -1,224 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const { Context } = require('../../util/context');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const dedent = require('dedent');
|
||||
|
||||
/**
|
||||
* DeepSeekService class - Provides integration with DeepSeek's API for chat completions
|
||||
* Extends BaseService to implement the puter-chat-completion interface.
|
||||
* Handles model management, message adaptation, streaming responses,
|
||||
* and usage tracking for DeepSeek's language models like DeepSeek Chat and Reasoner.
|
||||
* @extends BaseService
|
||||
*/
|
||||
class DeepSeekService extends BaseService {
|
||||
static MODULES = {
|
||||
openai: require('openai'),
|
||||
};
|
||||
|
||||
/**
|
||||
* @type {import('../../services/MeteringService/MeteringService').MeteringService}
|
||||
*/
|
||||
meteringService;
|
||||
/**
|
||||
* Returns the model identifier unchanged; a hook for model name adaptation.
|
||||
* @param {string} model - The requested model identifier
|
||||
* @returns {string} The model identifier to use with the DeepSeek API
|
||||
*/
|
||||
adapt_model (model) {
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the DeepSeek service by setting up the OpenAI client and registering with the AI chat provider
|
||||
* @private
|
||||
* @returns {Promise<void>} Resolves when initialization is complete
|
||||
*/
|
||||
async _init () {
|
||||
this.openai = new this.modules.openai.OpenAI({
|
||||
apiKey: this.global_config.services.deepseek.apiKey,
|
||||
baseURL: 'https://api.deepseek.com',
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringService = this.services.get('meteringService').meteringService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default model identifier for the DeepSeek service
|
||||
* @returns {string} The default model ID 'deepseek-chat'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'deepseek-chat';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['puter-chat-completion']: {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
const models = await this.models_();
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
model_names.push(model.id);
|
||||
if ( model.aliases ) {
|
||||
model_names.push(...model.aliases);
|
||||
}
|
||||
}
|
||||
return model_names;
|
||||
},
|
||||
|
||||
/**
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
model = this.adapt_model(model);
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
// DeepSeek doesn't appreciate arrays here
|
||||
if ( message.tool_calls && Array.isArray(message.content) ) {
|
||||
message.content = '';
|
||||
}
|
||||
}
|
||||
|
||||
// Function calling is just broken on DeepSeek - it never acknowledges
|
||||
// the tool results and instead keeps calling the function over and over.
|
||||
// (see https://github.com/deepseek-ai/DeepSeek-V3/issues/15)
|
||||
// To fix this, we inject a message that tells DeepSeek what happened.
|
||||
const TOOL_TEXT = message => dedent(`
|
||||
Hi DeepSeek V3, your tool calling is broken and you are not able to
|
||||
obtain tool results in the expected way. That's okay, we can work
|
||||
around this.
|
||||
|
||||
Please do not repeat this tool call.
|
||||
|
||||
We have provided the tool call results below:
|
||||
|
||||
Tool call ${message.tool_call_id} returned: ${message.content}.
|
||||
`);
|
||||
for ( let i = messages.length - 1; i >= 0 ; i-- ) {
|
||||
const message = messages[i];
|
||||
if ( message.role === 'tool' ) {
|
||||
messages.splice(i + 1, 0, {
|
||||
role: 'system',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: TOOL_TEXT(message),
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
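|
||||
// After the splice above, a tool exchange reads, illustratively:
|
||||
//   { role: 'tool', tool_call_id: 'call_1', content: '42' },
|
||||
//   { role: 'system', content: [{ type: 'text', text: TOOL_TEXT(...) }] }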
|
||||
|
||||
const completion = await this.openai.chat.completions.create({
|
||||
messages,
|
||||
model: model ?? this.get_default_model(),
|
||||
...(tools ? { tools } : {}),
|
||||
max_tokens: max_tokens || 1000,
|
||||
temperature, // the default temperature is 1.0. suggested 0 for math/coding and 1.5 for creative poetry
|
||||
stream,
|
||||
...(stream ? {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
});
|
||||
|
||||
// Metering integration now handled via usage_calculator in OpenAIUtil.handle_completion_output
|
||||
const actor = Context.get('actor');
|
||||
const modelDetails = (await this.models_()).find(m => m.id === (model ?? this.get_default_model()));
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `deepseek:${modelDetails.id}`);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves available AI models and their specifications
|
||||
* @returns {Promise<Array>} Array of model objects containing:
|
||||
* - id: Model identifier string
|
||||
* - name: Human readable model name
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing information object with currency and rates
|
||||
* @private
|
||||
*/
|
||||
async models_ () {
|
||||
return [
|
||||
{
|
||||
id: 'deepseek-chat',
|
||||
name: 'DeepSeek Chat',
|
||||
context: 128000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 56,
|
||||
output: 168,
|
||||
},
|
||||
max_tokens: 8000,
|
||||
},
|
||||
{
|
||||
id: 'deepseek-reasoner',
|
||||
name: 'DeepSeek Reasoner',
|
||||
context: 128000,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 56,
|
||||
output: 168,
|
||||
},
|
||||
max_tokens: 64000,
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
DeepSeekService,
|
||||
};
|
||||
@@ -1,217 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const { default: dedent } = require('dedent');
|
||||
const BaseService = require('../../services/BaseService');
|
||||
/**
|
||||
* FakeChatService - A mock implementation of a chat service that extends BaseService.
|
||||
* Provides fake chat completion responses using Lorem Ipsum text generation.
|
||||
* Used for testing and development purposes when a real chat service is not needed.
|
||||
* Implements the 'puter-chat-completion' interface with list() and complete() methods.
|
||||
*/
|
||||
class FakeChatService extends BaseService {
|
||||
/**
|
||||
* Initializes the service and registers it as a provider with AIChatService
|
||||
* @private
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async _init () {
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
}
|
||||
|
||||
get_default_model () {
|
||||
return 'fake';
|
||||
}
|
||||
static IMPLEMENTS = {
|
||||
['puter-chat-completion']: {
|
||||
/**
|
||||
* Returns a list of available models with their details
|
||||
* @returns {Promise<Object[]>} Array of model details including costs
|
||||
* @description Returns detailed information about available models including
|
||||
* their costs for input and output tokens
|
||||
*/
|
||||
async models () {
|
||||
return [
|
||||
{
|
||||
id: 'fake',
|
||||
aliases: [],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'costly',
|
||||
aliases: [],
|
||||
cost: {
|
||||
input: 1000, // 1000 microcents per million tokens (0.001 cents per million tokens)
|
||||
output: 2000, // 2000 microcents per million tokens (0.002 cents per million tokens)
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'abuse',
|
||||
aliases: [],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
},
|
||||
},
|
||||
];
|
||||
},
|
||||
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
return ['fake', 'costly', 'abuse'];
|
||||
},
|
||||
|
||||
/**
|
||||
* Simulates a chat completion request by generating random Lorem Ipsum text
|
||||
* @param {Object} params - The completion parameters
|
||||
* @param {Array} params.messages - Array of chat messages
|
||||
* @param {boolean} params.stream - Whether to stream the response (unused in fake implementation)
|
||||
* @param {string} params.model - The model to use ('fake', 'costly', or 'abuse')
|
||||
* @returns {Object} A simulated chat completion response with Lorem Ipsum content
|
||||
*/
|
||||
async complete ({ messages, stream, model, max_tokens, custom }) {
|
||||
const { LoremIpsum } = require('lorem-ipsum');
|
||||
const li = new LoremIpsum({
|
||||
sentencesPerParagraph: {
|
||||
max: 8,
|
||||
min: 4,
|
||||
},
|
||||
wordsPerSentence: {
|
||||
max: 20,
|
||||
min: 12,
|
||||
},
|
||||
});
|
||||
|
||||
// Determine token counts based on messages and model
|
||||
const usedModel = model || this.get_default_model();
|
||||
|
||||
// For the costly model, simulate actual token counting
|
||||
const resp = this.get_response({ li, usedModel, custom, max_tokens, messages });
|
||||
|
||||
if ( stream ) {
|
||||
return {
|
||||
stream: true,
|
||||
init_chat_stream: async ({ chatStream }) => {
|
||||
await new Promise(rslv => setTimeout(rslv, 500));
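|
||||
// The fake stream speaks newline-delimited JSON; the write below emits,
|
||||
// e.g., {"type":"text","text":"Lorem ipsum ..."} followed by a newline.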
|
||||
chatStream.stream.write(`${JSON.stringify({
|
||||
type: 'text',
|
||||
text: resp.message.content[0].text,
|
||||
})}\n`);
|
||||
chatStream.end();
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return resp;
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
get_response ({ li, usedModel, messages, custom, max_tokens }) {
|
||||
let inputTokens = 0;
|
||||
let outputTokens = 0;
|
||||
|
||||
if ( usedModel === 'costly' ) {
|
||||
// Simple token estimation: roughly 4 chars per token for input
|
||||
if ( messages && messages.length > 0 ) {
|
||||
for ( const message of messages ) {
|
||||
if ( typeof message.content === 'string' ) {
|
||||
inputTokens += Math.ceil(message.content.length / 4);
|
||||
} else if ( Array.isArray(message.content) ) {
|
||||
for ( const content of message.content ) {
|
||||
if ( content.type === 'text' ) {
|
||||
inputTokens += Math.ceil(content.text.length / 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
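|
||||
// Illustrative arithmetic: a 100-character text message estimates to
|
||||
// Math.ceil(100 / 4) === 25 input tokens under this heuristic.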
|
||||
|
||||
// Generate a random output token count between 50 and 200, capped at max_tokens when provided
|
||||
outputTokens = Math.floor(Math.min((Math.random() * 150) + 50, max_tokens ?? Infinity));
|
||||
// outputTokens = Math.floor(Math.random() * 150) + 50;
|
||||
}
|
||||
|
||||
// Generate the response text
|
||||
let responseText;
|
||||
if ( usedModel === 'abuse' ) {
|
||||
// responseText = dedent(`
|
||||
// This is a message from ${
|
||||
// this.global_config.origin}. We have detected abuse of our services.
|
||||
|
||||
// If you are seeing this on another website, please report it to ${
|
||||
// this.global_config.abuse_email ?? 'hi@puter.com'}
|
||||
// `);
|
||||
responseText = dedent(`
|
||||
<h2>Free AI and Cloud for everyone!</h2><br />
|
||||
Come on down to <a href="https://puter.com">puter.com</a> and try it out!
|
||||
${custom ?? ''}
|
||||
`);
|
||||
} else {
|
||||
// Generate 1-3 paragraphs for both fake and costly models
|
||||
responseText = li.generateParagraphs(Math.floor(Math.random() * 3) + 1);
|
||||
}
|
||||
|
||||
// Report usage based on model
|
||||
const usage = {
|
||||
'input_tokens': usedModel === 'costly' ? inputTokens : 0,
|
||||
'output_tokens': usedModel === 'costly' ? outputTokens : 1,
|
||||
};
|
||||
|
||||
return {
|
||||
'index': 0,
|
||||
message: {
|
||||
'id': '00000000-0000-0000-0000-000000000000',
|
||||
'type': 'message',
|
||||
'role': 'assistant',
|
||||
'model': usedModel,
|
||||
'content': [
|
||||
{
|
||||
'type': 'text',
|
||||
'text': responseText,
|
||||
},
|
||||
],
|
||||
'stop_reason': 'end_turn',
|
||||
'stop_sequence': null,
|
||||
'usage': usage,
|
||||
},
|
||||
'usage': usage,
|
||||
'logprobs': null,
|
||||
'finish_reason': 'stop',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
FakeChatService,
|
||||
};
|
||||
@@ -1,114 +0,0 @@
|
||||
// Preamble: Before this we used Gemini's SDK directly and, as we found out,
|
||||
// it's actually kind of terrible. So we use the OpenAI SDK now.
|
||||
import BaseService from '../../../services/BaseService.js';
|
||||
import openai from 'openai';
|
||||
import OpenAIUtil from '../lib/OpenAIUtil.js';
|
||||
import { Context } from '../../../util/context.js';
|
||||
import { models } from './models.mjs';
|
||||
|
||||
|
||||
export class GeminiService extends BaseService {
|
||||
/**
|
||||
* @type {import('../../../services/MeteringService/MeteringService').MeteringService}
|
||||
*/
|
||||
meteringService = undefined;
|
||||
|
||||
defaultModel = 'gemini-2.5-flash';
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['puter-chat-completion']: {
|
||||
async models () {
|
||||
return await this.models();
|
||||
},
|
||||
async complete (...args) {
|
||||
return await this.complete(...args);
|
||||
},
|
||||
async list () {
|
||||
return await this.list();
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
async _init () {
|
||||
this.openai = new openai.OpenAI({
|
||||
apiKey: this.config.apiKey,
|
||||
baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringService = this.services.get('meteringService').meteringService;
|
||||
}
|
||||
|
||||
get_default_model () {
|
||||
return this.defaultModel;
|
||||
}
|
||||
|
||||
async models () {
|
||||
return models;
|
||||
}
|
||||
async list () {
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
model_names.push(model.id);
|
||||
if ( model.aliases ) {
|
||||
model_names.push(...model.aliases);
|
||||
}
|
||||
}
|
||||
return model_names;
|
||||
}
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
const actor = Context.get('actor');
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
|
||||
// delete cache_control
|
||||
messages = messages.map(m => {
|
||||
delete m.cache_control;
|
||||
return m;
|
||||
});
|
||||
|
||||
const sdk_params = {
|
||||
messages: messages,
|
||||
model: model,
|
||||
...(tools ? { tools } : {}),
|
||||
...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
|
||||
...(temperature ? { temperature } : {}),
|
||||
stream,
|
||||
...(stream ? {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
};
|
||||
|
||||
let completion;
|
||||
try {
|
||||
completion = await this.openai.chat.completions.create(sdk_params);
|
||||
} catch (e) {
|
||||
console.error('Gemini completion error: ', e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
const modelDetails = (await this.models()).find(m => m.id === model);
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = {
|
||||
prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
|
||||
completion_tokens: usage.completion_tokens ?? 0,
|
||||
cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
||||
};
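|
||||
// Illustrative example (hypothetical numbers):
|
||||
//   { prompt_tokens: 120, completion_tokens: 30, prompt_tokens_details: { cached_tokens: 20 } }
|
||||
// meters as { prompt_tokens: 100, completion_tokens: 30, cached_tokens: 20 },
|
||||
// so cached prompt tokens are not double-billed as fresh input.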
|
||||
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelDetails.id}`);
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
export const models = [
|
||||
{
|
||||
id: 'gemini-1.5-flash',
|
||||
name: 'Gemini 1.5 Flash',
|
||||
context: 131072,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 7.5,
|
||||
output: 30,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.0-flash',
|
||||
name: 'Gemini 2.0 Flash',
|
||||
context: 131072,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 40,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.0-flash-lite',
|
||||
name: 'Gemini 2.0 Flash-Lite',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 8,
|
||||
output: 32,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.5-flash',
|
||||
name: 'Gemini 2.5 Flash',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 12,
|
||||
output: 48,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.5-flash-lite',
|
||||
name: 'Gemini 2.5 Flash-Lite',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 40,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
id: 'gemini-2.5-pro',
|
||||
name: 'Gemini 2.5 Pro',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 15,
|
||||
output: 60,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
id: 'gemini-3-pro-preview',
|
||||
name: 'Gemini 3 Pro',
|
||||
context: 1_048_576,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 25,
|
||||
output: 100,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
];
|
||||
@@ -1,355 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const { Context } = require('../../util/context');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Service class for integrating with Groq AI's language models.
|
||||
* Extends BaseService to provide chat completion capabilities through the Groq API.
|
||||
* Implements the puter-chat-completion interface for model management and text generation.
|
||||
* Supports both streaming and non-streaming responses, handles multiple models including
|
||||
* various versions of Llama, Mixtral, and Gemma, and manages usage tracking.
|
||||
* @class GroqAIService
|
||||
* @extends BaseService
|
||||
*/
|
||||
class GroqAIService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
|
||||
meteringService;
|
||||
static MODULES = {
|
||||
Groq: require('groq-sdk'),
|
||||
};
|
||||
|
||||
/**
|
||||
* Initializes the GroqAI service by setting up the Groq client and registering with the AI chat provider
|
||||
* @returns {Promise<void>}
|
||||
* @private
|
||||
*/
|
||||
async _init () {
|
||||
const Groq = require('groq-sdk');
|
||||
this.client = new Groq({
|
||||
apiKey: this.config.apiKey,
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
this.meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default model ID for the Groq AI service
|
||||
* @returns {string} The default model ID 'llama-3.1-8b-instant'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'llama-3.1-8b-instant';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
// They send: { "object": "list", data }
|
||||
const funny_wrapper = await this.client.models.list();
|
||||
return funny_wrapper.data;
|
||||
},
|
||||
/**
|
||||
* Completes a chat interaction using the Groq API
|
||||
* @param {Object} options - The completion options
|
||||
* @param {Array<Object>} options.messages - Array of message objects containing the conversation history
|
||||
* @param {string} [options.model] - The model ID to use for completion. Defaults to service's default model
|
||||
* @param {boolean} [options.stream] - Whether to stream the response
|
||||
* @returns {TypedValue|Object} Returns either a TypedValue with streaming response or completion object with usage stats
|
||||
*/
|
||||
async complete ({ messages, model, stream, tools, max_tokens, temperature }) {
|
||||
model = model ?? this.get_default_model();
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
// Curiously, DeepSeek has the exact same deviation
|
||||
if ( message.tool_calls && Array.isArray(message.content) ) {
|
||||
message.content = '';
|
||||
}
|
||||
}
|
||||
|
||||
const actor = Context.get('actor');
|
||||
|
||||
const completion = await this.client.chat.completions.create({
|
||||
messages,
|
||||
model,
|
||||
stream,
|
||||
tools,
|
||||
max_completion_tokens: max_tokens, // max_tokens has been deprecated
|
||||
temperature,
|
||||
});
|
||||
|
||||
const modelDetails = (await this.models_()).find(m => m.id === model);
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
deviations: {
|
||||
index_usage_from_stream_chunk: chunk =>
|
||||
chunk.x_groq?.usage,
|
||||
},
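|
||||
// Groq's stream chunks nest usage under `x_groq` rather than a top-level
|
||||
// `usage` field; an illustrative chunk shape (hypothetical numbers):
|
||||
//   { x_groq: { usage: { prompt_tokens: 10, completion_tokens: 5 } } }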
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `groq:${modelDetails.id}`);
|
||||
// Still return legacy cost calculation for compatibility
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an array of available AI models with their specifications
|
||||
*
|
||||
* Each model object contains:
|
||||
* - id: Unique identifier for the model
|
||||
* - name: Human-readable name
|
||||
* - context: Maximum context window size in tokens
|
||||
* - cost: Pricing details including currency and token rates
|
||||
*
|
||||
* @returns {Array<Object>} Array of model specification objects
|
||||
*/
|
||||
models_ () {
|
||||
return [
|
||||
{
|
||||
id: 'gemma2-9b-it',
|
||||
name: 'Gemma 2 9B 8k',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 20,
|
||||
output: 20,
|
||||
},
|
||||
max_tokens: 8192,
|
||||
},
|
||||
{
|
||||
id: 'gemma-7b-it',
|
||||
name: 'Gemma 7B 8k Instruct',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 7,
|
||||
output: 7,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama3-groq-70b-8192-tool-use-preview',
|
||||
name: 'Llama 3 Groq 70B Tool Use Preview 8k',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 89,
|
||||
output: 89,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama3-groq-8b-8192-tool-use-preview',
|
||||
name: 'Llama 3 Groq 8B Tool Use Preview 8k',
|
||||
context: 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 19,
|
||||
output: 19,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.1-70b-versatile',
|
||||
'name': 'Llama 3.1 70B Versatile 128k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 59,
|
||||
'output': 79,
|
||||
},
|
||||
},
|
||||
{
|
||||
// This was only available on their Discord, not
|
||||
// on the pricing page.
|
||||
'id': 'llama-3.1-70b-specdec',
|
||||
'name': 'Llama 3.1 70B SpecDec 128k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 59,
|
||||
'output': 99,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.1-8b-instant',
|
||||
'name': 'Llama 3.1 8B Instant 128k',
|
||||
'context': 131072,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 5,
|
||||
'output': 8,
|
||||
},
|
||||
max_tokens: 131072,
|
||||
},
|
||||
{
|
||||
id: 'meta-llama/llama-guard-4-12b',
|
||||
name: 'Llama Guard 4 12B',
|
||||
context: 131072,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1000000,
|
||||
input: 20,
|
||||
output: 20,
|
||||
},
|
||||
max_tokens: 1024,
|
||||
},
|
||||
{
|
||||
id: 'meta-llama/llama-prompt-guard-2-86m',
|
||||
name: 'Prompt Guard 2 86M',
|
||||
context: 512,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1000000,
|
||||
input: 4,
|
||||
output: 4,
|
||||
},
|
||||
max_tokens: 512,
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.2-1b-preview',
|
||||
'name': 'Llama 3.2 1B (Preview) 8k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 4,
|
||||
'output': 4,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-3.2-3b-preview',
|
||||
'name': 'Llama 3.2 3B (Preview) 8k',
|
||||
'context': 128000,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 6,
|
||||
'output': 6,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama-3.2-11b-vision-preview',
|
||||
name: 'Llama 3.2 11B Vision 8k (Preview)',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 18,
|
||||
output: 18,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'llama-3.2-90b-vision-preview',
|
||||
name: 'Llama 3.2 90B Vision 8k (Preview)',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 90,
|
||||
output: 90,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama3-70b-8192',
|
||||
'name': 'Llama 3 70B 8k',
|
||||
'context': 8192,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 59,
|
||||
'output': 79,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama3-8b-8192',
|
||||
'name': 'Llama 3 8B 8k',
|
||||
'context': 8192,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 5,
|
||||
'output': 8,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'mixtral-8x7b-32768',
|
||||
'name': 'Mixtral 8x7B Instruct 32k',
|
||||
'context': 32768,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 24,
|
||||
'output': 24,
|
||||
},
|
||||
},
|
||||
{
|
||||
'id': 'llama-guard-3-8b',
|
||||
'name': 'Llama Guard 3 8B 8k',
|
||||
'context': 8192,
|
||||
'cost': {
|
||||
'currency': 'usd-cents',
|
||||
'tokens': 1000000,
|
||||
'input': 20,
|
||||
'output': 20,
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
GroqAIService,
|
||||
};
|
||||
@@ -1,621 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const axios = require('axios');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const { Context } = require('../../util/context');
|
||||
const APIError = require('../../api/APIError');
|
||||
const mime = require('mime-types');
|
||||
const path = require('path');
|
||||
|
||||
/**
|
||||
* MistralAIService class extends BaseService to provide integration with the Mistral AI API.
|
||||
* Implements chat completion functionality with support for various Mistral models including
|
||||
* mistral-large, pixtral, codestral, and ministral variants. Handles both streaming and
|
||||
* non-streaming responses, token usage tracking, and model management. Provides cost information
|
||||
* for different models and implements the puter-chat-completion interface.
|
||||
*/
|
||||
class MistralAIService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
|
||||
meteringService;
|
||||
static MODULES = {
|
||||
'@mistralai/mistralai': require('@mistralai/mistralai'),
|
||||
};
|
||||
/**
|
||||
* Initializes the service's cost structure for different Mistral AI models.
|
||||
* Sets up pricing information for various models including token costs for input/output.
|
||||
* Each model entry specifies currency (usd-cents) and costs per million tokens.
|
||||
* @private
|
||||
*/
|
||||
_construct () {
|
||||
this.costs_ = {
|
||||
'mistral-large-latest': {
|
||||
aliases: ['mistral-large-2411'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 600,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'pixtral-large-latest': {
|
||||
aliases: ['pixtral-large-2411'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 600,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-small-latest': {
|
||||
aliases: ['mistral-small-2506'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 20,
|
||||
output: 60,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'codestral-latest': {
|
||||
aliases: ['codestral-2501'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 30,
|
||||
output: 90,
|
||||
},
|
||||
max_tokens: 256000,
|
||||
},
|
||||
'ministral-8b-latest': {
|
||||
aliases: ['ministral-8b-2410'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'ministral-3b-latest': {
|
||||
aliases: ['ministral-3b-2410'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 4,
|
||||
output: 4,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'pixtral-12b': {
|
||||
aliases: ['pixtral-12b-2409'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 15,
|
||||
output: 15,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-nemo': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 15,
|
||||
output: 15,
|
||||
},
|
||||
},
|
||||
'open-mistral-7b': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 25,
|
||||
output: 25,
|
||||
},
|
||||
},
|
||||
'open-mixtral-8x7b': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 7,
|
||||
output: 7,
|
||||
},
|
||||
},
|
||||
'open-mixtral-8x22b': {
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 2,
|
||||
output: 6,
|
||||
},
|
||||
},
|
||||
'magistral-medium-latest': {
|
||||
aliases: ['magistral-medium-2506'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 200,
|
||||
output: 500,
|
||||
},
|
||||
max_tokens: 40000,
|
||||
},
|
||||
'magistral-small-latest': {
|
||||
aliases: ['magistral-small-2506'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 40000,
|
||||
},
|
||||
'mistral-medium-latest': {
|
||||
aliases: ['mistral-medium-2505'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 40,
|
||||
output: 200,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-moderation-latest': {
|
||||
aliases: ['mistral-moderation-2411'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 8000,
|
||||
},
|
||||
'devstral-small-latest': {
|
||||
aliases: ['devstral-small-2505'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
max_tokens: 128000,
|
||||
},
|
||||
'mistral-saba-latest': {
|
||||
aliases: ['mistral-saba-2502'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 20,
|
||||
output: 60,
|
||||
},
|
||||
},
|
||||
'open-mistral-nemo': {
|
||||
aliases: ['open-mistral-nemo-2407'],
|
||||
license: 'Apache-2.0',
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 10,
|
||||
output: 10,
|
||||
},
|
||||
},
|
||||
'mistral-ocr-latest': {
|
||||
aliases: ['mistral-ocr-2505'],
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
input: 100,
|
||||
output: 300,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Initializes the Mistral client, registers this service with the AI chat
|
||||
* provider, wires up the metering service, and populates available models.
|
||||
* @private
|
||||
*/
|
||||
async _init () {
|
||||
const require = this.require;
|
||||
const { Mistral } = require('@mistralai/mistralai');
|
||||
this.api_base_url = 'https://api.mistral.ai/v1';
|
||||
this.client = new Mistral({
|
||||
apiKey: this.config.apiKey,
|
||||
});
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
|
||||
this.meteringService = this.services.get('meteringService').meteringService;
|
||||
|
||||
// TODO: make this event-driven so it doesn't hold up boot
|
||||
await this.populate_models_();
|
||||
}
|
||||
/**
|
||||
* Populates the internal models array with available Mistral AI models and their configurations.
|
||||
* Makes an API call to fetch model data, then processes and filters models based on cost information.
|
||||
* Each model entry includes id, name, aliases, context window size, capabilities, and pricing.
|
||||
* @private
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async populate_models_ () {
|
||||
const resp = await axios({
|
||||
method: 'get',
|
||||
url: `${this.api_base_url}/models`,
|
||||
headers: {
|
||||
Authorization: `Bearer ${this.config.apiKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
const response_json = resp.data;
|
||||
const models = response_json.data;
|
||||
this.models_array_ = [];
|
||||
for ( const api_model of models ) {
|
||||
|
||||
let cost = this.costs_[api_model.id];
|
||||
if ( ! cost ) {
|
||||
for ( const alias of api_model.aliases ) {
|
||||
cost = this.costs_[alias];
|
||||
if ( cost ) break;
|
||||
}
|
||||
}
|
||||
if ( ! cost ) continue;
|
||||
const model = {
|
||||
...cost,
|
||||
id: api_model.id,
|
||||
name: api_model.description,
|
||||
aliases: api_model.aliases,
|
||||
context: api_model.max_context_length,
|
||||
capabilities: api_model.capabilities,
|
||||
vision: api_model.capabilities.vision,
|
||||
};
|
||||
|
||||
this.models_array_.push(model);
|
||||
}
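|
||||
// Note: models without a costs_ entry (matched by id or alias) never reach
|
||||
// models_array_; e.g. a hypothetical new API model with no pricing entry is
|
||||
// skipped by the `continue` above rather than being exposed without a cost.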
|
||||
// return resp.data;
|
||||
}
|
||||
/**
|
||||
* Returns the default model identifier for the Mistral AI service
|
||||
* @returns {string} The default model ID 'mistral-large-latest'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'mistral-large-latest';
|
||||
}
|
||||
static IMPLEMENTS = {
|
||||
'driver-capabilities': {
|
||||
supports_test_mode (iface, method_name) {
|
||||
return iface === 'puter-ocr' && method_name === 'recognize';
|
||||
},
|
||||
},
|
||||
'puter-ocr': {
|
||||
async recognize ({
|
||||
source,
|
||||
model,
|
||||
pages,
|
||||
includeImageBase64,
|
||||
imageLimit,
|
||||
imageMinSize,
|
||||
bboxAnnotationFormat,
|
||||
documentAnnotationFormat,
|
||||
test_mode,
|
||||
}) {
|
||||
if ( test_mode ) {
|
||||
return this._sampleOcrResponse();
|
||||
}
|
||||
if ( ! source ) {
|
||||
throw APIError.create('missing_required_argument', {
|
||||
interface_name: 'puter-ocr',
|
||||
method_name: 'recognize',
|
||||
arg_name: 'source',
|
||||
});
|
||||
}
|
||||
|
||||
const document = await this._buildDocumentChunkFromSource(source);
|
||||
const payload = {
|
||||
model: model ?? 'mistral-ocr-latest',
|
||||
document,
|
||||
};
|
||||
if ( Array.isArray(pages) ) {
|
||||
payload.pages = pages;
|
||||
}
|
||||
if ( typeof includeImageBase64 === 'boolean' ) {
|
||||
payload.includeImageBase64 = includeImageBase64;
|
||||
}
|
||||
if ( typeof imageLimit === 'number' ) {
|
||||
payload.imageLimit = imageLimit;
|
||||
}
|
||||
if ( typeof imageMinSize === 'number' ) {
|
||||
payload.imageMinSize = imageMinSize;
|
||||
}
|
||||
if ( bboxAnnotationFormat !== undefined ) {
|
||||
payload.bboxAnnotationFormat = bboxAnnotationFormat;
|
||||
}
|
||||
if ( documentAnnotationFormat !== undefined ) {
|
||||
payload.documentAnnotationFormat = documentAnnotationFormat;
|
||||
}
|
||||
|
||||
const response = await this.client.ocr.process(payload);
|
||||
const annotationsRequested = (
|
||||
payload.documentAnnotationFormat !== undefined ||
|
||||
payload.bboxAnnotationFormat !== undefined
|
||||
);
|
||||
this._recordOcrUsage(response, payload.model, {
|
||||
annotationsRequested,
|
||||
});
|
||||
return this._normalizeOcrResponse(response);
|
||||
},
|
||||
},
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return this.models_array_;
|
||||
},
|
||||
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
return this.models_array_.map(m => m.id);
|
||||
},
|
||||
|
||||
/**
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
for ( const message of messages ) {
|
||||
if ( message.tool_calls ) {
|
||||
message.toolCalls = message.tool_calls;
|
||||
delete message.tool_calls;
|
||||
}
|
||||
if ( message.tool_call_id ) {
|
||||
message.toolCallId = message.tool_call_id;
|
||||
delete message.tool_call_id;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('MESSAGES TO MISTRAL', messages);
|
||||
|
||||
const actor = Context.get('actor');
|
||||
const completion = await this.client.chat[
|
||||
stream ? 'stream' : 'complete'
|
||||
]({
|
||||
model: model ?? this.get_default_model(),
|
||||
...(tools ? { tools } : {}),
|
||||
messages,
|
||||
max_tokens: max_tokens,
|
||||
temperature,
|
||||
});
|
||||
|
||||
const modelDetails = this.models_array_.find(m => m.id === (model ?? this.get_default_model()));
|
||||
|
||||
return await OpenAIUtil.handle_completion_output({
|
||||
deviations: {
|
||||
index_usage_from_stream_chunk: chunk => {
|
||||
if ( ! chunk.usage ) return;
|
||||
|
||||
const snake_usage = {};
|
||||
for ( const key in chunk.usage ) {
|
||||
const snakeKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
|
||||
snake_usage[snakeKey] = chunk.usage[key];
|
||||
}
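|
||||
// e.g. 'promptTokens' -> 'prompt_tokens', 'completionTokens' -> 'completion_tokens'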
|
||||
|
||||
return snake_usage;
|
||||
},
|
||||
chunk_but_like_actually: chunk => chunk.data,
|
||||
index_tool_calls_from_stream_choice: choice => choice.delta.toolCalls,
|
||||
coerce_completion_usage: completion => ({
|
||||
prompt_tokens: completion.usage.promptTokens,
|
||||
completion_tokens: completion.usage.completionTokens,
|
||||
}),
|
||||
},
|
||||
completion,
|
||||
stream,
|
||||
usage_calculator: ({ usage }) => {
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `mistral:${modelDetails.id}`);
|
||||
// Still return legacy cost calculation for compatibility
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
async _buildDocumentChunkFromSource (fileFacade) {
|
||||
const dataUrl = await this._safeFileValue(fileFacade, 'data_url');
|
||||
const webUrl = await this._safeFileValue(fileFacade, 'web_url');
|
||||
const filePath = await this._safeFileValue(fileFacade, 'path');
|
||||
const fsNode = await this._safeFileValue(fileFacade, 'fs-node');
|
||||
const fileName = filePath ? path.basename(filePath) : fsNode?.name;
|
||||
const inferredMime = this._inferMimeFromName(fileName);
|
||||
|
||||
if ( webUrl ) {
|
||||
return this._chunkFromUrl(webUrl, fileName, inferredMime);
|
||||
}
|
||||
if ( dataUrl ) {
|
||||
const mimeFromUrl = this._extractMimeFromDataUrl(dataUrl) ?? inferredMime;
|
||||
return this._chunkFromUrl(dataUrl, fileName, mimeFromUrl);
|
||||
}
|
||||
|
||||
const buffer = await this._safeFileValue(fileFacade, 'buffer');
|
||||
if ( ! buffer ) {
|
||||
throw APIError.create('field_invalid', null, {
|
||||
key: 'source',
|
||||
expected: 'file, data URL, or web URL',
|
||||
});
|
||||
}
|
||||
const mimeType = inferredMime ?? 'application/octet-stream';
|
||||
const generatedDataUrl = this._createDataUrl(buffer, mimeType);
|
||||
return this._chunkFromUrl(generatedDataUrl, fileName, mimeType);
|
||||
}
|
||||
|
||||
async _safeFileValue (fileFacade, key) {
|
||||
if ( !fileFacade || typeof fileFacade.get !== 'function' ) return undefined;
|
||||
const maybeCache = fileFacade.values?.values;
|
||||
if ( maybeCache && Object.prototype.hasOwnProperty.call(maybeCache, key) ) {
|
||||
return maybeCache[key];
|
||||
}
|
||||
try {
|
||||
return await fileFacade.get(key);
|
||||
} catch (e) {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
_chunkFromUrl (url, fileName, mimeType) {
|
||||
const lowerName = fileName?.toLowerCase();
|
||||
const urlLooksPdf = /\.pdf($|\?)/i.test(url);
|
||||
const mimeLooksPdf = mimeType?.includes('pdf');
|
||||
const isPdf = mimeLooksPdf || urlLooksPdf || (lowerName ? lowerName.endsWith('.pdf') : false);
|
||||
|
||||
if ( isPdf ) {
|
||||
const chunk = {
|
||||
type: 'document_url',
|
||||
documentUrl: url,
|
||||
};
|
||||
if ( fileName ) {
|
||||
chunk.documentName = fileName;
|
||||
}
|
||||
return chunk;
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'image_url',
|
||||
imageUrl: {
|
||||
url,
|
||||
},
|
||||
};
|
||||
}
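|
||||
// Illustrative behaviour (hypothetical inputs):
|
||||
//   _chunkFromUrl('https://example.test/a.pdf', 'a.pdf', 'application/pdf')
|
||||
//     -> { type: 'document_url', documentUrl: '...', documentName: 'a.pdf' }
|
||||
// while a PNG data URL falls through to { type: 'image_url', imageUrl: { url } }.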
|
||||
|
||||
_inferMimeFromName (name) {
|
||||
if ( ! name ) return undefined;
|
||||
return mime.lookup(name) || undefined;
|
||||
}
|
||||
|
||||
_extractMimeFromDataUrl (url) {
|
||||
if ( typeof url !== 'string' ) return undefined;
|
||||
const match = url.match(/^data:([^;,]+)[;,]/);
|
||||
return match ? match[1] : undefined;
|
||||
}
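|
||||
// e.g. _extractMimeFromDataUrl('data:application/pdf;base64,AAAA') === 'application/pdf'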
|
||||
|
||||
_createDataUrl (buffer, mimeType) {
|
||||
return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
|
||||
}
|
||||
|
||||
_normalizeOcrResponse (response) {
|
||||
if ( ! response ) return {};
|
||||
const normalized = {
|
||||
model: response.model,
|
||||
pages: response.pages ?? [],
|
||||
usage_info: response.usageInfo,
|
||||
};
|
||||
const blocks = [];
|
||||
if ( Array.isArray(response.pages) ) {
|
||||
for ( const page of response.pages ) {
|
||||
if ( typeof page?.markdown !== 'string' ) continue;
|
||||
const lines = page.markdown.split('\n').map(line => line.trim()).filter(Boolean);
|
||||
for ( const line of lines ) {
|
||||
blocks.push({
|
||||
type: 'text/mistral:LINE',
|
||||
text: line,
|
||||
page: page.index,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
normalized.blocks = blocks;
|
||||
if ( blocks.length ) {
|
||||
normalized.text = blocks.map(block => block.text).join('\n');
|
||||
} else if ( Array.isArray(response.pages) ) {
|
||||
normalized.text = response.pages.map(page => page?.markdown || '').join('\n\n').trim();
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
_recordOcrUsage (response, model, { annotationsRequested } = {}) {
|
||||
try {
|
||||
if ( ! this.meteringService ) return;
|
||||
const actor = Context.get('actor');
|
||||
if ( ! actor ) return;
|
||||
const pagesProcessed =
|
||||
response?.usageInfo?.pagesProcessed ??
|
||||
(Array.isArray(response?.pages) ? response.pages.length : 1);
|
||||
this.meteringService.incrementUsage(actor, 'mistral-ocr:ocr:page', pagesProcessed);
|
||||
if ( annotationsRequested ) {
|
||||
this.meteringService.incrementUsage(actor, 'mistral-ocr:annotations:page', pagesProcessed);
|
||||
}
|
||||
} catch (e) {
|
||||
// ignore metering failures to avoid blocking OCR results
|
||||
}
|
||||
}
|
||||
|
||||
_sampleOcrResponse () {
|
||||
const markdown = 'Sample OCR output (test mode).';
|
||||
return {
|
||||
model: 'mistral-ocr-latest',
|
||||
pages: [
|
||||
{
|
||||
index: 0,
|
||||
markdown,
|
||||
images: [],
|
||||
dimensions: null,
|
||||
},
|
||||
],
|
||||
blocks: [
|
||||
{
|
||||
type: 'text/mistral:LINE',
|
||||
text: markdown,
|
||||
page: 0,
|
||||
},
|
||||
],
|
||||
text: markdown,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { MistralAIService };
|
||||
@@ -1,217 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
const BaseService = require('../../services/BaseService');
|
||||
const OpenAIUtil = require('./lib/OpenAIUtil');
|
||||
const { Context } = require('../../util/context');
|
||||
const openai = require('openai');
|
||||
const uuidv4 = require('uuid').v4;
|
||||
const axios = require('axios');
|
||||
/**
|
||||
* OllamaService class - Provides integration with Ollama's API for chat completions
|
||||
* Extends BaseService to implement the puter-chat-completion interface.
|
||||
* Handles model management, message adaptation, streaming responses,
|
||||
* and usage tracking for Ollama's language models.
|
||||
* @extends BaseService
|
||||
*/
|
||||
class OllamaService extends BaseService {
|
||||
static MODULES = {
|
||||
kv: globalThis.kv,
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the model identifier unchanged; a hook for model name adaptation.
|
||||
* @param {string} model - The requested model identifier
|
||||
* @returns {string} The model identifier to use with the Ollama API
|
||||
*/
|
||||
adapt_model (model) {
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the Ollama service by setting up the Ollama client and registering with the AI chat provider
|
||||
* @private
|
||||
* @returns {Promise<void>} Resolves when initialization is complete
|
||||
*/
|
||||
async _init () {
|
||||
// Ollama typically runs on HTTP, not HTTPS
|
||||
this.api_base_url = this.config?.api_base_url || 'http://localhost:11434';
|
||||
|
||||
// OpenAI SDK is used to interact with the Ollama API
|
||||
this.openai = new openai.OpenAI({
|
||||
apiKey: 'ollama', // Ollama doesn't use an API key, it uses the "ollama" string
|
||||
baseURL: `${this.api_base_url }/v1`,
|
||||
});
|
||||
this.kvkey = uuidv4();
|
||||
|
||||
const svc_aiChat = this.services.get('ai-chat');
|
||||
svc_aiChat.register_provider({
|
||||
service_name: this.service_name,
|
||||
alias: true,
|
||||
});
|
||||
// We don't need to meter usage for Ollama because it's a local service
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default model identifier for the Ollama service
|
||||
* @returns {string} The default model ID 'gpt-oss:20b'
|
||||
*/
|
||||
get_default_model () {
|
||||
return 'gpt-oss:20b';
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
'puter-chat-completion': {
|
||||
/**
|
||||
* Returns a list of available models and their details.
|
||||
* See AIChatService for more information.
|
||||
*
|
||||
* @returns Promise<Array<Object>> Array of model details
|
||||
*/
|
||||
async models () {
|
||||
return await this.models_();
|
||||
},
|
||||
/**
|
||||
* Returns a list of available model names including their aliases
|
||||
* @returns {Promise<string[]>} Array of model identifiers and their aliases
|
||||
* @description Retrieves all available model IDs and their aliases,
|
||||
* flattening them into a single array of strings that can be used for model selection
|
||||
*/
|
||||
async list () {
|
||||
const models = await this.models_();
|
||||
const model_names = [];
|
||||
for ( const model of models ) {
|
||||
model_names.push(model.id);
|
||||
}
|
||||
return model_names;
|
||||
},
|
||||
|
||||
/**
|
||||
* AI Chat completion method.
|
||||
* See AIChatService for more details.
|
||||
*/
|
||||
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
|
||||
model = this.adapt_model(model);
|
||||
|
||||
if ( model.startsWith('ollama:') ) {
|
||||
model = model.slice('ollama:'.length);
|
||||
}
|
||||
|
||||
const actor = Context.get('actor');
|
||||
|
||||
messages = await OpenAIUtil.process_input_messages(messages);
|
||||
const sdk_params = {
|
||||
messages,
|
||||
model: model ?? this.get_default_model(),
|
||||
...(tools ? { tools } : {}),
|
||||
max_tokens,
|
||||
temperature: temperature, // default to 1.0
|
||||
stream,
|
||||
...(stream ? {
|
||||
stream_options: { include_usage: true },
|
||||
} : {}),
|
||||
};
|
||||
|
||||
const completion = await this.openai.chat.completions.create(sdk_params);
|
||||
|
||||
const modelDetails = (await this.models_()).find(m => m.id === `ollama:${model}`);
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: ({ usage }) => {
|
||||
|
||||
const trackedUsage = {
|
||||
prompt: (usage.prompt_tokens ?? 1 ) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
|
||||
completion: usage.completion_tokens ?? 1,
|
||||
input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
||||
};
|
||||
const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
|
||||
return [k, 0]; // override to 0 since local is free
|
||||
}));
|
||||
this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, { costOverwrites });
|
||||
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
|
||||
model_details: modelDetails,
|
||||
});
|
||||
return legacyCostCalculator({ usage });
|
||||
},
|
||||
stream,
|
||||
completion,
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves available AI models and their specifications
|
||||
* @returns Array of model objects containing:
|
||||
* - id: Model identifier string
|
||||
* - name: Human readable model name
|
||||
* - context: Maximum context window size
|
||||
* - cost: Pricing information object with currency and rates
|
||||
* @private
|
||||
*/
|
||||
async models_ (_rawPriceKeys = false) {
|
||||
|
||||
let models = this.modules.kv.get(`${this.kvkey}:models`);
|
||||
if ( ! models ) {
|
||||
try {
|
||||
const resp = await axios.request({
|
||||
method: 'GET',
|
||||
url: `${this.api_base_url}/api/tags`,
|
||||
});
|
||||
models = resp.data.models || [];
|
||||
if ( models.length > 0 ) {
|
||||
this.modules.kv.set(`${this.kvkey}:models`, models);
|
||||
}
|
||||
} catch ( error ) {
|
||||
this.log.error('Failed to fetch models from Ollama:', error.message);
|
||||
// Return empty array if Ollama is not available
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
if ( !models || models.length === 0 ) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const coerced_models = [];
|
||||
for ( const model of models ) {
|
||||
// Ollama API returns models with 'name' property, not 'model'
|
||||
const modelName = model.name || model.model || 'unknown';
|
||||
const microcentCosts = {
|
||||
input: 0,
|
||||
output: 0,
|
||||
};
|
||||
coerced_models.push({
|
||||
id: `ollama:${ modelName}`,
|
||||
name: `${modelName} (Ollama)`,
|
||||
max_tokens: model.size || model.max_context || 8192,
|
||||
cost: {
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000,
|
||||
...microcentCosts,
|
||||
},
|
||||
});
|
||||
}
|
||||
console.log('coerced_models', coerced_models);
|
||||
return coerced_models;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
OllamaService,
|
||||
};
|
||||
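
Standalone sketch of the client setup this service relies on, assuming a local Ollama instance on its default port, the `openai` npm package installed, and the model already pulled; the prompt is illustrative.

// Minimal sketch: point the OpenAI SDK at Ollama's OpenAI-compatible endpoint,
// exactly as OllamaService does above. Requires a running local Ollama.
const { OpenAI } = require('openai');

const client = new OpenAI({
    apiKey: 'ollama',                     // Ollama ignores the key; any string works
    baseURL: 'http://localhost:11434/v1', // Ollama's OpenAI-compatible endpoint
});

async function demo () {
    const completion = await client.chat.completions.create({
        model: 'gpt-oss:20b', // must already be pulled into Ollama
        messages: [{ role: 'user', content: 'Say hello.' }],
    });
    console.log(completion.choices[0].message.content);
}

demo().catch(console.error);
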
@@ -1,57 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}

import BaseService from '../../../services/BaseService.js';
import { OpenAICompletionService } from './OpenAICompletionService.mjs';

export class OpenAICompletionServiceWrapper extends BaseService {
    /** @type {OpenAICompletionService} */
    openAICompletionService;

    _init () {
        this.openAICompletionService = new OpenAICompletionService({
            serviceName: this.service_name,
            config: this.config,
            globalConfig: this.global_config,
            aiChatService: this.services.get('ai-chat'),
            meteringService: this.services.get('meteringService').meteringService,
        });
    }

    async check_moderation (text) {
        return await this.openAICompletionService.checkModeration(text);
    }

    get_default_model () {
        return this.openAICompletionService.get_default_model();
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: Object.getOwnPropertyNames(OpenAICompletionService.prototype)
            .filter(n => n !== 'constructor')
            .reduce((acc, fn) => ({
                ...acc,
                [fn]: async function (...a) {
                    return await this.openAICompletionService[fn](...a);
                },
            }), {}),
    };
}
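
The wrapper's IMPLEMENTS block uses prototype reflection to forward every method of the inner service. A self-contained sketch of that pattern, with invented class names:

// Illustrative prototype-reflection delegation: every method on an inner
// class is re-exposed as an async forwarder, as the wrapper above does.
class Inner {
    greet (name) { return `hello ${name}`; }
    add (a, b) { return a + b; }
}

const forwarders = Object.getOwnPropertyNames(Inner.prototype)
    .filter(n => n !== 'constructor')
    .reduce((acc, fn) => ({
        ...acc,
        [fn]: async function (...args) {
            return await this.inner[fn](...args);
        },
    }), {});

const wrapper = Object.assign({ inner: new Inner() }, forwarders);
wrapper.greet('world').then(console.log); // "hello world"
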
@@ -1,206 +0,0 @@
// TODO DS: centralize somewhere

export const OPEN_AI_MODELS = [
    {
        id: 'gpt-5.1',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 25,
            output: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-chat-latest',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-5-2025-08-07',
        aliases: ['gpt-5'],
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-mini-2025-08-07',
        aliases: ['gpt-5-mini'],
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 25,
            output: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-nano-2025-08-07',
        aliases: ['gpt-5-nano'],
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 5,
            output: 40,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-chat-latest',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 125,
            output: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 250,
            output: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o-mini',
        max_tokens: 16384,
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 15,
            output: 60,
        },
    },
    {
        id: 'o1',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 1500,
            output: 6000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o1-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 300,
            output: 1200,
        },
        max_tokens: 65536,
    },
    {
        id: 'o1-pro',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 15000,
            output: 60000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 1000,
            output: 4000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 110,
            output: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'o4-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 110,
            output: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'gpt-4.1',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 200,
            output: 800,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-mini',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 40,
            output: 160,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-nano',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 10,
            output: 40,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.5-preview',
        cost: {
            currency: 'usd-cents',
            tokens: 1_000_000,
            input: 7500,
            output: 15000,
        },
    },
];
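
A worked example of reading this cost table: with `currency: 'usd-cents'` and `tokens: 1_000_000`, the `input`/`output` fields are cents per million tokens. The helper function below is illustrative, not part of the codebase.

// Hypothetical cost helper for the model entries above.
function costUsdCents (model, usage) {
    const per = model.cost.tokens; // 1_000_000
    return (usage.prompt_tokens * model.cost.input +
            usage.completion_tokens * model.cost.output) / per;
}

const gpt5 = { cost: { currency: 'usd-cents', tokens: 1_000_000, input: 125, output: 1000 } };
// 10k prompt tokens + 2k completion tokens:
console.log(costUsdCents(gpt5, { prompt_tokens: 10_000, completion_tokens: 2_000 }));
// => 3.25 cents (1.25 for input + 2 for output)
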
@@ -1,214 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const OpenAIUtil = require('./lib/OpenAIUtil');
const { Context } = require('../../util/context');
const openai = require('openai');
const uuidv4 = require('uuid').v4;
const axios = require('axios');

/**
 * OpenRouterService class - Provides integration with OpenRouter's API for chat completions
 * Extends BaseService to implement the puter-chat-completion interface.
 * Handles model management, message adaptation, streaming responses,
 * and usage tracking for the many upstream models proxied by OpenRouter.
 * @extends BaseService
 */
class OpenRouterService extends BaseService {
    static MODULES = {
        kv: globalThis.kv,
    };

    // TODO DS: extract this into driver wrapper like openAiService
    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            async models () {
                return await this.models();
            },
            async list () {
                return await this.list();
            },
            async complete (...params) {
                return await this.complete(...params);
            },
        },
    };

    /**
     * Adapts a requested model identifier before use.
     * OpenRouter model names are passed through unchanged.
     * @returns {string} The (unchanged) model identifier
     */
    adapt_model (model) {
        return model;
    }

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    meteringService;

    /**
     * Initializes the OpenRouter service by setting up the OpenAI client and registering with the AI chat provider
     * @private
     * @returns {Promise<void>} Resolves when initialization is complete
     */
    async _init () {
        this.api_base_url = 'https://openrouter.ai/api/v1';
        this.openai = new openai.OpenAI({
            apiKey: this.config.apiKey,
            baseURL: this.api_base_url,
        });
        this.kvkey = uuidv4();

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
    }

    /**
     * Returns the default model identifier for the OpenRouter service
     * @returns {string|undefined} No default model is currently configured
     */
    get_default_model () {
    }
    /**
     * Returns a list of available model names including their aliases
     * @returns {Promise<string[]>} Array of model identifiers and their aliases
     * @description Retrieves all available model IDs and their aliases,
     * flattening them into a single array of strings that can be used for model selection
     */
    async list () {
        const models = await this.models();
        const model_names = [];
        for ( const model of models ) {
            model_names.push(model.id);
        }
        return model_names;
    }

    /**
     * AI Chat completion method.
     * See AIChatService for more details.
     */
    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
        model = this.adapt_model(model);

        if ( model.startsWith('openrouter:') ) {
            model = model.slice('openrouter:'.length);
        }

        if ( model === 'openrouter/auto' ) {
            throw APIError.create('field_invalid', null, {
                key: 'model',
                expected: 'allowed model',
                got: 'disallowed model',
            });
        }

        const actor = Context.get('actor');

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.openai.chat.completions.create({
            messages,
            model: model ?? this.get_default_model(),
            ...(tools ? { tools } : {}),
            max_tokens,
            temperature: temperature, // default to 1.0
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
            usage: { include: true },
        });

        const modelDetails = (await this.models()).find(m => m.id === `openrouter:${model}`);
        const rawPriceModelDetails = (await this.models(true)).find(m => m.id === `openrouter:${model}`);
        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                // custom OpenRouter logic because their pricing is unusual
                const trackedUsage = {
                    prompt: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion: usage.completion_tokens ?? 0,
                    input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };
                const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
                    return [k, rawPriceModelDetails.cost[k] * trackedUsage[k]];
                }));
                this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, costOverwrites);
                const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
                    model_details: modelDetails,
                });
                return legacyCostCalculator({ usage });
            },
            stream,
            completion,
        });
    }

    /**
     * Retrieves available AI models and their specifications
     * @returns Array of model objects containing:
     *  - id: Model identifier string
     *  - name: Human readable model name
     *  - context: Maximum context window size
     *  - cost: Pricing information object with currency and rates
     */
    async models (rawPriceKeys = false) {
        let models = this.modules.kv.get(`${this.kvkey}:models`);
        if ( ! models ) {
            try {
                const resp = await axios.request({
                    method: 'GET',
                    url: `${this.api_base_url}/models`,
                });

                models = resp.data.data;
                this.modules.kv.set(`${this.kvkey}:models`, models);
            } catch (e) {
                console.log(e);
            }
        }
        // If the fetch above failed, `models` is still undefined
        if ( ! models ) return [];
        const coerced_models = [];
        for ( const model of models ) {
            const microcentCosts = rawPriceKeys
                ? Object.fromEntries(Object.entries(model.pricing).map(([k, v]) => [k, Math.round(v * 1_000_000 * 100)]))
                : {
                    input: Math.round(model.pricing.prompt * 1_000_000 * 100),
                    output: Math.round(model.pricing.completion * 1_000_000 * 100),
                };
            coerced_models.push({
                id: `openrouter:${model.id}`,
                name: `${model.name} (OpenRouter)`,
                max_tokens: model.top_provider.max_completion_tokens,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    ...microcentCosts,
                },
            });
        }
        return coerced_models;
    }
}

module.exports = {
    OpenRouterService,
};
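
A quick illustration of the price normalization in `models()` above: OpenRouter reports per-token USD prices, and the service converts them to usd-cents per million tokens. The sample pricing values are invented.

// Sample OpenRouter pricing (per-token USD, as strings) converted the same way.
const pricing = { prompt: '0.0000025', completion: '0.00001' };

const input = Math.round(pricing.prompt * 1_000_000 * 100);      // 250 usd-cents per 1M tokens
const output = Math.round(pricing.completion * 1_000_000 * 100); // 1000 usd-cents per 1M tokens
console.log({ input, output }); // { input: 250, output: 1000 }
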
@@ -1,164 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { AdvancedBase } = require('@heyputer/putility');
const config = require('../../config');

/**
 * PuterAIModule class extends AdvancedBase to manage and register various AI services.
 * This module handles the initialization and registration of multiple AI-related services
 * including text processing, speech synthesis, chat completion, and image generation.
 * Services are conditionally registered based on configuration settings, allowing for
 * flexible deployment with different AI providers like AWS, OpenAI, Claude, Together AI,
 * Mistral, Groq, and XAI.
 * @extends AdvancedBase
 */
class PuterAIModule extends AdvancedBase {
    /**
     * Module for managing AI-related services in the Puter platform
     * Extends AdvancedBase to provide core functionality
     * Handles registration and configuration of various AI services like OpenAI, Claude, AWS services etc.
     */
    async install (context) {
        const services = context.get('services');

        const { AIInterfaceService } = require('./AIInterfaceService');
        services.registerService('__ai-interfaces', AIInterfaceService);

        // TODO: services should govern their own availability instead of
        //       the module deciding what to register

        if ( config?.services?.['aws-textract']?.aws ) {
            const { AWSTextractService } = require('./AWSTextractService');
            services.registerService('aws-textract', AWSTextractService);
        }

        if ( config?.services?.['aws-polly']?.aws ) {
            const { AWSPollyService } = require('./AWSPollyService');
            services.registerService('aws-polly', AWSPollyService);
        }

        if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
            const { ElevenLabsTTSService } = require('./ElevenLabsTTSService');
            services.registerService('elevenlabs-tts', ElevenLabsTTSService);

            const { ElevenLabsVoiceChangerService } = require('./ElevenLabsVoiceChangerService');
            services.registerService('elevenlabs-voice-changer', ElevenLabsVoiceChangerService);
        }

        if ( config?.services?.openai || config?.openai ) {
            const { OpenAICompletionServiceWrapper } = require('./OpenAiCompletionService/index.mjs');
            services.registerService('openai-completion', OpenAICompletionServiceWrapper);

            const { OpenAIImageGenerationService } = require('./OpenAIImageGenerationService');
            services.registerService('openai-image-generation', OpenAIImageGenerationService);

            const { OpenAIVideoGenerationService } = require('./OpenAIVideoGenerationService');
            services.registerService('openai-video-generation', OpenAIVideoGenerationService);

            const { OpenAITTSService } = require('./OpenAITTSService');
            services.registerService('openai-tts', OpenAITTSService);

            const { OpenAISpeechToTextService } = require('./OpenAISpeechToTextService');
            services.registerService('openai-speech2txt', OpenAISpeechToTextService);
        }

        if ( config?.services?.claude ) {
            const { ClaudeService } = require('./ClaudeService');
            services.registerService('claude', ClaudeService);
        }

        if ( config?.services?.['together-ai'] ) {
            const { TogetherAIService } = require('./TogetherAIService');
            services.registerService('together-ai', TogetherAIService);

            const { TogetherImageGenerationService } = require('./TogetherImageGenerationService');
            services.registerService('together-image-generation', TogetherImageGenerationService);

            const { TogetherVideoGenerationService } = require('./TogetherVideoGenerationService');
            services.registerService('together-video-generation', TogetherVideoGenerationService);
        }

        if ( config?.services?.['mistral'] ) {
            const { MistralAIService } = require('./MistralAIService');
            services.registerService('mistral', MistralAIService);
        }

        if ( config?.services?.['groq'] ) {
            const { GroqAIService } = require('./GroqAIService');
            services.registerService('groq', GroqAIService);
        }

        if ( config?.services?.['xai'] ) {
            const { XAIService } = require('./XAIService');
            services.registerService('xai', XAIService);
        }

        if ( config?.services?.['deepseek'] ) {
            const { DeepSeekService } = require('./DeepSeekService');
            services.registerService('deepseek', DeepSeekService);
        }
        if ( config?.services?.['gemini'] ) {
            const { GeminiService } = require('./GeminiService/GeminiService.mjs');
            const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');

            services.registerService('gemini', GeminiService);
            services.registerService('gemini-image-generation', GeminiImageGenerationService);
        }
        if ( config?.services?.['openrouter'] ) {
            const { OpenRouterService } = require('./OpenRouterService');
            services.registerService('openrouter', OpenRouterService);
        }

        // Autodiscover the Ollama service, then check whether it's disabled in the config.
        // If config.services.ollama.enabled is undefined, the user hasn't set it, so default to true.
        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
            const ollama_enabled = config?.services?.['ollama']?.enabled;
            if ( ollama_enabled === undefined ) {
                return true;
            }
            return ollama_enabled;
        }).catch(_err => {
            return false;
        });
        // The user can disable Ollama in the config, but by default it's enabled when discovery succeeds
        if ( ollama_available || config?.services?.['ollama']?.enabled ) {
            console.log('Local AI support detected! Registering Ollama');
            const { OllamaService } = require('./OllamaService');
            services.registerService('ollama', OllamaService);
        }

        const { AIChatService } = require('./AIChatService');
        services.registerService('ai-chat', AIChatService);

        const { FakeChatService } = require('./FakeChatService');
        services.registerService('fake-chat', FakeChatService);

        const { AITestModeService } = require('./AITestModeService');
        services.registerService('ai-test-mode', AITestModeService);

        const { UsageLimitedChatService } = require('./UsageLimitedChatService');
        services.registerService('usage-limited-chat', UsageLimitedChatService);
    }
}

module.exports = {
    PuterAIModule,
};
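
For orientation, an illustrative config shape that would drive the registration checks in `install()` above; the keys mirror the `config?.services?.[...]` lookups, but the values are placeholders.

// Placeholder values; only the key names come from the checks above.
const exampleConfig = {
    services: {
        'claude':     { apiKey: 'sk-ant-placeholder' },
        'mistral':    { apiKey: 'placeholder' },
        'openrouter': { apiKey: 'placeholder' },
        'ollama':     { enabled: true }, // set false to opt out even when autodiscovered
    },
};
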
@@ -1,224 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { PassThrough } = require('stream');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { nou } = require('../../util/langutil');
const { Together } = require('together-ai');
const OpenAIUtil = require('./lib/OpenAIUtil');
const { Context } = require('../../util/context');

/**
 * TogetherAIService class provides integration with Together AI's language models.
 * Extends BaseService to implement chat completion functionality through the
 * puter-chat-completion interface. Manages model listings, chat completions,
 * and streaming responses while handling usage tracking and model fallback testing.
 * @extends BaseService
 */
class TogetherAIService extends BaseService {
    /**
     * @type {import('../../services/MeteringService/MeteringService').MeteringService}
     */
    meteringService;
    static MODULES = {
        kv: globalThis.kv,
        uuidv4: require('uuid').v4,
    };

    /**
     * Initializes the TogetherAI service by setting up the API client and registering as a chat provider
     * @async
     * @returns {Promise<void>}
     * @private
     */
    async _init () {
        this.together = new Together({
            apiKey: this.config.apiKey,
        });
        this.kvkey = this.modules.uuidv4();

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.meteringService = this.services.get('meteringService').meteringService;
    }

    /**
     * Returns the default model ID for the Together AI service
     * @returns {string} The ID of the default model (meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo)
     */
    get_default_model () {
        return 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo';
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            /**
             * Returns a list of available models and their details.
             * See AIChatService for more information.
             *
             * @returns Promise<Array<Object>> Array of model details
             */
            async models () {
                return await this.models_();
            },

            /**
             * Returns a list of available model names including their aliases
             * @returns {Promise<string[]>} Array of model identifiers and their aliases
             * @description Retrieves all available model IDs and their aliases,
             * flattening them into a single array of strings that can be used for model selection
             */
            async list () {
                let models = this.modules.kv.get(`${this.kvkey}:models`);
                if ( ! models ) models = await this.models_();
                return models.map(model => model.id);
            },
            /**
             * AI Chat completion method.
             * See AIChatService for more details.
             */
            async complete ({ messages, stream, model }) {
                if ( model === 'model-fallback-test-1' ) {
                    throw new Error('Model Fallback Test 1');
                }

                /** @type {import('together-ai/streaming.mjs').Stream<import("together-ai/resources/chat/completions.mjs").ChatCompletionChunk>} */
                const completion = await this.together.chat.completions.create({
                    model: model ?? this.get_default_model(),
                    messages: messages,
                    stream,
                });

                // Metering integration
                const actor = Context.get('actor');

                const modelDetails = (await this.models_()).find(m => m.id === model || m.aliases?.includes(model));
                const modelId = modelDetails?.id ?? this.get_default_model();

                if ( stream ) {
                    const stream = new PassThrough();
                    const retval = new TypedValue({
                        $: 'stream',
                        content_type: 'application/x-ndjson',
                        chunked: true,
                    }, stream);
                    (async () => {
                        for await ( const chunk of completion ) {
                            // DRY: same as openai
                            if ( chunk.usage ) {
                                // Metering: record usage for streamed chunks
                                const trackedUsage = OpenAIUtil.extractMeteredUsage(chunk.usage);
                                const costOverrides = {
                                    prompt_tokens: trackedUsage.prompt_tokens * (modelDetails?.cost?.input ?? 0),
                                    completion_tokens: trackedUsage.completion_tokens * (modelDetails?.cost?.output ?? 0),
                                };
                                this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelId, costOverrides);
                            }

                            if ( chunk.choices.length < 1 ) continue;
                            if ( chunk.choices[0].finish_reason ) {
                                stream.end();
                                break;
                            }
                            if ( nou(chunk.choices[0].delta.content) ) continue;
                            const str = JSON.stringify({
                                text: chunk.choices[0].delta.content,
                            });
                            stream.write(`${str}\n`);
                        }
                        stream.end();
                    })();

                    return {
                        stream: true,
                        response: retval,
                    };
                }

                const ret = completion.choices[0];

                ret.usage = {
                    input_tokens: completion.usage.prompt_tokens,
                    output_tokens: completion.usage.completion_tokens,
                };

                const trackedUsage = OpenAIUtil.extractMeteredUsage(completion.usage);
                const costOverrides = {
                    prompt_tokens: trackedUsage.prompt_tokens * (modelDetails?.cost?.input ?? 0),
                    completion_tokens: trackedUsage.completion_tokens * (modelDetails?.cost?.output ?? 0),
                };
                // Metering: record usage for non-streamed completion
                this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelId, costOverrides);

                return ret;
            },
        },
    };

    /**
     * Fetches and caches available AI models from Together API
     * @private
     * @returns Array of model objects containing id, name, context length,
     * description and pricing information
     * @remarks Models are cached for 5 minutes in KV store
     */
    async models_ () {
        let models = this.modules.kv.get(`${this.kvkey}:models`);
        if ( models ) return models;
        const api_models = await this.together.models.list();
        models = [];
        for ( const model of api_models ) {
            models.push({
                id: `togetherai:${model.id}`,
                aliases: [model.id],
                name: model.display_name,
                context: model.context_length,
                description: model.description,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: model.pricing.input,
                    output: model.pricing.output,
                },
            });
        }
        models.push({
            id: 'model-fallback-test-1',
            name: 'Model Fallback Test 1',
            context: 1000,
            cost: {
                currency: 'usd-cents',
                tokens: 1_000_000,
                input: 10,
                output: 10,
            },
        });
        this.modules.kv.set(`${this.kvkey}:models`, models, { EX: 5 * 60 });
        return models;
    }
}

module.exports = {
    TogetherAIService,
};
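
A minimal sketch of consuming the `application/x-ndjson` stream emitted by the streaming branch above, where each line is a JSON object with a `text` delta. The `PassThrough` here stands in for the stream the service returns.

// Each NDJSON line carries one { text } delta; print them as they arrive.
const { PassThrough } = require('stream');
const readline = require('readline');

async function printDeltas (readable) {
    const rl = readline.createInterface({ input: readable });
    for await ( const line of rl ) {
        if ( ! line.trim() ) continue;
        const { text } = JSON.parse(line);
        process.stdout.write(text);
    }
}

const demo = new PassThrough();
printDeltas(demo);
demo.write('{"text":"Hello, "}\n');
demo.write('{"text":"world"}\n');
demo.end();
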
@@ -1,159 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const { default: dedent } = require('dedent');
const BaseService = require('../../services/BaseService');
const { PassThrough } = require('stream');
const Streaming = require('./lib/Streaming');

/**
 * UsageLimitedChatService - A specialized chat service that returns resource exhaustion messages.
 * Extends BaseService to provide responses indicating the user has exceeded their usage limits.
 * Follows the same response format as real AI providers but with a custom message about upgrading.
 * Can handle both streaming and non-streaming requests consistently.
 */
class UsageLimitedChatService extends BaseService {
    get_default_model () {
        return 'usage-limited';
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            /**
             * Returns a list of available model names
             * @returns {Promise<string[]>} Array containing the single model identifier
             */
            async list () {
                return ['usage-limited'];
            },

            /**
             * Returns model details for the usage-limited model
             * @returns {Promise<Object[]>} Array containing the model details
             */
            async models () {
                return [{
                    id: 'usage-limited',
                    name: 'Usage Limited',
                    context: 16384,
                    cost: {
                        currency: 'usd-cents',
                        tokens: 1_000_000,
                        input: 0,
                        output: 0,
                    },
                }];
            },

            /**
             * Simulates a chat completion request with a usage limit message
             * @param {Object} params - The completion parameters
             * @param {Array} params.messages - Array of chat messages (unused)
             * @param {boolean} params.stream - Whether to stream the response
             * @param {string} params.model - The model to use (unused)
             * @returns {Object|TypedValue} A chat completion response or streamed response
             */
            async complete ({ stream, customLimitMessage }) {
                const limitMessage = customLimitMessage || dedent(`
                    You have reached your AI usage limit for this account.
                `);

                // If streaming is requested, return a streaming response
                if ( stream ) {
                    const streamObj = new PassThrough();

                    const chatStream = new Streaming.AIChatStream({
                        stream: streamObj,
                    });

                    // Schedule the streaming response
                    setTimeout(() => {
                        chatStream.write({
                            type: 'content_block_start',
                            index: 0,
                        });

                        chatStream.write({
                            type: 'content_block_delta',
                            index: 0,
                            delta: {
                                type: 'text',
                                text: limitMessage,
                            },
                        });

                        chatStream.write({
                            type: 'content_block_stop',
                            index: 0,
                        });

                        chatStream.write({
                            type: 'message_stop',
                            stop_reason: 'end_turn',
                        });

                        chatStream.end();
                    }, 10);

                    return {
                        stream: true,
                        init_chat_stream: async ({ chatStream: cs }) => {
                            // Copy contents from our stream to the provided one
                            chatStream.stream.pipe(cs.stream);
                        },
                    };
                }

                // Non-streaming response
                return {
                    'index': 0,
                    message: {
                        'id': '00000000-0000-0000-0000-000000000000',
                        'type': 'message',
                        'role': 'assistant',
                        'model': 'usage-limited',
                        'content': [
                            {
                                'type': 'text',
                                'text': limitMessage,
                            },
                        ],
                        'stop_reason': 'end_turn',
                        'stop_sequence': null,
                        'usage': {
                            'input_tokens': 0,
                            'output_tokens': 1,
                        },
                    },
                    'usage': {
                        'input_tokens': 0,
                        'output_tokens': 1,
                    },
                    'logprobs': null,
                    'finish_reason': 'stop',
                };
            },
        },
    };
}

module.exports = {
    UsageLimitedChatService,
};
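
The streaming branch above always emits the same Claude-style event sequence; shown here as plain data for reference (the message text matches the default limit message):

// Reference data only; mirrors the four chatStream.write() calls above.
const limitMessage = 'You have reached your AI usage limit for this account.';
const events = [
    { type: 'content_block_start', index: 0 },
    { type: 'content_block_delta', index: 0, delta: { type: 'text', text: limitMessage } },
    { type: 'content_block_stop',  index: 0 },
    { type: 'message_stop', stop_reason: 'end_turn' },
];
console.log(events.map(e => e.type).join(' -> '));
// content_block_start -> content_block_delta -> content_block_stop -> message_stop
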
@@ -1,251 +0,0 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const BaseService = require('../../services/BaseService');
const { Context } = require('../../util/context');
const OpenAIUtil = require('./lib/OpenAIUtil');

/**
 * XAIService class - Provides integration with X.AI's API for chat completions
 * Extends BaseService to implement the puter-chat-completion interface.
 * Handles model management, message adaptation, streaming responses,
 * and usage tracking for X.AI's language models like Grok.
 * @extends BaseService
 */
class XAIService extends BaseService {
    static MODULES = {
        openai: require('openai'),
    };
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    meteringService;

    adapt_model (model) {
        return model;
    }

    /**
     * Initializes the XAI service by setting up the OpenAI client and registering with the AI chat provider
     * @private
     * @returns {Promise<void>} Resolves when initialization is complete
     */
    async _init () {
        this.openai = new this.modules.openai.OpenAI({
            apiKey: this.global_config.services.xai.apiKey,
            baseURL: 'https://api.x.ai/v1',
        });

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });
        this.meteringService = this.services.get('meteringService').meteringService; // TODO DS: move to proper extensions
    }

    /**
     * Returns the default model identifier for the XAI service
     * @returns {string} The default model ID 'grok-beta'
     */
    get_default_model () {
        return 'grok-beta';
    }

    static IMPLEMENTS = {
        ['puter-chat-completion']: {
            /**
             * Returns a list of available models and their details.
             * See AIChatService for more information.
             *
             * @returns Array<Object> Array of model details
             */
            models () {
                return this.models_();
            },
            /**
             * Returns a list of available model names including their aliases
             * @returns {Promise<string[]>} Array of model identifiers and their aliases
             * @description Retrieves all available model IDs and their aliases,
             * flattening them into a single array of strings that can be used for model selection
             */
            async list () {
                const models = await this.models_();
                const model_names = [];
                for ( const model of models ) {
                    model_names.push(model.id);
                    if ( model.aliases ) {
                        model_names.push(...model.aliases);
                    }
                }
                return model_names;
            },

            /**
             * AI Chat completion method.
             * See AIChatService for more details.
             */
            async complete ({ messages, stream, model, tools }) {
                model = this.adapt_model(model);

                messages = await OpenAIUtil.process_input_messages(messages);

                const completion = await this.openai.chat.completions.create({
                    messages,
                    model: model ?? this.get_default_model(),
                    ...(tools ? { tools } : {}),
                    max_tokens: 1000,
                    stream,
                    ...(stream ? {
                        stream_options: { include_usage: true },
                    } : {}),
                });

                // Metering integration
                const actor = Context.get('actor');

                return OpenAIUtil.handle_completion_output({
                    usage_calculator: ({ usage }) => {
                        const modelDetails = this.models_().find(m => m.id === model || m.aliases?.includes(model));
                        const trackedUsage = {
                            prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                            completion_tokens: usage.completion_tokens ?? 0,
                            cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                        };

                        this.meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelDetails.id}`);
                        const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
                            model_details: modelDetails,
                        });

                        return legacyCostCalculator({ usage });
                    },
                    stream,
                    completion,
                });
            },
        },
    };

    /**
     * Retrieves available AI models and their specifications
     * @returns Array of model objects containing:
     *  - id: Model identifier string
     *  - name: Human readable model name
     *  - context: Maximum context window size
     *  - cost: Pricing information object with currency and rates
     * @private
     */
    models_ () {
        return [
            {
                id: 'grok-beta',
                name: 'Grok Beta',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 500,
                    output: 1500,
                },
            },
            {
                id: 'grok-vision-beta',
                name: 'Grok Vision Beta',
                context: 8192,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 500,
                    output: 1500,
                    image: 1000,
                },
            },
            {
                id: 'grok-3',
                name: 'Grok 3',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 300,
                    output: 1500,
                },
            },
            {
                id: 'grok-3-fast',
                name: 'Grok 3 Fast',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 500,
                    output: 2500,
                },
            },
            {
                id: 'grok-3-mini',
                name: 'Grok 3 Mini',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 30,
                    output: 50,
                },
            },
            {
                id: 'grok-3-mini-fast',
                name: 'Grok 3 Mini Fast',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 60,
                    output: 400,
                },
            },
            {
                id: 'grok-2-vision',
                name: 'Grok 2 Vision',
                context: 8192,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 200,
                    output: 1000,
                },
            },
            {
                id: 'grok-2',
                name: 'Grok 2',
                context: 131072,
                cost: {
                    currency: 'usd-cents',
                    tokens: 1_000_000,
                    input: 200,
                    output: 1000,
                },
            },
        ];
    }
}

module.exports = {
    XAIService,
};
@@ -1,58 +0,0 @@
const { nou } = require('../../../util/langutil');
const Streaming = require('../lib/Streaming');
// const claude_sample = require('../samples/claude-1');
const claude_sample = require('../samples/claude-tools-1');

const echo_stream = {
    write: data => {
        console.log(data);
    },
};

const chatStream = new Streaming.AIChatStream({ stream: echo_stream });

let message;
let contentBlock;
for ( const event of claude_sample ) {
    if ( event.type === 'message_start' ) {
        message = chatStream.message();
        continue;
    }
    if ( event.type === 'message_stop' ) {
        message.end();
        message = null;
        continue;
    }

    if ( event.type === 'content_block_start' ) {
        if ( event.content_block.type === 'tool_use' ) {
            contentBlock = message.contentBlock({
                type: event.content_block.type,
                id: event.content_block.id,
                name: event.content_block.name,
            });
            continue;
        }
        contentBlock = message.contentBlock({
            type: event.content_block.type,
        });
        continue;
    }

    if ( event.type === 'content_block_stop' ) {
        contentBlock.end();
        contentBlock = null;
        continue;
    }

    if ( event.type === 'content_block_delta' ) {
        if ( event.delta.type === 'input_json_delta' ) {
            contentBlock.addPartialJSON(event.delta.partial_json);
            continue;
        }
        if ( event.delta.type === 'text_delta' ) {
            contentBlock.addText(event.delta.text);
            continue;
        }
    }
}
@@ -1,61 +0,0 @@
const { nou } = require('../../../util/langutil');
const FunctionCalling = require('../lib/FunctionCalling');
const Streaming = require('../lib/Streaming');
const openai_fish = require('../samples/openai-tools-1');

const echo_stream = {
    write: data => {
        console.log(data);
    },
};

const chatStream = new Streaming.AIChatStream({
    stream: echo_stream,
});

const message = chatStream.message();
let textblock = message.contentBlock({ type: 'text' });
let toolblock = null;
let mode = 'text';

const tool_call_blocks = [];

for ( const chunk of openai_fish ) {
    if ( chunk.usage ) continue;
    if ( chunk.choices.length < 1 ) continue;

    const choice = chunk.choices[0];

    if ( ! nou(choice.delta.content) ) {
        if ( mode === 'tool' ) {
            toolblock.end();
            mode = 'text';
            textblock = message.contentBlock({ type: 'text' });
        }
        textblock.addText(choice.delta.content);
        continue;
    }

    if ( ! nou(choice.delta.tool_calls) ) {
        if ( mode === 'text' ) {
            mode = 'tool';
            textblock.end();
        }
        for ( const tool_call of choice.delta.tool_calls ) {
            if ( ! tool_call_blocks[tool_call.index] ) {
                toolblock = message.contentBlock({
                    type: 'tool_use',
                    id: tool_call.function.name,
                });
                tool_call_blocks[tool_call.index] = toolblock;
            } else {
                toolblock = tool_call_blocks[tool_call.index];
            }
            toolblock.addPartialJSON(tool_call.function.arguments);
        }
    }
}

if ( mode === 'text' ) textblock.end();
if ( mode === 'tool' ) toolblock.end();
message.end();
@@ -1,122 +0,0 @@
module.exports = class FunctionCalling {
    /**
     * Normalizes the 'tools' object in-place.
     *
     * This function will accept an array of tools provided by the
     * user, and produce a normalized object that can then be
     * converted to the appropriate representation for another
     * service.
     *
     * We will accept conventions from either service that a user
     * might expect to work, prioritizing the OpenAI convention
     * when conflicting conventions are present.
     *
     * @param {*} tools
     */
    static normalize_tools_object (tools) {
        for ( let i = 0 ; i < tools.length ; i++ ) {
            const tool = tools[i];
            let normalized_tool = {};

            const normalize_function = fn => {
                const normal_fn = {};
                let parameters =
                    fn.parameters ||
                    fn.input_schema;

                normal_fn.parameters = parameters ?? {
                    type: 'object',
                };

                if ( parameters?.properties ) {
                    parameters = this.normalize_json_schema(parameters);
                }

                if ( fn.name ) {
                    normal_fn.name = fn.name;
                }

                if ( fn.description ) {
                    normal_fn.description = fn.description;
                }

                return normal_fn;
            };

            if ( tool.input_schema ) {
                normalized_tool = {
                    type: 'function',
                    function: normalize_function(tool),
                };
            } else if ( tool.type === 'function' ) {
                normalized_tool = {
                    type: 'function',
                    function: normalize_function(tool.function),
                };
            } else {
                normalized_tool = {
                    type: 'function',
                    function: normalize_function(tool),
                };
            }

            tools[i] = normalized_tool;
        }
        return tools;
    }

    static normalize_json_schema (schema) {
        if ( ! schema ) return schema;

        if ( schema.type === 'object' ) {
            if ( ! schema.properties ) {
                return schema;
            }

            const keys = Object.keys(schema.properties);
            for ( const key of keys ) {
                schema.properties[key] = this.normalize_json_schema(schema.properties[key]);
            }
        }

        if ( schema.type === 'array' ) {
            if ( ! schema.items ) {
                schema.items = {};
            } else {
                schema.items = this.normalize_json_schema(schema.items);
            }
        }

        return schema;
    }

    /**
     * This function will convert a normalized tools object to the
     * format expected by OpenAI.
     *
     * @param {*} tools
     * @returns
     */
    static make_openai_tools (tools) {
        return tools;
    }

    /**
     * This function will convert a normalized tools object to the
     * format expected by Claude.
     *
     * @param {*} tools
     * @returns
     */
    static make_claude_tools (tools) {
        if ( ! tools ) return undefined;
        return tools.map(tool => {
            const { name, description, parameters } = tool.function;
            return {
                name,
                description,
                input_schema: parameters,
            };
        });
    }
};
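
A small demonstration of the convergence described above, assuming the class is available via require (the path is illustrative): an OpenAI-style tool and a Claude-style tool normalize to the same shape, which `make_claude_tools` can convert back.

const FunctionCalling = require('./FunctionCalling'); // path illustrative

const openaiStyle = {
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Get the weather',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
    },
};
const claudeStyle = {
    name: 'get_weather',
    description: 'Get the weather',
    input_schema: { type: 'object', properties: { city: { type: 'string' } } },
};

const normalized = FunctionCalling.normalize_tools_object([openaiStyle, claudeStyle]);
// Both entries now look like { type: 'function', function: { parameters, name, description } }
console.log(FunctionCalling.make_claude_tools(normalized)[0].input_schema.type); // 'object'
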
@@ -1,186 +0,0 @@
const { whatis } = require('../../../util/langutil');

module.exports = class Messages {
    /**
     * Normalizes a single message into a standardized format with role and content array.
     * Converts string messages to objects, ensures content is an array of content blocks,
     * transforms tool_calls into tool_use content blocks, and coerces content items into objects.
     *
     * @param {string|Object} message - The message to normalize, either a string or message object
     * @param {Object} params - Optional parameters including default role
     * @returns {Object} Normalized message with role and content array
     * @throws {Error} If message is not a string or object
     * @throws {Error} If message has no content property and no tool_calls
     * @throws {Error} If any content item is not a string or object
     */
    static normalize_single_message (message, params = {}) {
        params = Object.assign({
            role: 'user',
        }, params);

        if ( typeof message === 'string' ) {
            message = {
                content: [message],
            };
        }
        if ( whatis(message) !== 'object' ) {
            throw new Error('each message must be a string or object');
        }
        if ( ! message.role ) {
            message.role = params.role;
        }
        if ( ! message.content ) {
            if ( message.tool_calls ) {
                message.content = [];
                for ( let i = 0 ; i < message.tool_calls.length ; i++ ) {
                    const tool_call = message.tool_calls[i];
                    message.content.push({
                        type: 'tool_use',
                        id: tool_call.id,
                        name: tool_call.function.name,
                        input: tool_call.function.arguments,
                    });
                }
                delete message.tool_calls;
            } else {
                throw new Error('each message must have a \'content\' property');
            }
        }
        if ( whatis(message.content) !== 'array' ) {
            message.content = [message.content];
        }
        // Coerce each content block into an object
        for ( let i = 0 ; i < message.content.length ; i++ ) {
            if ( whatis(message.content[i]) === 'string' ) {
                message.content[i] = {
                    type: 'text',
                    text: message.content[i],
                };
            }
            if ( whatis(message.content[i]) !== 'object' ) {
                throw new Error('each message content item must be a string or object');
            }
            if ( typeof message.content[i].text === 'string' && !message.content[i].type ) {
                message.content[i].type = 'text';
            }
        }

        // Remove "text" properties from content blocks with type=tool_use
        for ( let i = 0 ; i < message.content.length ; i++ ) {
            if ( message.content[i].type !== 'tool_use' ) {
                continue;
            }
            if ( message.content[i].hasOwnProperty('text') ) {
                delete message.content[i].text;
            }
        }

        return message;
    }

    /**
     * Normalizes an array of messages by applying normalize_single_message to each,
     * then splits messages with multiple content blocks into separate messages,
     * and finally merges consecutive messages from the same role.
     *
     * @param {Array} messages - Array of messages to normalize
     * @param {Object} params - Optional parameters passed to normalize_single_message
     * @returns {Array} Normalized and merged array of messages
     */
    static normalize_messages (messages, params = {}) {
        for ( let i = 0 ; i < messages.length ; i++ ) {
            messages[i] = this.normalize_single_message(messages[i], params);
        }

        // Split multi-block messages into one message per content block
        // (tool_result blocks and all other block types split the same way)
        // TODO: unit test this
        messages = [...messages];
        for ( let i = 0 ; i < messages.length ; i++ ) {
            let message = messages[i];
            let separated_messages = [];
            for ( let j = 0 ; j < message.content.length ; j++ ) {
                separated_messages.push({
                    ...message,
                    content: [message.content[j]],
                });
            }
            messages.splice(i, 1, ...separated_messages);
        }

        // If multiple messages are from the same role, merge them
        let merged_messages = [];
        let current_role = null;
        for ( let i = 0 ; i < messages.length ; i++ ) {
            if ( current_role === messages[i].role ) {
                merged_messages[merged_messages.length - 1].content.push(...messages[i].content);
            } else {
                merged_messages.push(messages[i]);
                current_role = messages[i].role;
            }
        }

        return merged_messages;
    }
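
    // Illustrative walkthrough (sample inputs are made up): strings default to
    // the 'user' role, adjacent same-role messages merge, and tool_calls become
    // tool_use content blocks.
    //
    //   Messages.normalize_messages([
    //       'hi',
    //       'there',
    //       { role: 'assistant', tool_calls: [{ id: 't1',
    //           function: { name: 'get_weather', arguments: { location: 'Vancouver' } } }] },
    //   ]);
    //   // => [
    //   //     { role: 'user', content: [
    //   //         { type: 'text', text: 'hi' },
    //   //         { type: 'text', text: 'there' } ] },
    //   //     { role: 'assistant', content: [
    //   //         { type: 'tool_use', id: 't1', name: 'get_weather',
    //   //           input: { location: 'Vancouver' } } ] },
    //   // ]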

    /**
     * Separates system messages from other messages in the array.
     *
     * @param {Array} messages - Array of messages to process
     * @returns {Array} Tuple containing [system_messages, non_system_messages]
     */
    static extract_and_remove_system_messages (messages) {
        let system_messages = [];
        let new_messages = [];
        for ( let i = 0 ; i < messages.length ; i++ ) {
            if ( messages[i].role === 'system' ) {
                system_messages.push(messages[i]);
            } else {
                new_messages.push(messages[i]);
            }
        }
        return [system_messages, new_messages];
    }

    /**
     * Extracts all text content from messages, handling various message formats.
     * Processes strings, objects with content arrays, and nested content structures,
     * joining all text with spaces.
     *
     * @param {Array} messages - Array of messages to extract text from
     * @returns {string} Concatenated text content from all messages
     * @throws {Error} If text content is not a string
     */
    static extract_text (messages) {
        return messages.map(m => {
            if ( whatis(m) === 'string' ) {
                return m;
            }
            if ( whatis(m) !== 'object' ) {
                return '';
            }
            if ( whatis(m.content) === 'array' ) {
                return m.content.map(c => c.text).join(' ');
            }
            if ( whatis(m.content) === 'string' ) {
                return m.content;
            } else {
                const is_text_type = m.content.type === 'text' ||
                    !m.content.hasOwnProperty('type');
                if ( is_text_type ) {
                    if ( whatis(m.content.text) !== 'string' ) {
                        throw new Error('text content must be a string');
                    }
                    return m.content.text;
                }
                return '';
            }
        }).join(' ');
    }
};
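
// Illustrative (sample messages are made up): extract_text flattens the
// supported message shapes into a single space-joined string.
//
//   Messages.extract_text([
//       'plain string',
//       { role: 'user', content: [{ type: 'text', text: 'in an array' }] },
//       { role: 'user', content: { type: 'text', text: 'single block' } },
//   ]);
//   // => 'plain string in an array single block'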
@@ -1,65 +0,0 @@
module.exports = [
    {
        type: 'message_start',
        message: {
            id: 'msg_01KKQeaUDpMzNovH9utP5qJc',
            type: 'message',
            role: 'assistant',
            model: 'claude-3-5-sonnet-20241022',
            content: [],
            stop_reason: null,
            stop_sequence: null,
            usage: {
                input_tokens: 82,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
                output_tokens: 1,
            },
        },
    },
    {
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'Some' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: ' species of fish, like the electric' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: {
            type: 'text_delta',
            text: ' eel, can generate powerful electrical',
        },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: ' charges of up to 860 ' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'volts to stun prey an' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'd defend themselves.' },
    },
    { type: 'content_block_stop', index: 0 },
    {
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: 35 },
    },
    { type: 'message_stop' },
];
@@ -1,76 +0,0 @@
module.exports = [
    {
        type: 'message_start',
        message: {
            id: 'msg_01GAy4THpFyFJcpxqWXBMrvx',
            type: 'message',
            role: 'assistant',
            model: 'claude-3-5-sonnet-20241022',
            content: [],
            stop_reason: null,
            stop_sequence: null,
            usage: {
                input_tokens: 458,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
                output_tokens: 1,
            },
        },
    },
    {
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: 'I' },
    },
    {
        type: 'content_block_delta',
        index: 0,
        delta: {
            type: 'text_delta',
            text: "'ll check the weather in Vancouver for you.",
        },
    },
    { type: 'content_block_stop', index: 0 },
    {
        type: 'content_block_start',
        index: 1,
        content_block: {
            type: 'tool_use',
            id: 'toolu_01E12jeyCenTtntPBk1j7rgc',
            name: 'get_weather',
            input: {},
        },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: '' },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: '{"location"' },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: ': "Van' },
    },
    {
        type: 'content_block_delta',
        index: 1,
        delta: { type: 'input_json_delta', partial_json: 'couver"}' },
    },
    { type: 'content_block_stop', index: 1 },
    {
        type: 'message_delta',
        delta: { stop_reason: 'tool_use', stop_sequence: null },
        usage: { output_tokens: 64 },
    },
    { type: 'message_stop' },
];
@@ -1,46 +0,0 @@
module.exports = [
    {
        id: 'chatcmpl-AvspmQTvFBBjKsFhHYhyiphFmKMY8',
        object: 'chat.completion.chunk',
        created: 1738358842,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_bd83329f63',
        choices: [
            {
                index: 0,
                delta: {
                    role: 'assistant',
                    content: '',
                    refusal: null,
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    },
    ...[
        'Fish', ' are', ' diverse', ' aquatic', ' creatures', ' that', ' play',
        ' a', ' crucial', ' role', ' in', ' marine', ' ecosystems', ' and',
        ' human', ' diets', '.',
    ].map(str => ({
        id: 'chatcmpl-AvspmQTvFBBjKsFhHYhyiphFmKMY8',
        object: 'chat.completion.chunk',
        created: 1738358842,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_bd83329f63',
        choices: [
            {
                index: 0,
                delta: {
                    content: str,
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    })),
];
@@ -1,102 +0,0 @@
module.exports = [
    {
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [
            {
                index: 0,
                delta: {
                    role: 'assistant',
                    content: null,
                    tool_calls: [
                        {
                            index: 0,
                            id: 'call_ULl8cRKFQbYeJSIZ3giLAg6r',
                            type: 'function',
                            function: {
                                name: 'get_weather',
                                arguments: '',
                            },
                        },
                    ],
                    refusal: null,
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    },
    ...[
        '{"', 'location', '":"',
        'V', 'ancouver',
        '"}',
    ].map(str => ({
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [
            {
                index: 0,
                delta: {
                    tool_calls: [
                        {
                            index: 0,
                            function: {
                                arguments: str,
                            },
                        },
                    ],
                },
                logprobs: null,
                finish_reason: null,
            },
        ],
        usage: null,
    })),
    {
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [
            {
                index: 0,
                delta: {},
                logprobs: null,
                finish_reason: 'tool_calls',
            },
        ],
        usage: null,
    },
    {
        id: 'chatcmpl-Avqr6AwmQoEFLXuwf1llkKknIR4Ry',
        object: 'chat.completion.chunk',
        created: 1738351236,
        model: 'gpt-4o-mini-2024-07-18',
        service_tier: 'default',
        system_fingerprint: 'fp_72ed7ab54c',
        choices: [],
        usage: {
            prompt_tokens: 62,
            completion_tokens: 16,
            total_tokens: 78,
            prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0 },
            completion_tokens_details: {
                reasoning_tokens: 0,
                audio_tokens: 0,
                accepted_prediction_tokens: 0,
                rejected_prediction_tokens: 0,
            },
        },
    },
];
@@ -1,19 +1,30 @@
const { AnomalyService } = require('../../services/AnomalyService');
const { GroupService } = require('../../services/auth/GroupService');
const { PermissionService } = require('../../services/auth/PermissionService');
const { CommandService } = require('../../services/CommandService');
const { SqliteDatabaseAccessService } = require('../../services/database/SqliteDatabaseAccessService');
const { DetailProviderService } = require('../../services/DetailProviderService');
const { EventService } = require('../../services/EventService');
const { GetUserService } = require('../../services/GetUserService');
const { MeteringServiceWrapper } = require('../../services/MeteringService/MeteringServiceWrapper.mjs');
const { DBKVServiceWrapper } = require('../../services/repositories/DBKVStore/index.mjs');
const { SUService } = require('../../services/SUService');
const { TraceService } = require('../../services/TraceService');
const { AlarmService } = require('../core/AlarmService');
const APIErrorService = require('../web/APIErrorService');
import { FilesystemService } from '../../filesystem/FilesystemService';
import { AnomalyService } from '../../services/AnomalyService';
import { AuthService } from '../../services/auth/AuthService';
import { GroupService } from '../../services/auth/GroupService';
import { PermissionService } from '../../services/auth/PermissionService';
import { TokenService } from '../../services/auth/TokenService';
import { CommandService } from '../../services/CommandService';
import { SqliteDatabaseAccessService } from '../../services/database/SqliteDatabaseAccessService';
import { DetailProviderService } from '../../services/DetailProviderService';
import { EventService } from '../../services/EventService';
import { FeatureFlagService } from '../../services/FeatureFlagService';
import { GetUserService } from '../../services/GetUserService';
import { InformationService } from '../../services/information/InformationService';
import { MeteringServiceWrapper } from '../../services/MeteringService/MeteringServiceWrapper.mjs';
import { NotificationService } from '../../services/NotificationService';
import { RegistrantService } from '../../services/RegistrantService';
import { RegistryService } from '../../services/RegistryService';
import { DBKVServiceWrapper } from '../../services/repositories/DBKVStore/index.mjs';
import { ScriptService } from '../../services/ScriptService';
import { SessionService } from '../../services/SessionService';
import { SUService } from '../../services/SUService';
import { SystemValidationService } from '../../services/SystemValidationService';
import { TraceService } from '../../services/TraceService';
import { AlarmService } from '../core/AlarmService';
import APIErrorService from '../web/APIErrorService';

class TestCoreModule {
export class TestCoreModule {
    async install (context) {
        const services = context.get('services');
        services.registerService('whoami', DetailProviderService);
@@ -30,9 +41,16 @@ class TestCoreModule {
        services.registerService('group', GroupService);
        services.registerService('anomaly', AnomalyService);
        services.registerService('api-error', APIErrorService);
        services.registerService('system-validation', SystemValidationService);
        services.registerService('registry', RegistryService);
        services.registerService('__registrant', RegistrantService);
        services.registerService('feature-flag', FeatureFlagService);
        services.registerService('token', TokenService);
        services.registerService('information', InformationService);
        services.registerService('auth', AuthService);
        services.registerService('session', SessionService);
        services.registerService('notification', NotificationService);
        services.registerService('script', ScriptService);
        services.registerService('filesystem', FilesystemService);
    }
}

module.exports = {
    TestCoreModule,
};

@@ -18,7 +18,6 @@
 */
const { AdvancedBase } = require('@heyputer/putility');
const { WeakConstructorFeature } = require('../../traits/WeakConstructorFeature');
const { Context } = require('../../util/context');

/**
 * BaseES is a base class for Entity Store classes.
@@ -82,11 +81,6 @@ class BaseES extends AdvancedBase {

            this[k] = this.impl_methods[k];
        }

        this.log = Context.get('services').get('log-service')
            .create(`ES:${this.entity_name}:${this.constructor.name}`, {
                concern: 'es',
            });
    }

    async provide_context ( args ) {
@@ -97,9 +91,6 @@ class BaseES extends AdvancedBase {
        if ( this._on_context_provided ) {
            await this._on_context_provided(args);
        }

        this.log = Context.get('services').get('log-service')
            .create(`ES:${this.entity_name}:${this.constructor.name}`);
    }
    async read (uid) {
        let entity = await this.call_on_impl_('read', uid);

@@ -27,59 +27,57 @@ const { BaseES } = require('./BaseES');
const PERM_READ_ALL_SUBDOMAINS = 'read-all-subdomains';

class SubdomainES extends BaseES {
        static METHODS = {
            async _on_context_provided () {
                const services = this.context.get('services');
                this.db = services.get('database').get(DB_READ, 'subdomains');
            },
            async create_predicate (id) {
                if ( id === 'user-can-edit' ) {
                    return new Eq({
                        key: 'owner',
                        value: Context.get('user').id,
                    });
                }
            },
            async upsert (entity, extra) {
                if ( ! extra.old_entity ) {
                    await this._check_max_subdomains();
                }
    async _on_context_provided () {
        const services = this.context.get('services');
        this.db = services.get('database').get(DB_READ, 'subdomains');
    }
    async create_predicate (id) {
        if ( id === 'user-can-edit' ) {
            return new Eq({
                key: 'owner',
                value: Context.get('user').id,
            });
        }
    }
    async upsert (entity, extra) {
        if ( ! extra.old_entity ) {
            await this._check_max_subdomains();
        }

                return await this.upstream.upsert(entity, extra);
            },
            async select (options) {
                const actor = Context.get('actor');
                const user = actor.type.user;
        return await this.upstream.upsert(entity, extra);
    }
    async select (options) {
        const actor = Context.get('actor');
        const user = actor.type.user;

                // Note: we don't need to worry about read;
                // non-owner users don't have permission to list
                // but they still have permission to read.
                const svc_permission = this.context.get('services').get('permission');
                const has_permission_to_read_all = await svc_permission.check(Context.get('actor'), PERM_READ_ALL_SUBDOMAINS);
        // Note: we don't need to worry about read;
        // non-owner users don't have permission to list
        // but they still have permission to read.
        const svc_permission = this.context.get('services').get('permission');
        const has_permission_to_read_all = await svc_permission.check(Context.get('actor'), PERM_READ_ALL_SUBDOMAINS);

                if ( ! has_permission_to_read_all ) {
                    options.predicate = options.predicate.and(new Eq({
                        key: 'owner',
                        value: user.id,
                    }));
                }
        if ( ! has_permission_to_read_all ) {
            options.predicate = options.predicate.and(new Eq({
                key: 'owner',
                value: user.id,
            }));
        }

                return await this.upstream.select(options);
            },
            async _check_max_subdomains () {
                const user = Context.get('user');
        return await this.upstream.select(options);
    }
    async _check_max_subdomains () {
        const user = Context.get('user');

                let cnt = await this.db.read('SELECT COUNT(id) AS subdomain_count FROM subdomains WHERE user_id = ?',
                    [user.id]);
        let cnt = await this.db.read('SELECT COUNT(id) AS subdomain_count FROM subdomains WHERE user_id = ?',
            [user.id]);

                const max_subdomains = user.max_subdomains ?? config.max_subdomains_per_user;
        const max_subdomains = user.max_subdomains ?? config.max_subdomains_per_user;

                if ( max_subdomains && cnt[0].subdomain_count >= max_subdomains ) {
                    throw APIError.create('subdomain_limit_reached', null, {
                        limit: max_subdomains,
                    });
                }
            },
        if ( max_subdomains && cnt[0].subdomain_count >= max_subdomains ) {
            throw APIError.create('subdomain_limit_reached', null, {
                limit: max_subdomains,
            });
        }
        };
}

@@ -18,7 +18,7 @@
 */

import { describe, it, expect, beforeEach, vi } from 'vitest';
const kvjs = require('@heyputer/kv.js');
import { kv } from '../util/kvSingleton';
const uuid = require('uuid');
const proxyquire = require('proxyquire');

@@ -156,7 +156,7 @@ const get_mock_context = () => {
};

describe('GET /launch-apps', () => {
    globalThis.kv = new kvjs();
    globalThis.kv = kv;

    it('should return expected format', async () => {
        // First call
@@ -165,52 +165,6 @@ describe('GET /launch-apps', () => {
        req_mock.query = {};
        await get_launch_apps(req_mock, res_mock);

        // TODO: bring this back, figure out what it's testing,
        // document why it needs to be here (if it does)
        // or remove it.
        if ( false ) {
            expect(res_mock.send).toHaveBeenCalledOnce();

            const call = res_mock.send.mock.calls[0];
            const response = call[0];
            console.log('response', response);

            expect(response).toBeTypeOf('object');

            expect(response).toHaveProperty('recommended');
            expect(response.recommended).toBeInstanceOf(Array);
            expect(response.recommended).toHaveLength(apps_names_expected_to_exist.length);
            expect(response.recommended).toEqual(
                data_mockapps
                    .filter(app => apps_names_expected_to_exist.includes(app.name))
                    .map(app => ({
                        uuid: app.uid,
                        name: app.name,
                        title: app.title,
                        icon: app.icon,
                        godmode: app.godmode,
                        maximize_on_start: app.maximize_on_start,
                        index_url: app.index_url,
                    })));

            expect(response).toHaveProperty('recent');
            expect(response.recent).toBeInstanceOf(Array);
            expect(response.recent).toHaveLength(data_appopens.length);
            expect(response.recent).toEqual(
                data_mockapps
                    .filter(app => data_appopens.map(app_open => app_open.app_uid).includes(app.uid))
                    .map(app => ({
                        uuid: app.uid,
                        name: app.name,
                        title: app.title,
                        icon: app.icon,
                        godmode: app.godmode,
                        maximize_on_start: app.maximize_on_start,
                        index_url: app.index_url,
                    })));
        }

        // << HOW TO FIX >>
        // If you updated the list of recommended apps,
        // you can simply update this number to match the new length

src/backend/src/services/BaseService.d.ts
@@ -8,11 +8,11 @@ export interface ServiceResources {

export type EventHandler = (id: string, ...args: any[]) => any;

export type Logger = {
export interface Logger {
    debug: (...args: any[]) => any;
    info: (...args: any[]) => any;
    [key: string]: any;
};
}

export class BaseService {
    constructor (service_resources: ServiceResources, ...a: any[]);
@@ -26,7 +26,7 @@ export class BaseService {
    log: Logger;
    errors: any;

    as(interfaceName: string): Record<string, unknown>;
    as (interfaceName: string): Record<string, unknown>;

    run_as_early_as_possible (): Promise<void>;
    construct (): Promise<void>;

@@ -75,7 +75,7 @@ class ChatAPIService extends BaseService {
        const models = await svc_su.sudo(async () => {
            const svc_aiChat = this.services.get('ai-chat');
            // Return the simple model list which contains basic model information
            return svc_aiChat.simple_model_list;
            return svc_aiChat.list();
        });

        // Return the list of models
@@ -98,7 +98,7 @@ class ChatAPIService extends BaseService {
        const models = await svc_su.sudo(async () => {
            const svc_aiChat = this.services.get('ai-chat');
            // Return the detailed model list which includes cost and capability information
            return svc_aiChat.detail_model_list;
            return svc_aiChat.models();
        });

        // Return the detailed list of models

@@ -51,8 +51,8 @@ describe('ChatAPIService', () => {
    beforeEach(() => {
        // Mock AIChatService
        mockAIChatService = {
            simple_model_list: ['model1', 'model2'],
            detail_model_list: [
            list: () => ['model1', 'model2'],
            models: () => [
                { id: 'model1', name: 'Model 1', cost: { input: 1, output: 2 } },
                { id: 'model2', name: 'Model 2', cost: { input: 3, output: 4 } },
            ],
@@ -159,7 +159,7 @@ describe('ChatAPIService', () => {
        // Verify
        expect(mockSUService.sudo).toHaveBeenCalled();
        expect(mockRes.json).toHaveBeenCalledWith({
            models: mockAIChatService.simple_model_list,
            models: mockAIChatService.list(),
        });
    });
});
@@ -179,7 +179,7 @@ describe('ChatAPIService', () => {
        // Verify
        expect(mockSUService.sudo).toHaveBeenCalled();
        expect(mockRes.json).toHaveBeenCalledWith({
            models: mockAIChatService.detail_model_list,
            models: mockAIChatService.models(),
        });
    });
});

@@ -514,7 +514,7 @@ export class MeteringService {
        const currentMonth = this.#getMonthYearString();
        const keyPrefix = `${METRICS_PREFIX}:puter:`;
        return this.#superUserService.sudo(async () => {
            const keys = [];
            const keys: string[] = [];
            for ( let shard = 0; shard < MeteringService.GLOBAL_SHARD_COUNT; shard++ ) {
                keys.push(`${keyPrefix}${shard}:${currentMonth}`);
            }

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const FILE_SYSTEM_COST_MAP = {
    'filesystem:ingress:bytes': 0,

@@ -1,19 +1,19 @@
import { AWS_POLLY_COST_MAP } from './awsPollyCostMap';
import { AWS_TEXTRACT_COST_MAP } from './awsTextractCostMap';
import { CLAUDE_COST_MAP } from './claudeCostMap';
import { DEEPSEEK_COST_MAP } from './deepSeekCostMap';
import { FILE_SYSTEM_COST_MAP } from './fileSystemCostMap';
import { GEMINI_COST_MAP } from './geminiCostMap';
import { GROQ_COST_MAP } from './groqCostMap';
import { KV_COST_MAP } from './kvCostMap';
import { MISTRAL_COST_MAP } from './mistralCostMap';
import { OPENAI_COST_MAP } from './openAiCostMap';
import { OPENAI_IMAGE_COST_MAP } from './openaiImageCostMap';
import { OPENROUTER_COST_MAP } from './openrouterCostMap';
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap';
import { TOGETHER_COST_MAP } from './togetherCostMap';
import { XAI_COST_MAP } from './xaiCostMap';
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap';
import { AWS_POLLY_COST_MAP } from './awsPollyCostMap.js';
import { AWS_TEXTRACT_COST_MAP } from './awsTextractCostMap.js';
import { CLAUDE_COST_MAP } from './claudeCostMap.js';
import { DEEPSEEK_COST_MAP } from './deepSeekCostMap.js';
import { FILE_SYSTEM_COST_MAP } from './fileSystemCostMap.js';
import { GEMINI_COST_MAP } from './geminiCostMap.js';
import { GROQ_COST_MAP } from './groqCostMap.js';
import { KV_COST_MAP } from './kvCostMap.js';
import { MISTRAL_COST_MAP } from './mistralCostMap.js';
import { OPENAI_COST_MAP } from './openAiCostMap.js';
import { OPENAI_IMAGE_COST_MAP } from './openaiImageCostMap.js';
import { OPENROUTER_COST_MAP } from './openrouterCostMap.js';
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap.js';
import { TOGETHER_COST_MAP } from './togetherCostMap.js';
import { XAI_COST_MAP } from './xaiCostMap.js';
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap.js';

export const COST_MAPS = {
    ...AWS_POLLY_COST_MAP,

@@ -3,7 +3,7 @@
// All costs are in microcents (1/1,000,000th of a cent). Example: 1,000,000 microcents = $0.01 USD.
// Naming pattern: "openai:{model}:{size}" or "openai:{model}:hd:{size}" for HD images

import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const OPENAI_IMAGE_COST_MAP = {
    // DALL-E 3

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

// Prices are per generated video-second.
export const OPENAI_VIDEO_COST_MAP = {

@@ -1,5 +1,5 @@
import { REGISTERED_USER_FREE } from './registeredUserFreePolicy';
import { TEMP_USER_FREE } from './tempUserFreePolicy';
import { REGISTERED_USER_FREE } from './registeredUserFreePolicy.js';
import { TEMP_USER_FREE } from './tempUserFreePolicy.js';

export const SUB_POLICIES = [
    TEMP_USER_FREE,

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const REGISTERED_USER_FREE = {
    id: 'user_free',

@@ -1,4 +1,4 @@
import { toMicroCents } from '../utils';
import { toMicroCents } from '../utils.js';

export const TEMP_USER_FREE = {
    id: 'temp_free',

@@ -19,6 +19,8 @@
 */
const { AdvancedBase } = require('@heyputer/putility');
const BaseService = require('./BaseService');
const { kv } = require('../util/kvSingleton');
const uuidv4 = require('uuid').v4;

/**
 * @class MapCollection
@@ -29,10 +31,6 @@ const BaseService = require('./BaseService');
 * This class provides methods for basic CRUD operations (create, read, update, delete) on the key-value pairs, as well as methods for checking the existence of a key and retrieving all keys in the collection.
 */
class MapCollection extends AdvancedBase {
    static MODULES = {
        kv: globalThis.kv,
        uuidv4: require('uuid').v4,
    };
    /**
     * @method MapCollection#_mk_key
     * @description Creates a unique key for the map collection.
@@ -43,7 +41,7 @@ class MapCollection extends AdvancedBase {
        super();
        // We use kvjs instead of a plain object because it doesn't
        // have a limit on the number of keys it can store.
        this.map_id = this.modules.uuidv4();
        this.map_id = uuidv4();
        this.kv = kv;
    }

@@ -5,21 +5,21 @@ import { RegistryService } from './RegistryService';
describe('RegistryService', async () => {
    // Initialize globalThis.kv for testing
    beforeAll(() => {
        if (!globalThis.kv) {
        if ( ! globalThis.kv ) {
            globalThis.kv = new Map();
            globalThis.kv.set = function(key, value) {
            globalThis.kv.set = function (key, value) {
                return Map.prototype.set.call(this, key, value);
            };
            globalThis.kv.get = function(key) {
            globalThis.kv.get = function (key) {
                return Map.prototype.get.call(this, key);
            };
            globalThis.kv.exists = function(key) {
            globalThis.kv.exists = function (key) {
                return this.has(key);
            };
            globalThis.kv.del = function(key) {
            globalThis.kv.del = function (key) {
                return this.delete(key);
            };
            globalThis.kv.keys = function(pattern) {
            globalThis.kv.keys = function (pattern) {
                const prefix = pattern.replace('*', '');
                return Array.from(this.keys()).filter(k => k.startsWith(prefix));
            };
@@ -72,27 +72,27 @@ describe('RegistryService', async () => {
    it('should allow checking existence in collection', () => {
        const collection = registryService.register_collection('exists-collection');
        collection.set('existing-key', 'value');
        expect(collection.exists('existing-key')).toBe(true);
        expect(collection.exists('non-existing-key')).toBe(false);
        expect(collection.exists('existing-key')).toBeTruthy();
        expect(collection.exists('non-existing-key')).toBeFalsy();
    });

    it('should allow deleting from collection', () => {
    it('should allow deleting from collection', async () => {
        const collection = registryService.register_collection('delete-collection');
        collection.set('delete-key', 'value');
        expect(collection.exists('delete-key')).toBe(true);
        const res = collection.exists('delete-key');
        expect(collection.exists('delete-key')).toBeTruthy();
        collection.del('delete-key');
        expect(collection.exists('delete-key')).toBe(false);
        expect(collection.exists('delete-key')).toBeFalsy();
    });

    it('should support multiple independent collections', () => {
        const collection1 = registryService.register_collection('coll1');
        const collection2 = registryService.register_collection('coll2');

        collection1.set('key', 'value1');
        collection2.set('key', 'value2');

        expect(collection1.get('key')).toBe('value1');
        expect(collection2.get('key')).toBe('value2');
    });
});

src/backend/src/services/SUService.d.ts
@@ -1,8 +0,0 @@
import type { Actor } from './auth/Actor';

export class SUService {
    _construct (): void;
    get_system_actor (): Promise<Actor>;
    sudo<T>(callback: () => Promise<T>): Promise<T>;
    sudo<T>(actorOrCallback: Actor, callback: () => Promise<T>): Promise<T>;
}
@@ -18,10 +18,10 @@
 */

// METADATA // {"ai-commented":{"service":"openai-completion","model":"gpt-4o-mini"}}
const { Context } = require('../util/context');
const { TeePromise } = require('@heyputer/putility').libs.promise;
const { Actor, UserActorType } = require('./auth/Actor');
const BaseService = require('./BaseService');
import { TeePromise } from '@heyputer/putility/src/libs/promise.js';
import { Context } from '../util/context.js';
import { Actor, UserActorType } from './auth/Actor.js';
import BaseService from './BaseService.js';

/**
 * "SUS"-Service (Super-User Service)
@@ -33,7 +33,7 @@ const BaseService = require('./BaseService');
 * instances, providing methods to retrieve the system actor
 * and perform actions with elevated privileges.
 */
class SUService extends BaseService {
export class SUService extends BaseService {
    /**
     * Initializes the SUService instance, creating promises for system user
     * and system actor. This method does not take any parameters and does
@@ -110,8 +110,4 @@ class SUService extends BaseService {
            user: actor.type.user,
        }).arun(callback);
    }
}

module.exports = {
    SUService,
};
}
src/backend/src/services/User.d.ts
@@ -4,7 +4,7 @@ export interface IUser {
    id: number,
    uuid: string,
    username: string,
    email: string,
    email?: string,
    subscription?: (typeof SUB_POLICIES)[number]['id'],
    metadata?: Record<string, unknown> & { hasDevAccountAccess?: boolean }
}
@@ -18,7 +18,7 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const BaseService = require('../../services/BaseService');
const BaseService = require('../BaseService');

/**
 * Service class that manages AI interface registrations and configurations.
src/backend/src/services/ai/chat/.gitignore (new file)
@@ -0,0 +1,2 @@
*.js
*.js.map
src/backend/src/services/ai/chat/AIChatService.ts (new file)
@@ -0,0 +1,652 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import { createId as cuid2 } from '@paralleldrive/cuid2';
import { PassThrough } from 'stream';
import { APIError } from '../../../api/APIError.js';
import { ErrorService } from '../../../modules/core/ErrorService.js';
import { Context } from '../../../util/context.js';
import { kv } from '../../../util/kvSingleton.js';
import BaseService from '../../BaseService.js';
import { BaseDatabaseAccessService } from '../../database/BaseDatabaseAccessService.js';
import { DB_WRITE } from '../../database/consts.js';
import { DriverService } from '../../drivers/DriverService.js';
import { TypedValue } from '../../drivers/meta/Runtime.js';
import { EventService } from '../../EventService.js';
import { MeteringService } from '../../MeteringService/MeteringService.js';
import { AsModeration } from '../moderation/AsModeration.js';
import { normalize_tools_object } from '../utils/FunctionCalling.js';
import { extract_text, normalize_messages, normalize_single_message } from '../utils/Messages.js';
import Streaming from '../utils/Streaming.js';
import { ClaudeProvider } from './providers/ClaudeProvider/ClaudeProvider.js';
import { FakeChatProvider } from './providers/FakeChatProvider.js';
import { GeminiChatProvider } from './providers/GeminiProvider/GeminiChatProvider.js';
import { GroqAIProvider } from './providers/GroqAiProvider/GroqAIProvider.js';
import { MistralAIProvider } from './providers/MistralAiProvider/MistralAiProvider.js';
import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatProvider.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js';
import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js';
import { OllamaChatProvider } from './providers/OllamaProvider.js';
import { DeepSeekProvider } from './providers/DeepSeekProvider/DeepSeekProvider.js';
import { XAIProvider } from './providers/XAIProvider/XAIProvider.js';
import { TogetherAIProvider } from './providers/TogetherAiProvider/TogetherAIProvider.js';
import { OpenRouterProvider } from './providers/OpenRouterProvider/OpenRouterProvider.js';

// Maximum number of fallback attempts when a model fails, including the first attempt
const MAX_FALLBACKS = 3 + 1; // includes first attempt

export class AIChatService extends BaseService {

    static SERVICE_NAME = 'ai-chat';

    static DEFAULT_PROVIDER = 'openai-completion';

    get meteringService (): MeteringService {
        return this.services.get('meteringService').meteringService;
    }

    get db (): BaseDatabaseAccessService {
        return this.services.get('database').get(DB_WRITE, 'ai-service');
    }

    get errorService (): ErrorService {
        return this.services.get('error-service');
    }

    get eventService (): EventService {
        return this.services.get('event');
    }

    get driverService (): DriverService {
        return this.services.get('driver');
    }

    getProvider (name: string): IChatProvider | undefined {
        return this.#providers[name];
    }

    #providers: Record<string, IChatProvider> = {};
    #modelIdMap: Record<string, IChatModel[]> = {};

    /** Driver interfaces */
    static IMPLEMENTS = {
        ['driver-capabilities']: {
            supports_test_mode (iface: string, method_name: string) {
                return iface === 'puter-chat-completion' &&
                    method_name === 'complete';
            },
        },
        ['puter-chat-completion']: {

            async models () {
                return await (this as unknown as AIChatService).models();
            },

            async list () {
                return await (this as unknown as AIChatService).list();
            },

            async complete (...parameters: Parameters<AIChatService['complete']>) {
                return await (this as unknown as AIChatService).complete(...parameters);
            },
        },
    };

    getModel ({ modelId, provider }: { modelId: string, provider?: string }) {
        const models = this.#modelIdMap[modelId];

        if ( ! models ) {
            throw new Error(`Model not found, please try one of the following models: ${ Object.keys(this.#modelIdMap).join(', ')}`);
        }
        if ( ! provider ) {
            return models[0];
        }
        const model = models.find(m => m.provider === provider);
        return model ?? models[0];
    }
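
    // Illustrative usage (the ids below appear in this commit's fixtures):
    // getModel resolves an id or alias via #modelIdMap, preferring the
    // requested provider and otherwise returning the first (cheapest) entry.
    //
    //   svc.getModel({ modelId: 'gpt-4o-mini-2024-07-18', provider: 'openai-completion' });
    //   // => the openai-completion entry if registered, else the first entry
    //   svc.getModel({ modelId: 'claude-3-5-sonnet-20241022' });
    //   // => the lowest-input-cost entry for that id across providers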

    private async registerProviders () {
        const claudeConfig = this.config.providers?.['claude'] || this.global_config?.services?.['claude'];
        if ( claudeConfig && claudeConfig.apiKey ) {
            this.#providers['claude'] = new ClaudeProvider(this.meteringService, claudeConfig, this.errorService);
        }
        const openAiConfig = this.config.providers?.['openai-completion'] || this.global_config?.services?.['openai-completion'] || this.global_config?.openai;
        if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) {
            this.#providers['openai-completion'] = new OpenAiChatProvider(this.meteringService, openAiConfig);
        }
        const geminiConfig = this.config.providers?.['gemini'] || this.global_config?.services?.['gemini'];
        if ( geminiConfig && geminiConfig.apiKey ) {
            this.#providers['gemini'] = new GeminiChatProvider(this.meteringService, geminiConfig);
        }
        const groqConfig = this.config.providers?.['groq'] || this.global_config?.services?.['groq'];
        if ( groqConfig && groqConfig.apiKey ) {
            this.#providers['groq'] = new GroqAIProvider(groqConfig, this.meteringService);
        }
        const deepSeekConfig = this.config.providers?.['deepseek'] || this.global_config?.services?.['deepseek'];
        if ( deepSeekConfig && deepSeekConfig.apiKey ) {
            this.#providers['deepseek'] = new DeepSeekProvider(deepSeekConfig, this.meteringService);
        }
        const mistralConfig = this.config.providers?.['mistral'] || this.global_config?.services?.['mistral'];
        if ( mistralConfig && mistralConfig.apiKey ) {
            this.#providers['mistral'] = new MistralAIProvider(mistralConfig, this.meteringService);
        }
        const xaiConfig = this.config.providers?.['xai'] || this.global_config?.services?.['xai'];
        if ( xaiConfig && xaiConfig.apiKey ) {
            this.#providers['xai'] = new XAIProvider(xaiConfig, this.meteringService);
        }
        const togetherConfig = this.config.providers?.['together-ai'] || this.global_config?.services?.['together-ai'];
        if ( togetherConfig && togetherConfig.apiKey ) {
            this.#providers['together-ai'] = new TogetherAIProvider(togetherConfig, this.meteringService);
        }
        const openrouterConfig = this.config.providers?.['openrouter'] || this.global_config?.services?.['openrouter'];
        if ( openrouterConfig && openrouterConfig.apiKey ) {
            this.#providers['openrouter'] = new OpenRouterProvider(openrouterConfig, this.meteringService);
        }

        // ollama if local instance detected

        // Autodiscover Ollama service and then check if it's disabled in the config.
        // If config.services.ollama.enabled is undefined, it means the user hasn't set it, so we should default to true.
        const ollamaConfig = this.config.providers?.['ollama'] || this.global_config?.services?.ollama;
        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
            if ( ollamaConfig?.enabled === undefined ) {
                return true;
            }
            return ollamaConfig?.enabled;
        }).catch(_err => {
            return false;
        });
        // User can disable ollama in the config, but by default it should be enabled if discovery is successful
        if ( ollama_available || ollamaConfig?.enabled ) {
            console.log('Local AI support detected! Registering Ollama');
            this.#providers['ollama'] = new OllamaChatProvider(ollamaConfig, this.meteringService);
        }

        // fake and usage-limited providers last
        this.#providers['fake-chat'] = new FakeChatProvider();
        this.#providers['usage-limited-chat'] = new UsageLimitedChatProvider();

        // emit event for extensions to add providers
        const extensionProviders = {} as Record<string, IChatProvider>;
        await this.eventService.emit('ai.chat.registerProviders', extensionProviders);
        for ( const providerName in extensionProviders ) {
            if ( this.#providers[providerName] ) {
                console.warn('AIChatService: provider name conflict for ', providerName, ' registering with -extension suffix');
                this.#providers[`${providerName}-extension`] = extensionProviders[providerName];
                continue;
            }
            this.#providers[providerName] = extensionProviders[providerName];
        }
    }
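
    // A sketch of the configuration registerProviders reads (key names match
    // the lookups above; values are placeholders, not real credentials):
    //
    //   {
    //       "services": {
    //           "claude":            { "apiKey": "..." },
    //           "openai-completion": { "apiKey": "..." },  // or legacy "secret_key"
    //           "gemini":            { "apiKey": "..." },
    //           "openrouter":        { "apiKey": "..." },
    //           "ollama":            { "enabled": true }
    //       }
    //   }
    //
    // The same keys may instead live under this service's own config as
    // config.providers["<name>"], which takes precedence.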

    protected async '__on_boot.consolidation' () {
        // register chat providers here
        await this.registerProviders();

        // build model id map
        for ( const providerName in this.#providers ) {
            const provider = this.#providers[providerName];

            // alias all driver requests to go here to support legacy routing
            this.driverService.register_service_alias(AIChatService.SERVICE_NAME,
                providerName,
                { iface: 'puter-chat-completion' });

            // build model id map
            for ( const model of await provider.models() ) {
                model.id = model.id.trim().toLowerCase();
                if ( ! this.#modelIdMap[model.id] ) {
                    this.#modelIdMap[model.id] = [];
                }
                this.#modelIdMap[model.id].push({ ...model, provider: providerName });
                if ( model.aliases ) {
                    for ( let alias of model.aliases ) {
                        alias = alias.trim().toLowerCase();
                        // join arrays which are aliased the same
                        if ( ! this.#modelIdMap[alias] ) {
                            this.#modelIdMap[alias] = this.#modelIdMap[model.id];
                            continue;
                        }
                        if ( this.#modelIdMap[alias] !== this.#modelIdMap[model.id] ) {
                            this.#modelIdMap[alias].push({ ...model, provider: providerName });
                            this.#modelIdMap[model.id] = this.#modelIdMap[alias];
                            continue;
                        }
                    }
                }
                this.#modelIdMap[model.id].sort((a, b) => {
                    if ( a.costs[a.input_cost_key || 'input_tokens'] === b.costs[b.input_cost_key || 'input_tokens'] ) {
                        return a.id.length - b.id.length; // use shorter id since it's likely the official one
                    }
                    return a.costs[a.input_cost_key || 'input_tokens'] - b.costs[b.input_cost_key || 'input_tokens'];
                });
            }
        }
    }
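
    // Illustrative result (model ids are examples): after boot, every alias
    // shares the same array instance as its canonical id, and each array is
    // sorted by input-token cost ascending, so index 0 is the cheapest source
    // for that model.
    //
    //   #modelIdMap['claude-3-5-sonnet-20241022']
    //   // => [ { id: 'claude-3-5-sonnet-20241022', provider: 'claude', ... },
    //   //      { id: 'claude-3-5-sonnet-20241022', provider: 'openrouter', ... } ]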

    models () {
        const seen = new Set<string>();
        return Object.entries(this.#modelIdMap)
            .map(([_, models]) => models)
            .flat()
            .filter(model => {
                if ( seen.has(model.id) ) {
                    return false;
                }
                seen.add(model.id);
                return true;
            })
            .sort((a, b) => {
                if ( a.provider === b.provider ) {
                    return a.id.localeCompare(b.id);
                }
                return a.provider!.localeCompare(b.provider!);
            });
    }

    list () {
        return this.models().map(m => m.id).sort();
    }

    async complete (parameters: ICompleteArguments) {
        const clientDriverCall = Context.get('client_driver_call');
        let { test_mode: testMode, response_metadata: resMetadata, intended_service: legacyProviderName } = clientDriverCall as { test_mode?: boolean; response_metadata: Record<string, unknown>; intended_service?: string };
        const actor = Context.get('actor');

        // Parenthesized so an explicit provider wins, and the legacy field is
        // only used when it names something other than this service.
        let intendedProvider = parameters.provider ||
            (legacyProviderName === AIChatService.SERVICE_NAME ? '' : legacyProviderName); // should now all go through here

        if ( !parameters.model && !intendedProvider ) {
            intendedProvider = AIChatService.DEFAULT_PROVIDER;
        }
        if ( !parameters.model && intendedProvider ) {
            parameters.model = this.#providers[intendedProvider].getDefaultModel();
        }
        let model = this.getModel({ modelId: parameters.model, provider: intendedProvider }) || this.getFallbackModel(parameters.model, [], []);
        const abuseModel = this.getModel({ modelId: 'abuse' });
        const usageLimitedModel = this.getModel({ modelId: 'usage-limited' });

        const completionId = cuid2();
        const event = {
            actor,
            completionId,
            allow: true,
            intended_service: intendedProvider || '',
            parameters,
        } as Record<string, unknown>;
        await this.eventService.emit('ai.prompt.validate', event);
        if ( ! event.allow ) {
            testMode = true;
            if ( event.custom ) parameters.custom = event.custom;
        }

        if ( parameters.messages ) {
            parameters.messages =
                normalize_messages(parameters.messages);
        }

        // Skip moderation for Ollama (local service) and other local services
        const should_moderate = !testMode &&
            parameters.provider !== 'ollama';

        if ( should_moderate && !await this.moderate(parameters) ) {
            testMode = true;
            throw APIError.create('moderation_failed');
        }

        // Only set moderated flag if we actually ran moderation
        if ( !testMode && should_moderate ) {
            Context.set('moderated', true);
        }

        if ( testMode ) {
            if ( event.abuse ) {
                model = abuseModel;
            }
        }

        if ( parameters.tools ) {
            normalize_tools_object(parameters.tools);
        }

        if ( ! model ) {
            // TODO DS: route them to new endpoints once ready
            const availableModelsUrl = `${this.global_config.origin}/puterai/chat/models`;

            throw APIError.create('field_invalid', undefined, {
                key: 'model',
                expected: `a valid model name from ${availableModelsUrl}`,
                got: model,
            });
        }

        const inputTokenCost = model.costs[model.input_cost_key || 'input_tokens'] as number;
        const outputTokenCost = model.costs[model.output_cost_key || 'output_tokens'] as number;
        const maxTokens = model.max_tokens;
        const text = extract_text(parameters.messages);
        const approximateTokenCount = Math.floor(((text.length / 4) + (text.split(/\s+/).length * (4 / 3))) / 2); // see https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
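        // Worked example of the estimate above (illustrative numbers): for a
        // 400-character, 80-word prompt it averages two common heuristics:
        //   chars / 4      = 400 / 4       = 100
        //   words * 4 / 3  = 80 * 1.333... ≈ 106.67
        //   average        = (100 + 106.67) / 2 ≈ 103 tokens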
|
||||
const approximateInputCost = approximateTokenCount * inputTokenCost;
|
||||
const usageAllowed = await this.meteringService.hasEnoughCredits(actor, approximateInputCost);
|
||||
|
||||
// Handle usage limits reached case
|
||||
if ( ! usageAllowed ) {
|
||||
model = usageLimitedModel;
|
||||
}
|
||||
|
||||
const availableCredits = await this.meteringService.getRemainingUsage(actor);
|
||||
const maxAllowedOutput =
|
||||
availableCredits - approximateInputCost;
|
||||
|
||||
const maxAllowedOutputTokens =
|
||||
maxAllowedOutput / outputTokenCost;
|
||||
|
||||
if ( maxAllowedOutputTokens ) {
|
||||
parameters.max_tokens = Math.floor(Math.min(parameters.max_tokens ?? Number.POSITIVE_INFINITY,
|
||||
maxAllowedOutputTokens,
|
||||
maxTokens - approximateTokenCount));
|
||||
if ( parameters.max_tokens < 1 ) {
|
||||
parameters.max_tokens = undefined;
|
||||
}
|
||||
}

        // call model provider
        let res: Awaited<ReturnType<IChatProvider['complete']>>;
        const provider = this.#providers[model.provider!];
        if ( ! provider ) {
            throw new Error(`no provider found for model ${model.id}`);
        }
        try {
            res = await provider.complete({
                ...parameters,
                model: model.id,
                provider: model.provider,
            });
        } catch (e) {
            const tried: string[] = [];
            const triedProviders: string[] = [];

            tried.push(model.id);
            triedProviders.push(model.provider!);

            let error = e as Error;

            while ( error ) {
                // TODO: simplify our error handling
                // Distinguishing between user errors and service errors
                // is very messy because of different conventions between
                // services. This is a best-effort attempt to catch user
                // errors and throw them as 400s.
                const isRequestError = (() => {
                    if ( error instanceof APIError ) {
                        return true;
                    }
                    if ( (error as unknown as { type: string }).type === 'invalid_request_error' ) {
                        return true;
                    }
                    return false;
                })();

                if ( isRequestError ) {
                    console.error((error as Error));
                    throw APIError.create('error_400_from_delegate', error as Error, {
                        delegate: model.provider,
                        message: (error as Error).message,
                    });
                }

                if ( this.config.disable_fallback_mechanisms ) {
                    console.error((error as Error));
                    throw error;
                }

                console.error('error calling ai chat provider for model: ', model, '\n trying fallbacks...');

                // No fallbacks for pseudo-models
                if ( model.provider === 'fake-chat' ) {
                    break;
                }

                const fallback = this.getFallbackModel(model.id, tried, triedProviders);

                if ( ! fallback ) {
                    throw new Error('no fallback model available');
                }

                const {
                    fallbackModelId,
                    fallbackProvider,
                } = fallback;

                console.warn('model fallback', {
                    fallbackModelId,
                    fallbackProvider,
                });

                let fallBackModel = this.getModel({ modelId: fallbackModelId, provider: fallbackProvider });

                const fallbackUsageAllowed = await this.meteringService.hasEnoughCredits(actor, 1); // we checked earlier; assume the same costs

                if ( ! fallbackUsageAllowed ) {
                    fallBackModel = usageLimitedModel;
                }

                const provider = this.#providers[fallBackModel.provider!];
                if ( ! provider ) {
                    throw new Error(`no provider found for model ${fallBackModel.id}`);
                }
                try {
                    res = await provider.complete({
                        ...parameters,
                        model: fallBackModel.id,
                        provider: fallBackModel.provider,
                    });
                    model = fallBackModel;
                    break; // success
                } catch (e) {
                    console.error('error during fallback attempt: ', e);
                    error = e as Error;
                }
            }
        }

        resMetadata.service_used = model.provider; // legacy field
        resMetadata.providerUsed = model.id;

        // Add flag if we're using the usage-limited service
        if ( model.provider === 'usage-limited-chat' ) {
            resMetadata.usage_limited = true;
        }

        const username = actor.type?.user?.username;

        if ( ! res! ) {
            throw new Error('No response from AI chat provider');
        }

        res.via_ai_chat_service = true; // legacy field; always true now
        if ( res.stream ) {
            if ( res.init_chat_stream ) {
                const stream = new PassThrough();
                // TODO DS: simplify how we handle streaming responses and remove custom runtime types
                const retval = new TypedValue({
                    $: 'stream',
                    content_type: 'application/x-ndjson',
                    chunked: true,
                }, stream);

                const chatStream = new Streaming.AIChatStream({
                    stream,
                });

                (async () => {
                    try {
                        await res.init_chat_stream({ chatStream });
                    } catch (e) {
                        this.errors.report('error during stream response', {
                            source: e,
                        });
                        stream.write(`${JSON.stringify({
                            type: 'error',
                            message: (e as Error).message,
                        }) }\n`);
                        stream.end();
                    } finally {
                        if ( res.finally_fn ) {
                            await res.finally_fn();
                        }
                    }
                })();

                return retval;
            }

            return res;
        }
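        // The streamed value above is newline-delimited JSON: each line is one
        // event object. A minimal consumer sketch (readLines is a hypothetical
        // helper; the real transport depends on the caller):
        //
        //     for await ( const line of readLines(stream) ) {
        //         const event = JSON.parse(line);
        //         if ( event.type === 'error' ) throw new Error(event.message);
        //         if ( event.type === 'text' ) process.stdout.write(event.text);
        //     }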

        await this.eventService.emit('ai.prompt.complete', {
            username,
            intended_service: intendedProvider,
            parameters,
            result: res,
            model_used: model.id,
            service_used: model.provider,
        });

        if ( parameters.response?.normalize ) {
            res = {
                ...res,
                message: normalize_single_message(res.message),
                normalized: true,
            };
        }
        return res;
    }

    async moderate ({ messages }: { messages: Array<unknown>; }) {
        if ( process.env.TEST_MODERATION_FAILURE ) return false;
        const fulltext = extract_text(messages);
        let mod_last_error;
        let mod_result: Awaited<ReturnType<IChatProvider['checkModeration']>>;
        try {
            const openaiProvider = this.#providers['openai-completion'];
            mod_result = await openaiProvider.checkModeration(fulltext);
            if ( mod_result.flagged ) return false;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }
        try {
            const claudeChatProvider = this.#providers['claude'];
            const mod = new AsModeration({
                chatProvider: claudeChatProvider,
                model: 'claude-3-haiku-20240307',
            });
            if ( ! await mod.moderate(fulltext) ) {
                return false;
            }
            mod_last_error = null;
            return true;
        } catch (e) {
            console.error(e);
            mod_last_error = e;
        }

        if ( mod_last_error ) {
            this.log.error('moderation error', {
                fulltext,
                mod_last_error,
            });
            throw new Error('no working moderation service');
        }
        return true;
    }
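    // Moderation cascade: OpenAI's moderation endpoint is tried first; if it is
    // unavailable, a Claude model is prompted to act as a moderator via
    // AsModeration. Returning true means "safe to proceed". Example outcomes
    // (hypothetical): flagged by OpenAI -> false; OpenAI down but Claude
    // approves -> true; both providers throw -> an error is raised instead of
    // silently letting content through.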

    /**
     * Find an appropriate fallback model. Models with the same id but a
     * different provider are preferred; otherwise equivalent models hosted
     * on openrouter/togetherai are looked up (cached in KV) and the first
     * one not already tried is selected.
     *
     * @param modelId - id of the model that failed
     * @param triedIds - model ids already attempted
     * @param triedProviders - providers already attempted
     * @returns the fallback model id and provider, or undefined if none remain
     */
    getFallbackModel (modelId: string, triedIds: string[], triedProviders: string[]) {
        const models = this.#modelIdMap[modelId];

        if ( ! models ) {
            this.log.error('could not find model', { modelId });
            throw new Error('could not find model');
        }

        const targetModel = models[0];

        // First see if any models with the same id but a different provider exist
        for ( const model of models ) {
            if ( triedProviders.includes(model.provider!) ) continue;
            if ( model.provider === 'fake-chat' ) continue;
            return {
                fallbackProvider: model.provider,
                fallbackModelId: model.id,
            };
        }

        // Then check KV for the cached list
        let potentialFallbacks = kv.get(`aichat:fallbacks:${targetModel.id}`);

        if ( ! potentialFallbacks ) {
            // Calculate the list
            const models = this.models();

            let aiProvider, modelToSearch;
            if ( targetModel.id.startsWith('openrouter:') || targetModel.id.startsWith('togetherai:') ) {
                [aiProvider, modelToSearch] = targetModel.id.replace('openrouter:', '').replace('togetherai:', '').toLowerCase().split('/');
            } else {
                [aiProvider, modelToSearch] = [
                    targetModel.provider!.toLowerCase().replace('gemini', 'google').replace('openai-completion', 'openai'),
                    targetModel.id.toLowerCase(),
                ];
            }

            const potentialMatches = models.filter(model => {
                const possibleModelNames = [
                    `openrouter:${aiProvider}/${modelToSearch}`,
                    `togetherai:${aiProvider}/${modelToSearch}`,
                    ...(targetModel.aliases?.map(alias => [
                        `openrouter:${aiProvider}/${alias}`,
                        `togetherai:${aiProvider}/${alias}`,
                    ])?.flat() ?? []),
                ];

                return !!possibleModelNames.find(possibleName => model.id.toLowerCase() === possibleName);
            }).slice(0, MAX_FALLBACKS);

            kv.set(`aichat:fallbacks:${targetModel.id}`, potentialMatches);
            potentialFallbacks = potentialMatches;
        }

        for ( const model of potentialFallbacks ) {
            if ( triedIds.includes(model.id) ) continue;
            if ( model.provider === 'fake-chat' ) continue;

            return {
                fallbackProvider: model.provider,
                fallbackModelId: model.id,
            };
        }

        // No fallbacks available
        console.error('no fallbacks', {
            potentialFallbacks,
            triedIds,
            triedProviders,
        });
    }
}
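A sketch of how a caller drives the fallback selection (hypothetical `service` instance and example ids; the return shape comes from the method above):

// Hypothetical driver for getFallbackModel; the real loop lives in complete().
const tried: string[] = ['claude-sonnet-4-5-20250929'];
const triedProviders: string[] = ['claude'];
const fallback = service.getFallbackModel('claude-sonnet-4-5-20250929', tried, triedProviders);
if ( fallback ) {
    // e.g. { fallbackProvider: 'openrouter',
    //        fallbackModelId: 'openrouter:anthropic/claude-sonnet-4-5' }
    tried.push(fallback.fallbackModelId);
    triedProviders.push(fallback.fallbackProvider!);
}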
26 src/backend/src/services/ai/chat/providers/ChatProvider.ts Normal file
@@ -0,0 +1,26 @@
import { ModerationCreateResponse } from 'openai/resources/moderations.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './types';

/**
 * Abstract base class for AI chat providers, and default hollow implementation.
 */
export class ChatProvider implements IChatProvider {
    getDefaultModel (): string {
        return '';
    }
    models (): IChatModel[] | Promise<IChatModel[]> {
        return [];
    }
    list (): string[] | Promise<string[]> {
        return [];
    }
    async checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        return {
            flagged: false,
            results: {} as ModerationCreateResponse,
        };
    }
    async complete (_arg: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        throw new Error('Method not implemented.');
    }
}
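A minimal sketch of extending the base class (EchoProvider is hypothetical; it only shows which members a concrete provider typically overrides, and the object shapes are assumptions cast to the interface types):

import { ChatProvider } from './ChatProvider';
import { IChatModel, IChatProvider, ICompleteArguments } from './types';

class EchoProvider extends ChatProvider {
    getDefaultModel (): string {
        return 'echo-1';
    }
    models (): IChatModel[] {
        // One zero-cost model; the costs shape follows the other model files.
        return [{
            id: 'echo-1',
            costs_currency: 'usd-cents',
            costs: { tokens: 1_000_000, input_tokens: 0, output_tokens: 0 },
            max_tokens: 8192,
        } as IChatModel];
    }
    async complete ({ messages }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        // Echo the last message back; real providers call their SDK here.
        const last = messages[messages.length - 1];
        return {
            message: { role: 'assistant', content: last?.content ?? '' },
            usage: { input_tokens: 0, output_tokens: 0 },
            finish_reason: 'stop',
        } as Awaited<ReturnType<IChatProvider['complete']>>;
    }
}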
@@ -1,33 +1,25 @@
 import { describe, expect, it, test } from 'vitest';
-import { createTestKernel } from '../../../tools/test.mjs';
-import { COST_MAPS } from '../../services/MeteringService/costMaps';
-import { SUService } from '../../services/SUService';
-import { AIChatService } from './AIChatService';
-import { ClaudeService } from './ClaudeService';
+import { createTestKernel } from '../../../../../../tools/test.mjs';
+import { COST_MAPS } from '../../../../MeteringService/costMaps/index.js';
+import { SUService } from '../../../../SUService.js';
+import { ClaudeProvider } from './ClaudeProvider.js';

-describe('ClaudeService ', async () => {
+describe('ClaudeProvider ', async () => {
     const testKernel = await createTestKernel({
         serviceMap: {
-            'claude': ClaudeService,
-            'ai-chat': AIChatService,
         },
         initLevelString: 'init',
         testCore: true,
         serviceConfigOverrideMap: {
             'database': {
                 path: ':memory:',
             },
             'claude': {
                 apiKey: process.env.PUTER_CLAUDE_API_KEY,
             },
         },
     });

-    const target = testKernel.services!.get('claude') as ClaudeService;
+    const target = new ClaudeProvider(testKernel.services!.get('meteringService'), { apiKey: process.env.PUTER_CLAUDE_API_KEY || '' }, testKernel.services?.get('error-service'));
     const su = testKernel.services!.get('su') as SUService;

     it('should have all models mapped in cost maps', async () => {
-        const models = await target.models();
+        const models = target.models();

         for ( const model of models ) {
             const entry = Object.entries(COST_MAPS).find(([key, _value]) => key.startsWith('claude') && key.includes(model.id));
@@ -0,0 +1,321 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import Anthropic, { toFile } from '@anthropic-ai/sdk';
import { Message } from '@anthropic-ai/sdk/resources';
import { BetaUsage } from '@anthropic-ai/sdk/resources/beta.js';
import { MessageCreateParams as BetaMessageCreateParams } from '@anthropic-ai/sdk/resources/beta/messages/messages.js';
import { MessageCreateParams, Usage } from '@anthropic-ai/sdk/resources/messages.js';
import mime from 'mime-types';
import FSNodeParam from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { ErrorService } from '../../../../../modules/core/ErrorService.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { make_claude_tools } from '../../../utils/FunctionCalling.js';
import { extract_and_remove_system_messages } from '../../../utils/Messages.js';
import { AIChatStream, AIChatTextStream, AIChatToolUseStream } from '../../../utils/Streaming.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { CLAUDE_MODELS } from './models.js';
export class ClaudeProvider implements IChatProvider {
    anthropic: Anthropic;

    #meteringService: MeteringService;

    errorService: ErrorService;

    constructor (meteringService: MeteringService, config: { apiKey: string }, errorService: ErrorService) {
        this.#meteringService = meteringService;
        this.errorService = errorService;
        this.anthropic = new Anthropic({
            apiKey: config.apiKey,
            // 10 minutes is the default; we need to override the timeout to
            // disable an "aggressive" preemptive error that's thrown
            // erroneously by the SDK.
            // (https://github.com/anthropics/anthropic-sdk-typescript/issues/822)
            timeout: 10 * 60 * 1001,
        });
    }
    getDefaultModel () {
        return 'claude-haiku-4-5-20251001';
    }

    async list () {
        const models = this.models();
        const model_names: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
            if ( model.aliases ) {
                model_names.push(...model.aliases);
            }
        }
        return model_names;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        tools = make_claude_tools(tools);

        let system_prompts: string | any[];
        // unsure why system_prompts is an array but it always seems to only have exactly one element,
        // and the real array of system_prompts seems to be the [0].content -- NS
        [system_prompts, messages] = extract_and_remove_system_messages(messages);

        // Apply the cache control tag to all content blocks
        if (
            system_prompts.length > 0 &&
            system_prompts[0].cache_control &&
            system_prompts[0]?.content
        ) {
            system_prompts[0].content = system_prompts[0].content.map((prompt: { cache_control: unknown }) => {
                prompt.cache_control = system_prompts[0].cache_control;
                return prompt;
            });
        }

        messages = messages.map(message => {
            if ( message.cache_control ) {
                message.content[0].cache_control = message.cache_control;
            }
            delete message.cache_control;
            return message;
        });
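        // Shape sketch (hypothetical message): an incoming message like
        //     { role: 'user', cache_control: { type: 'ephemeral' },
        //       content: [{ type: 'text', text: '...' }] }
        // leaves this map as
        //     { role: 'user',
        //       content: [{ type: 'text', text: '...', cache_control: { type: 'ephemeral' } }] }
        // i.e. the message-level tag is pushed down onto the first content
        // block, which is where Anthropic's API expects it.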

        const modelUsed = this.models().find(m => [m.id, ...(m.aliases || [])].includes(model)) || this.models().find(m => m.id === this.getDefaultModel())!;
        const sdkParams: MessageCreateParams = {
            model: modelUsed.id,
            max_tokens: Math.floor(max_tokens ||
                ((
                    model === 'claude-3-5-sonnet-20241022'
                    || model === 'claude-3-5-sonnet-20240620'
                ) ? 8192 : this.models().filter(e => (e.id === model || e.aliases?.includes(model)))[0]?.max_tokens || 4096)), // required
            temperature: temperature || 0, // required
            ...( (system_prompts && system_prompts[0]?.content) ? {
                system: system_prompts[0]?.content,
            } : {}),
            tool_choice: {
                type: 'auto',
                disable_parallel_tool_use: true,
            },
            messages,
            ...(tools ? { tools } : {}),
        } as MessageCreateParams;

        let beta_mode = false;

        // Perform file uploads
        const file_delete_tasks: { file_id: string }[] = [];
        const actor = Context.get('actor');
        const { user } = actor.type;

        const file_input_tasks: any[] = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises: Promise<unknown>[] = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });

                const mimeType = mime.contentType(await task.node.get('name'));

                beta_mode = true;
                const fileUpload = await this.anthropic.beta.files.upload({
                    file: await toFile(stream, undefined, { type: mimeType as string }),
                }, {
                    betas: ['files-api-2025-04-14'],
                } as Parameters<typeof this.anthropic.beta.files.upload>[1]);

                file_delete_tasks.push({ file_id: fileUpload.id });
                // We have to copy a table from the documentation here:
                // https://docs.anthropic.com/en/docs/build-with-claude/files
                const contentBlockTypeForFileBasedOnMime = (() => {
                    if ( mimeType && mimeType.startsWith('image/') ) {
                        return 'image';
                    }
                    if ( mimeType && mimeType.startsWith('text/') ) {
                        return 'document';
                    }
                    if ( mimeType && (mimeType === 'application/pdf' || mimeType === 'application/x-pdf') ) {
                        return 'document';
                    }
                    return 'container_upload';
                })();

                delete task.contentPart.puter_path;
                task.contentPart.type = contentBlockTypeForFileBasedOnMime;
                task.contentPart.source = {
                    type: 'file',
                    file_id: fileUpload.id,
                };
            })());
        }
        await Promise.all(promises);

        const cleanup_files = async () => {
            const promises: Promise<unknown>[] = [];
            for ( const task of file_delete_tasks ) {
                promises.push((async () => {
                    try {
                        await this.anthropic.beta.files.delete(task.file_id,
                            { betas: ['files-api-2025-04-14'] });
                    } catch (e) {
                        this.errorService.report('claude:file-delete-task', {
                            source: e,
                            trace: true,
                            alarm: true,
                            extra: { file_id: task.file_id },
                        });
                    }
                })());
            }
            await Promise.all(promises);
        };

        if ( beta_mode ) {
            (sdkParams as BetaMessageCreateParams).betas = ['files-api-2025-04-14'];
        }
        const anthropic = (beta_mode ? this.anthropic.beta : this.anthropic) as Anthropic;

        if ( stream ) {
            const init_chat_stream = async ({ chatStream }: { chatStream: AIChatStream }) => {
                const completion = await anthropic.messages.stream(sdkParams as MessageCreateParams);
                const usageSum: Record<string, number> = {};

                let message, contentBlock;
                for await ( const event of completion ) {
                    if ( event.type === 'message_delta' ) {
                        const usageObject = (event?.usage ?? {});
                        const meteredData = this.#usageFormatterUtil(usageObject as Usage | BetaUsage);

                        for ( const key in meteredData ) {
                            if ( ! usageSum[key] ) usageSum[key] = 0;
                            usageSum[key] += meteredData[key as keyof typeof meteredData];
                        }
                    }

                    if ( event.type === 'message_start' ) {
                        message = chatStream.message();
                        continue;
                    }
                    if ( event.type === 'message_stop' ) {
                        message!.end();
                        message = null;
                        continue;
                    }

                    if ( event.type === 'content_block_start' ) {
                        if ( event.content_block.type === 'tool_use' ) {
                            contentBlock = message!.contentBlock({
                                type: event.content_block.type,
                                id: event.content_block.id,
                                name: event.content_block.name,
                            });
                            continue;
                        }
                        contentBlock = message!.contentBlock({
                            type: event.content_block.type,
                        });
                        continue;
                    }

                    if ( event.type === 'content_block_stop' ) {
                        contentBlock!.end();
                        contentBlock = null;
                        continue;
                    }

                    if ( event.type === 'content_block_delta' ) {
                        if ( event.delta.type === 'input_json_delta' ) {
                            (contentBlock as AIChatToolUseStream)!.addPartialJSON(event.delta.partial_json);
                            continue;
                        }
                        if ( event.delta.type === 'text_delta' ) {
                            (contentBlock as AIChatTextStream)!.addText(event.delta.text);
                            continue;
                        }
                    }
                }
                chatStream.end(usageSum);
                const costsOverrideFromModel = Object.fromEntries(Object.entries(usageSum).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${modelUsed.id}`, costsOverrideFromModel);
            };

            return {
                init_chat_stream,
                stream: true,
                finally_fn: cleanup_files,
            };
        }

        const msg = await anthropic.messages.create(sdkParams);
        await cleanup_files();

        const usage = this.#usageFormatterUtil((msg as Message).usage as Usage | BetaUsage);
        const costsOverrideFromModel = Object.fromEntries(Object.entries(usage).map(([k, v]) => {
            return [k, v * (modelUsed.costs[k] || 0)];
        }));
        this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${modelUsed.id}`, costsOverrideFromModel);

        // TODO DS: cleanup old usage tracking
        return {
            message: msg,
            usage: usage,
            finish_reason: 'stop',
        };
    }

    #usageFormatterUtil (usage: Usage | BetaUsage) {
        return {
            input_tokens: usage?.input_tokens || 0,
            ephemeral_5m_input_tokens: usage?.cache_creation?.ephemeral_5m_input_tokens || usage.cache_creation_input_tokens || 0, // their API reports cache-creation tokens inconsistently
            ephemeral_1h_input_tokens: usage?.cache_creation?.ephemeral_1h_input_tokens || 0,
            cache_read_input_tokens: usage?.cache_read_input_tokens || 0,
            output_tokens: usage?.output_tokens || 0,
        };
    };
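    // Cost sketch (hypothetical numbers): costs in the model table are
    // usd-cents per costs.tokens (1,000,000) tokens. For claude-haiku-4-5
    // (input_tokens: 100, output_tokens: 500), a call with
    // usage = { input_tokens: 2_000, output_tokens: 1_000 } produces override
    // entries input_tokens: 2_000 * 100 and output_tokens: 1_000 * 500, which
    // the metering service is assumed to scale by the per-million denominator
    // when recording the charge.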

    models () {
        return CLAUDE_MODELS;
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('CheckModeration Not provided.');
    }
}
@@ -0,0 +1,184 @@
import { IChatModel } from '../types';

export const CLAUDE_MODELS: IChatModel[] = [
    {
        id: 'claude-opus-4-5-20251101',
        aliases: ['claude-opus-4-5-latest', 'claude-opus-4-5', 'claude-opus-4.5'],
        name: 'Claude Opus 4.5',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 500,
            ephemeral_5m_input_tokens: 500 * 1.25,
            ephemeral_1h_input_tokens: 500 * 2,
            cache_read_input_tokens: 500 * 0.1,
            output_tokens: 2500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-haiku-4-5-20251001',
        aliases: ['claude-haiku-4.5', 'claude-haiku-4-5', 'claude-4-5-haiku'],
        name: 'Claude Haiku 4.5',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 100,
            ephemeral_5m_input_tokens: 100 * 1.25,
            ephemeral_1h_input_tokens: 100 * 2,
            cache_read_input_tokens: 100 * 0.1,
            output_tokens: 500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-sonnet-4-5-20250929',
        aliases: ['claude-sonnet-4.5', 'claude-sonnet-4-5'],
        name: 'Claude Sonnet 4.5',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-opus-4-1-20250805',
        aliases: ['claude-opus-4-1'],
        name: 'Claude Opus 4.1',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 1500,
            ephemeral_5m_input_tokens: 1500 * 1.25,
            ephemeral_1h_input_tokens: 1500 * 2,
            cache_read_input_tokens: 1500 * 0.1,
            output_tokens: 7500,
        },
        context: 200000,
        max_tokens: 32000,
    },
    {
        id: 'claude-opus-4-20250514',
        aliases: ['claude-opus-4', 'claude-opus-4-latest'],
        name: 'Claude Opus 4',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 1500,
            ephemeral_5m_input_tokens: 1500 * 1.25,
            ephemeral_1h_input_tokens: 1500 * 2,
            cache_read_input_tokens: 1500 * 0.1,
            output_tokens: 7500,
        },
        context: 200000,
        max_tokens: 32000,
    },
    {
        id: 'claude-sonnet-4-20250514',
        aliases: ['claude-sonnet-4', 'claude-sonnet-4-latest'],
        name: 'Claude Sonnet 4',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000,
        max_tokens: 64000,
    },
    {
        id: 'claude-3-7-sonnet-20250219',
        aliases: ['claude-3-7-sonnet-latest'],
        succeeded_by: 'claude-sonnet-4-20250514',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000,
        max_tokens: 8192,
    },
    {
        id: 'claude-3-5-sonnet-20241022',
        name: 'Claude 3.5 Sonnet',
        aliases: ['claude-3-5-sonnet-latest'],
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        qualitative_speed: 'fast',
        training_cutoff: '2024-04',
        context: 200000,
        max_tokens: 8192,
    },
    {
        id: 'claude-3-5-sonnet-20240620',
        succeeded_by: 'claude-3-5-sonnet-20241022',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 300,
            ephemeral_5m_input_tokens: 300 * 1.25,
            ephemeral_1h_input_tokens: 300 * 2,
            cache_read_input_tokens: 300 * 0.1,
            output_tokens: 1500,
        },
        context: 200000, // might be wrong
        max_tokens: 8192,
    },
    {
        id: 'claude-3-haiku-20240307',
        costs_currency: 'usd-cents',
        input_cost_key: 'input_tokens',
        output_cost_key: 'output_tokens',
        costs: {
            tokens: 1_000_000,
            input_tokens: 25,
            ephemeral_5m_input_tokens: 25 * 1.25,
            ephemeral_1h_input_tokens: 25 * 2,
            cache_read_input_tokens: 25 * 0.1,
            output_tokens: 125,
        },
        qualitative_speed: 'fastest',
        context: 200000,
        max_tokens: 4096,
    },
];
@@ -0,0 +1,131 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import dedent from 'dedent';
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { DEEPSEEK_MODELS } from './models.js';

export class DeepSeekProvider implements IChatProvider {
    #openai: OpenAI;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: 'https://api.deepseek.com',
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'deepseek-chat';
    }

    models () {
        return DEEPSEEK_MODELS;
    }

    async list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const availableModels = this.models();
        const modelUsed = availableModels.find(m => [m.id, ...(m.aliases || [])].includes(model)) || availableModels.find(m => m.id === this.getDefaultModel())!;

        messages = await OpenAIUtil.process_input_messages(messages);
        for ( const message of messages ) {
            // DeepSeek doesn't accept string arrays alongside tool calls
            if ( message.tool_calls && Array.isArray(message.content) ) {
                message.content = '';
            }
        }

        // Function calling currently loops unless we inject the tool result as a system message.
        const TOOL_TEXT = (message: { tool_call_id: string; content: string }) => dedent(`
            Hi DeepSeek V3, your tool calling is broken and you are not able to
            obtain tool results in the expected way. That's okay, we can work
            around this.

            Please do not repeat this tool call.

            We have provided the tool call results below:

            Tool call ${message.tool_call_id} returned: ${message.content}.
        `);
        for ( let i = messages.length - 1; i >= 0; i-- ) {
            const message = messages[i];
            if ( message.role === 'tool' ) {
                messages.splice(i + 1, 0, {
                    role: 'system',
                    content: [
                        {
                            type: 'text',
                            text: TOOL_TEXT(message),
                        },
                    ],
                });
            }
        }
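        // Splice sketch (hypothetical transcript): given
        //     [user, assistant(tool_calls), tool(id='call_1', content='42')]
        // the loop above inserts a system message right after the tool
        // message, yielding
        //     [user, assistant(tool_calls), tool, system(TOOL_TEXT)]
        // Iterating backwards keeps earlier indices valid while splicing.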

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            max_tokens: max_tokens || 1000,
            temperature,
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `deepseek:${modelUsed.id}`, costsOverrideFromModel);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
@@ -0,0 +1,36 @@
import { IChatModel } from '../types.js';

export const DEEPSEEK_MODELS: IChatModel[] = [
    {
        id: 'deepseek-chat',
        name: 'DeepSeek Chat',
        aliases: [],
        context: 128000,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 56,
            completion_tokens: 168,
            cached_tokens: 0,
        },
        max_tokens: 8000,
    },
    {
        id: 'deepseek-reasoner',
        name: 'DeepSeek Reasoner',
        aliases: [],
        context: 128000,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 56,
            completion_tokens: 168,
            cached_tokens: 0,
        },
        max_tokens: 64000,
    },
];
173 src/backend/src/services/ai/chat/providers/FakeChatProvider.ts Normal file
@@ -0,0 +1,173 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
// METADATA // {"ai-commented":{"service":"claude"}}

import dedent from 'dedent';
import { LoremIpsum } from 'lorem-ipsum';
import { AIChatStream } from '../../utils/Streaming';
import { IChatProvider, ICompleteArguments } from './types';

export class FakeChatProvider implements IChatProvider {
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    getDefaultModel () {
        return 'fake';
    }

    async models () {
        return [
            {
                id: 'fake',
                aliases: [],
                costs_currency: 'usd-cents',
                costs: {
                    'input-tokens': 0,
                    'output-tokens': 0,
                },
                max_tokens: 8192,
            },
            {
                id: 'costly',
                aliases: [],
                costs_currency: 'usd-cents',
                costs: {
                    'input-tokens': 1000, // 1000 microcents per million tokens (0.001 cents per 1000 tokens)
                    'output-tokens': 2000, // 2000 microcents per million tokens (0.002 cents per 1000 tokens)
                },
                max_tokens: 8192,
            },
            {
                id: 'abuse',
                aliases: [],
                costs_currency: 'usd-cents',
                costs: {
                    'input-tokens': 0,
                    'output-tokens': 0,
                },
                max_tokens: 8192,
            },
        ];
    }
    async list () {
        return ['fake', 'costly', 'abuse'];
    }
    async complete ({ messages, stream, model, max_tokens, custom }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        // Determine token counts based on messages and model
        const usedModel = model || this.getDefaultModel();

        // For the costly model, simulate actual token counting
        const resp = this.getFakeResponse(usedModel, custom, messages, max_tokens);

        if ( stream ) {
            return {
                init_chat_stream: async ({ chatStream }: { chatStream: AIChatStream }) => {
                    await new Promise(rslv => setTimeout(rslv, 500));
                    chatStream.stream.write(`${JSON.stringify({
                        type: 'text',
                        text: (await resp).message.content[0].text,
                    }) }\n`);
                    chatStream.end();
                },
                stream: true,
                finally_fn: async () => {
                    // no op
                },
            };
        }

        return resp;
    }
    async getFakeResponse (modelId: string, custom: unknown, messages: any[], maxTokens: number = 8192): ReturnType<IChatProvider['complete']> {
        let inputTokens = 0;
        let outputTokens = 0;

        if ( modelId === 'costly' ) {
            // Simple token estimation: roughly 4 chars per token for input
            if ( messages && messages.length > 0 ) {
                for ( const message of messages ) {
                    if ( typeof message.content === 'string' ) {
                        inputTokens += Math.ceil(message.content.length / 4);
                    } else if ( Array.isArray(message.content) ) {
                        for ( const content of message.content ) {
                            if ( content.type === 'text' ) {
                                inputTokens += Math.ceil(content.text.length / 4);
                            }
                        }
                    }
                }
            }

            // Generate a random output token count between 50 and 200
            outputTokens = Math.floor(Math.min((Math.random() * 150) + 50, maxTokens));
        }
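        // Worked example (hypothetical input): a single user message of
        // 'hello world, how are you today?' (31 chars) estimates to
        // ceil(31 / 4) = 8 input tokens, and the output count lands somewhere
        // in [50, 200] unless capped by maxTokens.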

        // Generate the response text
        let responseText;
        if ( modelId === 'abuse' ) {
            responseText = dedent(`
                <h2>Free AI and Cloud for everyone!</h2><br />
                Come on down to <a href="https://puter.com">puter.com</a> and try it out!
                ${custom ?? ''}
            `);
        } else {
            // Generate 1-3 paragraphs for both fake and costly models
            responseText = new LoremIpsum({
                sentencesPerParagraph: {
                    max: 8,
                    min: 4,
                },
                wordsPerSentence: {
                    max: 20,
                    min: 12,
                },
            }).generateParagraphs(Math.floor(Math.random() * 3) + 1);
        }

        // Report usage based on model
        const usage = {
            'input_tokens': modelId === 'costly' ? inputTokens : 0,
            'output_tokens': modelId === 'costly' ? outputTokens : 1,
        };

        return {
            message: {
                'id': '00000000-0000-0000-0000-000000000000',
                'type': 'message',
                'role': 'assistant',
                'model': modelId,
                'content': [
                    {
                        'type': 'text',
                        'text': responseText,
                    },
                ],
                'stop_reason': 'end_turn',
                'stop_sequence': null,
                'usage': usage,
            },
            'usage': usage,
            'finish_reason': 'stop',
        };
    }
}
@@ -0,0 +1,94 @@
// Preamble: we previously used Gemini's SDK directly and, as we found out,
// it's actually kind of terrible. So we use the OpenAI SDK now.
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { handle_completion_output, process_input_messages } from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { GEMINI_MODELS } from './models.js';

export class GeminiChatProvider implements IChatProvider {

    meteringService: MeteringService;
    openai: OpenAI;

    defaultModel = 'gemini-2.5-flash';

    constructor (meteringService: MeteringService, config: { apiKey: string }) {
        this.meteringService = meteringService;
        this.openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
        });
    }

    getDefaultModel () {
        return this.defaultModel;
    }

    async models () {
        return GEMINI_MODELS;
    }
    async list () {
        return (await this.models()).map(m => [m.id, ...(m.aliases || [])]).flat();
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        messages = await process_input_messages(messages);

        // delete cache_control
        messages = messages.map(m => {
            delete m.cache_control;
            return m;
        });

        const modelUsed = (await this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (await this.models()).find(m => m.id === this.getDefaultModel())!;
        const sdk_params: ChatCompletionCreateParams = {
            messages: messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams;

        let completion;
        try {
            completion = await this.openai.chat.completions.create(sdk_params);
        } catch (e) {
            console.error('Gemini completion error: ', e);
            throw e;
        }

        return handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: usage.completion_tokens ?? 0,
                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };

                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelUsed.id}`, costsOverrideFromModel);

                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('No moderation logic.');
    }
}
@@ -0,0 +1,94 @@
import { IChatModel } from '../types';

export const GEMINI_MODELS: IChatModel[] = [
    {
        id: 'gemini-2.0-flash',
        name: 'Gemini 2.0 Flash',
        context: 131072,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 10,
            completion_tokens: 40,
            cached_tokens: 3,
        },
        max_tokens: 8192,
    },
    {
        id: 'gemini-2.0-flash-lite',
        name: 'Gemini 2.0 Flash-Lite',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 8,
            completion_tokens: 30,
        },
        max_tokens: 8192,
    },
    {
        id: 'gemini-2.5-flash',
        name: 'Gemini 2.5 Flash',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 30,
            completion_tokens: 250,
            cached_tokens: 3,
        },
        max_tokens: 65536,
    },
    {
        id: 'gemini-2.5-flash-lite',
        name: 'Gemini 2.5 Flash-Lite',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 10,
            completion_tokens: 40,
            cached_tokens: 1,
        },
        max_tokens: 65536,
    },
    {
        id: 'gemini-2.5-pro',
        name: 'Gemini 2.5 Pro',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            completion_tokens: 1000,
            cached_tokens: 13,
        },
        max_tokens: 200_000,
    },
    {
        id: 'gemini-3-pro-preview',
        name: 'Gemini 3 Pro',
        context: 1_048_576,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 200,
            completion_tokens: 1200,
            cached_tokens: 20,
        },
        max_tokens: 200_000,
    },
];
@@ -0,0 +1,104 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import Groq from 'groq-sdk';
import { ChatCompletionCreateParams } from 'groq-sdk/resources/chat/completions.mjs';
import { CompletionUsage } from 'openai/resources';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { GROQ_MODELS } from './models.js';

export class GroqAIProvider implements IChatProvider {
    #client: Groq;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#client = new Groq({
            apiKey: config.apiKey,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'llama-3.1-8b-instant';
    }

    models () {
        return GROQ_MODELS;
    }

    async list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    async complete ({ messages, model, stream, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const availableModels = this.models();
        const modelUsed = availableModels.find(m => [m.id, ...(m.aliases || [])].includes(model)) || availableModels.find(m => m.id === this.getDefaultModel())!;

        messages = await OpenAIUtil.process_input_messages(messages);
        for ( const message of messages ) {
            if ( message.tool_calls && Array.isArray(message.content) ) {
                message.content = '';
            }
        }

        const completion = await this.#client.chat.completions.create({
            messages,
            model: modelUsed.id,
            stream,
            tools,
            max_completion_tokens: max_tokens,
            temperature,
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            deviations: {
                index_usage_from_stream_chunk: chunk =>
                    // x_groq contains usage details for streamed responses
                    (chunk as { x_groq?: { usage?: CompletionUsage } }).x_groq?.usage,
            },
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverride = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `groq:${modelUsed.id}`, costsOverride);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }
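    // Stream-usage sketch: for streamed responses Groq reports usage on a
    // vendor field rather than the OpenAI-style usage property. A final chunk
    // looks roughly like (hypothetical values):
    //     { choices: [...], x_groq: { usage: { prompt_tokens: 12,
    //       completion_tokens: 34, total_tokens: 46 } } }
    // which is why index_usage_from_stream_chunk reads chunk.x_groq?.usage.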

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
@@ -0,0 +1,156 @@
import { IChatModel } from '../types.js';

const makeModel = ({
    id,
    name,
    context,
    input,
    output,
    max_tokens,
}: {
    id: string;
    name: string;
    context?: number;
    input: number;
    output: number;
    max_tokens?: number;
}): IChatModel => ({
    id,
    name,
    context,
    costs_currency: 'usd-cents',
    input_cost_key: 'prompt_tokens',
    output_cost_key: 'completion_tokens',
    costs: {
        tokens: 1_000_000,
        prompt_tokens: input,
        completion_tokens: output,
        cached_tokens: 0,
    },
    max_tokens: max_tokens ?? context ?? 8192,
});

export const GROQ_MODELS: IChatModel[] = [
    makeModel({ id: 'gemma2-9b-it', name: 'Gemma 2 9B 8k', context: 8192, input: 20, output: 20 }),
    makeModel({ id: 'gemma-7b-it', name: 'Gemma 7B 8k Instruct', context: 8192, input: 7, output: 7 }),
    makeModel({ id: 'llama3-groq-70b-8192-tool-use-preview', name: 'Llama 3 Groq 70B Tool Use Preview 8k', context: 8192, input: 89, output: 89 }),
    makeModel({ id: 'llama3-groq-8b-8192-tool-use-preview', name: 'Llama 3 Groq 8B Tool Use Preview 8k', context: 8192, input: 19, output: 19 }),
    makeModel({ id: 'llama-3.1-70b-versatile', name: 'Llama 3.1 70B Versatile 128k', context: 128000, input: 59, output: 79 }),
    makeModel({ id: 'llama-3.1-70b-specdec', name: 'Llama 3.1 70B SpecDec 128k', context: 128000, input: 59, output: 99 }),
    makeModel({ id: 'llama-3.1-8b-instant', name: 'Llama 3.1 8B Instant 128k', context: 131072, input: 5, output: 8, max_tokens: 131072 }),
    makeModel({ id: 'meta-llama/llama-guard-4-12b', name: 'Llama Guard 4 12B', context: 131072, input: 20, output: 20, max_tokens: 1024 }),
    makeModel({ id: 'meta-llama/llama-prompt-guard-2-86m', name: 'Prompt Guard 2 86M', context: 512, input: 4, output: 4, max_tokens: 512 }),
    makeModel({ id: 'llama-3.2-1b-preview', name: 'Llama 3.2 1B (Preview) 8k', context: 128000, input: 4, output: 4 }),
    makeModel({ id: 'llama-3.2-3b-preview', name: 'Llama 3.2 3B (Preview) 8k', context: 128000, input: 6, output: 6 }),
    makeModel({ id: 'llama-3.2-11b-vision-preview', name: 'Llama 3.2 11B Vision 8k (Preview)', context: 8000, input: 18, output: 18 }),
    makeModel({ id: 'llama-3.2-90b-vision-preview', name: 'Llama 3.2 90B Vision 8k (Preview)', context: 8000, input: 90, output: 90 }),
    makeModel({ id: 'llama3-70b-8192', name: 'Llama 3 70B 8k', context: 8192, input: 59, output: 79 }),
    makeModel({ id: 'llama3-8b-8192', name: 'Llama 3 8B 8k', context: 8192, input: 5, output: 8 }),
    makeModel({ id: 'mixtral-8x7b-32768', name: 'Mixtral 8x7B Instruct 32k', context: 32768, input: 24, output: 24 }),
    makeModel({ id: 'llama-guard-3-8b', name: 'Llama Guard 3 8B 8k', context: 8192, input: 20, output: 20 }),
];
@@ -0,0 +1,121 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import { Mistral } from '@mistralai/mistralai';
import { ChatCompletionResponse } from '@mistralai/mistralai/models/components/chatcompletionresponse.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { MISTRAL_MODELS } from './models.js';

export class MistralAIProvider implements IChatProvider {
    #client: Mistral;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#client = new Mistral({
            apiKey: config.apiKey,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'mistral-small-2506';
    }

    async models () {
        return MISTRAL_MODELS;
    }

    async list () {
        const models = await this.models();
        const ids: string[] = [];
        for ( const model of models ) {
            ids.push(model.id);
            if ( model.aliases ) {
                ids.push(...model.aliases);
            }
        }
        return ids;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        messages = await OpenAIUtil.process_input_messages(messages);
        for ( const message of messages ) {
            if ( message.tool_calls ) {
                message.toolCalls = message.tool_calls;
                delete message.tool_calls;
            }
            if ( message.tool_call_id ) {
                message.toolCallId = message.tool_call_id;
                delete message.tool_call_id;
            }
        }

        const selectedModel = (await this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (await this.models()).find(m => m.id === this.getDefaultModel())!;
        const actor = Context.get('actor');
        const completion = await this.#client.chat[
            stream ? 'stream' : 'complete'
        ]({
            model: selectedModel.id,
            ...(tools ? { tools: tools as any[] } : {}),
            messages,
            maxTokens: max_tokens,
            temperature,
        });

        return await OpenAIUtil.handle_completion_output({
            deviations: {
                index_usage_from_stream_chunk: chunk => {
                    if ( ! chunk.usage ) return;

                    const snake_usage: Record<string, unknown> = {};
                    for ( const key in chunk.usage ) {
                        const snakeKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
                        snake_usage[snakeKey] = chunk.usage[key];
                    }

                    return snake_usage;
                },
                chunk_but_like_actually: chunk => (chunk as any).data,
                index_tool_calls_from_stream_choice: choice => (choice.delta as any).toolCalls,
                coerce_completion_usage: (completion: ChatCompletionResponse) => ({
                    prompt_tokens: completion.usage.promptTokens,
                    completion_tokens: completion.usage.completionTokens,
                }),
            },
            completion: completion as ChatCompletionResponse,
            stream,
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `mistral:${selectedModel.id}`);
                // Still return legacy cost calculation for compatibility
                return trackedUsage;
            },
        });
    }
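    // Conversion sketch: Mistral's SDK reports usage in camelCase, e.g.
    //     { promptTokens: 12, completionTokens: 34 }
    // The regex replace above turns each key into snake_case
    // ('promptTokens' -> 'prompt_tokens'), matching the OpenAI-style shape
    // that handle_completion_output and the metering utilities expect.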
|
||||
|
||||
checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
|
||||
throw new Error('Method not implemented.');
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,299 @@
import { IChatModel } from '../types';

export const MISTRAL_MODELS: IChatModel[] = [
    {
        id: 'mistral-medium-2508',
        name: 'mistral-medium-2508',
        aliases: [
            'mistral-medium-latest',
            'mistral-medium',
        ],
        max_tokens: 131072,
        description: 'Update on Mistral Medium 3 with improved capabilities.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 40,
            completion_tokens: 200,
        },
    },
    {
        id: 'open-mistral-7b',
        name: 'open-mistral-7b',
        aliases: [
            'mistral-tiny',
            'mistral-tiny-2312',
        ],
        max_tokens: 32768,
        description: 'Our first dense model released September 2023.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 25,
            completion_tokens: 25,
        },
    },
    {
        id: 'open-mistral-nemo',
        name: 'open-mistral-nemo',
        aliases: [
            'open-mistral-nemo-2407',
            'mistral-tiny-2407',
            'mistral-tiny-latest',
        ],
        max_tokens: 131072,
        description: 'Our best multilingual open source model released July 2024.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 15,
            completion_tokens: 15,
        },
    },
    {
        id: 'pixtral-large-2411',
        name: 'pixtral-large-2411',
        aliases: [
            'pixtral-large-latest',
            'mistral-large-pixtral-2411',
        ],
        max_tokens: 131072,
        description: 'Official pixtral-large-2411 Mistral AI model',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 200,
            completion_tokens: 600,
        },
    },
    {
        id: 'codestral-2508',
        name: 'codestral-2508',
        aliases: [
            'codestral-latest',
        ],
        max_tokens: 256000,
        description: 'Our cutting-edge language model for coding released August 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 30,
            completion_tokens: 90,
        },
    },
    {
        id: 'devstral-small-2507',
        name: 'devstral-small-2507',
        aliases: [
            'devstral-small-latest',
        ],
        max_tokens: 131072,
        description: 'Our small open-source code-agentic model.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 30,
            cached_tokens: 0,
        },
    },
    {
        id: 'devstral-medium-2507',
        name: 'devstral-medium-2507',
        aliases: [
            'devstral-medium-latest',
        ],
        max_tokens: 131072,
        description: 'Our medium code-agentic model.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 40,
            completion_tokens: 200,
            cached_tokens: 0,
        },
    },
    {
        id: 'mistral-small-2506',
        name: 'mistral-small-2506',
        aliases: [
            'mistral-small-latest',
        ],
        max_tokens: 131072,
        description: 'Our latest enterprise-grade small model with the latest version released June 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 30,
        },
    },
    {
        id: 'magistral-medium-2509',
        name: 'magistral-medium-2509',
        aliases: [
            'magistral-medium-latest',
        ],
        max_tokens: 131072,
        description: 'Our frontier-class reasoning model release candidate September 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 200,
            completion_tokens: 500,
        },
    },
    {
        id: 'magistral-small-2509',
        name: 'magistral-small-2509',
        aliases: [
            'magistral-small-latest',
        ],
        max_tokens: 131072,
        description: 'Our efficient reasoning model released September 2025.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 50,
            completion_tokens: 150,
        },
    },
    {
        id: 'voxtral-mini-2507',
        name: 'voxtral-mini-2507',
        aliases: [
            'voxtral-mini-latest',
        ],
        max_tokens: 32768,
        description: 'A mini audio understanding model released in July 2025',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 4,
            completion_tokens: 4,
        },
    },
    {
        id: 'voxtral-small-2507',
        name: 'voxtral-small-2507',
        aliases: [
            'voxtral-small-latest',
        ],
        max_tokens: 32768,
        description: 'A small audio understanding model released in July 2025',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 30,
        },
    },
    {
        id: 'mistral-large-latest',
        name: 'mistral-large-2512',
        aliases: [
            'mistral-large-2512',
        ],
        max_tokens: 262144,
        description: 'Official mistral-large-2512 Mistral AI model',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 50,
            completion_tokens: 150,
        },
    },
    {
        id: 'ministral-3b-2512',
        name: 'ministral-3b-2512',
        aliases: [
            'ministral-3b-latest',
        ],
        max_tokens: 131072,
        description: 'Ministral 3 (a.k.a. Tinystral) 3B Instruct.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 10,
            completion_tokens: 10,
        },
    },
    {
        id: 'ministral-8b-2512',
        name: 'ministral-8b-2512',
        aliases: [
            'ministral-8b-latest',
        ],
        max_tokens: 262144,
        description: 'Ministral 3 (a.k.a. Tinystral) 8B Instruct.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 15,
            completion_tokens: 15,
        },
    },
    {
        id: 'ministral-14b-2512',
        name: 'ministral-14b-2512',
        aliases: [
            'ministral-14b-latest',
        ],
        max_tokens: 262144,
        description: 'Ministral 3 (a.k.a. Tinystral) 14B Instruct.',
        provider: 'mistral',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1000000,
            prompt_tokens: 20,
            completion_tokens: 20,
        },
    },
];
159
src/backend/src/services/ai/chat/providers/OllamaProvider.ts
Normal file
@@ -0,0 +1,159 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import axios from 'axios';
import { default as openai, default as OpenAI } from 'openai';
import { Context } from '../../../../util/context.js';
import { kv } from '../../../../util/kvSingleton.js';
import * as OpenAIUtil from '../../utils/OpenAIUtil.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './types';
import { MeteringService } from '../../../MeteringService/MeteringService';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
/**
 * OllamaChatProvider - Provides integration with Ollama's API for chat completions.
 * Implements the IChatProvider interface for the puter-chat-completion driver.
 * Handles model management, message adaptation, streaming responses,
 * and usage tracking for Ollama's language models.
 */
export class OllamaChatProvider implements IChatProvider {

    #apiBaseUrl: string;

    #openai: OpenAI;

    #meteringService: MeteringService;

    constructor (config: { api_base_url?: string } | undefined, meteringService: MeteringService) {
        // Ollama typically runs on HTTP, not HTTPS
        this.#apiBaseUrl = config?.api_base_url || 'http://localhost:11434';

        // OpenAI SDK is used to interact with the Ollama API
        this.#openai = new openai.OpenAI({
            apiKey: 'ollama', // Ollama doesn't use an API key, it uses the "ollama" string
            baseURL: `${this.#apiBaseUrl}/v1`,
        });

        this.#meteringService = meteringService;
    }

    async models () {
        let models = kv.get('ollamaChat:models');
        if ( ! models ) {
            try {
                const resp = await axios.request({
                    method: 'GET',
                    url: `${this.#apiBaseUrl}/api/tags`,
                });
                models = resp.data.models || [];
                if ( models.length > 0 ) {
                    kv.set('ollamaChat:models', models);
                }
            } catch ( error ) {
                console.error('Failed to fetch models from Ollama:', (error as Error).message);
                // Return empty array if Ollama is not available
                return [];
            }
        }

        if ( !models || models.length === 0 ) {
            return [];
        }

        const coerced_models: IChatModel[] = [];
        for ( const model of models ) {
            // Ollama API returns models with 'name' property, not 'model'
            const modelName = model.name || model.model || 'unknown';
            coerced_models.push({
                id: `ollama:${modelName}`,
                name: `${modelName} (Ollama)`,
                max_tokens: model.size || model.max_context || 8192,
                costs_currency: 'usd-cents',
                costs: {
                    tokens: 1_000_000,
                    input_token: 0,
                    output_token: 0,
                },
            });
        }
        console.log('coerced_models', coerced_models);
        return coerced_models;
    }
    async list () {
        const models = await this.models();
        const model_names: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
        }
        return model_names;
    }
    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        if ( model.startsWith('ollama:') ) {
            model = model.slice('ollama:'.length);
        }

        const actor = Context.get('actor');

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: model ?? this.getDefaultModel(),
            ...(tools ? { tools } : {}),
            max_tokens,
            temperature, // defaults to 1.0
            stream: !!stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams);

        const modelDetails = (await this.models()).find(m => m.id === `ollama:${model}`);
        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt: (usage.prompt_tokens ?? 1) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion: usage.completion_tokens ?? 1,
                    input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };
                const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
                    return [k, 0]; // override to 0 since local is free
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails!.id, costOverwrites);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    /**
     * Returns the default model identifier for the Ollama service
     * @returns {string} The default model ID 'gpt-oss:20b'
     */
    getDefaultModel () {
        return 'gpt-oss:20b';
    }
}
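Editor's aside: a minimal sketch of how this provider might be exercised on its own, assuming the surrounding module wiring supplies a MeteringService; the model name is hypothetical and depends on what has been pulled locally.

// Sketch only; `meteringService` comes from the module wiring, and the model
// id reflects whatever `ollama pull` has fetched on the host.
const provider = new OllamaChatProvider({ api_base_url: 'http://localhost:11434' }, meteringService);
const available = await provider.list();   // e.g. ['ollama:llama3.2']
const result = await provider.complete({
    messages: [{ role: 'user', content: 'Say hello.' }],
    model: available[0] ?? provider.getDefaultModel(),
    stream: false,
});

Note that the metering call in complete() still records token counts, but every cost is overridden to zero since local inference is free.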
@@ -19,12 +19,18 @@

import mime from 'mime-types';
import { OpenAI } from 'openai';
import FSNodeParam from '../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../util/context.js';
import { stream_to_buffer } from '../../../util/streamutil.js';
import OpenAIUtil from '../lib/OpenAIUtil.js';
import { OPEN_AI_MODELS } from './models.mjs';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { FSNodeParam } from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../../../util/context.js';
import { stream_to_buffer } from '../../../../../util/streamutil.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAiUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { OPEN_AI_MODELS } from './models.js';

// METADATA // {"ai-commented":{"service":"claude"}}

// We're capping at 5MB, which sucks, but Chat Completions doesn't support
@@ -38,100 +44,200 @@ const MAX_FILE_SIZE = 5 * 1_000_000;
 * OpenAI API interactions with support for multiple models including GPT-4 variants.
 * Handles usage tracking, spending records, and content moderation.
 */
export class OpenAICompletionService {
export class OpenAiChatProvider implements IChatProvider {
    /**
     * @type {import('openai').OpenAI}
     */
    #openAi;
    #openAi: OpenAI;

    #defaultModel;
    #defaultModel = 'gpt-5-nano';

    #models;
    #meteringService: MeteringService;

    /** @type {import('../../../services/MeteringService/MeteringService.js').MeteringService} */
    #meteringService;
    constructor (
        meteringService: MeteringService,
        config: { apiKey?: string, secret_key?: string }) {

    constructor ({ serviceName, config, globalConfig, aiChatService, meteringService, models = OPEN_AI_MODELS, defaultModel = 'gpt-5-nano' }) {
        this.#models = models;
        this.#defaultModel = defaultModel;
        this.#meteringService = meteringService;
        let apiKey =
            config?.services?.openai?.apiKey ??
            globalConfig?.services?.openai?.apiKey;
        let apiKey = config.apiKey;

        // Fallback to the old format for backward compatibility
        if ( ! apiKey ) {
            apiKey =
                config?.openai?.secret_key ??
                globalConfig?.openai?.secret_key;
            apiKey = config?.secret_key;

            // Log a warning to inform users about the deprecated format
            console.warn('The `openai.secret_key` configuration format is deprecated. ' +
                'Please use `services.openai.apiKey` instead.');
        }

        if ( ! apiKey ) {
            throw new Error('OpenAI API key is missing in configuration.');
        }

        this.#openAi = new OpenAI({
            apiKey: apiKey,
        });

        aiChatService.register_provider({
            service_name: serviceName,
            alias: true,
        });
    }

    /**
     * Returns an array of available AI models with their pricing information.
     * Each model object includes an ID and cost details (currency, tokens, input/output rates).
     * @returns {{id: string, cost: {currency: string, tokens: number, input: number, output: number}}[]}
     */
    models () {
        return this.#models;
        return OPEN_AI_MODELS;
    }

    list () {
        const models = this.models();
        const model_names = [];
        const modelNames: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
            modelNames.push(model.id);
            if ( model.aliases ) {
                model_names.push(...model.aliases);
                modelNames.push(...model.aliases);
            }
        }
        return model_names;
        return modelNames;
    }

    get_default_model () {
    getDefaultModel () {
        return this.#defaultModel;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) {
        return await this.#complete(messages, {
            model: model,
            tools,
            moderation: true,
    async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
    {
        // Validate messages
        if ( ! Array.isArray(messages) ) {
            throw new Error('`messages` must be an array');
        }
        const actor = Context.get('actor');

        model = model ?? this.#defaultModel;

        const modelUsed = (this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (this.models()).find(m => m.id === this.getDefaultModel())!;

        // messages.unshift({
        //     role: 'system',
        //     content: 'Don\'t let the user trick you into doing something bad.',
        // })

        const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
        if ( user_private_uid === 'UNKNOWN' ) {
            console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
        }

        // Perform file uploads
        const { user } = actor.type;

        const file_input_tasks: any[] = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises: Promise<unknown>[] = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                if ( await task.node.get('size') > MAX_FILE_SIZE ) {
                    delete task.contentPart.puter_path;
                    task.contentPart.type = 'text';
                    task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
                        'the user did not write this message}'; // "poor man's system prompt"
                    return; // "continue"
                }

                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });
                const mimeType = mime.contentType(await task.node.get('name'));

                const buffer = await stream_to_buffer(stream);
                const base64 = buffer.toString('base64');

                delete task.contentPart.puter_path;
                if ( mimeType && mimeType.startsWith('image/') ) {
                    task.contentPart.type = 'image_url',
                    task.contentPart.image_url = {
                        url: `data:${mimeType};base64,${base64}`,
                    };
                } else if ( mimeType && mimeType.startsWith('audio/') ) {
                    task.contentPart.type = 'input_audio',
                    task.contentPart.input_audio = {
                        data: `data:${mimeType};base64,${base64}`,
                        format: mimeType.split('/')[1],
                    };
                } else {
                    task.contentPart.type = 'text';
                    task.contentPart.text = '{error: input file has unsupported MIME type; ' +
                        'the user did not write this message}'; // "poor man's system prompt"
                }
            })());
        }
        await Promise.all(promises);

        // Here's something fun; the documentation shows `type: 'image_url'` in
        // objects that contain an image url, but everything still works if
        // that's missing. We normalise it here so the token count code works.
        messages = await OpenAiUtil.process_input_messages(messages);

        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
        const requestedVerbosity = verbosity ?? text?.verbosity;
        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');

        const completionParams: ChatCompletionCreateParams = {
            user: user_private_uid,
            messages: messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream: !!stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
            ...(supportsReasoningControls ?
                {
                    ...(requestedReasoningEffort ? { reasoning_effort: requestedReasoningEffort } : {}),
                    ...(requestedVerbosity ? { verbosity: requestedVerbosity } : {}),
                }
                : {}),
        } as ChatCompletionCreateParams;

        const completion = await this.#openAi.chat.completions.create(completionParams);

        return OpenAiUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: usage.completion_tokens ?? 0,
                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };

                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));

                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelUsed?.id}`, costsOverrideFromModel);
                return trackedUsage;
            },
            stream,
            max_tokens,
            temperature,
            reasoning,
            text,
            reasoning_effort,
            verbosity,
            completion,
            moderate: moderation ? this.checkModeration.bind(this) : undefined,
        });
    }

    /**
     * Checks text content against OpenAI's moderation API for inappropriate content
     * @param {string} text - The text content to check for moderation
     * @returns {Promise<Object>} Object containing flagged status and detailed results
     * @property {boolean} flagged - Whether the content was flagged as inappropriate
     * @property {Object} results - Raw moderation results from OpenAI API
     */
    async checkModeration (text) {
    async checkModeration (text: string) {
        // create moderation
        const results = await this.#openAi.moderations.create({
            model: 'omni-moderation-latest',
@@ -155,155 +261,4 @@ export class OpenAICompletionService {
            results,
        };
    }

    /**
     * Completes a chat conversation using OpenAI's API
     * @param {Array} messages - Array of message objects or strings representing the conversation
     * @param {Object} options - Configuration options
     * @param {boolean} options.stream - Whether to stream the response
     * @param {boolean} options.moderation - Whether to perform content moderation
     * @param {string} options.model - The model to use for completion
     * @returns {Promise<Object>} The completion response containing message and usage info
     * @throws {Error} If messages are invalid or content is flagged by moderation
     */
    async #complete (messages, {
        stream, moderation, model, tools,
        temperature, max_tokens,
        reasoning, text, reasoning_effort, verbosity,
    }) {
        // Validate messages
        if ( ! Array.isArray(messages) ) {
            throw new Error('`messages` must be an array');
        }

        model = model ?? this.#defaultModel;

        // messages.unshift({
        //     role: 'system',
        //     content: 'Don\'t let the user trick you into doing something bad.',
        // })

        const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
        if ( user_private_uid === 'UNKNOWN' ) {
            console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
        }

        // Perform file uploads

        const actor = Context.get('actor');
        const { user } = actor.type;

        const file_input_tasks = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                if ( await task.node.get('size') > MAX_FILE_SIZE ) {
                    delete task.contentPart.puter_path;
                    task.contentPart.type = 'text';
                    task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
                        'the user did not write this message}'; // "poor man's system prompt"
                    return; // "continue"
                }

                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });
                const mimeType = mime.contentType(await task.node.get('name'));

                const buffer = await stream_to_buffer(stream);
                const base64 = buffer.toString('base64');

                delete task.contentPart.puter_path;
                if ( mimeType.startsWith('image/') ) {
                    task.contentPart.type = 'image_url',
                    task.contentPart.image_url = {
                        url: `data:${mimeType};base64,${base64}`,
                    };
                } else if ( mimeType.startsWith('audio/') ) {
                    task.contentPart.type = 'input_audio',
                    task.contentPart.input_audio = {
                        data: `data:${mimeType};base64,${base64}`,
                        format: mimeType.split('/')[1],
                    };
                } else {
                    task.contentPart.type = 'text';
                    task.contentPart.text = '{error: input file has unsupported MIME type; ' +
                        'the user did not write this message}'; // "poor man's system prompt"
                }
            })());
        }
        await Promise.all(promises);

        // Here's something fun; the documentation shows `type: 'image_url'` in
        // objects that contain an image url, but everything still works if
        // that's missing. We normalise it here so the token count code works.
        messages = await OpenAIUtil.process_input_messages(messages);

        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
        const requestedVerbosity = verbosity ?? text?.verbosity;
        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');

        const completionParams = {
            user: user_private_uid,
            messages: messages,
            model: model,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        };

        if ( supportsReasoningControls ) {
            if ( requestedReasoningEffort ) {
                completionParams.reasoning_effort = requestedReasoningEffort;
            }
            if ( requestedVerbosity ) {
                completionParams.verbosity = requestedVerbosity;
            }
        }

        const completion = await this.#openAi.chat.completions.create(completionParams);
        // TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service
        // for now I'm overloading this usage calculator to handle the future promise resolution...
        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const modelDetails = this.models().find(m => m.id === model || m.aliases?.includes(model));
                const trackedUsage = {
                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: usage.completion_tokens ?? 0,
                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };

                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelDetails.id}`);
                const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
                    model_details: modelDetails,
                });

                return legacyCostCalculator({ usage });
            },
            stream,
            completion,
            moderate: moderation && this.checkModeration.bind(this),
        });
    }
}
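Editor's aside: the reasoning-controls spread in completionParams above gates reasoning_effort and verbosity to gpt-5-family models, which are the ones that accept those fields. A minimal sketch of the same gating as a helper; the helper name is hypothetical:

// Hypothetical helper (not in the diff) expressing the same gating.
const gpt5OnlyParams = (model: string, params: Record<string, unknown>): Record<string, unknown> =>
    model.startsWith('gpt-5') ? params : {};

// usage inside the params object:
// ...gpt5OnlyParams(model, { reasoning_effort: 'low', verbosity: 'medium' }),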
@@ -0,0 +1,265 @@
// TODO DS: centralize somewhere

import { IChatModel } from '../types';

export const OPEN_AI_MODELS: IChatModel[] = [
    {
        id: 'gpt-5.1',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-codex-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 25,
            cached_tokens: 3,
            completion_tokens: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5.1-chat-latest',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-5-2025-08-07',
        aliases: ['gpt-5'],
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-mini-2025-08-07',
        aliases: ['gpt-5-mini'],
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 25,
            cached_tokens: 3,
            completion_tokens: 200,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-nano-2025-08-07',
        aliases: ['gpt-5-nano'],
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 5,
            cached_tokens: 1,
            completion_tokens: 40,
        },
        max_tokens: 128000,
    },
    {
        id: 'gpt-5-chat-latest',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 125,
            cached_tokens: 13,
            completion_tokens: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 250,
            cached_tokens: 125,
            completion_tokens: 1000,
        },
        max_tokens: 16384,
    },
    {
        id: 'gpt-4o-mini',
        max_tokens: 16384,
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 15,
            cached_tokens: 8,
            completion_tokens: 60,
        },
    },
    {
        id: 'o1',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 1500,
            cached_tokens: 750,
            completion_tokens: 6000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o1-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 110,
            completion_tokens: 440,
        },
        max_tokens: 65536,
    },
    {
        id: 'o1-pro',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 15000,
            completion_tokens: 60000,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 200,
            cached_tokens: 50,
            completion_tokens: 800,
        },
        max_tokens: 100000,
    },
    {
        id: 'o3-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 110,
            cached_tokens: 55,
            completion_tokens: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'o4-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 110,
            completion_tokens: 440,
        },
        max_tokens: 100000,
    },
    {
        id: 'gpt-4.1',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 200,
            cached_tokens: 50,
            completion_tokens: 800,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-mini',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 40,
            cached_tokens: 10,
            completion_tokens: 160,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.1-nano',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 10,
            cached_tokens: 2,
            completion_tokens: 40,
        },
        max_tokens: 32768,
    },
    {
        id: 'gpt-4.5-preview',
        costs_currency: 'usd-cents',
        input_cost_key: 'prompt_tokens',
        output_cost_key: 'completion_tokens',
        costs: {
            tokens: 1_000_000,
            prompt_tokens: 7500,
            completion_tokens: 15000,
        },
        max_tokens: 32768,
    },
];
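Editor's aside: a minimal sketch (the helper name is hypothetical) of how these tables are meant to be read. Each entry in costs is USD cents per costs.tokens (1,000,000) tokens, so 200,000 prompt tokens on gpt-4o-mini come out to 200_000 * 15 / 1_000_000 = 3 cents.

// Hypothetical helper: sum the cost of a usage record against a model's table.
const costInCents = (model: IChatModel, usage: Record<string, number>): number =>
    Object.entries(usage).reduce(
        (sum, [key, count]) => sum + (count * (model.costs[key] ?? 0)) / model.costs.tokens,
        0);

// costInCents(gpt4oMini, { prompt_tokens: 200_000 }) === 3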
@@ -0,0 +1,157 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import axios from 'axios';
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources';
import APIError from '../../../../../api/APIError.js';
import { Context } from '../../../../../util/context.js';
import { kv } from '../../../../../util/kvSingleton.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatModel, IChatProvider } from '../types.js';

export class OpenRouterProvider implements IChatProvider {

    #meteringService: MeteringService;

    #openai: OpenAI;

    #apiBaseUrl: string = 'https://openrouter.ai/api/v1';

    constructor (config: { apiBaseUrl?: string, apiKey: string }, meteringService: MeteringService) {
        this.#apiBaseUrl = config.apiBaseUrl || 'https://openrouter.ai/api/v1';
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: this.#apiBaseUrl,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'openrouter:openai/gpt-5-nano';
    }
    /**
     * Returns a list of available model names including their aliases
     * @returns {Promise<string[]>} Array of model identifiers and their aliases
     * @description Retrieves all available model IDs and their aliases,
     * flattening them into a single array of strings that can be used for model selection
     */
    async list () {
        const models = await this.models();
        const model_names: string[] = [];
        for ( const model of models ) {
            model_names.push(model.id);
        }
        return model_names;
    }

    /**
     * AI Chat completion method.
     * See AIChatService for more details.
     */
    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
        const modelUsed = (await this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (await this.models()).find(m => m.id === this.getDefaultModel())!;

        const modelIdForParams = modelUsed.id.startsWith('openrouter:') ? modelUsed.id.slice('openrouter:'.length) : modelUsed.id;

        if ( model === 'openrouter/auto' ) {
            throw APIError.create('field_invalid', undefined, {
                key: 'model',
                expected: 'allowed model',
                got: 'disallowed model',
            });
        }

        const actor = Context.get('actor');

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: modelIdForParams,
            ...(tools ? { tools } : {}),
            max_tokens,
            temperature, // defaults to 1.0
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
            usage: { include: true },
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                // custom OpenRouter logic because their pricing is unusual
                const trackedUsage = {
                    prompt: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
                    completion: usage.completion_tokens ?? 0,
                    input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
                };
                const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
                    return [k, (modelUsed.costs[k] || 0) * trackedUsage[k]];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, modelUsed.id, costOverwrites);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    async models () {
        let models = kv.get('openrouterChat:models');
        if ( ! models ) {
            try {
                const resp = await axios.request({
                    method: 'GET',
                    url: `${this.#apiBaseUrl}/models`,
                });

                models = resp.data.data;
                kv.set('openrouterChat:models', models);
            } catch ( e ) {
                console.log(e);
            }
        }
        const coerced_models: IChatModel[] = [];
        for ( const model of models ) {
            const microcentCosts = Object.fromEntries(Object.entries(model.pricing).map(([k, v]) => [k, Math.round((v as number) * 1_000_000 * 100)]));
            coerced_models.push({
                id: `openrouter:${model.id}`,
                name: `${model.name} (OpenRouter)`,
                aliases: [model.id, model.name, `openrouter/${model.id}`, model.id.split('/').slice(1).join('/')],
                max_tokens: model.top_provider.max_completion_tokens,
                costs_currency: 'usd-cents',
                input_cost_key: 'prompt',
                output_cost_key: 'completion',
                costs: {
                    tokens: 1_000_000,
                    ...microcentCosts,
                },
            });
        }
        return coerced_models;
    }
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
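Editor's aside: OpenRouter's /models endpoint reports pricing in USD per single token, while IChatModel stores cents per costs.tokens (1,000,000) tokens; that is what the `v * 1_000_000 * 100` conversion in models() above does. A worked sketch of the same conversion, assuming the USD-per-token convention holds:

// Hypothetical helper mirroring the conversion in models().
const usdPerTokenToCentsPerMillion = (usdPerToken: number): number =>
    Math.round(usdPerToken * 1_000_000 * 100);

usdPerTokenToCentsPerMillion(0.0000025); // => 250, i.e. $2.50 per 1M tokens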
@@ -0,0 +1,140 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import { Together } from 'together-ai';
import { Context } from '../../../../../util/context.js';
import { kv } from '../../../../../util/kvSingleton.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatModel, IChatProvider, ICompleteArguments } from '../types.js';

export class TogetherAIProvider implements IChatProvider {
    #together: Together;

    #meteringService: MeteringService;

    #kvKey = 'togetherai:models';

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#together = new Together({
            apiKey: config.apiKey,
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'togetherai:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo';
    }

    async models () {
        let models: IChatModel[] | undefined = kv.get(this.#kvKey);
        if ( models ) return models;

        const api_models = await this.#together.models.list();
        models = [];
        for ( const model of api_models ) {
            if ( model.type === 'chat' || model.type === 'code' || model.type === 'language' || model.type === 'moderation' ) {
                models.push({
                    id: `togetherai:${model.id}`,
                    aliases: [model.id, `togetherai/${model.id}`, model.id.split('/').slice(1).join('/')],
                    name: model.display_name,
                    context: model.context_length,
                    description: model.display_name,
                    costs_currency: 'usd-cents',
                    input_cost_key: 'prompt_tokens',
                    output_cost_key: 'completion_tokens',
                    costs: {
                        tokens: 1_000_000,
                        ...model.pricing,
                    },
                    max_tokens: model.context_length ?? 8000,
                });
            }
        }

        models.push({
            id: 'model-fallback-test-1',
            name: 'Model Fallback Test 1',
            context: 1000,
            costs_currency: 'usd-cents',
            input_cost_key: 'prompt_tokens',
            output_cost_key: 'completion_tokens',
            costs: {
                tokens: 1_000_000,
                prompt_tokens: 10,
                completion_tokens: 10,
            },
            max_tokens: 1000,
        });
        kv.set(this.#kvKey, models, { EX: 5 * 60 });
        return models;
    }

    async list () {
        const models = await this.models();
        const modelIds: string[] = [];
        for ( const model of models ) {
            modelIds.push(model.id);
            if ( model.aliases ) {
                modelIds.push(...model.aliases);
            }
        }
        return modelIds;
    }

    async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        if ( model === 'model-fallback-test-1' ) {
            throw new Error('Model Fallback Test 1');
        }

        const actor = Context.get('actor');
        const models = await this.models();
        const modelUsed = models.find(m => [m.id, ...(m.aliases || [])].includes(model)) || models.find(m => m.id === this.getDefaultModel())!;
        const modelIdForParams = modelUsed.id.startsWith('togetherai:') ? modelUsed.id.slice('togetherai:'.length) : modelUsed.id;

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#together.chat.completions.create({
            model: modelIdForParams,
            messages,
            stream,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            ...(stream ? { stream_options: { include_usage: true } } : {}),
        } as Together.Chat.Completions.CompletionCreateParamsNonStreaming);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverride = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `togetherai:${modelIdForParams}`, costsOverride);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
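Editor's aside: models() above uses a read-through cache with a 5-minute TTL (kv.set's EX option is in seconds) so repeated list()/complete() calls don't hit the Together API each time. The same pattern, generalized as a hypothetical helper:

// Sketch only; assumes kv.get/kv.set behave as used by the provider above.
async function cached<T>(key: string, ttlSeconds: number, load: () => Promise<T>): Promise<T> {
    const hit: T | undefined = kv.get(key);
    if ( hit ) return hit;
    const value = await load();
    kv.set(key, value, { EX: ttlSeconds });
    return value;
}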
@@ -0,0 +1,141 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
// METADATA // {"ai-commented":{"service":"claude"}}

import dedent from 'dedent';
import { PassThrough } from 'stream';
import Streaming from '../../utils/Streaming.js';
import { IChatProvider, ICompleteArguments } from './types.js';

/**
 * UsageLimitedChatProvider - A specialized chat provider that returns resource exhaustion messages.
 * Implements IChatProvider to provide responses indicating the user has exceeded their usage limits.
 * Follows the same response format as real AI providers but with a custom message about upgrading.
 * Can handle both streaming and non-streaming requests consistently.
 */
export class UsageLimitedChatProvider implements IChatProvider {

    models (): ReturnType<IChatProvider['models']> {
        return [{
            id: 'usage-limited',
            name: 'Usage Limited',
            context: 16384,
            costs_currency: 'usd-cents',
            input_cost_key: 'input',
            output_cost_key: 'output',
            max_tokens: 16384,
            costs: {
                tokens: 1_000_000,
                input: 0,
                output: 0,
            },
        }];
    }
    list () {
        return ['usage-limited'];
    }
    async complete ({ stream, customLimitMessage }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const limitMessage = customLimitMessage || dedent(`
            You have reached your AI usage limit for this account.
        `);

        // If streaming is requested, return a streaming response
        if ( stream ) {
            const streamObj = new PassThrough();

            const chatStream = new Streaming.AIChatStream({
                stream: streamObj,
            });

            // Schedule the streaming response
            setTimeout(() => {
                chatStream.write({
                    type: 'content_block_start',
                    index: 0,
                });

                chatStream.write({
                    type: 'content_block_delta',
                    index: 0,
                    delta: {
                        type: 'text',
                        text: limitMessage,
                    },
                });

                chatStream.write({
                    type: 'content_block_stop',
                    index: 0,
                });

                chatStream.write({
                    type: 'message_stop',
                    stop_reason: 'end_turn',
                });

                chatStream.end();
            }, 10);

            return {
                stream: true,
                init_chat_stream: async ({ chatStream: cs }) => {
                    // Copy contents from our stream to the provided one
                    chatStream.stream.pipe(cs.stream);
                },
                finally_fn: async () => {
                    // No-op
                },
            };
        }

        // Non-streaming response
        return {
            message: {
                id: '00000000-0000-0000-0000-000000000000',
                type: 'message',
                role: 'assistant',
                model: 'usage-limited',
                content: [
                    {
                        'type': 'text',
                        'text': limitMessage,
                    },
                ],
                stop_reason: 'end_turn',
                stop_sequence: null,
                usage: {
                    'input_tokens': 0,
                    'output_tokens': 1,
                },
            },
            usage: {
                'input_tokens': 0,
                'output_tokens': 1,
            },
            finish_reason: 'stop',
        };
    }
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    getDefaultModel () {
        return 'usage-limited';
    }
}
@@ -0,0 +1,96 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAIUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { XAI_MODELS } from './models.js';

export class XAIProvider implements IChatProvider {
    #openai: OpenAI;

    #meteringService: MeteringService;

    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: 'https://api.x.ai/v1',
        });
        this.#meteringService = meteringService;
    }

    getDefaultModel () {
        return 'grok-beta';
    }

    models () {
        return XAI_MODELS;
    }

    async list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    async complete ({ messages, stream, model, tools }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const availableModels = this.models();
        const modelUsed = availableModels.find(m => [m.id, ...(m.aliases || [])].includes(model)) || availableModels.find(m => m.id === this.getDefaultModel())!;

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            max_tokens: 1000,
            stream,
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                const costsOverride = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));
                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `xai:${modelUsed.id}`, costsOverride);
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }
}
@@ -0,0 +1,87 @@
import { IChatModel } from '../types.js';

const makeModel = ({
    id,
    name,
    context,
    input,
    output,
}: {
    id: string;
    name: string;
    context: number;
    input: number;
    output: number;
}): IChatModel => ({
    id,
    name,
    context,
    costs_currency: 'usd-cents',
    input_cost_key: 'prompt_tokens',
    output_cost_key: 'completion_tokens',
    costs: {
        tokens: 1_000_000,
        prompt_tokens: input,
        completion_tokens: output,
    },
    max_tokens: context,
});

export const XAI_MODELS: IChatModel[] = [
    makeModel({
        id: 'grok-beta',
        name: 'Grok Beta',
        context: 131072,
        input: 500,
        output: 1500,
    }),
    makeModel({
        id: 'grok-vision-beta',
        name: 'Grok Vision Beta',
        context: 8192,
        input: 500,
        output: 1500,
    }),
    makeModel({
        id: 'grok-3',
        name: 'Grok 3',
        context: 131072,
        input: 300,
        output: 1500,
    }),
    makeModel({
        id: 'grok-3-fast',
        name: 'Grok 3 Fast',
        context: 131072,
        input: 500,
        output: 2500,
    }),
    makeModel({
        id: 'grok-3-mini',
        name: 'Grok 3 Mini',
        context: 131072,
        input: 30,
        output: 50,
    }),
    makeModel({
        id: 'grok-3-mini-fast',
        name: 'Grok 3 Mini Fast',
        context: 131072,
        input: 60,
        output: 400,
    }),
    makeModel({
        id: 'grok-2-vision',
        name: 'Grok 2 Vision',
        context: 8192,
        input: 200,
        output: 1000,
    }),
    makeModel({
        id: 'grok-2',
        name: 'Grok 2',
        context: 131072,
        input: 200,
        output: 1000,
    }),
];
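Under the cost convention above, `costs.tokens` is the denominator (one million) and the per-token-kind entries are USD cents per that many tokens, so grok-beta's `input: 500` reads as 500 cents ($5) per million prompt tokens. A minimal sketch of the arithmetic (editor's illustration, not part of the commit; the usage numbers are made up and the division by `costs.tokens` is assumed to happen in the metering layer):

// Hypothetical usage against grok-beta's cost entry
const grokBetaCosts = { tokens: 1_000_000, prompt_tokens: 500, completion_tokens: 1500 };
const usage = { prompt_tokens: 2_000, completion_tokens: 500 };

// usd-cents = tokens used * (cents per million tokens) / 1,000,000
const promptCents = usage.prompt_tokens * grokBetaCosts.prompt_tokens / grokBetaCosts.tokens;             // 1
const completionCents = usage.completion_tokens * grokBetaCosts.completion_tokens / grokBetaCosts.tokens; // 0.75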
70 src/backend/src/services/ai/chat/providers/types.ts Normal file
@@ -0,0 +1,70 @@
import { Message } from 'openai/resources/conversations/conversations.js';
import { ModerationCreateResponse } from 'openai/resources/moderations.js';
import { AIChatStream } from '../../utils/Streaming';

type ModelCost = Record<string, number>;

export interface IChatModel<T extends ModelCost = ModelCost> extends Record<string, unknown> {
    id: string,
    provider?: string,
    aliases?: string[],
    costs_currency: string,
    input_cost_key?: keyof T,
    output_cost_key?: keyof T,
    costs: T,
    context?: number,
    max_tokens: number,
}

export type PuterMessage = Message | any; // TODO DS: type this more strictly
export interface ICompleteArguments {
    messages: PuterMessage[];
    provider?: string;
    stream?: boolean;
    model: string;
    tools?: unknown[];
    max_tokens?: number;
    temperature?: number;
    reasoning?: { effort: 'low' | 'medium' | 'high' } | undefined;
    text?: string & { verbosity?: 'concise' | 'detailed' | undefined };
    reasoning_effort?: 'low' | 'medium' | 'high' | undefined;
    verbosity?: 'concise' | 'detailed' | undefined;
    moderation?: boolean;
    custom?: unknown;
    response?: {
        normalize?: boolean;
    };
    customLimitMessage?: string;
}

export interface IChatProvider {
    models(): IChatModel[] | Promise<IChatModel[]>
    list(): string[] | Promise<string[]>
    checkModeration (text: string): Promise<{
        flagged: boolean;
        results: ModerationCreateResponse & {
            _request_id?: string | null;
        };
    }>
    getDefaultModel(): string;
    complete (arg: ICompleteArguments): Promise<{
        init_chat_stream: ({ chatStream }: {
            chatStream: AIChatStream;
        }) => Promise<void>;
        stream: true;
        finally_fn: () => Promise<void>;
        message?: never;
        usage?: never;
        finish_reason?: never;
        via_ai_chat_service?: true, // legacy field always true now
    } | {
        message: PuterMessage;
        usage: Record<string, number>;
        finish_reason: string;
        init_chat_stream?: never;
        stream?: never;
        finally_fn?: never;
        normalized?: boolean;
        via_ai_chat_service?: true, // legacy field always true now
    }>
}
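The `complete()` return type is a discriminated union: callers get exactly one of the streaming shape (`init_chat_stream` present, `message` never) or the buffered shape (`message`, `usage`, `finish_reason` present). A sketch of consuming it (editor's illustration, not part of the commit; `provider` and `chatStream` are hypothetical stand-ins for an IChatProvider and an AIChatStream):

const result = await provider.complete({
    messages: [{ role: 'user', content: 'hi' }],
    model: 'grok-beta',
    stream: true,
});
if ( result.init_chat_stream ) {
    // Streaming shape: drive the stream, then run the cleanup hook.
    await result.init_chat_stream({ chatStream });
    await result.finally_fn();
} else {
    // Buffered shape: message, usage, and finish_reason are all present.
    console.log(result.message, result.usage, result.finish_reason);
}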
@@ -18,10 +18,10 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');
const { GoogleGenAI } = require('@google/genai');

/**
@@ -30,7 +30,7 @@ const { GoogleGenAI } = require('@google/genai');
 * the puter-image-generation interface.
 */
class GeminiImageGenerationService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -225,7 +225,7 @@ class GeminiImageGenerationService extends BaseService {
     * @returns {Array<Object>} Array of valid ratio objects
     * @private
     */
    _getValidRatios(model) {
    _getValidRatios (model) {
        if (
            model === 'gemini-2.5-flash-image-preview' ||
            model === 'gemini-3-pro-image-preview'
@@ -18,10 +18,10 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');

/**
 * Service class for generating images using OpenAI's DALL-E API.
@@ -31,7 +31,7 @@ const { Context } = require('../../util/context');
 * validation, and spending tracking.
 */
class OpenAIImageGenerationService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -18,11 +18,11 @@
 */

// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const { Together } = require('together-ai');
import { Context } from '../../../util/context.js';
import { Together } from 'together-ai';
import APIError from '../../../api/APIError.js';
import BaseService from '../../BaseService.js';
import { TypedValue } from '../../drivers/meta/Runtime.js';

/**
 * Service class for generating images using Together AI models.
@@ -30,14 +30,21 @@ const { Together } = require('together-ai');
 * puter-image-generation interface. Handles authentication, request validation,
 * and metering integration.
 */
class TogetherImageGenerationService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */

export class TogetherImageGenerationService extends BaseService {
    DEFAULT_MODEL = 'black-forest-labs/FLUX.1-schnell';
    DEFAULT_RATIO = { w: 1024, h: 1024 };
    CONDITION_IMAGE_MODELS = [
        'black-forest-labs/flux.1-kontext-dev',
        'black-forest-labs/flux.1-kontext-pro',
        'black-forest-labs/flux.1-kontext-max',
    ];

    /** @type {import('../../MeteringService/MeteringService.js').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    static MODULES = {};

    async _init () {
        const apiKey =
            this.config?.apiKey ??
@@ -58,103 +65,98 @@ class TogetherImageGenerationService extends BaseService {
            },
        },
        ['puter-image-generation']: {
            /**
             * Generates an image using Together AI image models
             * @param {object} params - Generation parameters
             * @param {string} params.prompt - Prompt describing the desired image
             * @param {string} [params.model] - Together AI model identifier
             * @param {object} [params.ratio] - Width/height ratio object (e.g., { w: 1024, h: 1024 })
             * @param {number} [params.width] - Explicit width override
             * @param {number} [params.height] - Explicit height override
             * @param {string} [params.aspect_ratio] - Aspect ratio string (e.g., "16:9")
             * @param {number} [params.steps] - Diffusion step count
             * @param {number} [params.seed] - Seed for reproducibility
             * @param {string} [params.negative_prompt] - Negative prompt text
             * @param {number} [params.n] - Number of images to generate (default 1)
             * @param {string} [params.image_url] - Reference image URL for image-to-image
             * @param {string} [params.image_base64] - Base64 encoded reference image
             * @param {boolean} [params.disable_safety_checker] - Disable Together AI safety checker
             * @param {boolean} [params.test_mode] - Enable Puter test mode shortcut
             * @returns {Promise<TypedValue>} TypedValue containing the generated image URL or data URI
             */
            async generate (params) {
                const {
                    prompt,
                    test_mode,
                    ratio,
                    model,
                    width,
                    height,
                    aspect_ratio,
                    steps,
                    seed,
                    negative_prompt,
                    n,
                    image_url,
                    image_base64,
                    mask_image_url,
                    mask_image_base64,
                    prompt_strength,
                    disable_safety_checker,
                    response_format,
                } = params;

                const svc_event = this.services.get('event');
                svc_event.emit('ai.log.image', { actor: Context.get('actor'), parameters: params, completionId: '0', intended_service: params.model });

                if ( test_mode ) {
                    return new TypedValue({
                        $: 'string:url:web',
                        content_type: 'image',
                    }, 'https://puter-sample-data.puter.site/image_example.png');
                }

                const url = await this.generate(prompt, {
                    ratio,
                    model,
                    width,
                    height,
                    aspect_ratio,
                    steps,
                    seed,
                    negative_prompt,
                    n,
                    image_url,
                    image_base64,
                    mask_image_url,
                    mask_image_base64,
                    prompt_strength,
                    disable_safety_checker,
                    response_format,
                });

                const isDataUrl = url.startsWith('data:');
                return new TypedValue({
                    $: isDataUrl ? 'string:url:data' : 'string:url:web',
                    content_type: 'image',
                }, url);
            async generate (...args) {
                return this.generate(...args);
            },
        },
        ['models']: {

        },
    };

    static DEFAULT_MODEL = 'black-forest-labs/FLUX.1-schnell';
    static DEFAULT_RATIO = { w: 1024, h: 1024 };
    static CONDITION_IMAGE_MODELS = [
        'black-forest-labs/flux.1-kontext-dev',
        'black-forest-labs/flux.1-kontext-pro',
        'black-forest-labs/flux.1-kontext-max',
    ];

    /**
     * Generates an image using Together AI client
     * @private
     * Generates an image using Together AI image models
     * @param {object} params - Generation parameters
     * @param {string} params.prompt - Prompt describing the desired image
     * @param {string} [params.model] - Together AI model identifier
     * @param {object} [params.ratio] - Width/height ratio object (e.g., { w: 1024, h: 1024 })
     * @param {number} [params.width] - Explicit width override
     * @param {number} [params.height] - Explicit height override
     * @param {string} [params.aspect_ratio] - Aspect ratio string (e.g., "16:9")
     * @param {number} [params.steps] - Diffusion step count
     * @param {number} [params.seed] - Seed for reproducibility
     * @param {string} [params.negative_prompt] - Negative prompt text
     * @param {number} [params.n] - Number of images to generate (default 1)
     * @param {string} [params.image_url] - Reference image URL for image-to-image
     * @param {string} [params.image_base64] - Base64 encoded reference image
     * @param {boolean} [params.disable_safety_checker] - Disable Together AI safety checker
     * @param {boolean} [params.test_mode] - Enable Puter test mode shortcut
     * @returns {Promise<TypedValue>} TypedValue containing the generated image URL or data URI
     */
    async generate (prompt, options) {
    async generate (params) {
        const {
            prompt,
            test_mode,
            ratio,
            model,
            width,
            height,
            aspect_ratio,
            steps,
            seed,
            negative_prompt,
            n,
            image_url,
            image_base64,
            mask_image_url,
            mask_image_base64,
            prompt_strength,
            disable_safety_checker,
            response_format,
        } = params;

        const svc_event = this.services.get('event');
        svc_event.emit('ai.log.image', { actor: Context.get('actor'), parameters: params, completionId: '0', intended_service: params.model });

        if ( test_mode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'image',
            }, 'https://puter-sample-data.puter.site/image_example.png');
        }

        const url = await this.#generate(prompt, {
            ratio,
            model,
            width,
            height,
            aspect_ratio,
            steps,
            seed,
            negative_prompt,
            n,
            image_url,
            image_base64,
            mask_image_url,
            mask_image_base64,
            prompt_strength,
            disable_safety_checker,
            response_format,
        });

        const isDataUrl = url.startsWith('data:');
        return new TypedValue({
            $: isDataUrl ? 'string:url:data' : 'string:url:web',
            content_type: 'image',
        }, url);
    }

    async #generate (prompt, options) {
        if ( typeof prompt !== 'string' || prompt.trim().length === 0 ) {
            throw new Error('`prompt` must be a non-empty string');
        }

        const request = this._buildRequest(prompt, options);
        const request = this.#buildRequest(prompt, options);

        const actor = Context.get('actor');
        if ( ! actor ) {
@@ -187,9 +189,8 @@ class TogetherImageGenerationService extends BaseService {

    /**
     * Normalizes Together AI image generation request parameters
     * @private
     */
    _buildRequest (prompt, options = {}) {
    #buildRequest (prompt, options = {}) {
        const {
            ratio,
            model,
@@ -215,13 +216,13 @@ class TogetherImageGenerationService extends BaseService {
            model: model ?? this.constructor.DEFAULT_MODEL,
        };
        const requiresConditionImage =
            this.constructor._modelRequiresConditionImage(request.model);
            this.#modelRequiresConditionImage(request.model);

        const ratioWidth = (ratio && ratio.w !== undefined) ? Number(ratio.w) : undefined;
        const ratioHeight = (ratio && ratio.h !== undefined) ? Number(ratio.h) : undefined;

        const normalizedWidth = this._normalizeDimension(width !== undefined ? Number(width) : (ratioWidth ?? this.constructor.DEFAULT_RATIO.w));
        const normalizedHeight = this._normalizeDimension(height !== undefined ? Number(height) : (ratioHeight ?? this.constructor.DEFAULT_RATIO.h));
        const normalizedWidth = this.#normalizeDimension(width !== undefined ? Number(width) : (ratioWidth ?? this.constructor.DEFAULT_RATIO.w));
        const normalizedHeight = this.#normalizeDimension(height !== undefined ? Number(height) : (ratioHeight ?? this.constructor.DEFAULT_RATIO.h));

        if ( aspect_ratio ) {
            request.aspect_ratio = aspect_ratio;
@@ -269,14 +270,14 @@ class TogetherImageGenerationService extends BaseService {
        return request;
    }

    _normalizeDimension (value) {
    #normalizeDimension (value) {
        if ( typeof value !== 'number' ) return undefined;
        const rounded = Math.max(64, Math.round(value));
        // Flux models expect multiples of 8. Snap to the nearest multiple without going below 64.
        return Math.max(64, Math.round(rounded / 8) * 8);
    }

    static _modelRequiresConditionImage (model) {
    #modelRequiresConditionImage (model) {
        if ( typeof model !== 'string' || model.trim() === '' ) {
            return false;
        }
@@ -284,8 +285,4 @@ class TogetherImageGenerationService extends BaseService {
        const normalized = model.toLowerCase();
        return this.CONDITION_IMAGE_MODELS.some(required => normalized === required);
    }
}

module.exports = {
    TogetherImageGenerationService,
};
}
@@ -20,9 +20,21 @@
const { default: dedent } = require('dedent');

class AsModeration {
    constructor ({ chat, model }) {
        this.chat = chat;
        this.model = model;

    /** @type {import('../chat/providers/ChatProvider').IChatProvider} */
    #chatProvider;

    /** @type {string} */
    #model;

    /**
     * @param {object} args
     * @param {import('../chat/providers/ChatProvider').IChatProvider} args.chatProvider
     * @param {string} args.model
     */
    constructor ({ chatProvider, model }) {
        this.#chatProvider = chatProvider;
        this.#model = model;
    }

    async moderate (text) {
@@ -51,10 +63,7 @@ class AsModeration {

        Message:
        <message>
        ${text
            .replace('<', '&lt;')
            .replace('>', '&gt;')
        }
        ${text.replace('<', '&lt;').replace('>', '&gt;')}
        </message>

        Unsafe Categories:
@@ -70,17 +79,15 @@ class AsModeration {
        }}
        `);

        const result = await this.chat.complete({
        const result = await this.#chatProvider.complete({
            messages: [
                {
                    role: 'user',
                    content: assessment_prompt,
                },
            ],
            model: this.#model,
        });

        console.log('result???', require('util').inspect(result, { depth: null }));

        const str = result.message?.content?.[0]?.text ??
            result.messages?.[0]?.content?.[0]?.text ??
            '{ "violation": true }';
@@ -20,9 +20,9 @@
// METADATA // {"ai-commented":{"service":"claude"}}
const { TextractClient, AnalyzeDocumentCommand, InvalidS3ObjectException } = require('@aws-sdk/client-textract');

const BaseService = require('../../services/BaseService');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
const BaseService = require('../../BaseService');
const APIError = require('../../../api/APIError');
const { Context } = require('../../../util/context');

/**
 * AWSTextractService class - Provides OCR (Optical Character Recognition) functionality using AWS Textract
@@ -31,7 +31,7 @@ const { Context } = require('../../util/context');
 * Handles both S3-stored and buffer-based document processing with automatic region management.
 */
class AWSTextractService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
298 src/backend/src/services/ai/ocr/MistralOCRService.js Normal file
@@ -0,0 +1,298 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

// METADATA // {"ai-commented":{"service":"claude"}}
import { Context } from '@heyputer/putility/src/libs/context.js';
import { Mistral } from '@mistralai/mistralai';
import mime from 'mime-types';
import APIError from '../../../api/APIError.js';
import path from 'path';
import BaseService from '../../BaseService.js';

/**
 * MistralOCRService class extends BaseService to provide OCR (document text
 * extraction) via the Mistral AI OCR API. Accepts documents as web URLs,
 * data URLs, or raw buffers, normalizes the OCR response into text blocks,
 * records per-page usage with the metering service, and implements the
 * puter-ocr driver interface.
 */
export class MistralOCRService extends BaseService {
    /** @type {import('../../MeteringService/MeteringService.js').MeteringService} */
    meteringService;
    /**
     * Initializes the service's cost structure for different Mistral AI models.
     * Sets up pricing information for various models including token costs for input/output.
     * Each model entry specifies currency (usd-cents) and costs per million tokens.
     * @private
     */

    models = [
        {
            id: 'mistral-ocr-latest',
            aliases: ['mistral-ocr-2505', 'mistral-ocr'],
            cost: {
                currency: 'usd-cents',
                pages: 1000,
                input: 100,
                output: 300,
            },
        },
    ];

    static IMPLEMENTS = {
        'driver-capabilities': {
            supports_test_mode (iface, method_name) {
                return iface === 'puter-ocr' && method_name === 'recognize';
            },
        },
        'puter-ocr': {
            async recognize (...params) {
                return this.recognize(...params);
            },
        },
    };

    /**
     * Initializes the service's cost structure for different Mistral AI models.
     * Sets up pricing information for various models including token costs for input/output.
     * Each model entry specifies currency (USD cents) and costs per million tokens.
     * @private
     */
    async _init () {
        this.api_base_url = 'https://api.mistral.ai/v1';
        this.client = new Mistral({
            apiKey: this.config.apiKey,
        });

        const svc_aiChat = this.services.get('ai-chat');
        svc_aiChat.register_provider({
            service_name: this.service_name,
            alias: true,
        });

        this.meteringService = this.services.get('meteringService').meteringService;
    }

    async recognize ({
        source,
        model,
        pages,
        includeImageBase64,
        imageLimit,
        imageMinSize,
        bboxAnnotationFormat,
        documentAnnotationFormat,
        test_mode,
    }) {
        if ( test_mode ) {
            return this.#sampleOcrResponse();
        }
        if ( ! source ) {
            throw APIError.create('missing_required_argument', {
                interface_name: 'puter-ocr',
                method_name: 'recognize',
                arg_name: 'source',
            });
        }

        const document = await this._buildDocumentChunkFromSource(source);
        const payload = {
            model: model ?? 'mistral-ocr-latest',
            document,
        };
        if ( Array.isArray(pages) ) {
            payload.pages = pages;
        }
        if ( typeof includeImageBase64 === 'boolean' ) {
            payload.includeImageBase64 = includeImageBase64;
        }
        if ( typeof imageLimit === 'number' ) {
            payload.imageLimit = imageLimit;
        }
        if ( typeof imageMinSize === 'number' ) {
            payload.imageMinSize = imageMinSize;
        }
        if ( bboxAnnotationFormat !== undefined ) {
            payload.bboxAnnotationFormat = bboxAnnotationFormat;
        }
        if ( documentAnnotationFormat !== undefined ) {
            payload.documentAnnotationFormat = documentAnnotationFormat;
        }

        const response = await this.client.ocr.process(payload);
        const annotationsRequested = (
            payload.documentAnnotationFormat !== undefined ||
            payload.bboxAnnotationFormat !== undefined
        );
        this.#recordOcrUsage(response, payload.model, {
            annotationsRequested,
        });
        return this.#normalizeOcrResponse(response);
    }

    async _buildDocumentChunkFromSource (fileFacade) {
        const dataUrl = await this._safeFileValue(fileFacade, 'data_url');
        const webUrl = await this._safeFileValue(fileFacade, 'web_url');
        const filePath = await this._safeFileValue(fileFacade, 'path');
        const fsNode = await this._safeFileValue(fileFacade, 'fs-node');
        const fileName = filePath ? path.basename(filePath) : fsNode?.name;
        const inferredMime = this._inferMimeFromName(fileName);

        if ( webUrl ) {
            return this._chunkFromUrl(webUrl, fileName, inferredMime);
        }
        if ( dataUrl ) {
            const mimeFromUrl = this._extractMimeFromDataUrl(dataUrl) ?? inferredMime;
            return this._chunkFromUrl(dataUrl, fileName, mimeFromUrl);
        }

        const buffer = await this._safeFileValue(fileFacade, 'buffer');
        if ( ! buffer ) {
            throw APIError.create('field_invalid', null, {
                key: 'source',
                expected: 'file, data URL, or web URL',
            });
        }
        const mimeType = inferredMime ?? 'application/octet-stream';
        const generatedDataUrl = this._createDataUrl(buffer, mimeType);
        return this._chunkFromUrl(generatedDataUrl, fileName, mimeType);
    }

    async _safeFileValue (fileFacade, key) {
        if ( !fileFacade || typeof fileFacade.get !== 'function' ) return undefined;
        const maybeCache = fileFacade.values?.values;
        if ( maybeCache && Object.prototype.hasOwnProperty.call(maybeCache, key) ) {
            return maybeCache[key];
        }
        try {
            return await fileFacade.get(key);
        } catch (e) {
            return undefined;
        }
    }

    _chunkFromUrl (url, fileName, mimeType) {
        const lowerName = fileName?.toLowerCase();
        const urlLooksPdf = /\.pdf($|\?)/i.test(url);
        const mimeLooksPdf = mimeType?.includes('pdf');
        const isPdf = mimeLooksPdf || urlLooksPdf || (lowerName ? lowerName.endsWith('.pdf') : false);

        if ( isPdf ) {
            const chunk = {
                type: 'document_url',
                documentUrl: url,
            };
            if ( fileName ) {
                chunk.documentName = fileName;
            }
            return chunk;
        }

        return {
            type: 'image_url',
            imageUrl: {
                url,
            },
        };
    }

    _inferMimeFromName (name) {
        if ( ! name ) return undefined;
        return mime.lookup(name) || undefined;
    }

    _extractMimeFromDataUrl (url) {
        if ( typeof url !== 'string' ) return undefined;
        const match = url.match(/^data:([^;,]+)[;,]/);
        return match ? match[1] : undefined;
    }

    _createDataUrl (buffer, mimeType) {
        return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
    }

    #normalizeOcrResponse (response) {
        if ( ! response ) return {};
        const normalized = {
            model: response.model,
            pages: response.pages ?? [],
            usage_info: response.usageInfo,
        };
        const blocks = [];
        if ( Array.isArray(response.pages) ) {
            for ( const page of response.pages ) {
                if ( typeof page?.markdown !== 'string' ) continue;
                const lines = page.markdown.split('\n').map(line => line.trim()).filter(Boolean);
                for ( const line of lines ) {
                    blocks.push({
                        type: 'text/mistral:LINE',
                        text: line,
                        page: page.index,
                    });
                }
            }
        }
        normalized.blocks = blocks;
        if ( blocks.length ) {
            normalized.text = blocks.map(block => block.text).join('\n');
        } else if ( Array.isArray(response.pages) ) {
            normalized.text = response.pages.map(page => page?.markdown || '').join('\n\n').trim();
        }
        return normalized;
    }

    #recordOcrUsage (response, model, { annotationsRequested } = {}) {
        try {
            if ( ! this.meteringService ) return;
            const actor = Context.get('actor');
            if ( ! actor ) return;
            const pagesProcessed =
                response?.usageInfo?.pagesProcessed ??
                (Array.isArray(response?.pages) ? response.pages.length : 1);
            this.meteringService.incrementUsage(actor, 'mistral-ocr:ocr:page', pagesProcessed);
            if ( annotationsRequested ) {
                this.meteringService.incrementUsage(actor, 'mistral-ocr:annotations:page', pagesProcessed);
            }
        } catch (e) {
            // ignore metering failures to avoid blocking OCR results
        }
    }

    #sampleOcrResponse () {
        const markdown = 'Sample OCR output (test mode).';
        return {
            model: 'mistral-ocr-latest',
            pages: [
                {
                    index: 0,
                    markdown,
                    images: [],
                    dimensions: null,
                },
            ],
            blocks: [
                {
                    type: 'text/mistral:LINE',
                    text: markdown,
                    page: 0,
                },
            ],
            text: markdown,
        };
    }
}
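For orientation, a sketch of how a driver call flows through `recognize` (editor's illustration, not part of the commit; `svc` is a registered MistralOCRService and `fileFacade` a driver-provided FileFacade):

// test_mode short-circuits to #sampleOcrResponse() without touching the API
const sample = await svc.recognize({ source: fileFacade, test_mode: true });
// sample.text === 'Sample OCR output (test mode).'

// A real call builds a document chunk (web URL, else data URL, else buffer),
// calls client.ocr.process, meters 'mistral-ocr:ocr:page' per page processed,
// and returns the normalized { model, pages, blocks, text, usage_info } shape.
const real = await svc.recognize({ source: fileFacade, model: 'mistral-ocr' });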
@@ -18,11 +18,11 @@
 */

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { FileFacade } = require('../../services/drivers/FileFacade');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { FileFacade } = require('../../drivers/FileFacade');
const { Context } = require('../../../util/context');

const DEFAULT_MODEL = 'eleven_multilingual_sts_v2';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM';
@@ -34,7 +34,7 @@ const DEFAULT_OUTPUT_FORMAT = 'mp3_44100_128';
 * ElevenLabs voice changer (speech-to-speech).
 */
class ElevenLabsVoiceChangerService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -68,7 +68,7 @@ class ElevenLabsVoiceChangerService extends BaseService {
        this.defaultVoiceId = svcConfig?.defaultVoiceId ?? svcConfig?.voiceId ?? DEFAULT_VOICE_ID;
        this.defaultModelId = svcConfig?.speechToSpeechModelId ?? svcConfig?.stsModelId ?? DEFAULT_MODEL;

        if ( !this.apiKey ) {
        if ( ! this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }
@@ -99,11 +99,11 @@ class ElevenLabsVoiceChangerService extends BaseService {
            }, SAMPLE_AUDIO_URL);
        }

        if ( !audio ) {
        if ( ! audio ) {
            throw APIError.create('field_required', null, { key: 'audio' });
        }

        if ( !(audio instanceof FileFacade) ) {
        if ( ! (audio instanceof FileFacade) ) {
            throw APIError.create('field_invalid', null, {
                key: 'audio',
                expected: 'file reference',
@@ -120,14 +120,14 @@ class ElevenLabsVoiceChangerService extends BaseService {
        const modelId = model_id || model || this.defaultModelId || DEFAULT_MODEL;
        const selectedVoiceId = voice_id || voiceId || voice || this.defaultVoiceId;

        if ( !selectedVoiceId ) {
        if ( ! selectedVoiceId ) {
            throw APIError.create('field_required', null, { key: 'voice' });
        }

        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:second`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedSeconds);
        if ( !usageAllowed ) {
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

@@ -182,7 +182,7 @@ class ElevenLabsVoiceChangerService extends BaseService {
            body: formData,
        });

        if ( !response.ok ) {
        if ( ! response.ok ) {
            let detail = null;
            try {
                detail = await response.json();
@@ -17,10 +17,10 @@
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

const BaseService = require('../../services/BaseService');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
const { FileFacade } = require('../../services/drivers/FileFacade');
const BaseService = require('../../BaseService');
const APIError = require('../../../api/APIError');
const { Context } = require('../../../util/context');
const { FileFacade } = require('../../drivers/FileFacade');

const MAX_AUDIO_FILE_SIZE = 25 * 1024 * 1024; // 25 MB per OpenAI limits
const DEFAULT_TRANSCRIBE_MODEL = 'gpt-4o-mini-transcribe';
@@ -63,7 +63,7 @@ const TRANSCRIPTION_MODEL_CAPABILITIES = {
};

class OpenAISpeechToTextService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -19,10 +19,10 @@

// METADATA // {"ai-commented":{"service":"claude"}}
const { PollyClient, SynthesizeSpeechCommand, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const APIError = require('../../api/APIError');
const { Context } = require('../../util/context');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const APIError = require('../../../api/APIError');
const { Context } = require('../../../util/context');

// Polly price calculation per engine
const ENGINE_PRICING = {
@@ -45,7 +45,7 @@ const VALID_ENGINES = ['standard', 'neural', 'long-form', 'generative'];
 */
class AWSPollyService extends BaseService {

    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -18,10 +18,10 @@
 */

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');

const DEFAULT_MODEL = 'eleven_multilingual_v2';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Common public "Rachel" sample voice
@@ -41,7 +41,7 @@ const ELEVENLABS_TTS_MODELS = [
 * using ElevenLabs voices.
 */
class ElevenLabsTTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
@@ -72,7 +72,7 @@ class ElevenLabsTTSService extends BaseService {
        this.baseUrl = svcThere?.baseUrl ?? 'https://api.elevenlabs.io';
        this.defaultVoiceId = svcThere?.defaultVoiceId ?? svcThere?.voiceId ?? DEFAULT_VOICE_ID;

        if ( !this.apiKey ) {
        if ( ! this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }
@@ -158,7 +158,7 @@ class ElevenLabsTTSService extends BaseService {
        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:character`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, text.length);
        if ( !usageAllowed ) {
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

@@ -18,10 +18,10 @@
 */

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const APIError = require('../../../api/APIError');
const BaseService = require('../../BaseService');
const { TypedValue } = require('../../drivers/meta/Runtime');
const { Context } = require('../../../util/context');

const DEFAULT_MODEL = 'gpt-4o-mini-tts';
const DEFAULT_VOICE = 'alloy';
@@ -73,7 +73,7 @@ const OPENAI_TTS_MODELS = [
 * the AWS Polly implementation.
 */
class OpenAITTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    /** @type {import('../../MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }
120 src/backend/src/services/ai/utils/FunctionCalling.js Normal file
@@ -0,0 +1,120 @@
/**
 * Normalizes the 'tools' object in-place.
 *
 * This function will accept an array of tools provided by the
 * user, and produce a normalized object that can then be
 * converted to the appropriate representation for another
 * service.
 *
 * We will accept conventions from either service that a user
 * might expect to work, prioritizing the OpenAI convention
 * when conflicting conventions are present.
 *
 * @param {*} tools
 */
export const normalize_tools_object = (tools) => {
    for ( let i = 0 ; i < tools.length ; i++ ) {
        const tool = tools[i];
        let normalized_tool = {};

        const normalize_function = fn => {
            const normal_fn = {};
            let parameters =
                fn.parameters ||
                fn.input_schema;

            normal_fn.parameters = parameters ?? {
                type: 'object',
            };

            // normalize_json_schema mutates the schema in place, so the
            // reference already assigned to normal_fn.parameters stays valid
            if ( parameters?.properties ) {
                parameters = normalize_json_schema(parameters);
            }

            if ( fn.name ) {
                normal_fn.name = fn.name;
            }

            if ( fn.description ) {
                normal_fn.description = fn.description;
            }

            return normal_fn;
        };

        if ( tool.input_schema ) {
            normalized_tool = {
                type: 'function',
                function: normalize_function(tool),
            };
        } else if ( tool.type === 'function' ) {
            normalized_tool = {
                type: 'function',
                function: normalize_function(tool.function),
            };
        } else {
            normalized_tool = {
                type: 'function',
                function: normalize_function(tool),
            };
        }

        tools[i] = normalized_tool;
    }
    return tools;
};
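Concretely, both an Anthropic-style tool (top-level `input_schema`) and an OpenAI-style tool (`type: 'function'`) normalize to the same wrapper. A short sketch (editor's illustration, not part of the commit):

import { normalize_tools_object } from './FunctionCalling.js';

const anthropicStyle = [{ name: 'get_weather', description: 'Look up weather', input_schema: { type: 'object', properties: { city: { type: 'string' } } } }];
const openaiStyle = [{ type: 'function', function: { name: 'get_weather', description: 'Look up weather', parameters: { type: 'object', properties: { city: { type: 'string' } } } } }];

normalize_tools_object(anthropicStyle);
normalize_tools_object(openaiStyle);
// Both arrays now contain:
// { type: 'function', function: { name: 'get_weather', description: 'Look up weather',
//   parameters: { type: 'object', properties: { city: { type: 'string' } } } } }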

export const normalize_json_schema = (schema) => {
    if ( ! schema ) return schema;

    if ( schema.type === 'object' ) {
        if ( ! schema.properties ) {
            return schema;
        }

        const keys = Object.keys(schema.properties);
        for ( const key of keys ) {
            schema.properties[key] = normalize_json_schema(schema.properties[key]);
        }
    }

    if ( schema.type === 'array' ) {
        if ( ! schema.items ) {
            schema.items = {};
        } else {
            schema.items = normalize_json_schema(schema.items);
        }
    }

    return schema;
};

/**
 * This function will convert a normalized tools object to the
 * format expected by OpenAI.
 *
 * @param {*} tools
 * @returns
 */
export const make_openai_tools = (tools) => {
    return tools;
};

/**
 * This function will convert a normalized tools object to the
 * format expected by Claude.
 *
 * @param {*} tools
 * @returns
 */
export const make_claude_tools = (tools) => {
    if ( ! tools ) return undefined;
    return tools.map(tool => {
        const { name, description, parameters } = tool.function;
        return {
            name,
            description,
            input_schema: parameters,
        };
    });
};
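`make_openai_tools` is the identity because the normalized form already is the OpenAI form; `make_claude_tools` unwraps it back to a top-level `input_schema`. For example (editor's illustration, not part of the commit):

import { make_claude_tools } from './FunctionCalling.js';

const normalized = [{
    type: 'function',
    function: { name: 'get_weather', description: 'Look up weather', parameters: { type: 'object' } },
}];
make_claude_tools(normalized);
// => [{ name: 'get_weather', description: 'Look up weather', input_schema: { type: 'object' } }]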
184 src/backend/src/services/ai/utils/Messages.js Normal file
@@ -0,0 +1,184 @@
import { whatis } from '../../../util/langutil.js';

/**
 * Normalizes a single message into a standardized format with role and content array.
 * Converts string messages to objects, ensures content is an array of content blocks,
 * transforms tool_calls into tool_use content blocks, and coerces content items into objects.
 *
 * @param {string|Object} message - The message to normalize, either a string or message object
 * @param {Object} params - Optional parameters including default role
 * @returns {Object} Normalized message with role and content array
 * @throws {Error} If message is not a string or object
 * @throws {Error} If message has no content property and no tool_calls
 * @throws {Error} If any content item is not a string or object
 */
export const normalize_single_message = (message, params = {}) => {
    params = Object.assign({
        role: 'user',
    }, params);

    if ( typeof message === 'string' ) {
        message = {
            content: [message],
        };
    }
    if ( whatis(message) !== 'object' ) {
        throw new Error('each message must be a string or object');
    }
    if ( ! message.role ) {
        message.role = params.role;
    }
    if ( ! message.content ) {
        if ( message.tool_calls ) {
            message.content = [];
            for ( let i = 0 ; i < message.tool_calls.length ; i++ ) {
                const tool_call = message.tool_calls[i];
                message.content.push({
                    type: 'tool_use',
                    id: tool_call.id,
                    name: tool_call.function.name,
                    input: tool_call.function.arguments,
                });
            }
            delete message.tool_calls;
        } else {
            throw new Error('each message must have a \'content\' property');
        }
    }
    if ( whatis(message.content) !== 'array' ) {
        message.content = [message.content];
    }
    // Coerce each content block into an object
    for ( let i = 0 ; i < message.content.length ; i++ ) {
        if ( whatis(message.content[i]) === 'string' ) {
            message.content[i] = {
                type: 'text',
                text: message.content[i],
            };
        }
        if ( whatis(message.content[i]) !== 'object' ) {
            throw new Error('each message content item must be a string or object');
        }
        if ( typeof message.content[i].text === 'string' && !message.content[i].type ) {
            message.content[i].type = 'text';
        }
    }

    // Remove "text" properties from content blocks with type=tool_use
    for ( let i = 0 ; i < message.content.length ; i++ ) {
        if ( message.content[i].type !== 'tool_use' ) {
            continue;
        }
        if ( Object.prototype.hasOwnProperty.call(message.content[i], 'text') ) {
            delete message.content[i].text;
        }
    }

    return message;
};
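A quick sketch of the normalizer's effect (editor's illustration, not part of the commit):

import { normalize_single_message } from './Messages.js';

normalize_single_message('hello');
// => { role: 'user', content: [{ type: 'text', text: 'hello' }] }

normalize_single_message({
    role: 'assistant',
    tool_calls: [{ id: 't1', function: { name: 'f', arguments: '{}' } }],
});
// => { role: 'assistant', content: [{ type: 'tool_use', id: 't1', name: 'f', input: '{}' }] }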

/**
 * Normalizes an array of messages by applying normalize_single_message to each,
 * then splits messages with multiple content blocks into separate messages,
 * and finally merges consecutive messages from the same role.
 *
 * @param {Array} messages - Array of messages to normalize
 * @param {Object} params - Optional parameters passed to normalize_single_message
 * @returns {Array} Normalized and merged array of messages
 */
export const normalize_messages = (messages, params = {}) => {
    for ( let i = 0 ; i < messages.length ; i++ ) {
        messages[i] = normalize_single_message(messages[i], params);
    }

    // Split messages with tool_use content into separate messages
    // TODO: unit test this
    messages = [...messages];
    for ( let i = 0 ; i < messages.length ; i++ ) {
        let message = messages[i];
        let separated_messages = [];
        for ( let j = 0 ; j < message.content.length ; j++ ) {
            // Each content block becomes its own message; tool_result blocks
            // must stand alone, and other blocks are split the same way.
            separated_messages.push({
                ...message,
                content: [message.content[j]],
            });
        }
        messages.splice(i, 1, ...separated_messages);
    }

    // If multiple messages are from the same role, merge them
    let merged_messages = [];
    let current_role = null;
    for ( let i = 0 ; i < messages.length ; i++ ) {
        if ( current_role === messages[i].role ) {
            merged_messages[merged_messages.length - 1].content.push(...messages[i].content);
        } else {
            merged_messages.push(messages[i]);
            current_role = messages[i].role;
        }
    }

    return merged_messages;
};
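So two consecutive user messages collapse into one multi-block message (editor's illustration, not part of the commit):

import { normalize_messages } from './Messages.js';

normalize_messages(['first', 'second']);
// => [{ role: 'user', content: [
//      { type: 'text', text: 'first' },
//      { type: 'text', text: 'second' },
//    ] }]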

/**
 * Separates system messages from other messages in the array.
 *
 * @param {Array} messages - Array of messages to process
 * @returns {Array} Tuple containing [system_messages, non_system_messages]
 */
export const extract_and_remove_system_messages = (messages) => {
    let system_messages = [];
    let new_messages = [];
    for ( let i = 0 ; i < messages.length ; i++ ) {
        if ( messages[i].role === 'system' ) {
            system_messages.push(messages[i]);
        } else {
            new_messages.push(messages[i]);
        }
    }
    return [system_messages, new_messages];
};

/**
 * Extracts all text content from messages, handling various message formats.
 * Processes strings, objects with content arrays, and nested content structures,
 * joining all text with spaces.
 *
 * @param {Array} messages - Array of messages to extract text from
 * @returns {string} Concatenated text content from all messages
 * @throws {Error} If text content is not a string
 */
export const extract_text = (messages) => {
    return messages.map(m => {
        if ( whatis(m) === 'string' ) {
            return m;
        }
        if ( whatis(m) !== 'object' ) {
            return '';
        }
        if ( whatis(m.content) === 'array' ) {
            return m.content.map(c => c.text).join(' ');
        }
        if ( whatis(m.content) === 'string' ) {
            return m.content;
        } else {
            const is_text_type = m.content.type === 'text' ||
                !Object.prototype.hasOwnProperty.call(m.content, 'type');
            if ( is_text_type ) {
                if ( whatis(m.content.text) !== 'string' ) {
                    throw new Error('text content must be a string');
                }
                return m.content.text;
            }
            return '';
        }
    }).join(' ');
};
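Example behavior (editor's illustration, not part of the commit):

import { extract_text } from './Messages.js';

extract_text([
    'plain string',
    { role: 'user', content: [{ type: 'text', text: 'from array' }] },
    { role: 'user', content: 'bare string content' },
]);
// => 'plain string from array bare string content'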
116 src/backend/src/services/ai/utils/OpenAIUtil.d.ts vendored Normal file
@@ -0,0 +1,116 @@
import type {
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionContentPart,
    ChatCompletionMessageParam,
    ChatCompletionMessageToolCall,
} from 'openai/resources/chat/completions';
import type { CompletionUsage } from 'openai/resources/completions';
import { IChatModel, IChatProvider } from '../chat/providers/types';

export interface ToolUseContent {
    type: 'tool_use';
    id: string;
    name: string;
    input: unknown;
    extra_content?: unknown;
}

export interface ToolResultContent {
    type: 'tool_result';
    tool_use_id: string;
    content: unknown;
}

export type NormalizedContent =
    | ChatCompletionContentPart
    | ToolUseContent
    | ToolResultContent
    | ({ type?: 'image_url'; image_url: unknown; [key: string]: unknown });

export interface NormalizedMessage extends Partial<ChatCompletionMessageParam> {
    role?: ChatCompletionMessageParam['role'] | string;
    content?: NormalizedContent[] | null;
    tool_calls?: ChatCompletionMessageToolCall[];
    tool_call_id?: string;
    [key: string]: unknown;
}

export type UsageCalculator = (args: { usage: CompletionUsage }) => Record<string, number>;

export interface ChatStream {
    message(): {
        contentBlock: (params: { type: 'text' } | { type: 'tool_use'; id: string; name: string; extra_content?: unknown }) => {
            addText?(text: string): void;
            addReasoning?(reasoning: string): void;
            addExtraContent?(extra_content: unknown): void;
            addPartialJSON?(partial_json: string): void;
            end(): void;
        };
        end(): void;
    };
    end(): void;
}

export type StreamingToolCall = ChatCompletionChunk.Choice.Delta.ToolCall & { extra_content?: unknown };

export type CompletionChunk = Omit<ChatCompletionChunk, 'choices' | 'usage'> & {
    choices: Array<
        Omit<ChatCompletionChunk['choices'][number], 'delta'> & {
            delta: ChatCompletionChunk['choices'][number]['delta'] & {
                reasoning_content?: string | null;
                reasoning?: string | null;
                extra_content?: unknown;
                tool_calls?: StreamingToolCall[];
            };
        }
    >;
    usage?: CompletionUsage | null;
};

export interface StreamDeviations {
    index_usage_from_stream_chunk?: (chunk: CompletionChunk) => Partial<CompletionUsage> | null | undefined;
    chunk_but_like_actually?: (chunk: CompletionChunk) => Partial<CompletionChunk>;
    index_tool_calls_from_stream_choice?: (choice: CompletionChunk['choices'][number]) => StreamingToolCall[] | undefined;
}

export interface CompletionDeviations<TCompletion = ChatCompletion> {
    coerce_completion_usage?: (completion: TCompletion) => Partial<CompletionUsage>;
    chunk_but_like_actually?: (chunk: CompletionChunk) => Partial<CompletionChunk>;
    index_tool_calls_from_stream_choice?: (choice: CompletionChunk['choices'][number]) => StreamingToolCall[] | undefined;
    index_usage_from_stream_chunk?: (chunk: CompletionChunk) => Partial<CompletionUsage> | null | undefined;
}

export function process_input_messages<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;

export function create_usage_calculator (params: { model_details: IChatModel }): UsageCalculator;

export function extractMeteredUsage (usage: {
    prompt_tokens?: number | null;
    completion_tokens?: number | null;
    prompt_tokens_details?: { cached_tokens?: number | null } | null;
}): {
    prompt_tokens: number;
    completion_tokens: number;
    cached_tokens: number;
};
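Per the declaration, `extractMeteredUsage` coerces the nullable OpenAI usage fields into plain numbers. A sketch of expected behavior (editor's illustration, not part of the commit; only the shape is guaranteed by the .d.ts, and whether cached tokens are also counted inside prompt_tokens is up to the implementation):

extractMeteredUsage({
    prompt_tokens: 120,
    completion_tokens: 40,
    prompt_tokens_details: { cached_tokens: 100 },
});
// => { prompt_tokens: <number>, completion_tokens: 40, cached_tokens: 100 } — numbers, never null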

export function create_chat_stream_handler (params: {
    deviations?: StreamDeviations;
    completion: AsyncIterable<CompletionChunk>;
    usage_calculator?: UsageCalculator;
}): (args: { chatStream: ChatStream }) => Promise<void>;

type CompletionChoice<TCompletion> = TCompletion extends { choices: Array<infer Choice> }
    ? Choice
    : ChatCompletion['choices'][number];

export function handle_completion_output<TCompletion = ChatCompletion> (params: {
    deviations?: CompletionDeviations<TCompletion>;
    stream?: boolean;
    completion: AsyncIterable<CompletionChunk> | TCompletion;
    moderate?: (text: string) => Promise<{ flagged: boolean }>;
    usage_calculator?: UsageCalculator;
    finally_fn?: () => Promise<void>;
}): ReturnType<IChatProvider['complete']>;
Some files were not shown because too many files have changed in this diff.