mirror of https://github.com/trycua/computer.git
synced 2026-01-04 04:19:57 -06:00

Merge branch 'main' into feat/generic-vlm-provider
@@ -242,7 +242,7 @@ agent = ComputerAgent(model="huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5")

agent = ComputerAgent(model="omniparser+openai/gpt-4o")

# Combine state-of-the-art grounding with powerful reasoning
-agent = ComputerAgent(model="huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022")
+agent = ComputerAgent(model="huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929")

# Combine two different vision models for enhanced capabilities
agent = ComputerAgent(model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B+openai/gpt-4o")

@@ -25,7 +25,7 @@ desktop = computer.create_desktop_from_apps(["Safari", "Notes"])

# Your agent can now only see and interact with these apps
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[desktop]
)
```

@@ -94,7 +94,7 @@ async def main():

    # Initialize an agent
    agent = ComputerAgent(
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-5-20250929",
        tools=[desktop]
    )

@@ -160,7 +160,7 @@ async def automate_iphone():

    # Initialize an agent for iPhone automation
    agent = ComputerAgent(
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-5-20250929",
        tools=[my_iphone]
    )

@@ -145,9 +145,9 @@ While the core concept remains the same across all agent loops, different AI mod

| Agent Loop | Supported Models | Description | Set-Of-Marks |
|:-----------|:-----------------|:------------|:-------------|
| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA Preview model | Not Required |
-| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use Beta Tools | Not Required |
+| `AgentLoop.ANTHROPIC` | • `claude-sonnet-4-5-20250929`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use Beta Tools | Not Required |
| `AgentLoop.UITARS` | • `ByteDance-Seed/UI-TARS-1.5-7B` | Uses ByteDance's UI-TARS 1.5 model | Not Required |
-| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
+| `AgentLoop.OMNI` | • `claude-sonnet-4-5-20250929`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |

Each loop handles the same basic pattern we implemented manually in Part 1:

@@ -191,7 +191,7 @@ The performance of different Computer-Use models varies significantly across tas

- **AgentLoop.OPENAI**: Choose when you have OpenAI Tier 3 access and need the most capable computer-use agent for web-based tasks. Uses the same [OpenAI Computer-Use Loop](https://platform.openai.com/docs/guides/tools-computer-use) as Part 1, delivering strong performance on browser-based benchmarks.

-- **AgentLoop.ANTHROPIC**: Ideal for users with Anthropic API access who need strong reasoning capabilities with computer-use abilities. Works with `claude-3-5-sonnet-20240620` and `claude-3-7-sonnet-20250219` models following [Anthropic's Computer-Use tools](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use#understanding-the-multi-agent-loop).
+- **AgentLoop.ANTHROPIC**: Ideal for users with Anthropic API access who need strong reasoning capabilities with computer-use abilities. Works with `claude-sonnet-4-5-20250929` and `claude-3-7-sonnet-20250219` models following [Anthropic's Computer-Use tools](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use#understanding-the-multi-agent-loop).

- **AgentLoop.UITARS**: Best for scenarios requiring more powerful OS/desktop automation and latency-sensitive workloads, as UI-TARS-1.5 leads in OS capabilities benchmarks. Requires running the model locally or accessing it through compatible endpoints (e.g. on Hugging Face).

@@ -14,12 +14,12 @@ This is the kind of problem that makes you wonder if we're building the future o

Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language.

-Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-3-5-sonnet-20241022"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to.
+Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-sonnet-4-5-20250929"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to.

```python
# This works the same whether you're using Anthropic, OpenAI, or that new model you found on Hugging Face
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022", # or any other supported model
+    model="anthropic/claude-sonnet-4-5-20250929", # or any other supported model
    tools=[computer]
)
```
@@ -8,13 +8,13 @@ Growing a developer-focused product is hard. Traditional marketing doesn't work.

So we tried something different at Google DevFest Toronto: show up with backpacks full of cute cua-la keychains and see what happens.

-This is the story of how two new hires—a growth engineer and a designer/artist—guerrilla marketed their way through a major tech conference with $200 worth of merch and a post-event automation pipeline.
+This is the story of how two new hires, a growth engineer and a designer/artist, guerrilla marketed their way through a major tech conference with $200 worth of merch and a post-event automation pipeline.

## Meet the Team

**Sarina** (Growth Engineering): Built the post-event automation pipeline that extracts LinkedIn connections and generates personalized messages while you sleep.

-**Esther** (Design + Art): Hand-crafted every piece of artwork, giving life to CUA through illustrations, branding, and yes, extremely cute cua-la keychains.
+**Esther** (Design + Art): Hand-crafted every piece of artwork, giving life to Cua through illustrations, branding, and yes, extremely cute cua-la keychains.

The thesis: what if we could draw people in with irresistible physical merch, then use computer use agents to handle all the tedious follow-up work?
@@ -24,11 +24,9 @@ The thesis: what if we could draw people in with irresistible physical merch, th

Google DevFest Toronto brought together hundreds of developers and AI enthusiasts. We didn't have a booth. We didn't have demos. We showed up with backpacks full of cua-la keychains with the cua.ai logo and started handing them out.

-That's it. Pure guerrilla marketing.
+That's it. Pure guerrilla marketing, and the cua-las were absurdly effective.

-The cua-las were absurdly effective.
-
-People would literally crowd around us—not because they were interested in computer use (at first), but because they wanted a cua-la. We'd pitch CUA while handing out keychains, and suddenly we had an engaged audience. No booth required.
+People would literally crowd around us, not because they were interested in computer use (at first), but because they wanted a cua-la. We'd pitch Cua while handing out keychains, and suddenly we had an engaged audience!

<img src="./assets/devfest-image.JPG" alt="DevFest crowd">
@@ -36,13 +34,13 @@ People would literally crowd around us—not because they were interested in com

A few people stuck the cua-las on their bags immediately. Then, throughout the event, we started getting approached:

-"Wait, are you the CUA girls?"
+"Wait, are you the Cua girls?"

-They'd seen the cua-las on someone's bag, asked about it, and tracked us down. The keychains became walking advertisements.
+They'd seen the cua-las on someone's bag, asked about it, and tracked us down! The keychains became walking advertisements.

<img src="./assets/htn-at-devfest.JPG" alt="Hack the North recognition at DevFest">

-Even better: two attendees recognized CUA from Hack the North. Our previous event marketing was actually working. People remembered us.
+Even better: two attendees recognized Cua from Hack the North. Our previous event marketing was actually working. People remembered us.

## Part 2: The Automation (Try It Yourself)
@@ -64,9 +62,9 @@ Sarina had a better idea: build the automation we wish existed, then open source

LinkedIn scraping automation in action
</video>

-The agent navigates LinkedIn like a human would—click profile, extract info, navigate back, repeat. But it does it overnight while you sleep.
+The agent navigates LinkedIn like a human would: click profile, extract info, navigate back, repeat. But it does it overnight while you sleep.

-The secret sauce: **VM session persistence**. By logging into LinkedIn once through CUA's VM, the session stays alive. No captchas, no bot detection, just smooth automation.
+The secret sauce: **VM session persistence**. By logging into LinkedIn once through Cua's VM, the session stays alive. No captchas, no bot detection, just smooth automation.
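For the curious, that loop is only a few lines with the Agent SDK. The sketch below is illustrative only: the prompt, CSV name, and connection list are invented for this example and are not the actual DevFest pipeline.

```python
# Illustrative sketch -- the prompt, CSV name, and names below are
# invented for this example; this is not the real DevFest pipeline.
import asyncio

from agent import ComputerAgent
from computer import Computer


async def follow_up(connections: list[str]) -> None:
    async with Computer() as computer:
        agent = ComputerAgent(
            model="anthropic/claude-sonnet-4-5-20250929",
            tools=[computer],
        )
        for name in connections:
            # One run per connection: open the profile, extract info,
            # record it, then move on (the "click, extract, repeat" loop).
            async for _ in agent.run(
                f"Open the LinkedIn profile of {name}, copy their headline, "
                "and append a row with their name and headline to connections.csv"
            ):
                pass


asyncio.run(follow_up(["Jane Doe", "John Smith"]))
```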
<video controls width="100%">
  <source src="./assets/adding-row-csv.mp4" type="video/mp4">
@@ -98,7 +96,7 @@ Then use that data to craft personalized messages. Sarina wrote unique follow-up

- People crowding around us for cua-las
- Walking advertisements on bags throughout the event
-- Instant brand recognition ("Are you the CUA girls?")
+- Instant brand recognition ("Are you the Cua girls?")
- Two people who remembered us from Hack the North
- 20+ quality connections extracted and messaged within 24 hours
- Several demo requests from personalized follow-ups

@@ -119,11 +117,11 @@ We ran out faster than expected! Next time: bigger bag, or limit to one per pers

The VM login step added friction. "Log in manually first, then run the script" confused some people who wanted to try it themselves. Need better first-run UX.

**Message Personalization**
-While the extraction was automated, Sarina still wrote each follow-up message manually. The automation saved the data collection part, but not the creative writing part. (Though this probably led to better messages.)
+While the extraction was automated, I still wrote each follow-up message manually. We're looking for ways to better enrich messages with context from the event, which is hard to automate.

## What's Next: NeurIPS 2025

-NeurIPS is the biggest AI conference of the year. Thousands of researchers, hundreds of companies, and endless networking opportunities.
+NeurIPS is the biggest AI conference of the year. Thousands of researchers, hundreds of companies.

**The good news**: We still have one giant bag of cua-las left. They're already packed and ready.
@@ -135,11 +133,11 @@ The cua-las get people interested. The automation ensures we actually follow thr

Most event marketing fails at the follow-up stage. You collect business cards, connect on LinkedIn, and then... nothing. The moment passes. People forget.

-With CUA handling the mechanical work (data organization, connection tracking, follow-up scheduling), we can focus on the human part: genuine conversations, valuable introductions, and actually helping people.
+With Cua handling the mechanical work (data organization, connection tracking, follow-up scheduling), we can focus on the human part: genuine conversations, valuable introductions, and actually helping people.

## The Framework: Cute Merch + Smart Automation

-Traditional event marketing: show up, pitch, collect cards, never follow up.
+Traditional event marketing: show up, pitch, collect cards.

Our approach: combine two forces that shouldn't work together but do.

@@ -167,19 +165,8 @@ Most companies nail one or the other:

Do both, and you create a flywheel: each event builds brand recognition for the next, while automation ensures maximum value from every connection.

-## The Meta Lesson
-
-We built CUA to build CUA. Every automation we create for growth becomes:
-
-1. A real-world test of the product
-2. Documentation of what works (and what doesn't)
-3. An example for others to copy
-4. Marketing material that's actually useful
-
-Esther hand-draws artwork that makes people smile. Sarina builds automations that save time. Together, they're proving that developer tools can be both powerful and delightful.
-
-See you at NeurIPS 2025. We'll be the ones with the cua-las.
+See you at NeurIPS 2025!

---

-_Want to build your own growth hacking automations? Check out [CUA on GitHub](https://github.com/trycua/cua) or join our [Discord](https://discord.gg/cua) to share your experiments. cua-las not included (yet)._
+_Want to build your own growth hacking automations? Check out [Cua on GitHub](https://github.com/trycua/cua) or join our [Discord](https://discord.gg/cua) to share your experiments. cua-las not included (yet)._
blog/cua-playground-preview.md (new file, 86 lines)

@@ -0,0 +1,86 @@
# Cua Playground: Agents + Sandboxes in Your Browser

Building computer-use agents means constant iteration—writing code, deploying to a sandbox, testing behavior, debugging issues, then repeating the cycle. Every test requires switching between your code editor, terminal, and VNC viewer. Want to try a different prompt? Edit your code, redeploy, and wait for the agent to restart. It works, but it's slow.

Today we're launching the **Cua Playground**: a browser-based environment for testing computer-use agents without writing code. Send messages to your sandboxes, watch them execute in real-time, and iterate on prompts instantly—all from your dashboard at cua.ai.



**What's new with this release:**

- Instant testing—send messages to any running sandbox directly from your browser
- Real-time execution—watch your agent work with live tool call updates and screenshots
- Multi-model support—test with Claude Sonnet 4.5, Haiku 4.5, and more
- Persistent chat history—conversations save automatically to local storage

The Playground connects to your existing Cua sandboxes—the same ones you use with the Agent SDK. Select a running sandbox and a model, then start chatting. The agent uses computer-use tools (mouse, keyboard, bash, editor) to complete your tasks, and you see every action it takes.

## Getting Started Today

<div align="center">
  <video src="https://github.com/user-attachments/assets/9fef0f30-1024-4833-8b7a-6a2c02d8eb99" width="600" controls></video>
</div>

Sign up at [cua.ai/signin](https://cua.ai/signin) and grab your API key from the dashboard. Then navigate to the Playground:

1. Navigate to Dashboard > Playground
2. Select a sandbox from the dropdown (must be "running" status)
3. Choose a model (we recommend Claude Sonnet 4.5 to start)
4. Send a message: "Take a screenshot and describe what you see"
5. Watch the agent execute computer actions in real-time

Example use cases:

**Prompt Testing**

```
❌ "Check the website"
✅ "Navigate to example.com in Firefox and take a screenshot of the homepage"
```

**Model Comparison**
Run the same task with different models to compare quality, speed, and cost.

**Debugging Agent Behavior**

1. Send: "Find the login button and click it"
2. View tool calls to see each mouse movement
3. Check screenshots to verify the agent found the right element
4. Adjust your prompt based on what you observe

## FAQs

<details>
<summary><strong>Do I need to know how to code?</strong></summary>

No. The Playground is designed for testing agent behavior without writing code. However, for production deployments, you'll need to use the Agent SDK (Python/TypeScript).

</details>

<details>
<summary><strong>Does this replace the Agent SDK?</strong></summary>

No. The Playground is for rapid testing and experimentation. For production deployments, scheduled tasks, or complex workflows, use the Agent SDK.

</details>

<details>
<summary><strong>How much does it cost?</strong></summary>

Playground requests use the same credit system as Agent SDK requests. You're charged for model inference (varies by model) and sandbox runtime (billed per hour while running).

</details>

<details>
<summary><strong>Why is my sandbox not showing up?</strong></summary>

The sandbox must have `status = "running"` to appear in the dropdown. Check Dashboard > Sandboxes to verify status. If stopped, click "Start" and wait ~30 seconds for it to become available.

</details>

## Need help?

If you hit issues getting the Playground working, reach out in [Discord](https://discord.gg/cua-ai). We respond fast and fix based on what people actually use.

---

Get started at [cua.ai](https://cua.ai) or try the Playground at [cua.ai/dashboard/playground](https://cua.ai/dashboard/playground).
@@ -4,7 +4,6 @@ If you've been building computer-use agents, you know the reality: every model p

Today we're launching the **Cua VLM Router**: a managed inference API that gives you unified access to multiple vision-language model providers through a single API key. We're starting with Anthropic's Claude models (Sonnet 4.5 and Haiku 4.5), some of the most loved and widely used computer-use models in the Cua ecosystem, with more providers coming soon.



## What You Get

@@ -12,21 +11,25 @@ Today we're launching the **Cua VLM Router**: a managed inference API that gives

The Cua VLM Router handles the infrastructure so you can focus on building:

**Single API Key**

- One key for all model providers (no juggling multiple credentials)
- Works for both model inference and sandbox access
- Manage everything from one dashboard at cua.ai

**Smart Routing**

- Automatic provider selection for optimal availability and performance
- For Anthropic models, we route to the best provider (Anthropic, AWS Bedrock, or Microsoft Foundry)
- No configuration needed—just specify the model and we handle the rest

**Cost Tracking & Optimization**

- Unified usage dashboard across all models
- Real-time credit balance tracking
- Detailed cost breakdown per request (gateway cost + upstream cost)

**Production-Ready**

- OpenAI-compatible API (drop-in replacement for existing code)
- Full streaming support with Server-Sent Events
- Metadata about routing decisions in every response

@@ -35,10 +38,10 @@ The Cua VLM Router handles the infrastructure so you can focus on building:

We're starting with Anthropic's latest Claude models:

-| Model | Best For |
-|-------|----------|
+| Model                             | Best For                           |
+| --------------------------------- | ---------------------------------- |
| `cua/anthropic/claude-sonnet-4.5` | General-purpose tasks, recommended |
-| `cua/anthropic/claude-haiku-4.5` | Fast responses, cost-effective |
+| `cua/anthropic/claude-haiku-4.5`  | Fast responses, cost-effective     |

## How It Works
|
||||
@@ -48,36 +51,38 @@ When you request an Anthropic model through Cua, we automatically route to the b
|
||||
|
||||
Sign up at [cua.ai/signin](https://cua.ai/signin) and create your API key from **Dashboard > API Keys > New API Key** (save it immediately—you won't see it again).
|
||||
|
||||
Set your environment variable:
|
||||
|
||||
```bash
|
||||
export CUA_API_KEY="sk_cua-api01_..."
|
||||
```
|
||||
|
||||
Use it with the Agent SDK:
|
||||
Use it with the Agent SDK (make sure to set your environment variable):
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from agent import ComputerAgent
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
async def main():
|
||||
# Initialize cloud computer
|
||||
computer = Computer(
|
||||
os_type="linux",
|
||||
provider_type="cloud",
|
||||
name="your-sandbox-name"
|
||||
)
|
||||
name="your-container-name",
|
||||
api_key="your-cua-api-key"
|
||||
)
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="cua/anthropic/claude-sonnet-4.5", # Cua-routed model
|
||||
# Initialize agent with Claude Sonnet 4.5
|
||||
agent = ComputerAgent(
|
||||
tools=[computer],
|
||||
max_trajectory_budget=5.0
|
||||
)
|
||||
model="cua/anthropic/claude-sonnet-4.5",
|
||||
api_key="your-cua-api-key",
|
||||
instructions="You are a helpful assistant that can control computers",
|
||||
only_n_most_recent_images=3
|
||||
)
|
||||
|
||||
messages = [{"role": "user", "content": "Take a screenshot and analyze what's on screen"}]
|
||||
# Run a task
|
||||
async for result in agent.run("Open a browser and search for Python tutorials"):
|
||||
print(result)
|
||||
|
||||
async for result in agent.run(messages):
|
||||
for item in result["output"]:
|
||||
if item["type"] == "message":
|
||||
print(item["content"][0]["text"])
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
## Migration is Simple
|
||||
@@ -85,12 +90,14 @@ async for result in agent.run(messages):
|
||||
Already using Anthropic directly? Just add the `cua/` prefix:
|
||||
|
||||
**Before:**
|
||||
|
||||
```python
|
||||
export ANTHROPIC_API_KEY="sk-ant-..."
|
||||
agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")
|
||||
```
|
||||
|
||||
**After:**
|
||||
|
||||
```python
|
||||
export CUA_API_KEY="sk_cua-api01_..."
|
||||
agent = ComputerAgent(model="cua/anthropic/claude-sonnet-4.5")
|
||||
|
||||
@@ -58,7 +58,7 @@ await run_full_dataset(

# Or test on SheetBench (50 spreadsheet tasks)
await run_full_dataset(
    dataset="hud-evals/SheetBench-V2",
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    split="train[:2]"
)
```
@@ -11,11 +11,13 @@ Today we're launching the **Cua CLI**: a command-line interface that brings the

The Cua CLI handles everything you need to work with Cloud Sandboxes:

**Authentication**

- Browser-based OAuth login with automatic credential storage
- Direct API key support for CI/CD pipelines
- Export credentials to `.env` files for SDK integration

**Sandbox Management**

- Create sandboxes with your choice of OS, size, and region
- List all your sandboxes with status and connection details
- Start, stop, restart, and delete sandboxes

@@ -123,17 +125,20 @@ await computer.run()

Create sandboxes in the size and region that fits your needs:

**Sizes:**

- `small` - 2 cores, 8 GB RAM, 128 GB SSD
- `medium` - 4 cores, 16 GB RAM, 128 GB SSD
- `large` - 8 cores, 32 GB RAM, 256 GB SSD

**Regions:**

- `north-america`
- `europe`
- `asia-pacific`
- `south-america`

**OS Options:**

- `linux` - Ubuntu with XFCE desktop
- `windows` - Windows 11 with Edge and Python
- `macos` - macOS (preview access)

@@ -141,6 +146,7 @@ Create sandboxes in the size and region that fits your needs:

## Example Workflows

**Quick Testing Environment**

```bash
# Spin up a sandbox, test something, tear it down
cua sb create --os linux --size small --region north-america

@@ -149,6 +155,7 @@ cua sb delete my-sandbox-abc123

```

**Persistent Development Sandbox**

```bash
# Create a sandbox for long-term use
cua sb create --os linux --size medium --region north-america

@@ -221,11 +228,13 @@ Yes. The CLI and dashboard share the same API. Any sandbox you create in the das

<summary><strong>How do I update the CLI?</strong></summary>

If you installed via script:

```bash
curl -LsSf https://cua.ai/cli/install.sh | sh
```

If you installed via npm:

```bash
npm install -g @trycua/cli@latest
```

@@ -235,6 +244,7 @@ npm install -g @trycua/cli@latest

## What's Next

We're actively iterating based on feedback. Planned features include:

- SSH key management for secure sandbox access
- Template-based sandbox creation
- Batch operations (start/stop multiple sandboxes)
@@ -4,7 +4,11 @@ description: Supported computer-using agent loops and models

---

<Callout>
-  A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
+  A corresponding{' '}
+  <a href="https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb" target="_blank">
+    Jupyter Notebook
+  </a>{' '}
+  is available for this documentation.
</Callout>

An agent can be thought of as a loop - it generates actions, executes them, and repeats until done:

@@ -30,7 +34,7 @@ async def take_screenshot():

) as computer:

    agent = ComputerAgent(
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-5-20250929",
        tools=[computer],
        max_trajectory_budget=5.0
    )

@@ -117,7 +121,7 @@ The output is an AsyncGenerator that yields response chunks.

The `ComputerAgent` constructor provides a wide range of options for customizing agent behavior, tool integration, callbacks, resource management, and more.

- `model` (`str`): Default: **required**
-  The LLM or agent model to use. Determines which agent loop is selected unless `custom_loop` is provided. (e.g., "claude-3-5-sonnet-20241022", "computer-use-preview", "omni+vertex_ai/gemini-pro")
+  The LLM or agent model to use. Determines which agent loop is selected unless `custom_loop` is provided. (e.g., "claude-sonnet-4-5-20250929", "computer-use-preview", "omni+vertex_ai/gemini-pro")
- `tools` (`List[Any]`):
  List of tools the agent can use (e.g., `Computer`, sandboxed Python functions, etc.).
- `custom_loop` (`Callable`):
@@ -155,7 +159,7 @@ from computer import Computer

from agent.callbacks import ImageRetentionCallback

agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[Computer(...)],
    only_n_most_recent_images=3,
    callbacks=[ImageRetentionCallback(only_n_most_recent_images=3)],

@@ -13,7 +13,7 @@ Optimize agent costs with budget management and image retention callbacks.

from agent.callbacks import BudgetManagerCallback

agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    callbacks=[
        BudgetManagerCallback(

@@ -30,7 +30,7 @@ agent = ComputerAgent(

```python
# Simple budget limit
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    max_trajectory_budget=5.0 # $5 limit
)
```

@@ -40,7 +40,7 @@ agent = ComputerAgent(

```python
# Advanced budget configuration
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    max_trajectory_budget={
        "max_budget": 10.0,
        "raise_error": True, # Raise error when exceeded

@@ -55,7 +55,7 @@ agent = ComputerAgent(

from agent.callbacks import ImageRetentionCallback

agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    callbacks=[
        ImageRetentionCallback(only_n_most_recent_images=3)

@@ -67,7 +67,7 @@ agent = ComputerAgent(

```python
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    only_n_most_recent_images=3 # Auto-adds ImageRetentionCallback
)

@@ -77,7 +77,7 @@ agent = ComputerAgent(

```python
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    max_trajectory_budget=5.0, # Budget limit
    only_n_most_recent_images=3, # Image retention

@@ -21,7 +21,7 @@ from agent.callbacks import (

)

agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    callbacks=[
        ImageRetentionCallback(only_n_most_recent_images=3),

@@ -14,7 +14,7 @@ from agent.callbacks import LoggingCallback

import logging

agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    callbacks=[
        LoggingCallback(

@@ -29,7 +29,7 @@ agent = ComputerAgent(

```python
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    verbosity=logging.INFO # Auto-adds LoggingCallback
)

@@ -72,7 +72,7 @@ class CustomLogger(AsyncCallbackHandler):

# Use custom logger
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    callbacks=[CustomLogger("my_agent")]
)

@@ -13,7 +13,7 @@ The TrajectorySaverCallback records complete agent conversations including messa

from agent.callbacks import TrajectorySaverCallback

agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],
    callbacks=[
        TrajectorySaverCallback(

@@ -28,7 +28,7 @@ agent = ComputerAgent(

```python
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    trajectory_dir="trajectories", # Auto-save trajectories
    tools=[computer]
)
@@ -3,7 +3,14 @@ title: Customize ComputerAgent

---

<Callout>
-  A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
+  A corresponding{' '}
+  <a
+    href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb"
+    target="_blank"
+  >
+    Jupyter Notebook
+  </a>{' '}
+  is available for this documentation.
</Callout>

The `ComputerAgent` interface provides an easy proxy to any computer-using model configuration, and it is a powerful framework for extending and building your own agentic systems.
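As a rough illustration of that extensibility, here is a sketch of a custom callback. The `AsyncCallbackHandler` base class appears in the logging docs in this same changeset, but the import path and the hook name below are assumptions; check `agent.callbacks` for the real API.

```python
# Sketch only: AsyncCallbackHandler is referenced elsewhere in these docs,
# but this import path and the on_run_end hook name are assumptions.
from agent import ComputerAgent
from agent.callbacks import AsyncCallbackHandler


class RunCounter(AsyncCallbackHandler):
    """Hypothetical callback that counts completed runs."""

    def __init__(self):
        self.completed_runs = 0

    async def on_run_end(self, *args, **kwargs):  # assumed hook name
        self.completed_runs += 1


agent = ComputerAgent(
    model="anthropic/claude-sonnet-4-5-20250929",
    callbacks=[RunCounter()],
)
```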
@@ -4,7 +4,11 @@ description: Use ComputerAgent with HUD for benchmarking and evaluation

---

<Callout>
-  A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
+  A corresponding{' '}
+  <a href="https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb" target="_blank">
+    Jupyter Notebook
+  </a>{' '}
+  is available for this documentation.
</Callout>

The HUD integration allows an agent to be benchmarked using the [HUD framework](https://www.hud.so/). Through the HUD integration, the agent controls a computer inside HUD, where tests are run to evaluate the success of each task.
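For orientation, a benchmark run looks roughly like the SheetBench snippet earlier in this changeset. A minimal sketch, assuming `run_full_dataset` lives in the agent's HUD integration module (the import path is an assumption; the call shape mirrors the documented example):

```python
# Minimal sketch: the call shape matches the SheetBench example in this
# changeset; the import path is an assumption.
import asyncio

from agent.integrations.hud import run_full_dataset


async def main():
    await run_full_dataset(
        dataset="hud-evals/SheetBench-V2",
        model="anthropic/claude-sonnet-4-5-20250929",
        split="train[:2]",  # start small before running the full benchmark
    )


asyncio.run(main())
```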
@@ -59,4 +59,8 @@ you will see all the agent execution steps, including computer actions, LLM call

For each step, you will see the LLM call and the computer action. The computer actions are highlighted in the timeline in yellow.

-<img src="/docs/img/laminar_trace_example.png" alt="Example trace in Laminar showing the litellm.response span and its output." width="800px" />
+<img
+  src="/docs/img/laminar_trace_example.png"
+  alt="Example trace in Laminar showing the litellm.response span and its output."
+  width="800px"
+/>
@@ -7,7 +7,7 @@ This guide lists **breaking changes** when migrating from the original `Computer

## Breaking Changes

- **Initialization:**
-  - `ComputerAgent` (v0.4.x) uses `model` as a string (e.g. "anthropic/claude-3-5-sonnet-20241022") instead of `LLM` and `AgentLoop` objects.
+  - `ComputerAgent` (v0.4.x) uses `model` as a string (e.g. "anthropic/claude-sonnet-4-5-20250929") instead of `LLM` and `AgentLoop` objects.
  - `tools` is a list (can include multiple computers and decorated functions).
  - `callbacks` are now first-class for extensibility (image retention, budget, trajectory, logging, etc).
- **No explicit `loop` parameter:**

@@ -39,7 +39,7 @@ async with Computer() as computer:

```python
async with Computer() as computer:
    agent = ComputerAgent(
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-5-20250929",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]

@@ -38,7 +38,7 @@ With the OpenAI provider, prompt caching is handled automatically for prompts of

```python
from agent import ComputerAgent
agent = ComputerAgent(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-5-20250929",
    use_prompt_caching=True,
)
```

@@ -32,7 +32,7 @@ Any vision-enabled LiteLLM-compatible model can be used as the planning componen

- Any All‑in‑one CUA (planning-capable). See [All‑in‑one CUAs](./computer-use-agents).
- Any VLM via LiteLLM providers: `anthropic/*`, `openai/*`, `openrouter/*`, `gemini/*`, `vertex_ai/*`, `huggingface-local/*`, `mlx/*`, etc.
- Examples:
-  - **Anthropic**: `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-opus-4-1-20250805`
+  - **Anthropic**: `anthropic/claude-sonnet-4-5-20250929`, `anthropic/claude-opus-4-1-20250805`
  - **OpenAI**: `openai/gpt-5`, `openai/gpt-o3`, `openai/gpt-4o`
  - **Google**: `gemini/gemini-1.5-pro`, `vertex_ai/gemini-pro-vision`
  - **Local models**: Any Hugging Face vision-language model

@@ -59,7 +59,7 @@ Combine state-of-the-art grounding with powerful reasoning:

```python
agent = ComputerAgent(
-    "huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022",
+    "huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929",
    tools=[computer]
)
@@ -113,7 +113,7 @@ async for _ in agent.run("Close the settings window, then open the Downloads fol

Composed agents support both capabilities:

```python
-agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022")
+agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929")

# Full computer-use agent capabilities
async for _ in agent.run("Complete this online form"):

@@ -29,10 +29,9 @@ Claude models with computer-use capabilities:

- Claude 4.1: `claude-opus-4-1-20250805`
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
- Claude 3.7: `claude-3-7-sonnet-20250219`
-- Claude 3.5: `claude-3-5-sonnet-20241022`

```python
-agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])
+agent = ComputerAgent("claude-sonnet-4-5-20250929", tools=[computer])
async for _ in agent.run("Open Firefox and navigate to github.com"):
    pass
```

@@ -11,10 +11,10 @@ All models that support `ComputerAgent.run()` also support `ComputerAgent.predic

### Anthropic CUAs

+- Claude 4.5: `claude-sonnet-4-5-20250929`
- Claude 4.1: `claude-opus-4-1-20250805`
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
- Claude 3.7: `claude-3-7-sonnet-20250219`
-- Claude 3.5: `claude-3-5-sonnet-20241022`

### OpenAI CUA Preview

@@ -61,7 +61,7 @@ Moondream3 is a powerful small model that can perform UI grounding and click pre

```python
# Using any grounding model for click prediction
-agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])
+agent = ComputerAgent("claude-sonnet-4-5-20250929", tools=[computer])

# Predict coordinates for specific elements
login_coords = agent.predict_click("find the login button")

@@ -75,7 +75,7 @@ print(f"Menu icon: {menu_coords}")

```python
# OmniParser is just for OCR, so it requires an LLM for predict_click
-agent = ComputerAgent("omniparser+anthropic/claude-3-5-sonnet-20241022", tools=[computer])
+agent = ComputerAgent("omniparser+anthropic/claude-sonnet-4-5-20250929", tools=[computer])

# Predict click coordinates using composed agent
coords = agent.predict_click("find the submit button")
@@ -55,10 +55,10 @@ async for result in agent.run(messages):

The CUA VLM Router currently supports these models:

-| Model ID | Provider | Description | Best For |
-|----------|----------|-------------|----------|
+| Model ID                          | Provider  | Description       | Best For                           |
+| --------------------------------- | --------- | ----------------- | ---------------------------------- |
| `cua/anthropic/claude-sonnet-4.5` | Anthropic | Claude Sonnet 4.5 | General-purpose tasks, recommended |
-| `cua/anthropic/claude-haiku-4.5` | Anthropic | Claude Haiku 4.5 | Fast responses, cost-effective |
+| `cua/anthropic/claude-haiku-4.5`  | Anthropic | Claude Haiku 4.5  | Fast responses, cost-effective     |

## How It Works

@@ -95,6 +95,7 @@ GET /v1/models

```

**Response:**

```json
{
  "data": [

@@ -117,12 +118,11 @@ Content-Type: application/json

```

**Request:**

```json
{
  "model": "anthropic/claude-sonnet-4.5",
-  "messages": [
-    {"role": "user", "content": "Hello!"}
-  ],
+  "messages": [{ "role": "user", "content": "Hello!" }],
  "max_tokens": 100,
  "temperature": 0.7,
  "stream": false
@@ -130,20 +130,23 @@ Content-Type: application/json

```

**Response:**

```json
{
  "id": "gen_...",
  "object": "chat.completion",
  "created": 1763554838,
  "model": "anthropic/claude-sonnet-4.5",
-  "choices": [{
-    "index": 0,
-    "message": {
-      "role": "assistant",
-      "content": "Hello! How can I help you today?"
-    },
-    "finish_reason": "stop"
-  }],
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Hello! How can I help you today?"
+      },
+      "finish_reason": "stop"
+    }
+  ],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 12,
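To tie the request and response shapes together, here is a small Python sketch against the same endpoint using `requests`; the endpoint, bearer auth, and payload mirror the examples on this page, and error handling is kept minimal:

```python
# Minimal sketch: POST the request body shown above to the documented
# chat completions endpoint and print the assistant's reply.
import os

import requests

resp = requests.post(
    "https://inference.cua.ai/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['CUA_API_KEY']}"},
    json={
        "model": "anthropic/claude-sonnet-4.5",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 100,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```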
@@ -170,6 +173,7 @@ curl -X POST https://inference.cua.ai/v1/chat/completions \

```

**Response (SSE format):**

```
data: {"id":"gen_...","choices":[{"delta":{"content":"1"}}],"object":"chat.completion.chunk"}

@@ -187,6 +191,7 @@ GET /v1/balance

```

**Response:**

```json
{
  "balance": 211689.85,

@@ -201,6 +206,7 @@ CUA VLM Router provides detailed cost information in every response:

### Credit System

Requests are billed in **credits**:

- Credits are deducted from your CUA account balance
- Prices vary by model and usage
- CUA manages all provider API keys and infrastructure
@@ -210,8 +216,8 @@ Requests are billed in **credits**:

```json
{
  "usage": {
-    "cost": 0.01,            // CUA gateway cost in credits
-    "market_cost": 0.000065  // Actual upstream API cost
+    "cost": 0.01, // CUA gateway cost in credits
+    "market_cost": 0.000065 // Actual upstream API cost
  }
}
```
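In client code, reading those two fields is straightforward; a small sketch (the `response` dict below is illustrative, standing in for a parsed API response):

```python
# Illustrative: stand-in for a parsed response with the fields shown above.
response = {
    "usage": {
        "prompt_tokens": 10,
        "completion_tokens": 12,
        "cost": 0.01,             # CUA gateway cost in credits
        "market_cost": 0.000065,  # actual upstream API cost
    }
}

usage = response["usage"]
print(f"tokens: {usage['prompt_tokens']} in / {usage['completion_tokens']} out")
print(f"billed: {usage['cost']} credits (upstream ${usage['market_cost']:.6f})")
```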
@@ -251,19 +257,20 @@ agent = ComputerAgent(

## Benefits Over Direct Provider Access

-| Feature | CUA VLM Router | Direct Provider (BYOK) |
-|---------|---------------|------------------------|
-| **Single API Key** | ✅ One key for all providers | ❌ Multiple keys to manage |
-| **Managed Infrastructure** | ✅ No API key management | ❌ Manage multiple provider keys |
-| **Usage Tracking** | ✅ Unified dashboard | ❌ Per-provider tracking |
-| **Model Switching** | ✅ Change model string only | ❌ Change code + keys |
-| **Setup Complexity** | ✅ One environment variable | ❌ Multiple environment variables |
+| Feature                    | CUA VLM Router               | Direct Provider (BYOK)            |
+| -------------------------- | ---------------------------- | --------------------------------- |
+| **Single API Key**         | ✅ One key for all providers  | ❌ Multiple keys to manage         |
+| **Managed Infrastructure** | ✅ No API key management      | ❌ Manage multiple provider keys   |
+| **Usage Tracking**         | ✅ Unified dashboard          | ❌ Per-provider tracking           |
+| **Model Switching**        | ✅ Change model string only   | ❌ Change code + keys              |
+| **Setup Complexity**       | ✅ One environment variable   | ❌ Multiple environment variables  |

## Error Handling

### Common Error Responses

#### Invalid API Key

```json
{
  "detail": "Insufficient credits. Current balance: 0.00 credits"

@@ -271,6 +278,7 @@ agent = ComputerAgent(

```

#### Missing Authorization

```json
{
  "detail": "Missing Authorization: Bearer token"

@@ -278,6 +286,7 @@ agent = ComputerAgent(

```

#### Invalid Model

```json
{
  "detail": "Invalid or unavailable model"

@@ -343,6 +352,7 @@ agent = ComputerAgent(

Switching from direct provider access (BYOK) to CUA VLM Router is simple:

**Before (Direct Provider Access with BYOK):**

```python
import os
# Required: Provider-specific API key

@@ -355,6 +365,7 @@ agent = ComputerAgent(

```

**After (CUA VLM Router - Cloud Service):**

```python
import os
# Required: CUA API key only (no provider keys needed)
@@ -14,6 +14,7 @@ model="cua/anthropic/claude-haiku-4.5" # Claude Haiku 4.5 (faster)

```

**Benefits:**

- Single API key for multiple providers
- Cost tracking and optimization
- Fully managed infrastructure (no provider keys to manage)

@@ -27,7 +28,6 @@ model="cua/anthropic/claude-haiku-4.5" # Claude Haiku 4.5 (faster)

Direct access to Anthropic's Claude models using your own Anthropic API key (BYOK - Bring Your Own Key).

```python
-model="anthropic/claude-3-5-sonnet-20241022"
model="anthropic/claude-3-7-sonnet-20250219"
model="anthropic/claude-opus-4-20250514"
model="anthropic/claude-sonnet-4-20250514"

@@ -61,6 +61,6 @@ Combine Omniparser for UI understanding with any LLM provider.

```python
model="omniparser+ollama_chat/mistral-small3.2"
model="omniparser+vertex_ai/gemini-pro"
-model="omniparser+anthropic/claude-3-5-sonnet-20241022"
+model="omniparser+anthropic/claude-sonnet-4-5-20250929"
model="omniparser+openai/gpt-4o"
```

@@ -19,6 +19,7 @@ Cua collects anonymized usage and error statistics. We follow [Posthog's ethical

### Disabled by default (opt-in)

**Trajectory logging** captures full conversation history:

- User messages and agent responses
- Computer actions and outputs
- Agent reasoning traces

@@ -117,7 +118,7 @@ Telemetry settings are configured at initialization and can't be changed afterwa

| Event Name | Data Collected | Trigger Notes |
| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
| **module_init** | • `module`: "agent"<br />• `version`: Package version<br />• `python_version`: Full Python version string | Triggered once when the agent package is imported for the first time |
-| **agent_session_start** | • `session_id`: Unique UUID for this agent instance<br />• `agent_type`: Class name (e.g., "ComputerAgent")<br />• `model`: Model name (e.g., "claude-3-5-sonnet")<br />• `os`: Operating system<br />• `os_version`: OS version<br />• `python_version`: Python version | Triggered when TelemetryCallback is initialized (agent instantiation) |
+| **agent_session_start** | • `session_id`: Unique UUID for this agent instance<br />• `agent_type`: Class name (e.g., "ComputerAgent")<br />• `model`: Model name (e.g., "claude-sonnet-4-5")<br />• `os`: Operating system<br />• `os_version`: OS version<br />• `python_version`: Python version | Triggered when TelemetryCallback is initialized (agent instantiation) |
| **agent_run_start** | • `session_id`: Agent session UUID<br />• `run_id`: Unique UUID for this run<br />• `start_time`: Unix timestamp<br />• `input_context_size`: Character count of input messages<br />• `num_existing_messages`: Count of existing messages<br />• `uploaded_trajectory`: Full conversation items (opt-in) | Triggered at the start of each agent.run() call |
| **agent_run_end** | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `end_time`: Unix timestamp<br />• `duration_seconds`: Total run duration<br />• `num_steps`: Total steps taken in this run<br />• `total_usage`: Accumulated token usage and costs<br />• `uploaded_trajectory`: Full conversation items (opt-in) | Triggered at the end of each agent.run() call |
| **agent_step** | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `step`: Step number (incremental)<br />• `timestamp`: Unix timestamp<br />• `duration_seconds`: Duration of previous step | Triggered on each agent response/step during a run |
@@ -3,7 +3,8 @@ title: Computer UI (Deprecated)

---

<Callout type="warn" title="Deprecated">
-  The Computer UI is deprecated and will be replaced with a revamped playground experience soon. We recommend using VNC or Screen Sharing for precise control of the computer instead.
+  The Computer UI is deprecated and will be replaced with a revamped playground experience soon. We
+  recommend using VNC or Screen Sharing for precise control of the computer instead.
</Callout>

The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.
@@ -4,7 +4,14 @@ slug: sandboxed-python

---

<Callout>
-  A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py" target="_blank">Python example</a> is available for this documentation.
+  A corresponding{' '}
+  <a
+    href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py"
+    target="_blank"
+  >
+    Python example
+  </a>{' '}
+  is available for this documentation.
</Callout>

You can run Python functions securely inside a sandboxed virtual environment on a remote Cua Computer. This is useful for executing untrusted user code, isolating dependencies, or providing a safe environment for automation tasks.
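As a quick taste, here is a minimal sketch of a sandboxed function in the spirit of the linked example file; treat the `computer.helpers` import, the `sandboxed` decorator, the `set_default_computer` helper, and the venv name as assumptions if your version differs:

```python
# Sketch only: helper names follow the linked sandboxed_functions example;
# treat the import path, decorator, and venv name as assumptions.
import asyncio

from computer import Computer
from computer.helpers import sandboxed, set_default_computer


@sandboxed("demo_venv")
def greet(name: str) -> str:
    # Executes inside the remote computer's virtual environment,
    # not in the local process.
    return f"Hello, {name}!"


async def main():
    async with Computer() as computer:
        set_default_computer(computer)  # route sandboxed calls here
        print(await greet("Cua"))


asyncio.run(main())
```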
@@ -473,6 +473,7 @@ python form_filling.py

```

The agent will:

1. Download the PDF resume from Overleaf
2. Extract information from the PDF
3. Fill out the JotForm with the extracted information
@@ -0,0 +1,640 @@

---
title: GUI Grounding with Gemini 3
description: Using Google's Gemini 3 with OmniParser for Advanced GUI Grounding Tasks
---

import { Step, Steps } from 'fumadocs-ui/components/steps';
import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
import { Callout } from 'fumadocs-ui/components/callout';

## Overview

This example demonstrates how to use Google's Gemini 3 models with OmniParser for complex GUI grounding tasks. Gemini 3 Pro achieves exceptional performance on the [ScreenSpot-Pro benchmark](https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding) with a **72.7% accuracy** (compared to Claude Sonnet 4.5's 36.2%), making it ideal for precise UI element location and complex navigation tasks.

<img
  src="/docs/img/grounding-with-gemini3.gif"
  alt="Demo of Gemini 3 with OmniParser performing complex GUI navigation tasks"
  width="800px"
/>

<Callout type="info" title="Why Gemini 3 for UI Navigation?">
  According to [Google's Gemini 3 announcement](https://blog.google/products/gemini/gemini-3/), Gemini 3 Pro achieves:

  - **72.7%** on ScreenSpot-Pro (vs. Gemini 2.5 Pro's 11.4%)
  - Industry-leading performance on complex UI navigation tasks
  - Advanced multimodal understanding for high-resolution screens
</Callout>

### What You'll Build

This guide shows how to:

- Set up Vertex AI with proper authentication
- Use OmniParser with Gemini 3 for GUI element detection
- Leverage Gemini 3-specific features like `thinking_level` and `media_resolution`
- Create agents that can perform complex multi-step UI interactions

---

<Steps>

<Step>

### Set Up Google Cloud and Vertex AI

Before using Gemini 3 models, you need to enable Vertex AI in Google Cloud Console.

#### 1. Create a Google Cloud Project

1. Go to [Google Cloud Console](https://console.cloud.google.com/)
2. Click **Select a project** → **New Project**
3. Enter a project name and click **Create**
4. Note your **Project ID** (you'll need this later)

#### 2. Enable Vertex AI API

1. Navigate to [Vertex AI API](https://console.cloud.google.com/apis/library/aiplatform.googleapis.com)
2. Select your project
3. Click **Enable**

#### 3. Enable Billing

1. Go to [Billing](https://console.cloud.google.com/billing)
2. Link a billing account to your project
3. Vertex AI offers a [free tier](https://cloud.google.com/vertex-ai/pricing) for testing

#### 4. Create a Service Account

1. Go to [IAM & Admin > Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts)
2. Click **Create Service Account**
3. Enter a name (e.g., "cua-gemini-agent")
4. Click **Create and Continue**
5. Grant the **Vertex AI User** role
6. Click **Done**

#### 5. Create and Download Service Account Key

1. Click on your newly created service account
2. Go to **Keys** tab
3. Click **Add Key** → **Create new key**
4. Select **JSON** format
5. Click **Create** (the key file will download automatically)
6. **Important**: Store this key file securely! It contains credentials for accessing your Google Cloud resources

<Callout type="warn">
  Never commit your service account JSON key to version control! Add it to `.gitignore` immediately.
</Callout>

</Step>
<Step>

### Install Dependencies

Install the required packages for OmniParser and Gemini 3.

Create a `requirements.txt` file:

```text
cua-agent
cua-computer
cua-som  # OmniParser for GUI element detection
litellm>=1.0.0
python-dotenv>=1.0.0
google-cloud-aiplatform>=1.70.0
```

Install the dependencies:

```bash
pip install -r requirements.txt
```

</Step>
<Step>

### Configure Environment Variables

Create a `.env` file in your project root:

```text
# Google Cloud / Vertex AI credentials
GOOGLE_CLOUD_PROJECT=your-project-id
GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-service-account-key.json

# Cua credentials (for cloud sandboxes)
CUA_API_KEY=sk_cua-api01...
CUA_SANDBOX_NAME=your-sandbox-name
```

Replace the values:

- `your-project-id`: Your Google Cloud Project ID from Step 1
- `/path/to/your-service-account-key.json`: Path to the JSON key file you downloaded
- `sk_cua-api01...`: Your Cua API key from the [Cua dashboard](https://cua.dev)
- `your-sandbox-name`: Your sandbox name (if using cloud sandboxes)
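Before moving on, it can help to confirm the variables are actually visible to Python; a quick check using `python-dotenv` from the requirements above:

```python
# Quick sanity check that the .env file is being picked up.
import os

from dotenv import load_dotenv

load_dotenv()
for var in ("GOOGLE_CLOUD_PROJECT", "GOOGLE_APPLICATION_CREDENTIALS",
            "CUA_API_KEY", "CUA_SANDBOX_NAME"):
    print(f"{var}: {'set' if os.getenv(var) else 'MISSING'}")
```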
</Step>
<Step>

### Create Your Complex UI Navigation Script

Create a Python file (e.g., `gemini_ui_navigation.py`):

<Tabs items={['Cloud Sandbox', 'Linux on Docker', 'macOS Sandbox']}>
<Tab value="Cloud Sandbox">

```python
import asyncio
import logging
import os
import signal
import traceback

from agent import ComputerAgent
from computer import Computer, VMProviderType
from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def handle_sigint(sig, frame):
    print("\n\nExecution interrupted by user. Exiting gracefully...")
    exit(0)


async def complex_ui_navigation():
    """
    Demonstrate Gemini 3's exceptional UI grounding capabilities
    with complex, multi-step navigation tasks.
    """
    try:
        async with Computer(
            os_type="linux",
            provider_type=VMProviderType.CLOUD,
            name=os.environ["CUA_SANDBOX_NAME"],
            api_key=os.environ["CUA_API_KEY"],
            verbosity=logging.INFO,
        ) as computer:

            agent = ComputerAgent(
                # Use OmniParser with Gemini 3 Pro for optimal GUI grounding
                model="omniparser+vertex_ai/gemini-3-pro-preview",
                tools=[computer],
                only_n_most_recent_images=3,
                verbosity=logging.INFO,
                trajectory_dir="trajectories",
                use_prompt_caching=False,
                max_trajectory_budget=5.0,
                # Gemini 3-specific parameters
                thinking_level="high",  # Enables deeper reasoning (vs "low")
                media_resolution="high",  # High-resolution image processing (vs "low" or "medium")
            )

            # Complex GUI grounding tasks inspired by the ScreenSpot-Pro benchmark.
            # These test precise element location in professional UIs.
            tasks = [
                # Task 1: GitHub repository navigation
                {
                    "instruction": (
                        "Go to github.com/trycua/cua. "
                        "Find and click on the 'Issues' tab. "
                        "Then locate and click on the search box within the issues page "
                        "(not the global GitHub search). "
                        "Type 'omniparser' and press Enter."
                    ),
                    "description": "Tests precise UI element distinction in a complex interface",
                },
                # Task 2: Search for and install Visual Studio Code
                {
                    "instruction": (
                        "Open your system's app store (e.g., Microsoft Store). "
                        "Search for 'Visual Studio Code'. "
                        "In the search results, select 'Visual Studio Code'. "
                        "Click on 'Install' or 'Get' to begin the installation. "
                        "If prompted, accept any permissions or confirm the installation. "
                        "Wait for Visual Studio Code to finish installing."
                    ),
                    "description": "Tests the ability to search for an application and complete its installation through a step-by-step app store workflow.",
                },
            ]

            history = []

            for i, task_info in enumerate(tasks, 1):
                task = task_info["instruction"]
                print(f"\n{'='*60}")
                print(f"[Task {i}/{len(tasks)}] {task_info['description']}")
                print(f"{'='*60}")
                print(f"\nInstruction: {task}\n")

                # Add user message to history
                history.append({"role": "user", "content": task})

                # Run agent with conversation history
                async for result in agent.run(history, stream=False):
                    history += result.get("output", [])

                    # Print output for debugging
                    for item in result.get("output", []):
                        if item.get("type") == "message":
                            content = item.get("content", [])
                            for content_part in content:
                                if content_part.get("text"):
                                    logger.info(f"Agent: {content_part.get('text')}")
                        elif item.get("type") == "computer_call":
action = item.get("action", {})
|
||||
action_type = action.get("type", "")
|
||||
logger.debug(f"Computer Action: {action_type}")
|
||||
|
||||
print(f"\n✅ Task {i}/{len(tasks)} completed")
|
||||
|
||||
print("\n🎉 All complex UI navigation tasks completed successfully!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in complex_ui_navigation: {e}")
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
def main():
|
||||
try:
|
||||
load_dotenv()
|
||||
|
||||
# Validate required environment variables
|
||||
required_vars = [
|
||||
"GOOGLE_CLOUD_PROJECT",
|
||||
"GOOGLE_APPLICATION_CREDENTIALS",
|
||||
"CUA_API_KEY",
|
||||
"CUA_SANDBOX_NAME",
|
||||
]
|
||||
|
||||
missing_vars = [var for var in required_vars if not os.environ.get(var)]
|
||||
if missing_vars:
|
||||
raise RuntimeError(
|
||||
f"Missing required environment variables: {', '.join(missing_vars)}\n"
|
||||
f"Please check your .env file and ensure all keys are set.\n"
|
||||
f"See the setup guide for details on configuring Vertex AI credentials."
|
||||
)
|
||||
|
||||
signal.signal(signal.SIGINT, handle_sigint)
|
||||
|
||||
asyncio.run(complex_ui_navigation())
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running automation: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
</Tab>
<Tab value="Linux on Docker">

```python
import asyncio
import logging
import os
import signal
import sys
import traceback

from agent import ComputerAgent
from computer import Computer, VMProviderType
from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def handle_sigint(sig, frame):
    print("\n\nExecution interrupted by user. Exiting gracefully...")
    sys.exit(0)


async def complex_ui_navigation():
    """
    Demonstrate Gemini 3's exceptional UI grounding capabilities
    with complex, multi-step navigation tasks.
    """
    try:
        async with Computer(
            os_type="linux",
            provider_type=VMProviderType.DOCKER,
            image="trycua/cua-xfce:latest",
            verbosity=logging.INFO,
        ) as computer:

            agent = ComputerAgent(
                # Use OmniParser with Gemini 3 Pro for optimal GUI grounding
                model="omniparser+vertex_ai/gemini-3-pro-preview",
                tools=[computer],
                only_n_most_recent_images=3,
                verbosity=logging.INFO,
                trajectory_dir="trajectories",
                use_prompt_caching=False,
                max_trajectory_budget=5.0,
                # Gemini 3-specific parameters
                thinking_level="high",  # Enables deeper reasoning (vs "low")
                media_resolution="high",  # High-resolution image processing (vs "low" or "medium")
            )

            # Complex GUI grounding tasks inspired by the ScreenSpot-Pro benchmark
            tasks = [
                {
                    "instruction": (
                        "Go to github.com/trycua/cua. "
                        "Find and click on the 'Issues' tab. "
                        "Then locate and click on the search box within the issues page "
                        "(not the global GitHub search). "
                        "Type 'omniparser' and press Enter."
                    ),
                    "description": "Tests precise UI element distinction in a complex interface",
                },
            ]

            history = []

            for i, task_info in enumerate(tasks, 1):
                task = task_info["instruction"]
                print(f"\n{'='*60}")
                print(f"[Task {i}/{len(tasks)}] {task_info['description']}")
                print(f"{'='*60}")
                print(f"\nInstruction: {task}\n")

                history.append({"role": "user", "content": task})

                async for result in agent.run(history, stream=False):
                    history += result.get("output", [])

                    for item in result.get("output", []):
                        if item.get("type") == "message":
                            content = item.get("content", [])
                            for content_part in content:
                                if content_part.get("text"):
                                    logger.info(f"Agent: {content_part.get('text')}")
                        elif item.get("type") == "computer_call":
                            action = item.get("action", {})
                            action_type = action.get("type", "")
                            logger.debug(f"Computer Action: {action_type}")

                print(f"\n✅ Task {i}/{len(tasks)} completed")

            print("\n🎉 All complex UI navigation tasks completed successfully!")

    except Exception as e:
        logger.error(f"Error in complex_ui_navigation: {e}")
        traceback.print_exc()
        raise


def main():
    try:
        load_dotenv()

        required_vars = [
            "GOOGLE_CLOUD_PROJECT",
            "GOOGLE_APPLICATION_CREDENTIALS",
        ]

        missing_vars = [var for var in required_vars if not os.environ.get(var)]
        if missing_vars:
            raise RuntimeError(
                f"Missing required environment variables: {', '.join(missing_vars)}\n"
                f"Please check your .env file."
            )

        signal.signal(signal.SIGINT, handle_sigint)

        asyncio.run(complex_ui_navigation())

    except Exception as e:
        logger.error(f"Error running automation: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()
```

</Tab>
<Tab value="macOS Sandbox">

```python
import asyncio
import logging
import os
import signal
import sys
import traceback

from agent import ComputerAgent
from computer import Computer, VMProviderType
from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def handle_sigint(sig, frame):
    print("\n\nExecution interrupted by user. Exiting gracefully...")
    sys.exit(0)


async def complex_ui_navigation():
    """
    Demonstrate Gemini 3's exceptional UI grounding capabilities
    with complex, multi-step navigation tasks.
    """
    try:
        async with Computer(
            os_type="macos",
            provider_type=VMProviderType.LUME,
            name="macos-sequoia-cua:latest",
            verbosity=logging.INFO,
        ) as computer:

            agent = ComputerAgent(
                # Use OmniParser with Gemini 3 Pro for optimal GUI grounding
                model="omniparser+vertex_ai/gemini-3-pro-preview",
                tools=[computer],
                only_n_most_recent_images=3,
                verbosity=logging.INFO,
                trajectory_dir="trajectories",
                use_prompt_caching=False,
                max_trajectory_budget=5.0,
                # Gemini 3-specific parameters
                thinking_level="high",  # Enables deeper reasoning (vs "low")
                media_resolution="high",  # High-resolution image processing (vs "low" or "medium")
            )

            # Complex GUI grounding tasks inspired by the ScreenSpot-Pro benchmark
            tasks = [
                {
                    "instruction": (
                        "Go to github.com/trycua/cua. "
                        "Find and click on the 'Issues' tab. "
                        "Then locate and click on the search box within the issues page "
                        "(not the global GitHub search). "
                        "Type 'omniparser' and press Enter."
                    ),
                    "description": "Tests precise UI element distinction in a complex interface",
                },
            ]

            history = []

            for i, task_info in enumerate(tasks, 1):
                task = task_info["instruction"]
                print(f"\n{'='*60}")
                print(f"[Task {i}/{len(tasks)}] {task_info['description']}")
                print(f"{'='*60}")
                print(f"\nInstruction: {task}\n")

                history.append({"role": "user", "content": task})

                async for result in agent.run(history, stream=False):
                    history += result.get("output", [])

                    for item in result.get("output", []):
                        if item.get("type") == "message":
                            content = item.get("content", [])
                            for content_part in content:
                                if content_part.get("text"):
                                    logger.info(f"Agent: {content_part.get('text')}")
                        elif item.get("type") == "computer_call":
                            action = item.get("action", {})
                            action_type = action.get("type", "")
                            logger.debug(f"Computer Action: {action_type}")

                print(f"\n✅ Task {i}/{len(tasks)} completed")

            print("\n🎉 All complex UI navigation tasks completed successfully!")

    except Exception as e:
        logger.error(f"Error in complex_ui_navigation: {e}")
        traceback.print_exc()
        raise


def main():
    try:
        load_dotenv()

        required_vars = [
            "GOOGLE_CLOUD_PROJECT",
            "GOOGLE_APPLICATION_CREDENTIALS",
        ]

        missing_vars = [var for var in required_vars if not os.environ.get(var)]
        if missing_vars:
            raise RuntimeError(
                f"Missing required environment variables: {', '.join(missing_vars)}\n"
                f"Please check your .env file."
            )

        signal.signal(signal.SIGINT, handle_sigint)

        asyncio.run(complex_ui_navigation())

    except Exception as e:
        logger.error(f"Error running automation: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()
```

</Tab>
</Tabs>

</Step>

<Step>

### Run Your Script

Execute your complex UI navigation automation:

```bash
python gemini_ui_navigation.py
```

The agent will:

1. Navigate to GitHub and locate specific UI elements
2. Distinguish between similar elements (e.g., global search vs. issues search)
3. Perform multi-step interactions with visual feedback
4. Use Gemini 3's advanced reasoning for precise element grounding

Monitor the output to see the agent's progress through each task.
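
Each run also writes screenshots and message logs under the directory set by `trajectory_dir` (`trajectories/` in the scripts above). A quick way to see what each run captured; this is an illustrative sketch, and the exact file layout inside a run folder depends on your SDK version:

```python
# inspect_trajectories.py - illustrative helper, not part of the Cua SDK
from pathlib import Path

root = Path("trajectories")
for run_dir in sorted(p for p in root.iterdir() if p.is_dir()):
    artifacts = list(run_dir.rglob("*"))
    print(f"{run_dir.name}: {len(artifacts)} saved artifacts")
```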

</Step>

</Steps>

---

## Understanding Gemini 3-Specific Parameters

### `thinking_level`

Controls the amount of internal reasoning the model performs:

- `"high"`: Deeper reasoning, better for complex UI navigation (recommended for ScreenSpot-like tasks)
- `"low"`: Faster responses, suitable for simpler tasks

### `media_resolution`

Controls vision processing for multimodal inputs:

- `"high"`: Best for complex UIs with many small elements (recommended)
- `"medium"`: Balanced quality and speed
- `"low"`: Faster processing for simple interfaces

<Callout type="info">
For tasks requiring precise GUI element location (like ScreenSpot-Pro), use
`thinking_level="high"` and `media_resolution="high"` for optimal performance.
</Callout>
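
Both parameters are plain keyword arguments on `ComputerAgent` and are forwarded with the request to Vertex AI, so you can tune the speed/accuracy trade-off per agent. An illustrative contrast (other required arguments such as `tools` are omitted for brevity):

```python
from agent import ComputerAgent

# Faster, cheaper configuration for simple, uncluttered interfaces
fast_agent = ComputerAgent(
    model="omniparser+vertex_ai/gemini-3-pro-preview",
    thinking_level="low",       # minimal internal reasoning
    media_resolution="medium",  # lighter vision processing
)

# Slower, more precise configuration for dense professional UIs
precise_agent = ComputerAgent(
    model="omniparser+vertex_ai/gemini-3-pro-preview",
    thinking_level="high",      # deeper reasoning before each action
    media_resolution="high",    # resolves small icons and crowded toolbars
)
```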

---

## Benchmark Performance

Gemini 3 Pro's performance on ScreenSpot-Pro demonstrates its exceptional UI grounding capabilities:

| Model             | ScreenSpot-Pro Score |
| ----------------- | -------------------- |
| **Gemini 3 Pro**  | **72.7%**            |
| Claude Sonnet 4.5 | 36.2%                |
| Gemini 2.5 Pro    | 11.4%                |
| GPT-5.1           | 3.5%                 |

This makes Gemini 3 the ideal choice for complex UI navigation, element detection, and professional GUI automation tasks.

---

## Troubleshooting

### Authentication Issues

If you encounter authentication errors:

1. Verify your service account JSON key path is correct
2. Ensure the service account has the **Vertex AI User** role
3. Check that the Vertex AI API is enabled in your project
4. Confirm your `GOOGLE_CLOUD_PROJECT` matches your actual project ID
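
To confirm which identity your key file resolves to (useful when checking role bindings in IAM), you can print the service account email. A small sketch using `google-auth`; the filename is illustrative:

```python
# whoami_vertex.py - illustrative check, not part of the Cua SDK
import os

from google.oauth2 import service_account

key_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
creds = service_account.Credentials.from_service_account_file(key_path)
print(f"Active service account: {creds.service_account_email}")
```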

### "Vertex AI API not enabled" Error

Run this command to enable the API:

```bash
gcloud services enable aiplatform.googleapis.com --project=YOUR_PROJECT_ID
```

### Billing Issues

Ensure billing is enabled for your Google Cloud project. Visit the [Billing section](https://console.cloud.google.com/billing) to verify.

---

## Next Steps

- Learn more about [OmniParser agent loops](/agent-sdk/agent-loops)
- Explore [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing)
- Read about the [ScreenSpot-Pro benchmark](https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding)
- Check out [Google's Gemini 3 announcement](https://blog.google/products/gemini/gemini-3/)
- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help

@@ -1,5 +1,10 @@
{
  "title": "Cookbook",
  "description": "Real-world examples of building with Cua",
  "pages": ["windows-app-behind-vpn", "form-filling", "post-event-contact-export"]
  "pages": [
    "windows-app-behind-vpn",
    "form-filling",
    "post-event-contact-export",
    "gemini-complex-ui-navigation"
  ]
}

@@ -441,6 +441,7 @@ python contact_export.py
```

The agent will:

1. Navigate to your LinkedIn connections page
2. Extract data from 20 contacts (first name, last name, role, company, LinkedIn URL)
3. Save contacts to a timestamped CSV file

@@ -11,19 +11,23 @@ import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
This guide demonstrates how to automate Windows desktop applications (like eGecko HR/payroll systems) that run behind corporate VPN. This is a common enterprise scenario where legacy desktop applications require manual data entry, report generation, or workflow execution.

**Use cases:**

- HR/payroll processing (employee onboarding, payroll runs, benefits administration)
- Desktop ERP systems behind corporate networks
- Legacy financial applications requiring VPN access
- Compliance reporting from on-premise systems

**Architecture:**

- Client-side Cua agent (Python SDK or Playground UI)
- Windows VM/Sandbox with VPN client configured
- RDP/remote desktop connection to target environment
- Desktop application automation via computer vision and UI control

<Callout type="info">
**Production Deployment**: For production use, consider workflow mining and custom finetuning to create vertical-specific actions (e.g., "Run payroll", "Onboard employee") instead of generic UI automation. This provides better audit trails and higher success rates.
**Production Deployment**: For production use, consider workflow mining and custom finetuning to
create vertical-specific actions (e.g., "Run payroll", "Onboard employee") instead of generic UI
automation. This provides better audit trails and higher success rates.
</Callout>

---

@@ -31,7 +35,11 @@ This guide demonstrates how to automate Windows desktop applications (like eGeck
## Video Demo

<div className="rounded-lg border bg-card text-card-foreground shadow-sm p-4 mb-6">
  <video src="https://github.com/user-attachments/assets/8ab07646-6018-4128-87ce-53180cfea696" controls className="w-full rounded">
  <video
    src="https://github.com/user-attachments/assets/8ab07646-6018-4128-87ce-53180cfea696"
    controls
    className="w-full rounded"
  >
    Your browser does not support the video tag.
  </video>
  <div className="text-sm text-muted-foreground mt-2">

@@ -106,7 +114,8 @@ For local development on Windows 10 Pro/Enterprise or Windows 11:
4. Configure your desktop application installation within the sandbox

<Callout type="warn">
**Manual VPN Setup**: Windows Sandbox requires manual VPN configuration each time it starts. For production use, consider Cloud Sandbox or self-hosted VMs with persistent VPN connections.
**Manual VPN Setup**: Windows Sandbox requires manual VPN configuration each time it starts. For
production use, consider Cloud Sandbox or self-hosted VMs with persistent VPN connections.
</Callout>

</Tab>

@@ -421,6 +430,7 @@ python hr_automation.py
```

The agent will:

1. Connect to your Windows environment (with VPN if configured)
2. Launch and navigate the desktop application
3. Execute each workflow step sequentially

@@ -506,6 +516,7 @@ agent = ComputerAgent(
### 1. Workflow Mining

Before deploying, analyze your actual workflows:

- Record user interactions with the application
- Identify common patterns and edge cases
- Map out decision trees and validation requirements

@@ -524,6 +535,7 @@ tasks = ["onboard_employee", "run_payroll", "generate_compliance_report"]
```

This provides:

- Better audit trails
- Approval gates at business logic level
- Higher success rates

@@ -547,12 +559,14 @@ agent = ComputerAgent(
Choose your deployment model:

**Managed (Recommended)**

- Cua hosts Windows sandboxes, VPN/RDP stack, and agent runtime
- You get UI/API endpoints for triggering workflows
- Automatic scaling, monitoring, and maintenance
- SLA guarantees and enterprise support

**Self-Hosted**

- You manage Windows VMs, VPN infrastructure, and agent deployment
- Full control over data and security
- Custom network configurations

@@ -5,7 +5,8 @@ title: Introduction
import { Monitor, Code, BookOpen, Zap, Bot, Boxes, Rocket } from 'lucide-react';

<div className="rounded-lg border bg-card text-card-foreground shadow-sm px-4 py-2 mb-6">
Cua is an open-source framework for building **Computer-Use Agents** - AI systems that see, understand, and interact with desktop applications through vision and action, just like humans do.
Cua is an open-source framework for building **Computer-Use Agents** - AI systems that see,
understand, and interact with desktop applications through vision and action, just like humans do.
</div>

## Why Cua?

@@ -7,7 +7,14 @@ github:
---

<Callout>
A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_server_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
A corresponding{' '}
<a
  href="https://github.com/trycua/cua/blob/main/notebooks/computer_server_nb.ipynb"
  target="_blank"
>
  Jupyter Notebook
</a>{' '}
is available for this documentation.
</Callout>

The Computer Server API reference documentation is currently under development.

@@ -15,6 +15,7 @@ The CUA CLI provides commands for authentication and sandbox management.
The CLI supports **two command styles** for flexibility:

**Flat style** (quick & concise):

```bash
cua list
cua create --os linux --size small --region north-america
@@ -22,6 +23,7 @@ cua start my-sandbox
```

**Grouped style** (explicit & clear):

```bash
cua sb list      # or: cua sandbox list
cua sb create    # or: cua sandbox create
@@ -54,9 +56,11 @@ cua login --api-key sk-your-api-key-here
```

**Options:**

- `--api-key <key>` - Provide API key directly instead of browser flow

**Example:**

```bash
$ cua auth login
Opening browser for CLI auth...
@@ -75,12 +79,14 @@ cua env
```

**Example:**

```bash
$ cua auth env
Wrote /path/to/your/project/.env
```

The generated `.env` file will contain:

```
CUA_API_KEY=sk-your-api-key-here
```

@@ -97,6 +103,7 @@ cua logout
```

**Example:**

```bash
$ cua auth logout
Logged out
@@ -121,6 +128,7 @@ cua ps
```

**Example Output (default, passwords hidden):**

```
NAME             STATUS    HOST
my-dev-sandbox   running   my-dev-sandbox.sandbox.cua.ai
@@ -128,6 +136,7 @@ test-windows stopped test-windows.sandbox.cua.ai
```

**Example Output (with --show-passwords):**

```
NAME             STATUS    PASSWORD          HOST
my-dev-sandbox   running   secure-pass-123   my-dev-sandbox.sandbox.cua.ai
@@ -143,11 +152,13 @@ cua create --os <OS> --size <SIZE> --region <REGION>
```

**Required Options:**

- `--os` - Operating system: `linux`, `windows`, `macos`
- `--size` - Sandbox size: `small`, `medium`, `large`
- `--region` - Region: `north-america`, `europe`, `asia-pacific`, `south-america`

**Examples:**

```bash
# Create a small Linux sandbox in North America
cua create --os linux --size small --region north-america
@@ -162,6 +173,7 @@ cua create --os macos --size large --region asia-pacific
**Response Types:**

**Immediate (Status 200):**

```bash
Sandbox created and ready: my-new-sandbox-abc123
Password: secure-password-here
@@ -169,6 +181,7 @@ Host: my-new-sandbox-abc123.sandbox.cua.ai
```

**Provisioning (Status 202):**

```bash
Sandbox provisioning started: my-new-sandbox-abc123
Job ID: job-xyz789
@@ -184,6 +197,7 @@ cua start <name>
```

**Example:**

```bash
$ cua start my-dev-sandbox
Start accepted
@@ -198,6 +212,7 @@ cua stop <name>
```

**Example:**

```bash
$ cua stop my-dev-sandbox
stopping
@@ -212,6 +227,7 @@ cua restart <name>
```

**Example:**

```bash
$ cua restart my-dev-sandbox
restarting
@@ -226,6 +242,7 @@ cua delete <name>
```

**Example:**

```bash
$ cua delete old-test-sandbox
Sandbox deletion initiated: deleting
@@ -247,6 +264,7 @@ cua open <name>
```

**Example:**

```bash
$ cua vnc my-dev-sandbox
Opening NoVNC: https://my-dev-sandbox.sandbox.cua.ai/vnc.html?autoconnect=true&password=...
@@ -254,7 +272,6 @@ Opening NoVNC: https://my-dev-sandbox.sandbox.cua.ai/vnc.html?autoconnect=true&p

This command automatically opens your default browser to the VNC interface with the correct password pre-filled.


## Global Options

### Help
@@ -273,18 +290,21 @@ cua list --help
The CLI provides clear error messages for common issues:

### Authentication Errors

```bash
$ cua list
Unauthorized. Try 'cua auth login' again.
```

### Sandbox Not Found

```bash
$ cua start nonexistent-sandbox
Sandbox not found
```

### Invalid Configuration

```bash
$ cua create --os invalid --configuration small --region north-america
Invalid request or unsupported configuration
@@ -293,6 +313,7 @@ Invalid request or unsupported configuration
## Tips and Best Practices

### 1. Use Descriptive Sandbox Names

```bash
# Good
cua create --os linux --size small --region north-america
@@ -304,6 +325,7 @@ cua list # Check the generated name
```

### 2. Environment Management

```bash
# Set up your project with API key
cd my-project
@@ -312,6 +334,7 @@ cua auth env
```

### 3. Quick Sandbox Access

```bash
# Create aliases for frequently used sandboxes
alias dev-sandbox="cua vnc my-development-sandbox"
@@ -319,6 +342,7 @@ alias prod-sandbox="cua vnc my-production-sandbox"
```

### 4. Monitoring Provisioning

```bash
# For sandboxes that need provisioning time
cua create --os windows --size large --region europe

@@ -34,16 +34,19 @@ cua sb list
## Use Cases

### Development Workflow

- Quickly spin up cloud sandboxes for testing
- Manage multiple sandboxes across different regions
- Integrate with CI/CD pipelines

### Team Collaboration

- Share sandbox configurations and access
- Standardize development environments
- Quick onboarding for new team members

### Automation

- Script sandbox provisioning and management
- Integrate with deployment workflows
- Automate environment setup

@@ -11,24 +11,21 @@ import { Callout } from 'fumadocs-ui/components/callout';
The fastest way to install the CUA CLI is using our installation scripts:

<Tabs items={['macOS / Linux', 'Windows']}>
<Tab value="macOS / Linux">
```bash
curl -LsSf https://cua.ai/cli/install.sh | sh
```
</Tab>
<Tab value="macOS / Linux">```bash curl -LsSf https://cua.ai/cli/install.sh | sh ```</Tab>
<Tab value="Windows">
```powershell
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```powershell powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
</Tabs>

These scripts will automatically:

1. Install [Bun](https://bun.sh) (a fast JavaScript runtime)
2. Install the CUA CLI via `bun add -g @trycua/cli`

<Callout type="info">
The installation scripts will automatically detect your system and install the appropriate binary to your PATH.
The installation scripts will automatically detect your system and install the appropriate binary
to your PATH.
</Callout>

## Alternative: Install with Bun
@@ -44,8 +41,8 @@ bun add -g @trycua/cli
```

<Callout type="info">
Using Bun provides faster installation and better performance compared to npm.
If you don't have Bun installed, the first command will install it for you.
Using Bun provides faster installation and better performance compared to npm. If you don't have
Bun installed, the first command will install it for you.
</Callout>

## Verify Installation
@@ -76,40 +73,21 @@ To update to the latest version:

<Tabs items={['Script Install', 'npm Install']}>
<Tab value="Script Install">
Re-run the installation script:
```bash
# macOS/Linux
curl -LsSf https://cua.ai/cli/install.sh | sh

# Windows
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
<Tab value="npm Install">
```bash
npm update -g @trycua/cli
Re-run the installation script: ```bash # macOS/Linux curl -LsSf https://cua.ai/cli/install.sh |
sh # Windows powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
<Tab value="npm Install">```bash npm update -g @trycua/cli ```</Tab>
</Tabs>

## Uninstalling

<Tabs items={['Script Install', 'npm Install']}>
<Tab value="Script Install">
Remove the binary from your PATH:
```bash
# macOS/Linux
rm $(which cua)

# Windows
# Remove from your PATH or delete the executable
```
</Tab>
<Tab value="npm Install">
```bash
npm uninstall -g @trycua/cli
```
Remove the binary from your PATH: ```bash # macOS/Linux rm $(which cua) # Windows # Remove from
your PATH or delete the executable ```
</Tab>
<Tab value="npm Install">```bash npm uninstall -g @trycua/cli ```</Tab>
</Tabs>

## Troubleshooting
@@ -128,17 +106,12 @@ If you encounter permission issues during installation:

<Tabs items={['macOS / Linux', 'Windows']}>
<Tab value="macOS / Linux">
Try running with sudo (not recommended for the curl method):
```bash
# If using npm
sudo npm install -g @trycua/cli
```
Try running with sudo (not recommended for the curl method): ```bash # If using npm sudo npm
install -g @trycua/cli ```
</Tab>
<Tab value="Windows">
Run PowerShell as Administrator:
```powershell
# Right-click PowerShell and "Run as Administrator"
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
Run PowerShell as Administrator: ```powershell # Right-click PowerShell and "Run as
Administrator" powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
</Tabs>

@@ -30,13 +30,15 @@ To use with Claude Desktop, add an entry to your Claude Desktop configuration (`
If you're working with the CUA source code:

**Standard VM Mode:**

```json
{
  "mcpServers": {
    "cua-agent": {
      "command": "/usr/bin/env",
      "args": [
        "bash", "-lc",
        "bash",
        "-lc",
        "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
      ]
    }
@@ -45,13 +47,15 @@ If you're working with the CUA source code:
```

**Host Computer Control Mode:**

```json
{
  "mcpServers": {
    "cua-agent": {
      "command": "/usr/bin/env",
      "args": [
        "bash", "-lc",
        "bash",
        "-lc",
        "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; export CUA_USE_HOST_COMPUTER_SERVER='true'; export CUA_MAX_IMAGES='1'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
      ]
    }
@@ -62,6 +66,7 @@ If you're working with the CUA source code:
**Note**: Replace `/path/to/cua` with the absolute path to your CUA repository directory.

**⚠️ Host Computer Control Setup**: When using `CUA_USE_HOST_COMPUTER_SERVER='true'`, you must also:

1. Install computer server dependencies: `python3 -m pip install uvicorn fastapi`
2. Install the computer server: `python3 -m pip install -e libs/python/computer-server --break-system-packages`
3. Start the computer server: `python -m computer_server --log-level debug`

@@ -4,19 +4,20 @@ title: Configuration
The server is configured using environment variables (can be set in the Claude Desktop config):

| Variable | Description | Default |
|----------|-------------|---------|
| `CUA_MODEL_NAME` | Model string (e.g., "anthropic/claude-sonnet-4-20250514", "anthropic/claude-3-5-sonnet-20240620", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-sonnet-4-20250514 |
| `ANTHROPIC_API_KEY` | Your Anthropic API key (required for Anthropic models) | None |
| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 |
| `CUA_USE_HOST_COMPUTER_SERVER` | Target your local desktop instead of a VM. Set to "true" to use your host system. **Warning:** AI models may perform risky actions. | false |
| Variable | Description | Default |
| ------------------------------ | ----------- | ------- |
| `CUA_MODEL_NAME` | Model string (e.g., "anthropic/claude-sonnet-4-20250514", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-sonnet-4-20250514 |
| `ANTHROPIC_API_KEY` | Your Anthropic API key (required for Anthropic models) | None |
| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 |
| `CUA_USE_HOST_COMPUTER_SERVER` | Target your local desktop instead of a VM. Set to "true" to use your host system. **Warning:** AI models may perform risky actions. | false |

## Model Configuration

The `CUA_MODEL_NAME` environment variable supports various model providers through LiteLLM integration:

### Supported Providers

- **Anthropic**: `anthropic/claude-sonnet-4-20250514`, `anthropic/claude-3-5-sonnet-20240620`, `anthropic/claude-3-haiku-20240307`
- **Anthropic**: `anthropic/claude-sonnet-4-20250514`
- **OpenAI**: `openai/computer-use-preview`, `openai/gpt-4o`
- **Local Models**: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
- **Omni + LiteLLM**: `omniparser+litellm/gpt-4o`, `omniparser+litellm/claude-3-haiku`
@@ -25,6 +26,7 @@ The `CUA_MODEL_NAME` environment variable supports various model providers throu
### Example Configurations

**Claude Desktop Configuration:**

```json
{
  "mcpServers": {
@@ -43,6 +45,7 @@ The `CUA_MODEL_NAME` environment variable supports various model providers throu
```

**Local Model Configuration:**

```json
{
  "mcpServers": {
@@ -61,6 +64,7 @@ The `CUA_MODEL_NAME` environment variable supports various model providers throu
## Session Management Configuration

The MCP server automatically manages sessions with the following defaults:

- **Max Concurrent Sessions**: 10
- **Session Timeout**: 10 minutes of inactivity
- **Computer Pool Size**: 5 instances

@@ -58,7 +58,8 @@ If you're working with the CUA source code directly (like in the CUA repository)
"cua-agent": {
  "command": "/usr/bin/env",
  "args": [
    "bash", "-lc",
    "bash",
    "-lc",
    "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
  ]
}
@@ -69,16 +70,19 @@ If you're working with the CUA source code directly (like in the CUA repository)
**For host computer control** (development setup):

1. **Install Computer Server Dependencies**:

   ```bash
   python3 -m pip install uvicorn fastapi
   python3 -m pip install -e libs/python/computer-server --break-system-packages
   ```

2. **Start the Computer Server**:

   ```bash
   cd /path/to/cua
   python -m computer_server --log-level debug
   ```

   This will start the computer server on `http://localhost:8000` that controls your actual desktop.

3. **Configure Claude Desktop**:
@@ -88,7 +92,8 @@ If you're working with the CUA source code directly (like in the CUA repository)
"cua-agent": {
  "command": "/usr/bin/env",
  "args": [
    "bash", "-lc",
    "bash",
    "-lc",
    "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; export CUA_USE_HOST_COMPUTER_SERVER='true'; export CUA_MAX_IMAGES='1'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
  ]
}
@@ -110,6 +115,7 @@ If you're working with the CUA source code directly (like in the CUA repository)
   - Check logs for specific error messages

2. **"Missing Anthropic API Key"** - Add your API key to the configuration:

   ```json
   "env": {
     "ANTHROPIC_API_KEY": "your-api-key-here"
@@ -118,8 +124,6 @@ If you're working with the CUA source code directly (like in the CUA repository)

3. **"model not found"** - Use a valid model name:
   - ✅ `anthropic/claude-sonnet-4-20250514`
   - ✅ `anthropic/claude-3-5-sonnet-20240620`
   - ❌ `anthropic/claude-3-5-sonnet-20241022` (doesn't exist)

4. **Script not found** - If you get a `/bin/bash: ~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, try changing the path to the script to be absolute instead of relative.

@@ -130,6 +134,7 @@ If you're working with the CUA source code directly (like in the CUA repository)
- **Image size errors**: Use `CUA_MAX_IMAGES='1'` to reduce image context size

**Viewing Logs:**

```bash
tail -n 20 -f ~/Library/Logs/Claude/mcp*.log
```

@@ -12,7 +12,7 @@ This MCP server features comprehensive liteLLM integration, allowing you to use

### Model String Examples:

- **Anthropic**: `"anthropic/claude-3-5-sonnet-20241022"`
- **Anthropic**: `"anthropic/claude-sonnet-4-5-20250929"`
- **OpenAI**: `"openai/computer-use-preview"`
- **UI-TARS**: `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`
- **Omni + Any LiteLLM**: `"omniparser+litellm/gpt-4o"`, `"omniparser+litellm/claude-3-haiku"`, `"omniparser+ollama_chat/gemma3"`

@@ -45,17 +45,20 @@ The MCP server supports multi-client sessions with automatic resource management
## Usage Examples

### Basic Task Execution

```
"Open Chrome and navigate to github.com"
"Create a folder called 'Projects' on my desktop"
```

### Multi-Task Execution

```
"Run these tasks: 1) Open Finder, 2) Navigate to Documents, 3) Create a new folder called 'Work'"
```

### Session Management

```
"Take a screenshot of the current screen"
"Show me the session statistics"

@@ -16,27 +16,35 @@ Claude will automatically use your CUA agent to perform these tasks.
## Advanced Features

### Progress Reporting

The MCP server provides real-time progress updates during task execution:

- Task progress is reported as percentages (0-100%)
- Multi-task operations show progress for each individual task
- Progress updates are streamed to the MCP client for real-time feedback

### Error Handling

Robust error handling ensures reliable operation:

- Failed tasks return error messages with screenshots when possible
- Session state is preserved even when individual tasks fail
- Automatic cleanup prevents resource leaks
- Detailed error logging for troubleshooting

### Concurrent Task Execution

For improved performance, multiple tasks can run concurrently:

- Set `concurrent=true` in `run_multi_cua_tasks` for parallel execution
- Each task runs in its own context with isolated state
- Progress tracking works for both sequential and concurrent modes
- Resource pooling ensures efficient computer instance usage

### Session Management

Multi-client support with automatic resource management:

- Each client gets isolated sessions with separate computer instances
- Sessions automatically clean up after 10 minutes of inactivity
- Resource pooling prevents resource exhaustion

@@ -55,7 +63,8 @@ No additional configuration is needed - this is the default behavior.
### Option: Targeting Your Local Desktop

<Callout type="warn">
**Warning:** When targeting your local system, AI models have direct access to your desktop and may perform risky actions. Use with caution.
**Warning:** When targeting your local system, AI models have direct access to your desktop and
may perform risky actions. Use with caution.
</Callout>

To have the MCP server control your local desktop instead of a VM:
@@ -82,13 +91,14 @@ Add the `CUA_USE_HOST_COMPUTER_SERVER` environment variable to your MCP client c
"command": "/bin/bash",
"args": ["~/.cua/start_mcp_server.sh"],
"env": {
  "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022",
  "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929",
  "CUA_USE_HOST_COMPUTER_SERVER": "true"
}
}
}
}
```

</Tab>
<Tab value="Other MCP Clients">
Set the environment variable in your MCP client configuration:
@@ -98,6 +108,7 @@ Add the `CUA_USE_HOST_COMPUTER_SERVER` environment variable to your MCP client c
```

Then start your MCP client as usual.

</Tab>
</Tabs>

@@ -108,6 +119,7 @@ Now Claude will control your local desktop directly when you ask it to perform c
## Usage Examples

### Single Task Execution

```
"Open Safari and navigate to apple.com"
"Create a new folder on the desktop called 'My Projects'"
@@ -115,16 +127,19 @@ Now Claude will control your local desktop directly when you ask it to perform c
```

### Multi-Task Execution (Sequential)

```
"Run these tasks in order: 1) Open Finder, 2) Navigate to Documents folder, 3) Create a new folder called 'Work'"
```

### Multi-Task Execution (Concurrent)

```
"Run these tasks simultaneously: 1) Open Chrome, 2) Open Safari, 3) Open Finder"
```

### Session Management

```
"Show me the current session statistics"
"Take a screenshot using session abc123"
@@ -132,6 +147,7 @@ Now Claude will control your local desktop directly when you ask it to perform c
```

### Error Recovery

```
"Try to open a non-existent application and show me the error"
"Find all files with .tmp extension and delete them safely"
@@ -140,13 +156,14 @@ Now Claude will control your local desktop directly when you ask it to perform c
## First-time Usage Notes

**API Keys**: Ensure you have valid API keys:
- Add your Anthropic API key in the Claude Desktop config (as shown above)
- Or set it as an environment variable in your shell profile
- **Required**: The MCP server needs an API key to authenticate with the model provider

- Add your Anthropic API key in the Claude Desktop config (as shown above)
- Or set it as an environment variable in your shell profile
- **Required**: The MCP server needs an API key to authenticate with the model provider

**Model Selection**: Choose the appropriate model for your needs:
- **Claude Sonnet 4**: Latest model with best performance (`anthropic/claude-sonnet-4-20250514`)
- **Claude 3.5 Sonnet**: Reliable performance (`anthropic/claude-3-5-sonnet-20240620`)
- **Computer-Use Preview**: Specialized for computer tasks (`openai/computer-use-preview`)
- **Local Models**: For privacy-sensitive environments
- **Ollama**: For offline usage

- **Claude Sonnet 4**: Latest model with best performance (`anthropic/claude-sonnet-4-20250514`)
- **Computer-Use Preview**: Specialized for computer tasks (`openai/computer-use-preview`)
- **Local Models**: For privacy-sensitive environments
- **Ollama**: For offline usage

@@ -7,7 +7,11 @@ github:
---

<Callout>
A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/som_examples.py" target="_blank">Python example</a> is available for this documentation.
A corresponding{' '}
<a href="https://github.com/trycua/cua/blob/main/examples/som_examples.py" target="_blank">
  Python example
</a>{' '}
is available for this documentation.
</Callout>

## Overview

BIN docs/public/img/grounding-with-gemini3.gif (new file; 5.2 MiB, binary file not shown)

@@ -53,6 +53,10 @@ async def run_agent_example():
        # == Omniparser + Any LLM ==
        # model="omniparser+anthropic/claude-opus-4-20250514",
        # model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M",
        # == Omniparser + Vertex AI Gemini 3 (with thinking_level) ==
        # model="omni+vertex_ai/gemini-3-flash",
        # thinking_level="high",  # or "low"
        # media_resolution="medium",  # or "low" or "high"
        tools=[computer],
        only_n_most_recent_images=3,
        verbosity=logging.DEBUG,

@@ -51,7 +51,7 @@ async def main():

        # Create agent
        agent = ComputerAgent(
            model="anthropic/claude-3-5-sonnet-20241022",
            model="anthropic/claude-sonnet-4-5-20250929",
            tools=[computer],
            only_n_most_recent_images=3,
            trajectory_dir="trajectories",

@@ -189,7 +189,7 @@ class ComputerAgent:
        Initialize ComputerAgent.

        Args:
            model: Model name (e.g., "claude-3-5-sonnet-20241022", "computer-use-preview", "omni+vertex_ai/gemini-pro")
            model: Model name (e.g., "claude-sonnet-4-5-20250929", "computer-use-preview", "omni+vertex_ai/gemini-pro")
            tools: List of tools (computer objects, decorated functions, etc.)
            custom_loop: Custom agent loop function to use instead of auto-selection
            only_n_most_recent_images: If set, only keep the N most recent images in message history. Adds ImageRetentionCallback automatically.

@@ -7,7 +7,7 @@ Usage:

Examples:
    python -m agent.cli openai/computer-use-preview
    python -m agent.cli anthropic/claude-sonnet-4-5-20250929
    python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
    python -m agent.cli omniparser+anthropic/claude-sonnet-4-5-20250929
"""

try:

@@ -233,7 +233,7 @@ async def main():
Examples:
    python -m agent.cli openai/computer-use-preview
    python -m agent.cli anthropic/claude-sonnet-4-5-20250929
    python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
    python -m agent.cli omniparser+anthropic/claude-sonnet-4-5-20250929
    python -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
""",
)

@@ -671,11 +671,12 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
        # Handle custom function tools (not computer tools)
        if tool_name != "computer":
            from ..responses import make_function_call_item
            responses_items.append(make_function_call_item(
                function_name=tool_name,
                arguments=tool_input,
                call_id=call_id
            ))

            responses_items.append(
                make_function_call_item(
                    function_name=tool_name, arguments=tool_input, call_id=call_id
                )
            )
            continue

        # Computer tool - process actions

@@ -883,16 +884,17 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
        # Handle custom function tools
        if tool_name != "computer":
            from ..responses import make_function_call_item

            # tool_call.function.arguments is a JSON string, need to parse it
            try:
                args_dict = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError:
                args_dict = {}
            responses_items.append(make_function_call_item(
                function_name=tool_name,
                arguments=args_dict,
                call_id=tool_call.id
            ))
            responses_items.append(
                make_function_call_item(
                    function_name=tool_name, arguments=args_dict, call_id=tool_call.id
                )
            )
            continue

        # Handle computer tool

@@ -20,6 +20,7 @@ from ..loops.base import AsyncAgentConfig
from ..responses import (
    convert_completion_messages_to_responses_items,
    convert_responses_items_to_completion_messages,
    make_reasoning_item,
)
from ..types import AgentCapability

@@ -373,13 +374,23 @@ class GenericVlmConfig(AsyncAgentConfig):
        if _on_usage:
            await _on_usage(usage)

        # Parse tool call from text; then convert to responses items via fake tool_calls
        # Extract response data
        resp_dict = response.model_dump()  # type: ignore
        choice = (resp_dict.get("choices") or [{}])[0]
        content_text = ((choice.get("message") or {}).get("content")) or ""
        tool_call = _parse_tool_call_from_text(content_text)
        message = choice.get("message") or {}
        content_text = message.get("content") or ""
        tool_calls_array = message.get("tool_calls") or []
        reasoning_text = message.get("reasoning") or ""

        output_items: List[Dict[str, Any]] = []

        # Add reasoning if present (Ollama Cloud format)
        if reasoning_text:
            output_items.append(make_reasoning_item(reasoning_text))

        # Priority 1: Try to parse tool call from content text (OpenRouter format)
        tool_call = _parse_tool_call_from_text(content_text)

        if tool_call and isinstance(tool_call, dict):
            fn_name = tool_call.get("name") or "computer"
            raw_args = tool_call.get("arguments") or {}
@@ -405,8 +416,50 @@ class GenericVlmConfig(AsyncAgentConfig):
                ],
            }
            output_items.extend(convert_completion_messages_to_responses_items([fake_cm]))
        elif tool_calls_array:
            # Priority 2: Use tool_calls field if present (Ollama Cloud format)
            # Process and unnormalize coordinates in tool calls
            processed_tool_calls = []
            for tc in tool_calls_array:
                function = tc.get("function", {})
                fn_name = function.get("name", "computer")
                args_str = function.get("arguments", "{}")

                try:
                    args = json.loads(args_str)

                    # Unnormalize coordinates if present
                    if "coordinate" in args and last_rw is not None and last_rh is not None:
                        args = await _unnormalize_coordinate(args, (last_rw, last_rh))

                    # Convert Qwen format to Computer Calls format if this is a computer tool
                    if fn_name == "computer":
                        converted_action = convert_qwen_tool_args_to_computer_action(args)
                        if converted_action:
                            args = converted_action

                    processed_tool_calls.append(
                        {
                            "type": tc.get("type", "function"),
                            "id": tc.get("id", "call_0"),
                            "function": {
                                "name": fn_name,
                                "arguments": json.dumps(args),
                            },
                        }
                    )
                except json.JSONDecodeError:
                    # Keep original if parsing fails
                    processed_tool_calls.append(tc)

            fake_cm = {
                "role": "assistant",
                "content": content_text if content_text else "",
                "tool_calls": processed_tool_calls,
            }
            output_items.extend(convert_completion_messages_to_responses_items([fake_cm]))
        else:
            # Fallback: just return assistant text
            # No tool calls found in either format, return text response
            fake_cm = {"role": "assistant", "content": content_text}
            output_items.extend(convert_completion_messages_to_responses_items([fake_cm]))

@@ -365,6 +365,22 @@ class OmniparserConfig(AsyncAgentConfig):
            **kwargs,
        }

        # Add Vertex AI specific parameters if using vertex_ai models
        if llm_model.startswith("vertex_ai/"):
            import os

            # Pass vertex_project and vertex_location to liteLLM
            if "vertex_project" not in api_kwargs:
                api_kwargs["vertex_project"] = os.getenv("GOOGLE_CLOUD_PROJECT")
            if "vertex_location" not in api_kwargs:
                api_kwargs["vertex_location"] = "global"

            # Pass through Gemini 3-specific parameters if provided
            if "thinking_level" in kwargs:
                api_kwargs["thinking_level"] = kwargs["thinking_level"]
            if "media_resolution" in kwargs:
                api_kwargs["media_resolution"] = kwargs["media_resolution"]

        # Call API start hook
        if _on_api_start:
            await _on_api_start(api_kwargs)
@@ -5,13 +5,14 @@ UITARS-2 agent loop implementation using LiteLLM.
- Calls litellm.acompletion
- Parses <seed:tool_call> ... </seed:tool_call> outputs back into Responses items (computer actions)
"""

from __future__ import annotations

import re
from typing import Any, Dict, List, Optional, Tuple
import base64
import io
import json
import re
from typing import Any, Dict, List, Optional, Tuple

import litellm
from litellm.responses.litellm_completion_transformation.transformation import (
@@ -20,37 +21,45 @@ from litellm.responses.litellm_completion_transformation.transformation import (

from ..decorators import register_agent
from .omniparser import get_last_computer_call_output  # type: ignore

try:
    from PIL import Image  # type: ignore
except Exception:  # pragma: no cover
    Image = None  # type: ignore
from ..responses import (
    convert_responses_items_to_completion_messages,
    make_click_item,
    make_double_click_item,
    make_drag_item,
    make_function_call_item,
    make_keypress_item,
    make_screenshot_item,
    make_move_item,
    make_output_text_item,
    make_reasoning_item,
    make_screenshot_item,
    make_scroll_item,
    make_type_item,
    make_wait_item,
    convert_responses_items_to_completion_messages,
)
from ..types import AgentCapability


TOOL_SCHEMAS: List[Dict[str, Any]] = [
    {"type": "function", "name": "open_computer", "parameters": {}, "description": "Open computer."},
    {
        "type": "function",
        "name": "open_computer",
        "parameters": {},
        "description": "Open computer.",
    },
    {
        "type": "function",
        "name": "click",
        "parameters": {
            "type": "object",
            "properties": {
                "point": {"type": "string", "description": "Click coordinates. The format is: <point>x y</point>"}
                "point": {
                    "type": "string",
                    "description": "Click coordinates. The format is: <point>x y</point>",
                }
            },
            "required": ["point"],
        },
@@ -62,7 +71,10 @@ TOOL_SCHEMAS: List[Dict[str, Any]] = [
        "parameters": {
            "type": "object",
            "properties": {
                "point": {"type": "string", "description": "Click coordinates. The format is: <point>x y</point>"}
                "point": {
                    "type": "string",
                    "description": "Click coordinates. The format is: <point>x y</point>",
                }
            },
            "required": ["point"],
        },
@@ -74,7 +86,10 @@ TOOL_SCHEMAS: List[Dict[str, Any]] = [
        "parameters": {
            "type": "object",
            "properties": {
                "point": {"type": "string", "description": "Click coordinates. The format is: <point>x y</point>"}
                "point": {
                    "type": "string",
                    "description": "Click coordinates. The format is: <point>x y</point>",
                }
            },
            "required": ["point"],
        },
@@ -106,7 +121,10 @@ TOOL_SCHEMAS: List[Dict[str, Any]] = [
        "parameters": {
            "type": "object",
            "properties": {
                "point": {"type": "string", "description": "Target coordinates. The format is: <point>x y</point>"}
                "point": {
                    "type": "string",
                    "description": "Target coordinates. The format is: <point>x y</point>",
                }
            },
            "required": ["point"],
        },
@@ -117,7 +135,12 @@ TOOL_SCHEMAS: List[Dict[str, Any]] = [
        "name": "hotkey",
        "parameters": {
            "type": "object",
            "properties": {"key": {"type": "string", "description": "Hotkeys you want to press. Split keys with a space and use lowercase."}},
            "properties": {
                "key": {
                    "type": "string",
                    "description": "Hotkeys you want to press. Split keys with a space and use lowercase.",
                }
            },
            "required": ["key"],
        },
        "description": "Press hotkey.",
@@ -227,9 +250,7 @@ TOOL_SCHEMAS: List[Dict[str, Any]] = [
        "name": "wait",
        "parameters": {
            "type": "object",
            "properties": {
                "time": {"type": "integer", "description": "Wait time in seconds."}
            },
            "properties": {"time": {"type": "integer", "description": "Wait time in seconds."}},
            "required": [],
        },
        "description": "Wait for a while.",
@@ -268,7 +289,12 @@ TOOL_SCHEMAS: List[Dict[str, Any]] = [
        },
        "description": "Type content.",
    },
    {"type": "function", "name": "take_screenshot", "parameters": {}, "description": "Take screenshot."},
    {
        "type": "function",
        "name": "take_screenshot",
        "parameters": {},
        "description": "Take screenshot.",
    },
]
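`_format_tool_schemas_json_lines` is used below to build the system prompt but is not shown in this diff; a plausible sketch, assuming each schema is emitted as one compact JSON object per line:

```python
import json
from typing import Any, Dict, List


def format_tool_schemas_json_lines(schemas: List[Dict[str, Any]]) -> str:
    # One JSON line per tool schema, concatenated into the prompt body.
    return "\n".join(json.dumps(s, ensure_ascii=False) for s in schemas)
```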
@@ -319,7 +345,9 @@ _PROMPT_SUFFIX = (
SYSTEM_PROMPT = _PROMPT_PREFIX + _format_tool_schemas_json_lines(TOOL_SCHEMAS) + _PROMPT_SUFFIX


def _extract_function_schemas_from_tools(tools: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
def _extract_function_schemas_from_tools(
    tools: Optional[List[Dict[str, Any]]],
) -> List[Dict[str, Any]]:
    schemas: List[Dict[str, Any]] = []
    if not tools:
        return schemas
@@ -330,12 +358,14 @@ def _extract_function_schemas_from_tools(tools: Optional[List[Dict[str, Any]]])
        params = fn.get("parameters", {})
        desc = fn.get("description", "")
        if name:
            schemas.append({
                "type": "function",
                "name": name,
                "parameters": params if isinstance(params, dict) else {},
                "description": desc,
            })
            schemas.append(
                {
                    "type": "function",
                    "name": name,
                    "parameters": params if isinstance(params, dict) else {},
                    "description": desc,
                }
            )
    return schemas


@@ -392,7 +422,9 @@ def _denormalize_xy_from_uitars(nx: float, ny: float, width: int, height: int) -
    return x, y


def _map_computer_action_to_function(action: Dict[str, Any], width: int, height: int) -> Optional[Dict[str, Any]]:
def _map_computer_action_to_function(
    action: Dict[str, Any], width: int, height: int
) -> Optional[Dict[str, Any]]:
    """Map a computer action item to a UITARS function + parameters dict of strings.
    Returns dict like {"function": name, "parameters": {..}} or None if unknown.
    """
@@ -404,7 +436,10 @@ def _map_computer_action_to_function(action: Dict[str, Any], width: int, height:
            return None
        nx, ny = _normalize_xy_to_uitars(int(x), int(y), width, height)
        if btn == "right":
            return {"function": "right_single", "parameters": {"point": f"<point>{nx} {ny}</point>"}}
            return {
                "function": "right_single",
                "parameters": {"point": f"<point>{nx} {ny}</point>"},
            }
        return {"function": "click", "parameters": {"point": f"<point>{nx} {ny}</point>"}}
    if atype == "double_click":
        x, y = action.get("x"), action.get("y")
@@ -434,8 +469,19 @@ def _map_computer_action_to_function(action: Dict[str, Any], width: int, height:
        nx, ny = _normalize_xy_to_uitars(int(x), int(y), width, height)
        sx, sy = action.get("scroll_x", 0), action.get("scroll_y", 0)
        # Our parser used positive sy for up
        direction = "up" if sy and sy > 0 else ("down" if sy and sy < 0 else ("right" if sx and sx > 0 else ("left" if sx and sx < 0 else "down")))
        return {"function": "scroll", "parameters": {"direction": direction, "point": f"<point>{nx} {ny}</point>"}}
        direction = (
            "up"
            if sy and sy > 0
            else (
                "down"
                if sy and sy < 0
                else ("right" if sx and sx > 0 else ("left" if sx and sx < 0 else "down"))
            )
        )
        return {
            "function": "scroll",
            "parameters": {"direction": direction, "point": f"<point>{nx} {ny}</point>"},
        }
    if atype == "drag":
        path = action.get("path", [])
        if isinstance(path, list) and len(path) >= 2:
@@ -461,7 +507,9 @@ def _map_computer_action_to_function(action: Dict[str, Any], width: int, height:
    return None


def _to_uitars_messages(messages: List[Dict[str, Any]], width: int, height: int) -> List[Dict[str, Any]]:
def _to_uitars_messages(
    messages: List[Dict[str, Any]], width: int, height: int
) -> List[Dict[str, Any]]:
    """Convert responses items into completion messages tailored for UI-TARS.

    - User content is passed through similar to convert_responses_items_to_completion_messages
@@ -505,7 +553,9 @@ def _to_uitars_messages(messages: List[Dict[str, Any]], width: int, height: int)
            completion_content = []
            for item in content:
                if item.get("type") == "input_image":
                    completion_content.append({"type": "image_url", "image_url": {"url": item.get("image_url")}})
                    completion_content.append(
                        {"type": "image_url", "image_url": {"url": item.get("image_url")}}
                    )
                elif item.get("type") in ("input_text", "text"):
                    completion_content.append({"type": "text", "text": item.get("text")})
            uitars_messages.append({"role": "user", "content": completion_content})
@@ -517,7 +567,11 @@ def _to_uitars_messages(messages: List[Dict[str, Any]], width: int, height: int)
        if mtype == "reasoning":
            # Responses reasoning stores summary list
            summary = msg.get("summary", [])
            texts = [s.get("text", "") for s in summary if isinstance(s, dict) and s.get("type") == "summary_text"]
            texts = [
                s.get("text", "")
                for s in summary
                if isinstance(s, dict) and s.get("type") == "summary_text"
            ]
            if texts:
                pending_think = "\n".join([t for t in texts if t])
            continue
@@ -546,9 +600,15 @@ def _to_uitars_messages(messages: List[Dict[str, Any]], width: int, height: int)
                pending_think, pending_functions = None, []
            content = msg.get("content", [])
            if isinstance(content, list):
                texts = [c.get("text", "") for c in content if isinstance(c, dict) and c.get("type") in ("output_text", "text")]
                texts = [
                    c.get("text", "")
                    for c in content
                    if isinstance(c, dict) and c.get("type") in ("output_text", "text")
                ]
                if texts:
                    uitars_messages.append({"role": "assistant", "content": "\n".join([t for t in texts if t])})
                    uitars_messages.append(
                        {"role": "assistant", "content": "\n".join([t for t in texts if t])}
                    )
            elif isinstance(content, str) and content:
                uitars_messages.append({"role": "assistant", "content": content})
            continue
@@ -581,8 +641,12 @@ def _to_uitars_messages(messages: List[Dict[str, Any]], width: int, height: int)

    return uitars_messages


def _to_response_items(
    actions: List[Dict[str, Any]], tool_names: Optional[set[str]] = None, width: Optional[int] = None, height: Optional[int] = None
    actions: List[Dict[str, Any]],
    tool_names: Optional[set[str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
) -> List[Any]:
    """Map parsed actions into Responses items (computer actions + optional reasoning)."""
    items: List[Any] = []
@@ -736,8 +800,12 @@ class UITARS2Config:

        # Build dynamic system prompt by concatenating built-in schemas and provided function tools
        provided_fn_schemas = _extract_function_schemas_from_tools(tools)
        combined_schemas = TOOL_SCHEMAS + provided_fn_schemas if provided_fn_schemas else TOOL_SCHEMAS
        dynamic_system_prompt = _PROMPT_PREFIX + _format_tool_schemas_json_lines(combined_schemas) + _PROMPT_SUFFIX
        combined_schemas = (
            TOOL_SCHEMAS + provided_fn_schemas if provided_fn_schemas else TOOL_SCHEMAS
        )
        dynamic_system_prompt = (
            _PROMPT_PREFIX + _format_tool_schemas_json_lines(combined_schemas) + _PROMPT_SUFFIX
        )

        # Prepend system prompt (based on training prompts + provided tools)
        litellm_messages: List[Dict[str, Any]] = [
@@ -829,7 +897,10 @@ class UITARS2Config:
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please return a single click action."},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                    },
                ],
            },
        ]
@@ -841,7 +912,9 @@ class UITARS2Config:
            "temperature": kwargs.get("temperature", 0.0),
            "do_sample": kwargs.get("temperature", 0.0) > 0.0,
        }
        api_kwargs.update({k: v for k, v in (kwargs or {}).items() if k not in ["max_tokens", "temperature"]})
        api_kwargs.update(
            {k: v for k, v in (kwargs or {}).items() if k not in ["max_tokens", "temperature"]}
        )

        response = await litellm.acompletion(**api_kwargs)
        # Extract response content
@@ -852,7 +925,11 @@ class UITARS2Config:
        msg = choices[0].get("message", {})
        content_text = msg.get("content", "")
        if isinstance(content_text, list):
            text_parts = [p.get("text", "") for p in content_text if isinstance(p, dict) and p.get("type") == "text"]
            text_parts = [
                p.get("text", "")
                for p in content_text
                if isinstance(p, dict) and p.get("type") == "text"
            ]
            content_text = "\n".join([t for t in text_parts if t])
        if not isinstance(content_text, str):
            return None
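The `<point>x y</point>` parameters above depend on `_normalize_xy_to_uitars` and `_denormalize_xy_from_uitars`, whose bodies fall outside this diff. A sketch of the round-trip, assuming the 0-1000 virtual grid commonly used by UI-TARS-style models (an assumption; the real rounding may differ):

```python
def normalize_xy(x: int, y: int, width: int, height: int) -> tuple[int, int]:
    # Pixel coordinates -> model-space coordinates on a 0-1000 grid.
    return round(x * 1000 / width), round(y * 1000 / height)


def denormalize_xy(nx: float, ny: float, width: int, height: int) -> tuple[int, int]:
    # Model-space coordinates back to pixels for the actual screen size.
    return round(nx * width / 1000), round(ny * height / 1000)


# Example: a click at (640, 360) on a 1280x720 screen maps to <point>500 500</point>.
```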
@@ -22,14 +22,14 @@ async def test_http_endpoint():

    # Example 1: Simple text request
    simple_request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "model": "anthropic/claude-sonnet-4-5-20250929",
        "input": "Tell me a three sentence bedtime story about a unicorn.",
        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
    }

    # Example 2: Multi-modal request with image
    multimodal_request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "model": "anthropic/claude-sonnet-4-5-20250929",
        "input": [
            {
                "role": "user",
@@ -47,7 +47,7 @@ async def test_http_endpoint():

    # Example 3: Request with custom agent and computer kwargs
    custom_request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "model": "anthropic/claude-sonnet-4-5-20250929",
        "input": "Take a screenshot and tell me what you see",
        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
    }
@@ -95,7 +95,7 @@ def curl_examples():
        """curl http://localhost:8000/responses \\
  -H "Content-Type: application/json" \\
  -d '{
    "model": "anthropic/claude-3-5-sonnet-20241022",
    "model": "anthropic/claude-sonnet-4-5-20250929",
    "input": "Tell me a three sentence bedtime story about a unicorn."
  }'"""
    )
@@ -105,7 +105,7 @@ def curl_examples():
        """curl http://localhost:8000/responses \\
  -H "Content-Type: application/json" \\
  -d '{
    "model": "anthropic/claude-3-5-sonnet-20241022",
    "model": "anthropic/claude-sonnet-4-5-20250929",
    "input": [
      {
        "role": "user",
@@ -126,7 +126,7 @@ def curl_examples():
        """curl http://localhost:8000/responses \\
  -H "Content-Type: application/json" \\
  -d '{
    "model": "anthropic/claude-3-5-sonnet-20241022",
    "model": "anthropic/claude-sonnet-4-5-20250929",
    "input": "Take a screenshot and tell me what you see",
    "agent_kwargs": {
      "save_trajectory": true,
@@ -166,7 +166,7 @@ async def test_p2p_client():

    # Send a test request
    request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "model": "anthropic/claude-sonnet-4-5-20250929",
        "input": "Hello from P2P client!",
    }
    await connection.send(json.dumps(request))
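A Python equivalent of the requests exercised above, assuming the agent proxy is listening locally on port 8000 as in the curl examples:

```python
import asyncio

import httpx


async def main() -> None:
    async with httpx.AsyncClient(timeout=120) as client:
        resp = await client.post(
            "http://localhost:8000/responses",
            json={
                "model": "anthropic/claude-sonnet-4-5-20250929",
                "input": "Tell me a three sentence bedtime story about a unicorn.",
            },
        )
        print(resp.json())


asyncio.run(main())
```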
@@ -6,9 +6,9 @@ with an advanced UI for model selection and configuration.

Supported Agent Models:
- OpenAI: openai/computer-use-preview
- Anthropic: anthropic/claude-3-5-sonnet-20241022, anthropic/claude-3-7-sonnet-20250219
- Anthropic: anthropic/claude-sonnet-4-5-20250929, anthropic/claude-3-7-sonnet-20250219
- UI-TARS: huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
- Omniparser: omniparser+anthropic/claude-3-5-sonnet-20241022, omniparser+ollama_chat/gemma3
- Omniparser: omniparser+anthropic/claude-sonnet-4-5-20250929, omniparser+ollama_chat/gemma3

Requirements:
- Mac with Apple Silicon (M1/M2/M3/M4), Linux, or Windows
@@ -116,14 +116,12 @@ MODEL_MAPPINGS = {
        "Anthropic: Claude 4 Opus (20250514)": "anthropic/claude-opus-4-20250514",
        "Anthropic: Claude 4 Sonnet (20250514)": "anthropic/claude-sonnet-4-20250514",
        "Anthropic: Claude 3.7 Sonnet (20250219)": "anthropic/claude-3-7-sonnet-20250219",
        "Anthropic: Claude 3.5 Sonnet (20241022)": "anthropic/claude-3-5-sonnet-20241022",
    },
    "omni": {
        "default": "omniparser+openai/gpt-4o",
        "OMNI: OpenAI GPT-4o": "omniparser+openai/gpt-4o",
        "OMNI: OpenAI GPT-4o mini": "omniparser+openai/gpt-4o-mini",
        "OMNI: Claude 3.7 Sonnet (20250219)": "omniparser+anthropic/claude-3-7-sonnet-20250219",
        "OMNI: Claude 3.5 Sonnet (20241022)": "omniparser+anthropic/claude-3-5-sonnet-20241022",
    },
    "uitars": {
        "default": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" if is_mac else "ui-tars",

@@ -44,13 +44,11 @@ def create_gradio_ui() -> gr.Blocks:
        "Anthropic: Claude 4 Opus (20250514)",
        "Anthropic: Claude 4 Sonnet (20250514)",
        "Anthropic: Claude 3.7 Sonnet (20250219)",
        "Anthropic: Claude 3.5 Sonnet (20241022)",
    ]
    omni_models = [
        "OMNI: OpenAI GPT-4o",
        "OMNI: OpenAI GPT-4o mini",
        "OMNI: Claude 3.7 Sonnet (20250219)",
        "OMNI: Claude 3.5 Sonnet (20241022)",
    ]

    # Check if API keys are available

@@ -102,7 +102,7 @@ async def main():
        # model="anthropic/claude-opus-4-20250514",
        # model="anthropic/claude-sonnet-4-20250514",
        # model="anthropic/claude-3-7-sonnet-20250219",
        # model="anthropic/claude-3-5-sonnet-20241022",
        # model="anthropic/claude-sonnet-4-5-20250929",
        # == UI-TARS ==
        # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
        # TODO: add local mlx provider
@@ -24,7 +24,7 @@ def mock_litellm():
        "id": "chatcmpl-test123",
        "object": "chat.completion",
        "created": 1234567890,
        "model": kwargs.get("model", "anthropic/claude-3-5-sonnet-20241022"),
        "model": kwargs.get("model", "anthropic/claude-sonnet-4-5-20250929"),
        "choices": [
            {
                "index": 0,

@@ -18,18 +18,18 @@ class TestComputerAgentInitialization:
        """Test that agent can be initialized with a model string."""
        from agent import ComputerAgent

        agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
        agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")

        assert agent is not None
        assert hasattr(agent, "model")
        assert agent.model == "anthropic/claude-3-5-sonnet-20241022"
        assert agent.model == "anthropic/claude-sonnet-4-5-20250929"

    @patch("agent.agent.litellm")
    def test_agent_initialization_with_tools(self, mock_litellm, disable_telemetry, mock_computer):
        """Test that agent can be initialized with tools."""
        from agent import ComputerAgent

        agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022", tools=[mock_computer])
        agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929", tools=[mock_computer])

        assert agent is not None
        assert hasattr(agent, "tools")
@@ -41,7 +41,7 @@ class TestComputerAgentInitialization:

        budget = 5.0
        agent = ComputerAgent(
            model="anthropic/claude-3-5-sonnet-20241022", max_trajectory_budget=budget
            model="anthropic/claude-sonnet-4-5-20250929", max_trajectory_budget=budget
        )

        assert agent is not None
@@ -79,7 +79,7 @@ class TestComputerAgentRun:

        mock_litellm.acompletion = AsyncMock(return_value=mock_response)

        agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
        agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")

        # Run should return an async generator
        result_generator = agent.run(sample_messages)
@@ -92,7 +92,7 @@ class TestComputerAgentRun:
        """Test that agent has run method available."""
        from agent import ComputerAgent

        agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
        agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")

        # Verify run method exists
        assert hasattr(agent, "run")
@@ -102,7 +102,7 @@ class TestComputerAgentRun:
        """Test that agent has agent_loop initialized."""
        from agent import ComputerAgent

        agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
        agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")

        # Verify agent_loop is initialized
        assert hasattr(agent, "agent_loop")
@@ -132,7 +132,7 @@ class TestComputerAgentIntegration:
        """Test that agent can be initialized with Computer tool."""
        from agent import ComputerAgent

        agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022", tools=[mock_computer])
        agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929", tools=[mock_computer])

        # Verify agent accepted the tool
        assert agent is not None
@@ -133,7 +133,7 @@ await cleanup_session(ctx, "session-to-cleanup")

### Environment Variables

- `CUA_MODEL_NAME`: Model to use (default: `anthropic/claude-3-5-sonnet-20241022`)
- `CUA_MODEL_NAME`: Model to use (default: `anthropic/claude-sonnet-4-5-20250929`)
- `CUA_MAX_IMAGES`: Maximum images to keep (default: `3`)

### Session Manager Configuration

@@ -44,7 +44,7 @@ Add this to your MCP client configuration:
      "args": [
        "bash",
        "-lc",
        "export CUA_MODEL_NAME='anthropic/claude-3-5-sonnet-20241022'; ~/.cua/start_mcp_server.sh"
        "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-5-20250929'; ~/.cua/start_mcp_server.sh"
      ]
    }
  }

@@ -156,7 +156,7 @@ def serve() -> FastMCP:

    try:
        # Get model name
        model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022")
        model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-sonnet-4-5-20250929")
        logger.info(f"Using model: {model_name}")

        # Create agent with the new v0.4.x API

@@ -168,7 +168,7 @@ def print_usage_examples():
      "command": "/bin/bash",
      "args": ["~/.cua/start_mcp_server.sh"],
      "env": {
        "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022"
        "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929"
      }
    }
  }
@@ -192,7 +192,7 @@ Step 2: Configure MCP client:
      "command": "/bin/bash",
      "args": ["~/.cua/start_mcp_server.sh"],
      "env": {
        "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022",
        "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929",
        "CUA_USE_HOST_COMPUTER_SERVER": "true"
      }
    }
@@ -32,7 +32,7 @@ const peerClient = new AgentClient('peer://my-agent-proxy');

// Send a simple text request
const response = await client.responses.create({
  model: 'anthropic/claude-3-5-sonnet-20241022',
  model: 'anthropic/claude-sonnet-4-5-20250929',
  input: 'Write a one-sentence bedtime story about a unicorn.',
  // Optional per-request env overrides
  env: {
@@ -47,7 +47,7 @@ console.log(response.output);

```typescript
const response = await client.responses.create({
  model: 'anthropic/claude-3-5-sonnet-20241022',
  model: 'anthropic/claude-sonnet-4-5-20250929',
  input: [
    {
      role: 'user',
@@ -74,7 +74,7 @@ const client = new AgentClient('https://localhost:8000', {
});

const response = await client.responses.create({
  model: 'anthropic/claude-3-5-sonnet-20241022',
  model: 'anthropic/claude-sonnet-4-5-20250929',
  input: 'Hello, world!',
  agent_kwargs: {
    save_trajectory: true,

@@ -42,7 +42,7 @@ A simple HTML page that demonstrates using the CUA Agent Client in a browser env

4. **Configure and test:**
   - Enter an agent URL (e.g., `https://localhost:8000` or `peer://some-peer-id`)
   - Enter a model name (e.g., `anthropic/claude-3-5-sonnet-20241022`)
   - Enter a model name (e.g., `anthropic/claude-sonnet-4-5-20250929`)
   - Type a message and click "Send Message" or press Enter
   - View the response in the output textarea

@@ -53,7 +53,7 @@ A simple HTML page that demonstrates using the CUA Agent Client in a browser env

**Example Models:**

- `anthropic/claude-3-5-sonnet-20241022`
- `anthropic/claude-sonnet-4-5-20250929`
- `openai/gpt-4`
- `huggingface-local/microsoft/UI-TARS-7B`
@@ -1,6 +1,6 @@
{
  "name": "@trycua/cli",
  "version": "0.1.4",
  "version": "0.1.5",
  "packageManager": "bun@1.1.38",
  "description": "Command-line interface for CUA cloud sandboxes and authentication",
  "type": "module",

@@ -17,7 +17,9 @@ export async function runCli() {
      '  cua sb <command>   Create and manage cloud sandboxes\n' +
      '    list             View all your sandboxes\n' +
      '    create           Provision a new sandbox\n' +
      '    start/stop       Control sandbox state\n' +
      '    start            Start or resume a sandbox\n' +
      '    stop             Stop a sandbox (preserves disk)\n' +
      '    suspend          Suspend a sandbox (preserves memory)\n' +
      '    vnc              Open remote desktop\n' +
      '\n' +
      'Documentation: https://docs.cua.ai/libraries/cua-cli/commands'
@@ -191,6 +191,41 @@ const restartHandler = async (argv: Record<string, unknown>) => {
  process.exit(1);
};

const suspendHandler = async (argv: Record<string, unknown>) => {
  const token = await ensureApiKeyInteractive();
  const name = String((argv as any).name);
  const res = await http(`/v1/vms/${encodeURIComponent(name)}/suspend`, {
    token,
    method: 'POST',
  });
  if (res.status === 202) {
    const body = (await res.json().catch(() => ({}))) as {
      status?: string;
    };
    console.log(body.status ?? 'suspending');
    return;
  }
  if (res.status === 404) {
    console.error('Sandbox not found');
    process.exit(1);
  }
  if (res.status === 401) {
    clearApiKey();
    console.error("Unauthorized. Try 'cua login' again.");
    process.exit(1);
  }
  if (res.status === 400 || res.status === 500) {
    const body = (await res.json().catch(() => ({}))) as { error?: string };
    console.error(
      body.error ??
        "Suspend not supported for this VM. Use 'cua sb stop' instead."
    );
    process.exit(1);
  }
  console.error(`Unexpected status: ${res.status}`);
  process.exit(1);
};

const openHandler = async (argv: Record<string, unknown>) => {
  const token = await ensureApiKeyInteractive();
  const name = String((argv as any).name);
@@ -296,6 +331,13 @@ export function registerSandboxCommands(y: Argv) {
        y.positional('name', { type: 'string', describe: 'Sandbox name' }),
      restartHandler
    )
    .command(
      'suspend <name>',
      'Suspend a sandbox, preserving memory state (use start to resume)',
      (y) =>
        y.positional('name', { type: 'string', describe: 'Sandbox name' }),
      suspendHandler
    )
    .command(
      ['vnc <name>', 'open <name>'],
      'Open remote desktop (VNC) connection in your browser',
@@ -378,6 +420,13 @@ export function registerSandboxCommands(y: Argv) {
        y.positional('name', { type: 'string', describe: 'Sandbox name' }),
      handler: restartHandler,
    } as any)
    .command({
      command: 'suspend <name>',
      describe: false as any, // Hide from help
      builder: (y: Argv) =>
        y.positional('name', { type: 'string', describe: 'Sandbox name' }),
      handler: suspendHandler,
    } as any)
    .command({
      command: ['vnc <name>', 'open <name>'],
      describe: false as any, // Hide from help

@@ -16,6 +16,8 @@ export type SandboxStatus =
  | 'pending'
  | 'running'
  | 'stopped'
  | 'suspended'
  | 'suspending'
  | 'terminated'
  | 'failed';
export type SandboxItem = {
@@ -203,7 +203,7 @@
    "\n",
    "Examples:\n",
    "- `openai/computer-use-preview+ollama/gemma3:4b`\n",
    "- `anthropic/claude-3-5-sonnet-20241022+ollama/gemma3:4b`\n"
    "- `anthropic/claude-sonnet-4-5-20250929+ollama/gemma3:4b`\n"
   ]
  },
  {
@@ -217,7 +217,7 @@
    "import logging\n",
    "\n",
    "agent_composed = ComputerAgent(\n",
    "    model=\"anthropic/claude-3-5-sonnet-20241022+ollama/gemma3:4b\",\n",
    "    model=\"anthropic/claude-sonnet-4-5-20250929+ollama/gemma3:4b\",\n",
    "    tools=[computer],\n",
    "    trajectory_dir=\"trajectories\",\n",
    "    only_n_most_recent_images=3,\n",
@@ -234,7 +234,20 @@
   "cell_type": "markdown",
   "id": "section-3-conceptual",
   "metadata": {},
   "source": "## 3) Customize your agent 🛠️\n\nFor a few customization options, see: https://cua.ai/docs/agent-sdk/customizing-computeragent\n\nLevels of customization you can explore:\n\n1) Simple — Prompt engineering\n2) Easy — Tools\n3) Intermediate — Callbacks\n4) Expert — Custom agent via `register_agent` (see `libs/python/agent/agent/decorators.py` → `register_agent`)\n\nor, incorporate the ComputerAgent into your own agent framework!"
   "source": [
    "## 3) Customize your agent 🛠️\n",
    "\n",
    "For a few customization options, see: https://cua.ai/docs/agent-sdk/customizing-computeragent\n",
    "\n",
    "Levels of customization you can explore:\n",
    "\n",
    "1) Simple — Prompt engineering\n",
    "2) Easy — Tools\n",
    "3) Intermediate — Callbacks\n",
    "4) Expert — Custom agent via `register_agent` (see `libs/python/agent/agent/decorators.py` → `register_agent`)\n",
    "\n",
    "or, incorporate the ComputerAgent into your own agent framework!"
   ]
  },
  {
   "cell_type": "markdown",
@@ -274,4 +287,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
}

@@ -184,7 +184,7 @@ if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Test CUA Agent with mock computer")
    parser.add_argument(
        "--model", default="anthropic/claude-sonnet-4-20250514", help="CUA model to test"
        "--model", default="anthropic/claude-sonnet-4-5-20250929", help="CUA model to test"
    )
    args = parser.parse_args()
uv.lock (generated)
@@ -861,7 +861,7 @@ wheels = [

[[package]]
name = "cua-agent"
version = "0.4.39"
version = "0.4.53"
source = { editable = "libs/python/agent" }
dependencies = [
    { name = "aiohttp" },
@@ -885,7 +885,6 @@ all = [
    { name = "einops" },
    { name = "google-genai" },
    { name = "gradio" },
    { name = "hud-python" },
    { name = "mlx-vlm", marker = "sys_platform == 'darwin'" },
    { name = "pillow" },
    { name = "python-dotenv" },
@@ -975,7 +974,6 @@ requires-dist = [
    { name = "gradio", marker = "extra == 'all'", specifier = ">=5.23.3" },
    { name = "gradio", marker = "extra == 'ui'", specifier = ">=5.23.3" },
    { name = "httpx", specifier = ">=0.27.0" },
    { name = "hud-python", marker = "extra == 'all'", specifier = "==0.4.52" },
    { name = "hud-python", marker = "extra == 'hud'", specifier = "==0.4.52" },
    { name = "litellm", specifier = ">=1.74.12" },
    { name = "mlx-vlm", marker = "sys_platform == 'darwin' and extra == 'all'", specifier = ">=0.1.27" },
@@ -1015,7 +1013,7 @@ provides-extras = ["openai", "anthropic", "qwen", "omni", "uitars", "uitars-mlx"

[[package]]
name = "cua-computer"
version = "0.4.12"
version = "0.4.17"
source = { editable = "libs/python/computer" }
dependencies = [
    { name = "aiohttp" },