Merge branch 'main' into feat/cua-bench-submodules

This commit is contained in:
Dillon DuPont
2025-12-09 15:25:46 -05:00
117 changed files with 6147 additions and 4331 deletions

View File

@@ -0,0 +1,29 @@
---
# Builds and publishes the CUA Linux container image by delegating to the
# shared reusable Docker publish workflow.
name: Build and Publish CUA Linux Container

on:
  push:
    branches:
      - main
    # Release builds are driven by semver tags with this prefix.
    tags:
      - "docker-cua-linux-v*.*.*"
    # Only trigger when the image sources or the involved workflows change.
    # NOTE(review): GitHub ANDs `paths` with the push event; combining `tags`
    # with `paths` can prevent tag pushes from triggering — confirm intended.
    paths:
      - "libs/qemu-docker/linux/**"
      - ".github/workflows/docker-publish-cua-linux.yml"
      - ".github/workflows/docker-reusable-publish.yml"
  pull_request:
    paths:
      - "libs/qemu-docker/linux/**"
      - ".github/workflows/docker-publish-cua-linux.yml"
      - ".github/workflows/docker-reusable-publish.yml"

jobs:
  publish:
    # All build/push logic lives in the reusable workflow; this file only
    # supplies the image-specific inputs and secrets.
    uses: ./.github/workflows/docker-reusable-publish.yml
    with:
      image_name: cua-linux
      context_dir: libs/qemu-docker/linux
      dockerfile_path: Dockerfile
      tag_prefix: docker-cua-linux-v
      docker_hub_org: trycua
    secrets:
      DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}

View File

@@ -0,0 +1,29 @@
---
# Builds and publishes the CUA Windows container image by delegating to the
# shared reusable Docker publish workflow.
name: Build and Publish CUA Windows Container

on:
  push:
    branches:
      - main
    # Release builds are driven by semver tags with this prefix.
    tags:
      - "docker-cua-windows-v*.*.*"
    # Only trigger when the image sources or the involved workflows change.
    # NOTE(review): GitHub ANDs `paths` with the push event; combining `tags`
    # with `paths` can prevent tag pushes from triggering — confirm intended.
    paths:
      - "libs/qemu-docker/windows/**"
      - ".github/workflows/docker-publish-cua-windows.yml"
      - ".github/workflows/docker-reusable-publish.yml"
  pull_request:
    paths:
      - "libs/qemu-docker/windows/**"
      - ".github/workflows/docker-publish-cua-windows.yml"
      - ".github/workflows/docker-reusable-publish.yml"

jobs:
  publish:
    # All build/push logic lives in the reusable workflow; this file only
    # supplies the image-specific inputs and secrets.
    uses: ./.github/workflows/docker-reusable-publish.yml
    with:
      image_name: cua-windows
      context_dir: libs/qemu-docker/windows
      dockerfile_path: Dockerfile
      tag_prefix: docker-cua-windows-v
      docker_hub_org: trycua
    secrets:
      DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}

View File

@@ -2,8 +2,7 @@ name: Lint & Format Check
on:
pull_request:
branches:
- main
push:
branches:
- main

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
**/image/setup.iso
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

View File

@@ -15,6 +15,8 @@ repos:
name: TypeScript type check
entry: node ./scripts/typescript-typecheck.js
language: node
files: \.(ts|tsx)$
pass_filenames: false
- repo: https://github.com/PyCQA/isort
rev: 7.0.0

View File

@@ -1,14 +1,22 @@
<div align="center">
<picture>
<source media="(prefers-color-scheme: dark)" alt="Cua logo" height="150" srcset="img/logo_white.png">
<source media="(prefers-color-scheme: light)" alt="Cua logo" height="150" srcset="img/logo_black.png">
<img alt="Cua logo" height="150" src="img/logo_black.png">
</picture>
<a href="https://cua.ai" target="_blank" rel="noopener noreferrer">
<picture>
<source media="(prefers-color-scheme: dark)" alt="Cua logo" width="150" srcset="img/logo_white.png">
<source media="(prefers-color-scheme: light)" alt="Cua logo" width="150" srcset="img/logo_black.png">
<img alt="Cua logo" width="500" src="img/logo_black.png">
</picture>
</a>
[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
<br>
<p align="center">Build and deploy AI agents that can reason, plan and act on any computer</p>
<p align="center">
<a href="https://cua.ai" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/cua.ai-0ea5e9" alt="cua.ai"></a>
<a href="https://discord.com/invite/cua-ai" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Discord-Join%20Server-10b981?logo=discord&logoColor=white" alt="Discord"></a>
<a href="https://x.com/trycua" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/twitter/follow/trycua?style=social" alt="Twitter"></a>
<a href="https://cua.ai/docs" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Docs-0ea5e9.svg" alt="Documentation"></a>
<br>
<a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
</div>
@@ -101,6 +109,10 @@ Core utilities for Cua
- [Get started with the Cua SDKs](https://cua.ai/docs/quickstart-devs)
- [Get started with the Cua CLI](https://cua.ai/docs/quickstart-cli)
## Python Version Compatibility
Cua packages require **Python 3.12 or 3.13**. Python 3.14 is not currently supported due to dependency compatibility issues (pydantic-core/PyO3 compatibility). If you encounter build errors on Python 3.14, please use Python 3.13 or earlier.
# Agent SDK
Install the agent SDK:

View File

@@ -21,7 +21,6 @@ The Playground connects to your existing Cua sandboxes—the same ones you use w
<video src="https://github.com/user-attachments/assets/9fef0f30-1024-4833-8b7a-6a2c02d8eb99" width="600" controls></video>
</div>
Sign up at [cua.ai/signin](https://cua.ai/signin) and grab your API key from the dashboard. Then navigate to the Playground:
1. Navigate to Dashboard > Playground
@@ -33,6 +32,7 @@ Sign up at [cua.ai/signin](https://cua.ai/signin) and grab your API key from the
Example use cases:
**Prompt Testing**
```
❌ "Check the website"
✅ "Navigate to example.com in Firefox and take a screenshot of the homepage"
@@ -42,6 +42,7 @@ Example use cases:
Run the same task with different models to compare quality, speed, and cost.
**Debugging Agent Behavior**
1. Send: "Find the login button and click it"
2. View tool calls to see each mouse movement
3. Check screenshots to verify the agent found the right element

View File

@@ -51,7 +51,6 @@ When you request an Anthropic model through Cua, we automatically route to the b
Sign up at [cua.ai/signin](https://cua.ai/signin) and create your API key from **Dashboard > API Keys > New API Key** (save it immediately—you won't see it again).
Use it with the Agent SDK (make sure to set your environment variable):
```python

View File

@@ -29,13 +29,13 @@ A few papers stand out for their immediate relevance to anyone building or deplo
## Summary Statistics
| Category | Count |
|----------|-------|
| Benchmarks & Datasets | 18 |
| Safety & Security | 12 |
| Grounding & Visual Reasoning | 14 |
| Agent Architectures & Training | 11 |
| Adversarial Attacks | 8 |
| Category | Count |
| ------------------------------ | ----- |
| Benchmarks & Datasets | 18 |
| Safety & Security | 12 |
| Grounding & Visual Reasoning | 14 |
| Agent Architectures & Training | 11 |
| Adversarial Attacks | 8 |
**Total Papers:** 45
@@ -56,6 +56,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** The first comprehensive benchmark for evaluating GUI agents on macOS. Features 202 multilingual interactive tasks across 30 applications (28 macOS-exclusive), with support for 5 languages (English, Chinese, Arabic, Japanese, Russian). Reveals a dramatic gap: proprietary agents achieve 30%+ success rate while open-source models lag below 5%. Also includes safety benchmarking for deception attacks.
**Key Findings:**
- Proprietary computer-use agents lead at above 30% success rate
- Open-source lightweight models struggle below 5%, highlighting need for macOS domain adaptation
- Multilingual benchmarks expose weaknesses, especially in Arabic (28.8% degradation vs English)
@@ -70,6 +71,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A comprehensive safety benchmark built on OSWorld for testing computer-use agents across three harm categories: deliberate user misuse, prompt injection attacks, and model misbehavior. Includes 150 tasks spanning harassment, copyright infringement, disinformation, data exfiltration, and more. Proposes an automated judge achieving high agreement with human annotations (0.76-0.79 F1 score).
**Key Findings:**
- All tested models (o4-mini, Claude 3.7 Sonnet, Gemini 2.5 Pro) tend to directly comply with many deliberate misuse queries
- Models are relatively vulnerable to static prompt injections
- Models occasionally perform unsafe actions without explicit malicious prompts
@@ -83,6 +85,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A comprehensive open-source framework for scaling computer-use agent data and foundation models. Introduces AgentNet, the first large-scale computer-use task dataset spanning 3 operating systems and 200+ applications/websites. OpenCUA-72B achieves 45% success rate on OSWorld-Verified, establishing new state-of-the-art among open-source models.
**Key Contributions:**
- Annotation infrastructure for capturing human computer-use demonstrations
- AgentNet: large-scale dataset across 3 OSes and 200+ apps
- Scalable pipeline transforming demonstrations into state-action pairs with reflective Chain-of-Thought reasoning
@@ -97,6 +100,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A benchmark of 130 realistic, high-quality, long-horizon tasks for agentic search systems (like Deep Research), requiring real-time web browsing and extensive information synthesis. Constructed with 1000+ hours of human labor. Introduces Agent-as-a-Judge framework using tree-structured rubric design for automated evaluation.
**Key Findings:**
- OpenAI Deep Research achieves 50-70% of human performance while spending half the time
- First systematic evaluation of ten frontier agentic search systems vs. human performance
- Addresses the challenge of evaluating time-varying, complex answers
@@ -110,6 +114,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Addresses GUI grounding—mapping natural language to specific UI actions—as a critical bottleneck in agent development. Introduces OSWorld-G benchmark (564 annotated samples) and Jedi dataset (4 million synthetic examples), the largest computer-use grounding dataset. Improved grounding directly enhances agentic capabilities, boosting OSWorld performance from 23% to 51%.
**Key Contributions:**
- OSWorld-G: comprehensive benchmark for diverse grounding tasks (text matching, element recognition, layout understanding, precise manipulation)
- Jedi: 4M examples through multi-perspective task decoupling
- Demonstrates compositional generalization to novel interfaces
@@ -123,6 +128,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Evaluates potential safety risks of MLLM-based agents during real-world computer manipulation. Features 492 risky tasks spanning web, social media, multimedia, OS, email, and office software. Categorizes risks into user-originated and environmental risks, evaluating both risk goal intention and completion.
**Key Findings:**
- Current computer-use agents face significant safety risks in real-world scenarios
- Safety principles designed for dialogue scenarios don't transfer well to computer-use
- Highlights necessity and urgency of safety alignment for computer-use agents
@@ -136,6 +142,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A benchmark featuring high-fidelity, deterministic replicas of 11 widely-used websites across e-commerce, travel, communication, and professional networking. Contains 112 practical tasks requiring both information retrieval and state-changing actions. Enables reproducible evaluation without safety risks.
**Key Findings:**
- Best frontier language models achieve only 41% success rate
- Highlights critical gaps in autonomous web navigation and task completion
- Supports scalable post-training data generation
@@ -149,6 +156,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** An RL-based framework for GUI grounding incorporating seed data curation, dense policy gradients, and self-evolutionary reinforcement finetuning using attention maps. With only 3K training samples, the 7B model achieves state-of-the-art on three grounding benchmarks, outperforming UI-TARS-72B by 24.2% on ScreenSpot-Pro.
**Key Results:**
- 47.3% accuracy on ScreenSpot-Pro with 7B model
- Outperforms 72B models with fraction of training data
- Demonstrates effectiveness of RL for high-resolution, complex environments
@@ -162,6 +170,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A generative adversarial framework that manipulates agent decision-making using diffusion-based semantic injections. Combines negative prompt degradation with positive semantic optimization. Without model access, produces visually natural images that induce consistent decision biases in agents.
**Key Findings:**
- Consistently induces decision-level preference redirection on LLaVA-34B, Gemma3, GPT-4o, and Mistral-3.2
- Outperforms baselines (SPSA, Bandit, standard diffusion)
- Exposes vulnerability: autonomous agents can be misled through visually subtle, semantically-guided manipulations
@@ -175,6 +184,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** An extensible benchmark simulating a small software company environment where AI agents interact like digital workers: browsing the web, writing code, running programs, and communicating with coworkers. Tests agents on real professional tasks with important implications for industry adoption and labor market effects.
**Key Findings:**
- Best agent achieves 30% autonomous task completion
- Simpler tasks are solvable autonomously
- More difficult long-horizon tasks remain beyond current systems' reach
@@ -188,6 +198,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A comprehensive benchmark for VLMs in video game QA, encompassing visual unit testing, visual regression testing, needle-in-a-haystack challenges, glitch detection, and bug report generation for both images and videos. Addresses the need for standardized benchmarks in this labor-intensive domain.
**Key Focus:**
- First benchmark specifically designed for video game QA with VLMs
- Covers wide range of QA activities across images and videos
- Addresses lack of automation in game development workflows
@@ -201,6 +212,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** End-to-end benchmark for evaluating web agent security against prompt injection attacks. Tests realistic scenarios where even simple, low-effort human-written injections can deceive top-tier AI models including those with advanced reasoning.
**Key Findings:**
- Attacks partially succeed in up to 86% of cases
- State-of-the-art agents often struggle to fully complete attacker goals
- Reveals "security by incompetence"—agents' limitations sometimes prevent full attack success
@@ -214,6 +226,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Measures whether AI web-navigation agents follow the privacy principle of "data minimization"—using sensitive information only when truly necessary to complete a task. Simulates realistic web interaction scenarios end-to-end.
**Key Findings:**
- Agents built on GPT-4, Llama-3, and Claude are prone to inadvertent use of unnecessary sensitive information
- Proposes prompting-based defense that reduces information leakage
- End-to-end benchmarking provides more realistic measure than probing LLMs about privacy
@@ -227,6 +240,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A novel paradigm for AI agents that fluidly bridge embodiment and web-scale reasoning. Creates unified simulation integrating realistic 3D indoor/outdoor environments with functional web interfaces. Tasks include cooking from online recipes, navigating with dynamic map data, and interpreting landmarks using web knowledge.
**Key Contributions:**
- Unified platform combining 3D environments with web interfaces
- Benchmark spanning cooking, navigation, shopping, tourism, and geolocation
- Reveals significant performance gaps between AI systems and humans
@@ -240,6 +254,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** The first attempt to model UI interactions for precision engineering tasks. Features 41K+ annotated video recordings of CAD operations with time horizons up to 20x longer than existing datasets. Proposes VideoCADFormer for learning CAD interactions directly from video.
**Key Contributions:**
- Large-scale synthetic dataset for CAD UI interactions
- VQA benchmark for evaluating spatial reasoning and video understanding
- Reveals challenges in precise action grounding and long-horizon dependencies
@@ -253,6 +268,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Introduces a pre-operative critic mechanism that provides feedback before action execution by reasoning about potential outcomes. Proposes Suggestion-aware Group Relative Policy Optimization (S-GRPO) for building the GUI-Critic-R1 model with fully automated data generation.
**Key Results:**
- Significant advantages in critic accuracy compared to current MLLMs
- Improved success rates and operational efficiency on GUI automation benchmarks
- Works across both mobile and web domains
@@ -266,7 +282,8 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A vision-language model trained with RL to explicitly anchor each reasoning step to specific visual coordinates. Introduces multi-turn RL framework enabling dynamic zooming into predicted coordinates during reasoning.
**Key Results:**
- 86.4% on V*Bench for visual search
- 86.4% on V\*Bench for visual search
- Outperforms supervised fine-tuning and conventional RL across spatial reasoning, visual search, and web-based grounding
- Grounding amplifies region exploration, subgoal setting, and visual verification
@@ -279,6 +296,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A VLM-based method for coordinate-free GUI grounding using an attention-based action head. Enables proposing one or more action regions in a single forward pass with a grounding verifier for selection.
**Key Results:**
- GUI-Actor-7B achieves 44.6 on ScreenSpot-Pro with Qwen2.5-VL, outperforming UI-TARS-72B (38.1)
- Improved generalization to unseen resolutions and layouts
- Fine-tuning only ~100M parameters achieves SOTA performance
@@ -292,11 +310,13 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Extensive analysis of the R1-Zero paradigm (online RL + chain-of-thought reasoning) for GUI grounding. Identifies issues: longer reasoning chains lead to worse performance, reward hacking via box size exploitation, and overfitting easy examples.
**Solutions Proposed:**
- Fast Thinking Template for direct answer generation
- Box size constraint in reward function
- Difficulty-aware scaling in RL objective
**Key Results:**
- GUI-G1-3B achieves 90.3% on ScreenSpot and 37.1% on ScreenSpot-Pro
- Outperforms larger UI-TARS-7B with only 3B parameters
@@ -309,6 +329,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Framework integrating self-reflection and error correction into end-to-end multimodal GUI models through GUI-specific pre-training, offline SFT, and online reflection tuning. Enables self-reflection emergence with fully automated data generation.
**Key Contributions:**
- Scalable pipelines for automatic reflection/correction data from successful trajectories
- GUI-Reflection Task Suite for reflection-oriented abilities
- Diverse environment for online training on mobile devices
@@ -323,6 +344,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A generalist agent capable of multimodal computer interaction (text, images, audio, video). Integrates tool-based and pure vision agents within highly modular architecture, enabling collaborative step-by-step task solving.
**Key Results:**
- 7.27 accuracy gain over Claude-Computer-Use on OSWorld
- Evaluated on pure vision benchmarks (OSWorld), general benchmarks (GAIA), and tool-intensive benchmarks (SWE-Bench)
- Demonstrates value of modular, collaborative agent architecture
@@ -336,6 +358,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A fine-grained adversarial attack framework that modifies VLM perception of only key objects while preserving semantics of remaining regions. Unlike broad semantic disruption, this targeted approach reduces conflicts with task context, making VLMs output valid but incorrect decisions that affect agent actions in the physical world.
**Key Contributions:**
- AdvEDM-R: removes semantics of specific objects from images
- AdvEDM-A: adds semantics of new objects into images
- Demonstrates fine-grained control with excellent attack performance in embodied decision-making tasks
@@ -349,6 +372,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A vision-centric reasoning benchmark grounded in challenging perceptual tasks. Unlike prior benchmarks, it moves beyond shallow perception ("see") to require fine-grained observation and analytical reasoning ("observe"). Features natural adversarial image pairs and annotated reasoning chains for process evaluation.
**Key Findings:**
- Tests 20 leading MLLMs including 12 foundation models and 8 reasoning-enhanced models
- Existing reasoning strategies (chain-of-thought, self-criticism) result in unstable and redundant reasoning
- Repeated image observation improves performance across models
@@ -363,6 +387,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** First systematic investigation of backdoor vulnerabilities in VLA models. Proposes Objective-Decoupled Optimization with two stages: explicit feature-space separation to isolate trigger representations, and conditional control deviations activated only by triggers.
**Key Findings:**
- Consistently achieves near-100% attack success rates with minimal impact on clean task accuracy
- Robust against common input perturbations, task transfers, and model fine-tuning
- Exposes critical security vulnerabilities in current VLA deployments under Training-as-a-Service paradigm
@@ -376,6 +401,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Benchmark for proactively inferring user goals from multimodal contextual observations for wearable assistant agents (smart glasses). Dataset comprises ~30 hours from 363 participants across 3,482 recordings with visual, audio, digital, and longitudinal context.
**Key Findings:**
- Humans achieve 93% MCQ accuracy; best VLM reaches ~84%
- For open-ended generation, best models produce relevant goals only ~57% of the time
- Smaller models (suited for wearables) achieve ~49% accuracy
@@ -390,6 +416,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A game-theoretic multi-agent framework formulating reasoning as a non-zero-sum game between base agents (visual perception specialists) and a critical agent (logic/fact verification). Features uncertainty-aware controller for dynamic agent collaboration with multi-round debates.
**Key Results:**
- Boosts small-to-mid scale models (Qwen2.5-VL-7B, InternVL3-14B) by 5-6%
- Enhances strong models like GPT-4o by 2-3%
- Modular, scalable, and generalizable framework
@@ -403,6 +430,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Introduces Grounded Reasoning with Images and Texts—a method for training MLLMs to generate reasoning chains interleaving natural language with explicit bounding box coordinates. Uses GRPO-GR reinforcement learning with rewards focused on answer accuracy and grounding format.
**Key Contributions:**
- Exceptional data efficiency: requires as few as 20 image-question-answer triplets
- Successfully unifies reasoning and grounding abilities
- Eliminates need for reasoning chain annotations or explicit bounding box labels
@@ -416,6 +444,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** First multimodal safety alignment framework. Introduces BeaverTails-V (first dataset with dual preference annotations for helpfulness and safety), and Beaver-Guard-V (multi-level guardrail system defending against unsafe queries and adversarial attacks).
**Key Results:**
- Guard model improves precursor model's safety by average of 40.9% over five filtering rounds
- Safe RLHF-V enhances model safety by 34.2% and helpfulness by 34.3%
- First exploration of multi-modal safety alignment within constrained optimization
@@ -429,6 +458,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** An inference-time approach that quantifies visual token uncertainty and selectively masks uncertain tokens. Decomposes uncertainty into aleatoric and epistemic components, focusing on epistemic uncertainty for perception-related errors.
**Key Results:**
- Significantly reduces object hallucinations
- Enhances reliability and quality of LVLM outputs across diverse visual contexts
- Validated on CHAIR, THRONE, and MMBench benchmarks
@@ -442,6 +472,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A unified LVLM integrating segmentation-aware perception and controllable object-centric generation. Uses dual-branch visual encoder for global semantic context and fine-grained spatial details, with MoVQGAN-based visual tokenizer for discrete visual tokens.
**Key Contributions:**
- Progressive multi-stage training pipeline
- Segmentation masks jointly optimized as spatial condition prompts
- Bridges segmentation-aware perception with fine-grained visual synthesis
@@ -455,6 +486,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Introduces Multi-Model Monte Carlo Tree Search (M3CTS) for generating diverse Long Chain-of-Thought reasoning trajectories. Proposes fine-grained Direct Preference Optimization (fDPO) with segment-specific preference granularity guided by spatial reward mechanism.
**Key Results:**
- fDPO achieves 4.1% and 9.0% gains over standard DPO on spatial quality and quantity tasks
- SpatialReasoner-R1 sets new SOTA on SpatialRGPT-Bench, outperforming strongest baseline by 9.8%
- Maintains competitive performance on general vision-language tasks
@@ -468,6 +500,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A two-stage reinforcement fine-tuning framework: SFT with curated Chain-of-Thought data activates reasoning potential, followed by RL based on Group Relative Policy Optimization (GRPO) for domain shift adaptability.
**Key Advantages:**
- State-of-the-art results outperforming both open-source and proprietary models
- Robust performance under domain shifts across various tasks
- Excellent data efficiency in few-shot learning scenarios
@@ -481,6 +514,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Reveals that safe images can be exploited for jailbreaking when combined with additional safe images and prompts, exploiting LVLMs' universal reasoning capabilities and safety snowball effect. Proposes Safety Snowball Agent (SSA) framework.
**Key Findings:**
- SSA can use nearly any image to induce LVLMs to produce unsafe content
- Achieves high jailbreak success rates against latest LVLMs
- Exploits inherent LVLM properties rather than alignment flaws
@@ -494,6 +528,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Uncovers novel attack vector: Malicious Image Patches (MIPs)—adversarially perturbed screen regions that induce OS agents to perform harmful actions. MIPs can be embedded in wallpapers or shared on social media to exfiltrate sensitive data.
**Key Findings:**
- MIPs generalize across user prompts and screen configurations
- Can hijack multiple OS agents during execution of benign instructions
- Exposes critical security vulnerabilities requiring attention before widespread deployment
@@ -507,6 +542,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A framework leveraging instruction-driven routing and sparsification for VLA efficiency. Features 3-stage progressive architecture inspired by human multimodal coordination: Encoder-FiLM Aggregation Routing, LLM-FiLM Pruning Routing, and V-L-A Coupled Attention.
**Key Results:**
- 97.4% success rate on LIBERO benchmark, 70.0% on real-world robotic tasks
- Reduces training costs by 2.5x and inference latency by 2.8x compared to OpenVLA
- Achieves state-of-the-art performance
@@ -520,6 +556,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Novel off-policy RL algorithm applying direct policy updates for positive samples and conservative, regularized updates for negative ones. Augmented with Successful Transition Replay (STR) for prioritizing successful interactions.
**Key Results:**
- At least 17% relative increase over existing methods on AndroidWorld benchmark
- Substantially fewer computational resources than GPT-4o-based methods
- 5-60x faster inference
@@ -533,6 +570,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** An API-centric stress testing framework that uncovers intent integrity violations in LLM agents. Uses semantic partitioning to organize tasks into meaningful categories, with targeted mutations to expose subtle agent errors while preserving user intent.
**Key Contributions:**
- Datatype-aware strategy memory for retrieving effective mutation patterns
- Lightweight predictor for ranking mutations by error likelihood
- Generalizes to stronger target models using smaller LLMs for test generation
@@ -546,6 +584,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** A dual-system framework bridging high-level reasoning with low-level action execution. Trains multimodal LLM to generate embodied reasoning plans guided by action-aligned visual rewards, compressed into visual plan latents for downstream action execution.
**Key Capabilities:**
- Few-shot adaptation
- Long-horizon planning
- Self-correction behaviors in complex embodied AI tasks
@@ -559,6 +598,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Automated attack framework that constructs chains of images with risky visual thoughts to challenge VLMs. Exploits the conflict between logical processing and safety protocols, leading to unsafe content generation.
**Key Results:**
- Improves average attack success rate by 26.71% (from 63.70% to 90.41%)
- Tested on 9 open-source and 6 commercial VLMs
- Outperforms state-of-the-art methods
@@ -572,6 +612,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** First web-based benchmark evaluating MLLM agents on diverse CAPTCHA puzzles. Spans 20 modern CAPTCHA types (225 total) with novel metric: CAPTCHA Reasoning Depth quantifying cognitive and motor steps required.
**Key Findings:**
- Humans achieve 93.3% success rate
- State-of-the-art agents achieve at most 40.0% (Browser-Use OpenAI-o3)
- Highlights significant gap between human and agent capabilities
@@ -585,7 +626,8 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Introduces pixel-space reasoning framework where VLMs use visual operations (zoom-in, select-frame) to directly inspect and infer from visual evidence. Two-phase training: instruction tuning on synthesized traces, then RL with curiosity-driven rewards.
**Key Results:**
- 84% on V*Bench, 74% on TallyQA-Complex, 84% on InfographicsVQA
- 84% on V\*Bench, 74% on TallyQA-Complex, 84% on InfographicsVQA
- Highest accuracy achieved by any open-source 7B model
- Enables proactive information gathering from complex visual inputs
@@ -598,6 +640,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Brain-inspired framework decomposing interactions into three biologically plausible phases: Blink (rapid detection via saccadic-like attention), Think (higher-level reasoning/planning), and Link (executable command generation for motor control).
**Key Innovations:**
- Automated annotation pipeline for blink data
- BTL Reward: first rule-based reward mechanism driven by both process and outcome
- Competitive performance on static GUI understanding and dynamic interaction tasks
@@ -611,6 +654,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Simulation environment engine enabling flexible definition of screens, icons, and navigation graphs with full environment access for agent training/evaluation. Demonstrates progressive training approach from SFT to multi-turn RL.
**Key Findings:**
- Supervised fine-tuning enables memorization of fundamental knowledge
- Single-turn RL enhances generalization to unseen scenarios
- Multi-turn RL encourages exploration strategies through interactive trial and error
@@ -624,6 +668,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Reasoning-enhanced framework integrating structured reasoning, action prediction, and history summarization. Uses Chain-of-Thought analyses combining progress estimation and decision reasoning, trained via SFT and GRPO with history-aware rewards.
**Key Results:**
- State-of-the-art under identical training data conditions
- Particularly strong in out-of-domain scenarios
- Robust reasoning and generalization across diverse GUI navigation tasks
@@ -637,6 +682,7 @@ We'll be at NeurIPS in San Diego. If you're working on computer-use agents, buil
**Summary:** Self-improving framework addressing trajectory verification and training data scalability. Features UI-Genie-RM (image-text interleaved reward model) and self-improvement pipeline with reward-guided exploration and outcome verification.
**Key Contributions:**
- UI-Genie-RM-517k: first reward-specific dataset for GUI agents
- UI-Genie-Agent-16k: high-quality synthetic trajectories without manual annotation
- State-of-the-art across multiple GUI agent benchmarks through three generations of self-improvement

View File

@@ -4,11 +4,7 @@ description: Supported computer-using agent loops and models
---
<Callout>
A corresponding{' '}
<a href="https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb" target="_blank">
Jupyter Notebook
</a>{' '}
is available for this documentation.
A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
</Callout>
An agent can be thought of as a loop - it generates actions, executes them, and repeats until done:

View File

@@ -3,14 +3,7 @@ title: Customize ComputerAgent
---
<Callout>
A corresponding{' '}
<a
href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb"
target="_blank"
>
Jupyter Notebook
</a>{' '}
is available for this documentation.
A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
</Callout>
The `ComputerAgent` interface provides an easy proxy to any computer-using model configuration, and it is a powerful framework for extending and building your own agentic systems.

View File

@@ -4,11 +4,7 @@ description: Use ComputerAgent with HUD for benchmarking and evaluation
---
<Callout>
A corresponding{' '}
<a href="https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb" target="_blank">
Jupyter Notebook
</a>{' '}
is available for this documentation.
A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
</Callout>
The HUD integration allows an agent to be benchmarked using the [HUD framework](https://www.hud.so/). Through the HUD integration, the agent controls a computer inside HUD, where tests are run to evaluate the success of each task.

View File

@@ -4,12 +4,12 @@ title: Configuration
The server is configured using environment variables (can be set in the Claude Desktop config):
| Variable | Description | Default |
| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------- |
| Variable | Description | Default |
| ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------- |
| `CUA_MODEL_NAME` | Model string (e.g., "anthropic/claude-sonnet-4-20250514", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-sonnet-4-20250514 |
| `ANTHROPIC_API_KEY` | Your Anthropic API key (required for Anthropic models) | None |
| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 |
| `CUA_USE_HOST_COMPUTER_SERVER` | Target your local desktop instead of a VM. Set to "true" to use your host system. **Warning:** AI models may perform risky actions. | false |
| `ANTHROPIC_API_KEY` | Your Anthropic API key (required for Anthropic models) | None |
| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 |
| `CUA_USE_HOST_COMPUTER_SERVER` | Target your local desktop instead of a VM. Set to "true" to use your host system. **Warning:** AI models may perform risky actions. | false |
## Model Configuration
@@ -17,7 +17,7 @@ The `CUA_MODEL_NAME` environment variable supports various model providers throu
### Supported Providers
- **Anthropic**: `anthropic/claude-sonnet-4-20250514`,
- **Anthropic**: `anthropic/claude-sonnet-4-20250514`,
- **OpenAI**: `openai/computer-use-preview`, `openai/gpt-4o`
- **Local Models**: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
- **Omni + LiteLLM**: `omniparser+litellm/gpt-4o`, `omniparser+litellm/claude-3-haiku`

View File

@@ -0,0 +1,17 @@
---
title: MCP Server
description: Run Cua agents through Claude Desktop and other MCP clients
---
The MCP Server exposes Cua agents as tools for [Model Context Protocol](https://modelcontextprotocol.io/) clients like Claude Desktop. This lets you ask Claude to perform computer tasks directly from the chat interface.
```bash
pip install cua-mcp-server
```
## Key Features
- **Claude Desktop integration** - Use Cua agents directly in Claude's chat
- **Multi-client support** - Concurrent sessions with automatic resource management
- **Progress reporting** - Real-time updates during task execution
- **VM safety** - Runs in sandboxed VMs by default

View File

@@ -14,6 +14,7 @@
"usage-tracking",
"telemetry",
"benchmarks",
"integrations"
"integrations",
"mcp-server"
]
}

View File

@@ -1,5 +1,5 @@
---
title: Commands
title: Command Reference
description: Complete reference for all CUA CLI commands
---
@@ -35,7 +35,7 @@ Both styles work identically - use whichever you prefer!
### Available Commands
- **Authentication** - `cua auth login`, `cua auth env`, `cua auth logout` (also available as flat commands: `cua login`, `cua env`, `cua logout`)
- **Sandbox Management** - `cua list`, `cua create`, `cua start`, `cua stop`, `cua restart`, `cua delete`, `cua vnc`
- **Sandbox Management** - `cua list`, `cua create`, `cua get`, `cua start`, `cua stop`, `cua restart`, `cua delete`, `cua vnc`
## Authentication Commands
@@ -188,6 +188,79 @@ Job ID: job-xyz789
Use 'cua list' to monitor provisioning progress
```
### `cua get`
Get detailed information about a specific sandbox, including computer-server health status.
```bash
cua get <name>
# With additional options
cua get <name> --json
cua get <name> --show-passwords
cua get <name> --show-vnc-url
```
**Options:**
- `--json` - Output all details in JSON format
- `--show-passwords` - Include password in output
- `--show-vnc-url` - Include computed NoVNC URL
**Example Output (default):**
```bash
$ cua get my-dev-sandbox
Name: my-dev-sandbox
Status: running
Host: my-dev-sandbox.containers.cloud.trycua.com
OS Type: linux
Computer Server Version: 0.1.30
Computer Server Status: healthy
```
**Example Output (with --show-passwords and --show-vnc-url):**
```bash
$ cua get my-dev-sandbox --show-passwords --show-vnc-url
Name: my-dev-sandbox
Status: running
Host: my-dev-sandbox.containers.cloud.trycua.com
Password: secure-pass-123
OS Type: linux
Computer Server Version: 0.1.30
Computer Server Status: healthy
VNC URL: https://my-dev-sandbox.containers.cloud.trycua.com/vnc.html?autoconnect=true&password=secure-pass-123
```
**Example Output (JSON format):**
```bash
$ cua get my-dev-sandbox --json
{
"name": "my-dev-sandbox",
"status": "running",
"host": "my-dev-sandbox.containers.cloud.trycua.com",
"os_type": "linux",
"computer_server_version": "0.1.30",
"computer_server_status": "healthy"
}
```
**Computer Server Health Check:**
The `cua get` command automatically probes the computer-server when the sandbox is running:
- Checks OS type via `https://{host}:8443/status`
- Checks version via `https://{host}:8443/cmd`
- Shows "Computer Server Status: healthy" when both probes succeed
- Uses a 3-second timeout for each probe
<Callout type="info">
The computer server status is only checked for running sandboxes. Stopped or suspended sandboxes
will not show computer server information.
</Callout>
### `cua start`
Start a stopped sandbox.

View File

@@ -0,0 +1,68 @@
---
title: Getting Started
description: Install and set up the CUA CLI
---
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
import { Callout } from 'fumadocs-ui/components/callout';
The Cua CLI is a command-line tool for managing your Cua cloud sandboxes. Create, start, stop, and connect to sandboxes directly from your terminal.
## Installation
<Tabs items={['macOS / Linux', 'Windows']}>
<Tab value="macOS / Linux">
```bash
curl -LsSf https://cua.ai/cli/install.sh | sh
```
</Tab>
<Tab value="Windows">
```powershell
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
</Tabs>
This installs [Bun](https://bun.sh) and the CUA CLI. Verify with:
```bash
cua --help
```
## Authentication
Login to your CUA account:
```bash
# Browser-based login
cua auth login
# Or with API key
cua auth login --api-key sk-your-api-key-here
```
Generate a `.env` file for your project:
```bash
cua auth env
```
## Quick Start
```bash
# Create a sandbox
cua create --os linux --size small --region north-america
# List sandboxes
cua list
# Open VNC in browser
cua vnc my-sandbox
# Stop a sandbox
cua stop my-sandbox
```
## Next Steps
- [Command Reference](/cli-playbook/commands) - Full list of available commands

View File

@@ -0,0 +1,5 @@
{
"title": "Cloud CLI",
"description": "Command-line interface for CUA Cloud",
"pages": ["index", "commands"]
}

View File

@@ -5,7 +5,7 @@ description: Computer commands and interface methods
This page describes the set of supported **commands** you can use to control a Cua Computer directly via the Python SDK.
These commands map to the same actions available in the [Computer Server API Commands Reference](../libraries/computer-server/Commands), and provide low-level, async access to system operations from your agent or automation code.
These commands map to the same actions available in the [Computer Server API Commands Reference](/computer-sdk/computer-server/Commands), and provide low-level, async access to system operations from your agent or automation code.
## Shell Actions

View File

@@ -0,0 +1,15 @@
---
title: Computer Server
description: HTTP/WebSocket server for remote computer control
---
The Computer Server is an HTTP and WebSocket server that runs inside each Cua sandbox (VM or container). It exposes APIs for remote computer control - allowing the Computer SDK and agents to execute actions like clicking, typing, taking screenshots, and running commands on the sandboxed environment.
When you use `Computer(provider_type="cloud")` or any other provider, the Computer SDK communicates with this server running inside the sandbox to execute your automation commands.
## Key Features
- **REST API** - Execute commands, take screenshots, manage files
- **WebSocket API** - Real-time streaming for continuous interaction
- **Cross-platform** - Runs on Linux, macOS, and Windows sandboxes
- **Secure** - Isolated inside the sandbox environment

View File

@@ -0,0 +1,4 @@
{
"title": "Computer Server",
"pages": ["index", "Commands", "REST-API", "WebSocket-API"]
}

View File

@@ -1,5 +1,5 @@
---
title: Computer UI (Deprecated)
title: Computer UI
---
<Callout type="warn" title="Deprecated">

View File

@@ -7,6 +7,7 @@
"tracing-api",
"sandboxed-python",
"custom-computer-handlers",
"computer-ui"
"computer-ui",
"computer-server"
]
}

View File

@@ -4,14 +4,7 @@ slug: sandboxed-python
---
<Callout>
A corresponding{' '}
<a
href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py"
target="_blank"
>
Python example
</a>{' '}
is available for this documentation.
A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py" target="_blank">Python example</a> is available for this documentation.
</Callout>
You can run Python functions securely inside a sandboxed virtual environment on a remote Cua Computer. This is useful for executing untrusted user code, isolating dependencies, or providing a safe environment for automation tasks.

View File

@@ -1,9 +1,9 @@
---
title: Computer Tracing API
title: Tracing
description: Record computer interactions for debugging, training, and analysis
---
# Computer Tracing API
# Tracing
The Computer tracing API provides a powerful way to record computer interactions for debugging, training, analysis, and compliance purposes. Inspired by Playwright's tracing functionality, it offers flexible recording options and standardized output formats.

View File

@@ -19,8 +19,6 @@ import { Code, Terminal } from 'lucide-react';
</Card>
</div> */}
---
## Set Up Your Computer Environment
Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute.
@@ -43,7 +41,7 @@ You can run your Cua computer in the cloud (recommended for easiest setup), loca
**Option 1: Via Website**
1. Navigate to **Dashboard > Sandboxes > Create Sandbox**
2. Create a **Small** sandbox, choosing **Linux**, **Windows**, or **macOS**
2. Create a sandbox, choosing **Linux**, **Windows**, or **macOS**
3. Note your sandbox name
**Option 2: Via CLI**
@@ -122,6 +120,10 @@ You can run your Cua computer in the cloud (recommended for easiest setup), loca
## Developer Quickstart
<Callout type="warn" title="Python Version Compatibility">
Cua packages require **Python 3.12 or 3.13**. Python 3.14 is not currently supported due to dependency compatibility issues (pydantic-core/PyO3 compatibility). If you encounter build errors on Python 3.14, please use Python 3.13 or earlier.
</Callout>
<Steps>
<Step>

View File

@@ -4,55 +4,46 @@ title: Introduction
import { Monitor, Code, BookOpen, Zap, Bot, Boxes, Rocket } from 'lucide-react';
<div className="rounded-lg border bg-card text-card-foreground shadow-sm px-4 py-2 mb-6">
Cua is an open-source framework for building **Computer-Use Agents** - AI systems that see,
understand, and interact with desktop applications through vision and action, just like humans do.
<div className="not-prose -mt-2 mb-6">
<p className="text-fd-primary font-semibold text-sm mb-1">Welcome</p>
<h1 className="text-3xl font-bold tracking-tight md:text-4xl">Welcome to Cua</h1>
</div>
## Why Cua?
**Cua** is an open-source framework for building, deploying and evaluating Computer-Use Agents - AI systems that autonomously interact with computer interfaces by understanding visual elements and executing actions. Cua provides SDKs for easy integration with 100+ vision-language models (VLMs), supporting everything from simple task automation to complex multi-step workflows across Windows, Linux, and macOS environments.
Cua gives you everything you need to automate any desktop application without brittle selectors or APIs.
Some highlights include:
- **Model flexibility** - Connect to 100+ LLM providers through liteLLM's standard interface. Use models from Anthropic, OpenAI, Google, and more - or run them locally with Ollama, Hugging Face, or MLX.
- **Composed agents** - Mix and match grounding models with planning models for optimal performance. Use specialized models like GTA, OpenCUA, or OmniParser for UI element detection paired with powerful reasoning models like Claude or GPT-4.
- **Cross-platform sandboxes** - Run agents safely in isolated environments. Choose from Docker containers, macOS VMs with Lume, Windows Sandbox, or deploy to Cua Cloud with production-ready infrastructure.
- **Computer SDK** - Control any application with a PyAutoGUI-like API. Click, type, scroll, take screenshots, manage windows, read/write files - everything you need for desktop automation.
- **Agent SDK** - Build autonomous agents with trajectory tracing, prompt caching, cost tracking, and budget controls. Test agents on industry-standard benchmarks like OSWorld-Verified with one line of code.
- **Human-in-the-loop** - Pause agent execution and await user input or approval before continuing. Use the `human/human` model string to let humans control the agent directly.
- **Production essentials** - Ship reliable agents with built-in PII anonymization, cost tracking, trajectory logging, and integration with observability platforms like Laminar and HUD.
## What can you build?
- RPA automation that works with any application - even legacy software without APIs.
- Form-filling agents that handle complex multi-step web workflows.
- Testing automation that adapts to UI changes without brittle selectors.
- Data extraction from desktop applications and document processing.
- Cross-application workflows that combine multiple tools and services.
- Research agents that browse, read, and synthesize information from the web.
Explore real-world examples in our [blog posts](https://cua.ai/blog).
## Get started
Follow the [Quickstart guide](/docs/get-started/quickstart) for step-by-step setup with Python or TypeScript.
If you're new to computer-use agents, check out our [tutorials](https://cua.ai/blog), [examples](https://github.com/trycua/cua/tree/main/examples), and [notebooks](https://github.com/trycua/cua/tree/main/notebooks) to start building with Cua today.
<div className="grid grid-cols-1 md:grid-cols-2 gap-6 mt-8">
<Card icon={<Rocket />} href="/get-started/quickstart" title="Quickstart">
Get up and running in 3 steps with Python or TypeScript.
</Card>
<Card icon={<Zap />} href="/agent-sdk/agent-loops" title="Agent Loops">
Learn how agents work and how to build your own.
</Card>
<Card icon={<BookOpen />} href="/computer-sdk/computers" title="Computer SDK">
Control desktop applications with the Computer SDK.
</Card>
<Card icon={<Monitor />} href="/example-usecases/form-filling" title="Example Use Cases">
See Cua in action with real-world examples.
</Card>
<div className="not-prose relative rounded-xl overflow-hidden my-8 w-full">
<img src="/docs/img/hero.png" alt="Cua" className="w-full h-auto rounded-xl" />
</div>
We can't wait to see what you build with Cua ✨
## What is a Computer-Use Agent?
Computer-Use Agents (CUAs) are AI systems that can autonomously interact with computer interfaces through visual understanding and action execution. They work by capturing screenshots, feeding them to a vision-language model (VLM), and letting the model determine the next action to take - such as clicking, typing, or scrolling - in a continuous loop until the task is complete.
## What is a Computer-Use Sandbox?
Computer-Use Sandboxes are isolated, controlled environments where AI agents can safely interact with computer interfaces. They provide a secure execution space where agents can perform actions such as clicking, typing, and running code, test automation workflows, and learn from interactions — all without affecting production systems.
## Key Features
With the **Computer SDK**, you can:
- Automate **Windows, Linux, and macOS** sandboxes with a consistent, pyautogui-like API
- Create & manage sandboxes locally or using **Cua Cloud**
With the **Agent SDK**, you can:
- Run computer-use models with a consistent schema
- Benchmark on **OSWorld-Verified**, **SheetBench-V2**, and **ScreenSpot**
- Combine UI grounding models with any LLM using **composed agents**
- Use **100+ models** via API or local inference (Claude, GPT-4, Gemini, Ollama, MLX)
## Get Started
Follow the [Quickstart guide](/get-started/quickstart) for step-by-step setup with Python or TypeScript.
Check out our [tutorials](https://cua.ai/blog), [examples](https://github.com/trycua/cua/tree/main/examples), and [notebooks](https://github.com/trycua/cua/tree/main/notebooks) to start building with Cua today.
<div className="grid grid-cols-2 md:grid-cols-4 gap-2 mt-4 text-sm">
<Card icon={<Rocket className="w-4 h-4" />} href="/get-started/quickstart" title="Quickstart" />
<Card icon={<Zap className="w-4 h-4" />} href="/agent-sdk/agent-loops" title="Agent Loops" />
<Card icon={<BookOpen className="w-4 h-4" />} href="/computer-sdk/computers" title="Computer SDK" />
<Card icon={<Monitor className="w-4 h-4" />} href="/example-usecases/form-filling" title="Examples" />
</div>

View File

@@ -1,21 +0,0 @@
---
title: Agent
description: Reference for the current version of the Agent library.
pypi: cua-agent
github:
- https://github.com/trycua/cua/tree/main/libs/python/agent
---
The Agent library provides the ComputerAgent class and tools for building AI agents that automate workflows on Cua Computers.
## Agent Loops
See the [Agent Loops](../agent-sdk/agent-loops) documentation for how agents process information and take actions.
## Chat History
See the [Chat History](../agent-sdk/chat-history) documentation for managing conversational context and turn-by-turn interactions.
## Callbacks
See the [Callbacks](../agent-sdk/callbacks) documentation for extending and customizing agent behavior with custom hooks.

View File

@@ -1,24 +0,0 @@
---
title: Computer Server
description: Reference for the current version of the Computer Server library.
pypi: cua-computer-server
github:
- https://github.com/trycua/cua/tree/main/libs/python/computer-server
---
<Callout>
A corresponding{' '}
<a
href="https://github.com/trycua/cua/blob/main/notebooks/computer_server_nb.ipynb"
target="_blank"
>
Jupyter Notebook
</a>{' '}
is available for this documentation.
</Callout>
The Computer Server API reference documentation is currently under development.
## Overview
The Computer Server provides WebSocket and REST API endpoints for remote computer control and automation.

View File

@@ -1,23 +0,0 @@
---
title: Computer
description: Reference for the current version of the Computer library.
pypi: cua-computer
npm: '@trycua/computer'
github:
- https://github.com/trycua/cua/tree/main/libs/python/computer
- https://github.com/trycua/cua/tree/main/libs/typescript/computer
---
The Computer library provides a Computer class for controlling and automating containers running the Computer Server.
## Connecting to Computers
See the [Cua Computers](../computer-sdk/computers) documentation for how to connect to different computer types (cloud, local, or host desktop).
## Computer Commands
See the [Commands](../computer-sdk/commands) documentation for all supported commands and interface methods (Shell, Mouse, Keyboard, File System, etc.).
## Sandboxed Python Functions
See the [Sandboxed Python](../computer-sdk/sandboxed-python) documentation for running Python functions securely in isolated environments on a remote Cua Computer.

View File

@@ -1,13 +0,0 @@
---
title: Core
description: Reference for the current version of the Core library.
pypi: cua-core
npm: '@trycua/core'
github:
- https://github.com/trycua/cua/tree/main/libs/python/core
- https://github.com/trycua/cua/tree/main/libs/typescript/core
---
## Overview
The Core library provides foundational utilities and shared functionality across the CUA ecosystem.

View File

@@ -1,58 +0,0 @@
---
title: Cua CLI
description: Command-line interface for managing Cua cloud sandboxes and authentication
---
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
The Cua CLI is a command-line tool that provides an intuitive interface for managing your Cua cloud sandboxes and authentication. It offers a streamlined workflow for creating, managing, and connecting to cloud sandboxes.
## Key Features
- **Authentication Management**: Secure login with browser-based OAuth flow
- **Sandbox Lifecycle**: Create, start, stop, restart, and delete cloud sandboxes
- **Quick Access**: Direct links to VNC and playground interfaces
- **Cross-Platform**: Works on macOS, Linux, and Windows
- **Environment Integration**: Automatic `.env` file generation
## Quick Example
```bash
# Install the CLI (installs Bun + CUA CLI)
curl -LsSf https://cua.ai/cli/install.sh | sh
# Login to your CUA account
cua auth login
# Create a new Linux sandbox
cua sb create --os linux --size small --region north-america
# List your sandboxes
cua sb list
```
## Use Cases
### Development Workflow
- Quickly spin up cloud sandboxes for testing
- Manage multiple sandboxes across different regions
- Integrate with CI/CD pipelines
### Team Collaboration
- Share sandbox configurations and access
- Standardize development environments
- Quick onboarding for new team members
### Automation
- Script sandbox provisioning and management
- Integrate with deployment workflows
- Automate environment setup
## Next Steps
- [Install the CLI](/libraries/cua-cli/installation)
- [Learn about available commands](/libraries/cua-cli/commands)
- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)

View File

@@ -1,130 +0,0 @@
---
title: Installation
description: Install the CUA CLI on your system
---
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
import { Callout } from 'fumadocs-ui/components/callout';
## Quick Install
The fastest way to install the CUA CLI is using our installation scripts:
<Tabs items={['macOS / Linux', 'Windows']}>
<Tab value="macOS / Linux">```bash curl -LsSf https://cua.ai/cli/install.sh | sh ```</Tab>
<Tab value="Windows">
```powershell powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
</Tabs>
These scripts will automatically:
1. Install [Bun](https://bun.sh) (a fast JavaScript runtime)
2. Install the CUA CLI via `bun add -g @trycua/cli`
<Callout type="info">
The installation scripts will automatically detect your system and install the appropriate binary
to your PATH.
</Callout>
## Alternative: Install with Bun
You can also install the CLI directly using Bun:
```bash
# Install Bun if you don't have it
curl -fsSL https://bun.sh/install | bash
# Install CUA CLI
bun add -g @trycua/cli
```
<Callout type="info">
Using Bun provides faster installation and better performance compared to npm. If you don't have
Bun installed, the first command will install it for you.
</Callout>
## Verify Installation
After installation, verify the CLI is working:
```bash
cua --help
```
You should see the CLI help output with available commands.
## First Time Setup
After installation, you'll need to authenticate with your CUA account:
```bash
# Login with browser-based OAuth flow
cua auth login
# Or provide your API key directly
cua auth login --api-key sk-your-api-key-here
```
## Updating
To update to the latest version:
<Tabs items={['Script Install', 'npm Install']}>
<Tab value="Script Install">
Re-run the installation script: ```bash # macOS/Linux curl -LsSf https://cua.ai/cli/install.sh |
sh # Windows powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
<Tab value="npm Install">```bash npm update -g @trycua/cli ```</Tab>
</Tabs>
## Uninstalling
<Tabs items={['Script Install', 'npm Install']}>
<Tab value="Script Install">
Remove the binary from your PATH: ```bash # macOS/Linux rm $(which cua) # Windows # Remove from
your PATH or delete the executable ```
</Tab>
<Tab value="npm Install">```bash npm uninstall -g @trycua/cli ```</Tab>
</Tabs>
## Troubleshooting
### Command Not Found
If you get a "command not found" error after installation:
1. **Check your PATH**: Make sure the installation directory is in your PATH
2. **Restart your terminal**: Close and reopen your terminal/command prompt
3. **Manual PATH setup**: Add the installation directory to your PATH manually
### Permission Issues
If you encounter permission issues during installation:
<Tabs items={['macOS / Linux', 'Windows']}>
<Tab value="macOS / Linux">
Try running with sudo (not recommended for the curl method): ```bash # If using npm sudo npm
install -g @trycua/cli ```
</Tab>
<Tab value="Windows">
Run PowerShell as Administrator: ```powershell # Right-click PowerShell and "Run as
Administrator" powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
```
</Tab>
</Tabs>
### Network Issues
If the installation script fails due to network issues:
1. **Check your internet connection**
2. **Try the npm installation method instead**
3. **Check if your firewall is blocking the download**
## Next Steps
- [Learn about CLI commands](/libraries/cua-cli/commands)
- [Follow the quickstart guide](/get-started/quickstart#cli-quickstart)

View File

@@ -1,5 +0,0 @@
{
"title": "CLI",
"description": "Command-line interface for CUA",
"pages": ["index", "installation", "commands"]
}

View File

@@ -1,27 +0,0 @@
---
title: MCP Server
description: Reference for the current version of the MCP Server library.
pypi: cua-mcp-server
github:
- https://github.com/trycua/cua/tree/main/libs/python/mcp-server
---
**cua-mcp-server** is an MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients.
## Features
- **Multi-Client Support**: Concurrent sessions with automatic resource management
- **Progress Reporting**: Real-time progress updates during task execution
- **Error Handling**: Robust error recovery with screenshot capture
- **Concurrent Execution**: Run multiple tasks in parallel for improved performance
- **Session Management**: Automatic cleanup and resource pooling
- **LiteLLM Integration**: Support for multiple model providers
- **VM Safety**: Default VM execution with optional host system control
## Quick Start
1. **Install**: `pip install cua-mcp-server`
2. **Configure**: Add to your MCP client configuration
3. **Use**: Ask Claude to perform computer tasks
See the [Installation](/docs/libraries/mcp-server/installation) guide for detailed setup instructions.

View File

@@ -1,78 +0,0 @@
---
title: Configuration
---
### Detection Parameters
#### Box Threshold (0.3)
Controls the confidence threshold for accepting detections:
<img
src="/docs/img/som_box_threshold.png"
alt="Illustration of confidence thresholds in object detection, with a high-confidence detection accepted and a low-confidence detection rejected."
width="500px"
/>
- Higher values (0.3) yield more precise but fewer detections - Lower values (0.01) catch more
potential icons but increase false positives - Default is 0.3 for optimal precision/recall balance
#### IOU Threshold (0.1)
Controls how overlapping detections are merged:
<img
src="/docs/img/som_iou_threshold.png"
alt="Diagram showing Intersection over Union (IOU) with low overlap between two boxes kept separate and high overlap leading to merging."
width="500px"
/>
- Lower values (0.1) more aggressively remove overlapping boxes - Higher values (0.5) allow more
overlapping detections - Default is 0.1 to handle densely packed UI elements
### OCR Configuration
- **Engine**: EasyOCR
- Primary choice for all platforms
- Fast initialization and processing
- Built-in English language support
- GPU acceleration when available
- **Settings**:
- Timeout: 5 seconds
- Confidence threshold: 0.5
- Paragraph mode: Disabled
- Language: English only
## Performance
### Hardware Acceleration
#### MPS (Metal Performance Shaders)
- Multi-scale detection (640px, 1280px, 1920px)
- Test-time augmentation enabled
- Half-precision (FP16)
- Average detection time: ~0.4s
- Best for production use when available
#### CPU
- Single-scale detection (1280px)
- Full-precision (FP32)
- Average detection time: ~1.3s
- Reliable fallback option
### Example Output Structure
```
examples/output/
├── {timestamp}_no_ocr/
│ ├── annotated_images/
│ │ └── screenshot_analyzed.png
│ ├── screen_details.txt
│ └── summary.json
└── {timestamp}_ocr/
├── annotated_images/
│ └── screenshot_analyzed.png
├── screen_details.txt
└── summary.json
```

View File

@@ -1,66 +0,0 @@
---
title: Set-of-Mark
description: Reference for the current version of the Set-of-Mark library.
pypi: cua-som
github:
- https://github.com/trycua/cua/tree/main/libs/python/som
---
<Callout>
A corresponding{' '}
<a href="https://github.com/trycua/cua/blob/main/examples/som_examples.py" target="_blank">
Python example
</a>{' '}
is available for this documentation.
</Callout>
## Overview
The SOM library provides visual element detection and interaction capabilities. It is based on the [Set-of-Mark](https://arxiv.org/abs/2310.11441) research paper and the [OmniParser](https://github.com/microsoft/OmniParser) model.
## API Documentation
### OmniParser Class
```python
class OmniParser:
def __init__(self, device: str = "auto"):
"""Initialize the parser with automatic device detection"""
def parse(
self,
image: PIL.Image,
box_threshold: float = 0.3,
iou_threshold: float = 0.1,
use_ocr: bool = True,
ocr_engine: str = "easyocr"
) -> ParseResult:
"""Parse UI elements from an image"""
```
### ParseResult Object
```python
@dataclass
class ParseResult:
elements: List[UIElement] # Detected elements
visualized_image: PIL.Image # Annotated image
processing_time: float # Time in seconds
def to_dict(self) -> dict:
"""Convert to JSON-serializable dictionary"""
def filter_by_type(self, elem_type: str) -> List[UIElement]:
"""Filter elements by type ('icon' or 'text')"""
```
### UIElement
```python
class UIElement(BaseModel):
id: Optional[int] = Field(None) # Element ID (1-indexed)
type: Literal["icon", "text"] # Element type
bbox: BoundingBox # Bounding box coordinates { x1, y1, x2, y2 }
interactivity: bool = Field(default=False) # Whether the element is interactive
confidence: float = Field(default=1.0) # Detection confidence
```

View File

@@ -0,0 +1,5 @@
{
"title": "macOS VM CLI",
"description": "CLI tools for macOS virtualization",
"pages": ["lume", "lumier"]
}

View File

@@ -10,9 +10,11 @@
"...example-usecases",
"---[BookCopy]Computer Playbook---",
"...computer-sdk",
"---[BookCopy]Agent Playbook---",
"---[Bot]Agent Playbook---",
"...agent-sdk",
"---[CodeXml]API Reference---",
"...libraries"
"---[Terminal]Cloud CLI Playbook---",
"...cli-playbook",
"---[Terminal]macOS VM CLI Playbook---",
"...macos-vm-cli-playbook"
]
}

View File

@@ -10,11 +10,11 @@
},
"dependencies": {
"fumadocs-core": "16.0.8",
"fumadocs-mdx": "13.0.5",
"fumadocs-mdx": "13.0.8",
"fumadocs-ui": "16.0.8",
"lucide-react": "^0.525.0",
"mermaid": "^11.8.1",
"next": "16.0.1",
"mermaid": "^11.12.1",
"next": "16.0.7",
"next-themes": "^0.4.6",
"posthog-js": "^1.276.0",
"react": "^19.2.0",
@@ -42,6 +42,9 @@
"@tailwindcss/oxide",
"esbuild",
"sharp"
]
],
"overrides": {
"js-yaml@>=4.0.0 <4.1.1": ">=4.1.1"
}
}
}

261
docs/pnpm-lock.yaml generated
View File

@@ -4,28 +4,31 @@ settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
overrides:
js-yaml@>=4.0.0 <4.1.1: '>=4.1.1'
importers:
.:
dependencies:
fumadocs-core:
specifier: 16.0.8
version: 16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
version: 16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
fumadocs-mdx:
specifier: 13.0.5
version: 13.0.5(fumadocs-core@16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react@19.2.0)
specifier: 13.0.8
version: 13.0.8(fumadocs-core@16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react@19.2.0)
fumadocs-ui:
specifier: 16.0.8
version: 16.0.8(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)(tailwindcss@4.1.10)
version: 16.0.8(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)(tailwindcss@4.1.10)
lucide-react:
specifier: ^0.525.0
version: 0.525.0(react@19.2.0)
mermaid:
specifier: ^11.8.1
version: 11.8.1
specifier: ^11.12.1
version: 11.12.1
next:
specifier: 16.0.1
version: 16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
specifier: 16.0.7
version: 16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
next-themes:
specifier: ^0.4.6
version: 0.4.6(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
@@ -98,9 +101,6 @@ packages:
'@antfu/install-pkg@1.1.0':
resolution: {integrity: sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ==}
'@antfu/utils@8.1.1':
resolution: {integrity: sha512-Mex9nXf9vR6AhcXmMrlz/HVgYYZpVGJ6YlPgwl7UnaFpnshXs6EK/oa5Gpf3CzENMjkvEx2tQtntGnb7UtSTOQ==}
'@braintree/sanitize-url@7.1.1':
resolution: {integrity: sha512-i1L7noDNxtFyL5DmZafWy1wRVhGehQmzZaz1HiN5e7iylJMSZR7ekOV7NsIqa5qBldlLrsKv4HbgFUVlQrz8Mw==}
@@ -299,8 +299,8 @@ packages:
'@iconify/types@2.0.0':
resolution: {integrity: sha512-+wluvCrRhXrhyOmRDJ3q8mux9JkKy5SJ/v8ol2tu4FVjyYvtEzkc/3pK15ET6RKg4b4w4BmTk1+gsCUhf21Ykg==}
'@iconify/utils@2.3.0':
resolution: {integrity: sha512-GmQ78prtwYW6EtzXRU1rY+KwOKfz32PD7iJh6Iyqw68GiKuoZ2A6pRtzWONz5VQJbp50mEjXh/7NkumtrAgRKA==}
'@iconify/utils@3.1.0':
resolution: {integrity: sha512-Zlzem1ZXhI1iHeeERabLNzBHdOa4VhQbqAcOQaMKuTuyZCpwKbC2R4Dd0Zo3g9EAc+Y4fiarO8HIHRAth7+skw==}
'@img/colour@1.0.0':
resolution: {integrity: sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==}
@@ -464,56 +464,56 @@ packages:
'@mdx-js/mdx@3.1.1':
resolution: {integrity: sha512-f6ZO2ifpwAQIpzGWaBQT2TXxPv6z3RBzQKpVftEWN78Vl/YweF1uwussDx8ECAXVtr3Rs89fKyG9YlzUs9DyGQ==}
'@mermaid-js/parser@0.6.1':
resolution: {integrity: sha512-lCQNpV8R4lgsGcjX5667UiuDLk2micCtjtxR1YKbBXvN5w2v+FeLYoHrTSSrjwXdMcDYvE4ZBPvKT31dfeSmmA==}
'@mermaid-js/parser@0.6.3':
resolution: {integrity: sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==}
'@next/env@16.0.1':
resolution: {integrity: sha512-LFvlK0TG2L3fEOX77OC35KowL8D7DlFF45C0OvKMC4hy8c/md1RC4UMNDlUGJqfCoCS2VWrZ4dSE6OjaX5+8mw==}
'@next/env@16.0.7':
resolution: {integrity: sha512-gpaNgUh5nftFKRkRQGnVi5dpcYSKGcZZkQffZ172OrG/XkrnS7UBTQ648YY+8ME92cC4IojpI2LqTC8sTDhAaw==}
'@next/swc-darwin-arm64@16.0.1':
resolution: {integrity: sha512-R0YxRp6/4W7yG1nKbfu41bp3d96a0EalonQXiMe+1H9GTHfKxGNCGFNWUho18avRBPsO8T3RmdWuzmfurlQPbg==}
'@next/swc-darwin-arm64@16.0.7':
resolution: {integrity: sha512-LlDtCYOEj/rfSnEn/Idi+j1QKHxY9BJFmxx7108A6D8K0SB+bNgfYQATPk/4LqOl4C0Wo3LACg2ie6s7xqMpJg==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [darwin]
'@next/swc-darwin-x64@16.0.1':
resolution: {integrity: sha512-kETZBocRux3xITiZtOtVoVvXyQLB7VBxN7L6EPqgI5paZiUlnsgYv4q8diTNYeHmF9EiehydOBo20lTttCbHAg==}
'@next/swc-darwin-x64@16.0.7':
resolution: {integrity: sha512-rtZ7BhnVvO1ICf3QzfW9H3aPz7GhBrnSIMZyr4Qy6boXF0b5E3QLs+cvJmg3PsTCG2M1PBoC+DANUi4wCOKXpA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [darwin]
'@next/swc-linux-arm64-gnu@16.0.1':
resolution: {integrity: sha512-hWg3BtsxQuSKhfe0LunJoqxjO4NEpBmKkE+P2Sroos7yB//OOX3jD5ISP2wv8QdUwtRehMdwYz6VB50mY6hqAg==}
'@next/swc-linux-arm64-gnu@16.0.7':
resolution: {integrity: sha512-mloD5WcPIeIeeZqAIP5c2kdaTa6StwP4/2EGy1mUw8HiexSHGK/jcM7lFuS3u3i2zn+xH9+wXJs6njO7VrAqww==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@next/swc-linux-arm64-musl@16.0.1':
resolution: {integrity: sha512-UPnOvYg+fjAhP3b1iQStcYPWeBFRLrugEyK/lDKGk7kLNua8t5/DvDbAEFotfV1YfcOY6bru76qN9qnjLoyHCQ==}
'@next/swc-linux-arm64-musl@16.0.7':
resolution: {integrity: sha512-+ksWNrZrthisXuo9gd1XnjHRowCbMtl/YgMpbRvFeDEqEBd523YHPWpBuDjomod88U8Xliw5DHhekBC3EOOd9g==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@next/swc-linux-x64-gnu@16.0.1':
resolution: {integrity: sha512-Et81SdWkcRqAJziIgFtsFyJizHoWne4fzJkvjd6V4wEkWTB4MX6J0uByUb0peiJQ4WeAt6GGmMszE5KrXK6WKg==}
'@next/swc-linux-x64-gnu@16.0.7':
resolution: {integrity: sha512-4WtJU5cRDxpEE44Ana2Xro1284hnyVpBb62lIpU5k85D8xXxatT+rXxBgPkc7C1XwkZMWpK5rXLXTh9PFipWsA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@next/swc-linux-x64-musl@16.0.1':
resolution: {integrity: sha512-qBbgYEBRrC1egcG03FZaVfVxrJm8wBl7vr8UFKplnxNRprctdP26xEv9nJ07Ggq4y1adwa0nz2mz83CELY7N6Q==}
'@next/swc-linux-x64-musl@16.0.7':
resolution: {integrity: sha512-HYlhqIP6kBPXalW2dbMTSuB4+8fe+j9juyxwfMwCe9kQPPeiyFn7NMjNfoFOfJ2eXkeQsoUGXg+O2SE3m4Qg2w==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@next/swc-win32-arm64-msvc@16.0.1':
resolution: {integrity: sha512-cPuBjYP6I699/RdbHJonb3BiRNEDm5CKEBuJ6SD8k3oLam2fDRMKAvmrli4QMDgT2ixyRJ0+DTkiODbIQhRkeQ==}
'@next/swc-win32-arm64-msvc@16.0.7':
resolution: {integrity: sha512-EviG+43iOoBRZg9deGauXExjRphhuYmIOJ12b9sAPy0eQ6iwcPxfED2asb/s2/yiLYOdm37kPaiZu8uXSYPs0Q==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [win32]
'@next/swc-win32-x64-msvc@16.0.1':
resolution: {integrity: sha512-XeEUJsE4JYtfrXe/LaJn3z1pD19fK0Q6Er8Qoufi+HqvdO4LEPyCxLUt4rxA+4RfYo6S9gMlmzCMU2F+AatFqQ==}
'@next/swc-win32-x64-msvc@16.0.7':
resolution: {integrity: sha512-gniPjy55zp5Eg0896qSrf3yB1dw4F/3s8VK1ephdsZZ129j2n6e1WqCbE2YgcKhW9hPB9TVZENugquWJD5x0ug==}
engines: {node: '>= 10'}
cpu: [x64]
os: [win32]
@@ -1239,9 +1239,6 @@ packages:
confbox@0.1.8:
resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==}
confbox@0.2.2:
resolution: {integrity: sha512-1NB+BKqhtNipMsov4xI/NnhCKp9XG9NamYp5PVm9klAT0fsrNPjaFICsCFhNhwZJKNh7zB/3q8qXz0E9oaMNtQ==}
core-js@3.46.0:
resolution: {integrity: sha512-vDMm9B0xnqqZ8uSBpZ8sNtRtOdmfShrvT6h2TuQGLs0Is+cR0DYbj/KWP6ALVNbWPpqA/qPLoOuppJN07humpA==}
@@ -1412,11 +1409,11 @@ packages:
resolution: {integrity: sha512-e1U46jVP+w7Iut8Jt8ri1YsPOvFpg46k+K8TpCb0P+zjCkjkPnV7WzfDJzMHy1LnA+wj5pLT1wjO901gLXeEhA==}
engines: {node: '>=12'}
dagre-d3-es@7.0.11:
resolution: {integrity: sha512-tvlJLyQf834SylNKax8Wkzco/1ias1OPw8DcUMDE7oUIoSEW25riQVuiu/0OWEFqT0cxHT3Pa9/D82Jr47IONw==}
dagre-d3-es@7.0.13:
resolution: {integrity: sha512-efEhnxpSuwpYOKRm/L5KbqoZmNNukHa/Flty4Wp62JRvgH2ojwVgPgdYyr4twpieZnyRDdIH7PY2mopX26+j2Q==}
dayjs@1.11.13:
resolution: {integrity: sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==}
dayjs@1.11.19:
resolution: {integrity: sha512-t5EcLVS6QPBNqM2z8fakk/NKel+Xzshgt8FFKAn+qwlD1pzZWxh0nVCrvFK7ZDb6XucZeF9z8C7CBWTRIVApAw==}
debug@4.4.1:
resolution: {integrity: sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==}
@@ -1497,9 +1494,6 @@ packages:
estree-walker@3.0.3:
resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==}
exsolve@1.0.7:
resolution: {integrity: sha512-VO5fQUzZtI6C+vx4w/4BWJpg3s/5l+6pRQEHzFRM8WFi4XffSP1Z+4qi7GbjWbvRQEbdIco5mIMq+zX4rPuLrw==}
extend@3.0.2:
resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==}
@@ -1553,8 +1547,8 @@ packages:
waku:
optional: true
fumadocs-mdx@13.0.5:
resolution: {integrity: sha512-ERhPxQzoTwEdtuel5dN5OmUItOhGGXTLR1uCjiGPABYeVkc57vAexyTRQSYZMxGlcfjkJaYqt3qY1p5j7i4g7A==}
fumadocs-mdx@13.0.8:
resolution: {integrity: sha512-UbUwH0iGvYbytnxhmfd7tWJKFK8L0mrbTAmrQYnpg6Wi/h8afNMJmbHBOzVcaEWJKeFipZ1CGDAsNA2fztwXNg==}
hasBin: true
peerDependencies:
'@fumadocs/mdx-remote': ^1.4.0
@@ -1595,10 +1589,6 @@ packages:
github-slugger@2.0.0:
resolution: {integrity: sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==}
globals@15.15.0:
resolution: {integrity: sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==}
engines: {node: '>=18'}
graceful-fs@4.2.11:
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
@@ -1662,8 +1652,8 @@ packages:
resolution: {integrity: sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==}
hasBin: true
js-yaml@4.1.0:
resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==}
js-yaml@4.1.1:
resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==}
hasBin: true
katex@0.16.22:
@@ -1673,9 +1663,6 @@ packages:
khroma@2.1.0:
resolution: {integrity: sha512-Ls993zuzfayK269Svk9hzpeGUKob/sIgZzyHYdjQoAdQetRKpOLj+k/QQQ/6Qi0Yz65mlROrfd+Ev+1+7dz9Kw==}
kolorist@1.8.0:
resolution: {integrity: sha512-Y+60/zizpJ3HRH8DCss+q95yr6145JXZo46OTpFvDZWLfRCE4qChOyk1b26nMaNpfHHgxagk9dXT5OP0Tfe+dQ==}
langium@3.3.1:
resolution: {integrity: sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==}
engines: {node: '>=16.0.0'}
@@ -1750,10 +1737,6 @@ packages:
resolution: {integrity: sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==}
engines: {node: '>= 12.0.0'}
local-pkg@1.1.1:
resolution: {integrity: sha512-WunYko2W1NcdfAFpuLUoucsgULmgDBRkdxHxWQ7mK0cQqwPiy8E1enjuRBrhLtZkB5iScJ1XIPdhVEFK8aOLSg==}
engines: {node: '>=14'}
lodash-es@4.17.21:
resolution: {integrity: sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==}
@@ -1782,9 +1765,9 @@ packages:
markdown-table@3.0.4:
resolution: {integrity: sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==}
marked@15.0.12:
resolution: {integrity: sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==}
engines: {node: '>= 18'}
marked@16.4.2:
resolution: {integrity: sha512-TI3V8YYWvkVf3KJe1dRkpnjs68JUPyEa5vjKrp1XEEJUAOaQc+Qj+L1qWbPd0SJuAdQkFU0h73sXXqwDYxsiDA==}
engines: {node: '>= 20'}
hasBin: true
mdast-util-find-and-replace@3.0.2:
@@ -1835,8 +1818,8 @@ packages:
mdast-util-to-string@4.0.0:
resolution: {integrity: sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==}
mermaid@11.8.1:
resolution: {integrity: sha512-VSXJLqP1Sqw5sGr273mhvpPRhXwE6NlmMSqBZQw+yZJoAJkOIPPn/uT3teeCBx60Fkt5zEI3FrH2eVT0jXRDzw==}
mermaid@11.12.1:
resolution: {integrity: sha512-UlIZrRariB11TY1RtTgUWp65tphtBv4CSq7vyS2ZZ2TgoMjs2nloq+wFqxiwcxlhHUvs7DPGgMjs2aeQxz5h9g==}
micromark-core-commonmark@2.0.3:
resolution: {integrity: sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==}
@@ -1956,8 +1939,8 @@ packages:
engines: {node: '>=10'}
hasBin: true
mlly@1.7.4:
resolution: {integrity: sha512-qmdSIPC4bDJXgZTCR7XosJiNKySV7O215tsPtDN9iEO/7q/76b/ijtgRu/+epFXSJhijtTCCGp3DWS549P3xKw==}
mlly@1.8.0:
resolution: {integrity: sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g==}
ms@2.1.3:
resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
@@ -1977,8 +1960,8 @@ packages:
react: ^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc
react-dom: ^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc
next@16.0.1:
resolution: {integrity: sha512-e9RLSssZwd35p7/vOa+hoDFggUZIUbZhIUSLZuETCwrCVvxOs87NamoUzT+vbcNAL8Ld9GobBnWOA6SbV/arOw==}
next@16.0.7:
resolution: {integrity: sha512-3mBRJyPxT4LOxAJI6IsXeFtKfiJUbjCLgvXO02fV8Wy/lIhPvP94Fe7dGhUgHXcQy4sSuYwQNcOLhIfOm0rL0A==}
engines: {node: '>=20.9.0'}
hasBin: true
peerDependencies:
@@ -2008,8 +1991,8 @@ packages:
oniguruma-to-es@4.3.3:
resolution: {integrity: sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==}
package-manager-detector@1.3.0:
resolution: {integrity: sha512-ZsEbbZORsyHuO00lY1kV3/t72yp6Ysay6Pd17ZAlNGuGwmWDLCJxFpRs0IzfXfj1o4icJOkUEioexFHzyPurSQ==}
package-manager-detector@1.6.0:
resolution: {integrity: sha512-61A5ThoTiDG/C8s8UMZwSorAGwMJ0ERVGj2OjoW5pAalsNOg15+iQiPzrLJ4jhZ1HJzmC2PIHT2oEiH3R5fzNA==}
parse-entities@4.0.2:
resolution: {integrity: sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==}
@@ -2033,9 +2016,6 @@ packages:
pkg-types@1.3.1:
resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==}
pkg-types@2.2.0:
resolution: {integrity: sha512-2SM/GZGAEkPp3KWORxQZns4M+WSeXbC2HEvmOIJe3Cmiv6ieAJvdVhDldtHqM5J1Y7MrR1XhkBT/rMlhh9FdqQ==}
points-on-curve@0.2.0:
resolution: {integrity: sha512-0mYKnYYe9ZcqMCWhUjItv/oHjvgEsfKvnUTg8sAtnHr3GVy7rGkXCb6d5cSyqrWqL4k81b9CPg3urd+T7aop3A==}
@@ -2076,9 +2056,6 @@ packages:
property-information@7.1.0:
resolution: {integrity: sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==}
quansync@0.2.10:
resolution: {integrity: sha512-t41VRkMYbkHyCYmOvx/6URnN80H7k4X0lLdBMGsz+maAwrJQYB1djpV6vHrQIBE0WBSGqhtEHrK9U3DWWH8v7A==}
react-dom@19.2.0:
resolution: {integrity: sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==}
peerDependencies:
@@ -2258,8 +2235,9 @@ packages:
resolution: {integrity: sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==}
engines: {node: '>=18'}
tinyexec@1.0.1:
resolution: {integrity: sha512-5uC6DDlmeqiOwCPmK9jMSdOuZTh8bU39Ys6yidB+UTt5hfZUPGAypSgFRiEp+jbi9qH40BLDvy85jIU88wKSqw==}
tinyexec@1.0.2:
resolution: {integrity: sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg==}
engines: {node: '>=18'}
tinyglobby@0.2.15:
resolution: {integrity: sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==}
@@ -2390,10 +2368,8 @@ snapshots:
'@antfu/install-pkg@1.1.0':
dependencies:
package-manager-detector: 1.3.0
tinyexec: 1.0.1
'@antfu/utils@8.1.1': {}
package-manager-detector: 1.6.0
tinyexec: 1.0.2
'@braintree/sanitize-url@7.1.1': {}
@@ -2520,18 +2496,11 @@ snapshots:
'@iconify/types@2.0.0': {}
'@iconify/utils@2.3.0':
'@iconify/utils@3.1.0':
dependencies:
'@antfu/install-pkg': 1.1.0
'@antfu/utils': 8.1.1
'@iconify/types': 2.0.0
debug: 4.4.1
globals: 15.15.0
kolorist: 1.8.0
local-pkg: 1.1.1
mlly: 1.7.4
transitivePeerDependencies:
- supports-color
mlly: 1.8.0
'@img/colour@1.0.0':
optional: true
@@ -2681,34 +2650,34 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@mermaid-js/parser@0.6.1':
'@mermaid-js/parser@0.6.3':
dependencies:
langium: 3.3.1
'@next/env@16.0.1': {}
'@next/env@16.0.7': {}
'@next/swc-darwin-arm64@16.0.1':
'@next/swc-darwin-arm64@16.0.7':
optional: true
'@next/swc-darwin-x64@16.0.1':
'@next/swc-darwin-x64@16.0.7':
optional: true
'@next/swc-linux-arm64-gnu@16.0.1':
'@next/swc-linux-arm64-gnu@16.0.7':
optional: true
'@next/swc-linux-arm64-musl@16.0.1':
'@next/swc-linux-arm64-musl@16.0.7':
optional: true
'@next/swc-linux-x64-gnu@16.0.1':
'@next/swc-linux-x64-gnu@16.0.7':
optional: true
'@next/swc-linux-x64-musl@16.0.1':
'@next/swc-linux-x64-musl@16.0.7':
optional: true
'@next/swc-win32-arm64-msvc@16.0.1':
'@next/swc-win32-arm64-msvc@16.0.7':
optional: true
'@next/swc-win32-x64-msvc@16.0.1':
'@next/swc-win32-x64-msvc@16.0.7':
optional: true
'@orama/orama@3.1.16': {}
@@ -3426,8 +3395,6 @@ snapshots:
confbox@0.1.8: {}
confbox@0.2.2: {}
core-js@3.46.0: {}
cose-base@1.0.3:
@@ -3621,12 +3588,12 @@ snapshots:
d3-transition: 3.0.1(d3-selection@3.0.0)
d3-zoom: 3.0.0
dagre-d3-es@7.0.11:
dagre-d3-es@7.0.13:
dependencies:
d3: 7.9.0
lodash-es: 4.17.21
dayjs@1.11.13: {}
dayjs@1.11.19: {}
debug@4.4.1:
dependencies:
@@ -3744,8 +3711,6 @@ snapshots:
dependencies:
'@types/estree': 1.0.8
exsolve@1.0.7: {}
extend@3.0.2: {}
fdir@6.5.0(picomatch@4.0.3):
@@ -3754,7 +3719,7 @@ snapshots:
fflate@0.4.8: {}
fumadocs-core@16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0):
fumadocs-core@16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0):
dependencies:
'@formatjs/intl-localematcher': 0.6.2
'@orama/orama': 3.1.16
@@ -3777,39 +3742,39 @@ snapshots:
optionalDependencies:
'@types/react': 19.1.8
lucide-react: 0.525.0(react@19.2.0)
next: 16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
next: 16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
react: 19.2.0
react-dom: 19.2.0(react@19.2.0)
transitivePeerDependencies:
- supports-color
fumadocs-mdx@13.0.5(fumadocs-core@16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react@19.2.0):
fumadocs-mdx@13.0.8(fumadocs-core@16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react@19.2.0):
dependencies:
'@mdx-js/mdx': 3.1.1
'@standard-schema/spec': 1.0.0
chokidar: 4.0.3
esbuild: 0.25.12
estree-util-value-to-estree: 3.5.0
fumadocs-core: 16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
js-yaml: 4.1.0
fumadocs-core: 16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
js-yaml: 4.1.1
lru-cache: 11.2.2
mdast-util-to-markdown: 2.1.2
picocolors: 1.1.1
picomatch: 4.0.3
remark-mdx: 3.1.1
tinyexec: 1.0.1
tinyexec: 1.0.2
tinyglobby: 0.2.15
unified: 11.0.5
unist-util-remove-position: 5.0.0
unist-util-visit: 5.0.0
zod: 4.1.12
optionalDependencies:
next: 16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
next: 16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
react: 19.2.0
transitivePeerDependencies:
- supports-color
fumadocs-ui@16.0.8(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)(tailwindcss@4.1.10):
fumadocs-ui@16.0.8(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)(tailwindcss@4.1.10):
dependencies:
'@radix-ui/react-accordion': 1.2.12(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
'@radix-ui/react-collapsible': 1.1.12(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
@@ -3822,7 +3787,7 @@ snapshots:
'@radix-ui/react-slot': 1.2.4(@types/react@19.1.8)(react@19.2.0)
'@radix-ui/react-tabs': 1.1.13(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
class-variance-authority: 0.7.1
fumadocs-core: 16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
fumadocs-core: 16.0.8(@types/react@19.1.8)(lucide-react@0.525.0(react@19.2.0))(next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
lodash.merge: 4.6.2
next-themes: 0.4.6(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
postcss-selector-parser: 7.1.0
@@ -3833,7 +3798,7 @@ snapshots:
tailwind-merge: 3.3.1
optionalDependencies:
'@types/react': 19.1.8
next: 16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
next: 16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0)
tailwindcss: 4.1.10
transitivePeerDependencies:
- '@mixedbread/sdk'
@@ -3850,8 +3815,6 @@ snapshots:
github-slugger@2.0.0: {}
globals@15.15.0: {}
graceful-fs@4.2.11: {}
hachure-fill@0.5.2: {}
@@ -3948,7 +3911,7 @@ snapshots:
jiti@2.4.2: {}
js-yaml@4.1.0:
js-yaml@4.1.1:
dependencies:
argparse: 2.0.1
@@ -3958,8 +3921,6 @@ snapshots:
khroma@2.1.0: {}
kolorist@1.8.0: {}
langium@3.3.1:
dependencies:
chevrotain: 11.0.3
@@ -4017,12 +3978,6 @@ snapshots:
lightningcss-win32-arm64-msvc: 1.30.1
lightningcss-win32-x64-msvc: 1.30.1
local-pkg@1.1.1:
dependencies:
mlly: 1.7.4
pkg-types: 2.2.0
quansync: 0.2.10
lodash-es@4.17.21: {}
lodash.merge@4.6.2: {}
@@ -4043,7 +3998,7 @@ snapshots:
markdown-table@3.0.4: {}
marked@15.0.12: {}
marked@16.4.2: {}
mdast-util-find-and-replace@3.0.2:
dependencies:
@@ -4208,30 +4163,28 @@ snapshots:
dependencies:
'@types/mdast': 4.0.4
mermaid@11.8.1:
mermaid@11.12.1:
dependencies:
'@braintree/sanitize-url': 7.1.1
'@iconify/utils': 2.3.0
'@mermaid-js/parser': 0.6.1
'@iconify/utils': 3.1.0
'@mermaid-js/parser': 0.6.3
'@types/d3': 7.4.3
cytoscape: 3.32.1
cytoscape-cose-bilkent: 4.1.0(cytoscape@3.32.1)
cytoscape-fcose: 2.2.0(cytoscape@3.32.1)
d3: 7.9.0
d3-sankey: 0.12.3
dagre-d3-es: 7.0.11
dayjs: 1.11.13
dagre-d3-es: 7.0.13
dayjs: 1.11.19
dompurify: 3.2.6
katex: 0.16.22
khroma: 2.1.0
lodash-es: 4.17.21
marked: 15.0.12
marked: 16.4.2
roughjs: 4.6.6
stylis: 4.3.6
ts-dedent: 2.2.0
uuid: 11.1.0
transitivePeerDependencies:
- supports-color
micromark-core-commonmark@2.0.3:
dependencies:
@@ -4505,7 +4458,7 @@ snapshots:
mkdirp@3.0.1: {}
mlly@1.7.4:
mlly@1.8.0:
dependencies:
acorn: 8.15.0
pathe: 2.0.3
@@ -4523,9 +4476,9 @@ snapshots:
react: 19.2.0
react-dom: 19.2.0(react@19.2.0)
next@16.0.1(react-dom@19.2.0(react@19.2.0))(react@19.2.0):
next@16.0.7(react-dom@19.2.0(react@19.2.0))(react@19.2.0):
dependencies:
'@next/env': 16.0.1
'@next/env': 16.0.7
'@swc/helpers': 0.5.15
caniuse-lite: 1.0.30001724
postcss: 8.4.31
@@ -4533,14 +4486,14 @@ snapshots:
react-dom: 19.2.0(react@19.2.0)
styled-jsx: 5.1.6(react@19.2.0)
optionalDependencies:
'@next/swc-darwin-arm64': 16.0.1
'@next/swc-darwin-x64': 16.0.1
'@next/swc-linux-arm64-gnu': 16.0.1
'@next/swc-linux-arm64-musl': 16.0.1
'@next/swc-linux-x64-gnu': 16.0.1
'@next/swc-linux-x64-musl': 16.0.1
'@next/swc-win32-arm64-msvc': 16.0.1
'@next/swc-win32-x64-msvc': 16.0.1
'@next/swc-darwin-arm64': 16.0.7
'@next/swc-darwin-x64': 16.0.7
'@next/swc-linux-arm64-gnu': 16.0.7
'@next/swc-linux-arm64-musl': 16.0.7
'@next/swc-linux-x64-gnu': 16.0.7
'@next/swc-linux-x64-musl': 16.0.7
'@next/swc-win32-arm64-msvc': 16.0.7
'@next/swc-win32-x64-msvc': 16.0.7
sharp: 0.34.5
transitivePeerDependencies:
- '@babel/core'
@@ -4556,7 +4509,7 @@ snapshots:
regex: 6.0.1
regex-recursion: 6.0.2
package-manager-detector@1.3.0: {}
package-manager-detector@1.6.0: {}
parse-entities@4.0.2:
dependencies:
@@ -4581,13 +4534,7 @@ snapshots:
pkg-types@1.3.1:
dependencies:
confbox: 0.1.8
mlly: 1.7.4
pathe: 2.0.3
pkg-types@2.2.0:
dependencies:
confbox: 0.2.2
exsolve: 1.0.7
mlly: 1.8.0
pathe: 2.0.3
points-on-curve@0.2.0: {}
@@ -4628,8 +4575,6 @@ snapshots:
property-information@7.1.0: {}
quansync@0.2.10: {}
react-dom@19.2.0(react@19.2.0):
dependencies:
react: 19.2.0
@@ -4886,7 +4831,7 @@ snapshots:
mkdirp: 3.0.1
yallist: 5.0.0
tinyexec@1.0.1: {}
tinyexec@1.0.2: {}
tinyglobby@0.2.15:
dependencies:

BIN
docs/public/img/bg-dark.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 277 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 418 KiB

BIN
docs/public/img/hero.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 MiB

View File

@@ -200,7 +200,7 @@ export default async function Page(props: { params: Promise<{ slug?: string[] }>
<div className="flex flex-row w-full items-start">
<div className="flex-1">
<div className="flex flex-row w-full">
<DocsTitle>{page.data.title}</DocsTitle>
{slug.length > 0 && <DocsTitle>{page.data.title}</DocsTitle>}
<div className="ml-auto flex items-center gap-2">
{apiSection && versionItems.length > 1 && (

View File

@@ -2,6 +2,34 @@
@import 'fumadocs-ui/css/neutral.css';
@import 'fumadocs-ui/css/preset.css';
/* Custom Sky + Emerald theme */
@theme {
--color-fd-primary: hsl(199, 89%, 48%); /* sky-500 */
--color-fd-primary-foreground: hsl(0, 0%, 100%);
--color-fd-ring: hsl(199, 89%, 48%); /* sky-500 */
--color-fd-muted: hsl(160, 84%, 95%); /* emerald-50 */
--color-fd-accent: hsl(152, 76%, 92%); /* emerald-100 */
--font-sans: var(--font-geist-sans);
--font-mono: var(--font-geist-mono);
}
.dark {
--color-fd-primary: hsl(199, 89%, 48%); /* sky-500 */
--color-fd-primary-foreground: hsl(0, 0%, 100%);
--color-fd-ring: hsl(199, 89%, 48%); /* sky-500 */
--color-fd-muted: hsl(199, 89%, 14%); /* sky-950 */
--color-fd-accent: hsl(199, 89%, 20%); /* sky dark */
}
.dark body {
background-image: linear-gradient(
rgba(14, 165, 233, 0.1),
transparent 20rem,
transparent
);
background-repeat: no-repeat;
}
/* Fix TOC overflow on production builds */
#nd-toc {
overflow-y: auto;

View File

@@ -1,6 +1,6 @@
import './global.css';
import { RootProvider } from 'fumadocs-ui/provider';
import { Inter } from 'next/font/google';
import { Geist, Geist_Mono } from 'next/font/google';
import type { ReactNode } from 'react';
import { PHProvider, PostHogPageView } from '@/providers/posthog-provider';
import { AnalyticsTracker } from '@/components/analytics-tracker';
@@ -8,13 +8,19 @@ import { CookieConsent } from '@/components/cookie-consent';
import { Footer } from '@/components/footer';
import { Suspense } from 'react';
const inter = Inter({
const geist = Geist({
subsets: ['latin'],
variable: '--font-geist-sans',
});
const geistMono = Geist_Mono({
subsets: ['latin'],
variable: '--font-geist-mono',
});
export default function Layout({ children }: { children: ReactNode }) {
return (
<html lang="en" className={inter.className} suppressHydrationWarning>
<html lang="en" className={`${geist.variable} ${geistMono.variable} font-sans`} suppressHydrationWarning>
<head>
<link rel="icon" href="/docs/favicon.ico" sizes="any" />
</head>

View File

@@ -0,0 +1,119 @@
"""
Browser Tool Example
Demonstrates how to use the BrowserTool to control a browser programmatically
via the computer server. The browser runs visibly on the XFCE desktop so visual
agents can see it.
Prerequisites:
- Computer server running (Docker container or local)
- For Docker: Container should be running with browser tool support
- For local: Playwright and Firefox must be installed
Usage:
python examples/browser_tool_example.py
"""
import asyncio
import logging
import sys
from pathlib import Path
# Add the libs path to sys.path
libs_path = Path(__file__).parent.parent / "libs" / "python"
sys.path.insert(0, str(libs_path))
from agent.tools.browser_tool import BrowserTool
# Import Computer interface and BrowserTool
from computer import Computer
# Configure logging to see what's happening
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def test_browser_tool():
    """Exercise the BrowserTool end to end against a running computer server.

    Steps through screenshot capture, navigation, a web search, scrolling,
    clicking, and typing, logging each result. Requires a reachable computer
    server (see the module docstring for prerequisites).
    """
    # Initialize the computer interface.
    # For local testing, use provider_type="docker";
    # for provider_type="cloud", provide name and api_key.
    computer = Computer(provider_type="docker", os_type="linux", image="cua-xfce:dev")
    await computer.run()

    # Initialize the browser tool with the computer interface
    browser = BrowserTool(interface=computer)

    logger.info("Testing Browser Tool...")

    try:
        # Test 0: Take a screenshot (pre-init)
        logger.info("Test 0: Taking a screenshot...")
        screenshot_bytes = await browser.screenshot()
        screenshot_path = Path(__file__).parent / "browser_screenshot_init.png"
        with open(screenshot_path, "wb") as f:
            f.write(screenshot_bytes)
        logger.info(f"Screenshot captured: {len(screenshot_bytes)} bytes")

        # Test 1: Visit a URL
        logger.info("Test 1: Visiting a URL...")
        result = await browser.visit_url("https://www.trycua.com")
        logger.info(f"Visit URL result: {result}")
        # Wait a bit for the page to load
        await asyncio.sleep(2)

        # Test 2: Take a screenshot of the loaded page
        logger.info("Test 2: Taking a screenshot...")
        screenshot_bytes = await browser.screenshot()
        screenshot_path = Path(__file__).parent / "browser_screenshot.png"
        with open(screenshot_path, "wb") as f:
            f.write(screenshot_bytes)
        logger.info(f"Screenshot captured: {len(screenshot_bytes)} bytes")
        await asyncio.sleep(1)

        # Test 3: Visit bot detector
        logger.info("Test 3: Visiting bot detector...")
        result = await browser.visit_url("https://bot-detector.rebrowser.net/")
        logger.info(f"Visit URL result: {result}")

        # Test 4: Web search (renumbered; logs previously reused "Test 2")
        logger.info("Test 4: Performing a web search...")
        result = await browser.web_search("Python programming")
        logger.info(f"Web search result: {result}")
        await asyncio.sleep(2)

        # Test 5: Scroll
        logger.info("Test 5: Scrolling the page...")
        result = await browser.scroll(delta_x=0, delta_y=500)
        logger.info(f"Scroll result: {result}")
        await asyncio.sleep(1)

        # Test 6: Click (example coordinates - adjust based on your screen)
        logger.info("Test 6: Clicking at coordinates...")
        result = await browser.click(x=500, y=300)
        logger.info(f"Click result: {result}")
        await asyncio.sleep(1)

        # Test 7: Type text (if there's a focused input field)
        logger.info("Test 7: Typing text...")
        result = await browser.type("Hello from BrowserTool!")
        logger.info(f"Type result: {result}")

        logger.info("All tests completed!")
    except Exception as e:
        logger.error(f"Error during testing: {e}", exc_info=True)


if __name__ == "__main__":
    asyncio.run(test_browser_tool())

View File

@@ -8,6 +8,7 @@ from . import (
composed_grounded,
gelato,
gemini,
generic_vlm,
glm45v,
gta1,
holo,
@@ -16,7 +17,6 @@ from . import (
omniparser,
openai,
opencua,
generic_vlm,
uiins,
uitars,
uitars2,
@@ -24,19 +24,19 @@ from . import (
__all__ = [
"anthropic",
"openai",
"uitars",
"omniparser",
"gta1",
"composed_grounded",
"glm45v",
"opencua",
"internvl",
"holo",
"moondream3",
"gelato",
"gemini",
"generic_vlm",
"glm45v",
"gta1",
"holo",
"internvl",
"moondream3",
"omniparser",
"openai",
"opencua",
"uiins",
"gelato",
"uitars",
"uitars2",
]

View File

@@ -442,7 +442,7 @@ def get_all_element_descriptions(responses_items: List[Dict[str, Any]]) -> List[
# Conversion functions between responses_items and completion messages formats
def convert_responses_items_to_completion_messages(
messages: List[Dict[str, Any]],
messages: List[Dict[str, Any]],
allow_images_in_tool_results: bool = True,
send_multiple_user_images_per_parallel_tool_results: bool = False,
) -> List[Dict[str, Any]]:
@@ -573,25 +573,33 @@ def convert_responses_items_to_completion_messages(
"computer_call_output",
]
# Send tool message + separate user message with image (OpenAI compatible)
completion_messages += [
{
"role": "tool",
"tool_call_id": call_id,
"content": "[Execution completed. See screenshot below]",
},
{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": output.get("image_url")}}
],
},
] if send_multiple_user_images_per_parallel_tool_results or (not is_next_message_image_result) else [
{
"role": "tool",
"tool_call_id": call_id,
"content": "[Execution completed. See screenshot below]",
},
]
completion_messages += (
[
{
"role": "tool",
"tool_call_id": call_id,
"content": "[Execution completed. See screenshot below]",
},
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": output.get("image_url")},
}
],
},
]
if send_multiple_user_images_per_parallel_tool_results
or (not is_next_message_image_result)
else [
{
"role": "tool",
"tool_call_id": call_id,
"content": "[Execution completed. See screenshot below]",
},
]
)
else:
# Handle text output as tool response
completion_messages.append(

View File

@@ -0,0 +1,6 @@
"""Tools for agent interactions."""
from .browser_tool import BrowserTool
__all__ = ["BrowserTool"]

View File

@@ -0,0 +1,135 @@
"""
Browser Tool for agent interactions.
Allows agents to control a browser programmatically via Playwright.
"""
import logging
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from computer.interface import GenericComputerInterface
logger = logging.getLogger(__name__)
class BrowserTool:
    """
    Async wrapper exposing browser control through the computer SDK.

    Implements the Fara/Magentic-One agent surface (visit_url, click, type,
    scroll, web_search, screenshot) by forwarding every call to the attached
    interface's ``playwright_exec`` endpoint.
    """

    def __init__(self, interface: "GenericComputerInterface"):
        """
        Args:
            interface: A GenericComputerInterface instance that provides
                playwright_exec.
        """
        self.interface = interface
        self.logger = logging.getLogger(__name__)

    async def _execute_command(self, command: str, params: dict) -> dict:
        """
        Forward one browser command to the computer interface.

        Args:
            command: Command name understood by the server.
            params: JSON-serializable command parameters.

        Returns:
            The server's response dict; transport failures are converted into
            ``{"success": False, "error": ...}`` instead of raising.
        """
        try:
            response = await self.interface.playwright_exec(command, params)
            if not response.get("success"):
                self.logger.error(
                    f"Browser command '{command}' failed: {response.get('error', 'Unknown error')}"
                )
            return response
        except Exception as e:
            self.logger.error(f"Error executing browser command '{command}': {e}")
            return {"success": False, "error": str(e)}

    async def visit_url(self, url: str) -> dict:
        """Navigate the browser to ``url``; returns the server response dict."""
        return await self._execute_command("visit_url", {"url": url})

    async def click(self, x: int, y: int) -> dict:
        """Click the page at coordinates ``(x, y)``; returns the response dict."""
        return await self._execute_command("click", {"x": x, "y": y})

    async def type(self, text: str) -> dict:
        """Type ``text`` into the currently focused element; returns the response dict."""
        return await self._execute_command("type", {"text": text})

    async def scroll(self, delta_x: int, delta_y: int) -> dict:
        """Scroll the page by the given horizontal/vertical deltas."""
        return await self._execute_command("scroll", {"delta_x": delta_x, "delta_y": delta_y})

    async def web_search(self, query: str) -> dict:
        """Open a Google search for ``query``; returns the response dict."""
        return await self._execute_command("web_search", {"query": query})

    async def screenshot(self) -> bytes:
        """
        Capture the current browser page as PNG bytes.

        Returns:
            Screenshot image data as bytes (PNG format).

        Raises:
            RuntimeError: If the server reports failure or returns no image.
        """
        import base64

        response = await self._execute_command("screenshot", {})
        if response.get("success") and response.get("screenshot"):
            # Server ships the PNG as base64 text; decode back to raw bytes.
            return base64.b64decode(response["screenshot"])
        error = response.get("error", "Unknown error")
        raise RuntimeError(f"Failed to take screenshot: {error}")

View File

@@ -24,7 +24,7 @@ dependencies = [
"certifi>=2024.2.2",
"litellm>=1.74.12"
]
requires-python = ">=3.12"
requires-python = ">=3.12,<3.14"
[project.optional-dependencies]
openai = []

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.30
current_version = 0.1.31
commit = True
tag = True
tag_name = computer-server-v{new_version}

View File

@@ -0,0 +1,361 @@
"""
Browser manager using Playwright for programmatic browser control.
This allows agents to control a browser that runs visibly on the XFCE desktop.
"""
import asyncio
import logging
import os
from typing import Any, Dict, Optional
try:
from playwright.async_api import Browser, BrowserContext, Page, async_playwright
except ImportError:
async_playwright = None
Browser = None
BrowserContext = None
Page = None
logger = logging.getLogger(__name__)
class BrowserManager:
    """
    Manages a Playwright browser instance that runs visibly on the XFCE desktop.

    Uses a persistent Firefox context so cookies and sessions survive across
    commands, and transparently re-initializes the browser when the context,
    page, or Playwright driver is found dead (e.g. the window was closed).
    """

    def __init__(self):
        """Initialize the BrowserManager; nothing launches until first use."""
        self.playwright = None  # Playwright driver handle
        self.browser: Optional[Browser] = None  # kept for close(); persistent context does not set it
        self.context: Optional[BrowserContext] = None  # persistent Firefox context
        self.page: Optional[Page] = None  # currently active page
        self._initialized = False
        self._initialization_error: Optional[str] = None  # last launch failure, for reporting
        self._lock = asyncio.Lock()  # serializes (re)initialization

    async def _reset_driver(self) -> None:
        """Drop all browser state and stop the Playwright driver (best effort)."""
        self._initialized = False
        self.context = None
        self.page = None
        if self.playwright:
            try:
                await self.playwright.stop()
            except Exception:
                # Driver may already be dead; nothing more we can do.
                pass
            self.playwright = None

    async def _ensure_initialized(self):
        """Ensure a live browser context and page exist, launching Firefox if needed.

        Safe to call repeatedly; detects a closed/dead context and rebuilds it.

        Raises:
            RuntimeError: If playwright is not installed.
            Exception: If the browser fails to launch (message also recorded
                in ``self._initialization_error``).
        """
        # Fast path: probe the existing context without taking the lock.
        if self._initialized:
            try:
                if self.context:
                    # Accessing .pages raises if the context is closed.
                    _ = self.context.pages
                    return
                else:
                    self._initialized = False
                    logger.warning("Browser context was closed, will reinitialize...")
            except Exception as e:
                logger.warning(f"Browser context is dead ({e}), will reinitialize...")
                await self._reset_driver()

        async with self._lock:
            # Double-check after acquiring the lock (another task may have
            # initialized or torn down the browser in the meantime).
            if self._initialized:
                try:
                    if self.context:
                        _ = self.context.pages
                        return
                except Exception:
                    await self._reset_driver()

            if async_playwright is None:
                raise RuntimeError(
                    "playwright is not installed. Please install it with: pip install playwright && playwright install --with-deps firefox"
                )

            try:
                # The browser must appear on the X display the visual agent watches.
                display = os.environ.get("DISPLAY", ":1")
                logger.info(f"Initializing browser with DISPLAY={display}")

                self.playwright = await async_playwright().start()

                # Persistent profile directory keeps cookies/sessions across runs.
                user_data_dir = os.path.join(os.path.expanduser("~"), ".playwright-firefox")
                os.makedirs(user_data_dir, exist_ok=True)

                # launch_persistent_context returns a BrowserContext, not a Browser.
                # headless=False is CRITICAL so the visual agent can see it.
                # (No --kiosk so the desktop remains visible.)
                self.context = await self.playwright.firefox.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=False,  # CRITICAL: visible for visual agent
                    viewport={"width": 1024, "height": 768},
                )

                # Mask navigator.webdriver so the browser is less detectable
                # as automation.
                await self.context.add_init_script(
                    """const defaultGetter = Object.getOwnPropertyDescriptor(
  Navigator.prototype,
  "webdriver"
).get;
defaultGetter.apply(navigator);
defaultGetter.toString();
Object.defineProperty(Navigator.prototype, "webdriver", {
  set: undefined,
  enumerable: true,
  configurable: true,
  get: new Proxy(defaultGetter, {
    apply: (target, thisArg, args) => {
      Reflect.apply(target, thisArg, args);
      return false;
    },
  }),
});
const patchedGetter = Object.getOwnPropertyDescriptor(
  Navigator.prototype,
  "webdriver"
).get;
patchedGetter.apply(navigator);
patchedGetter.toString();"""
                )

                # Reuse the context's initial page if one exists.
                pages = self.context.pages
                self.page = pages[0] if pages else await self.context.new_page()

                self._initialized = True
                logger.info("Browser initialized successfully")
            except Exception as e:
                logger.error(f"Failed to initialize browser: {e}")
                import traceback

                logger.error(traceback.format_exc())
                # Record for execute_command's error report, then re-raise.
                self._initialization_error = str(e)
                raise

    async def _execute_command_impl(self, cmd: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch a single browser command against the current page.

        Args:
            cmd: One of visit_url, click, type, scroll, web_search, screenshot.
            params: Command-specific parameters.

        Returns:
            A dict with at least ``success``; failures carry ``error``.
        """
        if cmd == "visit_url":
            url = params.get("url")
            if not url:
                return {"success": False, "error": "url parameter is required"}
            await self.page.goto(url, wait_until="domcontentloaded", timeout=30000)
            return {"success": True, "url": self.page.url}

        elif cmd == "click":
            x = params.get("x")
            y = params.get("y")
            if x is None or y is None:
                return {"success": False, "error": "x and y parameters are required"}
            await self.page.mouse.click(x, y)
            return {"success": True}

        elif cmd == "type":
            text = params.get("text")
            if text is None:
                return {"success": False, "error": "text parameter is required"}
            await self.page.keyboard.type(text)
            return {"success": True}

        elif cmd == "scroll":
            delta_x = params.get("delta_x", 0)
            delta_y = params.get("delta_y", 0)
            await self.page.mouse.wheel(delta_x, delta_y)
            return {"success": True}

        elif cmd == "web_search":
            query = params.get("query")
            if not query:
                return {"success": False, "error": "query parameter is required"}
            # URL-encode the query so spaces, '&', '#', '+' etc. cannot
            # corrupt the search URL (previously interpolated verbatim).
            from urllib.parse import quote_plus

            search_url = f"https://www.google.com/search?q={quote_plus(query)}"
            await self.page.goto(search_url, wait_until="domcontentloaded", timeout=30000)
            return {"success": True, "url": self.page.url}

        elif cmd == "screenshot":
            # Return the page screenshot as base64-encoded PNG text.
            import base64

            screenshot_bytes = await self.page.screenshot(type="png")
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
            return {"success": True, "screenshot": screenshot_b64}

        else:
            return {"success": False, "error": f"Unknown command: {cmd}"}

    async def execute_command(self, cmd: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a browser command with automatic recovery.

        Args:
            cmd: Command name (visit_url, click, type, scroll, web_search,
                screenshot)
            params: Command parameters

        Returns:
            Result dictionary with success status and any data
        """
        max_retries = 2
        for attempt in range(max_retries):
            try:
                await self._ensure_initialized()
            except Exception as e:
                error_msg = getattr(self, "_initialization_error", None) or str(e)
                logger.error(f"Browser initialization failed: {error_msg}")
                return {
                    "success": False,
                    "error": f"Browser initialization failed: {error_msg}. "
                    f"Make sure Playwright and Firefox are installed, and DISPLAY is set correctly.",
                }

            # Probe whether the cached page handle is still usable.
            page_valid = False
            try:
                if self.page is not None and not self.page.is_closed():
                    _ = self.page.url  # raises if the page handle is dead
                    page_valid = True
            except Exception as e:
                logger.warning(f"Page is invalid: {e}, will get a new page...")
                self.page = None

            # Recover a page: reuse any still-open tab, else create a new one.
            if not page_valid or self.page is None:
                try:
                    if self.context:
                        pages = self.context.pages
                        if pages:
                            for p in pages:
                                try:
                                    if not p.is_closed():
                                        self.page = p
                                        logger.info("Reusing existing open page")
                                        page_valid = True
                                        break
                                except Exception:
                                    continue
                        if not page_valid:
                            self.page = await self.context.new_page()
                            logger.info("Created new page")
                except Exception as e:
                    logger.error(f"Failed to get new page: {e}, browser may be closed")
                    # Whole browser is gone — tear down and retry if allowed.
                    await self._reset_driver()
                    if attempt < max_retries - 1:
                        logger.info("Browser was closed, retrying with fresh initialization...")
                        continue
                    else:
                        return {
                            "success": False,
                            "error": f"Browser was closed and cannot be recovered: {e}",
                        }

            try:
                return await self._execute_command_impl(cmd, params)
            except Exception as e:
                error_str = str(e)
                logger.error(f"Error executing command {cmd}: {e}")
                # "closed"/"target"/"context" in the message indicates the
                # browser died mid-command — reset and retry once.
                if any(keyword in error_str.lower() for keyword in ["closed", "target", "context"]):
                    logger.warning(
                        f"Browser/page was closed during command execution (attempt {attempt + 1}/{max_retries})"
                    )
                    await self._reset_driver()
                    if attempt < max_retries - 1:
                        logger.info("Retrying command after browser reinitialization...")
                        continue
                    else:
                        return {
                            "success": False,
                            "error": f"Command failed after {max_retries} attempts: {error_str}",
                        }
                else:
                    # Genuine command failure — report without retrying.
                    import traceback

                    logger.error(traceback.format_exc())
                    return {"success": False, "error": error_str}

        # Defensive fallback; the loop always returns or continues above.
        return {"success": False, "error": "Command failed after all retries"}

    async def close(self):
        """Close the browser and cleanup resources."""
        async with self._lock:
            try:
                if self.context:
                    await self.context.close()
                    self.context = None
                if self.browser:
                    await self.browser.close()
                    self.browser = None
                if self.playwright:
                    await self.playwright.stop()
                    self.playwright = None
                self.page = None
                self._initialized = False
                logger.info("Browser closed successfully")
            except Exception as e:
                logger.error(f"Error closing browser: {e}")
# Global instance
_browser_manager: Optional[BrowserManager] = None


def get_browser_manager() -> BrowserManager:
    """Return the process-wide BrowserManager, creating it lazily on first call."""
    global _browser_manager
    manager = _browser_manager
    if manager is None:
        manager = BrowserManager()
        _browser_manager = manager
    return manager

View File

@@ -55,6 +55,34 @@ from .base import BaseAccessibilityHandler, BaseAutomationHandler
logger = logging.getLogger(__name__)
# Trigger accessibility permissions prompt on macOS
try:
    # Source - https://stackoverflow.com/a/17134
    # Posted by Andreas
    # Retrieved 2025-12-03, License - CC BY-SA 4.0
    # Attempt to create and post a mouse event to trigger the permissions prompt
    # This will cause macOS to show "Python would like to control this computer using accessibility features"
    # Re-post the cursor's current position as a synthetic mouse-moved event;
    # posting any HID event is what makes macOS surface the prompt.
    current_pos = CGEventGetLocation(CGEventCreate(None))
    p = CGPoint()
    p.x = current_pos.x
    p.y = current_pos.y
    me = CGEventCreateMouseEvent(None, kCGEventMouseMoved, p, 0)
    if me:
        CGEventPost(kCGHIDEventTap, me)
        CFRelease(me)
except Exception as e:
    # Best effort only: the prompt is a side effect; failure is non-fatal.
    logger.debug(f"Failed to trigger accessibility permissions prompt: {e}")

# Trigger screen recording prompt on macOS
try:
    # Taking any screenshot makes macOS request Screen Recording permission.
    import pyautogui

    pyautogui.screenshot()
except Exception as e:
    logger.debug(f"Failed to trigger screenshot permissions prompt: {e}")
# Constants for accessibility API
kAXErrorSuccess = 0
kAXRoleAttribute = "AXRole"

View File

@@ -25,6 +25,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from .handlers.factory import HandlerFactory
from .browser import get_browser_manager
# Authentication session TTL (in seconds). Override via env var CUA_AUTH_TTL_SECONDS. Default: 60s
AUTH_SESSION_TTL_SECONDS: int = int(os.environ.get("CUA_AUTH_TTL_SECONDS", "60"))
@@ -749,5 +750,71 @@ async def agent_response_endpoint(
return JSONResponse(content=payload, headers=headers)
@app.post("/playwright_exec")
async def playwright_exec_endpoint(
request: Request,
container_name: Optional[str] = Header(None, alias="X-Container-Name"),
api_key: Optional[str] = Header(None, alias="X-API-Key"),
):
"""
Execute Playwright browser commands.
Headers:
- X-Container-Name: Container name for cloud authentication
- X-API-Key: API key for cloud authentication
Body:
{
"command": "visit_url|click|type|scroll|web_search",
"params": {...}
}
"""
# Parse request body
try:
body = await request.json()
command = body.get("command")
params = body.get("params", {})
except Exception as e:
raise HTTPException(status_code=400, detail=f"Invalid JSON body: {str(e)}")
if not command:
raise HTTPException(status_code=400, detail="Command is required")
# Check if CONTAINER_NAME is set (indicating cloud provider)
server_container_name = os.environ.get("CONTAINER_NAME")
# If cloud provider, perform authentication
if server_container_name:
logger.info(
f"Cloud provider detected. CONTAINER_NAME: {server_container_name}. Performing authentication..."
)
# Validate required headers
if not container_name:
raise HTTPException(status_code=401, detail="Container name required")
if not api_key:
raise HTTPException(status_code=401, detail="API key required")
# Validate with AuthenticationManager
is_authenticated = await auth_manager.auth(container_name, api_key)
if not is_authenticated:
raise HTTPException(status_code=401, detail="Authentication failed")
# Get browser manager and execute command
try:
browser_manager = get_browser_manager()
result = await browser_manager.execute_command(command, params)
if result.get("success"):
return JSONResponse(content=result)
else:
raise HTTPException(status_code=400, detail=result.get("error", "Command failed"))
except Exception as e:
logger.error(f"Error executing playwright command: {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
[project]
name = "cua-computer-server"
version = "0.1.30"
version = "0.1.31"
description = "Server component for the Computer-Use Interface (CUI) framework powering Cua"
authors = [
@@ -12,7 +12,7 @@ authors = [
]
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.12"
requires-python = ">=3.12,<3.14"
dependencies = [
"fastapi>=0.111.0",
"uvicorn[standard]>=0.27.0",
@@ -24,6 +24,7 @@ dependencies = [
"pyperclip>=1.9.0",
"websockets>=12.0",
"pywinctl>=0.4.1",
"playwright>=1.40.0",
# OS-specific runtime deps
"pyobjc-framework-Cocoa>=10.1; sys_platform == 'darwin'",
"pyobjc-framework-Quartz>=10.1; sys_platform == 'darwin'",

View File

@@ -969,6 +969,35 @@ class Computer:
"""
return await self.interface.to_screenshot_coordinates(x, y)
async def playwright_exec(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
    """
    Run a Playwright browser command on the remote computer.

    Thin delegate to the interface's ``playwright_exec``.

    Args:
        command: Browser command name — one of ``visit_url``, ``click``,
            ``type``, ``scroll``, ``web_search``.
        params: Command-specific parameters, e.g. ``{"url": ...}`` for
            ``visit_url`` or ``{"x": ..., "y": ...}`` for ``click``.

    Returns:
        Dict containing the command result.

    Examples:
        await computer.playwright_exec("visit_url", {"url": "https://example.com"})
        await computer.playwright_exec("click", {"x": 100, "y": 200})
        await computer.playwright_exec("type", {"text": "Hello, world!"})
        await computer.playwright_exec("scroll", {"delta_x": 0, "delta_y": -100})
        await computer.playwright_exec("web_search", {"query": "computer use agent"})
    """
    result = await self.interface.playwright_exec(command, params)
    return result
# Add virtual environment management functions to computer interface
async def venv_install(self, venv_name: str, requirements: list[str]):
"""Install packages in a virtual environment.

View File

@@ -667,6 +667,56 @@ class GenericComputerInterface(BaseComputerInterface):
return screenshot_x, screenshot_y
# Playwright browser control
async def playwright_exec(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
    """
    Execute a Playwright browser command.

    Args:
        command: The browser command to execute (visit_url, click, type, scroll, web_search)
        params: Command parameters

    Returns:
        Dict containing the command result; HTTP or transport failures are
        returned as ``{"success": False, "error": ...}`` rather than raised.

    Examples:
        # Navigate to a URL
        await interface.playwright_exec("visit_url", {"url": "https://example.com"})

        # Click at coordinates
        await interface.playwright_exec("click", {"x": 100, "y": 200})

        # Type text
        await interface.playwright_exec("type", {"text": "Hello, world!"})

        # Scroll
        await interface.playwright_exec("scroll", {"delta_x": 0, "delta_y": -100})

        # Web search
        await interface.playwright_exec("web_search", {"query": "computer use agent"})
    """
    # An API key implies a cloud deployment served over HTTPS on 8443;
    # otherwise plain HTTP on 8000.
    # NOTE(review): the protocol/port pairing is hard-coded — confirm it
    # matches all deployment targets.
    protocol = "https" if self.api_key else "http"
    port = "8443" if self.api_key else "8000"
    url = f"{protocol}://{self.ip_address}:{port}/playwright_exec"

    payload = {"command": command, "params": params or {}}

    # Cloud authentication headers; the server only checks them when it
    # runs with CONTAINER_NAME set.
    headers = {"Content-Type": "application/json"}
    if self.api_key:
        headers["X-API-Key"] = self.api_key
    if self.vm_name:
        headers["X-Container-Name"] = self.vm_name

    try:
        # Fresh session per call; non-200 responses are folded into an
        # in-band error dict instead of raising.
        async with aiohttp.ClientSession() as session:
            async with session.post(url, json=payload, headers=headers) as response:
                if response.status == 200:
                    return await response.json()
                else:
                    error_text = await response.text()
                    return {"success": False, "error": error_text}
    except Exception as e:
        return {"success": False, "error": str(e)}
# Websocket Methods
async def _keep_alive(self):
"""Keep the WebSocket connection alive with automatic reconnection."""

View File

@@ -45,7 +45,9 @@ class CloudProvider(BaseVMProvider):
# Fall back to environment variable if api_key not provided
if api_key is None:
api_key = os.getenv("CUA_API_KEY")
assert api_key, "api_key required for CloudProvider (provide via parameter or CUA_API_KEY environment variable)"
assert (
api_key
), "api_key required for CloudProvider (provide via parameter or CUA_API_KEY environment variable)"
self.api_key = api_key
self.verbose = verbose
self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/")

View File

@@ -19,7 +19,7 @@ dependencies = [
"pydantic>=2.11.1",
"mslex>=1.3.0",
]
requires-python = ">=3.12"
requires-python = ">=3.12,<3.14"
[project.optional-dependencies]
lume = [

View File

@@ -15,7 +15,7 @@ dependencies = [
"httpx>=0.24.0",
"posthog>=3.20.0"
]
requires-python = ">=3.12"
requires-python = ">=3.12,<3.14"
[tool.pdm]
distribution = true

View File

@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
name = "cua-mcp-server"
description = "MCP Server for Computer-Use Agent (CUA)"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.12,<3.14"
version = "0.1.15"
authors = [
{name = "TryCua", email = "gh@trycua.com"}

View File

@@ -24,7 +24,7 @@ dependencies = [
"typing-extensions>=4.9.0",
"pydantic>=2.6.3"
]
requires-python = ">=3.12"
requires-python = ">=3.12,<3.14"
readme = "README.md"
license = {text = "AGPL-3.0-or-later"}
keywords = ["computer-vision", "ocr", "ui-analysis", "icon-detection"]

View File

@@ -0,0 +1,14 @@
# QEMU Docker Containers
Docker containers running desktop operating systems via QEMU/KVM for Computer-Using Agents (CUA).
## Structure
```
qemu-docker/
├── linux/     # Ubuntu 22.04 container with CUA computer-server
└── windows/   # Windows 11 container with CUA computer-server
```

## Linux Container

See [linux/README.md](linux/README.md) for complete documentation on the Ubuntu QEMU container.

## Windows Container

See [windows/README.md](windows/README.md) for complete documentation on the Windows 11 QEMU container.

View File

@@ -0,0 +1,14 @@
# Base image providing QEMU/KVM and the VM boot tooling.
FROM trycua/qemu-local:latest

# OEM setup scripts executed inside the guest during first-boot installation.
COPY src/vm/setup/. /oem/
# Container entrypoint wrapper (starts the VM and waits for the CUA server).
COPY --chmod=755 src/entry.sh /entry.sh

# Default VM resources; override with `docker run -e ...`.
ENV RAM_SIZE="8G"
ENV CPU_CORES="8"
ENV DISK_SIZE="64G"
# Extra QEMU arguments: expose a QMP control socket on port 7200.
ENV ARGUMENTS="-qmp tcp:0.0.0.0:7200,server,nowait"

# 5000: CUA computer-server API; 8006: noVNC web interface.
EXPOSE 5000 8006

ENTRYPOINT ["/entry.sh"]

View File

@@ -0,0 +1,146 @@
# CUA Linux Container
Containerized Ubuntu 22.04 LTS virtual desktop for Computer-Using Agents (CUA). Utilizes QEMU/KVM with Ubuntu Desktop and computer-server pre-installed for remote computer control.
## Features
- Ubuntu 22.04 LTS Desktop running in QEMU/KVM
- Automated installation via cloud-init autoinstall
- Pre-installed CUA computer-server for remote computer control
- Support for custom OEM scripts during setup
- noVNC access for visual desktop interaction
## Quick Start
### 1. Download Ubuntu Server ISO
**Download Ubuntu 22.04 LTS Server ISO:**
1. Visit & download the [server ISO](https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso)
2. After downloading, rename the file to `setup.iso`
3. Copy it to the directory `src/vm/image/`
This ISO is used for automated Ubuntu installation with cloud-init on first run.
### 2. Build the Image
```bash
docker build -t cua-linux:dev .
```
### 3. First Run - Create Golden Image
On first run, the container will install Ubuntu from scratch and create a golden image. This takes 15-30 minutes.
```bash
# Create storage directory
mkdir -p ./storage
# Run with the renamed setup.iso (see step 1) to create the golden image
docker run -it --rm \
--device=/dev/kvm \
--name cua-linux \
--mount type=bind,source=$(pwd)/src/vm/image/setup.iso,target=/custom.iso \
--cap-add NET_ADMIN \
-v $(pwd)/storage:/storage \
-p 8006:8006 \
-p 5000:5000 \
-e RAM_SIZE=8G \
-e CPU_CORES=4 \
-e DISK_SIZE=64G \
cua-linux:dev
```
**What happens during first run:**
1. Ubuntu 22.04 Server installs automatically using cloud-init autoinstall
2. Minimal desktop environment is installed with auto-login enabled
3. OEM setup scripts install Python 3, create venv, and install CUA computer-server
4. systemd service created for CUA server (runs automatically on login)
5. X11 access configured for GUI automation
6. Golden image is saved to `/storage` directory
7. Container exits after setup completes
### 4. Subsequent Runs - Use Golden Image
After the golden image is created, subsequent runs boot much faster (30 sec - 2 min):
```bash
# Run without the setup ISO - uses existing golden image
docker run -it --rm \
--device=/dev/kvm \
--name cua-linux \
--cap-add NET_ADMIN \
-v $(pwd)/storage:/storage \
-p 8006:8006 \
-p 5000:5000 \
-e RAM_SIZE=8G \
-e CPU_CORES=4 \
cua-linux:dev
```
**Access points:**
- **Computer Server API**: `http://localhost:5000`
- **noVNC Browser**: `http://localhost:8006`
## Container Configuration
### Ports
- **5000**: CUA computer-server API endpoint
- **8006**: noVNC web interface for visual desktop access
### Environment Variables
- `RAM_SIZE`: RAM allocated to Ubuntu VM (default: "8G", recommended: "8G" for WSL2)
- `CPU_CORES`: CPU cores allocated to VM (default: "8")
- `DISK_SIZE`: VM disk size (default: "64G", minimum: "32G")
### Volumes
- `/storage`: Persistent VM storage (golden image, disk)
- `/custom.iso`: Mount point for ubuntu.iso (only needed for first run)
- `/oem`: Optional mount point for custom OEM scripts (built-in scripts included in image)
## Architecture
```
┌─────────────────────────────────────────────────────────┐
│ Docker Container (Linux host) │
│ │
│ • Port forwarding: localhost:5000 → EMULATOR_IP:5000 │
│ • Exposes: 5000 (API), 8006 (noVNC) │
│ │
│ ┌────────────────────────────────────────────────────┐ │
│ │ QEMU VM (Ubuntu 22.04) │ │
│ │ │ │
│ │ • CUA computer-server listens on 5000 │ │
│ │ │ │
│ └────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────┘
```
**Communication Flow:**
1. External client → `localhost:5000` (host)
2. Docker port mapping → Container's `localhost:5000`
3. Container detects VM IP and waits for server to be ready
4. CUA computer-server in Ubuntu VM processes request
## Development
### Modifying Setup Scripts
Setup scripts are in `src/vm/setup/`:
- `install.sh`: Entry point called after cloud-init installation (runs OEM setup)
- `setup.sh`: Main setup orchestration (copies scripts to /opt/oem)
- `setup-cua-server.sh`: CUA server installation with isolated venv and systemd service
After modifying, rebuild the image:
```bash
docker build -t cua-linux:dev .
```

View File

@@ -0,0 +1,61 @@
#!/bin/bash
# Container entrypoint: boots the Ubuntu VM, waits for the CUA
# computer-server inside it to answer on port 5000, then either shuts
# down (initial golden-image build) or keeps the container alive.

# Forward termination signals to the VM process so QEMU can shut down cleanly.
cleanup() {
    echo "Received signal, shutting down gracefully..."
    if [ -n "$VM_PID" ]; then
        kill -TERM "$VM_PID" 2>/dev/null
        wait "$VM_PID" 2>/dev/null
    fi
    exit 0
}

# Install trap for signals
trap cleanup SIGTERM SIGINT SIGHUP SIGQUIT

# Start the VM in the background
echo "Starting Ubuntu VM..."
/usr/bin/tini -s /run/entry.sh &
VM_PID=$!

echo "Live stream accessible at localhost:8006"
echo "Waiting for Ubuntu to boot and CUA computer-server to start..."

VM_IP=""
while true; do
    # Derive the VM's IP from the dnsmasq DHCP range the base image configures.
    if [ -z "$VM_IP" ]; then
        VM_IP=$(ps aux | grep dnsmasq | grep -oP '(?<=--dhcp-range=)[0-9.]+' | head -1)
        if [ -n "$VM_IP" ]; then
            echo "Detected VM IP: $VM_IP"
        else
            echo "Waiting for VM to start..."
            sleep 5
            continue
        fi
    fi
    # Probe the computer-server status endpoint. --max-time prevents a
    # half-open connection from stalling this loop indefinitely; on any
    # curl failure the printed http_code is "000", which fails the check.
    response=$(curl --max-time 5 --write-out '%{http_code}' --silent --output /dev/null "$VM_IP:5000/status")
    if [ "${response:-0}" -eq 200 ]; then
        break
    fi
    echo "Waiting for CUA computer-server to be ready. This might take a while..."
    sleep 5
done

echo "VM is up and running, and the CUA Computer Server is ready!"
echo "Computer server accessible at localhost:5000"

# A custom ISO mounted at / means this is the first (golden-image) run:
# once the server is confirmed ready, preparation is done and we stop.
CUSTOM_ISO=$(find / -maxdepth 1 -type f -iname "*.iso" -print -quit 2>/dev/null || true)
if [ -n "$CUSTOM_ISO" ]; then
    echo "Preparation complete. Shutting down gracefully..."
    cleanup
fi

# Keep container alive for golden image boots
echo "Container running. Press Ctrl+C to stop."
tail -f /dev/null

View File

@@ -0,0 +1,7 @@
> Add your Ubuntu 22.04 live server setup.iso to this folder
**Download Ubuntu 22.04 LTS Server ISO:**
1. Visit & download the [server ISO](https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso)
2. After downloading, rename the file to `setup.iso`
3. Copy it to the current directory.

View File

@@ -0,0 +1,26 @@
#!/bin/bash
# OEM Installation Entry Point for Linux
# This script is called by the OEM systemd service on first boot.
# It delegates all real work to setup.sh and mirrors output to a log file.
# pipefail: without it, `bash setup.sh | tee` reports tee's (always 0)
# exit status, so a setup.sh failure would be silently swallowed.
set -eo pipefail

SCRIPT_DIR="/opt/oem"
LOG_FILE="$SCRIPT_DIR/setup.log"

# Log a timestamped message to stdout and the setup log.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

log "=== Starting OEM Setup ==="

# Run main setup script
if [ -f "$SCRIPT_DIR/setup.sh" ]; then
    log "Running setup.sh..."
    # Capture setup.sh's real exit code (not tee's) so it can be logged
    # and propagated to the systemd service.
    rc=0
    bash "$SCRIPT_DIR/setup.sh" 2>&1 | tee -a "$LOG_FILE" || rc=$?
    log "setup.sh completed with exit code: $rc"
    if [ "$rc" -ne 0 ]; then
        exit "$rc"
    fi
else
    log "ERROR: setup.sh not found at $SCRIPT_DIR/setup.sh"
    exit 1
fi

log "=== OEM Setup Completed ==="

View File

@@ -0,0 +1,135 @@
#!/bin/bash
# Setup CUA Computer Server on Linux
# Creates a system-level systemd service to run computer server in background
#
# Invoked from setup.sh during OEM first-boot provisioning. Installs
# Python, builds an isolated venv under /opt/cua-server, generates a
# self-restarting launcher script, grants local X11 access, and installs
# a systemd unit tied to graphical.target.
set -e
# "docker" is the VM's login user created by the unattended install.
USER_NAME="docker"
USER_HOME="/home/$USER_NAME"
SCRIPT_DIR="/opt/oem"
CUA_DIR="/opt/cua-server"
VENV_DIR="$CUA_DIR/venv"
SERVICE_NAME="cua-computer-server"
LOG_FILE="$SCRIPT_DIR/setup.log"
# Log a timestamped message to stdout and the shared OEM setup log.
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}
log "=== Installing CUA Computer Server ==="
# Install Python 3 and venv
# NOTE(review): assumes `apt-get update` already ran (setup.sh does) — confirm
log "Installing Python 3 and dependencies..."
sudo apt-get install -y python3 python3-venv python3-pip python3-tk python3-dev
# Create CUA directory
log "Creating CUA directory at $CUA_DIR..."
sudo mkdir -p "$CUA_DIR"
sudo chown "$USER_NAME:$USER_NAME" "$CUA_DIR"
# Create virtual environment (idempotent: skip if a venv already exists,
# e.g. when this script is re-run after a partial setup)
if [ -f "$VENV_DIR/bin/python" ]; then
log "Existing venv detected; skipping creation"
else
log "Creating Python virtual environment at $VENV_DIR..."
python3 -m venv "$VENV_DIR"
log "Virtual environment created successfully"
fi
# Activate and install packages (venv binaries are invoked directly;
# no `source activate` needed)
log "Upgrading pip, setuptools, and wheel..."
"$VENV_DIR/bin/pip" install --upgrade pip setuptools wheel
log "Installing cua-computer-server..."
"$VENV_DIR/bin/pip" install --upgrade cua-computer-server
log "cua-computer-server installed successfully"
# Open firewall for port 5000 (if ufw is available); `|| true` keeps
# setup going if ufw is present but inactive
if command -v ufw &> /dev/null; then
log "Opening firewall for port 5000..."
sudo ufw allow 5000/tcp || true
log "Firewall rule added"
fi
# Create start script with auto-restart. The quoted 'EOF' delimiter keeps
# $VARS in the generated script literal (expanded at runtime, not now).
START_SCRIPT="$CUA_DIR/start-server.sh"
log "Creating start script at $START_SCRIPT..."
cat > "$START_SCRIPT" << 'EOF'
#!/bin/bash
# CUA Computer Server Start Script with auto-restart
CUA_DIR="/opt/cua-server"
VENV_DIR="$CUA_DIR/venv"
LOG_FILE="$CUA_DIR/server.log"
start_server() {
echo "$(date '+%Y-%m-%d %H:%M:%S') Updating cua-computer-server..." >> "$LOG_FILE"
"$VENV_DIR/bin/pip" install --upgrade cua-computer-server >> "$LOG_FILE" 2>&1
echo "$(date '+%Y-%m-%d %H:%M:%S') Starting CUA Computer Server on port 5000..." >> "$LOG_FILE"
"$VENV_DIR/bin/python" -m computer_server --port 5000 >> "$LOG_FILE" 2>&1
return $?
}
while true; do
start_server
EXIT_CODE=$?
echo "$(date '+%Y-%m-%d %H:%M:%S') Server exited with code: $EXIT_CODE. Restarting in 5s..." >> "$LOG_FILE"
sleep 5
done
EOF
chmod +x "$START_SCRIPT"
log "Start script created"
# Create xhost script for X11 access so the service user can drive the
# display; runs for every X session via Xsession.d
log "Creating xhost script..."
sudo tee /etc/X11/Xsession.d/99xauth > /dev/null << 'EOF'
#!/bin/sh
# Grant local X11 access for CUA Computer Server
export DISPLAY=:0
xhost +local: 2>/dev/null || true
EOF
sudo chmod +x /etc/X11/Xsession.d/99xauth
log "X11 access script created"
# Create system-level systemd service. Unquoted EOF here: $START_SCRIPT,
# $USER_NAME, $USER_HOME ARE expanded into the unit file now.
log "Creating systemd system service..."
sudo tee /etc/systemd/system/$SERVICE_NAME.service > /dev/null << EOF
[Unit]
Description=CUA Computer Server
After=graphical.target
[Service]
Type=simple
ExecStart=$START_SCRIPT
Restart=always
RestartSec=5
Environment=PYTHONUNBUFFERED=1
Environment=DISPLAY=:0
Environment=XAUTHORITY=$USER_HOME/.Xauthority
User=$USER_NAME
WorkingDirectory=$CUA_DIR
[Install]
WantedBy=graphical.target
EOF
log "Systemd service created at /etc/systemd/system/$SERVICE_NAME.service"
# Ensure proper ownership of CUA directory
log "Setting ownership of $CUA_DIR to $USER_NAME..."
sudo chown -R "$USER_NAME:$USER_NAME" "$CUA_DIR"
# Enable and start the service
log "Enabling systemd service..."
sudo systemctl daemon-reload
sudo systemctl enable "$SERVICE_NAME.service"
log "Starting CUA Computer Server service..."
# `|| true`: the service may not start until the graphical target is up;
# don't abort setup over it
sudo systemctl start "$SERVICE_NAME.service" || true
log "=== CUA Computer Server setup completed ==="
log "Service status: $(sudo systemctl is-active $SERVICE_NAME.service 2>/dev/null || echo 'unknown')"

View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Main Setup Script for Linux
# Installs dependencies and sets up CUA Computer Server.
# Invoked from install.sh during OEM first-boot provisioning.
# pipefail: without it, `bash setup-cua-server.sh | tee` takes tee's
# (always 0) exit status and a server-setup failure would go unnoticed.
set -eo pipefail

SCRIPT_DIR="/opt/oem"
LOG_FILE="$SCRIPT_DIR/setup.log"

# Log a timestamped message to stdout and the setup log.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

log "=== Running Main Setup ==="

# First boot is unattended: make sure apt never blocks on a prompt.
export DEBIAN_FRONTEND=noninteractive

# Update package lists
log "Updating package lists..."
sudo apt-get update

# Install Git
log "Installing Git..."
sudo apt-get install -y git

# Setup CUA Computer Server
log "Setting up CUA Computer Server..."
if [ -f "$SCRIPT_DIR/setup-cua-server.sh" ]; then
    bash "$SCRIPT_DIR/setup-cua-server.sh" 2>&1 | tee -a "$LOG_FILE"
    log "CUA Computer Server setup completed."
else
    log "ERROR: setup-cua-server.sh not found at $SCRIPT_DIR/setup-cua-server.sh"
fi

log "=== Main Setup Completed ==="

View File

@@ -0,0 +1,15 @@
# CUA Windows container image: extends the trycua windows-local base
# with OEM setup scripts and a custom entrypoint.
FROM trycua/windows-local:latest
# /oem scripts are picked up inside the Windows guest during first-boot setup.
COPY src/vm/setup/. /oem/
# Container entrypoint: boots the VM and waits for the computer-server.
COPY --chmod=755 src/entry.sh /entry.sh
# Default VM resources; overridable at `docker run` time with -e.
ENV RAM_SIZE="8G"
ENV CPU_CORES="8"
ENV VERSION="win11x64-enterprise-eval"
ENV DISK_SIZE="30G"
# Extra QEMU args: expose a QMP control socket on port 7200.
ENV ARGUMENTS="-qmp tcp:0.0.0.0:7200,server,nowait"
# 5000: CUA computer-server API; 8006: noVNC web UI
EXPOSE 5000 8006
ENTRYPOINT ["/entry.sh"]

View File

@@ -0,0 +1,159 @@
# CUA Windows Container
Containerized Windows 11 virtual desktop for Computer-Using Agents (CUA). Utilizes QEMU/KVM with Windows 11 and computer-server pre-installed for remote computer control.
## Features
- Windows 11 Enterprise running in QEMU/KVM
- Pre-installed CUA computer-server for remote computer control
- Caddy reverse proxy (port 9222 → 1337) for browser automation
- noVNC access for visual desktop interaction
- Automated setup via unattended installation
- Support for both dev (shared folder) and azure (OEM folder) deployment modes
- Python 3.12 with isolated virtual environment for CUA computer-server
- Services run hidden in background via Windows scheduled tasks
- Essential tools pre-installed (Chrome, LibreOffice, VLC, GIMP, VSCode, Thunderbird)
## Quick Start
### 1. Download and Prepare setup.iso
**Download Windows 11 Evaluation ISO:**
1. Visit [Microsoft Evaluation Center](https://info.microsoft.com/ww-landing-windows-11-enterprise.html)
2. Accept the Terms of Service
3. Download **Windows 11 Enterprise Evaluation (90-day trial, English, United States)** ISO file [~6GB]
4. After downloading, rename the file to `setup.iso`
5. Copy it to the directory `src/vm/image/`
This ISO is used for automated Windows installation on first run.
### 2. Build the Image
```bash
docker build -t cua-windows:dev .
```
### 3. First Run - Create Golden Image
On first run, the container will install Windows from scratch and create a golden image. This takes 15-30 minutes.
```bash
# Create storage directory
mkdir -p ./storage
# Run with setup.iso to create golden image
docker run -it --rm \
--device=/dev/kvm \
--platform linux/amd64 \
--name cua-windows \
--mount type=bind,source=$(pwd)/src/vm/image/setup.iso,target=/custom.iso \
--cap-add NET_ADMIN \
-v $(pwd)/storage:/storage \
-p 8006:8006 \
-p 5000:5000 \
-e RAM_SIZE=8G \
-e CPU_CORES=4 \
-e DISK_SIZE=20G \
cua-windows:dev
```
**What happens during first run:**
1. Windows 11 installs automatically using unattended configuration
2. Setup scripts install Python 3.12, Git, and CUA computer-server in isolated venv
3. Windows scheduled tasks created for CUA server and Caddy proxy (run hidden in background)
4. Golden image is saved to `/storage` directory
5. Container exits after setup completes
### 4. Subsequent Runs - Use Golden Image
After the golden image is created, subsequent runs boot much faster (30 sec - 2 min):
```bash
# Run without setup.iso - uses existing golden image
docker run -it --rm \
--device=/dev/kvm \
--platform linux/amd64 \
--name cua-windows \
--cap-add NET_ADMIN \
-v $(pwd)/storage:/storage \
-p 8006:8006 \
-p 5000:5000 \
-e RAM_SIZE=8G \
-e CPU_CORES=4 \
cua-windows:dev
```
**Access points:**
- **Computer Server API**: `http://localhost:5000`
- **noVNC Browser**: `http://localhost:8006`
## Container Configuration
### Ports
- **5000**: CUA computer-server API endpoint
- **8006**: noVNC web interface for visual desktop access
### Environment Variables
- `RAM_SIZE`: RAM allocated to Windows VM (default: "8G", recommended: "8G" for WSL2)
- `CPU_CORES`: CPU cores allocated to VM (default: "8")
- `DISK_SIZE`: VM disk size (default: "30G", minimum: "20G")
- `VERSION`: Windows version (default: "win11x64-enterprise-eval")
### Volumes
- `/storage`: Persistent VM storage (golden image, disk, firmware)
- `/custom.iso`: Mount point for setup.iso (only needed for first run)
## Architecture
```
┌─────────────────────────────────────────────────────────┐
│ Docker Container (Linux host) │
│ │
│ • Port forwarding: localhost:5000 → EMULATOR_IP:5000 │
│ • Exposes: 5000 (API), 8006 (noVNC) │
│ │
│ ┌────────────────────────────────────────────────────┐ │
│ │ QEMU VM (Windows 11) │ │
│ │ │ │
│ │ • CUA computer-server listens on 5000 │ │
│ │ │ │
│ └────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────┘
```
**Communication Flow:**
1. External client → `localhost:5000` (host)
2. Docker port mapping → Container's `localhost:5000`
3. socat port forwarding → `20.20.20.21:5000` (VM)
4. CUA computer-server in Windows VM processes request
## Development
### Modifying Setup Scripts
Setup scripts are in `src/vm/setup/`:
- `install.bat`: Entry point called by Windows setup
- `setup.ps1`: Main setup orchestration (installs software, configures Windows)
- `setup-cua-server.ps1`: CUA server installation with isolated venv
- `on-logon.ps1`: Runs on user logon (starts scheduled tasks)
- `setup-utils.psm1`: Helper functions for setup
After modifying, rebuild the image:
```bash
docker build -t cua-windows:dev .
```
## Credits
- Built on [Dockur Windows](https://github.com/dockur/windows) base image
- Inspired by [Windows Agent Arena](https://github.com/microsoft/WindowsAgentArena)

View File

@@ -0,0 +1,67 @@
#!/bin/bash
# Container entrypoint: boots the Windows VM, waits for the CUA
# computer-server inside it to answer on port 5000, then either shuts
# down (initial golden-image build) or keeps the container alive.

# Forward termination signals to the VM process so QEMU can shut down cleanly.
cleanup() {
    echo "Received signal, shutting down gracefully..."
    if [ -n "$VM_PID" ]; then
        kill -TERM "$VM_PID" 2>/dev/null
        wait "$VM_PID" 2>/dev/null
    fi
    exit 0
}

# Install trap for signals
trap cleanup SIGTERM SIGINT SIGHUP SIGQUIT

# Create windows.boot file if it doesn't exist (required for proper boot).
# Two [ ] tests joined by && instead of the obsolescent `-a` operator.
if [ -d "/storage" ] && [ ! -f "/storage/windows.boot" ]; then
    echo "Creating windows.boot file in /storage..."
    touch /storage/windows.boot
fi

# Start the VM in the background
echo "Starting Windows VM..."
/usr/bin/tini -s /run/entry.sh &
VM_PID=$!

echo "Live stream accessible at localhost:8006"
echo "Waiting for Windows to boot and CUA computer-server to start..."

VM_IP=""
while true; do
    # Derive the VM's IP from the dnsmasq DHCP range the base image configures.
    if [ -z "$VM_IP" ]; then
        VM_IP=$(ps aux | grep dnsmasq | grep -oP '(?<=--dhcp-range=)[0-9.]+' | head -1)
        if [ -n "$VM_IP" ]; then
            echo "Detected VM IP: $VM_IP"
        else
            echo "Waiting for VM to start..."
            sleep 5
            continue
        fi
    fi
    # Probe the computer-server status endpoint. --max-time prevents a
    # half-open connection from stalling this loop indefinitely; on any
    # curl failure the printed http_code is "000", which fails the check.
    response=$(curl --max-time 5 --write-out '%{http_code}' --silent --output /dev/null "$VM_IP:5000/status")
    if [ "${response:-0}" -eq 200 ]; then
        break
    fi
    echo "Waiting for CUA computer-server to be ready. This might take a while..."
    sleep 5
done

echo "VM is up and running, and the CUA Computer Server is ready!"
echo "Computer server accessible at localhost:5000"

# A custom ISO mounted at / means this is the first (golden-image) run:
# once the server is confirmed ready, preparation is done and we stop.
CUSTOM_ISO=$(find / -maxdepth 1 -type f -iname "*.iso" -print -quit 2>/dev/null || true)
if [ -n "$CUSTOM_ISO" ]; then
    echo "Preparation complete. Shutting down gracefully..."
    cleanup
fi

# Keep container alive for golden image boots
echo "Container running. Press Ctrl+C to stop."
tail -f /dev/null

View File

@@ -0,0 +1,9 @@
> Add your Win11E setup.iso to this folder
**Download Windows 11 Evaluation ISO:**
1. Visit [Microsoft Evaluation Center](https://info.microsoft.com/ww-landing-windows-11-enterprise.html)
2. Accept the Terms of Service
3. Download **Windows 11 Enterprise Evaluation (90-day trial, English, United States)** ISO file [~6GB]
4. After downloading, rename the file to `setup.iso`
5. Copy it to the current directory.

View File

@@ -0,0 +1,31 @@
@echo off
:: OEM entry point: invoked during Windows unattended setup on first boot.
:: Delegates all real work to setup.ps1 and mirrors output to a log file.
SET ScriptFolder=C:\OEM
SET LogFile=%ScriptFolder%\ps_script_log.txt
echo Running PowerShell script... > %LogFile%
:: Check for PowerShell availability; abort with a nonzero code if missing
where powershell >> %LogFile% 2>&1
if %ERRORLEVEL% neq 0 (
echo PowerShell is not available! >> %LogFile%
echo PowerShell is not available!
exit /b 1
)
:: Add a 30-second delay (presumably to let first-boot services settle
:: before setup runs — confirm)
echo Waiting for 30 seconds before continuing... >> %LogFile%
timeout /t 30 /nobreak >> %LogFile% 2>&1
:: Run PowerShell script with ExecutionPolicy Bypass and log errors.
:: NOTE(review): a setup.ps1 failure is logged but not propagated — the
:: script still exits 0; confirm callers don't rely on the exit code.
echo Running setup.ps1... >> %LogFile%
powershell -ExecutionPolicy Bypass -File "%ScriptFolder%\setup.ps1" >> %LogFile% 2>&1
if %ERRORLEVEL% neq 0 (
echo An error occurred. See %LogFile% for details.
) else (
echo PowerShell script has completed successfully.
)
echo PowerShell script has completed.

Some files were not shown because too many files have changed in this diff Show More