diff --git a/docs/content/docs/get-started/quickstart.mdx b/docs/content/docs/get-started/quickstart.mdx
index cea2b335..894856a2 100644
--- a/docs/content/docs/get-started/quickstart.mdx
+++ b/docs/content/docs/get-started/quickstart.mdx
@@ -332,253 +332,151 @@ Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available m
## CLI Quickstart
+Get started quickly with the CUA CLI - the easiest way to manage cloud VMs and run AI agents.
+
-### Install Cua
+### Install the CUA CLI
-
-
-
-
-#### Install uv
-
-
-
-
-```bash
-# Use curl to download the script and execute it with sh:
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# If your system doesn't have curl, you can use wget:
-# wget -qO- https://astral.sh/uv/install.sh | sh
-```
-
-
-
-
-```powershell
-# Use irm to download the script and execute it with iex:
-powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
-```
-
-
+
+
+ ```bash
+ curl -LsSf https://cua.ai/cli/install.sh | sh
+ ```
+
+
+ ```powershell
+ powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+ ```
+
+
+ ```bash
+ npm install -g @trycua/cli
+ ```
+
+
+ ```bash
+ # Install Bun (macOS/Linux)
+ curl -fsSL https://bun.sh/install | bash
+
+ # Install Bun (Windows)
+ # powershell -c "irm bun.sh/install.ps1|iex"
+
+ # Clone the repo
+ git clone https://github.com/trycua/cua
+ cd cua/libs/typescript/cua-cli
+
+ # Install the CLI
+ bun install
+ bun link
+ bun link cua-cli
+ ```
+
-#### Install Python 3.12
-
-```bash
-uv python install 3.12
-# uv will install Cua dependencies automatically when you use --with "cua-agent[cli]"
-```
-
-
-
-
-
-#### Install conda
-
-
-
-
-```bash
-mkdir -p ~/miniconda3
-curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh
-bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
-rm ~/miniconda3/miniconda.sh
-source ~/miniconda3/bin/activate
-```
-
-
-
-
-```bash
-mkdir -p ~/miniconda3
-wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
-bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
-rm ~/miniconda3/miniconda.sh
-source ~/miniconda3/bin/activate
-```
-
-
-
-
-```powershell
-wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile ".\miniconda.exe"
-Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait
-del .\miniconda.exe
-```
-
-
-
-
-#### Create and activate Python 3.12 environment
-
-```bash
-conda create -n cua python=3.12
-conda activate cua
-```
-
-#### Install Cua
-
-```bash
-pip install "cua-agent[cli]" cua-computer
-```
-
-
-
-
-
-#### Install Cua
-
-```bash
-pip install "cua-agent[cli]" cua-computer
-```
-
-
-
-
-
-### Run Cua CLI
+### Authenticate with CUA
-Choose your preferred AI model:
-
-#### OpenAI Computer Use Preview
-
-
-
+Log in to your CUA account:
```bash
-uv run --with "cua-agent[cli]" -m agent.cli openai/computer-use-preview
+# Interactive browser login (recommended)
+cua auth login
+
+# Or provide your API key directly
+cua auth login --api-key sk-your-api-key-here
```
-
-
+If you don't have a CUA account yet, sign up at [cua.ai/signin](https://cua.ai/signin).
+
+
+
+
+
+### Create Your First VM
+
+Create a cloud sandbox where your AI agents will run:
```bash
-python -m agent.cli openai/computer-use-preview
+# Create a Linux VM (recommended for most use cases)
+cua vm create --os linux --configuration small --region north-america
+
+# Or create a Windows VM
+cua vm create --os windows --configuration small --region north-america
+
+# Or create a macOS VM
+cua vm create --os macos --configuration small --region north-america
```
-
-
+Your VM will be created and you'll see output like:
+```
+VM created and ready: my-vm-abc123
+Password: secure-password-here
+Host: my-vm-abc123.containers.cloud.trycua.com
+```
-#### Anthropic Claude
+
-
-
+
+### Start Using Your VM
+
+You can now interact with your VM in multiple ways:
+
+#### Option 1: Open the AI Playground (Recommended)
```bash
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-5-20250929
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-20250514
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-1-20250805
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-20250514
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022
+cua vm chat my-vm-abc123
```
+This opens the full CUA playground in your browser where you can chat with AI agents that control your VM.
-
-
-
+#### Option 2: Access VNC Desktop
```bash
-python -m agent.cli anthropic/claude-sonnet-4-5-20250929
-python -m agent.cli anthropic/claude-opus-4-1-20250805
-python -m agent.cli anthropic/claude-opus-4-20250514
-python -m agent.cli anthropic/claude-sonnet-4-20250514
-python -m agent.cli anthropic/claude-3-5-sonnet-20241022
+cua vm vnc my-vm-abc123
```
+This opens a remote desktop connection to your VM.
-
-
-
-#### Omniparser + LLMs
-
-
-
-
+#### Option 3: List and Manage VMs
```bash
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+openai/gpt-4o
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+vertex_ai/gemini-pro
+# List all your VMs
+cua vm list
+
+# Start/stop VMs as needed
+cua vm stop my-vm-abc123
+cua vm start my-vm-abc123
+
+# Delete VMs when done
+cua vm delete my-vm-abc123
```
-
-
+
-```bash
-python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
-python -m agent.cli omniparser+openai/gpt-4o
-python -m agent.cli omniparser+vertex_ai/gemini-pro
-```
+
-
-
+### Try Some AI Tasks
-#### Local Models
-
-
-
-
-```bash
-# Hugging Face models (local)
-uv run --with "cua-agent[cli]" -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
-
-# MLX models (Apple Silicon)
-uv run --with "cua-agent[cli]" -m agent.cli mlx/mlx-community/UI-TARS-1.5-7B-6bit
-
-# Ollama models
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+ollama_chat/llama3.2:latest
-```
-
-
-
-
-```bash
-# Hugging Face models (local)
-python -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
-
-# MLX models (Apple Silicon)
-python -m agent.cli mlx/mlx-community/UI-TARS-1.5-7B-6bit
-
-# Ollama models
-python -m agent.cli omniparser+ollama_chat/llama3.2:latest
-```
-
-
-
-
-#### Interactive Setup
-
-If you haven't set up environment variables, the CLI will guide you through the setup:
-
-1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [cua.ai](https://cua.ai/))
-2. **CUA API Key**: Enter your Cua API key
-3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.)
-
-#### Start Chatting
-
-Once connected, you'll see:
-
-```
-π» Connected to your-container-name (model, agent_loop)
-Type 'exit' to quit.
-
->
-```
-
-You can ask your agent to perform actions like:
+Once you have the playground open (`cua vm chat`), try asking the AI to:
- "Take a screenshot and tell me what's on the screen"
-- "Open Firefox and go to github.com"
-- "Type 'Hello world' into the terminal"
-- "Close the current window"
-- "Click on the search button"
+- "Open Firefox and navigate to github.com"
+- "Create a new text file and write 'Hello World' in it"
+- "Install Python and run a simple script"
+- "Take a screenshot of the desktop"
+
+The AI agent will automatically control your VM to complete these tasks!
+### What's Next?
+
+- **Explore more commands**: Check out the [complete CLI reference](/libraries/cua-cli/commands)
+- **Learn about programming**: Try the [Developer Quickstart](#developer-quickstart) to build custom automations
+- **Join the community**: Get help in our [Discord community](https://discord.com/invite/mVnXXpdE85)
+
---
For running models locally, see [Running Models Locally](/agent-sdk/supported-model-providers/local-models).
diff --git a/docs/content/docs/libraries/cua-cli/commands.mdx b/docs/content/docs/libraries/cua-cli/commands.mdx
new file mode 100644
index 00000000..c60d5a12
--- /dev/null
+++ b/docs/content/docs/libraries/cua-cli/commands.mdx
@@ -0,0 +1,320 @@
+---
+title: Commands
+description: Complete reference for all CUA CLI commands
+---
+
+import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+import { Callout } from 'fumadocs-ui/components/callout';
+
+## Overview
+
+The CUA CLI provides two main command groups:
+
+- **`cua auth`** - Authentication and API key management
+- **`cua vm`** - Virtual machine lifecycle management
+
+## Authentication Commands
+
+### `cua auth login`
+
+Authenticate with your CUA account, either through the browser-based OAuth flow or by passing an API key directly.
+
+```bash
+# Interactive browser login
+cua auth login
+
+# Direct API key login
+cua auth login --api-key sk-your-api-key-here
+```
+
+**Options:**
+- `--api-key <key>` - Provide the API key directly instead of using the browser flow
+
+**Example:**
+```bash
+$ cua auth login
+Opening browser for CLI auth...
+API key saved
+```
+
+### `cua auth pull`
+
+Create or update a `.env` file in the current directory with your CUA API key.
+
+```bash
+cua auth pull
+```
+
+**Example:**
+```bash
+$ cua auth pull
+Wrote /path/to/your/project/.env
+```
+
+The generated `.env` file will contain:
+```
+CUA_API_KEY=sk-your-api-key-here
+```
+
+### `cua auth logout`
+
+Remove the stored API key from your system.
+
+```bash
+cua auth logout
+```
+
+**Example:**
+```bash
+$ cua auth logout
+Logged out
+```
+
+## Virtual Machine Commands
+
+### `cua vm list`
+
+List all your virtual machines with their current status.
+
+```bash
+cua vm list
+```
+
+**Example Output:**
+```
+┌──────────────┬─────────┬─────────┬───────────────┬──────────────────────────────────────────┐
+│ Name         │ Status  │ OS      │ Configuration │ Host                                     │
+├──────────────┼─────────┼─────────┼───────────────┼──────────────────────────────────────────┤
+│ my-dev-vm    │ running │ linux   │ small         │ my-dev-vm.containers.cloud.trycua.com    │
+│ test-windows │ stopped │ windows │ medium        │ test-windows.containers.cloud.trycua.com │
+└──────────────┴─────────┴─────────┴───────────────┴──────────────────────────────────────────┘
+```
+
+### `cua vm create`
+
+Create a new virtual machine.
+
+```bash
+cua vm create --os <os> --configuration <size> --region <region>
+```
+
+**Required Options:**
+- `--os` - Operating system: `linux`, `windows`, `macos`
+- `--configuration` - VM size: `small`, `medium`, `large`
+- `--region` - Region: `north-america`, `europe`, `asia-pacific`, `south-america`
+
+**Examples:**
+```bash
+# Create a small Linux VM in North America
+cua vm create --os linux --configuration small --region north-america
+
+# Create a medium Windows VM in Europe
+cua vm create --os windows --configuration medium --region europe
+
+# Create a large macOS VM in Asia Pacific
+cua vm create --os macos --configuration large --region asia-pacific
+```
+
+**Response Types:**
+
+**Immediate (Status 200):**
+```bash
+VM created and ready: my-new-vm-abc123
+Password: secure-password-here
+Host: my-new-vm-abc123.containers.cloud.trycua.com
+```
+
+**Provisioning (Status 202):**
+```bash
+VM provisioning started: my-new-vm-abc123
+Job ID: job-xyz789
+Use 'cua vm list' to monitor provisioning progress
+```
+
+### `cua vm start`
+
+Start a stopped virtual machine.
+
+```bash
+cua vm start <name>
+```
+
+**Example:**
+```bash
+$ cua vm start my-dev-vm
+Start accepted
+```
+
+### `cua vm stop`
+
+Stop a running virtual machine.
+
+```bash
+cua vm stop <name>
+```
+
+**Example:**
+```bash
+$ cua vm stop my-dev-vm
+stopping
+```
+
+### `cua vm restart`
+
+Restart a virtual machine.
+
+```bash
+cua vm restart <name>
+```
+
+**Example:**
+```bash
+$ cua vm restart my-dev-vm
+restarting
+```
+
+### `cua vm delete`
+
+Delete a virtual machine permanently.
+
+```bash
+cua vm delete <name>
+```
+
+**Example:**
+```bash
+$ cua vm delete old-test-vm
+VM deletion initiated: deleting
+```
+
+
+ This action is irreversible. All data on the VM will be permanently lost.
+
+
+### `cua vm vnc`
+
+Open the VNC interface for a VM in your browser.
+
+```bash
+cua vm vnc <name>
+```
+
+**Example:**
+```bash
+$ cua vm vnc my-dev-vm
+Opening NoVNC: https://my-dev-vm.containers.cloud.trycua.com/vnc.html?autoconnect=true&password=...
+```
+
+This command automatically opens your default browser to the VNC interface with the correct password pre-filled.
+
+### `cua vm chat`
+
+Open the CUA Dashboard Playground for a VM in your browser.
+
+```bash
+cua vm chat <name>
+```
+
+**Example:**
+```bash
+$ cua vm chat my-dev-vm
+Opening Playground: https://cua.ai/dashboard/playground?host=...
+```
+
+This opens the full CUA playground interface where you can interact with your VM using AI agents.
+
+## Global Options
+
+### Help
+
+Get help for any command:
+
+```bash
+cua --help
+cua auth --help
+cua vm --help
+cua vm create --help
+```
+
+### Environment Variables
+
+You can override default endpoints using environment variables:
+
+```bash
+# Use staging environment
+export CUA_API_BASE=https://api.staging.cua.ai
+export CUA_WEBSITE_URL=https://staging.cua.ai
+
+cua vm list # Uses staging API
+```
+
+**Available Variables:**
+- `CUA_API_BASE` - API endpoint (default: `https://api.cua.ai`)
+- `CUA_WEBSITE_URL` - Website URL (default: `https://cua.ai`)
+
+## Error Handling
+
+The CLI provides clear error messages for common issues:
+
+### Authentication Errors
+```bash
+$ cua vm list
+Unauthorized. Try 'cua auth login' again.
+```
+
+### VM Not Found
+```bash
+$ cua vm start nonexistent-vm
+VM not found
+```
+
+### Invalid Configuration
+```bash
+$ cua vm create --os invalid --configuration small --region north-america
+Invalid request or unsupported configuration
+```
+
+## Tips and Best Practices
+
+### 1. Use Descriptive VM Names
+```bash
+# Good
+cua vm create --os linux --configuration small --region north-america
+# Then rename or use meaningful names in the dashboard
+
+# Better workflow
+cua vm list # Check the generated name
+# Use that name consistently
+```
+
+### 2. Environment Management
+```bash
+# Set up your project with API key
+cd my-project
+cua auth pull
+# Now your project has CUA_API_KEY in .env
+```
+
+### 3. Quick VM Access
+```bash
+# Create aliases for frequently used VMs
+alias dev-vm="cua vm chat my-development-vm"
+alias prod-vm="cua vm vnc my-production-vm"
+```
+
+### 4. Monitoring Provisioning
+```bash
+# For VMs that need provisioning time
+cua vm create --os windows --configuration large --region europe
+# VM provisioning started: my-vm-abc123
+# Job ID: job-xyz789
+
+# Check status periodically
+watch -n 5 cua vm list
+```
+
+## Next Steps
+
+- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
+- [Learn about CUA computers](/computer-sdk/computers)
+- [Explore agent automation](/agent-sdk/agent-loops)
diff --git a/docs/content/docs/libraries/cua-cli/index.mdx b/docs/content/docs/libraries/cua-cli/index.mdx
new file mode 100644
index 00000000..3fe90c0f
--- /dev/null
+++ b/docs/content/docs/libraries/cua-cli/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Cua CLI
+description: Command-line interface for managing Cua cloud VMs and authentication
+---
+
+import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+
+The Cua CLI is a command-line tool that provides an intuitive interface for managing your Cua cloud virtual machines and authentication. It offers a streamlined workflow for creating, managing, and connecting to cloud sandboxes.
+
+## Key Features
+
+- **Authentication Management**: Secure login with browser-based OAuth flow
+- **VM Lifecycle**: Create, start, stop, restart, and delete cloud VMs
+- **Quick Access**: Direct links to VNC and playground interfaces
+- **Cross-Platform**: Works on macOS, Linux, and Windows
+- **Environment Integration**: Automatic `.env` file generation
+
+## Quick Example
+
+```bash
+# Install the CLI (installs Bun + CUA CLI)
+curl -LsSf https://cua.ai/cli/install.sh | sh
+
+# Login to your CUA account
+cua auth login
+
+# Create a new Linux VM
+cua vm create --os linux --configuration small --region north-america
+
+# List your VMs
+cua vm list
+
+# Open the playground for your VM
+cua vm chat my-vm-name
+```
+
+## Use Cases
+
+### Development Workflow
+- Quickly spin up cloud sandboxes for testing
+- Manage multiple VMs across different regions
+- Integrate with CI/CD pipelines
+
+### Team Collaboration
+- Share VM configurations and access
+- Standardize development environments
+- Quick onboarding for new team members
+
+### Automation
+- Script VM provisioning and management
+- Integrate with deployment workflows
+- Automate environment setup
+
+## Next Steps
+
+- [Install the CLI](/libraries/cua-cli/installation)
+- [Learn about available commands](/libraries/cua-cli/commands)
+- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
diff --git a/docs/content/docs/libraries/cua-cli/installation.mdx b/docs/content/docs/libraries/cua-cli/installation.mdx
new file mode 100644
index 00000000..5a2fd49b
--- /dev/null
+++ b/docs/content/docs/libraries/cua-cli/installation.mdx
@@ -0,0 +1,152 @@
+---
+title: Installation
+description: Install the CUA CLI on your system
+---
+
+import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+import { Callout } from 'fumadocs-ui/components/callout';
+
+## Quick Install
+
+The fastest way to install the CUA CLI is using our installation scripts:
+
+
+
+ ```bash
+ curl -LsSf https://cua.ai/cli/install.sh | sh
+ ```
+
+
+ ```powershell
+ powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+ ```
+
+
+
+These scripts will automatically:
+1. Install [Bun](https://bun.sh) (a fast JavaScript runtime)
+2. Install the CUA CLI via `bun add -g @trycua/cli`
+
+
+ The installation scripts will automatically detect your system and install the appropriate binary to your PATH.
+
+
+## Alternative: npm Install
+
+You can also install the CLI via npm if you prefer:
+
+```bash
+npm install -g @trycua/cli
+```
+
+
+ The npm package requires Node.js 18+ to be installed on your system.
+
+
+## Verify Installation
+
+After installation, verify the CLI is working:
+
+```bash
+cua --help
+```
+
+You should see the CLI help output with available commands.
+
+## First Time Setup
+
+After installation, you'll need to authenticate with your CUA account:
+
+```bash
+# Login with browser-based OAuth flow
+cua auth login
+
+# Or provide your API key directly
+cua auth login --api-key sk-your-api-key-here
+```
+
+## Updating
+
+To update to the latest version:
+
+
+
+ Re-run the installation script:
+ ```bash
+ # macOS/Linux
+ curl -LsSf https://cua.ai/cli/install.sh | sh
+
+ # Windows
+ powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+ ```
+
+
+ ```bash
+ npm update -g @trycua/cli
+ ```
+
+
+
+## Uninstalling
+
+
+
+ Remove the binary from your PATH:
+ ```bash
+ # macOS/Linux
+ rm $(which cua)
+
+ # Windows
+ # Remove from your PATH or delete the executable
+ ```
+
+
+ ```bash
+ npm uninstall -g @trycua/cli
+ ```
+
+
+
+## Troubleshooting
+
+### Command Not Found
+
+If you get a "command not found" error after installation:
+
+1. **Check your PATH**: Make sure the installation directory is in your PATH
+2. **Restart your terminal**: Close and reopen your terminal/command prompt
+3. **Manual PATH setup**: Add the installation directory to your PATH manually
+
+### Permission Issues
+
+If you encounter permission issues during installation:
+
+
+
+ Try running with sudo (not recommended for the curl method):
+ ```bash
+ # If using npm
+ sudo npm install -g @trycua/cli
+ ```
+
+
+ Run PowerShell as Administrator:
+ ```powershell
+ # Right-click PowerShell and "Run as Administrator"
+ powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+ ```
+
+
+
+### Network Issues
+
+If the installation script fails due to network issues:
+
+1. **Check your internet connection**
+2. **Try the npm installation method instead**
+3. **Check if your firewall is blocking the download**
+
+## Next Steps
+
+- [Learn about CLI commands](/libraries/cua-cli/commands)
+- [Follow the quickstart guide](/get-started/quickstart#cli-quickstart)
diff --git a/docs/content/docs/libraries/cua-cli/meta.json b/docs/content/docs/libraries/cua-cli/meta.json
new file mode 100644
index 00000000..8c40c41d
--- /dev/null
+++ b/docs/content/docs/libraries/cua-cli/meta.json
@@ -0,0 +1,9 @@
+{
+ "title": "CLI",
+ "description": "Command-line interface for CUA",
+ "pages": [
+ "index",
+ "installation",
+ "commands"
+ ]
+}
\ No newline at end of file
diff --git a/libs/python/agent/.bumpversion.cfg b/libs/python/agent/.bumpversion.cfg
index ab6acb97..ef4bfda4 100644
--- a/libs/python/agent/.bumpversion.cfg
+++ b/libs/python/agent/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.4.37
+current_version = 0.4.38
commit = True
tag = True
tag_name = agent-v{new_version}
diff --git a/libs/python/agent/agent/loops/__init__.py b/libs/python/agent/agent/loops/__init__.py
index ab23ac27..1fdb2c22 100644
--- a/libs/python/agent/agent/loops/__init__.py
+++ b/libs/python/agent/agent/loops/__init__.py
@@ -1,36 +1,40 @@
-"""
-Agent loops for agent
-"""
-
-# Import the loops to register them
-from . import (
- anthropic,
- composed_grounded,
- gemini,
- glm45v,
- gta1,
- holo,
- internvl,
- moondream3,
- omniparser,
- openai,
- opencua,
- qwen,
- uitars,
-)
-
-__all__ = [
- "anthropic",
- "openai",
- "uitars",
- "omniparser",
- "gta1",
- "composed_grounded",
- "glm45v",
- "opencua",
- "internvl",
- "holo",
- "moondream3",
- "gemini",
- "qwen",
-]
+"""
+Agent loops for agent
+"""
+
+# Import the loops to register them
+from . import (
+ anthropic,
+ composed_grounded,
+ gelato,
+ gemini,
+ glm45v,
+ gta1,
+ holo,
+ internvl,
+ moondream3,
+ omniparser,
+ openai,
+ opencua,
+ qwen,
+ uiins,
+ uitars,
+)
+
+__all__ = [
+ "anthropic",
+ "openai",
+ "uitars",
+ "omniparser",
+ "gta1",
+ "composed_grounded",
+ "glm45v",
+ "opencua",
+ "internvl",
+ "holo",
+ "moondream3",
+ "gemini",
+ "qwen",
+ "uiins",
+ "gelato",
+]
diff --git a/libs/python/agent/agent/loops/gelato.py b/libs/python/agent/agent/loops/gelato.py
new file mode 100644
index 00000000..e3032472
--- /dev/null
+++ b/libs/python/agent/agent/loops/gelato.py
@@ -0,0 +1,183 @@
+"""
+Gelato agent loop implementation for click prediction using litellm.acompletion
+Model: https://huggingface.co/mlfoundations/Gelato-30B-A3B
+Code: https://github.com/mlfoundations/Gelato/tree/main
+"""
+
+import base64
+import math
+import re
+from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple
+
+import litellm
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability
+
+SYSTEM_PROMPT = """
+You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. For elements with area, return the center point.
+
+Output the coordinate pair exactly:
+(x,y)
+"""
+
+
def extract_coordinates(raw_string):
    """
    Extract the first "(x, y)" coordinate pair from a model response.

    Integer and decimal values are both accepted, optionally negative, and
    whitespace around the numbers is tolerated.

    Args:
        raw_string: str (e.g. "(100, 200)" or "click at (12.5, 40)")
    Returns:
        x: float (e.g. 100.0)
        y: float (e.g. 200.0)
        Returns (0, 0) when the input is not a string or contains no pair.
    """
    # A missing/None response is treated like an unparseable one.
    if not isinstance(raw_string, str):
        return 0, 0

    match = re.search(
        r"\(\s*(-?\d*\.?\d+)\s*,\s*(-?\d*\.?\d+)\s*\)",
        raw_string,
    )
    if match is None:
        return 0, 0

    # Parse as float: the pattern admits decimals, which int() would reject.
    return float(match.group(1)), float(match.group(2))
+
+
def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 3136,
    max_pixels: int = 8847360,
) -> Tuple[int, int]:
    """Compute resize dimensions that are multiples of ``factor``.

    Similar in spirit to the helper in qwen_vl_utils. If the pixel count lies
    outside ``[min_pixels, max_pixels]`` both sides are scaled by the same
    ratio to bring it to the violated bound. Each side is then floored to a
    multiple of ``factor`` and clamped to at least ``factor``.

    Args:
        height: Original image height in pixels (must be positive).
        width: Original image width in pixels (must be positive).
        factor: Both returned dimensions are multiples of this value.
        min_pixels: Lower bound on ``height * width`` before flooring.
        max_pixels: Upper bound on ``height * width`` before flooring.

    Returns:
        ``(new_height, new_width)``.

    Raises:
        ValueError: If ``height`` or ``width`` is not positive.
    """
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got {height}x{width}"
        )

    total_pixels = height * width

    # Rescale only when the pixel budget is violated; the aspect ratio is
    # preserved up to integer truncation.
    if total_pixels > max_pixels:
        scale = (max_pixels / total_pixels) ** 0.5
    elif total_pixels < min_pixels:
        scale = (min_pixels / total_pixels) ** 0.5
    else:
        scale = None

    if scale is not None:
        height = int(height * scale)
        width = int(width * scale)

    # Floor to a multiple of `factor`, never going below one full unit.
    # The clamp is applied on every path: a side shorter than `factor` would
    # otherwise floor to 0 even when the total pixel count is within bounds.
    new_height = max((height // factor) * factor, factor)
    new_width = max((width // factor) * factor, factor)

    return new_height, new_width
+
+
@register_agent(models=r".*Gelato.*")
class GelatoConfig(AsyncAgentConfig):
    """Gelato agent configuration implementing AsyncAgentConfig protocol for click prediction.

    Only click prediction is implemented: ``predict_step`` raises
    ``NotImplementedError`` and ``get_capabilities`` reports ``["click"]``.
    """

    def __init__(self):
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Full agent steps are not supported by this loop; always raises."""
        raise NotImplementedError()

    async def predict_click(
        self, model: str, image_b64: str, instruction: str, **kwargs
    ) -> Optional[Tuple[float, float]]:
        """
        Predict click coordinates using the Gelato model via litellm.acompletion.

        The screenshot is resized with ``smart_resize`` before being sent, and
        the predicted point is scaled back to the original image's pixel space.

        Args:
            model: The Gelato model name
            image_b64: Base64 encoded image
            instruction: Instruction for where to click
            **kwargs: Extra arguments forwarded to ``litellm.acompletion``;
                they override the defaults set here.

        Returns:
            Tuple of (x, y) coordinates in the original image, or None if the
            model returned no text. Note that ``extract_coordinates`` reports
            an unparseable answer as (0, 0), which is passed through as-is.
        """
        # Decode base64 image
        image_data = base64.b64decode(image_b64)
        image = Image.open(BytesIO(image_data))
        width, height = image.width, image.height

        # Smart resize the image (similar to qwen_vl_utils)
        resized_height, resized_width = smart_resize(
            height,
            width,
            factor=28,  # Default factor for Qwen models
            min_pixels=3136,
            max_pixels=4096 * 2160,
        )
        resized_image = image.resize((resized_width, resized_height))
        # Factors that map a point in the resized image back to the original.
        scale_x, scale_y = width / resized_width, height / resized_height

        # Convert resized image back to base64
        buffered = BytesIO()
        resized_image.save(buffered, format="PNG")
        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()

        # Prepare system and user messages
        system_message = {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT.strip()}],
        }

        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
                },
                {"type": "text", "text": instruction},
            ],
        }

        # Prepare API call kwargs (caller-supplied kwargs take precedence)
        api_kwargs = {
            "model": model,
            "messages": [system_message, user_message],
            "max_tokens": 2056,
            "temperature": 0.0,
            **kwargs,
        }

        # Use liteLLM acompletion
        response = await litellm.acompletion(**api_kwargs)

        # Extract response text
        output_text = response.choices[0].message.content  # type: ignore
        if not output_text:
            # No textual answer means there is nothing to parse: report a
            # failed prediction instead of fabricating a click at (0, 0).
            return None

        # Extract and rescale coordinates
        pred_x, pred_y = extract_coordinates(output_text)  # type: ignore
        pred_x *= scale_x
        pred_y *= scale_y

        return (math.floor(pred_x), math.floor(pred_y))

    def get_capabilities(self) -> List[AgentCapability]:
        """Return the capabilities supported by this agent."""
        return ["click"]
diff --git a/libs/python/agent/agent/loops/uiins.py b/libs/python/agent/agent/loops/uiins.py
new file mode 100644
index 00000000..10956948
--- /dev/null
+++ b/libs/python/agent/agent/loops/uiins.py
@@ -0,0 +1,175 @@
+"""
+UI-Ins agent loop implementation for click prediction using litellm.acompletion
+Paper: https://arxiv.org/pdf/2510.20286
+Code: https://github.com/alibaba/UI-Ins
+"""
+
+import asyncio
+import base64
+import json
+import math
+import re
+import uuid
+from io import BytesIO
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+
+import litellm
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability, AgentResponse, Messages, Tools
+
+SYSTEM_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.\n\n## Output Format\nReturn a json object with a reasoning process in tags, a function name and arguments within XML tags:\n```\n\n...\n\n\n{"name": "grounding", "arguments": }\n\n```\n represents the following item of the action space:\n## Action Space{"action": "click", "coordinate": [x, y]}\nYour task is to accurately locate a UI element based on the instruction. You should first analyze instruction in tags and finally output the function in tags.\n"""
+
+
def parse_coordinates(raw_string: str) -> tuple[int, int]:
    """Extract the first ``[x, y]`` integer pair from a model response.

    Whitespace around the numbers is tolerated (e.g. ``"[ 10 , 20 ]"``).

    Args:
        raw_string: Text returned by the model, e.g.
            ``'{"action": "click", "coordinate": [120, 340]}'``.

    Returns:
        ``(x, y)`` as non-negative integers, or the sentinel ``(-1, -1)`` when
        the input is not a string or contains no coordinate pair.
    """
    # A missing/None response is treated like an unparseable one.
    if not isinstance(raw_string, str):
        return -1, -1

    match = re.search(r"\[\s*(\d+)\s*,\s*(\d+)\s*\]", raw_string)
    if match is None:
        return -1, -1
    return int(match.group(1)), int(match.group(2))
+
+
def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 3136,
    max_pixels: int = 8847360,
) -> Tuple[int, int]:
    """Compute resize dimensions that are multiples of ``factor``.

    Similar in spirit to the helper in qwen_vl_utils. If the pixel count lies
    outside ``[min_pixels, max_pixels]`` both sides are scaled by the same
    ratio to bring it to the violated bound. Each side is then floored to a
    multiple of ``factor`` and clamped to at least ``factor``.

    Args:
        height: Original image height in pixels (must be positive).
        width: Original image width in pixels (must be positive).
        factor: Both returned dimensions are multiples of this value.
        min_pixels: Lower bound on ``height * width`` before flooring.
        max_pixels: Upper bound on ``height * width`` before flooring.

    Returns:
        ``(new_height, new_width)``.

    Raises:
        ValueError: If ``height`` or ``width`` is not positive.
    """
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got {height}x{width}"
        )

    total_pixels = height * width

    # Rescale only when the pixel budget is violated; the aspect ratio is
    # preserved up to integer truncation.
    if total_pixels > max_pixels:
        scale = (max_pixels / total_pixels) ** 0.5
    elif total_pixels < min_pixels:
        scale = (min_pixels / total_pixels) ** 0.5
    else:
        scale = None

    if scale is not None:
        height = int(height * scale)
        width = int(width * scale)

    # Floor to a multiple of `factor`, never going below one full unit.
    # The clamp is applied on every path: a side shorter than `factor` would
    # otherwise floor to 0 even when the total pixel count is within bounds.
    new_height = max((height // factor) * factor, factor)
    new_width = max((width // factor) * factor, factor)

    return new_height, new_width
+
+
@register_agent(models=r".*UI-Ins.*")
class UIInsConfig(AsyncAgentConfig):
    """UI-Ins agent configuration implementing AsyncAgentConfig protocol for click prediction.

    Only click prediction is implemented: ``predict_step`` raises
    ``NotImplementedError`` and ``get_capabilities`` reports ``["click"]``.
    """

    def __init__(self):
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Full agent steps are not supported by this loop; always raises."""
        raise NotImplementedError()

    async def predict_click(
        self, model: str, image_b64: str, instruction: str, **kwargs
    ) -> Optional[Tuple[float, float]]:
        """
        Predict click coordinates using UI-Ins model via litellm.acompletion.

        The screenshot is resized with ``smart_resize`` before being sent, and
        the predicted point is scaled back to the original image's pixel space.

        Args:
            model: The UI-Ins model name
            image_b64: Base64 encoded image
            instruction: Instruction for where to click
            **kwargs: Extra arguments forwarded to ``litellm.acompletion``;
                they override the defaults set here.

        Returns:
            Tuple of (x, y) coordinates in the original image, or None if the
            model returned no text or no coordinate pair could be parsed.
        """
        # Decode base64 image
        image_data = base64.b64decode(image_b64)
        image = Image.open(BytesIO(image_data))
        width, height = image.width, image.height

        # Smart resize the image (similar to qwen_vl_utils)
        resized_height, resized_width = smart_resize(
            height,
            width,
            factor=28,  # Default factor for Qwen models
            min_pixels=3136,
            max_pixels=4096 * 2160,
        )
        resized_image = image.resize((resized_width, resized_height))
        # Factors that map a point in the resized image back to the original.
        scale_x, scale_y = width / resized_width, height / resized_height

        # Convert resized image back to base64
        buffered = BytesIO()
        resized_image.save(buffered, format="PNG")
        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()

        # Prepare system and user messages
        system_message = {
            "role": "system",
            "content": [
                {"type": "text", "text": "You are a helpful assistant."},
                {"type": "text", "text": SYSTEM_PROMPT},
            ],
        }

        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
                },
                {"type": "text", "text": instruction},
            ],
        }

        # Prepare API call kwargs (caller-supplied kwargs take precedence)
        api_kwargs = {
            "model": model,
            "messages": [system_message, user_message],
            "max_tokens": 2056,
            "temperature": 0.0,
            **kwargs,
        }

        # Use liteLLM acompletion
        response = await litellm.acompletion(**api_kwargs)

        # Extract response text
        output_text = response.choices[0].message.content  # type: ignore
        if not output_text:
            # No textual answer means there is nothing to parse.
            return None

        # Extract and rescale coordinates
        pred_x, pred_y = parse_coordinates(output_text)  # type: ignore
        if pred_x < 0 or pred_y < 0:
            # parse_coordinates signals "no pair found" with (-1, -1); real
            # matches are always non-negative, so a negative value can only be
            # the sentinel and must not be scaled into a bogus click.
            return None
        pred_x *= scale_x
        pred_y *= scale_y

        return (math.floor(pred_x), math.floor(pred_y))

    def get_capabilities(self) -> List[AgentCapability]:
        """Return the capabilities supported by this agent."""
        return ["click"]
diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml
index fbb4bc9b..e240e4ff 100644
--- a/libs/python/agent/pyproject.toml
+++ b/libs/python/agent/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
[project]
name = "cua-agent"
-version = "0.4.37"
+version = "0.4.38"
description = "CUA (Computer Use) Agent for AI-driven computer interaction"
readme = "README.md"
authors = [