Merge branch 'improve-docs-home' of https://github.com/trycua/cua into improve-docs-home

2026-05-14 12:28:59 -05:00 · 2025-11-12 19:04:00 +01:00
parent 925d2bf607 046b33c3ca
commit dffcf176f6
10 changed files with 1044 additions and 245 deletions
@@ -332,253 +332,151 @@ Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available m

 ## CLI Quickstart

+Get started quickly with the CUA CLI - the easiest way to manage cloud VMs and run AI agents.
+
 <Steps>
 <Step>

-### Install Cua
+### Install the CUA CLI

-<Accordions type="single" defaultValue="uv">
-
-<Accordion title="uv (Recommended)" value="uv">
-
-#### Install uv
-
-<Tabs items={['macOS / Linux', 'Windows']} persist>
-<Tab value="macOS / Linux">
-
-```bash
-# Use curl to download the script and execute it with sh:
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# If your system doesn't have curl, you can use wget:
-# wget -qO- https://astral.sh/uv/install.sh | sh
-```
-
-</Tab>
-<Tab value="Windows">
-
-```powershell
-# Use irm to download the script and execute it with iex:
-powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
-```
-
-</Tab>
+<Tabs items={['macOS / Linux', 'Windows', 'npm (Alternative)', 'From Source']}>
+  <Tab value="macOS / Linux">
+    ```bash
+    curl -LsSf https://cua.ai/cli/install.sh | sh
+    ```
+  </Tab>
+  <Tab value="Windows">
+    ```powershell
+    powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+    ```
+  </Tab>
+  <Tab value="npm (Alternative)">
+    ```bash
+    npm install -g @trycua/cli
+    ```
+  </Tab>
+  <Tab value="From Source">
+    ```bash
+    # Install Bun (macOS/Linux)
+    curl -fsSL https://bun.sh/install | bash
+    
+    # Install Bun (Windows)
+    # powershell -c "irm bun.sh/install.ps1|iex"
+    
+    # Clone the repo
+    git clone https://github.com/trycua/cua
+    cd cua/libs/typescript/cua-cli
+    
+    # Install the CLI
+    bun install
+    bun link
+    bun link cua-cli
+    ```
+  </Tab>
 </Tabs>

-#### Install Python 3.12
-
-```bash
-uv python install 3.12
-# uv will install Cua dependencies automatically when you use --with "cua-agent[cli]"
-```
-
-</Accordion>
-
-<Accordion title="conda" value="conda">
-
-#### Install conda
-
-<Tabs items={['macOS', 'Linux', 'Windows']} persist>
-<Tab value="macOS">
-
-```bash
-mkdir -p ~/miniconda3
-curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh
-bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
-rm ~/miniconda3/miniconda.sh
-source ~/miniconda3/bin/activate
-```
-
-</Tab>
-<Tab value="Linux">
-
-```bash
-mkdir -p ~/miniconda3
-wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
-bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
-rm ~/miniconda3/miniconda.sh
-source ~/miniconda3/bin/activate
-```
-
-</Tab>
-<Tab value="Windows">
-
-```powershell
-wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile ".\miniconda.exe"
-Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait
-del .\miniconda.exe
-```
-
-</Tab>
-</Tabs>
-
-#### Create and activate Python 3.12 environment
-
-```bash
-conda create -n cua python=3.12
-conda activate cua
-```
-
-#### Install Cua
-
-```bash
-pip install "cua-agent[cli]" cua-computer
-```
-
-</Accordion>
-
-<Accordion title="pip" value="pip">
-
-#### Install Cua
-
-```bash
-pip install "cua-agent[cli]" cua-computer
-```
-
-</Accordion>
-
-</Accordions>
-
 </Step>

 <Step>

-### Run Cua CLI
+### Authenticate with CUA

-Choose your preferred AI model:
-
-#### OpenAI Computer Use Preview
-
-<Tabs items={['uv', 'conda/pip']} persist>
-<Tab value="uv">
+Log in to your CUA account:

 ```bash
-uv run --with "cua-agent[cli]" -m agent.cli openai/computer-use-preview
+# Interactive browser login (recommended)
+cua auth login
+
+# Or provide your API key directly
+cua auth login --api-key sk-your-api-key-here
 ```

-</Tab>
-<Tab value="conda/pip">
+If you don't have a CUA account yet, sign up at [cua.ai/signin](https://cua.ai/signin).
+
+</Step>
+
+<Step>
+
+### Create Your First VM
+
+Create a cloud sandbox where your AI agents will run:

 ```bash
-python -m agent.cli openai/computer-use-preview
+# Create a Linux VM (recommended for most use cases)
+cua vm create --os linux --configuration small --region north-america
+
+# Or create a Windows VM
+cua vm create --os windows --configuration small --region north-america
+
+# Or create a macOS VM
+cua vm create --os macos --configuration small --region north-america
 ```

-</Tab>
-</Tabs>
+Your VM will be created and you'll see output like:
+```
+VM created and ready: my-vm-abc123
+Password: secure-password-here
+Host: my-vm-abc123.containers.cloud.trycua.com
+```

-#### Anthropic Claude
+</Step>

-<Tabs items={['uv', 'conda/pip']} persist>
-<Tab value="uv">
+<Step>

+### Start Using Your VM
+
+You can now interact with your VM in multiple ways:
+
+#### Option 1: Open the AI Playground (Recommended)
 ```bash
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-5-20250929
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-20250514
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-1-20250805
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-20250514
-uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022
+cua vm chat my-vm-abc123
 ```
+This opens the full CUA playground in your browser where you can chat with AI agents that control your VM.

-</Tab>
-<Tab value="conda/pip">
-
+#### Option 2: Access VNC Desktop
 ```bash
-python -m agent.cli anthropic/claude-sonnet-4-5-20250929
-python -m agent.cli anthropic/claude-opus-4-1-20250805
-python -m agent.cli anthropic/claude-opus-4-20250514
-python -m agent.cli anthropic/claude-sonnet-4-20250514
-python -m agent.cli anthropic/claude-3-5-sonnet-20241022
+cua vm vnc my-vm-abc123
 ```
+This opens a remote desktop connection to your VM.

-</Tab>
-</Tabs>
-
-#### Omniparser + LLMs
-
-<Tabs items={['uv', 'conda/pip']} persist>
-<Tab value="uv">
-
+#### Option 3: List and Manage VMs
 ```bash
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+openai/gpt-4o
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+vertex_ai/gemini-pro
+# List all your VMs
+cua vm list
+
+# Start/stop VMs as needed
+cua vm stop my-vm-abc123
+cua vm start my-vm-abc123
+
+# Delete VMs when done
+cua vm delete my-vm-abc123
 ```

-</Tab>
-<Tab value="conda/pip">
+</Step>

-```bash
-python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
-python -m agent.cli omniparser+openai/gpt-4o
-python -m agent.cli omniparser+vertex_ai/gemini-pro
-```
+<Step>

-</Tab>
-</Tabs>
+### Try Some AI Tasks

-#### Local Models
-
-<Tabs items={['uv', 'conda/pip']} persist>
-<Tab value="uv">
-
-```bash
-# Hugging Face models (local)
-uv run --with "cua-agent[cli]" -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
-
-# MLX models (Apple Silicon)
-uv run --with "cua-agent[cli]" -m agent.cli mlx/mlx-community/UI-TARS-1.5-7B-6bit
-
-# Ollama models
-uv run --with "cua-agent[cli]" -m agent.cli omniparser+ollama_chat/llama3.2:latest
-```
-
-</Tab>
-<Tab value="conda/pip">
-
-```bash
-# Hugging Face models (local)
-python -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
-
-# MLX models (Apple Silicon)
-python -m agent.cli mlx/mlx-community/UI-TARS-1.5-7B-6bit
-
-# Ollama models
-python -m agent.cli omniparser+ollama_chat/llama3.2:latest
-```
-
-</Tab>
-</Tabs>
-
-#### Interactive Setup
-
-If you haven't set up environment variables, the CLI will guide you through the setup:
-
-1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [cua.ai](https://cua.ai/))
-2. **CUA API Key**: Enter your Cua API key
-3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.)
-
-#### Start Chatting
-
-Once connected, you'll see:
-
-```
-💻 Connected to your-container-name (model, agent_loop)
-Type 'exit' to quit.
-
->
-```
-
-You can ask your agent to perform actions like:
+Once you have the playground open (`cua vm chat`), try asking the AI to:

 - "Take a screenshot and tell me what's on the screen"
- "Open Firefox and go to github.com"
- "Type 'Hello world' into the terminal"
- "Close the current window"
- "Click on the search button"
+- "Open Firefox and navigate to github.com"
+- "Create a new text file and write 'Hello World' in it"
+- "Install Python and run a simple script"
+- "Take a screenshot of the desktop"
+
+The AI agent will automatically control your VM to complete these tasks!

 </Step>
 </Steps>

+### What's Next?
+
+- **Explore more commands**: Check out the [complete CLI reference](/libraries/cua-cli/commands)
+- **Learn about programming**: Try the [Developer Quickstart](#developer-quickstart) to build custom automations
+- **Join the community**: Get help in our [Discord community](https://discord.com/invite/mVnXXpdE85)
+
 ---

 For running models locally, see [Running Models Locally](/agent-sdk/supported-model-providers/local-models).
@@ -0,0 +1,320 @@
+---
+title: Commands
+description: Complete reference for all CUA CLI commands
+---
+
+import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+import { Callout } from 'fumadocs-ui/components/callout';
+
+## Overview
+
+The CUA CLI provides two main command groups:
+
+- **`cua auth`** - Authentication and API key management
+- **`cua vm`** - Virtual machine lifecycle management
+
+## Authentication Commands
+
+### `cua auth login`
+
+Authenticate with your CUA account using a browser-based OAuth flow, or pass an API key directly with `--api-key`.
+
+```bash
+# Interactive browser login
+cua auth login
+
+# Direct API key login
+cua auth login --api-key sk-your-api-key-here
+```
+
+**Options:**
+- `--api-key <key>` - Provide API key directly instead of browser flow
+
+**Example:**
+```bash
+$ cua auth login
+Opening browser for CLI auth...
+API key saved
+```
+
+### `cua auth pull`
+
+Create or update a `.env` file in the current directory with your CUA API key.
+
+```bash
+cua auth pull
+```
+
+**Example:**
+```bash
+$ cua auth pull
+Wrote /path/to/your/project/.env
+```
+
+The generated `.env` file will contain:
+```
+CUA_API_KEY=sk-your-api-key-here
+```
+
+### `cua auth logout`
+
+Remove the stored API key from your system.
+
+```bash
+cua auth logout
+```
+
+**Example:**
+```bash
+$ cua auth logout
+Logged out
+```
+
+## Virtual Machine Commands
+
+### `cua vm list`
+
+List all your virtual machines with their current status.
+
+```bash
+cua vm list
+```
+
+**Example Output:**
+```
+┌─────────────────┬──────────┬─────────┬─────────────────┬──────────────────────────────────────────┐
+│ Name            │ Status   │ OS      │ Configuration   │ Host                                     │
+├─────────────────┼──────────┼─────────┼─────────────────┼──────────────────────────────────────────┤
+│ my-dev-vm       │ running  │ linux   │ small           │ my-dev-vm.containers.cloud.trycua.com    │
+│ test-windows    │ stopped  │ windows │ medium          │ test-windows.containers.cloud.trycua.com │
+└─────────────────┴──────────┴─────────┴─────────────────┴──────────────────────────────────────────┘
+```
+
+### `cua vm create`
+
+Create a new virtual machine.
+
+```bash
+cua vm create --os <OS> --configuration <SIZE> --region <REGION>
+```
+
+**Required Options:**
+- `--os` - Operating system: `linux`, `windows`, `macos`
+- `--configuration` - VM size: `small`, `medium`, `large`
+- `--region` - Region: `north-america`, `europe`, `asia-pacific`, `south-america`
+
+**Examples:**
+```bash
+# Create a small Linux VM in North America
+cua vm create --os linux --configuration small --region north-america
+
+# Create a medium Windows VM in Europe
+cua vm create --os windows --configuration medium --region europe
+
+# Create a large macOS VM in Asia Pacific
+cua vm create --os macos --configuration large --region asia-pacific
+```
+
+**Response Types:**
+
+**Immediate (Status 200):**
+```bash
+VM created and ready: my-new-vm-abc123
+Password: secure-password-here
+Host: my-new-vm-abc123.containers.cloud.trycua.com
+```
+
+**Provisioning (Status 202):**
+```bash
+VM provisioning started: my-new-vm-abc123
+Job ID: job-xyz789
+Use 'cua vm list' to monitor provisioning progress
+```
+
+### `cua vm start`
+
+Start a stopped virtual machine.
+
+```bash
+cua vm start <name>
+```
+
+**Example:**
+```bash
+$ cua vm start my-dev-vm
+Start accepted
+```
+
+### `cua vm stop`
+
+Stop a running virtual machine.
+
+```bash
+cua vm stop <name>
+```
+
+**Example:**
+```bash
+$ cua vm stop my-dev-vm
+stopping
+```
+
+### `cua vm restart`
+
+Restart a virtual machine.
+
+```bash
+cua vm restart <name>
+```
+
+**Example:**
+```bash
+$ cua vm restart my-dev-vm
+restarting
+```
+
+### `cua vm delete`
+
+Delete a virtual machine permanently.
+
+```bash
+cua vm delete <name>
+```
+
+**Example:**
+```bash
+$ cua vm delete old-test-vm
+VM deletion initiated: deleting
+```
+
+<Callout type="warn">
+  This action is irreversible. All data on the VM will be permanently lost.
+</Callout>
+
+### `cua vm vnc`
+
+Open the VNC interface for a VM in your browser.
+
+```bash
+cua vm vnc <name>
+```
+
+**Example:**
+```bash
+$ cua vm vnc my-dev-vm
+Opening NoVNC: https://my-dev-vm.containers.cloud.trycua.com/vnc.html?autoconnect=true&password=...
+```
+
+This command automatically opens your default browser to the VNC interface with the correct password pre-filled.
+
+### `cua vm chat`
+
+Open the CUA Dashboard Playground for a VM in your browser.
+
+```bash
+cua vm chat <name>
+```
+
+**Example:**
+```bash
+$ cua vm chat my-dev-vm
+Opening Playground: https://cua.ai/dashboard/playground?host=...
+```
+
+This opens the full CUA playground interface where you can interact with your VM using AI agents.
+
+## Global Options
+
+### Help
+
+Get help for any command:
+
+```bash
+cua --help
+cua auth --help
+cua vm --help
+cua vm create --help
+```
+
+### Environment Variables
+
+You can override default endpoints using environment variables:
+
+```bash
+# Use staging environment
+export CUA_API_BASE=https://api.staging.cua.ai
+export CUA_WEBSITE_URL=https://staging.cua.ai
+
+cua vm list  # Uses staging API
+```
+
+**Available Variables:**
+- `CUA_API_BASE` - API endpoint (default: `https://api.cua.ai`)
+- `CUA_WEBSITE_URL` - Website URL (default: `https://cua.ai`)
+
+## Error Handling
+
+The CLI provides clear error messages for common issues:
+
+### Authentication Errors
+```bash
+$ cua vm list
+Unauthorized. Try 'cua auth login' again.
+```
+
+### VM Not Found
+```bash
+$ cua vm start nonexistent-vm
+VM not found
+```
+
+### Invalid Configuration
+```bash
+$ cua vm create --os invalid --configuration small --region north-america
+Invalid request or unsupported configuration
+```
+
+## Tips and Best Practices
+
+### 1. Use Descriptive VM Names
+```bash
+# Good
+cua vm create --os linux --configuration small --region north-america
+# Then rename or use meaningful names in the dashboard
+
+# Better workflow
+cua vm list  # Check the generated name
+# Use that name consistently
+```
+
+### 2. Environment Management
+```bash
+# Set up your project with API key
+cd my-project
+cua auth pull
+# Now your project has CUA_API_KEY in .env
+```
+
+### 3. Quick VM Access
+```bash
+# Create aliases for frequently used VMs
+alias dev-vm="cua vm chat my-development-vm"
+alias prod-vm="cua vm vnc my-production-vm"
+```
+
+### 4. Monitoring Provisioning
+```bash
+# For VMs that need provisioning time
+cua vm create --os windows --configuration large --region europe
+# VM provisioning started: my-vm-abc123
+# Job ID: job-xyz789
+
+# Check status periodically
+watch -n 5 cua vm list
+```
+
+## Next Steps
+
+- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
+- [Learn about CUA computers](/computer-sdk/computers)
+- [Explore agent automation](/agent-sdk/agent-loops)
@@ -0,0 +1,58 @@
+---
+title: Cua CLI
+description: Command-line interface for managing Cua cloud VMs and authentication
+---
+
+import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+
+The Cua CLI is a command-line tool that provides an intuitive interface for managing your Cua cloud virtual machines and authentication. It offers a streamlined workflow for creating, managing, and connecting to cloud sandboxes.
+
+## Key Features
+
+- **Authentication Management**: Secure login with browser-based OAuth flow
+- **VM Lifecycle**: Create, start, stop, restart, and delete cloud VMs
+- **Quick Access**: Direct links to VNC and playground interfaces
+- **Cross-Platform**: Works on macOS, Linux, and Windows
+- **Environment Integration**: Automatic `.env` file generation
+
+## Quick Example
+
+```bash
+# Install the CLI (installs Bun + CUA CLI)
+curl -LsSf https://cua.ai/cli/install.sh | sh
+
+# Login to your CUA account
+cua auth login
+
+# Create a new Linux VM
+cua vm create --os linux --configuration small --region north-america
+
+# List your VMs
+cua vm list
+
+# Open the playground for your VM
+cua vm chat my-vm-name
+```
+
+## Use Cases
+
+### Development Workflow
+- Quickly spin up cloud sandboxes for testing
+- Manage multiple VMs across different regions
+- Integrate with CI/CD pipelines
+
+### Team Collaboration
+- Share VM configurations and access
+- Standardize development environments
+- Quick onboarding for new team members
+
+### Automation
+- Script VM provisioning and management
+- Integrate with deployment workflows
+- Automate environment setup
+
+## Next Steps
+
+- [Install the CLI](/libraries/cua-cli/installation)
+- [Learn about available commands](/libraries/cua-cli/commands)
+- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
@@ -0,0 +1,152 @@
+---
+title: Installation
+description: Install the CUA CLI on your system
+---
+
+import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+import { Callout } from 'fumadocs-ui/components/callout';
+
+## Quick Install
+
+The fastest way to install the CUA CLI is using our installation scripts:
+
+<Tabs items={['macOS / Linux', 'Windows']}>
+  <Tab value="macOS / Linux">
+    ```bash
+    curl -LsSf https://cua.ai/cli/install.sh | sh
+    ```
+  </Tab>
+  <Tab value="Windows">
+    ```powershell
+    powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+    ```
+  </Tab>
+</Tabs>
+
+These scripts will automatically:
+1. Install [Bun](https://bun.sh) (a fast JavaScript runtime)
+2. Install the CUA CLI via `bun add -g @trycua/cli`
+
+<Callout type="info">
+  The installation scripts will automatically detect your system and install the appropriate binary to your PATH.
+</Callout>
+
+## Alternative: npm Install
+
+You can also install the CLI via npm if you prefer:
+
+```bash
+npm install -g @trycua/cli
+```
+
+<Callout type="warn">
+  The npm package requires Node.js 18+ to be installed on your system.
+</Callout>
+
+## Verify Installation
+
+After installation, verify the CLI is working:
+
+```bash
+cua --help
+```
+
+You should see the CLI help output with available commands.
+
+## First Time Setup
+
+After installation, you'll need to authenticate with your CUA account:
+
+```bash
+# Login with browser-based OAuth flow
+cua auth login
+
+# Or provide your API key directly
+cua auth login --api-key sk-your-api-key-here
+```
+
+## Updating
+
+To update to the latest version:
+
+<Tabs items={['Script Install', 'npm Install']}>
+  <Tab value="Script Install">
+    Re-run the installation script:
+    ```bash
+    # macOS/Linux
+    curl -LsSf https://cua.ai/cli/install.sh | sh
+    
+    # Windows
+    powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+    ```
+  </Tab>
+  <Tab value="npm Install">
+    ```bash
+    npm update -g @trycua/cli
+    ```
+  </Tab>
+</Tabs>
+
+## Uninstalling
+
+<Tabs items={['Script Install', 'npm Install']}>
+  <Tab value="Script Install">
+    Remove the binary from your PATH:
+    ```bash
+    # macOS/Linux
+    rm $(which cua)
+    
+    # Windows
+    # Remove from your PATH or delete the executable
+    ```
+  </Tab>
+  <Tab value="npm Install">
+    ```bash
+    npm uninstall -g @trycua/cli
+    ```
+  </Tab>
+</Tabs>
+
+## Troubleshooting
+
+### Command Not Found
+
+If you get a "command not found" error after installation:
+
+1. **Check your PATH**: Make sure the installation directory is in your PATH
+2. **Restart your terminal**: Close and reopen your terminal/command prompt
+3. **Manual PATH setup**: Add the installation directory to your PATH manually
+
+### Permission Issues
+
+If you encounter permission issues during installation:
+
+<Tabs items={['macOS / Linux', 'Windows']}>
+  <Tab value="macOS / Linux">
+    Try running with sudo (not recommended for the curl method):
+    ```bash
+    # If using npm
+    sudo npm install -g @trycua/cli
+    ```
+  </Tab>
+  <Tab value="Windows">
+    Run PowerShell as Administrator:
+    ```powershell
+    # Right-click PowerShell and "Run as Administrator"
+    powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
+    ```
+  </Tab>
+</Tabs>
+
+### Network Issues
+
+If the installation script fails due to network issues:
+
+1. **Check your internet connection**
+2. **Try the npm installation method instead**
+3. **Check if your firewall is blocking the download**
+
+## Next Steps
+
+- [Learn about CLI commands](/libraries/cua-cli/commands)
+- [Follow the quickstart guide](/get-started/quickstart#cli-quickstart)
@@ -0,0 +1,9 @@
+{
+  "title": "CLI",
+  "description": "Command-line interface for CUA",
+  "pages": [
+    "index",
+    "installation",
+    "commands"
+  ]
+}
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.37
+current_version = 0.4.38
 commit = True
 tag = True
 tag_name = agent-v{new_version}
@@ -1,36 +1,40 @@
-"""
-Agent loops for agent
-"""
-
-# Import the loops to register them
-from . import (
-    anthropic,
-    composed_grounded,
-    gemini,
-    glm45v,
-    gta1,
-    holo,
-    internvl,
-    moondream3,
-    omniparser,
-    openai,
-    opencua,
-    qwen,
-    uitars,
-)
-
-__all__ = [
-    "anthropic",
-    "openai",
-    "uitars",
-    "omniparser",
-    "gta1",
-    "composed_grounded",
-    "glm45v",
-    "opencua",
-    "internvl",
-    "holo",
-    "moondream3",
-    "gemini",
-    "qwen",
-]
+"""
+Agent loops for agent
+"""
+
+# Import the loops to register them
+from . import (
+    anthropic,
+    composed_grounded,
+    gelato,
+    gemini,
+    glm45v,
+    gta1,
+    holo,
+    internvl,
+    moondream3,
+    omniparser,
+    openai,
+    opencua,
+    qwen,
+    uiins,
+    uitars,
+)
+
+__all__ = [
+    "anthropic",
+    "openai",
+    "uitars",
+    "omniparser",
+    "gta1",
+    "composed_grounded",
+    "glm45v",
+    "opencua",
+    "internvl",
+    "holo",
+    "moondream3",
+    "gemini",
+    "qwen",
+    "uiins",
+    "gelato",
+]
@@ -0,0 +1,183 @@
+"""
+Gelato agent loop implementation for click prediction using litellm.acompletion
+Model: https://huggingface.co/mlfoundations/Gelato-30B-A3B
+Code: https://github.com/mlfoundations/Gelato/tree/main
+"""
+
+import base64
+import math
+import re
+from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple
+
+import litellm
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability
+
+# System prompt for click prediction: asks the model to answer with a single
+# "(x,y)" point. It is whitespace-stripped before being sent as the system message.
+SYSTEM_PROMPT = """
+You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. For elements with area, return the center point.
+
+Output the coordinate pair exactly:
+(x,y)
+"""
+
+
+def extract_coordinates(raw_string):
+    """
+    Extract the first "(x, y)" coordinate pair from a model response.
+
+    Args:
+        raw_string: str (e.g. "(100, 200)" or "(100.5, 200.25)")
+    Returns:
+        x: float (e.g. 100.0)
+        y: float (e.g. 200.0)
+        Falls back to (0, 0) when raw_string is not a string or contains
+        no coordinate pair.
+    """
+    # A missing or non-text response is treated the same as "no match".
+    if not isinstance(raw_string, str):
+        return 0, 0
+
+    match = re.search(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string)
+    if match is None:
+        return 0, 0
+
+    # float(), not int(): the pattern accepts decimals such as "100.5",
+    # which int() rejects with ValueError.
+    return float(match.group(1)), float(match.group(2))
+
+
+def smart_resize(
+    height: int,
+    width: int,
+    factor: int = 28,
+    min_pixels: int = 3136,
+    max_pixels: int = 8847360,
+) -> Tuple[int, int]:
+    """Smart resize function similar to qwen_vl_utils.
+
+    Args:
+        height: Original image height in pixels.
+        width: Original image width in pixels.
+        factor: Both returned dimensions are multiples of this value.
+        min_pixels: Images with fewer total pixels are scaled up.
+        max_pixels: Images with more total pixels are scaled down.
+
+    Returns:
+        (new_height, new_width), each rounded down to a multiple of
+        ``factor`` and never smaller than ``factor``.
+    """
+    total_pixels = height * width
+
+    if min_pixels <= total_pixels <= max_pixels:
+        # Already within bounds: keep the size, only snap to the factor grid.
+        new_height, new_width = height, width
+    else:
+        # Scale both sides uniformly so the area lands on the violated bound.
+        if total_pixels > max_pixels:
+            scale = (max_pixels / total_pixels) ** 0.5
+        else:
+            scale = (min_pixels / total_pixels) ** 0.5
+        new_height = int(height * scale)
+        new_width = int(width * scale)
+
+    # Round down to a multiple of factor, then clamp so that a side shorter
+    # than factor cannot collapse to 0 (a 0-sized image cannot be resized to).
+    new_height = max((new_height // factor) * factor, factor)
+    new_width = max((new_width // factor) * factor, factor)
+
+    return new_height, new_width
+
+
+@register_agent(models=r".*Gelato.*")
+class GelatoConfig(AsyncAgentConfig):
+    """Gelato agent configuration implementing AsyncAgentConfig protocol for click prediction.
+
+    Registered for every model name matching ``.*Gelato.*``. Only
+    ``predict_click`` is implemented; ``predict_step`` raises
+    NotImplementedError.
+    """
+
+    def __init__(self):
+        self.current_model = None
+        self.last_screenshot_b64 = None
+
+    async def predict_step(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_retries: Optional[int] = None,
+        stream: bool = False,
+        computer_handler=None,
+        _on_api_start=None,
+        _on_api_end=None,
+        _on_usage=None,
+        _on_screenshot=None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Not supported by this config; always raises NotImplementedError."""
+        raise NotImplementedError()
+
+    async def predict_click(
+        self, model: str, image_b64: str, instruction: str, **kwargs
+    ) -> Optional[Tuple[float, float]]:
+        """
+        Predict click coordinates using the Gelato model via litellm.acompletion.
+
+        Args:
+            model: The Gelato model name
+            image_b64: Base64 encoded image
+            instruction: Instruction for where to click
+            **kwargs: Extra arguments forwarded to litellm.acompletion; they
+                override the defaults set here (max_tokens, temperature, ...)
+
+        Returns:
+            Tuple of (x, y) coordinates in the original image's pixel space.
+            When no coordinate pair can be parsed from the response,
+            extract_coordinates falls back to (0, 0), so (0, 0) is returned
+            rather than None.
+        """
+        # Decode base64 image
+        image_data = base64.b64decode(image_b64)
+        image = Image.open(BytesIO(image_data))
+        width, height = image.width, image.height
+
+        # Smart resize the image (similar to qwen_vl_utils)
+        resized_height, resized_width = smart_resize(
+            height,
+            width,
+            factor=28,  # Default factor for Qwen models
+            min_pixels=3136,
+            max_pixels=4096 * 2160,
+        )
+        resized_image = image.resize((resized_width, resized_height))
+        # The model sees the resized image, so its answer is in resized pixels;
+        # these ratios map it back onto the original image.
+        scale_x, scale_y = width / resized_width, height / resized_height
+
+        # Convert resized image back to base64
+        buffered = BytesIO()
+        resized_image.save(buffered, format="PNG")
+        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()
+
+        # Prepare system and user messages
+        system_message = {
+            "role": "system",
+            "content": [{"type": "text", "text": SYSTEM_PROMPT.strip()}],
+        }
+
+        user_message = {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
+                },
+                {"type": "text", "text": instruction},
+            ],
+        }
+
+        # Prepare API call kwargs (caller-supplied kwargs come last and win)
+        api_kwargs = {
+            "model": model,
+            "messages": [system_message, user_message],
+            "max_tokens": 2056,
+            "temperature": 0.0,
+            **kwargs,
+        }
+
+        # Use liteLLM acompletion
+        response = await litellm.acompletion(**api_kwargs)
+
+        # Extract response text
+        output_text = response.choices[0].message.content  # type: ignore
+
+        # Extract and rescale coordinates
+        pred_x, pred_y = extract_coordinates(output_text)  # type: ignore
+        pred_x *= scale_x
+        pred_y *= scale_y
+
+        return (math.floor(pred_x), math.floor(pred_y))
+
+    def get_capabilities(self) -> List[AgentCapability]:
+        """Return the capabilities supported by this agent."""
+        return ["click"]
@@ -0,0 +1,175 @@
+"""
+UI-Ins agent loop implementation for click prediction using litellm.acompletion
+Paper: https://arxiv.org/pdf/2510.20286
+Code: https://github.com/alibaba/UI-Ins
+"""
+
+import asyncio
+import base64
+import json
+import math
+import re
+import uuid
+from io import BytesIO
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+
+import litellm
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability, AgentResponse, Messages, Tools
+
+# Grounding prompt: asks for reasoning inside <think></think> and a "grounding"
+# call inside <tool_call></tool_call>, whose arguments carry the click coordinate.
+SYSTEM_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.\n\n## Output Format\nReturn a json object with a reasoning process in <think></think> tags, a function name and arguments within <tool_call></tool_call> XML tags:\n```\n<think>\n...\n</think>\n<tool_call>\n{"name": "grounding", "arguments": <args-json-object>}\n</tool_call>\n```\n<args-json-object> represents the following item of the action space:\n## Action Space{"action": "click", "coordinate": [x, y]}\nYour task is to accurately locate a UI element based on the instruction. You should first analyze instruction in <think></think> tags and finally output the function in <tool_call></tool_call> tags.\n"""
+
+
+def parse_coordinates(raw_string: str) -> tuple[int, int]:
+    """Return the first "[x, y]" integer pair found in a model response.
+
+    Args:
+        raw_string: Model output text, e.g. '{"coordinate": [100, 200]}'.
+
+    Returns:
+        (x, y) as ints, or the sentinel (-1, -1) when raw_string is not a
+        string or contains no "[x, y]" pair.
+    """
+    # A missing response (e.g. None content) is reported with the same
+    # sentinel as "no match" instead of raising TypeError from the regex call.
+    if not isinstance(raw_string, str):
+        return -1, -1
+
+    match = re.search(r"\[(\d+),\s*(\d+)\]", raw_string)
+    if match is None:
+        return -1, -1
+    return int(match.group(1)), int(match.group(2))
+
+
+def smart_resize(
+    height: int,
+    width: int,
+    factor: int = 28,
+    min_pixels: int = 3136,
+    max_pixels: int = 8847360,
+) -> Tuple[int, int]:
+    """Smart resize function similar to qwen_vl_utils.
+
+    Args:
+        height: Original image height in pixels.
+        width: Original image width in pixels.
+        factor: Both returned dimensions are multiples of this value.
+        min_pixels: Images with fewer total pixels are scaled up.
+        max_pixels: Images with more total pixels are scaled down.
+
+    Returns:
+        (new_height, new_width), each rounded down to a multiple of
+        ``factor`` and never smaller than ``factor``.
+    """
+    total_pixels = height * width
+
+    if min_pixels <= total_pixels <= max_pixels:
+        # Already within bounds: keep the size, only snap to the factor grid.
+        new_height, new_width = height, width
+    else:
+        # Scale both sides uniformly so the area lands on the violated bound.
+        if total_pixels > max_pixels:
+            scale = (max_pixels / total_pixels) ** 0.5
+        else:
+            scale = (min_pixels / total_pixels) ** 0.5
+        new_height = int(height * scale)
+        new_width = int(width * scale)
+
+    # Round down to a multiple of factor, then clamp so that a side shorter
+    # than factor cannot collapse to 0 (a 0-sized image cannot be resized to).
+    new_height = max((new_height // factor) * factor, factor)
+    new_width = max((new_width // factor) * factor, factor)
+
+    return new_height, new_width
+
+
+@register_agent(models=r".*UI-Ins.*")
+class UIInsConfig(AsyncAgentConfig):
+    """UI-Ins agent configuration implementing AsyncAgentConfig protocol for click prediction.
+
+    Registered for every model name matching ``.*UI-Ins.*``. Only
+    ``predict_click`` is implemented; ``predict_step`` raises
+    NotImplementedError.
+    """
+
+    def __init__(self):
+        self.current_model = None
+        self.last_screenshot_b64 = None
+
+    async def predict_step(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_retries: Optional[int] = None,
+        stream: bool = False,
+        computer_handler=None,
+        _on_api_start=None,
+        _on_api_end=None,
+        _on_usage=None,
+        _on_screenshot=None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Not supported by this config; always raises NotImplementedError."""
+        raise NotImplementedError()
+
+    async def predict_click(
+        self, model: str, image_b64: str, instruction: str, **kwargs
+    ) -> Optional[Tuple[float, float]]:
+        """
+        Predict click coordinates using UI-Ins model via litellm.acompletion.
+
+        Args:
+            model: The UI-Ins model name
+            image_b64: Base64 encoded image
+            instruction: Instruction for where to click
+            **kwargs: Extra arguments forwarded to litellm.acompletion; they
+                override the defaults set here (max_tokens, temperature, ...)
+
+        Returns:
+            Tuple of (x, y) coordinates in the original image's pixel space,
+            or None if the response is empty or contains no "[x, y]" pair
+        """
+        # Decode base64 image
+        image_data = base64.b64decode(image_b64)
+        image = Image.open(BytesIO(image_data))
+        width, height = image.width, image.height
+
+        # Smart resize the image (similar to qwen_vl_utils)
+        resized_height, resized_width = smart_resize(
+            height,
+            width,
+            factor=28,  # Default factor for Qwen models
+            min_pixels=3136,
+            max_pixels=4096 * 2160,
+        )
+        resized_image = image.resize((resized_width, resized_height))
+        # The model sees the resized image, so its answer is in resized pixels;
+        # these ratios map it back onto the original image.
+        scale_x, scale_y = width / resized_width, height / resized_height
+
+        # Convert resized image back to base64
+        buffered = BytesIO()
+        resized_image.save(buffered, format="PNG")
+        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()
+
+        # Prepare system and user messages
+        system_message = {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {"type": "text", "text": SYSTEM_PROMPT},
+            ],
+        }
+
+        user_message = {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
+                },
+                {"type": "text", "text": instruction},
+            ],
+        }
+
+        # Prepare API call kwargs (caller-supplied kwargs come last and win)
+        api_kwargs = {
+            "model": model,
+            "messages": [system_message, user_message],
+            "max_tokens": 2056,
+            "temperature": 0.0,
+            **kwargs,
+        }
+
+        # Use liteLLM acompletion
+        response = await litellm.acompletion(**api_kwargs)
+
+        # Extract response text
+        output_text = response.choices[0].message.content  # type: ignore
+        if not output_text:
+            # Nothing to parse: report failure instead of raising in the parser.
+            return None
+
+        # Extract coordinates; parse_coordinates signals "no match" with (-1, -1).
+        # Its pattern only matches unsigned integers, so a negative value can
+        # only be that sentinel and must not be rescaled into a click point.
+        pred_x, pred_y = parse_coordinates(output_text)  # type: ignore
+        if pred_x < 0 or pred_y < 0:
+            return None
+
+        # Rescale from resized-image pixels back to the original image
+        return (math.floor(pred_x * scale_x), math.floor(pred_y * scale_y))
+
+    def get_capabilities(self) -> List[AgentCapability]:
+        """Return the capabilities supported by this agent."""
+        return ["click"]
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"

 [project]
 name = "cua-agent"
-version = "0.4.37"
+version = "0.4.38"
 description = "CUA (Computer Use) Agent for AI-driven computer interaction"
 readme = "README.md"
 authors = [