mirror of
https://github.com/trycua/computer.git
synced 2026-05-14 12:28:59 -05:00
Merge branch 'improve-docs-home' of https://github.com/trycua/cua into improve-docs-home
This commit is contained in:
@@ -332,253 +332,151 @@ Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available m
|
||||
|
||||
## CLI Quickstart
|
||||
|
||||
Get started quickly with the CUA CLI - the easiest way to manage cloud VMs and run AI agents.
|
||||
|
||||
<Steps>
|
||||
<Step>
|
||||
|
||||
### Install Cua
|
||||
### Install the CUA CLI
|
||||
|
||||
<Accordions type="single" defaultValue="uv">
|
||||
|
||||
<Accordion title="uv (Recommended)" value="uv">
|
||||
|
||||
#### Install uv
|
||||
|
||||
<Tabs items={['macOS / Linux', 'Windows']} persist>
|
||||
<Tab value="macOS / Linux">
|
||||
|
||||
```bash
|
||||
# Use curl to download the script and execute it with sh:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# If your system doesn't have curl, you can use wget:
|
||||
# wget -qO- https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
|
||||
```powershell
|
||||
# Use irm to download the script and execute it with iex:
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tabs items={['macOS / Linux', 'Windows', 'npm (Alternative)', 'From Source']}>
|
||||
<Tab value="macOS / Linux">
|
||||
```bash
|
||||
curl -LsSf https://cua.ai/cli/install.sh | sh
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
```powershell
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="npm (Alternative)">
|
||||
```bash
|
||||
npm install -g @trycua/cli
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="From Source">
|
||||
```bash
|
||||
# Install Bun (macOS/Linux)
|
||||
curl -fsSL https://bun.sh/install | bash
|
||||
|
||||
# Install Bun (Windows)
|
||||
# powershell -c "irm bun.sh/install.ps1|iex"
|
||||
|
||||
# Clone the repo
|
||||
git clone https://github.com/trycua/cua
|
||||
cd cua/libs/typescript/cua-cli
|
||||
|
||||
# Install the CLI
|
||||
bun install
|
||||
bun link
|
||||
bun link cua-cli
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
#### Install Python 3.12
|
||||
|
||||
```bash
|
||||
uv python install 3.12
|
||||
# uv will install Cua dependencies automatically when you use --with "cua-agent[cli]"
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="conda" value="conda">
|
||||
|
||||
#### Install conda
|
||||
|
||||
<Tabs items={['macOS', 'Linux', 'Windows']} persist>
|
||||
<Tab value="macOS">
|
||||
|
||||
```bash
|
||||
mkdir -p ~/miniconda3
|
||||
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh
|
||||
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
|
||||
rm ~/miniconda3/miniconda.sh
|
||||
source ~/miniconda3/bin/activate
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Linux">
|
||||
|
||||
```bash
|
||||
mkdir -p ~/miniconda3
|
||||
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
|
||||
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
|
||||
rm ~/miniconda3/miniconda.sh
|
||||
source ~/miniconda3/bin/activate
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
|
||||
```powershell
|
||||
wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile ".\miniconda.exe"
|
||||
Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait
|
||||
del .\miniconda.exe
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
#### Create and activate Python 3.12 environment
|
||||
|
||||
```bash
|
||||
conda create -n cua python=3.12
|
||||
conda activate cua
|
||||
```
|
||||
|
||||
#### Install Cua
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[cli]" cua-computer
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="pip" value="pip">
|
||||
|
||||
#### Install Cua
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[cli]" cua-computer
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
</Accordions>
|
||||
|
||||
</Step>
|
||||
|
||||
<Step>
|
||||
|
||||
### Run Cua CLI
|
||||
### Authenticate with CUA
|
||||
|
||||
Choose your preferred AI model:
|
||||
|
||||
#### OpenAI Computer Use Preview
|
||||
|
||||
<Tabs items={['uv', 'conda/pip']} persist>
|
||||
<Tab value="uv">
|
||||
Login to your CUA account:
|
||||
|
||||
```bash
|
||||
uv run --with "cua-agent[cli]" -m agent.cli openai/computer-use-preview
|
||||
# Interactive browser login (recommended)
|
||||
cua auth login
|
||||
|
||||
# Or provide your API key directly
|
||||
cua auth login --api-key sk-your-api-key-here
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="conda/pip">
|
||||
If you don't have a CUA account yet, sign up at [cua.ai/signin](https://cua.ai/signin).
|
||||
|
||||
</Step>
|
||||
|
||||
<Step>
|
||||
|
||||
### Create Your First VM
|
||||
|
||||
Create a cloud sandbox where your AI agents will run:
|
||||
|
||||
```bash
|
||||
python -m agent.cli openai/computer-use-preview
|
||||
# Create a Linux VM (recommended for most use cases)
|
||||
cua vm create --os linux --configuration small --region north-america
|
||||
|
||||
# Or create a Windows VM
|
||||
cua vm create --os windows --configuration small --region north-america
|
||||
|
||||
# Or create a macOS VM
|
||||
cua vm create --os macos --configuration small --region north-america
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
Your VM will be created and you'll see output like:
|
||||
```
|
||||
VM created and ready: my-vm-abc123
|
||||
Password: secure-password-here
|
||||
Host: my-vm-abc123.containers.cloud.trycua.com
|
||||
```
|
||||
|
||||
#### Anthropic Claude
|
||||
</Step>
|
||||
|
||||
<Tabs items={['uv', 'conda/pip']} persist>
|
||||
<Tab value="uv">
|
||||
<Step>
|
||||
|
||||
### Start Using Your VM
|
||||
|
||||
You can now interact with your VM in multiple ways:
|
||||
|
||||
#### Option 1: Open the AI Playground (Recommended)
|
||||
```bash
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-5-20250929
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-20250514
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-1-20250805
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-20250514
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022
|
||||
cua vm chat my-vm-abc123
|
||||
```
|
||||
This opens the full CUA playground in your browser where you can chat with AI agents that control your VM.
|
||||
|
||||
</Tab>
|
||||
<Tab value="conda/pip">
|
||||
|
||||
#### Option 2: Access VNC Desktop
|
||||
```bash
|
||||
python -m agent.cli anthropic/claude-sonnet-4-5-20250929
|
||||
python -m agent.cli anthropic/claude-opus-4-1-20250805
|
||||
python -m agent.cli anthropic/claude-opus-4-20250514
|
||||
python -m agent.cli anthropic/claude-sonnet-4-20250514
|
||||
python -m agent.cli anthropic/claude-3-5-sonnet-20241022
|
||||
cua vm vnc my-vm-abc123
|
||||
```
|
||||
This opens a remote desktop connection to your VM.
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
#### Omniparser + LLMs
|
||||
|
||||
<Tabs items={['uv', 'conda/pip']} persist>
|
||||
<Tab value="uv">
|
||||
|
||||
#### Option 3: List and Manage VMs
|
||||
```bash
|
||||
uv run --with "cua-agent[cli]" -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
|
||||
uv run --with "cua-agent[cli]" -m agent.cli omniparser+openai/gpt-4o
|
||||
uv run --with "cua-agent[cli]" -m agent.cli omniparser+vertex_ai/gemini-pro
|
||||
# List all your VMs
|
||||
cua vm list
|
||||
|
||||
# Start/stop VMs as needed
|
||||
cua vm stop my-vm-abc123
|
||||
cua vm start my-vm-abc123
|
||||
|
||||
# Delete VMs when done
|
||||
cua vm delete my-vm-abc123
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="conda/pip">
|
||||
</Step>
|
||||
|
||||
```bash
|
||||
python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
|
||||
python -m agent.cli omniparser+openai/gpt-4o
|
||||
python -m agent.cli omniparser+vertex_ai/gemini-pro
|
||||
```
|
||||
<Step>
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
### Try Some AI Tasks
|
||||
|
||||
#### Local Models
|
||||
|
||||
<Tabs items={['uv', 'conda/pip']} persist>
|
||||
<Tab value="uv">
|
||||
|
||||
```bash
|
||||
# Hugging Face models (local)
|
||||
uv run --with "cua-agent[cli]" -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
|
||||
|
||||
# MLX models (Apple Silicon)
|
||||
uv run --with "cua-agent[cli]" -m agent.cli mlx/mlx-community/UI-TARS-1.5-7B-6bit
|
||||
|
||||
# Ollama models
|
||||
uv run --with "cua-agent[cli]" -m agent.cli omniparser+ollama_chat/llama3.2:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="conda/pip">
|
||||
|
||||
```bash
|
||||
# Hugging Face models (local)
|
||||
python -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
|
||||
|
||||
# MLX models (Apple Silicon)
|
||||
python -m agent.cli mlx/mlx-community/UI-TARS-1.5-7B-6bit
|
||||
|
||||
# Ollama models
|
||||
python -m agent.cli omniparser+ollama_chat/llama3.2:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
#### Interactive Setup
|
||||
|
||||
If you haven't set up environment variables, the CLI will guide you through the setup:
|
||||
|
||||
1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [cua.ai](https://cua.ai/))
|
||||
2. **CUA API Key**: Enter your Cua API key
|
||||
3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.)
|
||||
|
||||
#### Start Chatting
|
||||
|
||||
Once connected, you'll see:
|
||||
|
||||
```
|
||||
💻 Connected to your-container-name (model, agent_loop)
|
||||
Type 'exit' to quit.
|
||||
|
||||
>
|
||||
```
|
||||
|
||||
You can ask your agent to perform actions like:
|
||||
Once you have the playground open (`cua vm chat`), try asking the AI to:
|
||||
|
||||
- "Take a screenshot and tell me what's on the screen"
|
||||
- "Open Firefox and go to github.com"
|
||||
- "Type 'Hello world' into the terminal"
|
||||
- "Close the current window"
|
||||
- "Click on the search button"
|
||||
- "Open Firefox and navigate to github.com"
|
||||
- "Create a new text file and write 'Hello World' in it"
|
||||
- "Install Python and run a simple script"
|
||||
- "Take a screenshot of the desktop"
|
||||
|
||||
The AI agent will automatically control your VM to complete these tasks!
|
||||
|
||||
</Step>
|
||||
</Steps>
|
||||
|
||||
### What's Next?
|
||||
|
||||
- **Explore more commands**: Check out the [complete CLI reference](/libraries/cua-cli/commands)
|
||||
- **Learn about programming**: Try the [Developer Quickstart](#developer-quickstart) to build custom automations
|
||||
- **Join the community**: Get help in our [Discord community](https://discord.com/invite/mVnXXpdE85)
|
||||
|
||||
---
|
||||
|
||||
For running models locally, see [Running Models Locally](/agent-sdk/supported-model-providers/local-models).
|
||||
|
||||
@@ -0,0 +1,320 @@
|
||||
---
|
||||
title: Commands
|
||||
description: Complete reference for all CUA CLI commands
|
||||
---
|
||||
|
||||
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
|
||||
import { Callout } from 'fumadocs-ui/components/callout';
|
||||
|
||||
## Overview
|
||||
|
||||
The CUA CLI provides two main command groups:
|
||||
|
||||
- **`cua auth`** - Authentication and API key management
|
||||
- **`cua vm`** - Virtual machine lifecycle management
|
||||
|
||||
## Authentication Commands
|
||||
|
||||
### `cua auth login`
|
||||
|
||||
Authenticate with your CUA account, either through the interactive browser-based OAuth flow (the default) or by passing an API key directly.
|
||||
|
||||
```bash
|
||||
# Interactive browser login
|
||||
cua auth login
|
||||
|
||||
# Direct API key login
|
||||
cua auth login --api-key sk-your-api-key-here
|
||||
```
|
||||
|
||||
**Options:**
|
||||
- `--api-key <key>` - Provide API key directly instead of browser flow
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua auth login
|
||||
Opening browser for CLI auth...
|
||||
API key saved
|
||||
```
|
||||
|
||||
### `cua auth pull`
|
||||
|
||||
Create or update a `.env` file in the current directory with your CUA API key.
|
||||
|
||||
```bash
|
||||
cua auth pull
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua auth pull
|
||||
Wrote /path/to/your/project/.env
|
||||
```
|
||||
|
||||
The generated `.env` file will contain:
|
||||
```
|
||||
CUA_API_KEY=sk-your-api-key-here
|
||||
```
|
||||
|
||||
### `cua auth logout`
|
||||
|
||||
Remove the stored API key from your system.
|
||||
|
||||
```bash
|
||||
cua auth logout
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua auth logout
|
||||
Logged out
|
||||
```
|
||||
|
||||
## Virtual Machine Commands
|
||||
|
||||
### `cua vm list`
|
||||
|
||||
List all your virtual machines with their current status.
|
||||
|
||||
```bash
|
||||
cua vm list
|
||||
```
|
||||
|
||||
**Example Output:**
|
||||
```
|
||||
┌─────────────────┬──────────┬────────┬─────────────────┬──────────────────────────────────────┐
|
||||
│ Name │ Status │ OS │ Configuration │ Host │
|
||||
├─────────────────┼──────────┼────────┼─────────────────┼──────────────────────────────────────┤
|
||||
│ my-dev-vm │ running │ linux │ small │ my-dev-vm.containers.cloud.trycua.com │
|
||||
│ test-windows │ stopped │ windows│ medium │ test-windows.containers.cloud.trycua.com │
|
||||
└─────────────────┴──────────┴────────┴─────────────────┴──────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### `cua vm create`
|
||||
|
||||
Create a new virtual machine.
|
||||
|
||||
```bash
|
||||
cua vm create --os <OS> --configuration <SIZE> --region <REGION>
|
||||
```
|
||||
|
||||
**Required Options:**
|
||||
- `--os` - Operating system: `linux`, `windows`, `macos`
|
||||
- `--configuration` - VM size: `small`, `medium`, `large`
|
||||
- `--region` - Region: `north-america`, `europe`, `asia-pacific`, `south-america`
|
||||
|
||||
**Examples:**
|
||||
```bash
|
||||
# Create a small Linux VM in North America
|
||||
cua vm create --os linux --configuration small --region north-america
|
||||
|
||||
# Create a medium Windows VM in Europe
|
||||
cua vm create --os windows --configuration medium --region europe
|
||||
|
||||
# Create a large macOS VM in Asia Pacific
|
||||
cua vm create --os macos --configuration large --region asia-pacific
|
||||
```
|
||||
|
||||
**Response Types:**
|
||||
|
||||
**Immediate (Status 200):**
|
||||
```bash
|
||||
VM created and ready: my-new-vm-abc123
|
||||
Password: secure-password-here
|
||||
Host: my-new-vm-abc123.containers.cloud.trycua.com
|
||||
```
|
||||
|
||||
**Provisioning (Status 202):**
|
||||
```bash
|
||||
VM provisioning started: my-new-vm-abc123
|
||||
Job ID: job-xyz789
|
||||
Use 'cua vm list' to monitor provisioning progress
|
||||
```
|
||||
|
||||
### `cua vm start`
|
||||
|
||||
Start a stopped virtual machine.
|
||||
|
||||
```bash
|
||||
cua vm start <name>
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua vm start my-dev-vm
|
||||
Start accepted
|
||||
```
|
||||
|
||||
### `cua vm stop`
|
||||
|
||||
Stop a running virtual machine.
|
||||
|
||||
```bash
|
||||
cua vm stop <name>
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua vm stop my-dev-vm
|
||||
stopping
|
||||
```
|
||||
|
||||
### `cua vm restart`
|
||||
|
||||
Restart a virtual machine.
|
||||
|
||||
```bash
|
||||
cua vm restart <name>
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua vm restart my-dev-vm
|
||||
restarting
|
||||
```
|
||||
|
||||
### `cua vm delete`
|
||||
|
||||
Delete a virtual machine permanently.
|
||||
|
||||
```bash
|
||||
cua vm delete <name>
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua vm delete old-test-vm
|
||||
VM deletion initiated: deleting
|
||||
```
|
||||
|
||||
<Callout type="warn">
|
||||
This action is irreversible. All data on the VM will be permanently lost.
|
||||
</Callout>
|
||||
|
||||
### `cua vm vnc`
|
||||
|
||||
Open the VNC interface for a VM in your browser.
|
||||
|
||||
```bash
|
||||
cua vm vnc <name>
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua vm vnc my-dev-vm
|
||||
Opening NoVNC: https://my-dev-vm.containers.cloud.trycua.com/vnc.html?autoconnect=true&password=...
|
||||
```
|
||||
|
||||
This command automatically opens your default browser to the VNC interface with the correct password pre-filled.
|
||||
|
||||
### `cua vm chat`
|
||||
|
||||
Open the CUA Dashboard Playground for a VM in your browser.
|
||||
|
||||
```bash
|
||||
cua vm chat <name>
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
$ cua vm chat my-dev-vm
|
||||
Opening Playground: https://cua.ai/dashboard/playground?host=...
|
||||
```
|
||||
|
||||
This opens the full CUA playground interface where you can interact with your VM using AI agents.
|
||||
|
||||
## Global Options
|
||||
|
||||
### Help
|
||||
|
||||
Get help for any command:
|
||||
|
||||
```bash
|
||||
cua --help
|
||||
cua auth --help
|
||||
cua vm --help
|
||||
cua vm create --help
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
You can override default endpoints using environment variables:
|
||||
|
||||
```bash
|
||||
# Use staging environment
|
||||
export CUA_API_BASE=https://api.staging.cua.ai
|
||||
export CUA_WEBSITE_URL=https://staging.cua.ai
|
||||
|
||||
cua vm list # Uses staging API
|
||||
```
|
||||
|
||||
**Available Variables:**
|
||||
- `CUA_API_BASE` - API endpoint (default: `https://api.cua.ai`)
|
||||
- `CUA_WEBSITE_URL` - Website URL (default: `https://cua.ai`)
|
||||
|
||||
## Error Handling
|
||||
|
||||
The CLI provides clear error messages for common issues:
|
||||
|
||||
### Authentication Errors
|
||||
```bash
|
||||
$ cua vm list
|
||||
Unauthorized. Try 'cua auth login' again.
|
||||
```
|
||||
|
||||
### VM Not Found
|
||||
```bash
|
||||
$ cua vm start nonexistent-vm
|
||||
VM not found
|
||||
```
|
||||
|
||||
### Invalid Configuration
|
||||
```bash
|
||||
$ cua vm create --os invalid --configuration small --region north-america
|
||||
Invalid request or unsupported configuration
|
||||
```
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
### 1. Use Descriptive VM Names
|
||||
```bash
|
||||
# Good
|
||||
cua vm create --os linux --configuration small --region north-america
|
||||
# Then rename or use meaningful names in the dashboard
|
||||
|
||||
# Better workflow
|
||||
cua vm list # Check the generated name
|
||||
# Use that name consistently
|
||||
```
|
||||
|
||||
### 2. Environment Management
|
||||
```bash
|
||||
# Set up your project with API key
|
||||
cd my-project
|
||||
cua auth pull
|
||||
# Now your project has CUA_API_KEY in .env
|
||||
```
|
||||
|
||||
### 3. Quick VM Access
|
||||
```bash
|
||||
# Create aliases for frequently used VMs
|
||||
alias dev-vm="cua vm chat my-development-vm"
|
||||
alias prod-vm="cua vm vnc my-production-vm"
|
||||
```
|
||||
|
||||
### 4. Monitoring Provisioning
|
||||
```bash
|
||||
# For VMs that need provisioning time
|
||||
cua vm create --os windows --configuration large --region europe
|
||||
# VM provisioning started: my-vm-abc123
|
||||
# Job ID: job-xyz789
|
||||
|
||||
# Check status periodically
|
||||
watch -n 5 cua vm list
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
|
||||
- [Learn about CUA computers](/computer-sdk/computers)
|
||||
- [Explore agent automation](/agent-sdk/agent-loops)
|
||||
@@ -0,0 +1,58 @@
|
||||
---
|
||||
title: Cua CLI
|
||||
description: Command-line interface for managing Cua cloud VMs and authentication
|
||||
---
|
||||
|
||||
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
|
||||
|
||||
The Cua CLI is a command-line tool that provides an intuitive interface for managing your Cua cloud virtual machines and authentication. It offers a streamlined workflow for creating, managing, and connecting to cloud sandboxes.
|
||||
|
||||
## Key Features
|
||||
|
||||
- **Authentication Management**: Secure login with browser-based OAuth flow
|
||||
- **VM Lifecycle**: Create, start, stop, restart, and delete cloud VMs
|
||||
- **Quick Access**: Direct links to VNC and playground interfaces
|
||||
- **Cross-Platform**: Works on macOS, Linux, and Windows
|
||||
- **Environment Integration**: Automatic `.env` file generation
|
||||
|
||||
## Quick Example
|
||||
|
||||
```bash
|
||||
# Install the CLI (installs Bun + CUA CLI)
|
||||
curl -LsSf https://cua.ai/cli/install.sh | sh
|
||||
|
||||
# Login to your CUA account
|
||||
cua auth login
|
||||
|
||||
# Create a new Linux VM
|
||||
cua vm create --os linux --configuration small --region north-america
|
||||
|
||||
# List your VMs
|
||||
cua vm list
|
||||
|
||||
# Open the playground for your VM
|
||||
cua vm chat my-vm-name
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Development Workflow
|
||||
- Quickly spin up cloud sandboxes for testing
|
||||
- Manage multiple VMs across different regions
|
||||
- Integrate with CI/CD pipelines
|
||||
|
||||
### Team Collaboration
|
||||
- Share VM configurations and access
|
||||
- Standardize development environments
|
||||
- Quick onboarding for new team members
|
||||
|
||||
### Automation
|
||||
- Script VM provisioning and management
|
||||
- Integrate with deployment workflows
|
||||
- Automate environment setup
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Install the CLI](/libraries/cua-cli/installation)
|
||||
- [Learn about available commands](/libraries/cua-cli/commands)
|
||||
- [Get started with the quickstart guide](/get-started/quickstart#cli-quickstart)
|
||||
@@ -0,0 +1,152 @@
|
||||
---
|
||||
title: Installation
|
||||
description: Install the CUA CLI on your system
|
||||
---
|
||||
|
||||
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
|
||||
import { Callout } from 'fumadocs-ui/components/callout';
|
||||
|
||||
## Quick Install
|
||||
|
||||
The fastest way to install the CUA CLI is using our installation scripts:
|
||||
|
||||
<Tabs items={['macOS / Linux', 'Windows']}>
|
||||
<Tab value="macOS / Linux">
|
||||
```bash
|
||||
curl -LsSf https://cua.ai/cli/install.sh | sh
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
```powershell
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
These scripts will automatically:
|
||||
1. Install [Bun](https://bun.sh) (a fast JavaScript runtime)
|
||||
2. Install the CUA CLI via `bun add -g @trycua/cli`
|
||||
|
||||
<Callout type="info">
|
||||
The installation scripts will automatically detect your system and install the appropriate binary to your PATH.
|
||||
</Callout>
|
||||
|
||||
## Alternative: npm Install
|
||||
|
||||
You can also install the CLI via npm if you prefer:
|
||||
|
||||
```bash
|
||||
npm install -g @trycua/cli
|
||||
```
|
||||
|
||||
<Callout type="warn">
|
||||
The npm package requires Node.js 18+ to be installed on your system.
|
||||
</Callout>
|
||||
|
||||
## Verify Installation
|
||||
|
||||
After installation, verify the CLI is working:
|
||||
|
||||
```bash
|
||||
cua --help
|
||||
```
|
||||
|
||||
You should see the CLI help output with available commands.
|
||||
|
||||
## First Time Setup
|
||||
|
||||
After installation, you'll need to authenticate with your CUA account:
|
||||
|
||||
```bash
|
||||
# Login with browser-based OAuth flow
|
||||
cua auth login
|
||||
|
||||
# Or provide your API key directly
|
||||
cua auth login --api-key sk-your-api-key-here
|
||||
```
|
||||
|
||||
## Updating
|
||||
|
||||
To update to the latest version:
|
||||
|
||||
<Tabs items={['Script Install', 'npm Install']}>
|
||||
<Tab value="Script Install">
|
||||
Re-run the installation script:
|
||||
```bash
|
||||
# macOS/Linux
|
||||
curl -LsSf https://cua.ai/cli/install.sh | sh
|
||||
|
||||
# Windows
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="npm Install">
|
||||
```bash
|
||||
npm update -g @trycua/cli
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Uninstalling
|
||||
|
||||
<Tabs items={['Script Install', 'npm Install']}>
|
||||
<Tab value="Script Install">
|
||||
Delete the `cua` executable that is currently on your PATH:
|
||||
```bash
|
||||
# macOS/Linux
|
||||
rm $(which cua)
|
||||
|
||||
# Windows
|
||||
# Remove from your PATH or delete the executable
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="npm Install">
|
||||
```bash
|
||||
npm uninstall -g @trycua/cli
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Command Not Found
|
||||
|
||||
If you get a "command not found" error after installation:
|
||||
|
||||
1. **Check your PATH**: Make sure the installation directory is in your PATH
|
||||
2. **Restart your terminal**: Close and reopen your terminal/command prompt
|
||||
3. **Manual PATH setup**: Add the installation directory to your PATH manually
|
||||
|
||||
### Permission Issues
|
||||
|
||||
If you encounter permission issues during installation:
|
||||
|
||||
<Tabs items={['macOS / Linux', 'Windows']}>
|
||||
<Tab value="macOS / Linux">
|
||||
Try running with sudo (not recommended for the curl method):
|
||||
```bash
|
||||
# If using npm
|
||||
sudo npm install -g @trycua/cli
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
Run PowerShell as Administrator:
|
||||
```powershell
|
||||
# Right-click PowerShell and "Run as Administrator"
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
### Network Issues
|
||||
|
||||
If the installation script fails due to network issues:
|
||||
|
||||
1. **Check your internet connection**
|
||||
2. **Try the npm installation method instead**
|
||||
3. **Check if your firewall is blocking the download**
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Learn about CLI commands](/libraries/cua-cli/commands)
|
||||
- [Follow the quickstart guide](/get-started/quickstart#cli-quickstart)
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"title": "CLI",
|
||||
"description": "Command-line interface for CUA",
|
||||
"pages": [
|
||||
"index",
|
||||
"installation",
|
||||
"commands"
|
||||
]
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.4.37
|
||||
current_version = 0.4.38
|
||||
commit = True
|
||||
tag = True
|
||||
tag_name = agent-v{new_version}
|
||||
|
||||
@@ -1,36 +1,40 @@
|
||||
"""
|
||||
Agent loops for agent
|
||||
"""
|
||||
|
||||
# Import the loops to register them
|
||||
from . import (
|
||||
anthropic,
|
||||
composed_grounded,
|
||||
gemini,
|
||||
glm45v,
|
||||
gta1,
|
||||
holo,
|
||||
internvl,
|
||||
moondream3,
|
||||
omniparser,
|
||||
openai,
|
||||
opencua,
|
||||
qwen,
|
||||
uitars,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"anthropic",
|
||||
"openai",
|
||||
"uitars",
|
||||
"omniparser",
|
||||
"gta1",
|
||||
"composed_grounded",
|
||||
"glm45v",
|
||||
"opencua",
|
||||
"internvl",
|
||||
"holo",
|
||||
"moondream3",
|
||||
"gemini",
|
||||
"qwen",
|
||||
]
|
||||
"""
|
||||
Agent loops for agent
|
||||
"""
|
||||
|
||||
# Import the loops to register them
|
||||
from . import (
|
||||
anthropic,
|
||||
composed_grounded,
|
||||
gelato,
|
||||
gemini,
|
||||
glm45v,
|
||||
gta1,
|
||||
holo,
|
||||
internvl,
|
||||
moondream3,
|
||||
omniparser,
|
||||
openai,
|
||||
opencua,
|
||||
qwen,
|
||||
uiins,
|
||||
uitars,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"anthropic",
|
||||
"openai",
|
||||
"uitars",
|
||||
"omniparser",
|
||||
"gta1",
|
||||
"composed_grounded",
|
||||
"glm45v",
|
||||
"opencua",
|
||||
"internvl",
|
||||
"holo",
|
||||
"moondream3",
|
||||
"gemini",
|
||||
"qwen",
|
||||
"uiins",
|
||||
"gelato",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
Gelato agent loop implementation for click prediction using litellm.acompletion
|
||||
Model: https://huggingface.co/mlfoundations/Gelato-30B-A3B
|
||||
Code: https://github.com/mlfoundations/Gelato/tree/main
|
||||
"""
|
||||
|
||||
import base64
|
||||
import math
|
||||
import re
|
||||
from io import BytesIO
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import litellm
|
||||
from PIL import Image
|
||||
|
||||
from ..decorators import register_agent
|
||||
from ..loops.base import AsyncAgentConfig
|
||||
from ..types import AgentCapability
|
||||
|
||||
SYSTEM_PROMPT = """
|
||||
You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. For elements with area, return the center point.
|
||||
|
||||
Output the coordinate pair exactly:
|
||||
(x,y)
|
||||
"""
|
||||
|
||||
|
||||
def extract_coordinates(raw_string):
    """
    Extract the first "(x, y)" coordinate pair found in the raw string.

    Args:
        raw_string: str (e.g. "(100, 200)" or "click at (10.5, 20.25)")

    Returns:
        Tuple (x, y) of floats (e.g. (100.0, 200.0)) for the first pair
        found. Falls back to (0, 0) when the input is not a string or
        contains no coordinate pair.
    """
    # Non-string input (e.g. None from an empty model response) cannot be
    # searched; keep the historical "(0, 0) on failure" contract.
    if not isinstance(raw_string, str):
        return 0, 0

    match = re.search(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string)
    if match is None:
        return 0, 0

    # float() rather than int(): the pattern accepts decimals such as "10.5",
    # which int() rejects with ValueError (previously hidden by a bare except
    # and reported as (0, 0)).
    x_text, y_text = match.groups()
    return float(x_text), float(y_text)
|
||||
|
||||
|
||||
def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 3136,
    max_pixels: int = 8847360,
) -> Tuple[int, int]:
    """Smart resize function similar to qwen_vl_utils.

    Picks output dimensions whose sides are multiples of ``factor``. When the
    pixel count falls outside ``[min_pixels, max_pixels]`` both sides are first
    scaled by the same ratio to bring the area to the violated bound.

    Args:
        height: Original image height in pixels.
        width: Original image width in pixels.
        factor: Each returned side is a multiple of this value.
        min_pixels: Lower bound on ``height * width`` before upscaling kicks in.
        max_pixels: Upper bound on ``height * width`` before downscaling kicks in.

    Returns:
        ``(new_height, new_width)``; each side is at least ``factor``.

    Raises:
        ZeroDivisionError: If ``height * width`` is 0 while ``min_pixels`` is
            positive (the scale ratio cannot be computed).
    """
    total_pixels = height * width

    if min_pixels <= total_pixels <= max_pixels:
        # Area already acceptable: keep the size and only snap to the grid.
        scaled_height, scaled_width = height, width
    else:
        # Scale both sides by the square root of the area ratio so the aspect
        # ratio is preserved while the area lands on the violated bound.
        target_pixels = max_pixels if total_pixels > max_pixels else min_pixels
        scale = (target_pixels / total_pixels) ** 0.5
        scaled_height = int(height * scale)
        scaled_width = int(width * scale)

    # Snap DOWN to a multiple of factor (floor, not round-to-nearest).
    new_height = (scaled_height // factor) * factor
    new_width = (scaled_width // factor) * factor

    # Clamp on every path: a side shorter than factor floors to 0, which
    # previously escaped through the in-bounds branch (e.g. 20x200 -> 0x196)
    # and is unusable as an image dimension.
    new_height = max(new_height, factor)
    new_width = max(new_width, factor)

    return new_height, new_width
|
||||
|
||||
|
||||
@register_agent(models=r".*Gelato.*")
class GelatoConfig(AsyncAgentConfig):
    """Gelato agent configuration implementing AsyncAgentConfig for click prediction.

    Only grounding is supported: ``predict_click`` turns an instruction plus a
    screenshot into pixel coordinates, while ``predict_step`` always raises
    ``NotImplementedError``.
    """

    def __init__(self):
        # Neither attribute is read or updated by the methods of this class.
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Not supported by this configuration.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()

    async def predict_click(
        self, model: str, image_b64: str, instruction: str, **kwargs
    ) -> Optional[Tuple[float, float]]:
        """
        Predict click coordinates using a Gelato model via litellm.acompletion.

        The screenshot is resized with ``smart_resize`` before it is sent, and
        the predicted point is scaled back into the original image's pixels.

        Args:
            model: The Gelato model name
            image_b64: Base64 encoded image
            instruction: Instruction for where to click
            **kwargs: Extra arguments for ``litellm.acompletion``; they are
                merged last, so they override the defaults set here.

        Returns:
            Tuple of (x, y) coordinates in the original image, floored to whole
            pixels, or None if the model returned no text content
        """
        # Decode base64 image
        image_data = base64.b64decode(image_b64)
        image = Image.open(BytesIO(image_data))
        width, height = image.width, image.height

        # Smart resize the image (similar to qwen_vl_utils)
        resized_height, resized_width = smart_resize(
            height,
            width,
            factor=28,  # Default factor for Qwen models
            min_pixels=3136,
            max_pixels=4096 * 2160,
        )
        resized_image = image.resize((resized_width, resized_height))
        # Ratios that map a point in the resized image back to the original.
        scale_x, scale_y = width / resized_width, height / resized_height

        # Convert resized image back to base64
        buffered = BytesIO()
        resized_image.save(buffered, format="PNG")
        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()

        # Prepare system and user messages
        system_message = {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT.strip()}],
        }

        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
                },
                {"type": "text", "text": instruction},
            ],
        }

        # Prepare API call kwargs
        api_kwargs = {
            "model": model,
            "messages": [system_message, user_message],
            "max_tokens": 2056,
            "temperature": 0.0,
            **kwargs,
        }

        # Use liteLLM acompletion
        response = await litellm.acompletion(**api_kwargs)

        # Extract response text
        output_text = response.choices[0].message.content  # type: ignore
        if not output_text:
            # No text came back, so there is nothing to parse; report failure
            # as the documented None instead of handing None to the parser.
            return None

        # Extract and rescale coordinates
        # NOTE(review): extract_coordinates is defined outside this class; how
        # it signals a parse failure is not visible here -- confirm whether its
        # failure value should also be mapped to None.
        pred_x, pred_y = extract_coordinates(output_text)  # type: ignore
        pred_x *= scale_x
        pred_y *= scale_y

        return (math.floor(pred_x), math.floor(pred_y))

    def get_capabilities(self) -> List[AgentCapability]:
        """Return the capabilities supported by this agent."""
        return ["click"]
|
||||
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
UI-Ins agent loop implementation for click prediction using litellm.acompletion
|
||||
Paper: https://arxiv.org/pdf/2510.20286
|
||||
Code: https://github.com/alibaba/UI-Ins
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import litellm
|
||||
from PIL import Image
|
||||
|
||||
from ..decorators import register_agent
|
||||
from ..loops.base import AsyncAgentConfig
|
||||
from ..types import AgentCapability, AgentResponse, Messages, Tools
|
||||
|
||||
# System prompt sent with every UI-Ins grounding request. It asks the model to
# reply with a reasoning section followed by a JSON "grounding" call whose
# arguments carry a click coordinate.
# NOTE(review): the tag names the prompt refers to ("in tags", "within XML
# tags") and the value after "arguments": appear to be missing from the text --
# confirm against the upstream UI-Ins prompt before relying on it.
SYSTEM_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.\n\n## Output Format\nReturn a json object with a reasoning process in tags, a function name and arguments within XML tags:\n```\n\n...\n\n\n{"name": "grounding", "arguments": }\n\n```\n represents the following item of the action space:\n## Action Space{"action": "click", "coordinate": [x, y]}\nYour task is to accurately locate a UI element based on the instruction. You should first analyze instruction in tags and finally output the function in tags.\n"""
|
||||
|
||||
|
||||
def parse_coordinates(raw_string: Optional[str]) -> Tuple[int, int]:
    """Extract the first ``[x, y]`` integer pair from model output text.

    Args:
        raw_string: Text returned by the model. ``None`` or an empty string is
            treated the same as text that contains no coordinates.

    Returns:
        ``(x, y)`` taken from the first bracketed pair of non-negative
        integers, or the sentinel ``(-1, -1)`` when no such pair is present.
    """
    # A response can come back without any text content; treat that as
    # "nothing found" rather than letting the regex call raise TypeError.
    if not raw_string:
        return -1, -1

    # Only the first pair is wanted, so stop scanning at the first hit.
    match = re.search(r"\[(\d+),\s*(\d+)\]", raw_string)
    if match is None:
        return -1, -1
    return int(match.group(1)), int(match.group(2))
|
||||
|
||||
|
||||
def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 3136,
    max_pixels: int = 8847360,
) -> Tuple[int, int]:
    """Pick a (height, width) made of multiples of ``factor`` within a pixel budget.

    Modeled on the resize helper in qwen_vl_utils. When ``height * width``
    already lies in ``[min_pixels, max_pixels]`` the sides are kept; otherwise
    both sides are scaled by the same ratio so the area lands on the violated
    bound. Each side is then rounded DOWN to a multiple of ``factor`` and
    clamped to at least ``factor``.

    Args:
        height: Source image height in pixels.
        width: Source image width in pixels.
        factor: Granularity every returned side must be a multiple of.
        min_pixels: Smallest area accepted without upscaling.
        max_pixels: Largest area accepted without downscaling.

    Returns:
        ``(new_height, new_width)``; neither side is ever smaller than ``factor``.

    Raises:
        ZeroDivisionError: If ``height * width`` is 0 while ``min_pixels`` is
            positive, because the scale ratio divides by the source area.
    """
    total_pixels = height * width

    if min_pixels <= total_pixels <= max_pixels:
        # Area is acceptable: keep the sides and only snap them to the grid.
        new_height, new_width = height, width
    else:
        # Scale both sides by sqrt(target / area) so the area hits the bound
        # that was violated while the aspect ratio is preserved.
        target_pixels = max_pixels if total_pixels > max_pixels else min_pixels
        scale = (target_pixels / total_pixels) ** 0.5
        new_height = int(height * scale)
        new_width = int(width * scale)

    # Snap down to the factor grid, then clamp. The clamp must also cover the
    # in-bounds case: a side shorter than ``factor`` would otherwise floor to
    # 0 and produce an unusable zero-sized image.
    new_height = max((new_height // factor) * factor, factor)
    new_width = max((new_width // factor) * factor, factor)

    return new_height, new_width
|
||||
|
||||
|
||||
@register_agent(models=r".*UI-Ins.*")
class UIInsConfig(AsyncAgentConfig):
    """UI-Ins agent configuration implementing AsyncAgentConfig for click prediction.

    Only grounding is supported: ``predict_click`` turns an instruction plus a
    screenshot into pixel coordinates, while ``predict_step`` always raises
    ``NotImplementedError``.
    """

    def __init__(self):
        # Neither attribute is read or updated by the methods of this class.
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Not supported by this configuration.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()

    async def predict_click(
        self, model: str, image_b64: str, instruction: str, **kwargs
    ) -> Optional[Tuple[float, float]]:
        """
        Predict click coordinates using UI-Ins model via litellm.acompletion.

        The screenshot is resized with ``smart_resize`` before it is sent, and
        the predicted point is scaled back into the original image's pixels.

        Args:
            model: The UI-Ins model name
            image_b64: Base64 encoded image
            instruction: Instruction for where to click
            **kwargs: Extra arguments for ``litellm.acompletion``; they are
                merged last, so they override the defaults set here.

        Returns:
            Tuple of (x, y) coordinates in the original image, floored to whole
            pixels, or None if prediction fails (no text content, or no
            ``[x, y]`` pair found in the response)
        """
        # Decode base64 image
        image_data = base64.b64decode(image_b64)
        image = Image.open(BytesIO(image_data))
        width, height = image.width, image.height

        # Smart resize the image (similar to qwen_vl_utils)
        resized_height, resized_width = smart_resize(
            height,
            width,
            factor=28,  # Default factor for Qwen models
            min_pixels=3136,
            max_pixels=4096 * 2160,
        )
        resized_image = image.resize((resized_width, resized_height))
        # Ratios that map a point in the resized image back to the original.
        scale_x, scale_y = width / resized_width, height / resized_height

        # Convert resized image back to base64
        buffered = BytesIO()
        resized_image.save(buffered, format="PNG")
        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()

        # Prepare system and user messages
        system_message = {
            "role": "system",
            "content": [
                {"type": "text", "text": "You are a helpful assistant."},
                {"type": "text", "text": SYSTEM_PROMPT},
            ],
        }

        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
                },
                {"type": "text", "text": instruction},
            ],
        }

        # Prepare API call kwargs
        api_kwargs = {
            "model": model,
            "messages": [system_message, user_message],
            "max_tokens": 2056,
            "temperature": 0.0,
            **kwargs,
        }

        # Use liteLLM acompletion
        response = await litellm.acompletion(**api_kwargs)

        # Extract response text
        output_text = response.choices[0].message.content  # type: ignore
        if not output_text:
            # No text came back, so there is nothing to parse.
            return None

        # Extract and rescale coordinates
        pred_x, pred_y = parse_coordinates(output_text)  # type: ignore
        if pred_x < 0 or pred_y < 0:
            # parse_coordinates only matches non-negative integers and returns
            # (-1, -1) when nothing matched; scaling that sentinel would yield
            # a bogus negative click instead of the documented None.
            return None
        pred_x *= scale_x
        pred_y *= scale_y

        return (math.floor(pred_x), math.floor(pred_y))

    def get_capabilities(self) -> List[AgentCapability]:
        """Return the capabilities supported by this agent."""
        return ["click"]
|
||||
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
|
||||
|
||||
[project]
|
||||
name = "cua-agent"
|
||||
version = "0.4.37"
|
||||
version = "0.4.38"
|
||||
description = "CUA (Computer Use) Agent for AI-driven computer interaction"
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
|
||||
Reference in New Issue
Block a user