From 7ed96d5ff6a3491562a439c432e966b50d0c4e26 Mon Sep 17 00:00:00 2001 From: Morgan Dean Date: Fri, 4 Jul 2025 14:53:13 -0700 Subject: [PATCH] Move references from readme.md to docs. Update lucide. Move guides from readme.md to docs. --- README.md | 255 +----------------- docs/content/docs/api/agent/index.mdx | 25 +- docs/content/docs/api/computer/index.mdx | 76 +++++- .../docs/home/{cua => }/compatibility.mdx | 1 + .../cua/computer-use-agent-quickstart.mdx | 31 +-- .../{usage-guide.mdx => cua-usage-guide.mdx} | 0 .../docs/home/cua/dev-container-setup.mdx | 82 ++++++ docs/content/docs/home/{cua => }/faq.mdx | 5 +- docs/content/docs/home/index.mdx | 65 ++++- docs/content/docs/home/libraries/agent.mdx | 17 +- docs/content/docs/home/libraries/computer.mdx | 26 +- docs/content/docs/home/meta.json | 8 +- docs/content/docs/home/telemetry.mdx | 81 ++++++ docs/package.json | 2 +- docs/pnpm-lock.yaml | 10 +- 15 files changed, 386 insertions(+), 298 deletions(-) rename docs/content/docs/home/{cua => }/compatibility.mdx (99%) rename docs/content/docs/home/cua/{usage-guide.mdx => cua-usage-guide.mdx} (100%) create mode 100644 docs/content/docs/home/cua/dev-container-setup.mdx rename docs/content/docs/home/{cua => }/faq.mdx (96%) create mode 100644 docs/content/docs/home/telemetry.mdx diff --git a/README.md b/README.md index d418a67a..cb83cfd7 100644 --- a/README.md +++ b/README.md @@ -47,146 +47,25 @@
-# 🚀 Quick Start with a Computer-Use Agent UI +# 🚀 Quick Start -**Need to automate desktop tasks? Launch the Computer-Use Agent UI with a single command.** +Read our guide on getting started with a Computer-Use Agent: +[Computer-Use Agent Quickstart](https://docs.trycua.com/home/guides/computer-use-agent-quickstart) -### Option 1: Fully-managed install with Docker (recommended) +Get started using C/ua services on your machine: +[C/ua Usage Guide](https://docs.trycua.com/home/guides/cua-usage-guide) -*Docker-based guided install for quick use* +Set up a development environment with the Dev Container: +[Dev Container Setup](https://docs.trycua.com/home/guides/dev-container-setup) -**macOS/Linux/Windows (via WSL):** +## Lume -```bash -# Requires Docker -/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/scripts/playground-docker.sh)" -``` - -This script will guide you through setup using Docker containers and launch the Computer-Use Agent UI. - ---- - -### Option 2: [Dev Container](./.devcontainer/README.md) - -*Best for contributors and development* - -This repository includes a [Dev Container](./.devcontainer/README.md) configuration that simplifies setup to a few steps: - -1. **Install the Dev Containers extension ([VS Code](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))** -2. **Open the repository in the Dev Container:** - - Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS) - - Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: `https://github.com/trycua/cua.git` (if not cloned) or `Dev Containers: Open Folder in Container...` (if git cloned). - > **Note**: On WindSurf, the post install hook might not run automatically. If so, run `/bin/bash .devcontainer/post-install.sh` manually. -3. 
**Open the VS Code workspace:** Once the post-install.sh is done running, open the `.vscode/py.code-workspace` workspace and press ![Open Workspace](https://github.com/user-attachments/assets/923bdd43-8c8f-4060-8d78-75bfa302b48c) -. -4. **Run the Agent UI example:** Click ![Run Agent UI](https://github.com/user-attachments/assets/7a61ef34-4b22-4dab-9864-f86bf83e290b) - to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** to enable remote debugging, select 'Yes' to proceed. -5. **Access the Gradio UI:** The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. - ---- - -### Option 3: PyPI - -*Direct Python package installation* - -```bash -# conda create -yn cua python==3.12 - -pip install -U "cua-computer[all]" "cua-agent[all]" -python -m agent.ui # Start the agent UI -``` - -Or check out the [Usage Guide](#-usage-guide) to learn how to use our Python SDK in your own code. - ---- - -## Supported [Agent Loops](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - -- [UITARS-1.5](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Run locally on Apple Silicon with MLX, or use cloud providers -- [OpenAI CUA](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Use OpenAI's Computer-Use Preview model -- [Anthropic CUA](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Use Anthropic's Computer-Use capabilities -- [OmniParser-v2.0](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Control UI with [Set-of-Marks prompting](https://som-gpt4v.github.io/) using any vision model - -## 🖥️ Compatibility - -For detailed compatibility information including host OS support, VM emulation capabilities, and model provider compatibility, see the [Compatibility Matrix](./COMPATIBILITY.md). - -
-
- -# 🐍 Usage Guide - -Follow these steps to use C/ua in your own Python code. See [Developer Guide](./docs/Developer-Guide.md) for building from source. - -### Step 1: Install Lume CLI - -```bash -/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" -``` - -Lume CLI manages high-performance macOS/Linux VMs with near-native speed on Apple Silicon. - -### Step 2: Pull the macOS CUA Image - -```bash -lume pull macos-sequoia-cua:latest -``` - -The macOS CUA image contains the default Mac apps and the Computer Server for easy automation. - -### Step 3: Install Python SDK - -```bash -pip install "cua-computer[all]" "cua-agent[all]" -``` - -### Step 4: Use in Your Code - -```python -from computer import Computer -from agent import ComputerAgent, LLM - -async def main(): - # Start a local macOS VM - computer = Computer(os_type="macos") - await computer.run() - - # Or with C/ua Cloud Container - computer = Computer( - os_type="linux", - api_key="your_cua_api_key_here", - name="your_container_name_here" - ) - - # Example: Direct control of a macOS VM with Computer - await computer.interface.left_click(100, 200) - await computer.interface.type_text("Hello, world!") - screenshot_bytes = await computer.interface.screenshot() - - # Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit - agent = ComputerAgent( - computer=computer, - loop="uitars", - model=LLM(provider="mlxvlm", name="mlx-community/UI-TARS-1.5-7B-6bit") - ) - async for result in agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide"): - print(result) - -if __name__ == "__main__": - asyncio.run(main()) -``` - -For ready-to-use examples, check out our [Notebooks](./notebooks/) collection. - -### Lume CLI Reference +For managing and creating virtual machines on macOS, check out [Lume](./libs/lume/README.md). 
```bash # Install Lume CLI and background service curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash -# List all VMs -lume ls - # Pull a VM image lume pull macos-sequoia-cua:latest @@ -198,12 +77,9 @@ lume run macos-sequoia-cua:latest # Stop a VM lume stop macos-sequoia-cua_latest - -# Delete a VM -lume delete macos-sequoia-cua_latest ``` -### Lumier CLI Reference +## Lumier For advanced container-like virtualization, check out [Lumier](./libs/lumier/README.md) - a Docker interface for macOS and Linux VMs. @@ -226,7 +102,7 @@ docker run -it --rm \ trycua/lumier:latest ``` -## Resources +# Resources - [How to use the MCP Server with Claude Desktop or other MCP clients](./libs/python/mcp-server/README.md) - One of the easiest ways to get started with C/ua - [How to use OpenAI Computer-Use, Anthropic, OmniParser, or UI-TARS for your Computer-Use Agent](./libs/python/agent/README.md) @@ -234,7 +110,7 @@ docker run -it --rm \ - [Training Computer-Use Models: Collecting Human Trajectories with C/ua (Part 1)](https://www.trycua.com/blog/training-computer-use-models-trajectories-1) - [Build Your Own Operator on macOS (Part 1)](https://www.trycua.com/blog/build-your-own-operator-on-macos-1) -## Modules +# Modules | Module | Description | Installation | |--------|-------------|---------------| @@ -249,113 +125,6 @@ docker run -it --rm \ | [**Core (Python)**](./libs/python/core/README.md) | Python Core utilities | `pip install cua-core` | | [**Core (Typescript)**](./libs/typescript/core/README.md) | Typescript Core utilities | `npm install @trycua/core` | -## Computer Interface Reference - -For complete examples, see [computer_examples.py](./examples/computer_examples.py) or [computer_nb.ipynb](./notebooks/computer_nb.ipynb) - -```python -# Shell Actions -result = await computer.interface.run_command(cmd) # Run shell command -# result.stdout, result.stderr, result.returncode - -# Mouse Actions -await 
computer.interface.left_click(x, y) # Left click at coordinates -await computer.interface.right_click(x, y) # Right click at coordinates -await computer.interface.double_click(x, y) # Double click at coordinates -await computer.interface.move_cursor(x, y) # Move cursor to coordinates -await computer.interface.drag_to(x, y, duration) # Drag to coordinates -await computer.interface.get_cursor_position() # Get current cursor position -await computer.interface.mouse_down(x, y, button="left") # Press and hold a mouse button -await computer.interface.mouse_up(x, y, button="left") # Release a mouse button - -# Keyboard Actions -await computer.interface.type_text("Hello") # Type text -await computer.interface.press_key("enter") # Press a single key -await computer.interface.hotkey("command", "c") # Press key combination -await computer.interface.key_down("command") # Press and hold a key -await computer.interface.key_up("command") # Release a key - -# Scrolling Actions -await computer.interface.scroll(x, y) # Scroll the mouse wheel -await computer.interface.scroll_down(clicks) # Scroll down -await computer.interface.scroll_up(clicks) # Scroll up - -# Screen Actions -await computer.interface.screenshot() # Take a screenshot -await computer.interface.get_screen_size() # Get screen dimensions - -# Clipboard Actions -await computer.interface.set_clipboard(text) # Set clipboard content -await computer.interface.copy_to_clipboard() # Get clipboard content - -# File System Operations -await computer.interface.file_exists(path) # Check if file exists -await computer.interface.directory_exists(path) # Check if directory exists -await computer.interface.read_text(path, encoding="utf-8") # Read file content -await computer.interface.write_text(path, content, encoding="utf-8") # Write file content -await computer.interface.read_bytes(path) # Read file content as bytes -await computer.interface.write_bytes(path, content) # Write file content as bytes -await 
computer.interface.delete_file(path) # Delete file -await computer.interface.create_dir(path) # Create directory -await computer.interface.delete_dir(path) # Delete directory -await computer.interface.list_dir(path) # List directory contents - -# Accessibility -await computer.interface.get_accessibility_tree() # Get accessibility tree - -# Python Virtual Environment Operations -await computer.venv_install("demo_venv", ["requests", "macos-pyxa"]) # Install packages in a virtual environment -await computer.venv_cmd("demo_venv", "python -c 'import requests; print(requests.get(`https://httpbin.org/ip`).json())'") # Run a shell command in a virtual environment -await computer.venv_exec("demo_venv", python_function_or_code, *args, **kwargs) # Run a Python function in a virtual environment and return the result / raise an exception - -# Example: Use sandboxed functions to execute code in a C/ua Container -from computer.helpers import sandboxed - -@sandboxed("demo_venv") -def greet_and_print(name): - """Get the HTML of the current Safari tab""" - import PyXA - safari = PyXA.Application("Safari") - html = safari.current_document.source() - print(f"Hello from inside the container, {name}!") - return {"greeted": name, "safari_html": html} - -# When a @sandboxed function is called, it will execute in the container -result = await greet_and_print("C/ua") -# Result: {"greeted": "C/ua", "safari_html": "..."} -# stdout and stderr are also captured and printed / raised -print("Result from sandboxed function:", result) -``` - -## ComputerAgent Reference - -For complete examples, see [agent_examples.py](./examples/agent_examples.py) or [agent_nb.ipynb](./notebooks/agent_nb.ipynb) - -```python -# Import necessary components -from agent import ComputerAgent, LLM, AgentLoop, LLMProvider - -# UI-TARS-1.5 agent for local execution with MLX -ComputerAgent(loop=AgentLoop.UITARS, model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-6bit")) -# OpenAI Computer-Use agent 
using OPENAI_API_KEY -ComputerAgent(loop=AgentLoop.OPENAI, model=LLM(provider=LLMProvider.OPENAI, name="computer-use-preview")) -# Anthropic Claude agent using ANTHROPIC_API_KEY -ComputerAgent(loop=AgentLoop.ANTHROPIC, model=LLM(provider=LLMProvider.ANTHROPIC)) - -# OmniParser loop for UI control using Set-of-Marks (SOM) prompting and any vision LLM -ComputerAgent(loop=AgentLoop.OMNI, model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:12b-it-q4_K_M")) -# OpenRouter example using OAICOMPAT provider -ComputerAgent( - loop=AgentLoop.OMNI, - model=LLM( - provider=LLMProvider.OAICOMPAT, - name="openai/gpt-4o-mini", - provider_base_url="https://openrouter.ai/api/v1" - ), - api_key="your-openrouter-api-key" -) -``` - ## Community Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas, get assistance, or share your demos! diff --git a/docs/content/docs/api/agent/index.mdx b/docs/content/docs/api/agent/index.mdx index 43a53470..44c654fc 100644 --- a/docs/content/docs/api/agent/index.mdx +++ b/docs/content/docs/api/agent/index.mdx @@ -11,4 +11,27 @@ The Agent library provides programmatic interfaces for AI agent interactions. ## API Documentation -Coming soon. 
+```python +# Import necessary components +from agent import ComputerAgent, LLM, AgentLoop, LLMProvider + +# UI-TARS-1.5 agent for local execution with MLX +ComputerAgent(loop=AgentLoop.UITARS, model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-6bit")) +# OpenAI Computer-Use agent using OPENAI_API_KEY +ComputerAgent(loop=AgentLoop.OPENAI, model=LLM(provider=LLMProvider.OPENAI, name="computer-use-preview")) +# Anthropic Claude agent using ANTHROPIC_API_KEY +ComputerAgent(loop=AgentLoop.ANTHROPIC, model=LLM(provider=LLMProvider.ANTHROPIC)) + +# OmniParser loop for UI control using Set-of-Marks (SOM) prompting and any vision LLM +ComputerAgent(loop=AgentLoop.OMNI, model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:12b-it-q4_K_M")) +# OpenRouter example using OAICOMPAT provider +ComputerAgent( + loop=AgentLoop.OMNI, + model=LLM( + provider=LLMProvider.OAICOMPAT, + name="openai/gpt-4o-mini", + provider_base_url="https://openrouter.ai/api/v1" + ), + api_key="your-openrouter-api-key" +) +``` diff --git a/docs/content/docs/api/computer/index.mdx b/docs/content/docs/api/computer/index.mdx index ed55279a..0d57d60d 100644 --- a/docs/content/docs/api/computer/index.mdx +++ b/docs/content/docs/api/computer/index.mdx @@ -8,6 +8,78 @@ The Computer API reference documentation is currently under development. The Computer library provides programmatic interfaces for computer automation and control. -## API Documentation +## Reference -Coming soon. 
+```python +# Shell Actions +result = await computer.interface.run_command(cmd) # Run shell command +# result.stdout, result.stderr, result.returncode + +# Mouse Actions +await computer.interface.left_click(x, y) # Left click at coordinates +await computer.interface.right_click(x, y) # Right click at coordinates +await computer.interface.double_click(x, y) # Double click at coordinates +await computer.interface.move_cursor(x, y) # Move cursor to coordinates +await computer.interface.drag_to(x, y, duration) # Drag to coordinates +await computer.interface.get_cursor_position() # Get current cursor position +await computer.interface.mouse_down(x, y, button="left") # Press and hold a mouse button +await computer.interface.mouse_up(x, y, button="left") # Release a mouse button + +# Keyboard Actions +await computer.interface.type_text("Hello") # Type text +await computer.interface.press_key("enter") # Press a single key +await computer.interface.hotkey("command", "c") # Press key combination +await computer.interface.key_down("command") # Press and hold a key +await computer.interface.key_up("command") # Release a key + +# Scrolling Actions +await computer.interface.scroll(x, y) # Scroll the mouse wheel +await computer.interface.scroll_down(clicks) # Scroll down +await computer.interface.scroll_up(clicks) # Scroll up + +# Screen Actions +await computer.interface.screenshot() # Take a screenshot +await computer.interface.get_screen_size() # Get screen dimensions + +# Clipboard Actions +await computer.interface.set_clipboard(text) # Set clipboard content +await computer.interface.copy_to_clipboard() # Get clipboard content + +# File System Operations +await computer.interface.file_exists(path) # Check if file exists +await computer.interface.directory_exists(path) # Check if directory exists +await computer.interface.read_text(path, encoding="utf-8") # Read file content +await computer.interface.write_text(path, content, encoding="utf-8") # Write file content +await 
computer.interface.read_bytes(path) # Read file content as bytes +await computer.interface.write_bytes(path, content) # Write file content as bytes +await computer.interface.delete_file(path) # Delete file +await computer.interface.create_dir(path) # Create directory +await computer.interface.delete_dir(path) # Delete directory +await computer.interface.list_dir(path) # List directory contents + +# Accessibility +await computer.interface.get_accessibility_tree() # Get accessibility tree + +# Python Virtual Environment Operations +await computer.venv_install("demo_venv", ["requests", "macos-pyxa"]) # Install packages in a virtual environment +await computer.venv_cmd("demo_venv", "python -c 'import requests; print(requests.get(`https://httpbin.org/ip`).json())'") # Run a shell command in a virtual environment +await computer.venv_exec("demo_venv", python_function_or_code, *args, **kwargs) # Run a Python function in a virtual environment and return the result / raise an exception + +# Example: Use sandboxed functions to execute code in a C/ua Container +from computer.helpers import sandboxed + +@sandboxed("demo_venv") +def greet_and_print(name): + """Get the HTML of the current Safari tab""" + import PyXA + safari = PyXA.Application("Safari") + html = safari.current_document.source() + print(f"Hello from inside the container, {name}!") + return {"greeted": name, "safari_html": html} + +# When a @sandboxed function is called, it will execute in the container +result = await greet_and_print("C/ua") +# Result: {"greeted": "C/ua", "safari_html": "..."} +# stdout and stderr are also captured and printed / raised +print("Result from sandboxed function:", result) +``` diff --git a/docs/content/docs/home/cua/compatibility.mdx b/docs/content/docs/home/compatibility.mdx similarity index 99% rename from docs/content/docs/home/cua/compatibility.mdx rename to docs/content/docs/home/compatibility.mdx index 443e3766..81b39ed6 100644 --- a/docs/content/docs/home/cua/compatibility.mdx 
+++ b/docs/content/docs/home/compatibility.mdx @@ -1,6 +1,7 @@ --- title: Compatibility description: Compatibility information for running c/ua services. +icon: MonitorCheck --- # Host OS Compatibility diff --git a/docs/content/docs/home/cua/computer-use-agent-quickstart.mdx b/docs/content/docs/home/cua/computer-use-agent-quickstart.mdx index a5a2597b..634fca74 100644 --- a/docs/content/docs/home/cua/computer-use-agent-quickstart.mdx +++ b/docs/content/docs/home/cua/computer-use-agent-quickstart.mdx @@ -20,32 +20,7 @@ Run the following command to setup the Docker containers and launch the Computer _Best for contributors and active development._ -This repository includes a [Dev Container](./.devcontainer/README.md) configuration that simplifies setup to a few steps: - -1. **Install the Dev Containers extension ([VSCode](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))** -2. **Open the repository in the Dev Container:** - - - Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS) - - **If you have _not_ cloned the repo:** - - - Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: - - ``` - https://github.com/trycua/cua.git - ``` - - - **If you have already cloned the repo:** - - Select `Dev Containers: Open Folder in Container...` and choose your local folder. - > **Note**: On WindSurf, the post install hook might not run automatically. If it doesn't, run: - > - > ``` - > /bin/bash .devcontainer/post-install.sh - > ``` - -3. **Open the VS Code workspace:** Once the post-install.sh is done running, open the python workspace located at `.vscode/py.code-workspace`. -4. **Run the Agent UI example:** Click - to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** for remote debugging, select 'Yes' to proceed. -5. **Access the Gradio UI:** The Gradio UI will now be accessible http://localhost:7860. 
+Visit the [Dev Container](./dev-container-setup) guide to use the configuration that simplifies development setup to a few steps. ## PyPI @@ -58,7 +33,7 @@ pip install -U "cua-computer[all]" "cua-agent[all]" python -m agent.ui # Start the agent UI ``` -Or check out the [Usage Guide](#-usage-guide) to learn how to use our Python SDK in your own code. +Or check out the [Usage Guide](./cua-usage-guide) to learn how to use our Python SDK in your own code. --- @@ -73,4 +48,4 @@ Or check out the [Usage Guide](#-usage-guide) to learn how to use our Python SDK # Compatibility -For detailed compatibility information including host OS support, VM emulation capabilities, and model provider compatibility, see the [Compatibility Guide](./compatibility). +For detailed compatibility information including host OS support, VM emulation capabilities, and model provider compatibility, see the [Compatibility Guide](../compatibility). diff --git a/docs/content/docs/home/cua/usage-guide.mdx b/docs/content/docs/home/cua/cua-usage-guide.mdx similarity index 100% rename from docs/content/docs/home/cua/usage-guide.mdx rename to docs/content/docs/home/cua/cua-usage-guide.mdx diff --git a/docs/content/docs/home/cua/dev-container-setup.mdx b/docs/content/docs/home/cua/dev-container-setup.mdx new file mode 100644 index 00000000..0ca80b01 --- /dev/null +++ b/docs/content/docs/home/cua/dev-container-setup.mdx @@ -0,0 +1,82 @@ +--- +title: Dev Container Setup +description: Learn how to set up the Dev Container configuration that simplifies the development setup. +--- + +## Quick Start + +![Guide-Animation](https://github.com/user-attachments/assets/447eaeeb-0eec-4354-9a82-44446e202e06) + +1. **Install the Dev Containers extension ([VSCode](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))** +2. 
**Open the repository in the Dev Container:** + + - Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS) + - **If you have _not_ cloned the repo:** + + - Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: + + ``` + https://github.com/trycua/cua.git + ``` + + - **If you have already cloned the repo:** - Select `Dev Containers: Open Folder in Container...` and choose your local folder. + + The post install hook might not run automatically if you're using + Windsurf. If it didn't run, execute it manually: +
+         /bin/bash .devcontainer/post-install.sh
+       
+
+ +3. **Open the VS Code workspace:** Once the post-install.sh is done running, open the python workspace located at `.vscode/py.code-workspace`. +4. **Run the Agent UI example:** Click + to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** for remote debugging, select 'Yes' to proceed. +5. **Access the Gradio UI:** The Gradio UI will now be accessible at http://localhost:7860. + +## What's Included + +The dev container automatically: + +- ✅ Sets up Python 3.11 environment +- ✅ Installs all system dependencies (build tools, OpenGL, etc.) +- ✅ Configures Python paths for all packages +- ✅ Installs Python extensions (Black, Ruff, Pylance) +- ✅ Forwards port 7860 for the Gradio web UI +- ✅ Mounts your source code for live editing +- ✅ Creates the required `.env.local` file + +## Running Examples + +After the container is built, you can run examples directly: + +```bash +# Run the agent UI (Gradio web interface) +python examples/agent_ui_examples.py + +# Run computer examples +python examples/computer_examples.py + +# Run computer UI examples +python examples/computer_ui_examples.py +``` + +The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. 
+ +## Environment Variables + +You'll need to add your API keys to `.env.local`: + +```bash +# Required for Anthropic provider +ANTHROPIC_API_KEY=your_anthropic_key_here + +# Required for OpenAI provider +OPENAI_API_KEY=your_openai_key_here +``` + +## Notes + +- The container connects to `host.docker.internal:7777` for Lume server communication +- All Python packages are pre-installed and configured +- Source code changes are reflected immediately (no rebuild needed) +- The container uses the same Dockerfile as the regular Docker development environment diff --git a/docs/content/docs/home/cua/faq.mdx b/docs/content/docs/home/faq.mdx similarity index 96% rename from docs/content/docs/home/cua/faq.mdx rename to docs/content/docs/home/faq.mdx index 37c1137f..99327b8f 100644 --- a/docs/content/docs/home/cua/faq.mdx +++ b/docs/content/docs/home/faq.mdx @@ -1,6 +1,7 @@ --- title: FAQ -description: Frequently Asked Questions +description: C/ua's frequently asked questions. Find answers to the most common issues or questions when using C/ua tools. +icon: CircleQuestionMark --- ### Why a local sandbox? @@ -132,4 +133,4 @@ Where `` is the process ID shown in the output of the `lsof` command. After ### What information does Cua track? -Cua tracks anonymized usage and error report statistics; we ascribe to Posthog's approach as detailed [here](https://posthog.com/blog/open-source-telemetry-ethical). If you would like to opt out of sending anonymized info, you can set `telemetry_enabled` to false in the Computer or Agent constructor. Check out our [Telemetry](Telemetry.md) documentation for more details. +Cua tracks anonymized usage and error report statistics; we subscribe to Posthog's approach as detailed [here](https://posthog.com/blog/open-source-telemetry-ethical). If you would like to opt out of sending anonymized info, you can set `telemetry_enabled` to false in the Computer or Agent constructor. Check out our [telemetry](./telemetry) documentation for more details. 
diff --git a/docs/content/docs/home/index.mdx b/docs/content/docs/home/index.mdx index 8d6a7868..7a756852 100644 --- a/docs/content/docs/home/index.mdx +++ b/docs/content/docs/home/index.mdx @@ -1,13 +1,70 @@ --- title: Home -description: c/ua Documentation Home +icon: House --- ## What is C/ua? -C/ua is a collection of libraries and tools for building Computer-Use AI agents. +C/ua is a collection of cross-platform libraries and tools for building Computer-Use AI agents. + +## Quick Start - - + + Read our guide on getting started with a Computer-Use Agent. + + + + Get started using C/ua services on your machine. + + + + Set up a development environment with the Dev Container. + + + +## Resources + +- [How to use the MCP Server with Claude Desktop or other MCP clients](./libraries/mcp-server) - One of the easiest ways to get started with C/ua +- [How to use OpenAI Computer-Use, Anthropic, OmniParser, or UI-TARS for your Computer-Use Agent](./libraries/agent) +- [How to use Lume CLI for managing desktops](./libraries/lume) +- [Training Computer-Use Models: Collecting Human Trajectories with C/ua (Part 1)](https://www.trycua.com/blog/training-computer-use-models-trajectories-1) +- [Build Your Own Operator on macOS (Part 1)](https://www.trycua.com/blog/build-your-own-operator-on-macos-1) + +## Modules + +| Module | Description | Installation | +| ------------------------------------------------------ | -------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| [**Lume**](./libraries/lume.mdx) | VM management for macOS/Linux using Apple's Virtualization.Framework | `curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh \| bash` | +| [**Lumier**](./libraries/lumier.mdx) | Docker interface for macOS and Linux VMs | `docker pull trycua/lumier:latest` | +| [**Computer**](./libraries/computer.mdx) | Python Interface for 
controlling virtual machines | `pip install "cua-computer[all]"`

`npm install @trycua/computer` | +| [**Agent**](./libraries/agent.mdx) | AI agent framework for automating tasks | `pip install "cua-agent[all]"` | +| [**MCP Server**](./libraries/mcp-server.mdx) | MCP server for using CUA with Claude Desktop | `pip install cua-mcp-server` | +| [**SOM**](./libs/python/som/README.md) | Set-of-Mark library for Agent | `pip install cua-som` | +| [**Computer Server**](./libraries/computer-server.mdx) | Server component for Computer | `pip install cua-computer-server` | +| [**Core**](./libraries/core.mdx) | Python Core utilities | `pip install cua-core`

`npm install @trycua/core` | + +## Community + +Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas, get assistance, or share your demos! + +## License + +Cua is open-sourced under the MIT License - see the [LICENSE](https://github.com/trycua/cua/blob/main/LICENSE.md) file for details. + +Microsoft's OmniParser, which is used in this project, is licensed under the Creative Commons Attribution 4.0 International License (CC-BY-4.0) - see the [OmniParser LICENSE](https://github.com/microsoft/OmniParser/blob/master/LICENSE) file for details. + +## Contributing + +We welcome contributions to CUA! Please refer to our [Contributing Guidelines](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md) for details. + +## Trademarks + +Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonical are registered trademarks of Canonical Ltd. Microsoft is a registered trademark of Microsoft Corporation. This project is not affiliated with, endorsed by, or sponsored by Apple Inc., Canonical Ltd., or Microsoft Corporation. diff --git a/docs/content/docs/home/libraries/agent.mdx b/docs/content/docs/home/libraries/agent.mdx index a14dbd69..cc9ecf4e 100644 --- a/docs/content/docs/home/libraries/agent.mdx +++ b/docs/content/docs/home/libraries/agent.mdx @@ -10,8 +10,7 @@ title: Agent margin: '0 auto', width: '100%', justifyContent: 'center', - }} -> + }}> +
+ + Reference + +
+ **cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandbox created with C/ua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen). ### Get started with Agent diff --git a/docs/content/docs/home/libraries/computer.mdx b/docs/content/docs/home/libraries/computer.mdx index 86c340b9..fffde4c5 100644 --- a/docs/content/docs/home/libraries/computer.mdx +++ b/docs/content/docs/home/libraries/computer.mdx @@ -2,7 +2,7 @@ title: Computer --- -import { Tabs, Tab } from "fumadocs-ui/components/tabs"; +import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
+
+ + Reference + +
-**cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes. It's PyAutoGUI-compatible and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments. +**cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes. It's PyAutoGUI-compatible and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](./lume.mdx) for creating and managing sandbox environments.
@@ -219,8 +232,7 @@ For examples, see [Computer UI Examples](https://github.com/trycua/cua/tree/main + width="600"> Record yourself performing various computer tasks using the UI. @@ -232,8 +244,7 @@ Record yourself performing various computer tasks using the UI. + width="600"> Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding"). @@ -249,8 +260,7 @@ Repeat steps 3 and 4 until you have a good amount of demonstrations covering dif + width="600"> Upload your dataset to Huggingface by: diff --git a/docs/content/docs/home/meta.json b/docs/content/docs/home/meta.json index ba0260a0..2c71daa7 100644 --- a/docs/content/docs/home/meta.json +++ b/docs/content/docs/home/meta.json @@ -4,9 +4,13 @@ "root": true, "defaultOpen": true, "pages": [ - "---[House]c/ua---", + "index", + "compatibility", + "faq", + "telemetry", + "---[BookCopy]Guides---", "...cua", "---[Library]Libraries---", "...libraries" ] -} +} \ No newline at end of file diff --git a/docs/content/docs/home/telemetry.mdx b/docs/content/docs/home/telemetry.mdx new file mode 100644 index 00000000..b74289c1 --- /dev/null +++ b/docs/content/docs/home/telemetry.mdx @@ -0,0 +1,81 @@ +--- +title: Telemetry +description: This document explains how telemetry works in CUA libraries and how you can control it. +icon: RadioTower +--- + +# Telemetry in CUA + +CUA tracks anonymized usage and error report statistics; we subscribe to PostHog's approach as detailed [here](https://posthog.com/blog/open-source-telemetry-ethical). If you would like to opt out of sending anonymized info, you can set `telemetry_enabled` to false. + +## What telemetry data we collect + +CUA libraries collect minimal anonymous usage data to help improve our software. 
The telemetry data we collect is specifically limited to: + +- Basic system information: + - Operating system (e.g., 'darwin', 'win32', 'linux') + - Python version (e.g., '3.11.0') +- Module initialization events: + - When a module (like 'computer' or 'agent') is imported + - Version of the module being used + +We do NOT collect: + +- Personal information +- Contents of files +- Specific text being typed +- Actual screenshots or screen contents +- User-specific identifiers +- API keys +- File contents +- Application data or content +- User interactions with the computer +- Information about files being accessed + +## Controlling Telemetry + +We are committed to transparency and user control over telemetry. There are two ways to control telemetry: + +### 1. Environment Variable (Global Control) + +Telemetry is enabled by default. To disable telemetry, set the `CUA_TELEMETRY_ENABLED` environment variable to a falsy value (`0`, `false`, `no`, or `off`): + +```bash +# Disable telemetry before running your script +export CUA_TELEMETRY_ENABLED=false + +# Or as part of the command +CUA_TELEMETRY_ENABLED=false python your_script.py + +``` + +Or from Python: + +```python +import os +os.environ["CUA_TELEMETRY_ENABLED"] = "false" +``` + +### 2. Instance-Level Control + +You can control telemetry for specific CUA instances by setting `telemetry_enabled` when creating them: + +```python +# Disable telemetry for a specific Computer instance +computer = Computer(telemetry_enabled=False) + +# Enable telemetry for a specific Agent instance +agent = ComputerAgent(telemetry_enabled=True) +``` + +You can check if telemetry is enabled for an instance: + +```python +print(computer.telemetry_enabled) # Will print True or False +``` + +Note that telemetry settings must be configured during initialization and cannot be changed after the object is created. + +## Transparency + +We believe in being transparent about the data we collect. 
If you have any questions about our telemetry practices, please open an issue on our GitHub repository. diff --git a/docs/package.json b/docs/package.json index 01bba20e..765b79f3 100644 --- a/docs/package.json +++ b/docs/package.json @@ -12,7 +12,7 @@ "fumadocs-core": "15.5.1", "fumadocs-mdx": "11.6.7", "fumadocs-ui": "15.5.1", - "lucide-react": "^0.514.0", + "lucide-react": "^0.525.0", "next": "15.3.3", "react": "^19.1.0", "react-dom": "^19.1.0" diff --git a/docs/pnpm-lock.yaml b/docs/pnpm-lock.yaml index c3caa0a8..1da75851 100644 --- a/docs/pnpm-lock.yaml +++ b/docs/pnpm-lock.yaml @@ -18,8 +18,8 @@ importers: specifier: 15.5.1 version: 15.5.1(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(next@15.3.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(tailwindcss@4.1.10) lucide-react: - specifier: ^0.514.0 - version: 0.514.0(react@19.1.0) + specifier: ^0.525.0 + version: 0.525.0(react@19.1.0) next: specifier: 15.3.3 version: 15.3.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0) @@ -1391,8 +1391,8 @@ packages: resolution: {integrity: sha512-QIXZUBJUx+2zHUdQujWejBkcD9+cs94tLn0+YL8UrCh+D5sCXZ4c7LaEH48pNwRY3MLDgqUFyhlCyjJPf1WP0A==} engines: {node: 20 || >=22} - lucide-react@0.514.0: - resolution: {integrity: sha512-HXD0OAMd+JM2xCjlwG1EGW9Nuab64dhjO3+MvdyD+pSUeOTBaVAPhQblKIYmmX4RyBYbdzW0VWnJpjJmxWGr6w==} + lucide-react@0.525.0: + resolution: {integrity: sha512-Tm1txJ2OkymCGkvwoHt33Y2JpN5xucVq1slHcgE6Lk0WjDfjgKWor5CdVER8U6DvcfMwh4M8XxmpTiyzfmfDYQ==} peerDependencies: react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0 @@ -3160,7 +3160,7 @@ snapshots: lru-cache@11.1.0: {} - lucide-react@0.514.0(react@19.1.0): + lucide-react@0.525.0(react@19.1.0): dependencies: react: 19.1.0