mirror of
https://github.com/trycua/computer.git
synced 2025-12-31 10:29:59 -06:00
Merge upstream/main to resolve conflicts with trycua/cua
This commit is contained in:
@@ -1,66 +0,0 @@
|
||||
# Dev Container Setup
|
||||
|
||||
This repository includes a Dev Container configuration that simplifies the development setup to just a few steps:
|
||||
|
||||
## Quick Start
|
||||
|
||||

|
||||
|
||||
1. **Install the Dev Containers extension ([VS Code](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))**
|
||||
2. **Open the repository in the Dev Container:**
|
||||
- Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS)
|
||||
- Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: `https://github.com/trycua/cua.git` (if not cloned) or `Dev Containers: Open Folder in Container...` (if git cloned).
|
||||
> **Note**: On WindSurf, the post install hook might not run automatically. If so, run `/bin/bash .devcontainer/post-install.sh` manually.
|
||||
3. **Open the VS Code workspace:** Once the post-install.sh is done running, open the `.vscode/py.code-workspace` workspace and press **Open Workspace**.
|
||||
4. **Run the Agent UI example:** Click 
|
||||
to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** to enable remote debugging, select 'Yes' to proceed.
|
||||
5. **Access the Gradio UI:** The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine.
|
||||
|
||||
## What's Included
|
||||
|
||||
The dev container automatically:
|
||||
|
||||
- ✅ Sets up Python 3.11 environment
|
||||
- ✅ Installs all system dependencies (build tools, OpenGL, etc.)
|
||||
- ✅ Configures Python paths for all packages
|
||||
- ✅ Installs Python extensions (Black, Ruff, Pylance)
|
||||
- ✅ Forwards port 7860 for the Gradio web UI
|
||||
- ✅ Mounts your source code for live editing
|
||||
- ✅ Creates the required `.env.local` file
|
||||
|
||||
## Running Examples
|
||||
|
||||
After the container is built, you can run examples directly:
|
||||
|
||||
```bash
|
||||
# Run the agent UI (Gradio web interface)
|
||||
python examples/agent_ui_examples.py
|
||||
|
||||
# Run computer examples
|
||||
python examples/computer_examples.py
|
||||
|
||||
# Run computer UI examples
|
||||
python examples/computer_ui_examples.py
|
||||
```
|
||||
|
||||
The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
You'll need to add your API keys to `.env.local`:
|
||||
|
||||
```bash
|
||||
# Required for Anthropic provider
|
||||
ANTHROPIC_API_KEY=your_anthropic_key_here
|
||||
|
||||
# Required for OpenAI provider
|
||||
OPENAI_API_KEY=your_openai_key_here
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- The container connects to `host.docker.internal:7777` for Lume server communication
|
||||
- All Python packages are pre-installed and configured
|
||||
- Source code changes are reflected immediately (no rebuild needed)
|
||||
- The container uses the same Dockerfile as the regular Docker development environment
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"name": "Cua - OSS",
|
||||
"build": {
|
||||
"dockerfile": "../Dockerfile"
|
||||
},
|
||||
"containerEnv": {
|
||||
"DISPLAY": "",
|
||||
"PYLUME_HOST": "host.docker.internal"
|
||||
},
|
||||
"forwardPorts": [7860],
|
||||
"portsAttributes": {
|
||||
"7860": {
|
||||
"label": "Cua web client (Gradio)",
|
||||
"onAutoForward": "silent"
|
||||
}
|
||||
},
|
||||
"postCreateCommand": "/bin/bash .devcontainer/post-install.sh"
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env bash
#
# Dev Container post-create hook.
# Writes the .env.local file, runs the project build script, and then prints
# the manual next steps for opening the VS Code workspace.

# Root of the repository checkout inside the container.
WORKSPACE="/workspaces/cua"

# Setup .env.local (overwrites any existing file)
echo "PYTHON_BIN=python" > "${WORKSPACE}/.env.local"

# Run /scripts/build.sh
# NOTE: the path is relative, so this script must be started from the repo root.
./scripts/build.sh

# ---
# Build is complete. Show user a clear message to open the workspace manually.
# ---

# Quoted delimiter: the banner is printed verbatim, with no shell expansion.
cat << 'EOM'

============================================
🚀 Build complete!

👉 Next steps:

1. Open '.vscode/py.code-workspace'
2. Press 'Open Workspace'

Happy coding!
============================================

EOM
|
||||
12
.editorconfig
Normal file
12
.editorconfig
Normal file
@@ -0,0 +1,12 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = false
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.{js,ts,jsx,tsx,json,css,scss,html,md}]
|
||||
indent_size = 2
|
||||
15
.github/FUNDING.yml
vendored
Normal file
15
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: trycua
|
||||
patreon: # Replace with a single Patreon username
|
||||
open_collective: # Replace with a single Open Collective username
|
||||
ko_fi: # Replace with a single Ko-fi username
|
||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||
liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
|
||||
polar: # Replace with a single Polar username
|
||||
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
|
||||
thanks_dev: # Replace with a single thanks.dev username
|
||||
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
|
||||
78
.github/scripts/get_pyproject_version.py
vendored
Executable file
78
.github/scripts/get_pyproject_version.py
vendored
Executable file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Verifies that the version in pyproject.toml matches the expected version.
|
||||
|
||||
Usage:
|
||||
python get_pyproject_version.py <pyproject_path> <expected_version>
|
||||
|
||||
Exit codes:
|
||||
0 - Versions match
|
||||
1 - Versions don't match or error occurred
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
import tomllib
|
||||
except ImportError:
|
||||
# Fallback for Python < 3.11
|
||||
import toml as tomllib
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 3:
|
||||
print(
|
||||
"Usage: python get_pyproject_version.py <pyproject_path> <expected_version>",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
pyproject_path = sys.argv[1]
|
||||
expected_version = sys.argv[2]
|
||||
|
||||
# tomllib requires binary mode
|
||||
try:
|
||||
with open(pyproject_path, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
# Fallback to toml if using the old library or handle other errors
|
||||
try:
|
||||
import toml
|
||||
|
||||
data = toml.load(pyproject_path)
|
||||
except FileNotFoundError:
|
||||
print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except Exception as toml_err:
|
||||
print(f"❌ ERROR: Failed to parse TOML file: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
actual_version = data.get("project", {}).get("version")
|
||||
|
||||
if not actual_version:
|
||||
print("❌ ERROR: No version found in pyproject.toml", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if actual_version != expected_version:
|
||||
print("❌ Version mismatch detected!", file=sys.stderr)
|
||||
print(f" pyproject.toml version: {actual_version}", file=sys.stderr)
|
||||
print(f" Expected version: {expected_version}", file=sys.stderr)
|
||||
print("", file=sys.stderr)
|
||||
print(
|
||||
"The version in pyproject.toml must match the version being published.", file=sys.stderr
|
||||
)
|
||||
print(
|
||||
f"Please update pyproject.toml to version {expected_version} or use the correct tag.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
print(f"✅ Version consistency check passed: {actual_version}")
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
137
.github/scripts/tests/README.md
vendored
Normal file
137
.github/scripts/tests/README.md
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
# Tests for .github/scripts
|
||||
|
||||
This directory contains comprehensive tests for the GitHub workflow scripts using Python's built-in testing framework.
|
||||
|
||||
## Requirements
|
||||
|
||||
**No external dependencies required on Python 3.11+!**
|
||||
|
||||
This test suite uses:
|
||||
|
||||
- `unittest` - Python's built-in testing framework
|
||||
- `tomllib` - Python 3.11+ built-in TOML parser
|
||||
|
||||
For Python < 3.11, the `toml` package is used as a fallback.
|
||||
|
||||
## Running Tests
|
||||
|
||||
### Run all tests
|
||||
|
||||
```bash
|
||||
cd .github/scripts/tests
|
||||
python3 -m unittest discover -v
|
||||
```
|
||||
|
||||
### Run a specific test file
|
||||
|
||||
```bash
|
||||
python3 -m unittest test_get_pyproject_version -v
|
||||
```
|
||||
|
||||
### Run a specific test class
|
||||
|
||||
```bash
|
||||
python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion -v
|
||||
```
|
||||
|
||||
### Run a specific test method
|
||||
|
||||
```bash
|
||||
python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion.test_matching_versions -v
|
||||
```
|
||||
|
||||
### Run tests directly from the test file
|
||||
|
||||
```bash
|
||||
python3 test_get_pyproject_version.py
|
||||
```
|
||||
|
||||
## Test Structure
|
||||
|
||||
### test_get_pyproject_version.py
|
||||
|
||||
Comprehensive tests for `get_pyproject_version.py` covering:
|
||||
|
||||
- ✅ **Version matching**: Tests successful version validation
|
||||
- ✅ **Version mismatch**: Tests error handling when versions don't match
|
||||
- ✅ **Missing version**: Tests handling of pyproject.toml without version field
|
||||
- ✅ **Missing project section**: Tests handling of pyproject.toml without project section
|
||||
- ✅ **File not found**: Tests handling of non-existent files
|
||||
- ✅ **Malformed TOML**: Tests handling of invalid TOML syntax
|
||||
- ✅ **Argument validation**: Tests proper argument count validation
|
||||
- ✅ **Semantic versioning**: Tests various semantic version formats
|
||||
- ✅ **Pre-release tags**: Tests versions with alpha, beta, rc tags
|
||||
- ✅ **Build metadata**: Tests versions with build metadata
|
||||
- ✅ **Edge cases**: Tests empty versions and other edge cases
|
||||
|
||||
**Total Tests**: 18 test cases in `TestGetPyprojectVersion`, plus one informational test in `TestSuiteInfo`
|
||||
|
||||
## Best Practices Implemented
|
||||
|
||||
1. **Fixture Management**: Uses `setUp()` and `tearDown()` for clean test isolation
|
||||
2. **Helper Methods**: Provides reusable helpers for creating test fixtures
|
||||
3. **Temporary Files**: Uses `tempfile` for file creation with proper cleanup
|
||||
4. **Comprehensive Coverage**: Tests happy paths, error conditions, and edge cases
|
||||
5. **Clear Documentation**: Each test has a descriptive docstring
|
||||
6. **Output Capture**: Uses `unittest.mock.patch` and `StringIO` to test stdout/stderr
|
||||
7. **Exit Code Validation**: Properly tests script exit codes with `assertRaises(SystemExit)`
|
||||
8. **Type Hints**: Uses type hints in helper methods for clarity
|
||||
9. **PEP 8 Compliance**: Follows Python style guidelines
|
||||
10. **Zero External Dependencies**: Uses only the Python standard library on Python 3.11+ (older versions need the `toml` package)
|
||||
|
||||
## Continuous Integration
|
||||
|
||||
These tests can be integrated into GitHub Actions workflows with no additional dependencies:
|
||||
|
||||
```yaml
|
||||
- name: Run .github scripts tests
|
||||
run: |
|
||||
cd .github/scripts/tests
|
||||
python3 -m unittest discover -v
|
||||
```
|
||||
|
||||
## Test Output Example
|
||||
|
||||
```
|
||||
test_empty_version_string (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test handling of empty version string. ... ok
|
||||
test_file_not_found (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test handling of non-existent pyproject.toml file. ... ok
|
||||
test_malformed_toml (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test handling of malformed TOML file. ... ok
|
||||
test_matching_versions (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test that matching versions result in success. ... ok
|
||||
test_missing_project_section (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test handling of pyproject.toml without a project section. ... ok
|
||||
test_missing_version_field (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test handling of pyproject.toml without a version field. ... ok
|
||||
test_no_arguments (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test that providing no arguments results in usage error. ... ok
|
||||
test_semantic_version_0_0_1 (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test semantic version 0.0.1. ... ok
|
||||
test_semantic_version_1_0_0 (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test semantic version 1.0.0. ... ok
|
||||
test_semantic_version_10_20_30 (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test semantic version 10.20.30. ... ok
|
||||
test_semantic_version_alpha (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test semantic version with alpha tag. ... ok
|
||||
test_semantic_version_beta (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test semantic version with beta tag. ... ok
|
||||
test_semantic_version_rc_with_build (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test semantic version with rc and build metadata. ... ok
|
||||
test_too_few_arguments (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test that providing too few arguments results in usage error. ... ok
|
||||
test_too_many_arguments (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test that providing too many arguments results in usage error. ... ok
|
||||
test_version_mismatch (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test that mismatched versions result in failure with appropriate error message. ... ok
|
||||
test_version_with_build_metadata (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test matching versions with build metadata. ... ok
|
||||
test_version_with_prerelease_tags (test_get_pyproject_version.TestGetPyprojectVersion)
|
||||
Test matching versions with pre-release tags like alpha, beta, rc. ... ok
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Ran 18 tests in 0.XXXs
|
||||
|
||||
OK
|
||||
```
|
||||
1
.github/scripts/tests/__init__.py
vendored
Normal file
1
.github/scripts/tests/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
"""Tests for .github/scripts."""
|
||||
360
.github/scripts/tests/test_get_pyproject_version.py
vendored
Normal file
360
.github/scripts/tests/test_get_pyproject_version.py
vendored
Normal file
@@ -0,0 +1,360 @@
|
||||
"""
|
||||
Comprehensive tests for get_pyproject_version.py script using unittest.
|
||||
|
||||
This test suite covers:
|
||||
- Version matching validation
|
||||
- Error handling for missing versions
|
||||
- Invalid input handling
|
||||
- File not found scenarios
|
||||
- Malformed TOML handling
|
||||
"""
|
||||
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
# Add parent directory to path to import the module
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
# Import after path is modified
|
||||
import get_pyproject_version
|
||||
|
||||
|
||||
class TestGetPyprojectVersion(unittest.TestCase):
    """Test suite for get_pyproject_version.py functionality.

    Every test drives ``get_pyproject_version.main()`` through ``sys.argv`` and
    checks the exit code carried by ``SystemExit`` together with the text
    written to stdout/stderr.
    """

    _USAGE = "Usage: python get_pyproject_version.py <pyproject_path> <expected_version>"
    _NO_VERSION = "❌ ERROR: No version found in pyproject.toml"

    def setUp(self):
        """Snapshot sys.argv so each test can overwrite it freely."""
        self.original_argv = sys.argv.copy()

    def tearDown(self):
        """Restore sys.argv after each test."""
        sys.argv = self.original_argv

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    def _write_temp_toml(self, content: str) -> Path:
        """Write *content* to a temporary .toml file and return its path.

        The file is removed automatically when the test finishes, whether it
        passes or fails, so tests need no try/finally of their own.
        """
        temp_file = tempfile.NamedTemporaryFile(
            mode="w", suffix=".toml", delete=False, encoding="utf-8"
        )
        with temp_file:
            temp_file.write(content)
        path = Path(temp_file.name)
        # missing_ok keeps cleanup quiet if a caller already removed the file.
        self.addCleanup(path.unlink, missing_ok=True)
        return path

    def _run_main(self, *args: str):
        """Run ``main()`` with *args* as CLI arguments.

        Returns:
            A ``(exit_code, stdout_text, stderr_text)`` tuple.
        """
        sys.argv = ["get_pyproject_version.py", *args]
        captured_output, captured_error = StringIO(), StringIO()
        with patch("sys.stdout", captured_output), patch("sys.stderr", captured_error):
            with self.assertRaises(SystemExit) as cm:
                get_pyproject_version.main()
        return cm.exception.code, captured_output.getvalue(), captured_error.getvalue()

    def create_pyproject_toml(self, version: str) -> Path:
        """Helper to create a temporary pyproject.toml file with a given version."""
        return self._write_temp_toml(
            "\n"
            "[project]\n"
            'name = "test-project"\n'
            f'version = "{version}"\n'
            'description = "A test project"\n'
        )

    def create_pyproject_toml_no_version(self) -> Path:
        """Helper to create a pyproject.toml without a version field."""
        return self._write_temp_toml(
            "\n"
            "[project]\n"
            'name = "test-project"\n'
            'description = "A test project without version"\n'
        )

    def create_pyproject_toml_no_project(self) -> Path:
        """Helper to create a pyproject.toml without a project section."""
        return self._write_temp_toml(
            "\n"
            "[tool.poetry]\n"
            'name = "test-project"\n'
            'version = "1.0.0"\n'
        )

    def create_malformed_toml(self) -> Path:
        """Helper to create a malformed TOML file."""
        # Unclosed table header and unclosed string: invalid for any TOML parser.
        return self._write_temp_toml(
            "\n"
            "[project\n"
            'name = "test-project\n'
            'version = "1.0.0"\n'
        )

    def _assert_usage_error(self, *args: str):
        """Assert that *args* make the script print its usage line and exit 1."""
        code, _, error_output = self._run_main(*args)
        self.assertEqual(code, 1)
        self.assertIn(self._USAGE, error_output)

    def _test_version_format(self, version: str):
        """Helper method to test various semantic version formats."""
        pyproject_file = self.create_pyproject_toml(version)
        code, output, _ = self._run_main(str(pyproject_file), version)
        self.assertEqual(code, 0)
        self.assertIn(f"✅ Version consistency check passed: {version}", output)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    # Test: Successful version match
    def test_matching_versions(self):
        """Test that matching versions result in success."""
        pyproject_file = self.create_pyproject_toml("1.2.3")
        code, output, _ = self._run_main(str(pyproject_file), "1.2.3")
        self.assertEqual(code, 0)
        self.assertIn("✅ Version consistency check passed: 1.2.3", output)

    # Test: Version mismatch
    def test_version_mismatch(self):
        """Test that mismatched versions result in failure with appropriate error message."""
        pyproject_file = self.create_pyproject_toml("1.2.3")
        code, _, error_output = self._run_main(str(pyproject_file), "1.2.4")
        self.assertEqual(code, 1)
        self.assertIn("❌ Version mismatch detected!", error_output)
        self.assertIn("pyproject.toml version: 1.2.3", error_output)
        self.assertIn("Expected version: 1.2.4", error_output)
        self.assertIn("Please update pyproject.toml to version 1.2.4", error_output)

    # Test: Missing version in pyproject.toml
    def test_missing_version_field(self):
        """Test handling of pyproject.toml without a version field."""
        pyproject_file = self.create_pyproject_toml_no_version()
        code, _, error_output = self._run_main(str(pyproject_file), "1.0.0")
        self.assertEqual(code, 1)
        self.assertIn(self._NO_VERSION, error_output)

    # Test: Missing project section
    def test_missing_project_section(self):
        """Test handling of pyproject.toml without a project section."""
        pyproject_file = self.create_pyproject_toml_no_project()
        code, _, error_output = self._run_main(str(pyproject_file), "1.0.0")
        self.assertEqual(code, 1)
        self.assertIn(self._NO_VERSION, error_output)

    # Test: File not found
    def test_file_not_found(self):
        """Test handling of non-existent pyproject.toml file."""
        code, _, error_output = self._run_main("/nonexistent/pyproject.toml", "1.0.0")
        self.assertEqual(code, 1)
        self.assertIn("❌ ERROR: File not found: /nonexistent/pyproject.toml", error_output)

    # Test: Malformed TOML
    def test_malformed_toml(self):
        """Test handling of malformed TOML file."""
        pyproject_file = self.create_malformed_toml()
        code, _, error_output = self._run_main(str(pyproject_file), "1.0.0")
        self.assertEqual(code, 1)
        self.assertIn("❌ ERROR: Failed to parse TOML file:", error_output)

    # Test: Incorrect number of arguments - too few
    def test_too_few_arguments(self):
        """Test that providing too few arguments results in usage error."""
        self._assert_usage_error("pyproject.toml")

    # Test: Incorrect number of arguments - too many
    def test_too_many_arguments(self):
        """Test that providing too many arguments results in usage error."""
        self._assert_usage_error("pyproject.toml", "1.0.0", "extra")

    # Test: No arguments
    def test_no_arguments(self):
        """Test that providing no arguments results in usage error."""
        self._assert_usage_error()

    # Test: Version with pre-release tags
    def test_version_with_prerelease_tags(self):
        """Test matching versions with pre-release tags like alpha, beta, rc."""
        self._test_version_format("1.2.3-rc.1")

    # Test: Version with build metadata
    def test_version_with_build_metadata(self):
        """Test matching versions with build metadata."""
        self._test_version_format("1.2.3+build.123")

    # Test: Various semantic version formats
    def test_semantic_version_0_0_1(self):
        """Test semantic version 0.0.1."""
        self._test_version_format("0.0.1")

    def test_semantic_version_1_0_0(self):
        """Test semantic version 1.0.0."""
        self._test_version_format("1.0.0")

    def test_semantic_version_10_20_30(self):
        """Test semantic version 10.20.30."""
        self._test_version_format("10.20.30")

    def test_semantic_version_alpha(self):
        """Test semantic version with alpha tag."""
        self._test_version_format("1.2.3-alpha")

    def test_semantic_version_beta(self):
        """Test semantic version with beta tag."""
        self._test_version_format("1.2.3-beta.1")

    def test_semantic_version_rc_with_build(self):
        """Test semantic version with rc and build metadata."""
        self._test_version_format("1.2.3-rc.1+build.456")

    # Test: Empty version string
    def test_empty_version_string(self):
        """Test handling of empty version string."""
        pyproject_file = self.create_pyproject_toml("")
        code, _, error_output = self._run_main(str(pyproject_file), "1.0.0")
        self.assertEqual(code, 1)
        # Empty string is falsy, so it should trigger error
        self.assertIn("❌", error_output)
|
||||
|
||||
|
||||
class TestSuiteInfo(unittest.TestCase):
    """Informational test case that only prints a banner describing the suite."""

    def test_suite_info(self):
        """Print the suite banner to stdout; this test always passes."""
        rule = "=" * 70
        banner_lines = (
            "Test Suite: get_pyproject_version.py",
            "Framework: unittest (Python built-in)",
            "TOML Library: tomllib (Python 3.11+ built-in)",
        )

        # Leading newline separates the banner from the test runner's own output.
        print("\n" + rule)
        for line in banner_lines:
            print(line)
        print(rule)

        self.assertTrue(True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Run tests with verbose output
|
||||
unittest.main(verbosity=2)
|
||||
91
.github/workflows/bump-version.yml
vendored
Normal file
91
.github/workflows/bump-version.yml
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
# Manually triggered workflow that bumps the version of one Python package
# with bump2version and pushes the resulting commit and tag to main.
name: Bump Version

on:
  workflow_dispatch:
    inputs:
      service:
        description: "Service/Package to bump"
        required: true
        type: choice
        options:
          - cua-agent
          - cua-computer
          - cua-computer-server
          - cua-core
          - cua-mcp-server
          - cua-som
          - pylume
      bump_type:
        description: "Version bump type"
        required: true
        type: choice
        options:
          - patch
          - minor
          - major

# Needed so the job can push the bump commit and tag.
permissions:
  contents: write

jobs:
  bump-version:
    runs-on: ubuntu-latest
    steps:
      # Map the selected package name to its directory in the repository.
      - name: Set package directory
        id: package
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          SERVICE: ${{ inputs.service }}
        run: |
          case "$SERVICE" in
            "cua-agent")
              echo "directory=libs/python/agent" >> "$GITHUB_OUTPUT"
              ;;
            "cua-computer")
              echo "directory=libs/python/computer" >> "$GITHUB_OUTPUT"
              ;;
            "cua-computer-server")
              echo "directory=libs/python/computer-server" >> "$GITHUB_OUTPUT"
              ;;
            "cua-core")
              echo "directory=libs/python/core" >> "$GITHUB_OUTPUT"
              ;;
            "cua-mcp-server")
              echo "directory=libs/python/mcp-server" >> "$GITHUB_OUTPUT"
              ;;
            "cua-som")
              echo "directory=libs/python/som" >> "$GITHUB_OUTPUT"
              ;;
            "pylume")
              echo "directory=libs/python/pylume" >> "$GITHUB_OUTPUT"
              ;;
            *)
              echo "Unknown service: $SERVICE"
              exit 1
              ;;
          esac

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history and tags are fetched for the tagging step.
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install bump2version
        run: pip install bump2version

      - name: Configure Git
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Run bump2version
        env:
          PACKAGE_DIR: ${{ steps.package.outputs.directory }}
          BUMP_TYPE: ${{ inputs.bump_type }}
        run: |
          cd "$PACKAGE_DIR"
          bump2version "$BUMP_TYPE"

      # NOTE(review): always pushes to main, regardless of the branch the
      # workflow was dispatched from — confirm this is intended.
      - name: Push changes
        run: |
          git push origin main --follow-tags
|
||||
29
.github/workflows/docker-publish-kasm.yml
vendored
Normal file
29
.github/workflows/docker-publish-kasm.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
name: Build and Publish CUA Ubuntu Container
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- "docker-kasm-v*.*.*"
|
||||
paths:
|
||||
- "libs/kasm/**"
|
||||
- ".github/workflows/docker-publish-kasm.yml"
|
||||
- ".github/workflows/docker-reusable-publish.yml"
|
||||
pull_request:
|
||||
paths:
|
||||
- "libs/kasm/**"
|
||||
- ".github/workflows/docker-publish-kasm.yml"
|
||||
- ".github/workflows/docker-reusable-publish.yml"
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
uses: ./.github/workflows/docker-reusable-publish.yml
|
||||
with:
|
||||
image_name: cua-ubuntu
|
||||
context_dir: libs/kasm
|
||||
dockerfile_path: Dockerfile
|
||||
tag_prefix: docker-kasm-v
|
||||
docker_hub_org: trycua
|
||||
secrets:
|
||||
DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}
|
||||
29
.github/workflows/docker-publish-xfce.yml
vendored
Normal file
29
.github/workflows/docker-publish-xfce.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
name: Build and Publish CUA XFCE Container
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- "docker-xfce-v*.*.*"
|
||||
paths:
|
||||
- "libs/xfce/**"
|
||||
- ".github/workflows/docker-publish-xfce.yml"
|
||||
- ".github/workflows/docker-reusable-publish.yml"
|
||||
pull_request:
|
||||
paths:
|
||||
- "libs/xfce/**"
|
||||
- ".github/workflows/docker-publish-xfce.yml"
|
||||
- ".github/workflows/docker-reusable-publish.yml"
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
uses: ./.github/workflows/docker-reusable-publish.yml
|
||||
with:
|
||||
image_name: cua-xfce
|
||||
context_dir: libs/xfce
|
||||
dockerfile_path: Dockerfile
|
||||
tag_prefix: docker-xfce-v
|
||||
docker_hub_org: trycua
|
||||
secrets:
|
||||
DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}
|
||||
155
.github/workflows/docker-reusable-publish.yml
vendored
Normal file
155
.github/workflows/docker-reusable-publish.yml
vendored
Normal file
@@ -0,0 +1,155 @@
|
||||
name: Reusable Docker Publish Workflow
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
image_name:
|
||||
description: "Name of the Docker image (e.g. cua-ubuntu, cua-xfce)"
|
||||
required: true
|
||||
type: string
|
||||
context_dir:
|
||||
description: "Directory containing the Dockerfile relative to workspace root (e.g. libs/kasm, libs/xfce)"
|
||||
required: true
|
||||
type: string
|
||||
dockerfile_path:
|
||||
description: "Path to Dockerfile relative to context_dir (e.g. Dockerfile)"
|
||||
required: false
|
||||
type: string
|
||||
default: "Dockerfile"
|
||||
tag_prefix:
|
||||
description: "Prefix for semantic version tags (e.g. docker-kasm-v, docker-xfce-v)"
|
||||
required: true
|
||||
type: string
|
||||
docker_hub_org:
|
||||
description: "Docker Hub organization name"
|
||||
required: false
|
||||
type: string
|
||||
default: "trycua"
|
||||
secrets:
|
||||
DOCKER_HUB_TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare platform tag
|
||||
id: platform
|
||||
run: |
|
||||
# Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64)
|
||||
PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g')
|
||||
echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ inputs.docker_hub_org }}
|
||||
password: ${{ secrets.DOCKER_HUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
id: meta-pr
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
|
||||
tags: |
|
||||
type=raw,value=${{ github.sha }}
|
||||
|
||||
- name: Extract metadata (main branch)
|
||||
if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
|
||||
id: meta-main
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
|
||||
tags: |
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Extract metadata (semantic version tag)
|
||||
if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix))
|
||||
id: meta-semver
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }}
|
||||
tags: |
|
||||
type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }}
|
||||
type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }}
|
||||
type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }}
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Build and push Docker image (PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./${{ inputs.context_dir }}
|
||||
file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }}
|
||||
push: true
|
||||
tags: ${{ steps.meta-pr.outputs.tags }}
|
||||
labels: ${{ steps.meta-pr.outputs.labels }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }}
|
||||
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest
|
||||
cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max
|
||||
|
||||
- name: Build and push Docker image (main branch)
|
||||
if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./${{ inputs.context_dir }}
|
||||
file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }}
|
||||
push: true
|
||||
tags: ${{ steps.meta-main.outputs.tags }}
|
||||
labels: ${{ steps.meta-main.outputs.labels }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }}
|
||||
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest
|
||||
cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max
|
||||
|
||||
- name: Build and push Docker image (semantic version tag)
|
||||
if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix))
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./${{ inputs.context_dir }}
|
||||
file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }}
|
||||
push: true
|
||||
tags: ${{ steps.meta-semver.outputs.tags }}
|
||||
labels: ${{ steps.meta-semver.outputs.labels }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: |
|
||||
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }}
|
||||
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest
|
||||
cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max
|
||||
|
||||
- name: Image digest
|
||||
if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix))
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ]; then
|
||||
echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}"
|
||||
elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then
|
||||
echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}"
|
||||
else
|
||||
echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}"
|
||||
fi
|
||||
|
||||
- name: print image tags
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ]; then
|
||||
echo "Image tags: ${{ steps.meta-pr.outputs.tags }}"
|
||||
elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then
|
||||
echo "Image tags: ${{ steps.meta-semver.outputs.tags }}"
|
||||
else
|
||||
echo "Image tags: ${{ steps.meta-main.outputs.tags }}"
|
||||
fi
|
||||
60
.github/workflows/lint.yml
vendored
Normal file
60
.github/workflows/lint.yml
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
name: Lint & Format Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Lint & Format
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Set up pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.12
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
pip install uv
|
||||
uv sync
|
||||
|
||||
- name: Install Node dependencies
|
||||
run: |
|
||||
pnpm install --frozen-lockfile
|
||||
pnpm -C libs/typescript install --frozen-lockfile
|
||||
|
||||
# Python checks (isort, black, ruff, mypy)
|
||||
- name: Python lint & typecheck
|
||||
run: |
|
||||
uv run isort --check-only .
|
||||
uv run black --check .
|
||||
uv run ruff check .
|
||||
# Temporarily disabled due to untyped codebase
|
||||
# uv run mypy .
|
||||
|
||||
# TypeScript type check
|
||||
- name: TypeScript typecheck
|
||||
run: node ./scripts/typescript-typecheck.js
|
||||
|
||||
# JS/TS/Markdown/YAML checks
|
||||
- name: Prettier check
|
||||
run: pnpm prettier --check "**/*.{ts,tsx,js,jsx,json,md,yaml,yml}"
|
||||
25
.github/workflows/pypi-reusable-publish.yml
vendored
25
.github/workflows/pypi-reusable-publish.yml
vendored
@@ -71,6 +71,16 @@ jobs:
|
||||
echo "VERSION=${{ inputs.version }}" >> $GITHUB_ENV
|
||||
echo "version=${{ inputs.version }}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Verify version consistency
|
||||
run: |
|
||||
# Install toml parser
|
||||
pip install toml
|
||||
|
||||
# Verify version matches using script (exits with error if mismatch)
|
||||
python ${GITHUB_WORKSPACE}/.github/scripts/get_pyproject_version.py \
|
||||
${{ inputs.package_dir }}/pyproject.toml \
|
||||
${{ inputs.version }}
|
||||
|
||||
- name: Initialize PDM in package directory
|
||||
run: |
|
||||
# Make sure we're working with a properly initialized PDM project
|
||||
@@ -82,21 +92,6 @@ jobs:
|
||||
pdm lock
|
||||
fi
|
||||
|
||||
- name: Set version in package
|
||||
run: |
|
||||
cd ${{ inputs.package_dir }}
|
||||
# Replace pdm bump with direct edit of pyproject.toml
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
# macOS version of sed needs an empty string for -i
|
||||
sed -i '' "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml
|
||||
else
|
||||
# Linux version
|
||||
sed -i "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml
|
||||
fi
|
||||
# Verify version was updated
|
||||
echo "Updated version in pyproject.toml:"
|
||||
grep "version =" pyproject.toml
|
||||
|
||||
# Conditional step for lume binary download (only for pylume package)
|
||||
- name: Download and setup lume binary
|
||||
if: inputs.is_lume_package
|
||||
|
||||
36
.github/workflows/test-validation-script.yml
vendored
Normal file
36
.github/workflows/test-validation-script.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: Test validation script
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- ".github/scripts/**"
|
||||
- ".github/workflows/test-validation-script.yml"
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- ".github/scripts/**"
|
||||
- ".github/workflows/test-validation-script.yml"
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest toml
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd .github/scripts
|
||||
pytest tests/ -v
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -111,6 +111,9 @@ ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Git worktrees
|
||||
.worktrees/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
48
.pre-commit-config.yaml
Normal file
48
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,48 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||
rev: v3.0.0
|
||||
hooks:
|
||||
- id: prettier
|
||||
name: Prettier (TS/JS/JSON/Markdown/YAML)
|
||||
entry: prettier --write
|
||||
language: node
|
||||
additional_dependencies: ["prettier@3.6.2"]
|
||||
files: \.(ts|tsx|js|jsx|json|md|yaml|yml)$
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: tsc
|
||||
name: TypeScript type check
|
||||
entry: node ./scripts/typescript-typecheck.js
|
||||
language: node
|
||||
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 7.0.0
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort code formatter
|
||||
args: ["--profile", "black"]
|
||||
files: \.(py)$
|
||||
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 25.9.0
|
||||
hooks:
|
||||
- id: black
|
||||
name: Black code formatter
|
||||
files: \.(py)$
|
||||
|
||||
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
||||
rev: v0.14.1
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff linter
|
||||
args: ["--fix"]
|
||||
files: \.(py)$
|
||||
|
||||
# Temporarily disabled due to untyped codebase
|
||||
# - repo: https://github.com/pre-commit/mirrors-mypy
|
||||
# rev: v1.5.1
|
||||
# hooks:
|
||||
# - id: mypy
|
||||
# name: mypy type checker
|
||||
# files: \.(py)$
|
||||
32
.prettierignore
Normal file
32
.prettierignore
Normal file
@@ -0,0 +1,32 @@
|
||||
# Node / JS
|
||||
node_modules/
|
||||
dist/
|
||||
build/
|
||||
out/
|
||||
.next/
|
||||
*.min.js
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.venv/
|
||||
venv/
|
||||
.env
|
||||
.env.local
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
*.tmp
|
||||
|
||||
# VSCode / editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Other generated files
|
||||
*.lock
|
||||
*.db
|
||||
*.sqlite
|
||||
pnpm-lock.yaml
|
||||
uv.lock
|
||||
12
.prettierrc.yaml
Normal file
12
.prettierrc.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
semi: true
|
||||
singleQuote: true
|
||||
trailingComma: es5
|
||||
tabWidth: 2
|
||||
printWidth: 100
|
||||
arrowParens: always
|
||||
bracketSpacing: true
|
||||
|
||||
overrides:
|
||||
- files: "*.{yml,yaml}"
|
||||
options:
|
||||
singleQuote: false
|
||||
10
.vscode/extensions.json
vendored
Normal file
10
.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"esbenp.prettier-vscode",
|
||||
"charliermarsh.ruff",
|
||||
"ms-python.black-formatter",
|
||||
"ms-python.mypy-type-checker",
|
||||
"ms-python.vscode-pylance",
|
||||
"ms-python.isort"
|
||||
]
|
||||
}
|
||||
2
.vscode/libs-ts.code-workspace
vendored
2
.vscode/libs-ts.code-workspace
vendored
@@ -7,7 +7,7 @@
|
||||
],
|
||||
"extensions": {
|
||||
"recommendations": [
|
||||
"biomejs.biome",
|
||||
"esbenp.prettier-vscode"
|
||||
]
|
||||
}
|
||||
}
|
||||
25
.vscode/settings.json
vendored
Normal file
25
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"python-envs.pythonProjects": [],
|
||||
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "explicit",
|
||||
"source.fixAll": "explicit"
|
||||
},
|
||||
"extensions.ignoreRecommendations": false,
|
||||
"python.formatting.provider": "black",
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter"
|
||||
},
|
||||
"[javascript][typescript][typescriptreact][javascriptreact]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
},
|
||||
"ruff.configuration": "${workspaceFolder}/pyproject.toml",
|
||||
"mypy-type-checker.args": [
|
||||
"--config-file",
|
||||
"${workspaceFolder}/pyproject.toml"
|
||||
],
|
||||
"mypy-type-checker.path": [
|
||||
"${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
# Contributing to cua
|
||||
# Contributing to Cua
|
||||
|
||||
We deeply appreciate your interest in contributing to cua! Whether you're reporting bugs, suggesting enhancements, improving docs, or submitting pull requests, your contributions help improve the project for everyone.
|
||||
We deeply appreciate your interest in contributing to Cua! Whether you're reporting bugs, suggesting enhancements, improving docs, or submitting pull requests, your contributions help improve the project for everyone.
|
||||
|
||||
## Reporting Bugs
|
||||
|
||||
@@ -35,17 +35,20 @@ We follow strict code formatting guidelines to ensure consistency across the cod
|
||||
3. **Run Formatting Tools**: Always run the formatting tools before submitting a PR:
|
||||
```bash
|
||||
# For Python code
|
||||
pdm run black .
|
||||
pdm run ruff check --fix .
|
||||
uv run black .
|
||||
uv run isort .
|
||||
uv run ruff check --fix .
|
||||
```
|
||||
4. **Validate Your Code**: Ensure your code passes all checks:
|
||||
```bash
|
||||
pdm run mypy .
|
||||
uv run mypy .
|
||||
```
|
||||
5. Every time you try to commit code, a pre-commit hook will automatically run the formatting and validation tools. If any issues are found, the commit will be blocked until they are resolved. Please make sure to address any issues reported by the pre-commit hook before attempting to commit again. Once all issues are resolved, you can proceed with your commit.
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation improvements are always welcome. You can:
|
||||
|
||||
- Fix typos or unclear explanations
|
||||
- Add examples and use cases
|
||||
- Improve API documentation
|
||||
@@ -53,4 +56,4 @@ Documentation improvements are always welcome. You can:
|
||||
|
||||
For detailed instructions on setting up your development environment and submitting code contributions, please see our [Developer-Guide](Development.md).
|
||||
|
||||
Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions.
|
||||
Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions.
|
||||
|
||||
419
Development.md
419
Development.md
@@ -10,44 +10,71 @@ The project is organized as a monorepo with these main packages:
|
||||
- `libs/som/` - Set-of-Mark parser
|
||||
- `libs/computer-server/` - Server component for VM
|
||||
- `libs/lume/` - Lume CLI
|
||||
- `libs/pylume/` - Python bindings for Lume
|
||||
|
||||
Each package has its own virtual environment and dependencies, managed through PDM.
|
||||
These packages are part of a uv workspace which manages a shared virtual environment and dependencies.
|
||||
|
||||
## Local Development Setup
|
||||
|
||||
1. Install Lume CLI:
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
|
||||
2. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/trycua/cua.git
|
||||
cd cua
|
||||
```
|
||||
```bash
|
||||
git clone https://github.com/trycua/cua.git
|
||||
cd cua
|
||||
```
|
||||
|
||||
3. Create a `.env.local` file in the root directory with your API keys:
|
||||
|
||||
```bash
|
||||
# Required for Anthropic provider
|
||||
ANTHROPIC_API_KEY=your_anthropic_key_here
|
||||
```bash
|
||||
# Required for Anthropic provider
|
||||
ANTHROPIC_API_KEY=your_anthropic_key_here
|
||||
|
||||
# Required for OpenAI provider
|
||||
OPENAI_API_KEY=your_openai_key_here
|
||||
```
|
||||
# Required for OpenAI provider
|
||||
OPENAI_API_KEY=your_openai_key_here
|
||||
```
|
||||
|
||||
4. Open the workspace in VSCode or Cursor:
|
||||
4. Install Node.js dependencies for Prettier and other scripts:
|
||||
|
||||
```bash
|
||||
# For Cua Python development
|
||||
code .vscode/py.code-workspace
|
||||
```bash
|
||||
# Install pnpm if you don't have it
|
||||
npm install -g pnpm
|
||||
|
||||
# For Lume (Swift) development
|
||||
code .vscode/lume.code-workspace
|
||||
```
|
||||
# Install all JS/TS dependencies
|
||||
pnpm install
|
||||
```
|
||||
|
||||
5. Install Python dependencies and workspace packages:
|
||||
|
||||
```bash
|
||||
# First install uv if you don't have it
|
||||
pip install uv
|
||||
|
||||
# Then install all Python dependencies
|
||||
uv sync
|
||||
```
|
||||
|
||||
6. Open the workspace in VSCode or Cursor:
|
||||
|
||||
```bash
|
||||
# For Cua Python development
|
||||
code .vscode/py.code-workspace
|
||||
|
||||
# For Lume (Swift) development
|
||||
code .vscode/lume.code-workspace
|
||||
```
|
||||
|
||||
7. Install Pre-commit hooks:
|
||||
|
||||
This ensures code formatting and validation run automatically on each commit.
|
||||
|
||||
```bash
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
Using the workspace file is strongly recommended as it:
|
||||
|
||||
@@ -62,39 +89,33 @@ Refer to the [Lume README](./libs/lume/Development.md) for instructions on how t
|
||||
|
||||
## Python Development
|
||||
|
||||
There are two ways to install Lume:
|
||||
### Setup
|
||||
|
||||
### Run the build script
|
||||
|
||||
Run the build script to set up all packages:
|
||||
Install all workspace dependencies with a single command:
|
||||
|
||||
```bash
|
||||
./scripts/build.sh
|
||||
uv sync
|
||||
```
|
||||
|
||||
The build script creates a shared virtual environment for all packages. The workspace configuration automatically handles import paths with the correct Python path settings.
|
||||
This installs all dependencies in the virtual environment `.venv`.
|
||||
|
||||
This will:
|
||||
Each Cua package is installed in editable mode, which means changes to the source code are immediately reflected in the installed package.
|
||||
|
||||
- Create a virtual environment for the project
|
||||
- Install all packages in development mode
|
||||
- Set up the correct Python path
|
||||
- Install development tools
|
||||
The `.venv` environment is also configured as the default VS Code Python interpreter in `.vscode/settings.json`.
|
||||
|
||||
### Install with PDM
|
||||
### Running Python Scripts
|
||||
|
||||
If PDM is not already installed, you can follow the installation instructions [here](https://pdm-project.org/en/latest/#installation).
|
||||
To run Python scripts in the workspace, use the `uv run` command:
|
||||
|
||||
To install with PDM, simply run:
|
||||
|
||||
```console
|
||||
pdm install -G:all
|
||||
```bash
|
||||
uv run python examples/agent_examples.py
|
||||
```
|
||||
|
||||
This installs all the dependencies for development, testing, and building the docs. If you'd only like development dependencies, you can run:
|
||||
Or activate the virtual environment manually:
|
||||
|
||||
```console
|
||||
pdm install -d
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
python examples/agent_examples.py
|
||||
```
|
||||
|
||||
## Running Examples
|
||||
@@ -114,71 +135,9 @@ The workspace also includes compound launch configurations:
|
||||
|
||||
- "Run Computer Examples + Server" - Runs both the Computer Examples and Server simultaneously
|
||||
|
||||
## Docker Development Environment
|
||||
|
||||
As an alternative to installing directly on your host machine, you can use Docker for development. This approach has several advantages:
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Docker installed on your machine
|
||||
- Lume server running on your host (port 7777): `lume serve`
|
||||
|
||||
### Setup and Usage
|
||||
|
||||
1. Build the development Docker image:
|
||||
|
||||
```bash
|
||||
./scripts/run-docker-dev.sh build
|
||||
```
|
||||
|
||||
2. Run an example in the container:
|
||||
|
||||
```bash
|
||||
./scripts/run-docker-dev.sh run computer_examples.py
|
||||
```
|
||||
|
||||
3. Get an interactive shell in the container:
|
||||
|
||||
```bash
|
||||
./scripts/run-docker-dev.sh run --interactive
|
||||
```
|
||||
|
||||
4. Stop any running containers:
|
||||
|
||||
```bash
|
||||
./scripts/run-docker-dev.sh stop
|
||||
```
|
||||
|
||||
### How it Works
|
||||
|
||||
The Docker development environment:
|
||||
|
||||
- Installs all required Python dependencies in the container
|
||||
- Mounts your source code from the host at runtime
|
||||
- Automatically configures the connection to use host.docker.internal:7777 for accessing the Lume server on your host machine
|
||||
- Preserves your code changes without requiring rebuilds (source code is mounted as a volume)
|
||||
|
||||
> **Note**: The Docker container doesn't include the macOS-specific Lume executable. Instead, it connects to the Lume server running on your host machine via host.docker.internal:7777. Make sure to start the Lume server on your host before running examples in the container.
|
||||
|
||||
## Cleanup and Reset
|
||||
|
||||
If you need to clean up the environment (non-docker) and start fresh:
|
||||
|
||||
```bash
|
||||
./scripts/cleanup.sh
|
||||
```
|
||||
|
||||
This will:
|
||||
|
||||
- Remove all virtual environments
|
||||
- Clean Python cache files and directories
|
||||
- Remove build artifacts
|
||||
- Clean PDM-related files
|
||||
- Reset environment configurations
|
||||
|
||||
## Code Formatting Standards
|
||||
|
||||
The cua project follows strict code formatting standards to ensure consistency across all packages.
|
||||
The Cua project follows strict code formatting standards to ensure consistency across all packages.
|
||||
|
||||
### Python Code Formatting
|
||||
|
||||
@@ -187,10 +146,11 @@ The cua project follows strict code formatting standards to ensure consistency a
|
||||
The project uses the following tools for code formatting and linting:
|
||||
|
||||
- **[Black](https://black.readthedocs.io/)**: Code formatter
|
||||
- **[isort](https://pycqa.github.io/isort/)**: Import sorter
|
||||
- **[Ruff](https://beta.ruff.rs/docs/)**: Fast linter and formatter
|
||||
- **[MyPy](https://mypy.readthedocs.io/)**: Static type checker
|
||||
|
||||
These tools are automatically installed when you set up the development environment using the `./scripts/build.sh` script.
|
||||
These tools are automatically installed when you set up the development environment.
|
||||
|
||||
#### Configuration
|
||||
|
||||
@@ -202,23 +162,34 @@ line-length = 100
|
||||
target-version = ["py311"]
|
||||
|
||||
[tool.ruff]
|
||||
fix = true
|
||||
line-length = 100
|
||||
target-version = "py311"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "B", "I"]
|
||||
ignore = [
|
||||
"E501", "E402", "I001", "I002", "B007", "B023", "B024", "B027", "B028",
|
||||
"B904", "B905", "E711", "E712", "E722", "E731", "F401", "F403", "F405",
|
||||
"F811", "F821", "F841"
|
||||
]
|
||||
fix = true
|
||||
|
||||
[tool.ruff.format]
|
||||
docstring-code-format = true
|
||||
|
||||
[tool.mypy]
|
||||
strict = true
|
||||
python_version = "3.11"
|
||||
ignore_missing_imports = true
|
||||
disallow_untyped_defs = true
|
||||
check_untyped_defs = true
|
||||
warn_return_any = true
|
||||
disallow_untyped_defs = true
|
||||
ignore_missing_imports = true
|
||||
python_version = "3.11"
|
||||
show_error_codes = true
|
||||
strict = true
|
||||
warn_return_any = true
|
||||
warn_unused_ignores = false
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
```
|
||||
|
||||
#### Key Formatting Rules
|
||||
@@ -232,23 +203,48 @@ warn_unused_ignores = false
|
||||
|
||||
The repository includes VSCode workspace configurations that enable automatic formatting. When you open the workspace files (as recommended in the setup instructions), the correct formatting settings are automatically applied.
|
||||
|
||||
Python-specific settings in the workspace files:
|
||||
##### Python-specific settings
|
||||
|
||||
These are configured in `.vscode/settings.json`:
|
||||
|
||||
```json
|
||||
"[python]": {
|
||||
"editor.formatOnSave": true,
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "explicit"
|
||||
}
|
||||
{
|
||||
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "explicit",
|
||||
"source.fixAll": "explicit"
|
||||
},
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter"
|
||||
},
|
||||
"python.formatting.provider": "black",
|
||||
"ruff.configuration": "${workspaceFolder}/pyproject.toml",
|
||||
"mypy-type-checker.args": ["--config-file", "${workspaceFolder}/pyproject.toml"],
|
||||
"mypy-type-checker.path": ["${workspaceFolder}"]
|
||||
}
|
||||
```
|
||||
|
||||
Recommended VS Code extensions:
|
||||
##### **JS/TS-specific settings**
|
||||
|
||||
- Black Formatter (ms-python.black-formatter)
|
||||
- Ruff (charliermarsh.ruff)
|
||||
- Pylance (ms-python.vscode-pylance)
|
||||
```json
|
||||
"[javascript][typescript][typescriptreact][javascriptreact]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
}
|
||||
```
|
||||
|
||||
- Ensures Prettier is used for all JS/TS files for consistent formatting.
|
||||
|
||||
Recommended VS Code Extensions
|
||||
|
||||
- **Black Formatter** – `ms-python.black-formatter`
|
||||
- **Ruff** – `charliermarsh.ruff`
|
||||
- **Pylance** – `ms-python.vscode-pylance`
|
||||
- **isort** – `ms-python.isort`
|
||||
- **Prettier** – `esbenp.prettier-vscode`
|
||||
- **Mypy Type Checker** – `ms-python.mypy-type-checker`
|
||||
|
||||
> VSCode will automatically suggest installing the recommended extensions when you open the workspace.
|
||||
|
||||
#### Manual Formatting
|
||||
|
||||
@@ -256,26 +252,93 @@ To manually format code:
|
||||
|
||||
```bash
|
||||
# Format all Python files using Black
|
||||
pdm run black .
|
||||
uv run black .
|
||||
|
||||
# Sort imports using isort
|
||||
uv run isort .
|
||||
|
||||
# Run Ruff linter with auto-fix
|
||||
pdm run ruff check --fix .
|
||||
uv run ruff check .
|
||||
|
||||
# Run type checking with MyPy
|
||||
pdm run mypy .
|
||||
uv run mypy .
|
||||
```
|
||||
|
||||
#### Pre-commit Validation
|
||||
|
||||
Before submitting a pull request, ensure your code passes all formatting checks:
|
||||
|
||||
**Option 1: Run all hooks via pre-commit (all in a single command)**
|
||||
|
||||
```bash
|
||||
# Run all checks
|
||||
pdm run black --check .
|
||||
pdm run ruff check .
|
||||
pdm run mypy .
|
||||
# Run hooks on staged files (recommended for quick checks)
|
||||
uv run pre-commit run
|
||||
```
|
||||
|
||||
- Automatically runs Prettier, the TypeScript type check, isort, Black, Ruff, and any other configured hooks (the Mypy hook is currently disabled in `.pre-commit-config.yaml`).
|
||||
|
||||
**Option 2: Run individual tools manually**
|
||||
|
||||
```bash
|
||||
# Python checks
|
||||
uv run black --check .
|
||||
uv run isort --check .
|
||||
uv run ruff check .
|
||||
uv run mypy .
|
||||
|
||||
# JavaScript/TypeScript checks
|
||||
pnpm prettier --check "**/*.{ts,tsx,js,jsx,json,md,yaml,yml}"
|
||||
|
||||
# TypeScript typecheck
|
||||
node ./scripts/typescript-typecheck.js
|
||||
```
|
||||
|
||||
### JavaScript / TypeScript Formatting (Prettier)
|
||||
|
||||
The project uses **Prettier** to ensure consistent formatting across all JS/TS/JSON/Markdown/YAML files.
|
||||
|
||||
#### Installation
|
||||
|
||||
All Node.js dependencies are managed via `pnpm`. Make sure you have run:
|
||||
|
||||
```bash
|
||||
# Install pnpm if you don't have it
|
||||
npm install -g pnpm
|
||||
|
||||
# Install project dependencies
|
||||
pnpm install
|
||||
```
|
||||
|
||||
This installs Prettier and other JS/TS dependencies defined in `package.json`.
|
||||
|
||||
#### Usage
|
||||
|
||||
- **Check formatting** (without making changes):
|
||||
|
||||
```bash
|
||||
pnpm prettier:check
|
||||
```
|
||||
|
||||
- **Automatically format files**:
|
||||
|
||||
```bash
|
||||
pnpm prettier:format
|
||||
```
|
||||
|
||||
#### Type Checking (TypeScript)
|
||||
|
||||
- Run the TypeScript type checker:
|
||||
|
||||
```bash
|
||||
node ./scripts/typescript-typecheck.js
|
||||
```
|
||||
|
||||
#### VSCode Integration
|
||||
|
||||
- The workspace config ensures Prettier is used automatically for JS/TS/JSON/Markdown/YAML files.
|
||||
- Recommended extension: Prettier – Code Formatter
|
||||
- Ensure `editor.formatOnSave` is enabled in VSCode for automatic formatting.
|
||||
|
||||
### Swift Code (Lume)
|
||||
|
||||
For Swift code in the `libs/lume` directory:
|
||||
@@ -283,3 +346,101 @@ For Swift code in the `libs/lume` directory:
|
||||
- Follow the [Swift API Design Guidelines](https://www.swift.org/documentation/api-design-guidelines/)
|
||||
- Use SwiftFormat for consistent formatting
|
||||
- Code will be automatically formatted on save when using the lume workspace
|
||||
|
||||
## Releasing Packages
|
||||
|
||||
Cua uses an automated GitHub Actions workflow to bump package versions.
|
||||
|
||||
> **Note:** The main branch is currently not protected. If branch protection is enabled in the future, the github-actions bot must be added to the bypass list for these workflows to commit directly.
|
||||
|
||||
### Version Bump Workflow
|
||||
|
||||
All packages are managed through a single consolidated workflow: [Bump Version](https://github.com/trycua/cua/actions/workflows/bump-version.yml)
|
||||
|
||||
**Supported packages:**
|
||||
|
||||
- cua-agent
|
||||
- cua-computer
|
||||
- cua-computer-server
|
||||
- cua-core
|
||||
- cua-mcp-server
|
||||
- cua-som
|
||||
- pylume
|
||||
|
||||
**How to use:**
|
||||
|
||||
1. Navigate to the [Bump Version workflow](https://github.com/trycua/cua/actions/workflows/bump-version.yml)
|
||||
2. Click the "Run workflow" button in the GitHub UI
|
||||
3. Select the **service/package** you want to bump from the first dropdown
|
||||
4. Select the **bump type** (patch/minor/major) from the second dropdown
|
||||
5. Click "Run workflow" to start the version bump
|
||||
6. The workflow will automatically commit changes and push to main
|
||||
|
||||
### Rolling Back a Version Bump
|
||||
|
||||
If you need to revert a version bump, follow these steps:
|
||||
|
||||
**Step 1: Find the version bump commit**
|
||||
|
||||
```bash
|
||||
# List recent commits
|
||||
git log --oneline | grep "Bump"
|
||||
|
||||
# Example output:
|
||||
# a1b2c3d Bump cua-core to v0.1.9
|
||||
```
|
||||
|
||||
**Step 2: Revert the commit**
|
||||
|
||||
```bash
|
||||
# Revert the specific commit
|
||||
git revert <commit-hash>
|
||||
|
||||
# Example:
|
||||
# git revert a1b2c3d
|
||||
```
|
||||
|
||||
**Step 3: Delete the git tag**
|
||||
|
||||
```bash
|
||||
# List tags to find the version tag
|
||||
git tag -l
|
||||
|
||||
# Delete the tag locally (use the correct package-specific format)
|
||||
git tag -d core-v0.1.9
|
||||
|
||||
# Delete the tag remotely
|
||||
git push origin :refs/tags/core-v0.1.9
|
||||
```
|
||||
|
||||
**Step 4: Push the revert**
|
||||
|
||||
```bash
|
||||
git push origin main
|
||||
```
|
||||
|
||||
**Per-package tag patterns:**
|
||||
|
||||
Each package uses its own tag format defined in `.bumpversion.cfg`:
|
||||
|
||||
- **cua-core**: `core-v{version}` (e.g., `core-v0.1.9`)
|
||||
- **cua-computer**: `computer-v{version}` (e.g., `computer-v0.4.7`)
|
||||
- **cua-agent**: `agent-v{version}` (e.g., `agent-v0.4.35`)
|
||||
- **cua-som**: `som-v{version}` (e.g., `som-v0.1.3`)
|
||||
- **pylume**: `pylume-v{version}` (e.g., `pylume-v0.2.1`)
|
||||
- **cua-computer-server**: `computer-server-v{version}` (e.g., `computer-server-v0.1.27`)
|
||||
- **cua-mcp-server**: `mcp-server-v{version}` (e.g., `mcp-server-v0.1.14`)
|
||||
|
||||
### Local Testing (Advanced)
|
||||
|
||||
The Makefile targets are kept for local testing only:
|
||||
|
||||
```bash
|
||||
# Test version bump locally (dry run)
|
||||
make dry-run-patch-core
|
||||
|
||||
# View current versions
|
||||
make show-versions
|
||||
```
|
||||
|
||||
**Note:** For production releases, always use the Bump Version GitHub Actions workflow described above instead of running Makefile commands directly.
|
||||
|
||||
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
||||
40
Makefile
Normal file
40
Makefile
Normal file
@@ -0,0 +1,40 @@
|
||||
# Python Package Release Makefile
|
||||
# Version bumps are managed via GitHub Actions workflows (see Development.md)
|
||||
# This Makefile provides utility targets for checking versions and dry-run testing
|
||||
|
||||
.PHONY: help
|
||||
|
||||
help: ## Show this help message
|
||||
@echo "Python Package Release Utilities"
|
||||
@echo ""
|
||||
@echo "Usage: make <target>"
|
||||
@echo ""
|
||||
@echo "Available targets:"
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " %-25s %s\n", $$1, $$2}'
|
||||
@echo ""
|
||||
@echo "⚠️ For production version bumps, use GitHub Actions:"
|
||||
@echo " https://github.com/trycua/cua/actions/workflows/bump-version.yml"
|
||||
|
||||
# Dry run targets (test without making changes)
|
||||
dry-run-patch-%: ## Dry run for patch version bump (e.g., make dry-run-patch-core)
|
||||
@echo "Dry run: Bumping $* patch version..."
|
||||
cd libs/python/$* && bump2version --dry-run --verbose patch
|
||||
|
||||
dry-run-minor-%: ## Dry run for minor version bump (e.g., make dry-run-minor-core)
|
||||
@echo "Dry run: Bumping $* minor version..."
|
||||
cd libs/python/$* && bump2version --dry-run --verbose minor
|
||||
|
||||
dry-run-major-%: ## Dry run for major version bump (e.g., make dry-run-major-core)
|
||||
@echo "Dry run: Bumping $* major version..."
|
||||
cd libs/python/$* && bump2version --dry-run --verbose major
|
||||
|
||||
# Show current versions
|
||||
show-versions: ## Show current versions of all packages
|
||||
@echo "Current Python package versions:"
|
||||
@echo " cua-core: $$(grep 'current_version' libs/python/core/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
@echo " pylume: $$(grep 'current_version' libs/python/pylume/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
@echo " cua-computer: $$(grep 'current_version' libs/python/computer/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
@echo " cua-som: $$(grep 'current_version' libs/python/som/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
@echo " cua-agent: $$(grep 'current_version' libs/python/agent/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
@echo " cua-computer-server: $$(grep 'current_version' libs/python/computer-server/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
@echo " cua-mcp-server: $$(grep 'current_version' libs/python/mcp-server/.bumpversion.cfg | cut -d' ' -f3)"
|
||||
413
README.md
413
README.md
@@ -5,70 +5,115 @@
|
||||
<img alt="Cua logo" height="150" src="img/logo_black.png">
|
||||
</picture>
|
||||
|
||||
[](#)
|
||||
[](#)
|
||||
[](#)
|
||||
[](https://discord.com/invite/mVnXXpdE85)
|
||||
<br>
|
||||
<a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
[](#)
|
||||
[](#)
|
||||
[](#)
|
||||
[](https://discord.com/invite/mVnXXpdE85)
|
||||
<br>
|
||||
<a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
|
||||
</div>
|
||||
|
||||
> We’re hosting the **Computer-Use Agents SOTA Challenge** at [Hack the North](https://hackthenorth.com) and online!
|
||||
>> **Track A (On-site @ UWaterloo)**: Reserved for participants accepted to Hack the North. 🏆 Prize: **YC interview guaranteed**.
|
||||
>> **Track B (Remote)**: Open to everyone worldwide. 🏆 Prize: **Cash award**.
|
||||
>>> 👉 Sign up here: [trycua.com/hackathon](https://www.trycua.com/hackathon)
|
||||
|
||||
**cua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
|
||||
**Cua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
|
||||
|
||||
<div align="center">
|
||||
<video src="https://github.com/user-attachments/assets/c619b4ea-bb8e-4382-860e-f3757e36af20" width="600" controls></video>
|
||||
</div>
|
||||
|
||||
With the Computer SDK, you can:
|
||||
- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions)
|
||||
- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [cua cloud](https://www.trycua.com/)
|
||||
With the [Computer SDK](#computer-sdk), you can:
|
||||
|
||||
- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions)
|
||||
- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [Cua cloud](https://www.trycua.com/)
|
||||
|
||||
With the [Agent SDK](#agent-sdk), you can:
|
||||
|
||||
With the Agent SDK, you can:
|
||||
- run computer-use models with a [consistent schema](https://docs.trycua.com/docs/agent-sdk/message-format)
|
||||
- benchmark on OSWorld-Verified, SheetBench-V2, and more [with a single line of code using HUD](https://docs.trycua.com/docs/agent-sdk/integrations/hud) ([Notebook](https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb))
|
||||
- combine UI grounding models with any LLM using [composed agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)
|
||||
- use new UI agent models and UI grounding models from the Model Zoo below with just a model string (e.g., `ComputerAgent(model="openai/computer-use-preview")`)
|
||||
- use API or local inference by changing a prefix (e.g., `openai/`, `openrouter/`, `ollama/`, `huggingface-local/`, `mlx/`, [etc.](https://docs.litellm.ai/docs/providers))
|
||||
|
||||
### CUA Model Zoo 🐨
|
||||
# Modules
|
||||
|
||||
| [All-in-one CUAs](https://docs.trycua.com/docs/agent-sdk/supported-agents/computer-use-agents) | [UI Grounding Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) | [UI Planning Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) |
|
||||
|---|---|---|
|
||||
| `anthropic/claude-opus-4-1-20250805` | `huggingface-local/xlangai/OpenCUA-{7B,32B}` | any all-in-one CUA |
|
||||
| `openai/computer-use-preview` | `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}` | any VLM (using liteLLM, requires `tools` parameter) |
|
||||
| `openrouter/z-ai/glm-4.5v` | `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}` | |
|
||||
| `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` | any all-in-one CUA | |
|
||||
| `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` | |
|
||||
| `omniparser+{ui planning}` | | |
|
||||
| `{ui grounding}+{ui planning}` | | |
|
||||
<table>
|
||||
<tr>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
- `human/human` → [Human-in-the-Loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop)
|
||||
[**Agent**](#agent-sdk)<br />
|
||||
AI agent framework for automating tasks
|
||||
|
||||
Missing a model? [Raise a feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)!
|
||||
</td>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
<br/>
|
||||
**[Computer](#computer-sdk)**<br />
|
||||
TypeScript/Python SDK for controlling Cua environments
|
||||
|
||||
# Quick Start
|
||||
</td>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
- [Get started with a Computer-Use Agent UI](https://docs.trycua.com/docs/quickstart-ui)
|
||||
- [Get started with the Computer-Use Agent CLI](https://docs.trycua.com/docs/quickstart-cli)
|
||||
- [Get started with the Python SDKs](https://docs.trycua.com/docs/quickstart-devs)
|
||||
**[MCP Server](#mcp-server)**<br />
|
||||
MCP server for using Cua agents and computers
|
||||
|
||||
<br/>
|
||||
</td>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
# Usage ([Docs](https://docs.trycua.com/docs))
|
||||
**[Computer Server](#computer-server)**<br />
|
||||
Server component that runs on Cua environments
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
**[Lume](#lume)**<br />
|
||||
VM management for macOS
|
||||
|
||||
</td>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
**[Lumier](#lumier)**<br />
|
||||
Docker interface for macOS/Linux VMs
|
||||
|
||||
</td>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
**[SOM](#som)**<br />
|
||||
Set-of-Mark library for Agent
|
||||
|
||||
</td>
|
||||
<td width="25%" align="center" valign="top">
|
||||
|
||||
**[Core](#core)**<br />
|
||||
Core utilities for Cua
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
# Quick Start
|
||||
|
||||
- [Clone a starter template and run the code in <1 min](https://github.com/trycua/agent-template)
|
||||
- [Get started with the Cua SDKs](https://docs.trycua.com/docs/quickstart-devs)
|
||||
- [Get started with the Cua CLI](https://docs.trycua.com/docs/quickstart-cli)
|
||||
|
||||
# Agent SDK
|
||||
|
||||
Install the agent SDK:
|
||||
|
||||
```bash
|
||||
pip install cua-agent[all]
|
||||
```
|
||||
|
||||
Initialize a computer agent using a [model configuration string](#model-configuration) and a [computer instance](#computer-sdk):
|
||||
|
||||
```python
|
||||
from agent import ComputerAgent
|
||||
|
||||
# ComputerAgent works with any computer initialized with the Computer SDK
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
@@ -83,121 +128,229 @@ async for result in agent.run(messages):
|
||||
print(item["content"][0]["text"])
|
||||
```
|
||||
|
||||
### Output format (OpenAI Agent Responses Format):
|
||||
## Output format
|
||||
|
||||
Cua uses the OpenAI Agent response format.
|
||||
|
||||
<details>
|
||||
<summary>Example</summary>
|
||||
|
||||
```json
|
||||
{
|
||||
{
|
||||
"output": [
|
||||
# user input
|
||||
{
|
||||
"role": "user",
|
||||
"content": "go to trycua on gh"
|
||||
},
|
||||
# first agent turn adds the model output to the history
|
||||
{
|
||||
"summary": [
|
||||
{
|
||||
"text": "Searching Firefox for Trycua GitHub",
|
||||
"type": "summary_text"
|
||||
}
|
||||
],
|
||||
"type": "reasoning"
|
||||
"role": "user",
|
||||
"content": "go to trycua on gh"
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"text": "Trycua GitHub",
|
||||
"type": "type"
|
||||
},
|
||||
"call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
|
||||
"status": "completed",
|
||||
"type": "computer_call"
|
||||
},
|
||||
# second agent turn adds the computer output to the history
|
||||
{
|
||||
"type": "computer_call_output",
|
||||
"call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
|
||||
"output": {
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,..."
|
||||
"summary": [
|
||||
{
|
||||
"text": "Searching Firefox for Trycua GitHub",
|
||||
"type": "summary_text"
|
||||
}
|
||||
],
|
||||
"type": "reasoning"
|
||||
},
|
||||
# final agent turn adds the agent output text to the history
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"text": "Success! The Trycua GitHub page has been opened.",
|
||||
"type": "output_text"
|
||||
}
|
||||
]
|
||||
"action": {
|
||||
"text": "Trycua GitHub",
|
||||
"type": "type"
|
||||
},
|
||||
"call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
|
||||
"status": "completed",
|
||||
"type": "computer_call"
|
||||
},
|
||||
{
|
||||
"type": "computer_call_output",
|
||||
"call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
|
||||
"output": {
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,..."
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"text": "Success! The Trycua GitHub page has been opened.",
|
||||
"type": "output_text"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 150,
|
||||
"completion_tokens": 75,
|
||||
"total_tokens": 225,
|
||||
"response_cost": 0.01,
|
||||
"prompt_tokens": 150,
|
||||
"completion_tokens": 75,
|
||||
"total_tokens": 225,
|
||||
"response_cost": 0.01
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# Computer ([Docs](https://docs.trycua.com/docs/computer-sdk/computers))
|
||||
</details>
|
||||
|
||||
## Model Configuration
|
||||
|
||||
These are the valid model configurations for `ComputerAgent(model="...")`:
|
||||
|
||||
| Configuration | Description |
|
||||
| ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `{computer-use-model}` | A single model to perform all computer-use tasks |
|
||||
| `{grounding-model}+{any-vlm-with-tools}` | [Composed](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) with VLM for captioning and grounding LLM for element detection |
|
||||
| `moondream3+{any-llm-with-tools}` | [Composed](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) with Moondream3 for captioning and UI element detection |
|
||||
| `human/human` | A [human-in-the-loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop) in place of a model |
|
||||
|
||||
### Model Capabilities
|
||||
|
||||
The following table shows which capabilities are supported by each model:
|
||||
|
||||
| Model | Computer-Use | Grounding | Tools | VLM |
|
||||
| -------------------------------------------------------------------------------------------------------------------------------- | :----------: | :-------: | :---: | :-: |
|
||||
| [Claude Sonnet/Haiku](https://docs.claude.com/en/docs/agents-and-tools/tool-use/computer-use-tool#how-to-implement-computer-use) | ✓ | ✓ | ✓ | ✓ |
|
||||
| [OpenAI CU Preview](https://platform.openai.com/docs/models/computer-use-preview) | ✓ | ✓ | | ✓ |
|
||||
| [GLM-V](https://huggingface.co/THUDM/glm-4v-9b) | ✓ | ✓ | ✓ | ✓ |
|
||||
| [Gemini CU Preview](https://ai.google.dev/gemini-api/docs/computer-use) | ✓ | ✓ | | ✓ |
|
||||
| [InternVL](https://huggingface.co/OpenGVLab/InternVL3_5-1B) | ✓ | ✓ | ✓ | ✓ |
|
||||
| [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | ✓ | ✓ | ✓ | ✓ |
|
||||
| [OpenCUA](https://huggingface.co/xlangai/OpenCUA-7B) | | ✓ | | |
|
||||
| [GTA](https://huggingface.co/HelloKKMe/GTA1-7B) | | ✓ | | |
|
||||
| [Holo](https://huggingface.co/Hcompany/Holo1.5-3B) | | ✓ | | |
|
||||
| [Moondream](https://huggingface.co/moondream/moondream3-preview) | | ✓ | | |
|
||||
| [OmniParser](https://github.com/microsoft/OmniParser) | | ✓ | | |
|
||||
|
||||
### Model IDs
|
||||
|
||||
<details>
|
||||
<summary>Examples of valid model IDs</summary>
|
||||
|
||||
| Model | Model IDs |
|
||||
| -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------- |
|
||||
| [Claude Sonnet/Haiku](https://docs.claude.com/en/docs/agents-and-tools/tool-use/computer-use-tool#how-to-implement-computer-use) | `anthropic/claude-sonnet-4-5`, `anthropic/claude-haiku-4-5` |
|
||||
| [OpenAI CU Preview](https://platform.openai.com/docs/models/computer-use-preview) | `openai/computer-use-preview` |
|
||||
| [GLM-V](https://huggingface.co/THUDM/glm-4v-9b) | `openrouter/z-ai/glm-4.5v`, `huggingface-local/zai-org/GLM-4.5V` |
|
||||
| [Gemini CU Preview](https://ai.google.dev/gemini-api/docs/computer-use) | `gemini-2.5-computer-use-preview` |
|
||||
| [InternVL](https://huggingface.co/OpenGVLab/InternVL3_5-1B) | `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` |
|
||||
| [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` |
|
||||
| [OpenCUA](https://huggingface.co/xlangai/OpenCUA-7B) | `huggingface-local/xlangai/OpenCUA-{7B,32B}` |
|
||||
| [GTA](https://huggingface.co/HelloKKMe/GTA1-7B) | `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}` |
|
||||
| [Holo](https://huggingface.co/Hcompany/Holo1.5-3B) | `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}` |
|
||||
| [Moondream](https://huggingface.co/moondream/moondream3-preview) | `moondream3` |
|
||||
| [OmniParser](https://github.com/microsoft/OmniParser) | `omniparser` |
|
||||
|
||||
</details>
|
||||
|
||||
Missing a model? Create a [feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)!
|
||||
|
||||
Learn more in the [Agent SDK documentation](./libs/python/agent/README.md).
|
||||
|
||||
# Computer SDK
|
||||
|
||||
Install the computer SDK:
|
||||
|
||||
```bash
|
||||
pip install cua-computer[all]
|
||||
pip install cua-computer
|
||||
```
|
||||
|
||||
Initialize a computer:
|
||||
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
async with Computer(
|
||||
os_type="linux",
|
||||
provider_type="cloud",
|
||||
name="your-container-name",
|
||||
api_key="your-api-key"
|
||||
) as computer:
|
||||
# Take screenshot
|
||||
computer = Computer(
|
||||
os_type="linux", # or "macos", "windows"
|
||||
provider_type="cloud", # or "lume", "docker", "windows_sandbox"
|
||||
name="your-sandbox-name",
|
||||
api_key="your-api-key" # only for cloud
|
||||
# or use_host_computer_server=True for host desktop
|
||||
)
|
||||
|
||||
try:
|
||||
await computer.run()
|
||||
|
||||
# Take a screenshot
|
||||
screenshot = await computer.interface.screenshot()
|
||||
|
||||
# Click and type
|
||||
await computer.interface.left_click(100, 100)
|
||||
await computer.interface.type("Hello!")
|
||||
finally:
|
||||
await computer.close()
|
||||
```
|
||||
|
||||
Learn more in the [Computer SDK documentation](./libs/python/computer/README.md).
|
||||
|
||||
# MCP Server
|
||||
|
||||
Install the MCP server:
|
||||
|
||||
```bash
|
||||
pip install cua-mcp-server
|
||||
```
|
||||
|
||||
Learn more in the [MCP Server documentation](./libs/python/mcp-server/README.md).
|
||||
|
||||
# Computer Server
|
||||
|
||||
Install the Computer Server:
|
||||
|
||||
```bash
|
||||
pip install cua-computer-server
|
||||
python -m computer_server
|
||||
```
|
||||
|
||||
Learn more in the [Computer Server documentation](./libs/python/computer-server/README.md).
|
||||
|
||||
# Lume
|
||||
|
||||
Install Lume:
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
Learn more in the [Lume documentation](./libs/lume/README.md).
|
||||
|
||||
# Lumier
|
||||
|
||||
Install Lumier:
|
||||
|
||||
```bash
|
||||
docker pull trycua/lumier:latest
|
||||
```
|
||||
|
||||
Learn more in the [Lumier documentation](./libs/lumier/README.md).
|
||||
|
||||
# SOM
|
||||
|
||||
Install SOM:
|
||||
|
||||
```bash
|
||||
pip install cua-som
|
||||
```
|
||||
|
||||
Learn more in the [SOM documentation](./libs/python/som/README.md).
|
||||
|
||||
# Resources
|
||||
|
||||
- [How to use the MCP Server with Claude Desktop or other MCP clients](./libs/python/mcp-server/README.md) - One of the easiest ways to get started with Cua
|
||||
- [How to use OpenAI Computer-Use, Anthropic, OmniParser, or UI-TARS for your Computer-Use Agent](./libs/python/agent/README.md)
|
||||
- [How to use Lume CLI for managing desktops](./libs/lume/README.md)
|
||||
- [Training Computer-Use Models: Collecting Human Trajectories with Cua (Part 1)](https://www.trycua.com/blog/training-computer-use-models-trajectories-1)
|
||||
- [Cua Blog](https://www.trycua.com/blog)
|
||||
- [Cua Docs](https://docs.trycua.com)
|
||||
|
||||
## Modules
|
||||
# Community and Contributions
|
||||
|
||||
| Module | Description | Installation |
|
||||
|--------|-------------|---------------|
|
||||
| [**Lume**](./libs/lume/README.md) | VM management for macOS/Linux using Apple's Virtualization.Framework | `curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh \| bash` |
|
||||
| [**Lumier**](./libs/lumier/README.md) | Docker interface for macOS and Linux VMs | `docker pull trycua/lumier:latest` |
|
||||
| [**Computer (Python)**](./libs/python/computer/README.md) | Python Interface for controlling virtual machines | `pip install "cua-computer[all]"` |
|
||||
| [**Computer (TypeScript)**](./libs/typescript/computer/README.md) | TypeScript Interface for controlling virtual machines | `npm install @trycua/computer` |
|
||||
| [**Agent**](./libs/python/agent/README.md) | AI agent framework for automating tasks | `pip install "cua-agent[all]"` |
|
||||
| [**MCP Server**](./libs/python/mcp-server/README.md) | MCP server for using CUA with Claude Desktop | `pip install cua-mcp-server` |
|
||||
| [**SOM**](./libs/python/som/README.md) | Set-of-Mark library for Agent | `pip install cua-som` |
|
||||
| [**Computer Server**](./libs/python/computer-server/README.md) | Server component for Computer | `pip install cua-computer-server` |
|
||||
| [**Core (Python)**](./libs/python/core/README.md) | Python Core utilities | `pip install cua-core` |
|
||||
| [**Core (TypeScript)**](./libs/typescript/core/README.md) | TypeScript Core utilities | `npm install @trycua/core` |
|
||||
|
||||
## Community
|
||||
We welcome contributions to Cua! Please refer to our [Contributing Guidelines](CONTRIBUTING.md) for details.
|
||||
|
||||
Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas, get assistance, or share your demos!
|
||||
|
||||
## License
|
||||
# License
|
||||
|
||||
Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||
Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE.md) file for details.
|
||||
|
||||
Portions of this project, specifically components adapted from Kasm Technologies Inc., are also licensed under the MIT License. See [libs/kasm/LICENSE](libs/kasm/LICENSE) for details.
|
||||
|
||||
Microsoft's OmniParser, which is used in this project, is licensed under the Creative Commons Attribution 4.0 International License (CC-BY-4.0). See the [OmniParser LICENSE](https://github.com/microsoft/OmniParser/blob/master/LICENSE) for details.
|
||||
|
||||
### Third-Party Licenses and Optional Components
|
||||
## Third-Party Licenses and Optional Components
|
||||
|
||||
Some optional extras for this project depend on third-party packages that are licensed under terms different from the MIT License.
|
||||
|
||||
@@ -205,52 +358,22 @@ Some optional extras for this project depend on third-party packages that are li
|
||||
|
||||
When you choose to install and use such optional extras, your use, modification, and distribution of those third-party components are governed by their respective licenses (e.g., AGPL-3.0 for `ultralytics`).
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome contributions to Cua! Please refer to our [Contributing Guidelines](CONTRIBUTING.md) for details.
|
||||
|
||||
## Trademarks
|
||||
|
||||
Apple, macOS, and Apple Silicon are trademarks of Apple Inc.
|
||||
Ubuntu and Canonical are registered trademarks of Canonical Ltd.
|
||||
Microsoft is a registered trademark of Microsoft Corporation.
|
||||
Microsoft is a registered trademark of Microsoft Corporation.
|
||||
|
||||
This project is not affiliated with, endorsed by, or sponsored by Apple Inc., Canonical Ltd., Microsoft Corporation, or Kasm Technologies.
|
||||
|
||||
## Stargazers
|
||||
# Stargazers
|
||||
|
||||
Thank you to all our supporters!
|
||||
|
||||
[![Stargazers over time](https://starchart.cc/trycua/cua.svg)](https://starchart.cc/trycua/cua)
|
||||
|
||||
## Contributors
|
||||
# Sponsors
|
||||
|
||||
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
|
||||
<!-- prettier-ignore-start -->
|
||||
<!-- markdownlint-disable -->
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/f-trycua"><img src="https://avatars.githubusercontent.com/u/195596869?v=4?s=100" width="100px;" alt="f-trycua"/><br /><sub><b>f-trycua</b></sub></a><br /><a href="#code-f-trycua" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="http://pepicrft.me"><img src="https://avatars.githubusercontent.com/u/663605?v=4?s=100" width="100px;" alt="Pedro Piñera Buendía"/><br /><sub><b>Pedro Piñera Buendía</b></sub></a><br /><a href="#code-pepicrft" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://iamit.in"><img src="https://avatars.githubusercontent.com/u/5647941?v=4?s=100" width="100px;" alt="Amit Kumar"/><br /><sub><b>Amit Kumar</b></sub></a><br /><a href="#code-aktech" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://productsway.com/"><img src="https://avatars.githubusercontent.com/u/870029?v=4?s=100" width="100px;" alt="Dung Duc Huynh (Kaka)"/><br /><sub><b>Dung Duc Huynh (Kaka)</b></sub></a><br /><a href="#code-jellydn" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="http://zaydkrunz.com"><img src="https://avatars.githubusercontent.com/u/70227235?v=4?s=100" width="100px;" alt="Zayd Krunz"/><br /><sub><b>Zayd Krunz</b></sub></a><br /><a href="#code-ShrootBuck" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/PrashantRaj18198"><img src="https://avatars.githubusercontent.com/u/23168997?v=4?s=100" width="100px;" alt="Prashant Raj"/><br /><sub><b>Prashant Raj</b></sub></a><br /><a href="#code-PrashantRaj18198" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://www.mobile.dev"><img src="https://avatars.githubusercontent.com/u/847683?v=4?s=100" width="100px;" alt="Leland Takamine"/><br /><sub><b>Leland Takamine</b></sub></a><br /><a href="#code-Leland-Takamine" title="Code">💻</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/ddupont808"><img src="https://avatars.githubusercontent.com/u/3820588?v=4?s=100" width="100px;" alt="ddupont"/><br /><sub><b>ddupont</b></sub></a><br /><a href="#code-ddupont808" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Lizzard1123"><img src="https://avatars.githubusercontent.com/u/46036335?v=4?s=100" width="100px;" alt="Ethan Gutierrez"/><br /><sub><b>Ethan Gutierrez</b></sub></a><br /><a href="#code-Lizzard1123" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://ricterz.me"><img src="https://avatars.githubusercontent.com/u/5282759?v=4?s=100" width="100px;" alt="Ricter Zheng"/><br /><sub><b>Ricter Zheng</b></sub></a><br /><a href="#code-RicterZ" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://www.trytruffle.ai/"><img src="https://avatars.githubusercontent.com/u/50844303?v=4?s=100" width="100px;" alt="Rahul Karajgikar"/><br /><sub><b>Rahul Karajgikar</b></sub></a><br /><a href="#code-rahulkarajgikar" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/trospix"><img src="https://avatars.githubusercontent.com/u/81363696?v=4?s=100" width="100px;" alt="trospix"/><br /><sub><b>trospix</b></sub></a><br /><a href="#code-trospix" title="Code">💻</a></td>
|
||||
<td align="center" valign="top" width="14.28%"><a href="https://github.com/evnsnclr"><img src="https://avatars.githubusercontent.com/u/139897548?v=4?s=100" width="100px;" alt="Evan smith"/><br /><sub><b>Evan smith</b></sub></a><br /><a href="#code-evnsnclr" title="Code">💻</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
Thank you to all our [GitHub Sponsors](https://github.com/sponsors/trycua)!
|
||||
|
||||
<!-- markdownlint-restore -->
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
<!-- ALL-CONTRIBUTORS-LIST:END -->
|
||||
<img width="300" alt="coderabbit-cli" src="https://github.com/user-attachments/assets/23a98e38-7897-4043-8ef7-eb990520dccc" />
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# App-Use: Control Individual Applications with Cua Agents
|
||||
|
||||
*Published on May 31, 2025 by The Cua Team*
|
||||
_Published on May 31, 2025 by The Cua Team_
|
||||
|
||||
Today, we are excited to introduce a new experimental feature landing in the [Cua GitHub repository](https://github.com/trycua/cua): **App-Use**. App-Use allows you to create lightweight virtual desktops that limit agent access to specific applications, improving the precision of your agent's trajectory. Perfect for parallel workflows and focused task execution.
|
||||
|
||||
@@ -33,9 +33,11 @@ agent = ComputerAgent(
|
||||
## Key Benefits
|
||||
|
||||
### 1. Lightweight and Fast
|
||||
|
||||
App-Use creates visual filters, not new processes. Your apps continue running normally - we just control what the agent can see and click on. The virtual desktops are composited views that require no additional compute resources beyond the existing window manager operations.
|
||||
|
||||
### 2. Run Multiple Agents in Parallel
|
||||
|
||||
Deploy a team of specialized agents, each focused on their own apps:
|
||||
|
||||
```python
|
||||
@@ -46,7 +48,7 @@ computer = Computer(experiments=["app-use"])
|
||||
research_desktop = computer.create_desktop_from_apps(["Safari"])
|
||||
research_agent = ComputerAgent(tools=[research_desktop], ...)
|
||||
|
||||
# Writing agent focuses on documents
|
||||
# Writing agent focuses on documents
|
||||
writing_desktop = computer.create_desktop_from_apps(["Pages", "Notes"])
|
||||
writing_agent = ComputerAgent(tools=[writing_desktop], ...)
|
||||
|
||||
@@ -66,6 +68,7 @@ await asyncio.gather(
|
||||
### Requirements
|
||||
|
||||
To get started with App-Use, you'll need:
|
||||
|
||||
- Python 3.11+
|
||||
- macOS Sequoia (15.0) or later
|
||||
|
||||
@@ -85,21 +88,21 @@ from agent import ComputerAgent
|
||||
async def main():
|
||||
computer = Computer()
|
||||
await computer.run()
|
||||
|
||||
|
||||
# Create app-specific desktop sessions
|
||||
desktop = computer.create_desktop_from_apps(["Notes"])
|
||||
|
||||
|
||||
# Initialize an agent
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[desktop]
|
||||
)
|
||||
|
||||
|
||||
# Take a screenshot (returns bytes by default)
|
||||
screenshot = await desktop.interface.screenshot()
|
||||
with open("app_screenshot.png", "wb") as f:
|
||||
f.write(screenshot)
|
||||
|
||||
|
||||
# Run an agent task
|
||||
async for result in agent.run("Create a new note titled 'Meeting Notes' and add today's agenda items"):
|
||||
print(f"Agent: {result.get('text', '')}")
|
||||
@@ -113,6 +116,7 @@ if __name__ == "__main__":
|
||||
### ⚠️ Important Warning
|
||||
|
||||
Computer-use agents are powerful tools that can interact with your devices. This guide involves using your own Mac and iPhone instead of a VM. **Proceed at your own risk.** Always:
|
||||
|
||||
- Review agent actions before running
|
||||
- Start with non-critical tasks
|
||||
- Monitor agent behavior closely
|
||||
@@ -150,20 +154,20 @@ async def automate_iphone():
|
||||
# Connect to your local computer server
|
||||
my_mac = Computer(use_host_computer_server=True, os_type="macos", experiments=["app-use"])
|
||||
await my_mac.run()
|
||||
|
||||
|
||||
# Create a desktop focused on iPhone Mirroring
|
||||
my_iphone = my_mac.create_desktop_from_apps(["iPhone Mirroring"])
|
||||
|
||||
|
||||
# Initialize an agent for iPhone automation
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[my_iphone]
|
||||
)
|
||||
|
||||
|
||||
# Example: Send a message
|
||||
async for result in agent.run("Open Messages and send 'Hello from Cua!' to John"):
|
||||
print(f"Agent: {result.get('text', '')}")
|
||||
|
||||
|
||||
# Example: Set a reminder
|
||||
async for result in agent.run("Create a reminder to call mom at 5 PM today"):
|
||||
print(f"Agent: {result.get('text', '')}")
|
||||
@@ -175,6 +179,7 @@ if __name__ == "__main__":
|
||||
### iPhone Automation Use Cases
|
||||
|
||||
With Cua's iPhone automation, you can:
|
||||
|
||||
- **Automate messaging**: Send texts, respond to messages, manage conversations
|
||||
- **Control apps**: Navigate any iPhone app using natural language
|
||||
- **Manage settings**: Adjust iPhone settings programmatically
|
||||
@@ -191,6 +196,7 @@ With Cua's iPhone automation, you can:
|
||||
## When to Use What: App-Use vs Multiple Cua Containers
|
||||
|
||||
### Use App-Use within the same macOS Cua Container:
|
||||
|
||||
- ✅ You need lightweight, fast agent focusing (macOS only)
|
||||
- ✅ You want to run multiple agents on one desktop
|
||||
- ✅ You're automating personal devices like iPhones
|
||||
@@ -198,6 +204,7 @@ With Cua's iPhone automation, you can:
|
||||
- ✅ You want low computational overhead
|
||||
|
||||
### Use Multiple Cua Containers:
|
||||
|
||||
- ✅ You need maximum isolation between agents
|
||||
- ✅ You require cross-platform support (Mac/Linux/Windows)
|
||||
- ✅ You need guaranteed resource allocation
|
||||
@@ -215,6 +222,7 @@ With Cua's iPhone automation, you can:
|
||||
### How It Works
|
||||
|
||||
When you create a desktop session with `create_desktop_from_apps()`, App-Use:
|
||||
|
||||
- Filters the visual output to show only specified application windows
|
||||
- Routes input events only to those applications
|
||||
- Maintains window layout isolation between different sessions
|
||||
|
||||
BIN
blog/assets/hack-booth.png
Normal file
BIN
blog/assets/hack-booth.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.4 MiB |
BIN
blog/assets/hack-closing-ceremony.jpg
Normal file
BIN
blog/assets/hack-closing-ceremony.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 161 KiB |
BIN
blog/assets/hack-cua-ollama-hud.jpeg
Normal file
BIN
blog/assets/hack-cua-ollama-hud.jpeg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 5.1 MiB |
BIN
blog/assets/hack-leaderboard.png
Normal file
BIN
blog/assets/hack-leaderboard.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 994 KiB |
BIN
blog/assets/hack-winners.jpeg
Normal file
BIN
blog/assets/hack-winners.jpeg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.9 MiB |
BIN
blog/assets/hack-workshop.jpeg
Normal file
BIN
blog/assets/hack-workshop.jpeg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 314 KiB |
@@ -1,10 +1,10 @@
|
||||
# Bringing Computer-Use to the Web
|
||||
|
||||
*Published on August 5, 2025 by Morgan Dean*
|
||||
_Published on August 5, 2025 by Morgan Dean_
|
||||
|
||||
In one of our original posts, we explored building Computer-Use Operators on macOS - first with a [manual implementation](build-your-own-operator-on-macos-1.md) using OpenAI's `computer-use-preview` model, then with our [cua-agent framework](build-your-own-operator-on-macos-2.md) for Python developers. While these tutorials have been incredibly popular, we've received consistent feedback from our community: **"Can we use C/ua with JavaScript and TypeScript?"**
|
||||
In one of our original posts, we explored building Computer-Use Operators on macOS - first with a [manual implementation](build-your-own-operator-on-macos-1.md) using OpenAI's `computer-use-preview` model, then with our [cua-agent framework](build-your-own-operator-on-macos-2.md) for Python developers. While these tutorials have been incredibly popular, we've received consistent feedback from our community: **"Can we use Cua with JavaScript and TypeScript?"**
|
||||
|
||||
Today, we're excited to announce the release of the **`@trycua/computer` Web SDK** - a new library that allows you to control your C/ua cloud containers from any JavaScript or TypeScript project. With this library, you can click, type, and grab screenshots from your cloud containers - no extra servers required.
|
||||
Today, we're excited to announce the release of the **`@trycua/computer` Web SDK** - a new library that allows you to control your Cua cloud containers from any JavaScript or TypeScript project. With this library, you can click, type, and grab screenshots from your cloud containers - no extra servers required.
|
||||
|
||||
With this new SDK, you can easily develop CUA experiences like the one below, which we will release soon as open source.
|
||||
|
||||
@@ -19,7 +19,7 @@ Let’s see how it works.
|
||||
By the end of this tutorial, you'll be able to:
|
||||
|
||||
- Set up the `@trycua/computer` npm library in any JavaScript/TypeScript project
|
||||
- Connect OpenAI's computer-use model to C/ua cloud containers from web applications
|
||||
- Connect OpenAI's computer-use model to Cua cloud containers from web applications
|
||||
- Build computer-use agents that work in Node.js, React, Vue, or any web framework
|
||||
- Handle different types of computer actions (clicking, typing, scrolling) from web code
|
||||
- Implement the complete computer-use loop in JavaScript/TypeScript
|
||||
@@ -30,7 +30,7 @@ By the end of this tutorial, you'll be able to:
|
||||
- Node.js 16+ and npm/yarn/pnpm
|
||||
- Basic JavaScript or TypeScript knowledge
|
||||
- OpenAI API access (Tier 3+ for computer-use-preview)
|
||||
- C/ua cloud container credits ([get started here](https://trycua.com/pricing))
|
||||
- Cua cloud container credits ([get started here](https://trycua.com/pricing))
|
||||
|
||||
**Estimated Time:** 45-60 minutes
|
||||
|
||||
@@ -47,9 +47,9 @@ At the time of writing, the **computer-use-preview** model has limited availabil
|
||||
|
||||
Luckily, the `@trycua/computer` library can be used in conjunction with other models, like [Anthropic’s Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/computer-use-tool) or [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B). You’ll just have to write your own handler to parse the model output for interfacing with the container.
|
||||
|
||||
### C/ua Cloud Containers
|
||||
### Cua Cloud Containers
|
||||
|
||||
To follow this guide, you’ll need access to a C/ua cloud container.
|
||||
To follow this guide, you’ll need access to a Cua cloud container.
|
||||
|
||||
Getting access is simple: purchase credits from our [pricing page](https://trycua.com/pricing), then create and provision a new container instance from the [dashboard](https://trycua.com/dashboard/containers). With your container running, you'll be ready to leverage the web SDK and bring automation to your JavaScript or TypeScript applications.
|
||||
|
||||
@@ -96,7 +96,7 @@ const res = await openai.responses.create({
|
||||
],
|
||||
},
|
||||
],
|
||||
truncation: 'auto'
|
||||
truncation: 'auto',
|
||||
});
|
||||
```
|
||||
|
||||
@@ -142,32 +142,32 @@ Each response contains:
|
||||
|
||||
## Implementation Guide
|
||||
|
||||
### Provision a C/ua Cloud Container
|
||||
### Provision a Cua Cloud Container
|
||||
|
||||
1. Visit [trycua.com](https://trycua.com), sign up, purchase [credits](https://trycua.com/pricing), and create a new container instance from the [dashboard](https://trycua.com/dashboard).
|
||||
2. Create an API key from the dashboard — be sure to save it in a secure location before continuing.
|
||||
3. Start the cloud container from the dashboard.
|
||||
1. Visit [trycua.com](https://trycua.com), sign up, purchase [credits](https://trycua.com/pricing), and create a new container instance from the [dashboard](https://trycua.com/dashboard).
|
||||
2. Create an API key from the dashboard — be sure to save it in a secure location before continuing.
|
||||
3. Start the cloud container from the dashboard.
|
||||
|
||||
### Environment Setup
|
||||
|
||||
1. Install required packages with your preferred package manager:
|
||||
1. Install required packages with your preferred package manager:
|
||||
|
||||
```bash
|
||||
npm install --save @trycua/computer # or yarn, pnpm, bun
|
||||
npm install --save openai # or yarn, pnpm, bun
|
||||
```
|
||||
```bash
|
||||
npm install --save @trycua/computer # or yarn, pnpm, bun
|
||||
npm install --save openai # or yarn, pnpm, bun
|
||||
```
|
||||
|
||||
Works with any JavaScript/TypeScript project setup - whether you're using Create React App, Next.js, Vue, Angular, or plain JavaScript.
|
||||
Works with any JavaScript/TypeScript project setup - whether you're using Create React App, Next.js, Vue, Angular, or plain JavaScript.
|
||||
|
||||
2. Save your OpenAI API key, C/ua API key, and container name to a `.env` file:
|
||||
2. Save your OpenAI API key, Cua API key, and container name to a `.env` file:
|
||||
|
||||
```bash
|
||||
OPENAI_API_KEY=openai-api-key
|
||||
CUA_API_KEY=cua-api-key
|
||||
CUA_CONTAINER_NAME=cua-cloud-container-name
|
||||
```
|
||||
```bash
|
||||
OPENAI_API_KEY=openai-api-key
|
||||
CUA_API_KEY=cua-api-key
|
||||
CUA_CONTAINER_NAME=cua-cloud-container-name
|
||||
```
|
||||
|
||||
These environment variables work the same whether you're using vanilla JavaScript, TypeScript, or any web framework.
|
||||
These environment variables work the same whether you're using vanilla JavaScript, TypeScript, or any web framework.
|
||||
|
||||
## Building the Agent
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Build Your Own Operator on macOS - Part 1
|
||||
|
||||
*Published on March 31, 2025 by Francesco Bonacci*
|
||||
_Published on March 31, 2025 by Francesco Bonacci_
|
||||
|
||||
In this first blogpost, we'll learn how to build our own Computer-Use Operator using OpenAI's `computer-use-preview` model. But first, let's understand what some common terms mean:
|
||||
|
||||
@@ -19,6 +19,7 @@ Check out what it looks like to use your own Operator from a Gradio app:
|
||||
## What You'll Learn
|
||||
|
||||
By the end of this tutorial, you'll be able to:
|
||||
|
||||
- Set up a macOS virtual machine for AI automation
|
||||
- Connect OpenAI's computer-use model to your VM
|
||||
- Create a basic loop for the AI to interact with your VM
|
||||
@@ -26,6 +27,7 @@ By the end of this tutorial, you'll be able to:
|
||||
- Implement safety checks and error handling
|
||||
|
||||
**Prerequisites:**
|
||||
|
||||
- macOS Sonoma (14.0) or later
|
||||
- 8GB RAM minimum (16GB recommended)
|
||||
- OpenAI API access (Tier 3+)
|
||||
@@ -41,15 +43,17 @@ Last March OpenAI released a fine-tuned version of GPT-4o, namely [CUA](https://
|
||||
Professor Ethan Mollick provides an excellent explanation of computer-use agents in this article: [When you give a Claude a mouse](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse).
|
||||
|
||||
### ChatGPT Operator
|
||||
|
||||
OpenAI's computer-use model powers [ChatGPT Operator](https://openai.com/index/introducing-operator), a Chromium-based interface exclusively available to ChatGPT Pro subscribers. Users leverage this functionality to automate web-based tasks such as online shopping, expense report submission, and booking reservations by interacting with websites in a human-like manner.
|
||||
|
||||
## Benefits of Custom Operators
|
||||
|
||||
### Why Build Your Own?
|
||||
|
||||
While OpenAI's Operator uses a controlled Chromium VM instance, there are scenarios where you may want to use your own VM with full desktop capabilities. Here are some examples:
|
||||
|
||||
- Automating native macOS apps like Finder, Xcode
|
||||
- Managing files, changing settings, and running terminal commands
|
||||
- Managing files, changing settings, and running terminal commands
|
||||
- Testing desktop software and applications
|
||||
- Creating workflows that combine web and desktop tasks
|
||||
- Automating media editing in apps like Final Cut Pro and Blender
|
||||
@@ -59,7 +63,9 @@ This gives you more control and flexibility to automate tasks beyond just web br
|
||||
## Access Requirements
|
||||
|
||||
### Model Availability
|
||||
|
||||
As we speak, the **computer-use-preview** model has limited availability:
|
||||
|
||||
- Only accessible to OpenAI tier 3+ users
|
||||
- Additional application process may be required even for eligible users
|
||||
- Cannot be used in the OpenAI Playground
|
||||
@@ -68,15 +74,18 @@ As we speak, the **computer-use-preview** model has limited availability:
|
||||
## Understanding the OpenAI API
|
||||
|
||||
### Responses API Overview
|
||||
|
||||
Let's start with the basics. In our case, we'll use OpenAI's Responses API to communicate with their computer-use model.
|
||||
|
||||
Think of it like this:
|
||||
|
||||
1. We send the model a screenshot of our VM and tell it what we want it to do
|
||||
2. The model looks at the screenshot and decides what actions to take
|
||||
3. It sends back instructions (like "click here" or "type this")
|
||||
4. We execute those instructions in our VM
|
||||
|
||||
The [Responses API](https://platform.openai.com/docs/guides/responses) is OpenAI's newest way to interact with their AI models. It comes with several built-in tools:
|
||||
|
||||
- **Web search**: Let the AI search the internet
|
||||
- **File search**: Help the AI find documents
|
||||
- **Computer use**: Allow the AI to control a computer (what we'll be using)
|
||||
@@ -84,9 +93,11 @@ The [Responses API](https://platform.openai.com/docs/guides/responses) is OpenAI
|
||||
As we speak, the computer-use model is only available through the Responses API.
|
||||
|
||||
### Responses API Examples
|
||||
|
||||
Let's look at some simple examples. We'll start with the traditional way of using OpenAI's API with Chat Completions, then show the new Responses API primitive.
|
||||
|
||||
Chat Completions:
|
||||
|
||||
```python
|
||||
# The old way required managing conversation history manually
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
@@ -98,13 +109,14 @@ messages.append(response.choices[0].message) # Manual message tracking
|
||||
```
|
||||
|
||||
Responses API:
|
||||
|
||||
```python
|
||||
# Example 1: Simple web search
|
||||
# The API handles all the complexity for us
|
||||
response = client.responses.create(
|
||||
model="gpt-4",
|
||||
input=[{
|
||||
"role": "user",
|
||||
"role": "user",
|
||||
"content": "What's the latest news about AI?"
|
||||
}],
|
||||
tools=[{
|
||||
@@ -118,7 +130,7 @@ response = client.responses.create(
|
||||
response = client.responses.create(
|
||||
model="gpt-4",
|
||||
input=[{
|
||||
"role": "user",
|
||||
"role": "user",
|
||||
"content": "Find documents about project X"
|
||||
}],
|
||||
tools=[{
|
||||
@@ -130,6 +142,7 @@ response = client.responses.create(
|
||||
```
|
||||
|
||||
### Computer-Use Model Setup
|
||||
|
||||
For our operator, we'll use the computer-use model. Here's how we set it up:
|
||||
|
||||
```python
|
||||
@@ -144,7 +157,7 @@ response = client.responses.create(
|
||||
}],
|
||||
input=[
|
||||
{
|
||||
"role": "user",
|
||||
"role": "user",
|
||||
"content": [
|
||||
# What we want the AI to do
|
||||
{"type": "input_text", "text": "Open Safari and go to google.com"},
|
||||
@@ -158,6 +171,7 @@ response = client.responses.create(
|
||||
```
|
||||
|
||||
### Understanding the Response
|
||||
|
||||
When we send a request, the API sends back a response that looks like this:
|
||||
|
||||
```json
|
||||
@@ -189,6 +203,7 @@ When we send a request, the API sends back a response that looks like this:
|
||||
```
|
||||
|
||||
Each response contains:
|
||||
|
||||
1. **Reasoning**: The AI's explanation of what it's doing
|
||||
2. **Action**: The specific computer action to perform
|
||||
3. **Safety Checks**: Any potential risks to review
|
||||
@@ -197,6 +212,7 @@ Each response contains:
|
||||
## CUA-Computer Interface
|
||||
|
||||
### Architecture Overview
|
||||
|
||||
Let's break down the main components of our system and how they work together:
|
||||
|
||||
1. **The Virtual Machine (VM)**
|
||||
@@ -238,7 +254,7 @@ sequenceDiagram
|
||||
VM-->>CUI: Return current screen
|
||||
CUI->>AI: Send screenshot + instructions
|
||||
AI-->>CUI: Return next action
|
||||
|
||||
|
||||
Note over CUI,VM: Execute the action
|
||||
alt Mouse Click
|
||||
CUI->>VM: Move and click mouse
|
||||
@@ -259,6 +275,7 @@ sequenceDiagram
|
||||
```
|
||||
|
||||
The diagram above shows how information flows through our system:
|
||||
|
||||
1. You start the operator
|
||||
2. The Computer Interface creates a virtual macOS
|
||||
3. Then it enters a loop:
|
||||
@@ -290,17 +307,19 @@ This design keeps everything organized and safe. The AI can only interact with t
|
||||
- Cached images are stored in `~/.lume/cache`
|
||||
|
||||
You can check your downloaded VM images anytime:
|
||||
|
||||
```bash
|
||||
lume ls
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
| name | os | cpu | memory | disk | display | status | ip | vnc |
|
||||
|--------------------------|---------|-------|---------|----------------|-----------|-----------|----------------|---------------------------------------------------|
|
||||
| macos-sequoia-cua:latest | macOS | 12 | 16.00G | 64.5GB/80.0GB | 1024x768 | running | 192.168.64.78 | vnc://:kind-forest-zulu-island@127.0.0.1:56085 |
|
||||
| name | os | cpu | memory | disk | display | status | ip | vnc |
|
||||
| ------------------------ | ----- | --- | ------ | ------------- | -------- | ------- | ------------- | ---------------------------------------------- |
|
||||
| macos-sequoia-cua:latest | macOS | 12 | 16.00G | 64.5GB/80.0GB | 1024x768 | running | 192.168.64.78 | vnc://:kind-forest-zulu-island@127.0.0.1:56085 |
|
||||
|
||||
After checking your available images, you can run the VM to ensure everything is working correctly:
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
```
|
||||
@@ -309,12 +328,14 @@ This design keeps everything organized and safe. The AI can only interact with t
|
||||
**Note**: The `cua-computer` package requires Python 3.10 or later. We recommend creating a dedicated Python environment:
|
||||
|
||||
**Using venv:**
|
||||
|
||||
```bash
|
||||
python -m venv cua-env
|
||||
source cua-env/bin/activate
|
||||
```
|
||||
|
||||
**Using conda:**
|
||||
|
||||
```bash
|
||||
conda create -n cua-env python=3.10
|
||||
conda activate cua-env
|
||||
@@ -332,6 +353,7 @@ This design keeps everything organized and safe. The AI can only interact with t
|
||||
### Building the Operator
|
||||
|
||||
#### Importing Required Modules
|
||||
|
||||
With the prerequisites installed and configured, we're ready to build our first operator.
|
||||
The following example uses asynchronous Python (async/await). You can run it either in a VS Code Notebook or as a standalone Python script.
|
||||
|
||||
@@ -344,12 +366,13 @@ from computer import Computer
|
||||
```
|
||||
|
||||
#### Mapping API Actions to CUA Methods
|
||||
|
||||
The following helper function converts a `computer_call` action from the OpenAI Responses API into corresponding commands on the CUI interface. For example, if the API instructs a `click` action, we move the cursor and perform a left click on the lume VM Sandbox. We will use the computer interface to execute the actions.
|
||||
|
||||
```python
|
||||
async def execute_action(computer, action):
|
||||
action_type = action.type
|
||||
|
||||
|
||||
if action_type == "click":
|
||||
x = action.x
|
||||
y = action.y
|
||||
@@ -360,12 +383,12 @@ async def execute_action(computer, action):
|
||||
await computer.interface.right_click()
|
||||
else:
|
||||
await computer.interface.left_click()
|
||||
|
||||
|
||||
elif action_type == "type":
|
||||
text = action.text
|
||||
print(f"Typing text: {text}")
|
||||
await computer.interface.type_text(text)
|
||||
|
||||
|
||||
elif action_type == "scroll":
|
||||
x = action.x
|
||||
y = action.y
|
||||
@@ -374,7 +397,7 @@ async def execute_action(computer, action):
|
||||
print(f"Scrolling at ({x}, {y}) with offsets (scroll_x={scroll_x}, scroll_y={scroll_y})")
|
||||
await computer.interface.move_cursor(x, y)
|
||||
await computer.interface.scroll(scroll_y) # Using vertical scroll only
|
||||
|
||||
|
||||
elif action_type == "keypress":
|
||||
keys = action.keys
|
||||
for key in keys:
|
||||
@@ -386,23 +409,24 @@ async def execute_action(computer, action):
|
||||
await computer.interface.press_key("space")
|
||||
else:
|
||||
await computer.interface.press_key(key)
|
||||
|
||||
|
||||
elif action_type == "wait":
|
||||
wait_time = action.time
|
||||
print(f"Waiting for {wait_time} seconds")
|
||||
await asyncio.sleep(wait_time)
|
||||
|
||||
|
||||
elif action_type == "screenshot":
|
||||
print("Taking screenshot")
|
||||
# This is handled automatically in the main loop, but we can take an extra one if requested
|
||||
screenshot = await computer.interface.screenshot()
|
||||
return screenshot
|
||||
|
||||
|
||||
else:
|
||||
print(f"Unrecognized action: {action_type}")
|
||||
```
|
||||
|
||||
#### Implementing the Computer-Use Loop
|
||||
|
||||
This section defines a loop that:
|
||||
|
||||
1. Initializes the cua-computer instance (connecting to a macOS sandbox).
|
||||
@@ -423,7 +447,7 @@ async def cua_openai_loop():
|
||||
os_type="macos"
|
||||
) as computer:
|
||||
await computer.run() # Start the lume VM
|
||||
|
||||
|
||||
# Capture the initial screenshot
|
||||
screenshot = await computer.interface.screenshot()
|
||||
screenshot_base64 = base64.b64encode(screenshot).decode('utf-8')
|
||||
@@ -438,8 +462,8 @@ async def cua_openai_loop():
|
||||
"environment": "mac"
|
||||
}],
|
||||
input=[
|
||||
{
|
||||
"role": "user",
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "input_text", "text": "Open Safari, download and install Cursor."},
|
||||
{"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_base64}"}
|
||||
@@ -488,7 +512,7 @@ async def cua_openai_loop():
|
||||
"display_height": 768,
|
||||
"environment": "mac"
|
||||
}],
|
||||
input=[{
|
||||
input=[{
|
||||
"type": "computer_call_output",
|
||||
"call_id": last_call_id,
|
||||
"acknowledged_safety_checks": acknowledged_checks,
|
||||
@@ -511,12 +535,15 @@ if __name__ == "__main__":
|
||||
You can find the full code in our [notebook](https://github.com/trycua/cua/blob/main/notebooks/blog/build-your-own-operator-on-macos-1.ipynb).
|
||||
|
||||
#### Request Handling Differences
|
||||
|
||||
The first request to the OpenAI Responses API is special in that it includes the initial screenshot and prompt. Subsequent requests are handled differently, using the `computer_call_output` type to provide feedback on the executed action.
|
||||
|
||||
##### Initial Request Format
|
||||
|
||||
- We use `role: "user"` with `content` that contains both `input_text` (the prompt) and `input_image` (the screenshot)
|
||||
|
||||
##### Subsequent Request Format
|
||||
|
||||
- We use `type: "computer_call_output"` instead of the user role
|
||||
- We include the `call_id` to link the output to the specific previous action that was executed
|
||||
- We provide any `acknowledged_safety_checks` that were approved
|
||||
@@ -529,6 +556,7 @@ This structured approach allows the API to maintain context and continuity throu
|
||||
## Conclusion
|
||||
|
||||
### Summary
|
||||
|
||||
This blogpost demonstrates a single iteration of an OpenAI Computer-Use loop where:
|
||||
|
||||
- A macOS sandbox is controlled using the CUA interface.
|
||||
@@ -538,9 +566,11 @@ This blogpost demonstrates a single iteration of a OpenAI Computer-Use loop wher
|
||||
In a production setting, you would wrap the action-response cycle in a loop, handling multiple actions and safety checks as needed.
|
||||
|
||||
### Next Steps
|
||||
|
||||
In the next blogpost, we'll introduce our Agent framework which abstracts away all these tedious implementation steps. This framework provides a higher-level API that handles the interaction loop between OpenAI's computer-use model and the macOS sandbox, allowing you to focus on building sophisticated applications rather than managing the low-level details we've explored here. Can't wait? Check out the [cua-agent](https://github.com/trycua/cua/tree/main/libs/agent) package!
|
||||
|
||||
### Resources
|
||||
|
||||
- [OpenAI Computer-Use docs](https://platform.openai.com/docs/guides/tools-computer-use)
|
||||
- [cua-computer](https://github.com/trycua/cua/tree/main/libs/computer)
|
||||
- [lume](https://github.com/trycua/cua/tree/main/libs/lume)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Build Your Own Operator on macOS - Part 2
|
||||
|
||||
*Published on April 27, 2025 by Francesco Bonacci*
|
||||
_Published on April 27, 2025 by Francesco Bonacci_
|
||||
|
||||
In our [previous post](build-your-own-operator-on-macos-1.md), we built a basic Computer-Use Operator from scratch using OpenAI's `computer-use-preview` model and our [cua-computer](https://pypi.org/project/cua-computer) package. While educational, implementing the control loop manually can be tedious and error-prone.
|
||||
|
||||
@@ -13,12 +13,14 @@ In this follow-up, we'll explore our [cua-agent](https://pypi.org/project/cua-ag
|
||||
## What You'll Learn
|
||||
|
||||
By the end of this tutorial, you'll be able to:
|
||||
|
||||
- Set up the `cua-agent` framework with various agent loop types and model providers
|
||||
- Understand the different agent loop types and their capabilities
|
||||
- Work with local models for cost-effective workflows
|
||||
- Use a simple UI for your operator
|
||||
|
||||
**Prerequisites:**
|
||||
|
||||
- Completed setup from Part 1 ([lume CLI installed](https://github.com/trycua/cua?tab=readme-ov-file#option-2-full-computer-use-agent-capabilities), macOS CUA image already pulled)
|
||||
- Python 3.10+. We recommend using Conda (or Anaconda) to create an ad hoc Python environment.
|
||||
- API keys for OpenAI and/or Anthropic (optional for local models)
|
||||
@@ -58,6 +60,7 @@ pip install "cua-agent[ui]" # Gradio UI
|
||||
Before running any code examples, let's set up a proper environment:
|
||||
|
||||
1. **Create a new directory** for your project:
|
||||
|
||||
```bash
|
||||
mkdir cua-agent-tutorial
|
||||
cd cua-agent-tutorial
|
||||
@@ -66,12 +69,13 @@ Before running any code examples, let's set up a proper environment:
|
||||
2. **Set up a Python environment** using one of these methods:
|
||||
|
||||
**Option A: Using conda command line**
|
||||
|
||||
```bash
|
||||
# Using conda
|
||||
conda create -n cua-agent python=3.10
|
||||
conda activate cua-agent
|
||||
```
|
||||
|
||||
|
||||
**Option B: Using Anaconda Navigator UI**
|
||||
- Open Anaconda Navigator
|
||||
- Click on "Environments" in the left sidebar
|
||||
@@ -80,35 +84,39 @@ Before running any code examples, let's set up a proper environment:
|
||||
- Select Python 3.10
|
||||
- Click "Create"
|
||||
- Once created, select the environment and click "Open Terminal" to activate it
|
||||
|
||||
|
||||
**Option C: Using venv**
|
||||
|
||||
```bash
|
||||
python -m venv cua-env
|
||||
source cua-env/bin/activate # On macOS/Linux
|
||||
```
|
||||
|
||||
3. **Install the cua-agent package**:
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[all]"
|
||||
```
|
||||
|
||||
4. **Set up your API keys as environment variables**:
|
||||
|
||||
```bash
|
||||
# For OpenAI models
|
||||
export OPENAI_API_KEY=your_openai_key_here
|
||||
|
||||
|
||||
# For Anthropic models (if needed)
|
||||
export ANTHROPIC_API_KEY=your_anthropic_key_here
|
||||
```
|
||||
|
||||
5. **Create a Python file or notebook**:
|
||||
|
||||
|
||||
**Option A: Create a Python script**
|
||||
|
||||
```bash
|
||||
# For a Python script
|
||||
touch cua_agent_example.py
|
||||
```
|
||||
|
||||
|
||||
**Option B: Use VS Code notebooks**
|
||||
- Open VS Code
|
||||
- Install the Python extension if you haven't already
|
||||
@@ -120,9 +128,10 @@ Now you're ready to run the code examples!
|
||||
|
||||
## Understanding Agent Loops
|
||||
|
||||
If you recall from Part 1, we had to implement a custom interaction loop to interact with the computer-use-preview model.
|
||||
If you recall from Part 1, we had to implement a custom interaction loop to interact with the computer-use-preview model.
|
||||
|
||||
In the `cua-agent` framework, an **Agent Loop** is the core abstraction that implements the continuous interaction cycle between an AI model and the computer environment. It manages the flow of:
|
||||
|
||||
1. Capturing screenshots of the computer's state
|
||||
2. Processing these screenshots (with or without UI element detection)
|
||||
3. Sending this visual context to an AI model along with the task instructions
|
||||
@@ -141,6 +150,7 @@ While the core concept remains the same across all agent loops, different AI mod
|
||||
| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
|
||||
|
||||
Each loop handles the same basic pattern we implemented manually in Part 1:
|
||||
|
||||
1. Take a screenshot of the VM
|
||||
2. Send the screenshot and task to the AI model
|
||||
3. Receive an action to perform
|
||||
@@ -169,13 +179,13 @@ Choosing the right agent loop depends not only on your API access and technical
|
||||
|
||||
The performance of different Computer-Use models varies significantly across tasks. These benchmark evaluations measure an agent's ability to follow instructions and complete real-world tasks in different computing environments.
|
||||
|
||||
| Benchmark type | Benchmark | UI-TARS-1.5 | OpenAI CUA | Claude 3.7 | Previous SOTA | Human |
|
||||
|----------------|--------------------------------------------------------------------------------------------------------------------------------------------------|-------------|-------------|-------------|----------------------|-------------|
|
||||
| **Computer Use** | [OSworld](https://arxiv.org/abs/2404.07972) (100 steps) | **42.5** | 36.4 | 28 | 38.1 (200 step) | 72.4 |
|
||||
| | [Windows Agent Arena](https://arxiv.org/abs/2409.08264) (50 steps) | **42.1** | - | - | 29.8 | - |
|
||||
| **Browser Use** | [WebVoyager](https://arxiv.org/abs/2401.13919) | 84.8 | **87** | 84.1 | 87 | - |
|
||||
| | [Online-Mind2web](https://arxiv.org/abs/2504.01382) | **75.8** | 71 | 62.9 | 71 | - |
|
||||
| **Phone Use** | [Android World](https://arxiv.org/abs/2405.14573) | **64.2** | - | - | 59.5 | - |
|
||||
| Benchmark type | Benchmark | UI-TARS-1.5 | OpenAI CUA | Claude 3.7 | Previous SOTA | Human |
|
||||
| ---------------- | ------------------------------------------------------------------ | ----------- | ---------- | ---------- | --------------- | ----- |
|
||||
| **Computer Use** | [OSworld](https://arxiv.org/abs/2404.07972) (100 steps) | **42.5** | 36.4 | 28 | 38.1 (200 step) | 72.4 |
|
||||
| | [Windows Agent Arena](https://arxiv.org/abs/2409.08264) (50 steps) | **42.1** | - | - | 29.8 | - |
|
||||
| **Browser Use** | [WebVoyager](https://arxiv.org/abs/2401.13919) | 84.8 | **87** | 84.1 | 87 | - |
|
||||
| | [Online-Mind2web](https://arxiv.org/abs/2504.01382) | **75.8** | 71 | 62.9 | 71 | - |
|
||||
| **Phone Use** | [Android World](https://arxiv.org/abs/2405.14573) | **64.2** | - | - | 59.5 | - |
|
||||
|
||||
### When to Use Each Loop
|
||||
|
||||
@@ -210,10 +220,10 @@ async def run_simple_task():
|
||||
model="openai/computer-use-preview",
|
||||
tools=[macos_computer]
|
||||
)
|
||||
|
||||
|
||||
# Define a simple task
|
||||
task = "Open Safari and search for 'Python tutorials'"
|
||||
|
||||
|
||||
# Run the task and process responses
|
||||
async for result in agent.run(task):
|
||||
print(f"Action: {result.get('text')}")
|
||||
@@ -225,6 +235,7 @@ if __name__ == "__main__":
|
||||
|
||||
3. Save the file
|
||||
4. Open a terminal, navigate to your project directory, and run:
|
||||
|
||||
```bash
|
||||
python simple_task.py
|
||||
```
|
||||
@@ -232,6 +243,7 @@ if __name__ == "__main__":
|
||||
5. The code will initialize the macOS virtual machine, create an agent, and execute the task of opening Safari and searching for Python tutorials.
|
||||
|
||||
You can also run this in a VS Code notebook:
|
||||
|
||||
1. Create a new notebook in VS Code (.ipynb file)
|
||||
2. Copy the code into a cell (without the `if __name__ == "__main__":` part)
|
||||
3. Run the cell to execute the code
|
||||
@@ -259,7 +271,7 @@ async def run_multi_task_workflow():
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[macos_computer]
|
||||
)
|
||||
|
||||
|
||||
tasks = [
|
||||
"Open Safari and go to github.com",
|
||||
"Search for 'trycua/cua'",
|
||||
@@ -267,7 +279,7 @@ async def run_multi_task_workflow():
|
||||
"Click on the 'Issues' tab",
|
||||
"Read the first open issue"
|
||||
]
|
||||
|
||||
|
||||
for i, task in enumerate(tasks):
|
||||
print(f"\nTask {i+1}/{len(tasks)}: {task}")
|
||||
async for result in agent.run(task):
|
||||
@@ -301,13 +313,13 @@ async for result in agent.run(task):
|
||||
# Basic information
|
||||
print(f"Response ID: {result.get('id')}")
|
||||
print(f"Response Text: {result.get('text')}")
|
||||
|
||||
|
||||
# Detailed token usage statistics
|
||||
usage = result.get('usage')
|
||||
if usage:
|
||||
print(f"Input Tokens: {usage.get('input_tokens')}")
|
||||
print(f"Output Tokens: {usage.get('output_tokens')}")
|
||||
|
||||
|
||||
# Reasoning and actions
|
||||
for output in result.get('output', []):
|
||||
if output.get('type') == 'reasoning':
|
||||
@@ -318,6 +330,7 @@ async for result in agent.run(task):
|
||||
```
|
||||
|
||||
This structured format allows you to:
|
||||
|
||||
- Log detailed information about agent actions
|
||||
- Provide real-time feedback to users
|
||||
- Track token usage for cost monitoring
|
||||
@@ -350,9 +363,9 @@ async def run_with_local_model():
|
||||
model="omniparser+ollama_chat/gemma3",
|
||||
tools=[macos_computer]
|
||||
)
|
||||
|
||||
|
||||
task = "Open the Calculator app and perform a simple calculation"
|
||||
|
||||
|
||||
async for result in agent.run(task):
|
||||
print(f"Action: {result.get('text')}")
|
||||
|
||||
@@ -379,12 +392,14 @@ agent = ComputerAgent(
|
||||
```
|
||||
|
||||
Common local endpoints include:
|
||||
|
||||
- LM Studio: `http://localhost:1234/v1`
|
||||
- vLLM: `http://localhost:8000/v1`
|
||||
- LocalAI: `http://localhost:8080/v1`
|
||||
- Ollama with OpenAI compat: `http://localhost:11434/v1`
|
||||
|
||||
This approach is perfect for:
|
||||
|
||||
- Development and testing without incurring API costs
|
||||
- Offline or air-gapped environments where API access isn't possible
|
||||
- Privacy-sensitive applications where data can't leave your network
|
||||
@@ -406,8 +421,8 @@ UI-TARS is ByteDance's Computer-Use model designed for navigating OS-level inter
|
||||
```python
|
||||
agent = ComputerAgent(
|
||||
model=LLM(
|
||||
provider=LLMProvider.OAICOMPAT,
|
||||
name="tgi",
|
||||
provider=LLMProvider.OAICOMPAT,
|
||||
name="tgi",
|
||||
provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1"
|
||||
),
|
||||
tools=[macos_computer]
|
||||
@@ -475,11 +490,13 @@ if __name__ == "__main__":
|
||||
```
|
||||
|
||||
2. Install the UI dependencies if you haven't already:
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[ui]"
|
||||
```
|
||||
|
||||
3. Run the script:
|
||||
|
||||
```bash
|
||||
python launch_ui.py
|
||||
```
|
||||
@@ -498,12 +515,14 @@ if __name__ == "__main__":
|
||||
```
|
||||
|
||||
When you run this, Gradio will display both a local URL and a public URL like:
|
||||
|
||||
```
|
||||
Running on local URL: http://127.0.0.1:7860
|
||||
Running on public URL: https://abcd1234.gradio.live
|
||||
```
|
||||
|
||||
**Security Note:** Be cautious when sharing your Gradio UI publicly:
|
||||
|
||||
- The public URL gives anyone with the link full access to your agent
|
||||
- Consider using basic authentication for additional protection:
|
||||
```python
|
||||
@@ -513,6 +532,7 @@ Running on public URL: https://abcd1234.gradio.live
|
||||
- The temporary link expires when you stop the Gradio application
|
||||
|
||||
This provides:
|
||||
|
||||
- Model provider selection
|
||||
- Agent loop selection
|
||||
- Task input field
|
||||
@@ -566,7 +586,7 @@ async def github_workflow():
|
||||
verbosity=logging.INFO,
|
||||
tools=[macos_computer]
|
||||
)
|
||||
|
||||
|
||||
tasks = [
|
||||
"Look for a repository named trycua/cua on GitHub.",
|
||||
"Check the open issues, open the most recent one and read it.",
|
||||
@@ -575,7 +595,7 @@ async def github_workflow():
|
||||
"From Cursor, open Composer if not already open.",
|
||||
"Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
|
||||
]
|
||||
|
||||
|
||||
for i, task in enumerate(tasks):
|
||||
print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
|
||||
async for result in agent.run(task):
|
||||
@@ -587,11 +607,13 @@ if __name__ == "__main__":
|
||||
```
|
||||
|
||||
2. Make sure your OpenAI API key is set:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY=your_openai_key_here
|
||||
```
|
||||
|
||||
3. Run the script:
|
||||
|
||||
```bash
|
||||
python github_workflow.py
|
||||
```
|
||||
@@ -604,6 +626,7 @@ if __name__ == "__main__":
|
||||
- Use Cursor's AI features to work on a solution
|
||||
|
||||
This example:
|
||||
|
||||
1. Searches GitHub for a repository
|
||||
2. Reads an issue
|
||||
3. Clones the repository
|
||||
@@ -615,6 +638,7 @@ This example:
|
||||
Let's compare our manual implementation from Part 1 with the framework approach:
|
||||
|
||||
### Manual Implementation (Part 1)
|
||||
|
||||
- Required writing custom code for the interaction loop
|
||||
- Needed explicit handling of different action types
|
||||
- Required direct management of the OpenAI API calls
|
||||
@@ -622,6 +646,7 @@ Let's compare our manual implementation from Part 1 with the framework approach:
|
||||
- Limited to OpenAI's computer-use model
|
||||
|
||||
### Framework Implementation (Part 2)
|
||||
|
||||
- Abstracts the interaction loop
|
||||
- Handles all action types automatically
|
||||
- Manages API calls internally
|
||||
@@ -634,17 +659,21 @@ Let's compare our manual implementation from Part 1 with the framework approach:
|
||||
The `cua-agent` framework transforms what was a complex implementation task into a simple, high-level interface for building Computer-Use Agents. By abstracting away the technical details, it lets you focus on defining the tasks rather than the machinery.
|
||||
|
||||
### When to Use Each Approach
|
||||
|
||||
- **Manual Implementation (Part 1)**: When you need complete control over the interaction loop or are implementing a custom solution
|
||||
- **Framework (Part 2)**: For most applications where you want to quickly build and deploy Computer-Use Agents
|
||||
|
||||
### Next Steps
|
||||
|
||||
With the basics covered, you might want to explore:
|
||||
|
||||
- Customizing the agent's behavior with additional parameters
|
||||
- Building more complex workflows spanning multiple applications
|
||||
- Integrating your agent into other applications
|
||||
- Contributing to the open-source project on GitHub
|
||||
|
||||
### Resources
|
||||
|
||||
- [cua-agent GitHub repository](https://github.com/trycua/cua/tree/main/libs/agent)
|
||||
- [Agent Notebook Examples](https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb)
|
||||
- [OpenAI Agent SDK Specification](https://platform.openai.com/docs/api-reference/responses)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Announcing Cua Agent framework 0.4 and Composite Agents
|
||||
|
||||
*Published on August 26, 2025 by Dillon DuPont*
|
||||
_Published on August 26, 2025 by Dillon DuPont_
|
||||
|
||||
<img src="./assets/composite-agents.png" alt="Composite Agents">
|
||||
|
||||
@@ -12,7 +12,7 @@ This is the kind of problem that makes you wonder if we're building the future o
|
||||
|
||||
## What we fixed
|
||||
|
||||
Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language.
|
||||
Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language.
|
||||
|
||||
Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-3-5-sonnet-20241022"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to.
|
||||
|
||||
@@ -42,7 +42,7 @@ agent = ComputerAgent(
|
||||
|
||||
This creates a composite agent where one model (the "grounding" model) handles the visual understanding and precise UI interactions, while the other (the "planning" model) handles the high-level reasoning and task orchestration. It's like having a pilot and a navigator, except they're both AI models and they're trying to help you star a GitHub repository.
|
||||
|
||||
You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model:
|
||||
You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model:
|
||||
|
||||
```python
|
||||
agent = ComputerAgent(
|
||||
@@ -63,12 +63,11 @@ We're building integration with HUD evals, allowing us to curate and benchmark m
|
||||
|
||||
If you try out version 0.4.x, we'd love to hear how it goes. Join us on Discord to share your results and let us know what model combinations work best for your projects.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Links
|
||||
|
||||
* **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)
|
||||
* **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
|
||||
- **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)
|
||||
- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
|
||||
|
||||
Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
|
||||
Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Computer-Use Agents SOTA Challenge: Hack the North + Global Online
|
||||
|
||||
*Published on August 25, 2025 by Francesco Bonacci*
|
||||
_Published on August 25, 2025 by Francesco Bonacci_
|
||||
|
||||
We’re bringing something new to [Hack the North](https://hackthenorth.com), Canada’s largest hackathon, this year: a head-to-head competition for **Computer-Use Agents** - on-site at Waterloo and a **Global online challenge**. From September 12–14, 2025, teams build on the **Cua Agent Framework** and are scored in **HUD’s OSWorld-Verified** environment to push past today’s SOTA on [OS-World](https://os-world.github.io).
|
||||
|
||||
@@ -14,7 +14,8 @@ There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Age
|
||||
|
||||
**Cua** and [**Ollama**](https://ollama.com) organize a global hackathon to find the **most creative uses of local and hybrid computer-use agents**. There are no geographic restrictions on who can join — this is a worldwide competition focused on **originality, impact, and inventive applications** that showcase what's possible with local and hybrid inference.
|
||||
|
||||
**Prizes:**
|
||||
**Prizes:**
|
||||
|
||||
- 1st **MacBook Air M4 (or equivalent value)** + features in Cua & Ollama channels
|
||||
- 2nd **$500 CAD + swag**
|
||||
- 3rd **swag + public feature**
|
||||
@@ -26,36 +27,42 @@ There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Age
|
||||
Two different tracks, two different processes:
|
||||
|
||||
### On-site (Track A)
|
||||
|
||||
Build during the weekend and submit a repo with a one-line start command. **HUD** executes your command in a clean environment and runs **OSWorld-Verified**. Scores come from official benchmark results; ties break by median, then wall-clock time, then earliest submission. Any model setup is allowed (cloud or local).
|
||||
|
||||
**HUD** runs official evaluations immediately after submission. Winners are announced at the **closing ceremony**.
|
||||
|
||||
### Rules
|
||||
|
||||
- Fork and star the [Cua repo](https://github.com/trycua/cua).
|
||||
- Add your agent and instructions in `samples/community/hack-the-north/<YOUR_TEAM_NAME>`.
|
||||
- Include a README with details on the approach and any required notes.
|
||||
- Submit a PR.
|
||||
- Include a README with details on the approach and any required notes.
|
||||
- Submit a PR.
|
||||
|
||||
**Deadline: Sept 15, 8:00 AM EDT**
|
||||
|
||||
### Global Online (Track B)
|
||||
|
||||
Open to anyone, anywhere. Build on your own timeline and submit through the **Cua Discord form** by the deadline.
|
||||
|
||||
**Project Requirements:**
|
||||
|
||||
- Your agent must integrate **Cua and Ollama** in some way
|
||||
- Your agent must be **easily runnable by judges**
|
||||
|
||||
Judged by **Cua** and **Ollama** teams on:
|
||||
- **Creativity (30%)** – originality, usefulness, surprise factor
|
||||
- **Technical Depth (30%)** – quality of engineering and agent design
|
||||
- **Use of Ollama (30%)** – effective integration of local/hybrid inference
|
||||
- **Polish (10%)** – presentation, clarity, demo readiness
|
||||
Judged by **Cua** and **Ollama** teams on:
|
||||
|
||||
- **Creativity (30%)** – originality, usefulness, surprise factor
|
||||
- **Technical Depth (30%)** – quality of engineering and agent design
|
||||
- **Use of Ollama (30%)** – effective integration of local/hybrid inference
|
||||
- **Polish (10%)** – presentation, clarity, demo readiness
|
||||
|
||||
### Submission Process
|
||||
|
||||
Submissions will be collected via a **form link provided in the Cua Discord**. Your submission must contain:
|
||||
|
||||
- **GitHub repo** containing the agent source code and a clear README with instructions on how to use the agent
|
||||
- **Explanation** of the models and tools used, and what's local or hybrid about your design
|
||||
- **Explanation** of the models and tools used, and what's local or hybrid about your design
|
||||
- **Short demo video** (up to two minutes)
|
||||
|
||||
A **commit freeze** will be used to ensure that no changes are made after the deadline. Winners will be announced after judging is complete.
|
||||
@@ -68,12 +75,13 @@ A **commit freeze** will be used to ensure that no changes are made after the de
|
||||
|
||||
Bring a team, pick a model stack, and push what agents can do on real computers. We can’t wait to see what you build at **Hack the North 2025**.
|
||||
|
||||
**Discord channels**
|
||||
**Discord channels**
|
||||
|
||||
- Join the Discord first: https://discord.gg/cua-ai
|
||||
- **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342
|
||||
- **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226
|
||||
- **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342
|
||||
- **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226
|
||||
|
||||
**Contact**
|
||||
Questions on Hack the North? Email **hackthenorth@trycua.com**.
|
||||
|
||||
*P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels.*
|
||||
_P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels._
|
||||
|
||||
169
blog/hack-the-north.md
Normal file
169
blog/hack-the-north.md
Normal file
@@ -0,0 +1,169 @@
|
||||
# What happens when hackathon judging is a public benchmark (Hack the North edition)
|
||||
|
||||
_Written by Francesco Bonacci — Reviewed by Parth Patel (HUD W25) — Sept 25, 2025_
|
||||
|
||||
## Prologue
|
||||
|
||||
Hack the North ran Sept 12–14 at the University of Waterloo. Official count this year: **1,778 hackers**, and a [Guinness World Record for the most people building interlocking plastic brick sculptures simultaneously](https://uwaterloo.ca/news/eweal-making-hackathons-fun-again-breaking-guinness-world-record).
|
||||
|
||||
Our team arrived from Europe and the US one day before the hackathon, after a summer scattered post–YC X25, waiting for our O-1 visas. **HUD**’s founders Parth and Jay flew in from SF to help us run evaluations, and Michael and Parth from **Ollama** joined as co-sponsors.
|
||||
|
||||
Our plan was ambitious: run the **first state-of-the-art Computer-Use Agents track**, score it on a public benchmark, and give the top performer a guaranteed YC interview. (Interview ≠ offer. YC didn’t judge.)
|
||||
|
||||
The rest, as they say, was a 36h story worth telling—and a playbook worth sharing for anyone thinking about running or sponsoring this type of hackathon track.
|
||||
|
||||

|
||||
|
||||
## The sign-up problem we had to invent
|
||||
|
||||
We joined as a sponsor at the last minute, thanks to a push from our friend @Michael Chiang at Ollama—Waterloo alum, naturally. It’s kind of an open secret that UWaterloo turns out some of the sharpest hackers around (_no pun intended, HackMIT_). It was a bit of a scramble, but also great timing—our Agent framework had just finished a major refactor, with support for **100+ VLM configurations** now live. Naturally, we wanted to stress-test it at scale—and see whether teams could come up with SOTA-level setups. _This wasn’t a blank-slate, build-whatever-you-want kind of track._
|
||||
|
||||
From day one, though, we knew we’d have to fight for sign-ups. This was a niche track, and a guaranteed YC interview alone wouldn’t be enough to pull people in.
|
||||
|
||||
Unfortunately, Hack the North (HTN) didn’t offer an interest form to help us estimate demand, which made capacity planning tricky—especially with early-stage infra. Stress-testing takes foresight, and multimodal language model usage is still costly (~1.5× to 3–4× the price of comparable text-only models).
|
||||
|
||||
On top of that, we were discouraged from external promotion on [lu.ma](http://lu.ma). So we spun up our own sign-up page at **trycua.com/hackathon** and built ad-hoc Discord channels to share track details. We emphasized—repeatedly—that only students already accepted to Hack the North should register.
|
||||
|
||||
_(Moral: the “measure-zero effect”—no matter how many times you say it, some people won’t see it. Plenty of invalid sign-ups still slipped through.)_
|
||||
|
||||
Even so, having your own form is absolutely worth it: it gives you an **early funnel**, surfaces demand signals ahead of time, and—crucially—**lets you require platform sign-up before kickoff**. In our case, Hack the North didn’t provide Devpost access until the very end, so our form was the only way to build a working roster.
|
||||
|
||||
Only a small trickle of sign-ups came through by the time the event kicked off—too few to plan around, but clearly the right kind of crowd. Several were already familiar with computer-use agents; one was even interning at Shopify, working in this space.
|
||||
|
||||
## At the Sponsor Booth
|
||||
|
||||
Day 0 on campus made the difference. We arrived a couple of hours early to collect swag shipments (around 1,200 stickers of our new **Cua-la** mascot, plus t-shirts and hats—always plan ~1.5× the estimated number of hackers!). After walking the sponsor floor and explaining the track at our booth, ~40 hackers signed up.
|
||||
|
||||
**Moral:** sponsor booths are still the most effective way to recruit for a track.
|
||||
|
||||
**Suggestions to maximize booth time (for HTN this is only ~24 of the total 36 hours):**
|
||||
|
||||
- **Be unmistakable.** Run a mini-challenge and a visible giveaway. We offered 5 × $200 Anthropic credits as a lightning raffle and constantly advertised in HTN Slack. Shout-out to our neighbors at **Mintlify**, who dressed their teammate as a mint plant - memorable and effective.
|
||||
- **Create multiple touchpoints.** Hand out flyers and QR codes, and ask nearby booths to cross-refer. Big thanks to the YC team for flyer space and student connections - and to Michael (Ollama) for pointing visitors our way.
|
||||
- **Never leave the booth empty.** Keep someone at the booth at all times and rotate shifts. With four founding engineers on-site, coverage was easy. Even after hacking kicked off, the booth stayed a point of reference - and even then multiple participants DM’d us asking where to meet up.
|
||||
- **Students are organic DevRel.** Our runner-up, Adam, hung out with us at the booth, pulling more people in. Peer-to-peer energy creates the network effect you need!
|
||||
|
||||

|
||||
|
||||
_(Our Founding Engineer, Morgan, hangs out with students at the stand, while Adam (runner-up) hacks on the side.)_
|
||||
|
||||
## 02:30 a.m. is still prime time at a hackathon
|
||||
|
||||
Hack the North gives sponsors a 30-minute API Workshop during the early hours of the event—a perfect moment to shift from talking to building.
|
||||
|
||||
Our slot landed at **2:30 a.m.** (_perks of the cheapest sponsor tier_). Thirty students showed up, energy surprisingly high. James, our new Founding DevRel Engineer, led the session and nailed it.
|
||||
|
||||
**Our track rules were simple:**
|
||||
|
||||
1. Build a Computer-Use Agent with the [Cua framework](https://github.com/trycua/cua)
|
||||
2. Benchmark the agent on [HUD](https://www.hud.so)
|
||||
3. Use [OSWorld-Tiny](https://huggingface.co/datasets/ddupont/OSWorld-Tiny-Public): a 14-task distillation of the full benchmark (~360 tasks, >1h)
|
||||
|
||||
**Suggestions:**
|
||||
|
||||
- **Leave something tangible.** We provided a Jupyter Notebook teams could run immediately.
|
||||
- **Narrow scope, strong starts.** The more focused the challenge, the more **robust starting points** you should provide.
|
||||
- **Want the details?** [Here’s the notebook we left participants](https://github.com/trycua/cua/blob/main/notebooks/sota_hackathon.ipynb).
|
||||
|
||||

|
||||
|
||||
_(Our CUA Workshop at 2:30 AM.)_
|
||||
|
||||
## Making it possible to focus on the work
|
||||
|
||||
If you’re an OSS framework, it’s tempting to have hackers self-host on laptops. **Don’t.** You’ll spend the workshop debugging setups instead of reviewing ideas.
|
||||
|
||||
**Lesson learned:** within hours, we shifted to **cloud-only Sandboxes**. Payoff: consistent environments, faster starts, far less tech support.
|
||||
|
||||
We provided:
|
||||
|
||||
- **Credits:** $200 Cua Cloud + $200 HUD per team (manual top-ups for visible progress)
|
||||
- **LLMs/VLMs:** Anthropic assigned $50 per participant—tight for VLM iteration—so we added capped access under our org
|
||||
- **Pre-kickoff provisioning:** Platform sign-up auto-created projects, keys, and sandboxes
|
||||
|
||||
**Takeaway:** every minute not spent on setup is a minute gained for iterating.
|
||||
|
||||
## 12 Hours in the Hackathon
|
||||
|
||||
**After the workshop buzz.** Morning interest was high, but Docker setup + requiring focus on a single track thinned the crowd. Most sponsor prizes are broad (“use our product and you qualify”), letting students stack tracks. Ours required commitment. Upside: those who stayed shipped sharper, higher-quality submissions.
|
||||
|
||||
**The bell curve of submissions.** Most entries used _claude-sonnet-4-20250514_—proof that docs and public leaderboards ([OSWorld](https://os-world.github.io/#benchmark)) guide choices. Results clustered around the safe pick, with fewer pushing boundaries.
|
||||
|
||||
**Who went beyond the baseline.** A few tried multi-agent/tool graphs. One standout—[**cuala**](https://github.com/YeIIcw/cuala)—was a clean reference: deterministic actions, verifiable state changes, callbacks for saving images and trajectories.
|
||||
|
||||
**Bottom line:** Early excitement is easy; keeping teams engaged requires reducing friction and offering multiple entry points.
|
||||
|
||||
### What broke (and why)
|
||||
|
||||
We skipped a full end-to-end **Cua × HUD** dry-run. It showed.
|
||||
|
||||
- Hackers ran out of inference credits. Desktop tasks are token-heavy. A full OSWorld run (200 max steps) for _computer-use-preview_ (OpenAI Operator API) can cost >$600. Serious attempts: ~400k tokens × 14 tasks.
|
||||
- Python version/build mismatches surfaced, requiring debug time across both OSS repos.
|
||||
- Our Cua framework lacked a **Response Agent** to complete evaluation loops. Some runs stalled until patched.
|
||||
|
||||
## Scoring and Results
|
||||
|
||||
### Participation & Outcomes
|
||||
|
||||
- ~**30** hackers gave the track a serious try; **5** crossed the finish line
|
||||
- All submissions were **solo**, mostly undergrads
|
||||
- Judging: OSWorld-Tiny on HUD, with Cua + HUD reruns to verify scores
|
||||
- Final leaderboard: [HUD Leaderboard](https://www.hud.so/leaderboards/ddupont/OSWorld-Tiny-Public)
|
||||
|
||||

|
||||
|
||||
_(Leaderboard on HUD)_
|
||||
|
||||
### Winners
|
||||
|
||||
**🥇 Winner — Ram**
|
||||
|
||||
- Devpost: https://devpost.com/software/sota-computer-use-agent-challenge
|
||||
- Code: https://github.com/Ram-Raghav-S/cua/tree/ram
|
||||
- Score: 68.3%
|
||||
|
||||
**🥈 Runner-up — Aryan**
|
||||
|
||||
- Devpost: https://devpost.com/software/loopdeloop-computer-use-agent-sota-attempt
|
||||
- Code: https://github.com/Tumph/cua
|
||||
- Score: 55.9%
|
||||
|
||||
**🥉 Special Mention — Adam**
|
||||
|
||||
- Devpost: https://devpost.com/software/cuala
|
||||
- Code: https://github.com/YeIIcw/cuala
|
||||
- Score: 42.1%
|
||||
|
||||

|
||||
|
||||
_(Our finalists before the award ceremony)_
|
||||
|
||||
## What We’d Keep
|
||||
|
||||
- **Sponsor Hack the North again**
|
||||
- **Keep a visible, staffed booth**
|
||||
- **Publish a compact FAQ**
|
||||
- **Simple, transparent scoring**
|
||||
|
||||
## What We’d Change
|
||||
|
||||
- **Run a full Cua × HUD dry-run under load**
|
||||
- **Offer multiple on-ramps (evals, creative, RL)**
|
||||
- **Keep a private eval set for judging**
|
||||
- **Default to cloud sandboxes**
|
||||
- **Handle ops earlier (swag, signage, QR codes)**
|
||||
- **Reward generalization, not lucky runs**
|
||||
|
||||
## Closing Thoughts
|
||||
|
||||
Our first outing as sponsors wasn’t perfect, but it gave us a working playbook: **provision cloud early, keep scoring simple, always dry-run infra, and make the booth unforgettable**.
|
||||
|
||||
If more hackathon tracks leaned on **public benchmarks**, weekends like this would produce fewer demos-for-show and more measurable progress.
|
||||
|
||||
**P.S.** Huge thanks to the Ollama and HUD teams for co-sponsoring the track, and to our YC Partner Diana for offering a **guaranteed YC interview** as first prize.
|
||||
|
||||
Whether you’re a hacker who wants to participate, or a company looking to sponsor, let’s talk — we’re especially excited to support benchmark-first hackathon tracks in the Bay Area this year.
|
||||
|
||||

|
||||
|
||||
_(HTN Closing Ceremony — Cua Track Winner Announcement)_
|
||||
@@ -1,6 +1,6 @@
|
||||
# Cua × HUD - Evaluate Any Computer-Use Agent
|
||||
|
||||
*Published on August 27, 2025 by Dillon DuPont*
|
||||
_Published on August 27, 2025 by Dillon DuPont_
|
||||
|
||||
You can now benchmark any GUI-capable agent on real computer-use tasks through our new integration with [HUD](https://hud.so), the evaluation platform for computer-use agents.
|
||||
|
||||
@@ -70,9 +70,9 @@ Watch your agent work in real-time. Example output:
|
||||
```md
|
||||
Starting full dataset run...
|
||||
╔═════════════════════════════════════════════════════════════════╗
|
||||
║ 🚀 See your agent live at: ║
|
||||
║ 🚀 See your agent live at: ║
|
||||
╟─────────────────────────────────────────────────────────────────╢
|
||||
║ https://app.hud.so/jobs/fe05805d-4da9-4fc6-84b5-5c518528fd3c ║
|
||||
║ https://app.hud.so/jobs/fe05805d-4da9-4fc6-84b5-5c518528fd3c ║
|
||||
╚═════════════════════════════════════════════════════════════════╝
|
||||
```
|
||||
|
||||
@@ -90,4 +90,4 @@ Customize your evaluation with these options:
|
||||
|
||||
- Notebook with end‑to‑end examples: https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb
|
||||
- Docs: https://docs.trycua.com/docs/agent-sdk/integrations/hud
|
||||
- Live traces: https://app.hud.so
|
||||
- Live traces: https://app.hud.so
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# When Agents Need Human Wisdom - Introducing Human-In-The-Loop Support
|
||||
|
||||
*Published on August 29, 2025 by Francesco Bonacci*
|
||||
_Published on August 29, 2025 by Francesco Bonacci_
|
||||
|
||||
Sometimes the best AI agent is a human. Whether you're creating training demonstrations, evaluating complex scenarios, or need to intervene when automation hits a wall, our new Human-In-The-Loop integration puts you directly in control.
|
||||
|
||||
With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could benchmark any agent at scale. Today's update lets you *become* the agent when it matters most—seamlessly switching between automated intelligence and human judgment.
|
||||
With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could benchmark any agent at scale. Today's update lets you _become_ the agent when it matters most—seamlessly switching between automated intelligence and human judgment.
|
||||
|
||||
<div align="center">
|
||||
<video src="https://github.com/user-attachments/assets/9091b50f-26e7-4981-95ce-40e5d42a1260" width="600" controls></video>
|
||||
@@ -20,11 +20,12 @@ With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could ben
|
||||
|
||||
## Why Human-In-The-Loop?
|
||||
|
||||
Even the most sophisticated agents encounter edge cases, ambiguous interfaces, or tasks requiring human judgment. Rather than failing gracefully, they can now fail *intelligently*—by asking for human help.
|
||||
Even the most sophisticated agents encounter edge cases, ambiguous interfaces, or tasks requiring human judgment. Rather than failing gracefully, they can now fail _intelligently_—by asking for human help.
|
||||
|
||||
This approach bridges the gap between fully automated systems and pure manual control, letting you:
|
||||
|
||||
- **Demonstrate complex workflows** that agents can learn from
|
||||
- **Evaluate tricky scenarios** where ground truth requires human assessment
|
||||
- **Evaluate tricky scenarios** where ground truth requires human assessment
|
||||
- **Intervene selectively** when automated agents need guidance
|
||||
- **Test and debug** your tools and environments manually
|
||||
|
||||
@@ -64,7 +65,7 @@ Combine model intelligence with human precision—let AI plan, then execute manu
|
||||
|
||||
```python
|
||||
agent = ComputerAgent(
|
||||
"huggingface-local/HelloKKMe/GTA1-7B+human/human",
|
||||
"huggingface-local/HelloKKMe/GTA1-7B+human/human",
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
@@ -81,7 +82,7 @@ Start automated, escalate to human when needed:
|
||||
# Primary automated agent
|
||||
primary_agent = ComputerAgent("openai/computer-use-preview", tools=[computer])
|
||||
|
||||
# Human fallback agent
|
||||
# Human fallback agent
|
||||
fallback_agent = ComputerAgent("human/human", tools=[computer])
|
||||
|
||||
try:
|
||||
@@ -101,22 +102,26 @@ except Exception:
|
||||
The human-in-the-loop interface provides a rich, responsive experience:
|
||||
|
||||
### **Visual Environment**
|
||||
|
||||
- **Screenshot display** with live updates as you work
|
||||
- **Click handlers** for direct interaction with UI elements
|
||||
- **Click handlers** for direct interaction with UI elements
|
||||
- **Zoom and pan** to see details clearly
|
||||
|
||||
### **Action Controls**
|
||||
|
||||
- **Click actions** - precise cursor positioning and clicking
|
||||
- **Keyboard input** - type text naturally or send specific key combinations
|
||||
- **Action history** - see the sequence of actions taken
|
||||
- **Undo support** - step back when needed
|
||||
|
||||
### **Tool Integration**
|
||||
### **Tool Integration**
|
||||
|
||||
- **Full OpenAI compatibility** - standard tool call format
|
||||
- **Custom tools** - integrate your own tools seamlessly
|
||||
- **Real-time feedback** - see tool responses immediately
|
||||
|
||||
### **Smart Polling**
|
||||
|
||||
- **Responsive updates** - UI refreshes when new completions arrive
|
||||
- **Background processing** - continue working while waiting for tasks
|
||||
- **Session persistence** - resume interrupted sessions
|
||||
@@ -124,6 +129,7 @@ The human-in-the-loop interface provides a rich, responsive experience:
|
||||
## Real-World Use Cases
|
||||
|
||||
### **Training Data Generation**
|
||||
|
||||
Create perfect demonstrations for fine-tuning:
|
||||
|
||||
```python
|
||||
@@ -132,7 +138,7 @@ demo_agent = ComputerAgent("human/human", tools=[computer])
|
||||
|
||||
tasks = [
|
||||
"Create a budget spreadsheet with income and expense categories",
|
||||
"Apply conditional formatting to highlight overbudget items",
|
||||
"Apply conditional formatting to highlight overbudget items",
|
||||
"Generate a pie chart showing expense distribution"
|
||||
]
|
||||
|
||||
@@ -143,6 +149,7 @@ for task in tasks:
|
||||
```
|
||||
|
||||
### **Evaluation and Ground Truth**
|
||||
|
||||
Validate agent performance on complex scenarios:
|
||||
|
||||
```python
|
||||
@@ -154,6 +161,7 @@ async for _ in evaluator.run("Review this completed form and rate accuracy (1-10
|
||||
```
|
||||
|
||||
### **Interactive Debugging**
|
||||
|
||||
Step through agent behavior manually:
|
||||
|
||||
```python
|
||||
@@ -165,6 +173,7 @@ async for _ in debug_agent.run("Reproduce the agent's failed login sequence"):
|
||||
```
|
||||
|
||||
### **Edge Case Handling**
|
||||
|
||||
Handle scenarios that break automated agents:
|
||||
|
||||
```python
|
||||
@@ -180,26 +189,26 @@ async for _ in edge_case_agent.run("Navigate this CAPTCHA-protected form"):
|
||||
Customize the human agent experience:
|
||||
|
||||
- **UI refresh rate**: Adjust polling frequency for your workflow
|
||||
- **Image quality**: Balance detail vs. performance for screenshots
|
||||
- **Image quality**: Balance detail vs. performance for screenshots
|
||||
- **Action logging**: Save detailed traces for analysis and training
|
||||
- **Session timeout**: Configure idle timeouts for security
|
||||
- **Tool permissions**: Restrict which tools humans can access
|
||||
|
||||
## When to Use Human-In-The-Loop
|
||||
|
||||
| **Scenario** | **Why Human Control** |
|
||||
|--------------|----------------------|
|
||||
| **Creating training data** | Perfect demonstrations for model fine-tuning |
|
||||
| **Evaluating complex tasks** | Human judgment for subjective or nuanced assessment |
|
||||
| **Handling edge cases** | CAPTCHAs, unusual UIs, context-dependent decisions |
|
||||
| **Debugging workflows** | Step through failures to identify breaking points |
|
||||
| **High-stakes operations** | Critical tasks requiring human oversight and approval |
|
||||
| **Testing new environments** | Validate tools and environments work as expected |
|
||||
| **Scenario** | **Why Human Control** |
|
||||
| ---------------------------- | ----------------------------------------------------- |
|
||||
| **Creating training data** | Perfect demonstrations for model fine-tuning |
|
||||
| **Evaluating complex tasks** | Human judgment for subjective or nuanced assessment |
|
||||
| **Handling edge cases** | CAPTCHAs, unusual UIs, context-dependent decisions |
|
||||
| **Debugging workflows** | Step through failures to identify breaking points |
|
||||
| **High-stakes operations** | Critical tasks requiring human oversight and approval |
|
||||
| **Testing new environments** | Validate tools and environments work as expected |
|
||||
|
||||
## Learn More
|
||||
|
||||
- **Interactive examples**: Try human-in-the-loop control with sample tasks
|
||||
- **Training data pipelines**: Learn how to convert human demonstrations into model training data
|
||||
- **Training data pipelines**: Learn how to convert human demonstrations into model training data
|
||||
- **Evaluation frameworks**: Build human-validated test suites for your agents
|
||||
- **API documentation**: Full reference for human agent configuration
|
||||
|
||||
@@ -207,4 +216,4 @@ Ready to put humans back in the loop? The most sophisticated AI system knows whe
|
||||
|
||||
---
|
||||
|
||||
*Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop).*
|
||||
_Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop)._
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Introducing Cua Cloud Containers: Computer-Use Agents in the Cloud
|
||||
# Introducing Cua Cloud Sandbox: Computer-Use Agents in the Cloud
|
||||
|
||||
*Published on May 28, 2025 by Francesco Bonacci*
|
||||
_Published on May 28, 2025 by Francesco Bonacci_
|
||||
|
||||
Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Containers** – the easiest way to deploy Computer-Use Agents at scale.
|
||||
Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Sandbox** – the easiest way to deploy Computer-Use Agents at scale.
|
||||
|
||||
<div align="center">
|
||||
<video src="https://github.com/user-attachments/assets/63a2addf-649f-4468-971d-58d38dd43ee6" width="600" controls></video>
|
||||
@@ -10,13 +10,13 @@ Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./bui
|
||||
|
||||
## What is Cua Cloud?
|
||||
|
||||
Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured cloud containers with a single command. Each container comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs.
|
||||
Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured Cloud Sandbox instances with a single command. Each sandbox comes with a **full desktop environment** accessible from your browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs.
|
||||
|
||||
## Why Cua Cloud Containers?
|
||||
## Why Cua Cloud Sandbox?
|
||||
|
||||
Four months ago, we launched [**Lume**](https://github.com/trycua/cua/tree/main/libs/lume) and [**Cua**](https://github.com/trycua/cua) with the goal to bring sandboxed VMs and Computer-Use Agents on Apple Silicon. The developer's community response was incredible 🎉
|
||||
Four months ago, we launched [**Lume**](https://github.com/trycua/cua/tree/main/libs/lume) and [**Cua**](https://github.com/trycua/cua) with the goal of bringing sandboxed VMs and Computer-Use Agents to Apple Silicon. The developer community's response was incredible 🎉
|
||||
|
||||
Going from prototype to production revealed a problem though: **local macOS VMs don't scale**, neither are they easily portable.
|
||||
Going from prototype to production revealed a problem though: **local macOS VMs don't scale**, nor are they easily portable.
|
||||
|
||||
Our Discord community, YC peers, and early pilot customers kept hitting the same issues. Storage was a constraint: at **20-40GB per VM**, laptops filled up fast. Different hardware architectures (Apple Silicon ARM vs Intel x86) prevented portability of local workflows. Every new user lost a day to setup and configuration.
|
||||
|
||||
@@ -40,7 +40,7 @@ export CUA_API_KEY=your_api_key_here
|
||||
export CUA_CONTAINER_NAME=my-agent-container
|
||||
```
|
||||
|
||||
### Step 2: Launch Your First Container
|
||||
### Step 2: Launch Your First Sandbox
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
@@ -55,7 +55,7 @@ async def run_cloud_agent():
|
||||
name=os.getenv("CUA_CONTAINER_NAME"),
|
||||
provider_type=VMProviderType.CLOUD,
|
||||
)
|
||||
|
||||
|
||||
# Create an agent with your preferred loop
|
||||
agent = ComputerAgent(
|
||||
model="openai/gpt-4o",
|
||||
@@ -63,7 +63,7 @@ async def run_cloud_agent():
|
||||
verbosity=logging.INFO,
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
|
||||
# Run a task
|
||||
async for result in agent.run("Open Chrome and search for AI news"):
|
||||
print(f"Response: {result.get('text')}")
|
||||
@@ -80,7 +80,7 @@ We're launching with **three compute tiers** to match your workload needs:
|
||||
- **Medium** (2 vCPU, 8GB RAM) - Ideal for most production workloads
|
||||
- **Large** (8 vCPU, 32GB RAM) - Built for complex, resource-intensive operations
|
||||
|
||||
Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic cleanup on termination.
|
||||
Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic sandbox cleanup on termination.
|
||||
|
||||
## How some customers are using Cua Cloud today
|
||||
|
||||
@@ -102,14 +102,14 @@ async def github_automation():
|
||||
name="github-automation",
|
||||
provider_type=VMProviderType.CLOUD,
|
||||
)
|
||||
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="openai/gpt-4o",
|
||||
save_trajectory=True,
|
||||
verbosity=logging.INFO,
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
|
||||
tasks = [
|
||||
"Look for a repository named trycua/cua on GitHub.",
|
||||
"Check the open issues, open the most recent one and read it.",
|
||||
@@ -119,17 +119,17 @@ async def github_automation():
|
||||
"Commit the changes with a descriptive message.",
|
||||
"Create a pull request."
|
||||
]
|
||||
|
||||
|
||||
for i, task in enumerate(tasks):
|
||||
print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
|
||||
async for result in agent.run(task):
|
||||
print(f"Response: {result.get('text')}")
|
||||
|
||||
|
||||
# Check if any tools were used
|
||||
tools = result.get('tools')
|
||||
if tools:
|
||||
print(f"Tools used: {tools}")
|
||||
|
||||
|
||||
print(f"Task {i+1} completed")
|
||||
|
||||
# Run the automation
|
||||
@@ -153,13 +153,13 @@ async def scrape_website(site_name, url):
|
||||
name=f"scraper-{site_name}",
|
||||
provider_type=VMProviderType.CLOUD,
|
||||
)
|
||||
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="openai/gpt-4o",
|
||||
save_trajectory=True,
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
|
||||
results = []
|
||||
tasks = [
|
||||
f"Navigate to {url}",
|
||||
@@ -167,7 +167,7 @@ async def scrape_website(site_name, url):
|
||||
"Take a screenshot of the page",
|
||||
"Save the extracted data to a file"
|
||||
]
|
||||
|
||||
|
||||
for task in tasks:
|
||||
async for result in agent.run(task):
|
||||
results.append({
|
||||
@@ -175,7 +175,7 @@ async def scrape_website(site_name, url):
|
||||
'task': task,
|
||||
'response': result.get('text')
|
||||
})
|
||||
|
||||
|
||||
return results
|
||||
|
||||
async def parallel_scraping():
|
||||
@@ -185,11 +185,11 @@ async def parallel_scraping():
|
||||
("HackerNews", "https://news.ycombinator.com"),
|
||||
("TechCrunch", "https://techcrunch.com")
|
||||
]
|
||||
|
||||
|
||||
# Run all scraping tasks in parallel
|
||||
tasks = [scrape_website(name, url) for name, url in sites]
|
||||
results = await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
# Process results
|
||||
for site_results in results:
|
||||
print(f"\nResults from {site_results[0]['site']}:")
|
||||
@@ -202,23 +202,23 @@ asyncio.run(parallel_scraping())
|
||||
|
||||
## Cost Optimization Tips
|
||||
|
||||
To optimize your costs, use appropriate container sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize container spin-up time, and always remember to terminate containers when your work is complete.
|
||||
To optimize your costs, use appropriate sandbox sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize sandbox spin-up time, and always remember to terminate sandboxes when your work is complete.
|
||||
|
||||
## Security Considerations
|
||||
|
||||
Cua Cloud runs all containers in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories.
|
||||
Cua Cloud runs all sandboxes in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories.
|
||||
|
||||
## What's Next for Cua Cloud
|
||||
|
||||
We're just getting started! Here's what's coming in the next few months:
|
||||
|
||||
### Elastic Autoscaled Container Pools
|
||||
### Elastic Autoscaled Sandbox Pools
|
||||
|
||||
Soon you'll be able to create elastic container pools that automatically scale based on demand. Define minimum and maximum container counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention.
|
||||
Soon you'll be able to create elastic sandbox pools that automatically scale based on demand. Define minimum and maximum sandbox counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention.
|
||||
|
||||
### Windows and macOS Cloud Support
|
||||
|
||||
While we're launching with Linux containers, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API.
|
||||
While we're launching with Linux sandboxes, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API.
|
||||
|
||||
Stay tuned for updates and join our [**Discord**](https://discord.gg/cua-ai) to vote on which features you'd like to see first!
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# From Lume to Containerization: Our Journey Meets Apple's Vision
|
||||
|
||||
*Published on June 10, 2025 by Francesco Bonacci*
|
||||
_Published on June 10, 2025 by Francesco Bonacci_
|
||||
|
||||
Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Containers](./introducing-cua-cloud-containers). Here's our take on Apple's announcement.
|
||||
Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Sandbox](./introducing-cua-cloud-containers). Here's our take on Apple's announcement.
|
||||
|
||||
## Our Story
|
||||
|
||||
@@ -40,6 +40,7 @@ How Apple's Framework Works:
|
||||
```
|
||||
|
||||
Why is this better?
|
||||
|
||||
- **Better security**: Each container is completely separate
|
||||
- **Better performance**: Each container gets its own resources
|
||||
- **Real isolation**: If one container has problems, others aren't affected
|
||||
@@ -71,6 +72,7 @@ While Apple's new framework focuses on containers, we've been building VM manage
|
||||
[Lume](https://github.com/trycua/cua/tree/main/libs/lume) is our command-line tool for creating and managing VMs on Apple Silicon. We built it because setting up VMs on macOS was too complicated.
|
||||
|
||||
What Lume does:
|
||||
|
||||
- **Direct control**: Works directly with Apple's Virtualization framework
|
||||
- **Ready-to-use images**: Start a macOS or Linux VM with one command
|
||||
- **API server**: Control VMs from other programs (runs on port 7777)
|
||||
@@ -91,6 +93,7 @@ lume run macos-sequoia-vanilla:latest
|
||||
[Lumier](https://github.com/trycua/lumier) works differently. It lets you use Docker commands to manage VMs. But here's the key: **Docker is just for packaging, not for isolation**.
|
||||
|
||||
What makes Lumier useful:
|
||||
|
||||
- **Familiar commands**: If you know Docker, you know Lumier
|
||||
- **Web access**: Connect to your VM through a browser
|
||||
- **Save your work**: VMs remember their state
|
||||
@@ -127,6 +130,7 @@ Docker → Lume → Full VM → Mac Hardware
|
||||
### When to Use What
|
||||
|
||||
**Apple's Containerization**
|
||||
|
||||
- ✅ Perfect for: Running containers with maximum security
|
||||
- ✅ Starts in under a second
|
||||
- ✅ Uses less memory and CPU
|
||||
@@ -134,6 +138,7 @@ Docker → Lume → Full VM → Mac Hardware
|
||||
- ❌ Only for containers, not full VMs
|
||||
|
||||
**Lume**
|
||||
|
||||
- ✅ Perfect for: Development and testing
|
||||
- ✅ Full control over macOS/Linux VMs
|
||||
- ✅ Works on current macOS versions
|
||||
@@ -141,6 +146,7 @@ Docker → Lume → Full VM → Mac Hardware
|
||||
- ❌ Uses more resources than containers
|
||||
|
||||
**Lumier**
|
||||
|
||||
- ✅ Perfect for: Teams already using Docker
|
||||
- ✅ Easy to share and deploy
|
||||
- ✅ Access through your browser
|
||||
@@ -168,9 +174,9 @@ Apple's announcement confirms we're on the right path. Here's what we're looking
|
||||
- [Apple Containerization Framework](https://github.com/apple/containerization)
|
||||
- [Lume - Direct VM Management](https://github.com/trycua/cua/tree/main/libs/lume)
|
||||
- [Lumier - Docker Interface for VMs](https://github.com/trycua/cua/tree/main/libs/lumier)
|
||||
- [Cua Cloud Containers](https://trycua.com)
|
||||
- [Cua Cloud Sandbox](https://trycua.com)
|
||||
- [Join our Discord](https://discord.gg/cua-ai)
|
||||
|
||||
---
|
||||
|
||||
*Questions about virtualization on Apple Silicon? Come chat with us on Discord!*
|
||||
_Questions about virtualization on Apple Silicon? Come chat with us on Discord!_
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Sandboxed Python Execution: Run Code Safely in Cua Containers
|
||||
|
||||
*Published on June 23, 2025 by Dillon DuPont*
|
||||
_Published on June 23, 2025 by Dillon DuPont_
|
||||
|
||||
With Cua's computer-use capabilities, which we touched on in [Building your own Operator on macOS - Part 2](build-your-own-operator-on-macos-2.md), your AI agents can click, scroll, type, and interact with any desktop application. But what if your agent needs to do more than just UI automation? What if it needs to process data, make API calls, analyze images, or run complex logic alongside those UI interactions, within the same virtual environment?
|
||||
|
||||
@@ -49,15 +49,19 @@ What's happening here? When you call `greet_and_print()`, Cua extracts the funct
|
||||
Cua's sandboxed execution system employs several key architectural components:
|
||||
|
||||
### 1. Source Code Extraction
|
||||
|
||||
Cua uses Python's `inspect.getsource()` to extract your function's source code and reconstruct the function definition in the remote environment.
|
||||
|
||||
### 2. Virtual Environment Isolation
|
||||
|
||||
Each sandboxed function runs in a named virtual environment within the container. This provides complete dependency isolation between different functions and their respective environments.
|
||||
|
||||
### 3. Data Serialization and Transport
|
||||
|
||||
Arguments and return values are serialized as JSON and transported between the host and container. This ensures compatibility across different Python versions and execution environments.
|
||||
|
||||
### 4. Comprehensive Error Handling
|
||||
|
||||
The system captures both successful results and exceptions, preserving stack traces and error information for debugging purposes.
|
||||
|
||||
## Getting your sandbox ready
|
||||
@@ -73,10 +77,10 @@ async def main():
|
||||
# Fire up the computer
|
||||
computer = Computer()
|
||||
await computer.run()
|
||||
|
||||
|
||||
# Make it the default for all sandboxed functions
|
||||
set_default_computer(computer)
|
||||
|
||||
|
||||
# Install some packages in a virtual environment
|
||||
await computer.venv_install("demo_venv", ["requests", "beautifulsoup4"])
|
||||
```
|
||||
@@ -104,7 +108,7 @@ def automate_browser_with_playwright():
|
||||
import time
|
||||
import base64
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
try:
|
||||
with sync_playwright() as p:
|
||||
# Launch browser (visible, because why not?)
|
||||
@@ -112,68 +116,68 @@ def automate_browser_with_playwright():
|
||||
headless=False,
|
||||
args=['--no-sandbox', '--disable-dev-shm-usage']
|
||||
)
|
||||
|
||||
|
||||
page = browser.new_page()
|
||||
page.set_viewport_size({"width": 1280, "height": 720})
|
||||
|
||||
|
||||
actions = []
|
||||
screenshots = {}
|
||||
|
||||
|
||||
# Let's visit example.com and poke around
|
||||
page.goto("https://example.com")
|
||||
actions.append("Navigated to example.com")
|
||||
|
||||
|
||||
# Grab a screenshot because screenshots are cool
|
||||
screenshot_bytes = page.screenshot(full_page=True)
|
||||
screenshots["initial"] = base64.b64encode(screenshot_bytes).decode()
|
||||
|
||||
|
||||
# Get some basic info
|
||||
title = page.title()
|
||||
actions.append(f"Page title: {title}")
|
||||
|
||||
|
||||
# Find links and headings
|
||||
try:
|
||||
links = page.locator("a").all()
|
||||
link_texts = [link.text_content() for link in links[:5]]
|
||||
actions.append(f"Found {len(links)} links: {link_texts}")
|
||||
|
||||
|
||||
headings = page.locator("h1, h2, h3").all()
|
||||
heading_texts = [h.text_content() for h in headings[:3]]
|
||||
actions.append(f"Found headings: {heading_texts}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
actions.append(f"Element interaction error: {str(e)}")
|
||||
|
||||
|
||||
# Let's try a form for good measure
|
||||
try:
|
||||
page.goto("https://httpbin.org/forms/post")
|
||||
actions.append("Navigated to form page")
|
||||
|
||||
|
||||
# Fill out the form
|
||||
page.fill('input[name="custname"]', "Test User from Sandboxed Environment")
|
||||
page.fill('input[name="custtel"]', "555-0123")
|
||||
page.fill('input[name="custemail"]', "test@example.com")
|
||||
page.select_option('select[name="size"]', "large")
|
||||
|
||||
|
||||
actions.append("Filled out form fields")
|
||||
|
||||
|
||||
# Submit and see what happens
|
||||
page.click('input[type="submit"]')
|
||||
page.wait_for_load_state("networkidle")
|
||||
|
||||
|
||||
actions.append("Submitted form")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
actions.append(f"Form interaction error: {str(e)}")
|
||||
|
||||
|
||||
browser.close()
|
||||
|
||||
|
||||
return {
|
||||
"actions_performed": actions,
|
||||
"screenshots": screenshots,
|
||||
"success": True
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
return {"error": f"Browser automation failed: {str(e)}"}
|
||||
|
||||
@@ -196,9 +200,9 @@ def security_audit_tool(code_snippet):
|
||||
"""Analyze code for potential security issues"""
|
||||
import ast
|
||||
import re
|
||||
|
||||
|
||||
issues = []
|
||||
|
||||
|
||||
# Check for the usual suspects
|
||||
dangerous_patterns = [
|
||||
(r'eval\s*\(', "Use of eval() function"),
|
||||
@@ -207,11 +211,11 @@ def security_audit_tool(code_snippet):
|
||||
(r'subprocess\.', "Subprocess usage"),
|
||||
(r'os\.system\s*\(', "OS system call"),
|
||||
]
|
||||
|
||||
|
||||
for pattern, description in dangerous_patterns:
|
||||
if re.search(pattern, code_snippet):
|
||||
issues.append(description)
|
||||
|
||||
|
||||
# Get fancy with AST analysis
|
||||
try:
|
||||
tree = ast.parse(code_snippet)
|
||||
@@ -222,7 +226,7 @@ def security_audit_tool(code_snippet):
|
||||
issues.append(f"Dangerous function call: {node.func.id}")
|
||||
except SyntaxError:
|
||||
issues.append("Syntax error in code")
|
||||
|
||||
|
||||
return {
|
||||
"security_issues": issues,
|
||||
"risk_level": "HIGH" if len(issues) > 2 else "MEDIUM" if issues else "LOW"
|
||||
@@ -235,7 +239,7 @@ print(f"Security audit: {audit_result}")
|
||||
|
||||
### Desktop automation in the cloud
|
||||
|
||||
Here's where things get really interesting. Cua cloud containers come with full desktop environments, so you can automate GUIs:
|
||||
Here's where things get really interesting. Cua Cloud Sandbox comes with full desktop environments, so you can automate GUIs:
|
||||
|
||||
```python
|
||||
@sandboxed("desktop_env")
|
||||
@@ -245,34 +249,34 @@ def take_screenshot_and_analyze():
|
||||
import base64
|
||||
from PIL import ImageGrab
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
try:
|
||||
# Grab the screen
|
||||
screenshot = ImageGrab.grab()
|
||||
|
||||
|
||||
# Convert to base64 for easy transport
|
||||
buffer = io.BytesIO()
|
||||
screenshot.save(buffer, format='PNG')
|
||||
screenshot_data = base64.b64encode(buffer.getvalue()).decode()
|
||||
|
||||
|
||||
# Get some basic info
|
||||
screen_info = {
|
||||
"size": screenshot.size,
|
||||
"mode": screenshot.mode,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
|
||||
# Analyze the colors (because why not?)
|
||||
colors = screenshot.getcolors(maxcolors=256*256*256)
|
||||
dominant_color = max(colors, key=lambda x: x[0])[1] if colors else None
|
||||
|
||||
|
||||
return {
|
||||
"screenshot_base64": screenshot_data,
|
||||
"screen_info": screen_info,
|
||||
"dominant_color": dominant_color,
|
||||
"unique_colors": len(colors) if colors else 0
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
return {"error": f"Screenshot failed: {str(e)}"}
|
||||
|
||||
@@ -287,6 +291,7 @@ print("Desktop analysis complete!")
|
||||
## Pro tips for sandboxed success
|
||||
|
||||
### Keep it self-contained
|
||||
|
||||
Always put your imports inside the function. Trust us on this one:
|
||||
|
||||
```python
|
||||
@@ -294,12 +299,13 @@ Always put your imports inside the function. Trust us on this one:
|
||||
def good_function():
|
||||
import os # Import inside the function
|
||||
import json
|
||||
|
||||
|
||||
# Your code here
|
||||
return {"result": "success"}
|
||||
```
|
||||
|
||||
### Install dependencies first
|
||||
|
||||
Don't forget to install packages before using them:
|
||||
|
||||
```python
|
||||
@@ -314,13 +320,14 @@ def data_analysis():
|
||||
```
|
||||
|
||||
### Use descriptive environment names
|
||||
|
||||
Future you will thank you:
|
||||
|
||||
```python
|
||||
@sandboxed("data_processing_env")
|
||||
def process_data(): pass
|
||||
|
||||
@sandboxed("web_scraping_env")
|
||||
@sandboxed("web_scraping_env")
|
||||
def scrape_site(): pass
|
||||
|
||||
@sandboxed("ml_training_env")
|
||||
@@ -328,6 +335,7 @@ def train_model(): pass
|
||||
```
|
||||
|
||||
### Always handle errors gracefully
|
||||
|
||||
Things break. Plan for it:
|
||||
|
||||
```python
|
||||
@@ -345,6 +353,7 @@ def robust_function(data):
|
||||
Let's be honest – there's some overhead here. Code needs to be serialized, sent over the network, and executed remotely. But for most use cases, the benefits far outweigh the costs.
|
||||
|
||||
If you're building something performance-critical, consider:
|
||||
|
||||
- Batching multiple operations into a single sandboxed function
|
||||
- Minimizing data transfer between host and container
|
||||
- Using persistent virtual environments
|
||||
@@ -369,4 +378,4 @@ Happy coding (safely)!
|
||||
|
||||
---
|
||||
|
||||
*Want to dive deeper? Check out our [sandboxed functions examples](https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py) and [virtual environment tests](https://github.com/trycua/cua/blob/main/tests/venv.py) on GitHub. Questions? Come chat with us on Discord!*
|
||||
_Want to dive deeper? Check out our [sandboxed functions examples](https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py) and [virtual environment tests](https://github.com/trycua/cua/blob/main/tests/venv.py) on GitHub. Questions? Come chat with us on Discord!_
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Training Computer-Use Models: Creating Human Trajectories with Cua
|
||||
|
||||
*Published on May 1, 2025 by Dillon DuPont*
|
||||
_Published on May 1, 2025 by Dillon DuPont_
|
||||
|
||||
In our previous posts, we covered [building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [using the Agent framework](build-your-own-operator-on-macos-2) to simplify development. Today, we'll focus on a critical aspect of improving computer-use agents and models: gathering high-quality demonstration data using Cua's Computer-Use Interface (CUI) and its Gradio UI to create and share human-generated trajectories.
|
||||
|
||||
@@ -8,10 +8,10 @@ Why is this important? Underlying models used by Computer-use agents need exampl
|
||||
|
||||
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
|
||||
|
||||
|
||||
## What You'll Learn
|
||||
|
||||
By the end of this tutorial, you'll be able to:
|
||||
|
||||
- Set up the Computer-Use Interface (CUI) with Gradio UI support
|
||||
- Record your own computer interaction trajectories
|
||||
- Organize and tag your demonstrations
|
||||
@@ -19,6 +19,7 @@ By the end of this tutorial, you'll be able to:
|
||||
- Contribute to improving computer-use AI for everyone
|
||||
|
||||
**Prerequisites:**
|
||||
|
||||
- macOS Sonoma (14.0) or later
|
||||
- Python 3.10+
|
||||
- Basic familiarity with Python and terminal commands
|
||||
@@ -38,6 +39,7 @@ Human trajectories, in the context of Computer-use AI Agents, are recordings of
|
||||
- Time spent on different elements
|
||||
|
||||
These trajectories serve as examples for AI models to learn from, helping them understand the relationship between:
|
||||
|
||||
1. The visual state of the screen
|
||||
2. The user's goal or task
|
||||
3. The most appropriate action to take
|
||||
@@ -59,17 +61,19 @@ By contributing high-quality demonstrations, you're helping to create more capab
|
||||
The Computer-Use Interface includes an optional Gradio UI specifically designed to make recording and sharing demonstrations easy. Let's set it up:
|
||||
|
||||
1. **Create a Python environment** (optional but recommended):
|
||||
|
||||
```bash
|
||||
# Using conda
|
||||
conda create -n cua-trajectories python=3.10
|
||||
conda activate cua-trajectories
|
||||
|
||||
|
||||
# Using venv
|
||||
python -m venv cua-trajectories
|
||||
source cua-trajectories/bin/activate # On macOS/Linux
|
||||
```
|
||||
|
||||
2. **Install the CUI package with UI support**:
|
||||
|
||||
```bash
|
||||
pip install "cua-computer[ui]"
|
||||
```
|
||||
@@ -145,6 +149,7 @@ Effective tagging and organization make your demonstrations more valuable to res
|
||||
### Task-Based Tags
|
||||
|
||||
Describe what the demonstration accomplishes:
|
||||
|
||||
- `web-browsing`
|
||||
- `document-editing`
|
||||
- `file-management`
|
||||
@@ -154,6 +159,7 @@ Describe what the demonstration accomplishes:
|
||||
### Application Tags
|
||||
|
||||
Identify the applications used:
|
||||
|
||||
- `finder`
|
||||
- `safari`
|
||||
- `notes`
|
||||
@@ -163,6 +169,7 @@ Identify the applications used:
|
||||
### Complexity Tags
|
||||
|
||||
Indicate the difficulty level:
|
||||
|
||||
- `beginner`
|
||||
- `intermediate`
|
||||
- `advanced`
|
||||
@@ -171,6 +178,7 @@ Indicate the difficulty level:
|
||||
### UI Element Tags
|
||||
|
||||
Highlight specific UI interactions:
|
||||
|
||||
- `drag-and-drop`
|
||||
- `menu-navigation`
|
||||
- `form-filling`
|
||||
@@ -229,11 +237,11 @@ from computer import Computer
|
||||
computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
|
||||
try:
|
||||
await computer.run()
|
||||
|
||||
|
||||
screenshot = await computer.interface.screenshot()
|
||||
with open("screenshot.png", "wb") as f:
|
||||
f.write(screenshot)
|
||||
|
||||
|
||||
await computer.interface.move_cursor(100, 100)
|
||||
await computer.interface.left_click()
|
||||
await computer.interface.right_click(300, 300)
|
||||
@@ -280,6 +288,7 @@ You can also learn from existing trajectory datasets contributed by the communit
|
||||
### Summary
|
||||
|
||||
In this guide, we've covered how to:
|
||||
|
||||
- Set up the Computer-Use Interface with Gradio UI
|
||||
- Record high-quality human demonstrations
|
||||
- Organize and tag your trajectories
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Trajectory Viewer for Cua
|
||||
|
||||
*Published on May 13, 2025 by Dillon DuPont*
|
||||
_Published on May 13, 2025 by Dillon DuPont_
|
||||
|
||||
Don’t forget to check out [Part 1: Building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [Part 2: Using the Agent framework](build-your-own-operator-on-macos-2) for setting up your Cua environment and basic tips and tricks!
|
||||
|
||||
@@ -18,7 +18,7 @@ Think of a trajectory as a detailed video recording of your agent’s journey:
|
||||
- **Observations**: What did the agent see (the exact screen content) at each point in time?
|
||||
- **Actions**: What clicks, keystrokes, or commands did it perform in response?
|
||||
- **Decisions**: Which options did it choose, and why?
|
||||
Especially for longer and more complex tasks, your agent will make multiple steps, take multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong.
|
||||
Especially for longer and more complex tasks, your agent will take multiple steps, perform multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong.
|
||||
|
||||
## So, what’s Cua’s Trajectory Viewer and why use it?
|
||||
|
||||
@@ -40,10 +40,6 @@ The viewer allows you to see exactly what your agent observed and how it interac
|
||||
|
||||
## Recording a Trajectory
|
||||
|
||||
### Using the Gradio UI
|
||||
|
||||
The simplest way to create agent trajectories is through the [Cua Agent Gradio UI](https://www.trycua.com/docs/quickstart-ui) by checking the "Save Trajectory" option.
|
||||
|
||||
### Using the ComputerAgent API
|
||||
|
||||
Trajectories are saved by default when using the ComputerAgent API:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Ubuntu Docker Support in Cua with Kasm
|
||||
|
||||
*Published Aug 26, 2025 by Francesco Bonacci*
|
||||
_Published Aug 26, 2025 by Francesco Bonacci_
|
||||
|
||||
Today we’re shipping **Ubuntu Docker support** in Cua. You get a full Linux desktop inside a Docker container, viewable right in your browser—no VM spin-up, no extra clients. It behaves the same on macOS, Windows, and Linux.
|
||||
|
||||
@@ -16,17 +16,17 @@ We wanted something lightweight, isolated, and identical across machines. So we
|
||||
|
||||
Short answer: **portability, startup time, and ops friction.**
|
||||
|
||||
* **Runs everywhere, no hypervisor drama.** KVM needs Linux; Hyper-V/Virtualization.Framework setups vary by host and policy. Docker is ubiquitous across macOS/Windows/Linux and allowed in most CI runners—so your GUI env actually runs where your team works.
|
||||
* **Faster boot & smaller footprints.** Containers cold-start in seconds and images are GB-scale; VMs tend to be minutes and tens of GB. That matters for parallel agents, CI, and local iteration.
|
||||
* **Lower ops overhead.** No nested virt, kernel modules, or privileged host tweaks that many orgs (and cloud runners) block. Pull → run → browser.
|
||||
* **Same image, everywhere.** One Docker image gives you an identical desktop on every dev laptop and in CI.
|
||||
* **Web-first access out of the box.** KasmVNC serves the desktop over HTTP—no extra VNC/RDP clients or SPICE config.
|
||||
- **Runs everywhere, no hypervisor drama.** KVM needs Linux; Hyper-V/Virtualization.Framework setups vary by host and policy. Docker is ubiquitous across macOS/Windows/Linux and allowed in most CI runners—so your GUI env actually runs where your team works.
|
||||
- **Faster boot & smaller footprints.** Containers cold-start in seconds and images are GB-scale; VMs tend to be minutes and tens of GB. That matters for parallel agents, CI, and local iteration.
|
||||
- **Lower ops overhead.** No nested virt, kernel modules, or privileged host tweaks that many orgs (and cloud runners) block. Pull → run → browser.
|
||||
- **Same image, everywhere.** One Docker image gives you an identical desktop on every dev laptop and in CI.
|
||||
- **Web-first access out of the box.** KasmVNC serves the desktop over HTTP—no extra VNC/RDP clients or SPICE config.
|
||||
|
||||
**When we *do* reach for QEMU/KVM:**
|
||||
**When we _do_ reach for QEMU/KVM:**
|
||||
|
||||
* You need **true OS isolation** or to run **non-Linux** guests.
|
||||
* You want **kernel-level features** or **device/GPU passthrough** (VFIO).
|
||||
* You’re optimizing for **hardware realism** over startup speed and density.
|
||||
- You need **true OS isolation** or to run **non-Linux** guests.
|
||||
- You want **kernel-level features** or **device/GPU passthrough** (VFIO).
|
||||
- You’re optimizing for **hardware realism** over startup speed and density.
|
||||
|
||||
For this release, the goal was a **cross-platform Linux desktop that feels instant and identical** across local dev and CI. Containers + KasmVNC hit that sweet spot.
|
||||
|
||||
@@ -174,10 +174,10 @@ await computer.run()
|
||||
|
||||
## Links
|
||||
|
||||
* **Docker Provider Docs:** [https://docs.trycua.com/computers/docker](https://docs.trycua.com/computers/docker)
|
||||
* **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC)
|
||||
* **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm)
|
||||
* **Computer SDK:** [https://docs.trycua.com/docs/computer-sdk/computers](https://docs.trycua.com/docs/computer-sdk/computers)
|
||||
* **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
|
||||
- **Docker Provider Docs:** [https://docs.trycua.com/computers/docker](https://docs.trycua.com/computers/docker)
|
||||
- **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC)
|
||||
- **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm)
|
||||
- **Computer SDK:** [https://docs.trycua.com/docs/computer-sdk/computers](https://docs.trycua.com/docs/computer-sdk/computers)
|
||||
- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
|
||||
|
||||
Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
|
||||
Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# Your Windows PC is Already the Perfect Development Environment for Computer-Use Agents
|
||||
|
||||
*Published on June 18, 2025 by Dillon DuPont*
|
||||
_Published on June 18, 2025 by Dillon DuPont_
|
||||
|
||||
Over the last few months, our enterprise users kept asking the same type of question: *"When are you adding support for AutoCAD?"* *"What about SAP integration?"* *"Can you automate our MES system?"* - each request was for different enterprise applications we'd never heard of.
|
||||
Over the last few months, our enterprise users kept asking the same type of question: _"When are you adding support for AutoCAD?"_ _"What about SAP integration?"_ _"Can you automate our MES system?"_ - each request was for different enterprise applications we'd never heard of.
|
||||
|
||||
At first, we deflected. We've been building Cua to work across different environments - from [Lume for macOS VMs](./lume-to-containerization) to cloud containers. But these requests kept piling up. AutoCAD automation. SAP integration. Specialized manufacturing systems.
|
||||
At first, we deflected. We've been building Cua to work across different environments - from [Lume for macOS VMs](./lume-to-containerization) to cloud containers. But these requests kept piling up. AutoCAD automation. SAP integration. Specialized manufacturing systems.
|
||||
|
||||
Then it hit us: **they all ran exclusively on Windows**.
|
||||
|
||||
@@ -80,6 +80,7 @@ python -m agent.ui
|
||||
```
|
||||
|
||||
**What you get**:
|
||||
|
||||
- Visual interface in your browser
|
||||
- Real-time agent action viewing
|
||||
- Natural language task instructions
|
||||
@@ -101,21 +102,21 @@ async def test_windows_agent():
|
||||
os_type="windows",
|
||||
memory="4GB",
|
||||
)
|
||||
|
||||
|
||||
# Start the VM (~35s)
|
||||
await computer.run()
|
||||
|
||||
|
||||
# Create agent with your preferred model
|
||||
agent = ComputerAgent(
|
||||
model="openai/computer-use-preview",
|
||||
save_trajectory=True,
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
|
||||
# Give it a task
|
||||
async for result in agent.run("Open Calculator and compute 15% tip on $47.50"):
|
||||
print(f"Agent action: {result}")
|
||||
|
||||
|
||||
# Shutdown the VM
|
||||
await computer.stop()
|
||||
|
||||
@@ -123,6 +124,7 @@ asyncio.run(test_windows_agent())
|
||||
```
|
||||
|
||||
**What you get**:
|
||||
|
||||
- Full programmatic control
|
||||
- Custom agent workflows
|
||||
- Integration with your existing code
|
||||
@@ -141,6 +143,7 @@ asyncio.run(test_windows_agent())
|
||||
Let's see how different testing approaches stack up:
|
||||
|
||||
### Windows Sandbox + Cua
|
||||
|
||||
- **Perfect for**: Quick testing and development
|
||||
- **Cost**: Free (built into Windows)
|
||||
- **Setup time**: Under 5 minutes
|
||||
@@ -149,6 +152,7 @@ Let's see how different testing approaches stack up:
|
||||
- **Requires**: Windows 10/11 with 4GB+ RAM
|
||||
|
||||
### Traditional VMs
|
||||
|
||||
- **Perfect for**: Complex testing scenarios
|
||||
- **Full customization**: Any Windows version
|
||||
- **Heavy resource usage**: Slow to start/stop
|
||||
@@ -160,6 +164,7 @@ Let's see how different testing approaches stack up:
|
||||
Here's what our enterprise users are building with Windows Sandbox:
|
||||
|
||||
### CAD and Engineering Automation
|
||||
|
||||
```python
|
||||
# Example: AutoCAD drawing automation
|
||||
task = """
|
||||
@@ -172,6 +177,7 @@ task = """
|
||||
```
|
||||
|
||||
### Manufacturing and ERP Integration
|
||||
|
||||
```python
|
||||
# Example: SAP workflow automation
|
||||
task = """
|
||||
@@ -184,6 +190,7 @@ task = """
|
||||
```
|
||||
|
||||
### Financial Software Automation
|
||||
|
||||
```python
|
||||
# Example: Trading platform automation
|
||||
task = """
|
||||
@@ -196,6 +203,7 @@ task = """
|
||||
```
|
||||
|
||||
### Legacy Windows Application Integration
|
||||
|
||||
```python
|
||||
# Example: Custom Windows application automation
|
||||
task = """
|
||||
@@ -210,12 +218,14 @@ task = """
|
||||
## System Requirements and Performance
|
||||
|
||||
### What You Need
|
||||
|
||||
- **Windows 10/11**: Any edition that supports Windows Sandbox
|
||||
- **Memory**: 4GB minimum (8GB recommended for CAD/professional software)
|
||||
- **CPU**: Virtualization support (enabled by default on modern systems)
|
||||
- **Storage**: A few GB free space
|
||||
|
||||
### Performance Tips
|
||||
|
||||
- **Close unnecessary applications** before starting Windows Sandbox
|
||||
- **Allocate appropriate memory** based on your RPA workflow complexity
|
||||
- **Use SSD storage** for faster sandbox startup
|
||||
@@ -234,4 +244,4 @@ But for development, prototyping, and learning Windows RPA workflows, **Windows
|
||||
|
||||
---
|
||||
|
||||
*Ready to see AI agents control your Windows applications? Come share your testing experiences on Discord!*
|
||||
_Ready to see AI agents control your Windows applications? Come share your testing experiences on Discord!_
|
||||
|
||||
2
docs/.env.example
Normal file
2
docs/.env.example
Normal file
@@ -0,0 +1,2 @@
|
||||
NEXT_PUBLIC_POSTHOG_API_KEY=
|
||||
NEXT_PUBLIC_POSTHOG_HOST=
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"useTabs": false,
|
||||
"semi": true,
|
||||
"singleQuote": true,
|
||||
"trailingComma": "es5",
|
||||
"bracketSpacing": true,
|
||||
"jsxBracketSameLine": true
|
||||
}
|
||||
@@ -34,6 +34,14 @@ A `source.config.ts` config file has been included, you can customise different
|
||||
|
||||
Read the [Introduction](https://fumadocs.dev/docs/mdx) for further details.
|
||||
|
||||
## Setup Telemetry
|
||||
|
||||
We use PostHog for telemetry to improve the clarity and structure of our documentation. Start by copying the `.env.example` and adding in your PostHog API key and host.
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about Next.js and Fumadocs, take a look at the following
|
||||
|
||||
@@ -15,20 +15,34 @@ To run an agent loop simply do:
|
||||
|
||||
```python
|
||||
from agent import ComputerAgent
|
||||
import asyncio
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer() # Connect to a cua container
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer]
|
||||
)
|
||||
async def take_screenshot():
|
||||
async with Computer(
|
||||
os_type="linux",
|
||||
provider_type="cloud",
|
||||
name="your-sandbox-name",
|
||||
api_key="your-api-key"
|
||||
) as computer:
|
||||
|
||||
prompt = "Take a screenshot and tell me what you see"
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
max_trajectory_budget=5.0
|
||||
)
|
||||
|
||||
async for result in agent.run(prompt):
|
||||
if result["output"][-1]["type"] == "message":
|
||||
print("Agent:", result["output"][-1]["content"][0]["text"])
|
||||
messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
|
||||
|
||||
async for result in agent.run(messages):
|
||||
for item in result["output"]:
|
||||
if item["type"] == "message":
|
||||
print(item["content"][0]["text"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(take_screenshot())
|
||||
```
|
||||
|
||||
For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page.
|
||||
@@ -122,6 +136,8 @@ The `ComputerAgent` constructor provides a wide range of options for customizing
|
||||
Enables prompt caching for repeated prompts (mainly for Anthropic models).
|
||||
- `max_trajectory_budget` (`float` | `dict`):
|
||||
If set (float or dict), adds a budget manager callback that tracks usage costs and stops execution if the budget is exceeded. Dict allows advanced options (e.g., `{ "max_budget": 5.0, "raise_error": True }`).
|
||||
- `instructions` (`str` | `list[str]`):
|
||||
System instructions for the agent. Can be a single string or multiple strings in a tuple/list for readability; they are concatenated into one system prompt.
|
||||
- `**kwargs` (`any`):
|
||||
Any additional keyword arguments are passed through to the agent loop or model provider.
|
||||
|
||||
@@ -142,7 +158,11 @@ agent = ComputerAgent(
|
||||
max_retries=5,
|
||||
screenshot_delay=1.0,
|
||||
use_prompt_caching=True,
|
||||
max_trajectory_budget={"max_budget": 5.0, "raise_error": True}
|
||||
max_trajectory_budget={"max_budget": 5.0, "raise_error": True},
|
||||
instructions=(
|
||||
"You are a helpful computer-using agent. "
|
||||
"Output computer calls until you complete the given task"
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
@@ -170,4 +190,4 @@ except BudgetExceededException:
|
||||
print("Budget limit exceeded")
|
||||
except Exception as e:
|
||||
print(f"Agent error: {e}")
|
||||
```
|
||||
```
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
---
|
||||
title: Introduction
|
||||
description: Overview of benchmarking in the c/ua agent framework
|
||||
description: Overview of benchmarking in the Cua agent framework
|
||||
---
|
||||
|
||||
The c/ua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks.
|
||||
The Cua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks.
|
||||
|
||||
## Benchmark Types
|
||||
|
||||
@@ -13,7 +13,7 @@ Computer-Agent benchmarks evaluate two key capabilities:
|
||||
|
||||
## Using State-of-the-Art Models
|
||||
|
||||
Let's see how to use the SOTA vision-language models in the c/ua agent framework.
|
||||
Let's see how to use the SOTA vision-language models in the Cua agent framework.
|
||||
|
||||
### Plan Generation + Coordinate Generation
|
||||
|
||||
@@ -46,7 +46,7 @@ agent.predict_click("find the button to open the settings") # (27, 450)
|
||||
|
||||
### Composed Agent
|
||||
|
||||
The c/ua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model.
|
||||
The Cua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model.
|
||||
|
||||
```python
|
||||
# It can be paired with any LLM to form a composed agent:
|
||||
|
||||
@@ -1,9 +1,3 @@
|
||||
{
|
||||
"pages": [
|
||||
"introduction",
|
||||
"screenspot-v2",
|
||||
"screenspot-pro",
|
||||
"interactive",
|
||||
"osworld-verified"
|
||||
]
|
||||
}
|
||||
"pages": ["introduction", "screenspot-v2", "screenspot-pro", "interactive", "osworld-verified"]
|
||||
}
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
{
|
||||
"title": "Callbacks",
|
||||
"description": "Extending agents with callback hooks and built-in handlers",
|
||||
"pages": [
|
||||
"agent-lifecycle",
|
||||
"trajectories",
|
||||
"logging",
|
||||
"cost-saving",
|
||||
"pii-anonymization"
|
||||
]
|
||||
"title": "Callbacks",
|
||||
"description": "Extending agents with callback hooks and built-in handlers",
|
||||
"pages": ["agent-lifecycle", "trajectories", "logging", "cost-saving", "pii-anonymization"]
|
||||
}
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
{
|
||||
"title": "Agent SDK",
|
||||
"description": "Build computer-using agents with the Agent SDK",
|
||||
"pages": [
|
||||
"agent-loops",
|
||||
"supported-agents",
|
||||
"supported-model-providers",
|
||||
"chat-history",
|
||||
"message-format",
|
||||
"customizing-computeragent",
|
||||
"callbacks",
|
||||
"custom-tools",
|
||||
"custom-computer-handlers",
|
||||
"prompt-caching",
|
||||
"usage-tracking",
|
||||
"benchmarks",
|
||||
"migration-guide",
|
||||
"integrations"
|
||||
]
|
||||
"title": "Agent SDK",
|
||||
"description": "Build computer-using agents with the Agent SDK",
|
||||
"pages": [
|
||||
"agent-loops",
|
||||
"supported-agents",
|
||||
"supported-model-providers",
|
||||
"chat-history",
|
||||
"message-format",
|
||||
"customizing-computeragent",
|
||||
"callbacks",
|
||||
"custom-tools",
|
||||
"custom-computer-handlers",
|
||||
"prompt-caching",
|
||||
"usage-tracking",
|
||||
"benchmarks",
|
||||
"migration-guide",
|
||||
"integrations"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ Any model that supports `predict_click()` can be used as the grounding component
|
||||
- InternVL 3.5 family: `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
|
||||
- UI‑TARS 1.5: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` (also supports full CU)
|
||||
- OmniParser (OCR): `omniparser` (requires combination with a LiteLLM vision model)
|
||||
- Moondream3: `moondream3` (requires combination with a LiteLLM vision/text model)
|
||||
|
||||
## Supported Planning Models
|
||||
|
||||
@@ -83,6 +84,23 @@ async for _ in agent.run("Help me fill out this form with my personal informatio
|
||||
pass
|
||||
```
|
||||
|
||||
### Moondream3 + GPT-4o
|
||||
|
||||
Use the built-in Moondream3 grounding with any planning model. Moondream3 will detect UI elements on the latest screenshot, label them, and provide a user message listing detected element names.
|
||||
|
||||
```python
|
||||
from agent import ComputerAgent
|
||||
from computer import computer
|
||||
|
||||
agent = ComputerAgent(
|
||||
"moondream3+openai/gpt-4o",
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
async for _ in agent.run("Close the settings window, then open the Downloads folder"):
|
||||
pass
|
||||
```
|
||||
|
||||
## Benefits of Composed Agents
|
||||
|
||||
- **Specialized Grounding**: Use models optimized for click prediction accuracy
|
||||
|
||||
@@ -7,12 +7,25 @@ These models support complete computer-use agent functionality through `Computer
|
||||
|
||||
All agent loops are compatible with any LLM provider supported by LiteLLM.
|
||||
|
||||
See [Running Models Locally](../local-models) for how to use Hugging Face and MLX models on your own machine.
|
||||
See [Running Models Locally](/agent-sdk/supported-model-providers/local-models) for how to use Hugging Face and MLX models on your own machine.
|
||||
|
||||
## Gemini CUA
|
||||
|
||||
Gemini models with computer-use capabilities:
|
||||
|
||||
- Gemini 2.5 CUA: `gemini-2.5-computer-use-preview-10-2025`
|
||||
|
||||
```python
|
||||
agent = ComputerAgent("gemini-2.5-computer-use-preview-10-2025", tools=[computer])
|
||||
async for _ in agent.run("Open Firefox and navigate to github.com"):
|
||||
pass
|
||||
```
|
||||
|
||||
## Anthropic CUAs
|
||||
|
||||
Claude models with computer-use capabilities:
|
||||
|
||||
- Claude 4.5: `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`
|
||||
- Claude 4.1: `claude-opus-4-1-20250805`
|
||||
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
|
||||
- Claude 3.7: `claude-3-7-sonnet-20250219`
|
||||
@@ -60,6 +73,17 @@ async for _ in agent.run("Open Firefox and navigate to github.com"):
|
||||
pass
|
||||
```
|
||||
|
||||
## Qwen3 VL
|
||||
|
||||
Qwen3 VL family:
|
||||
- `openrouter/qwen/qwen3-vl-235b-a22b-instruct`
|
||||
|
||||
```python
|
||||
agent = ComputerAgent("openrouter/qwen/qwen3-vl-235b-a22b-instruct", tools=[computer])
|
||||
async for _ in agent.run("Open Firefox and navigate to github.com"):
|
||||
pass
|
||||
```
|
||||
|
||||
## UI-TARS 1.5
|
||||
|
||||
Unified vision-language model for computer-use:
|
||||
|
||||
@@ -45,6 +45,12 @@ OCR-focused set-of-marks model that requires an LLM for click prediction:
|
||||
|
||||
- `omniparser` (requires combination with any LiteLLM vision model)
|
||||
|
||||
### Moondream3 (Local Grounding)
|
||||
|
||||
Moondream3 is a powerful small model that can perform UI grounding and click prediction.
|
||||
|
||||
- `moondream3`
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```python
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
{
|
||||
"title": "Supported Agents",
|
||||
"description": "Models and configurations supported by the Agent SDK",
|
||||
"pages": [
|
||||
"computer-use-agents",
|
||||
"grounding-models",
|
||||
"composed-agents",
|
||||
"human-in-the-loop"
|
||||
]
|
||||
"title": "Supported Agents",
|
||||
"description": "Models and configurations supported by the Agent SDK",
|
||||
"pages": ["computer-use-agents", "grounding-models", "composed-agents", "human-in-the-loop"]
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
title: Running Models Locally
|
||||
---
|
||||
|
||||
You can run open-source LLMs and vision models on your own machine using cua, without relying on cloud APIs. This is ideal for development, privacy, or running on air-gapped systems.
|
||||
You can run open-source LLMs and vision models on your own machine using Cua, without relying on cloud APIs. This is ideal for development, privacy, or running on air-gapped systems.
|
||||
|
||||
## Hugging Face (transformers)
|
||||
|
||||
|
||||
260
docs/content/docs/computer-sdk/cloud-vm-management.mdx
Normal file
260
docs/content/docs/computer-sdk/cloud-vm-management.mdx
Normal file
@@ -0,0 +1,260 @@
|
||||
---
|
||||
title: Cloud VM Management
|
||||
description: Manage your Cua Cloud sandboxes (VMs) via Python SDK or HTTP API
|
||||
---
|
||||
|
||||
import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
|
||||
|
||||
|
||||
Using the Cua Cloud API, you can manage your Cua Cloud sandboxes (VMs) with Python or HTTP (curl).
|
||||
|
||||
All examples require a Cua API key. You can obtain one from the [Dashboard](https://www.cua.ai/dashboard/keys).
|
||||
|
||||
---
|
||||
|
||||
## List VMs
|
||||
|
||||
<Tabs items={["Python", "curl"]}>
|
||||
<Tab value="Python">
|
||||
|
||||
```python
|
||||
import os
|
||||
import asyncio
|
||||
from computer.providers.cloud.provider import CloudProvider
|
||||
|
||||
async def main():
|
||||
api_key = os.getenv("CUA_API_KEY") or "your-api-key"
|
||||
# Optional: point to a different API base
|
||||
# os.environ["CUA_API_BASE"] = "https://api.cua.ai"
|
||||
|
||||
provider = CloudProvider(api_key=api_key, verbose=False)
|
||||
async with provider:
|
||||
vms = await provider.list_vms()
|
||||
for vm in vms:
|
||||
print({
|
||||
"name": vm["name"],
|
||||
"status": vm["status"],
|
||||
"api_url": vm.get("api_url"),
|
||||
"vnc_url": vm.get("vnc_url"),
|
||||
})
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="curl">
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer $CUA_API_KEY" \
|
||||
"https://api.cua.ai/v1/vms"
|
||||
```
|
||||
|
||||
Responses:
|
||||
- 200: Array of minimal VM objects with fields `{ name, password, status }`
|
||||
- 401: Unauthorized (missing/invalid API key)
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"name": "s-windows-x4snp46ebf",
|
||||
"password": "49b8daa3",
|
||||
"status": "running"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
Status values:
|
||||
|
||||
- `pending`: VM deployment in progress
|
||||
- `running`: VM is active and accessible
|
||||
- `stopped`: VM is stopped but not terminated
|
||||
- `terminated`: VM has been permanently destroyed
|
||||
- `failed`: VM deployment or operation failed
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Start a VM
|
||||
Provide the VM name you want to start.
|
||||
|
||||
<Tabs items={["Python", "curl"]}>
|
||||
<Tab value="Python">
|
||||
|
||||
```python
|
||||
import os
|
||||
import asyncio
|
||||
from computer.providers.cloud.provider import CloudProvider
|
||||
|
||||
async def main():
|
||||
api_key = os.getenv("CUA_API_KEY") or "your-api-key"
|
||||
name = "my-vm-name" # e.g., "m-linux-96lcxd2c2k"
|
||||
|
||||
provider = CloudProvider(api_key=api_key)
|
||||
async with provider:
|
||||
resp = await provider.run_vm(name)
|
||||
print(resp) # { "name": name, "status": "starting" }
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="curl">
|
||||
|
||||
```bash
|
||||
curl -X POST \
|
||||
-H "Authorization: Bearer $CUA_API_KEY" \
|
||||
"https://api.cua.ai/v1/vms/my-vm-name/start" -i
|
||||
```
|
||||
|
||||
Responses:
|
||||
- 204: No Content (start accepted)
|
||||
- 401: Unauthorized (missing/invalid API key)
|
||||
- 404: VM not found or not owned by the user
|
||||
|
||||
```text
|
||||
HTTP/1.1 204 No Content
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Stop a VM
|
||||
Stops the VM asynchronously.
|
||||
|
||||
<Tabs items={["Python", "curl"]}>
|
||||
<Tab value="Python">
|
||||
|
||||
```python
|
||||
import os
|
||||
import asyncio
|
||||
from computer.providers.cloud.provider import CloudProvider
|
||||
|
||||
async def main():
|
||||
api_key = os.getenv("CUA_API_KEY") or "your-api-key"
|
||||
name = "my-vm-name"
|
||||
|
||||
provider = CloudProvider(api_key=api_key)
|
||||
async with provider:
|
||||
resp = await provider.stop_vm(name)
|
||||
print(resp) # { "name": name, "status": "stopping" }
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="curl">
|
||||
|
||||
```bash
|
||||
curl -X POST \
|
||||
-H "Authorization: Bearer $CUA_API_KEY" \
|
||||
"https://api.cua.ai/v1/vms/my-vm-name/stop"
|
||||
```
|
||||
|
||||
Responses:
|
||||
- 202: Accepted with `{ "status": "stopping" }`
|
||||
- 401: Unauthorized (missing/invalid API key)
|
||||
- 404: VM not found or not owned by the user
|
||||
|
||||
```json
|
||||
{ "status": "stopping" }
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Restart a VM
|
||||
Restarts the VM asynchronously.
|
||||
|
||||
<Tabs items={["Python", "curl"]}>
|
||||
<Tab value="Python">
|
||||
|
||||
```python
|
||||
import os
|
||||
import asyncio
|
||||
from computer.providers.cloud.provider import CloudProvider
|
||||
|
||||
async def main():
|
||||
api_key = os.getenv("CUA_API_KEY") or "your-api-key"
|
||||
name = "my-vm-name"
|
||||
|
||||
provider = CloudProvider(api_key=api_key)
|
||||
async with provider:
|
||||
resp = await provider.restart_vm(name)
|
||||
print(resp) # { "name": name, "status": "restarting" }
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="curl">
|
||||
|
||||
```bash
|
||||
curl -X POST \
|
||||
-H "Authorization: Bearer $CUA_API_KEY" \
|
||||
"https://api.cua.ai/v1/vms/my-vm-name/restart"
|
||||
```
|
||||
|
||||
Responses:
|
||||
- 202: Accepted with `{ "status": "restarting" }`
|
||||
- 401: Unauthorized (missing/invalid API key)
|
||||
- 404: VM not found or not owned by the user
|
||||
|
||||
```json
|
||||
{ "status": "restarting" }
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
---
|
||||
|
||||
## Query a VM by name
|
||||
Query the computer-server running on the VM. Useful for checking details like status or OS type.
|
||||
|
||||
<Tabs items={["Python", "curl"]}>
|
||||
<Tab value="Python">
|
||||
|
||||
```python
|
||||
import os
|
||||
import asyncio
|
||||
from computer.providers.cloud.provider import CloudProvider
|
||||
|
||||
async def main():
|
||||
api_key = os.getenv("CUA_API_KEY") or "your-api-key"
|
||||
name = "my-vm-name"
|
||||
|
||||
provider = CloudProvider(api_key=api_key)
|
||||
async with provider:
|
||||
info = await provider.get_vm(name)
|
||||
print(info)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="curl">
|
||||
|
||||
```bash
|
||||
curl "https://my-vm-name.containers.cloud.cua.ai:8443/status"
|
||||
```
|
||||
|
||||
Responses:
|
||||
- 200: Server available
|
||||
|
||||
```json
|
||||
{ "status": "ok", "os_type": "linux", "features": ["agent"] }
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: Cua Computers
|
||||
description: Understanding cua computer types and connection methods
|
||||
description: Understanding Cua computer types and connection methods
|
||||
---
|
||||
|
||||
<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb" target="_blank">Jupyter Notebook</a> and <a href="https://github.com/trycua/cua/tree/main/examples/computer-example-ts" target="_blank">NodeJS project</a> are available for this documentation.</Callout>
|
||||
@@ -9,9 +9,11 @@ Before we can automate apps using AI, we need to first connect to a Computer Ser
|
||||
|
||||
Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop.
|
||||
|
||||
## cua cloud container
|
||||
## Cloud Sandbox
|
||||
|
||||
This is a cloud container running the Computer Server. This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website.
|
||||
**Easiest & safest way to get started - works on any host OS**
|
||||
|
||||
This is a Cloud Sandbox running the Computer Server. Get a sandbox at [trycua.com](https://www.trycua.com/).
|
||||
|
||||
<Tabs items={['Python', 'TypeScript']}>
|
||||
<Tab value="Python">
|
||||
@@ -21,11 +23,11 @@ This is a cloud container running the Computer Server. This is the easiest & saf
|
||||
computer = Computer(
|
||||
os_type="linux",
|
||||
provider_type="cloud",
|
||||
name="your-container-name",
|
||||
name="your-sandbox-name",
|
||||
api_key="your-api-key"
|
||||
)
|
||||
|
||||
await computer.run() # Connect to the container
|
||||
await computer.run() # Connect to the sandbox
|
||||
```
|
||||
|
||||
</Tab>
|
||||
@@ -35,33 +37,33 @@ This is a cloud container running the Computer Server. This is the easiest & saf
|
||||
|
||||
const computer = new Computer({
|
||||
osType: OSType.LINUX,
|
||||
name: "your-container-name",
|
||||
name: "your-sandbox-name",
|
||||
apiKey: "your-api-key"
|
||||
});
|
||||
|
||||
await computer.run(); // Connect to the container
|
||||
await computer.run(); // Connect to the sandbox
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## cua local containers
|
||||
## Linux on Docker
|
||||
|
||||
cua provides local containers using different providers depending on your host operating system:
|
||||
**Run Linux desktop locally on macOS, Windows, or Linux hosts**
|
||||
|
||||
<Tabs items={['Lume (macOS Only)', 'Windows Sandbox (Windows Only)', 'Docker (macOS, Windows, Linux)']}>
|
||||
<Tab value="Lume (macOS Only)">
|
||||
|
||||
1. Install lume cli
|
||||
Cua provides two Docker images for running Linux desktops:
|
||||
|
||||
<Tabs items={['XFCE (Lightweight)', 'KASM (Full-Featured)']}>
|
||||
<Tab value="XFCE (Lightweight)">
|
||||
|
||||
**Recommended for most use cases** - lightweight XFCE desktop with Firefox
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
|
||||
2. Pull the CUA XFCE image
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
|
||||
2. Start a local cua container
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
docker pull --platform=linux/amd64 trycua/cua-xfce:latest
|
||||
```
|
||||
|
||||
3. Connect with Computer
|
||||
@@ -70,44 +72,23 @@ cua provides local containers using different providers depending on your host o
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="macos",
|
||||
provider_type="lume",
|
||||
name="macos-sequoia-cua:latest"
|
||||
os_type="linux",
|
||||
provider_type="docker",
|
||||
image="trycua/cua-xfce:latest",
|
||||
name="my-xfce-container"
|
||||
)
|
||||
|
||||
await computer.run() # Launch & connect to the container
|
||||
await computer.run() # Launch & connect to Docker sandbox
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows Sandbox (Windows Only)">
|
||||
|
||||
1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11)
|
||||
2. Install pywinsandbox dependency
|
||||
<Tab value="KASM (Full-Featured)">
|
||||
|
||||
```bash
|
||||
pip install -U git+git://github.com/karkason/pywinsandbox.git
|
||||
```
|
||||
**Full-featured Ubuntu desktop** with additional applications
|
||||
|
||||
3. Windows Sandbox will be automatically configured when you run the CLI
|
||||
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="windows",
|
||||
provider_type="winsandbox",
|
||||
ephemeral=True # Windows Sandbox is always ephemeral
|
||||
)
|
||||
|
||||
await computer.run() # Launch & connect to Windows Sandbox
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Docker (macOS, Windows, Linux)">
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
|
||||
2. Build or pull the CUA Ubuntu container
|
||||
2. Build or pull the CUA KASM image
|
||||
|
||||
```bash
|
||||
# Option 1: Pull from Docker Hub
|
||||
@@ -127,15 +108,70 @@ cua provides local containers using different providers depending on your host o
|
||||
os_type="linux",
|
||||
provider_type="docker",
|
||||
image="trycua/cua-ubuntu:latest",
|
||||
name="my-cua-container"
|
||||
name="my-kasm-container"
|
||||
)
|
||||
|
||||
await computer.run() # Launch & connect to Docker container
|
||||
await computer.run() # Launch & connect to Docker sandbox
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Windows Sandbox
|
||||
|
||||
**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11**
|
||||
|
||||
1. Enable Windows Sandbox
|
||||
2. Install pywinsandbox dependency
|
||||
|
||||
```bash
|
||||
pip install -U git+https://github.com/karkason/pywinsandbox.git
|
||||
```
|
||||
|
||||
3. Connect with Computer
|
||||
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="windows",
|
||||
provider_type="winsandbox",
|
||||
ephemeral=True # Windows Sandbox is always ephemeral
|
||||
)
|
||||
|
||||
await computer.run() # Launch & connect to Windows Sandbox
|
||||
```
|
||||
|
||||
## macOS VM
|
||||
|
||||
**macOS hosts only - requires Lume CLI**
|
||||
|
||||
1. Install the Lume CLI
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
|
||||
2. Start a local Cua macOS VM
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
```
|
||||
|
||||
3. Connect with Computer
|
||||
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="macos",
|
||||
provider_type="lume",
|
||||
name="macos-sequoia-cua:latest"
|
||||
)
|
||||
|
||||
await computer.run() # Launch & connect to the sandbox
|
||||
```
|
||||
|
||||
## Your host desktop
|
||||
|
||||
You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions.
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
{
|
||||
"title": "Computer SDK",
|
||||
"description": "Build computer-using agents with the Computer SDK",
|
||||
"pages": [
|
||||
"computers",
|
||||
"commands",
|
||||
"computer-ui",
|
||||
"sandboxed-python"
|
||||
]
|
||||
"title": "Computer SDK",
|
||||
"description": "Build computer-using agents with the Computer SDK",
|
||||
"pages": ["computers", "cloud-vm-management", "commands", "computer-ui", "sandboxed-python"]
|
||||
}
|
||||
|
||||
491
docs/content/docs/example-usecases/form-filling.mdx
Normal file
491
docs/content/docs/example-usecases/form-filling.mdx
Normal file
@@ -0,0 +1,491 @@
|
||||
---
|
||||
title: Form Filling
|
||||
description: Enhance and Automate Interactions Between Form Filling and Local File Systems
|
||||
---
|
||||
|
||||
import { EditableCodeBlock, EditableValue, S } from '@/components/editable-code-block';
|
||||
import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
|
||||
|
||||
## Overview
|
||||
|
||||
Cua can be used to automate interactions between form filling and local file systems on any operating system. Cua lets you interact with all the elements of a web page and local file systems to integrate between the two.
|
||||
|
||||
This preset use case uses [Cua Computer](/computer-sdk/computers) to interact with a web page and local file systems along with [Agent Loops](/agent-sdk/agent-loops) to run the agent in a loop with message history.
|
||||
|
||||
## Quickstart
|
||||
|
||||
Create a `requirements.txt` file with the following dependencies:
|
||||
```text
|
||||
cua-agent
|
||||
cua-computer
|
||||
python-dotenv>=1.0.0
|
||||
```
|
||||
|
||||
And install:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
Create a `.env` file with the following environment variables:
|
||||
|
||||
```text
|
||||
ANTHROPIC_API_KEY=your-api-key
|
||||
CUA_API_KEY=sk_cua-api01...
|
||||
```
|
||||
|
||||
Select the environment you want to run the code in (*click on the underlined values in the code to edit them directly!*):
|
||||
|
||||
<Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox']}>
|
||||
<Tab value="☁️ Cloud">
|
||||
|
||||
<EditableCodeBlock
|
||||
key="cloud-tab"
|
||||
lang="python"
|
||||
defaultValues={{
|
||||
"container-name": "m-linux-...",
|
||||
"api_key": "sk_cua-api01..."
|
||||
}}
|
||||
>
|
||||
{`import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import traceback
|
||||
|
||||
from agent import ComputerAgent
|
||||
from computer import Computer, VMProviderType
|
||||
from dotenv import load_dotenv
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def handle_sigint(sig, frame):
|
||||
print("\\n\\nExecution interrupted by user. Exiting gracefully...")
|
||||
exit(0)
|
||||
|
||||
|
||||
async def fill_application():
|
||||
try:
|
||||
async with Computer(
|
||||
os_type="linux",
|
||||
provider_type=VMProviderType.CLOUD,
|
||||
name="`}<EditableValue placeholder="container-name" />{`",
|
||||
api_key="`}<EditableValue placeholder="api_key" />{`",
|
||||
verbosity=logging.INFO,
|
||||
) as computer:
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.INFO,
|
||||
trajectory_dir="trajectories",
|
||||
use_prompt_caching=True,
|
||||
max_trajectory_budget=5.0,
|
||||
)
|
||||
|
||||
tasks = [
|
||||
"Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.",
|
||||
"Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf."
|
||||
]
|
||||
|
||||
history = []
|
||||
|
||||
for i, task in enumerate(tasks, 1):
|
||||
print(f"\\n[Task {i}/{len(tasks)}] {task}")
|
||||
|
||||
# Add user message to history
|
||||
history.append({"role": "user", "content": task})
|
||||
|
||||
# Run agent with conversation history
|
||||
async for result in agent.run(history, stream=False):
|
||||
history += result.get("output", [])
|
||||
|
||||
# Print output for debugging
|
||||
for item in result.get("output", []):
|
||||
if item.get("type") == "message":
|
||||
content = item.get("content", [])
|
||||
for content_part in content:
|
||||
if content_part.get("text"):
|
||||
logger.info(f"Agent: {content_part.get('text')}")
|
||||
elif item.get("type") == "computer_call":
|
||||
action = item.get("action", {})
|
||||
action_type = action.get("type", "")
|
||||
logger.debug(f"Computer Action: {action_type}")
|
||||
|
||||
print(f"✅ Task {i}/{len(tasks)} completed")
|
||||
|
||||
print("\\n🎉 All tasks completed successfully!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in fill_application: {e}")
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
load_dotenv()
|
||||
|
||||
if "ANTHROPIC_API_KEY" not in os.environ:
|
||||
raise RuntimeError(
|
||||
"Please set the ANTHROPIC_API_KEY environment variable.\\n"
|
||||
"You can add it to a .env file in the project root."
|
||||
)
|
||||
|
||||
if "CUA_API_KEY" not in os.environ:
|
||||
raise RuntimeError(
|
||||
"Please set the CUA_API_KEY environment variable.\\n"
|
||||
"You can add it to a .env file in the project root."
|
||||
)
|
||||
|
||||
signal.signal(signal.SIGINT, handle_sigint)
|
||||
|
||||
asyncio.run(fill_application())
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running automation: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()`}
|
||||
</EditableCodeBlock>
|
||||
|
||||
</Tab>
|
||||
<Tab value="🍎 Lume">
|
||||
|
||||
<EditableCodeBlock
|
||||
key="lume-tab"
|
||||
lang="python"
|
||||
defaultValues={{
|
||||
"container-name": "macos-sequoia-cua:latest"
|
||||
}}
|
||||
>
|
||||
{`import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import traceback
|
||||
|
||||
from agent import ComputerAgent
|
||||
from computer import Computer, VMProviderType
|
||||
from dotenv import load_dotenv
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def handle_sigint(sig, frame):
|
||||
print("\\n\\nExecution interrupted by user. Exiting gracefully...")
|
||||
exit(0)
|
||||
|
||||
|
||||
async def fill_application():
|
||||
try:
|
||||
async with Computer(
|
||||
os_type="macos",
|
||||
provider_type=VMProviderType.LUME,
|
||||
name="`}<EditableValue placeholder="container-name" />{`",
|
||||
verbosity=logging.INFO,
|
||||
) as computer:
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.INFO,
|
||||
trajectory_dir="trajectories",
|
||||
use_prompt_caching=True,
|
||||
max_trajectory_budget=5.0,
|
||||
)
|
||||
|
||||
tasks = [
|
||||
"Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.",
|
||||
"Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf."
|
||||
]
|
||||
|
||||
history = []
|
||||
|
||||
for i, task in enumerate(tasks, 1):
|
||||
print(f"\\n[Task {i}/{len(tasks)}] {task}")
|
||||
|
||||
# Add user message to history
|
||||
history.append({"role": "user", "content": task})
|
||||
|
||||
# Run agent with conversation history
|
||||
async for result in agent.run(history, stream=False):
|
||||
history += result.get("output", [])
|
||||
|
||||
# Print output for debugging
|
||||
for item in result.get("output", []):
|
||||
if item.get("type") == "message":
|
||||
content = item.get("content", [])
|
||||
for content_part in content:
|
||||
if content_part.get("text"):
|
||||
logger.info(f"Agent: {content_part.get('text')}")
|
||||
elif item.get("type") == "computer_call":
|
||||
action = item.get("action", {})
|
||||
action_type = action.get("type", "")
|
||||
logger.debug(f"Computer Action: {action_type}")
|
||||
|
||||
print(f"✅ Task {i}/{len(tasks)} completed")
|
||||
|
||||
print("\\n🎉 All tasks completed successfully!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in fill_application: {e}")
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
load_dotenv()
|
||||
|
||||
if "ANTHROPIC_API_KEY" not in os.environ:
|
||||
raise RuntimeError(
|
||||
"Please set the ANTHROPIC_API_KEY environment variable.\\n"
|
||||
"You can add it to a .env file in the project root."
|
||||
)
|
||||
|
||||
signal.signal(signal.SIGINT, handle_sigint)
|
||||
|
||||
asyncio.run(fill_application())
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running automation: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()`}
|
||||
</EditableCodeBlock>
|
||||
|
||||
</Tab>
|
||||
<Tab value="🪟 Windows Sandbox">
|
||||
|
||||
<EditableCodeBlock
|
||||
key="windows-tab"
|
||||
lang="python"
|
||||
defaultValues={{}}
|
||||
>
|
||||
{`import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import traceback
|
||||
|
||||
from agent import ComputerAgent
|
||||
from computer import Computer, VMProviderType
|
||||
from dotenv import load_dotenv
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def handle_sigint(sig, frame):
|
||||
print("\\n\\nExecution interrupted by user. Exiting gracefully...")
|
||||
exit(0)
|
||||
|
||||
|
||||
async def fill_application():
|
||||
try:
|
||||
async with Computer(
|
||||
os_type="windows",
|
||||
provider_type=VMProviderType.WINDOWS_SANDBOX,
|
||||
verbosity=logging.INFO,
|
||||
) as computer:
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.INFO,
|
||||
trajectory_dir="trajectories",
|
||||
use_prompt_caching=True,
|
||||
max_trajectory_budget=5.0,
|
||||
)
|
||||
|
||||
tasks = [
|
||||
"Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.",
|
||||
"Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf."
|
||||
]
|
||||
|
||||
history = []
|
||||
|
||||
for i, task in enumerate(tasks, 1):
|
||||
print(f"\\n[Task {i}/{len(tasks)}] {task}")
|
||||
|
||||
# Add user message to history
|
||||
history.append({"role": "user", "content": task})
|
||||
|
||||
# Run agent with conversation history
|
||||
async for result in agent.run(history, stream=False):
|
||||
history += result.get("output", [])
|
||||
|
||||
# Print output for debugging
|
||||
for item in result.get("output", []):
|
||||
if item.get("type") == "message":
|
||||
content = item.get("content", [])
|
||||
for content_part in content:
|
||||
if content_part.get("text"):
|
||||
logger.info(f"Agent: {content_part.get('text')}")
|
||||
elif item.get("type") == "computer_call":
|
||||
action = item.get("action", {})
|
||||
action_type = action.get("type", "")
|
||||
logger.debug(f"Computer Action: {action_type}")
|
||||
|
||||
print(f"✅ Task {i}/{len(tasks)} completed")
|
||||
|
||||
print("\\n🎉 All tasks completed successfully!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in fill_application: {e}")
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
load_dotenv()
|
||||
|
||||
if "ANTHROPIC_API_KEY" not in os.environ:
|
||||
raise RuntimeError(
|
||||
"Please set the ANTHROPIC_API_KEY environment variable.\\n"
|
||||
"You can add it to a .env file in the project root."
|
||||
)
|
||||
|
||||
signal.signal(signal.SIGINT, handle_sigint)
|
||||
|
||||
asyncio.run(fill_application())
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running automation: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()`}
|
||||
</EditableCodeBlock>
|
||||
|
||||
</Tab>
|
||||
<Tab value="🐳 Docker">
|
||||
|
||||
<EditableCodeBlock
|
||||
key="docker-tab"
|
||||
lang="python"
|
||||
defaultValues={{
|
||||
"container-name": "trycua/cua-ubuntu:latest"
|
||||
}}
|
||||
>
|
||||
{`import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import traceback
|
||||
|
||||
from agent import ComputerAgent
|
||||
from computer import Computer, VMProviderType
|
||||
from dotenv import load_dotenv
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def handle_sigint(sig, frame):
|
||||
print("\\n\\nExecution interrupted by user. Exiting gracefully...")
|
||||
exit(0)
|
||||
|
||||
|
||||
async def fill_application():
|
||||
try:
|
||||
async with Computer(
|
||||
os_type="linux",
|
||||
provider_type=VMProviderType.DOCKER,
|
||||
name="`}<EditableValue placeholder="container-name" />{`",
|
||||
verbosity=logging.INFO,
|
||||
) as computer:
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.INFO,
|
||||
trajectory_dir="trajectories",
|
||||
use_prompt_caching=True,
|
||||
max_trajectory_budget=5.0,
|
||||
)
|
||||
|
||||
tasks = [
|
||||
"Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.",
|
||||
"Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf."
|
||||
]
|
||||
|
||||
history = []
|
||||
|
||||
for i, task in enumerate(tasks, 1):
|
||||
print(f"\\n[Task {i}/{len(tasks)}] {task}")
|
||||
|
||||
# Add user message to history
|
||||
history.append({"role": "user", "content": task})
|
||||
|
||||
# Run agent with conversation history
|
||||
async for result in agent.run(history, stream=False):
|
||||
history += result.get("output", [])
|
||||
|
||||
# Print output for debugging
|
||||
for item in result.get("output", []):
|
||||
if item.get("type") == "message":
|
||||
content = item.get("content", [])
|
||||
for content_part in content:
|
||||
if content_part.get("text"):
|
||||
logger.info(f"Agent: {content_part.get('text')}")
|
||||
elif item.get("type") == "computer_call":
|
||||
action = item.get("action", {})
|
||||
action_type = action.get("type", "")
|
||||
logger.debug(f"Computer Action: {action_type}")
|
||||
|
||||
print(f"✅ Task {i}/{len(tasks)} completed")
|
||||
|
||||
print("\\n🎉 All tasks completed successfully!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in fill_application: {e}")
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
load_dotenv()
|
||||
|
||||
if "ANTHROPIC_API_KEY" not in os.environ:
|
||||
raise RuntimeError(
|
||||
"Please set the ANTHROPIC_API_KEY environment variable.\\n"
|
||||
"You can add it to a .env file in the project root."
|
||||
)
|
||||
|
||||
signal.signal(signal.SIGINT, handle_sigint)
|
||||
|
||||
asyncio.run(fill_application())
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running automation: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()`}
|
||||
</EditableCodeBlock>
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Next Steps
|
||||
|
||||
- Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands)
|
||||
- Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/)
|
||||
- Experiment with different [Models and Providers](/agent-sdk/supported-model-providers/)
|
||||
5
docs/content/docs/example-usecases/meta.json
Normal file
5
docs/content/docs/example-usecases/meta.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"title": "Example Use Cases",
|
||||
"description": "Real-world examples of building with Cua",
|
||||
"pages": ["form-filling"]
|
||||
}
|
||||
@@ -12,18 +12,13 @@ Cua is a framework for automating Windows, Mac, and Linux apps powered by comput
|
||||
Cua makes every stage of computer-using agent development simple:
|
||||
|
||||
- **Development**: Use any LLM provider with liteLLM. The agent SDK makes multiple agent loop providers, trajectory tracing, caching, and budget management easy
|
||||
- **Containerization**: cua offers Docker containers pre-installed with everything needed for AI-powered RPA
|
||||
- **Deployment**: cua cloud gives you a production-ready cloud environment for your assistants
|
||||
- **Containerization**: Cua offers Docker containers pre-installed with everything needed for AI-powered RPA
|
||||
- **Deployment**: Cua cloud gives you a production-ready cloud environment for your assistants
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-6 mt-8">
|
||||
<Card icon={<Monitor />} href="/quickstart-ui" title="Quickstart (UI)">
|
||||
Try the cua Agent UI in your browser—no coding required.
|
||||
</Card>
|
||||
<Card icon={<Code />} href="/quickstart-devs" title="Quickstart (Developers)">
|
||||
<Card icon={<Monitor />} href="/quickstart-devs" title="Quickstart (Developers)">
|
||||
Build with Python—full SDK and agent code examples.
|
||||
</Card>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 gap-6 mt-6">
|
||||
<Card icon={<BookOpen />} href="/libraries/agent" title="API Reference">
|
||||
Explore the agent SDK and APIs
|
||||
</Card>
|
||||
|
||||
@@ -17,10 +17,12 @@ Lume follows the XDG Base Directory specification for the configuration file:
|
||||
- Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`)
|
||||
|
||||
By default, other data is stored in:
|
||||
|
||||
- VM data: `~/.lume`
|
||||
- Cache files: `~/.lume/cache`
|
||||
|
||||
The config file contains settings for:
|
||||
|
||||
- VM storage locations and the default location
|
||||
- Cache directory location
|
||||
- Whether caching is enabled
|
||||
@@ -88,6 +90,7 @@ lume delete <name>
|
||||
### How to Install macOS from an IPSW Image
|
||||
|
||||
#### Create a new macOS VM using the latest supported IPSW image:
|
||||
|
||||
Run the following command to create a new macOS virtual machine using the latest available IPSW image:
|
||||
|
||||
```bash
|
||||
@@ -95,6 +98,7 @@ lume create <name> --os macos --ipsw latest
|
||||
```
|
||||
|
||||
#### Create a new macOS VM using a specific IPSW image:
|
||||
|
||||
To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source.
|
||||
|
||||
Then, use the downloaded IPSW path:
|
||||
|
||||
@@ -1,9 +1,3 @@
|
||||
{
|
||||
"pages": [
|
||||
"installation",
|
||||
"prebuilt-images",
|
||||
"cli-reference",
|
||||
"http-api",
|
||||
"faq"
|
||||
]
|
||||
"pages": ["installation", "prebuilt-images", "cli-reference", "http-api", "faq"]
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ docker run -it --rm \
|
||||
-e RAM_SIZE=8192 \
|
||||
trycua/lumier:latest
|
||||
```
|
||||
Access the VM in your browser at [http://localhost:8006](http://localhost:8006).
|
||||
Access the VM in your browser at **http://localhost:8006**.
|
||||
|
||||
After running the command above, you can access your macOS VM through a web browser (e.g., http://localhost:8006).
|
||||
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
{
|
||||
"pages": [
|
||||
"installation",
|
||||
"docker",
|
||||
"docker-compose",
|
||||
"building-lumier"
|
||||
]
|
||||
"pages": ["installation", "docker", "docker-compose", "building-lumier"]
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
{
|
||||
"pages": [
|
||||
"installation",
|
||||
"configuration",
|
||||
"usage",
|
||||
"tools",
|
||||
"client-integrations",
|
||||
"llm-integrations"
|
||||
]
|
||||
}
|
||||
"pages": [
|
||||
"installation",
|
||||
"configuration",
|
||||
"usage",
|
||||
"tools",
|
||||
"client-integrations",
|
||||
"llm-integrations"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
{
|
||||
"title": "Home",
|
||||
"description": "Documentation Home",
|
||||
"root": true,
|
||||
"defaultOpen": true,
|
||||
"pages": [
|
||||
"index",
|
||||
"quickstart-ui",
|
||||
"quickstart-cli",
|
||||
"quickstart-devs",
|
||||
"telemetry",
|
||||
"---[BookCopy]Computer Playbook---",
|
||||
"...computer-sdk",
|
||||
"---[BookCopy]Agent Playbook---",
|
||||
"...agent-sdk",
|
||||
"---[CodeXml]API Reference---",
|
||||
"...libraries"
|
||||
]
|
||||
}
|
||||
"title": "Home",
|
||||
"description": "Documentation Home",
|
||||
"root": true,
|
||||
"defaultOpen": true,
|
||||
"pages": [
|
||||
"index",
|
||||
"quickstart-devs",
|
||||
"quickstart-cli",
|
||||
"telemetry",
|
||||
"example-usecases",
|
||||
"---[BookCopy]Computer Playbook---",
|
||||
"...computer-sdk",
|
||||
"---[BookCopy]Agent Playbook---",
|
||||
"...agent-sdk",
|
||||
"---[CodeXml]API Reference---",
|
||||
"...libraries"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: Quickstart (CLI)
|
||||
description: Get started with the cua Agent CLI in 4 steps
|
||||
description: Get started with the Cua Agent CLI in 4 steps
|
||||
icon: Rocket
|
||||
---
|
||||
|
||||
@@ -8,14 +8,14 @@ import { Step, Steps } from 'fumadocs-ui/components/steps';
|
||||
import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
|
||||
import { Accordion, Accordions } from 'fumadocs-ui/components/accordion';
|
||||
|
||||
Get up and running with the cua Agent CLI in 4 simple steps.
|
||||
Get up and running with the Cua Agent CLI in 4 simple steps.
|
||||
|
||||
<Steps>
|
||||
<Step>
|
||||
|
||||
## Introduction
|
||||
|
||||
cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent CLI provides a clean terminal interface to control your remote computer using natural language commands.
|
||||
Cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent CLI provides a clean terminal interface to control your remote computer using natural language commands.
|
||||
|
||||
</Step>
|
||||
|
||||
@@ -23,39 +23,45 @@ cua combines Computer (interface) + Agent (AI) for automating desktop apps. The
|
||||
|
||||
## Set Up Your Computer Environment
|
||||
|
||||
Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup:
|
||||
Choose how you want to run your Cua computer. **Cloud Sandbox is recommended** for the easiest setup:
|
||||
|
||||
<Tabs items={['☁️ Cloud Sandbox (Recommended)', 'Linux on Docker', 'Windows Sandbox', 'macOS VM']}>
|
||||
<Tab value="☁️ Cloud Sandbox (Recommended)">
|
||||
|
||||
**Easiest & safest way to get started - works on any host OS**
|
||||
|
||||
<Tabs items={['☁️ Cloud (Recommended)', 'Lume (macOS Only)', 'Windows Sandbox (Windows Only)', 'Docker (Cross-Platform)']}>
|
||||
<Tab value="☁️ Cloud (Recommended)">
|
||||
|
||||
**Easiest & safest way to get started**
|
||||
|
||||
1. Go to [trycua.com/signin](https://www.trycua.com/signin)
|
||||
2. Navigate to **Dashboard > Containers > Create Instance**
|
||||
3. Create a **Medium, Ubuntu 22** container
|
||||
4. Note your container name and API key
|
||||
|
||||
|
||||
Your cloud container will be automatically configured and ready to use.
|
||||
|
||||
</Tab>
|
||||
<Tab value="Lume (macOS Only)">
|
||||
|
||||
1. Install lume cli
|
||||
<Tab value="Linux on Docker">
|
||||
|
||||
**Run Linux desktop locally on macOS, Windows, or Linux hosts**
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
|
||||
2. Pull the CUA XFCE container (lightweight desktop)
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
docker pull --platform=linux/amd64 trycua/cua-xfce:latest
|
||||
```
|
||||
|
||||
2. Start a local cua container
|
||||
Or use KASM for a full-featured desktop:
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows Sandbox (Windows Only)">
|
||||
|
||||
1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11)
|
||||
<Tab value="Windows Sandbox">
|
||||
|
||||
**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11**
|
||||
|
||||
1. Enable Windows Sandbox
|
||||
2. Install pywinsandbox dependency
|
||||
|
||||
```bash
|
||||
@@ -65,14 +71,20 @@ Choose how you want to run your cua computer. **Cloud containers are recommended
|
||||
3. Windows Sandbox will be automatically configured when you run the CLI
|
||||
|
||||
</Tab>
|
||||
<Tab value="Docker (Cross-Platform)">
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
<Tab value="macOS VM">
|
||||
|
||||
2. Pull the CUA Ubuntu container
|
||||
**macOS hosts only - requires Lume CLI**
|
||||
|
||||
1. Install lume cli
|
||||
|
||||
```bash
|
||||
docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
|
||||
2. Start a local Cua macOS VM
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
@@ -82,7 +94,7 @@ Choose how you want to run your cua computer. **Cloud containers are recommended
|
||||
|
||||
<Step>
|
||||
|
||||
## Install cua
|
||||
## Install Cua
|
||||
|
||||
<Accordions type="single" defaultValue="uv">
|
||||
|
||||
@@ -116,7 +128,7 @@ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | ie
|
||||
|
||||
```bash
|
||||
uv python install 3.12
|
||||
# uv will install cua dependencies automatically when you use --with "cua-agent[cli]"
|
||||
# uv will install Cua dependencies automatically when you use --with "cua-agent[cli]"
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
@@ -166,7 +178,7 @@ conda create -n cua python=3.12
|
||||
conda activate cua
|
||||
```
|
||||
|
||||
### Install cua
|
||||
### Install Cua
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[cli]" cua-computer
|
||||
@@ -176,7 +188,7 @@ pip install "cua-agent[cli]" cua-computer
|
||||
|
||||
<Accordion title="pip" value="pip">
|
||||
|
||||
### Install cua
|
||||
### Install Cua
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[cli]" cua-computer
|
||||
@@ -190,7 +202,7 @@ pip install "cua-agent[cli]" cua-computer
|
||||
|
||||
<Step>
|
||||
|
||||
## Run cua CLI
|
||||
## Run Cua CLI
|
||||
|
||||
Choose your preferred AI model:
|
||||
|
||||
@@ -219,6 +231,7 @@ python -m agent.cli openai/computer-use-preview
|
||||
<Tab value="uv">
|
||||
|
||||
```bash
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-5-20250929
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-20250514
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-1-20250805
|
||||
uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-20250514
|
||||
@@ -229,6 +242,7 @@ uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022
|
||||
<Tab value="conda/pip">
|
||||
|
||||
```bash
|
||||
python -m agent.cli anthropic/claude-sonnet-4-5-20250929
|
||||
python -m agent.cli anthropic/claude-opus-4-1-20250805
|
||||
python -m agent.cli anthropic/claude-opus-4-20250514
|
||||
python -m agent.cli anthropic/claude-sonnet-4-20250514
|
||||
@@ -298,8 +312,8 @@ python -m agent.cli omniparser+ollama_chat/llama3.2:latest
|
||||
|
||||
If you haven't set up environment variables, the CLI will guide you through the setup:
|
||||
|
||||
1. **Container Name**: Enter your cua container name (or get one at [trycua.com](https://www.trycua.com/))
|
||||
2. **CUA API Key**: Enter your cua API key
|
||||
1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [trycua.com](https://www.trycua.com/))
|
||||
2. **CUA API Key**: Enter your Cua API key
|
||||
3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.)
|
||||
|
||||
### Start Chatting
|
||||
@@ -326,6 +340,4 @@ You can ask your agent to perform actions like:
|
||||
|
||||
---
|
||||
|
||||
For advanced Python usage and GUI interface, see the [Quickstart (GUI)](/quickstart-ui) and [Quickstart for Developers](/quickstart-devs).
|
||||
|
||||
For running models locally, see [Running Models Locally](/agent-sdk/local-models).
|
||||
For running models locally, see [Running Models Locally](/agent-sdk/supported-model-providers/local-models).
|
||||
|
||||
@@ -1,61 +1,60 @@
|
||||
---
|
||||
title: Quickstart (for Developers)
|
||||
description: Get started with cua in 5 steps
|
||||
title: Quickstart
|
||||
description: Get started with Cua in three steps
|
||||
icon: Rocket
|
||||
---
|
||||
|
||||
import { Step, Steps } from 'fumadocs-ui/components/steps';
|
||||
import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
|
||||
|
||||
Get up and running with cua in 5 simple steps.
|
||||
This quickstart guides you through setting up your [computer environment](#set-up-your-computer-environment), programmatic control with a [Cua computer](#using-computer), and task automation with a [Cua agent](#using-agent):
|
||||
|
||||
<Steps>
|
||||
<Step>
|
||||
|
||||
## Introduction
|
||||
|
||||
cua combines Computer (interface) + Agent (AI) for automating desktop apps. Computer handles clicks/typing, Agent provides the intelligence.
|
||||
|
||||
</Step>
|
||||
|
||||
<Step>
|
||||
|
||||
## Set Up Your Computer Environment
|
||||
|
||||
Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup:
|
||||
Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute.
|
||||
|
||||
You can run your Cua computer in the cloud (recommended for easiest setup), locally on macOS with Lume, locally on Windows with a Windows Sandbox, or in a Docker container on any platform. Choose the option that matches your system and needs.
|
||||
|
||||
<Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox']}>
|
||||
<Tab value="☁️ Cloud">
|
||||
|
||||
Cua Cloud Sandbox provides virtual machines that run Ubuntu.
|
||||
|
||||
<Tabs items={['☁️ Cloud (Recommended)', 'Lume (macOS Only)', 'Windows Sandbox (Windows Only)', 'Docker (Cross-Platform)']}>
|
||||
<Tab value="☁️ Cloud (Recommended)">
|
||||
|
||||
**Easiest & safest way to get started**
|
||||
|
||||
1. Go to [trycua.com/signin](https://www.trycua.com/signin)
|
||||
2. Navigate to **Dashboard > Containers > Create Instance**
|
||||
3. Create a **Medium, Ubuntu 22** container
|
||||
4. Note your container name and API key
|
||||
|
||||
Your cloud container will be automatically configured and ready to use.
|
||||
3. Create a **Medium, Ubuntu 22** sandbox
|
||||
4. Note your sandbox name and API key
|
||||
|
||||
Your Cloud Sandbox will be automatically configured and ready to use.
|
||||
|
||||
</Tab>
|
||||
<Tab value="Lume (macOS Only)">
|
||||
<Tab value="🍎 Lume">
|
||||
|
||||
Lume containers are macOS virtual machines that run on a macOS host machine.
|
||||
|
||||
1. Install lume cli
|
||||
1. Install the Lume CLI:
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
|
||||
2. Start a local cua container
|
||||
2. Start a local Cua sandbox:
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows Sandbox (Windows Only)">
|
||||
<Tab value="🪟 Windows Sandbox">
|
||||
|
||||
Windows Sandbox provides Windows virtual environments that run on a Windows host machine.
|
||||
|
||||
1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11)
|
||||
2. Install pywinsandbox dependency
|
||||
1. Enable [Windows Sandbox](https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/windows-sandbox-install) (requires Windows 10 Pro/Enterprise or Windows 11)
|
||||
2. Install the `pywinsandbox` dependency:
|
||||
|
||||
```bash
|
||||
pip install -U git+https://github.com/karkason/pywinsandbox.git
|
||||
@@ -64,11 +63,13 @@ Choose how you want to run your cua computer. **Cloud containers are recommended
|
||||
3. Windows Sandbox will be automatically configured when you run the CLI
|
||||
|
||||
</Tab>
|
||||
<Tab value="Docker (Cross-Platform)">
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
<Tab value="🐳 Docker">
|
||||
|
||||
2. Pull the CUA Ubuntu container
|
||||
Docker provides a way to run Ubuntu containers on any host machine.
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
|
||||
2. Pull the CUA Ubuntu sandbox:
|
||||
|
||||
```bash
|
||||
docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
|
||||
@@ -81,90 +82,203 @@ Choose how you want to run your cua computer. **Cloud containers are recommended
|
||||
|
||||
<Step>
|
||||
|
||||
## Install cua
|
||||
## Using Computer
|
||||
|
||||
Connect to your Cua computer and perform basic interactions, such as taking screenshots or simulating user input.
|
||||
|
||||
<Tabs items={['Python', 'TypeScript']}>
|
||||
<Tab value="Python">
|
||||
Install the Cua computer Python SDK:
|
||||
```bash
|
||||
pip install "cua-agent[all]" cua-computer
|
||||
pip install cua-computer
|
||||
```
|
||||
|
||||
# or install specific providers
|
||||
pip install "cua-agent[openai]" # OpenAI computer-use-preview support
|
||||
pip install "cua-agent[anthropic]" # Anthropic Claude support
|
||||
pip install "cua-agent[omni]" # Omniparser + any LLM support
|
||||
pip install "cua-agent[uitars]" # UI-TARS
|
||||
pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support
|
||||
pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support
|
||||
pip install "cua-agent[glm45v-hf]" # GLM-4.5V + Huggingface support
|
||||
pip install "cua-agent[ui]" # Gradio UI support
|
||||
Then, connect to your desired computer environment:
|
||||
|
||||
<Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox', '🖥️ Host Desktop']}>
|
||||
<Tab value="☁️ Cloud">
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="linux",
|
||||
provider_type="cloud",
|
||||
name="your-sandbox-name",
|
||||
api_key="your-api-key"
|
||||
)
|
||||
await computer.run() # Connect to the sandbox
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🍎 Lume">
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="macos",
|
||||
provider_type="lume",
|
||||
name="macos-sequoia-cua:latest"
|
||||
)
|
||||
await computer.run() # Launch & connect to the container
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🪟 Windows Sandbox">
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="windows",
|
||||
provider_type="windows_sandbox"
|
||||
)
|
||||
await computer.run() # Launch & connect to the container
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🐳 Docker">
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(
|
||||
os_type="linux",
|
||||
provider_type="docker",
|
||||
name="trycua/cua-ubuntu:latest"
|
||||
)
|
||||
await computer.run() # Launch & connect to the container
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🖥️ Host Desktop">
|
||||
Install and run `cua-computer-server`:
|
||||
```bash
|
||||
pip install cua-computer-server
|
||||
python -m computer_server
|
||||
```
|
||||
|
||||
Then, use the `Computer` object to connect:
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(use_host_computer_server=True)
|
||||
await computer.run() # Connect to the host desktop
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
Once connected, you can perform interactions:
|
||||
```python
|
||||
try:
|
||||
# Take a screenshot of the computer's current display
|
||||
screenshot = await computer.interface.screenshot()
|
||||
# Simulate a left-click at coordinates (100, 100)
|
||||
await computer.interface.left_click(100, 100)
|
||||
# Type "Hello!" into the active application
|
||||
await computer.interface.type("Hello!")
|
||||
finally:
|
||||
await computer.close()
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="TypeScript">
|
||||
Install the Cua computer TypeScript SDK:
|
||||
```bash
|
||||
npm install @trycua/computer
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
</Step>
|
||||
Then, connect to your desired computer environment:
|
||||
|
||||
<Step>
|
||||
<Tabs items={['☁️ Cloud','🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox', '🖥️ Host Desktop']}>
|
||||
<Tab value="☁️ Cloud">
|
||||
```typescript
|
||||
import { Computer, OSType } from '@trycua/computer';
|
||||
|
||||
## Using Computer
|
||||
const computer = new Computer({
|
||||
osType: OSType.LINUX,
|
||||
name: "your-sandbox-name",
|
||||
apiKey: "your-api-key"
|
||||
});
|
||||
await computer.run(); // Connect to the sandbox
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🍎 Lume">
|
||||
```typescript
|
||||
import { Computer, OSType, ProviderType } from '@trycua/computer';
|
||||
|
||||
<Tabs items={['Python', 'TypeScript']}>
|
||||
<Tab value="Python">
|
||||
```python
|
||||
from computer import Computer
|
||||
const computer = new Computer({
|
||||
osType: OSType.MACOS,
|
||||
providerType: ProviderType.LUME,
|
||||
name: "macos-sequoia-cua:latest"
|
||||
});
|
||||
await computer.run(); // Launch & connect to the container
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🪟 Windows Sandbox">
|
||||
```typescript
|
||||
import { Computer, OSType, ProviderType } from '@trycua/computer';
|
||||
|
||||
async with Computer(
|
||||
os_type="linux",
|
||||
provider_type="cloud",
|
||||
name="your-container-name",
|
||||
api_key="your-api-key"
|
||||
) as computer:
|
||||
# Take screenshot
|
||||
screenshot = await computer.interface.screenshot()
|
||||
const computer = new Computer({
|
||||
osType: OSType.WINDOWS,
|
||||
providerType: ProviderType.WINDOWS_SANDBOX
|
||||
});
|
||||
await computer.run(); // Launch & connect to the container
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🐳 Docker">
|
||||
```typescript
|
||||
import { Computer, OSType, ProviderType } from '@trycua/computer';
|
||||
|
||||
# Click and type
|
||||
await computer.interface.left_click(100, 100)
|
||||
await computer.interface.type("Hello!")
|
||||
```
|
||||
const computer = new Computer({
|
||||
osType: OSType.LINUX,
|
||||
providerType: ProviderType.DOCKER,
|
||||
name: "trycua/cua-ubuntu:latest"
|
||||
});
|
||||
await computer.run(); // Launch & connect to the container
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="🖥️ Host Desktop">
|
||||
First, install and run `cua-computer-server`:
|
||||
```bash
|
||||
pip install cua-computer-server
|
||||
python -m computer_server
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="TypeScript">
|
||||
Then, use the `Computer` object to connect:
|
||||
```typescript
|
||||
import { Computer } from '@trycua/computer';
|
||||
|
||||
const computer = new Computer({ useHostComputerServer: true });
|
||||
await computer.run(); // Connect to the host desktop
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
Once connected, you can perform interactions:
|
||||
```typescript
|
||||
import { Computer, OSType } from '@trycua/computer';
|
||||
|
||||
const computer = new Computer({
|
||||
osType: OSType.LINUX,
|
||||
name: "your-container-name",
|
||||
apiKey: "your-api-key"
|
||||
});
|
||||
|
||||
await computer.run();
|
||||
|
||||
try {
|
||||
// Take screenshot
|
||||
// Take a screenshot of the computer's current display
|
||||
const screenshot = await computer.interface.screenshot();
|
||||
|
||||
// Click and type
|
||||
// Simulate a left-click at coordinates (100, 100)
|
||||
await computer.interface.leftClick(100, 100);
|
||||
// Type "Hello!" into the active application
|
||||
await computer.interface.typeText("Hello!");
|
||||
} finally {
|
||||
await computer.close();
|
||||
}
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
Learn more about computers in the [Cua computers documentation](/computer-sdk/computers). You will see how to automate computers with agents in the next step.
|
||||
|
||||
</Step>
|
||||
|
||||
<Step>
|
||||
|
||||
## Using Agent
|
||||
|
||||
Utilize an Agent to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment.
|
||||
|
||||
Install the Cua agent Python SDK:
|
||||
```bash
|
||||
pip install "cua-agent[all]"
|
||||
```
|
||||
|
||||
Then, use the `ComputerAgent` object:
|
||||
```python
|
||||
from agent import ComputerAgent
|
||||
|
||||
@@ -182,12 +296,14 @@ async for result in agent.run(messages):
|
||||
print(item["content"][0]["text"])
|
||||
```
|
||||
|
||||
Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available models in [Supported Models](/agent-sdk/supported-model-providers/).
|
||||
|
||||
</Step>
|
||||
</Steps>
|
||||
|
||||
## Next Steps
|
||||
|
||||
{/* - Explore the [SDK documentation](/sdk) for advanced features */}
|
||||
|
||||
- Learn about [trajectory tracking](/agent-sdk/callbacks/trajectories) and [callbacks](/agent-sdk/callbacks/agent-lifecycle)
|
||||
- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for support
|
||||
- Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands)
|
||||
- Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/)
|
||||
- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help
|
||||
- Try out the [Form Filling](/example-usecases/form-filling) preset use case
|
||||
|
||||
@@ -1,216 +0,0 @@
|
||||
---
|
||||
title: Quickstart (GUI)
|
||||
description: Get started with the cua Agent UI in 3 steps
|
||||
icon: Rocket
|
||||
---
|
||||
|
||||
import { Step, Steps } from 'fumadocs-ui/components/steps';
|
||||
import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
|
||||
import { Accordion, Accordions } from 'fumadocs-ui/components/accordion';
|
||||
|
||||
Get up and running with the cua Agent UI in 3 simple steps.
|
||||
|
||||
<Steps>
|
||||
<Step>
|
||||
|
||||
## Introduction
|
||||
|
||||
cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent UI provides a simple chat interface to control your remote computer using natural language.
|
||||
|
||||
</Step>
|
||||
|
||||
<Step>
|
||||
|
||||
## Set Up Your Computer Environment
|
||||
|
||||
Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup:
|
||||
|
||||
<Tabs items={['☁️ Cloud (Recommended)', 'Lume (macOS Only)', 'Windows Sandbox (Windows Only)', 'Docker (Cross-Platform)']}>
|
||||
<Tab value="☁️ Cloud (Recommended)">
|
||||
|
||||
**Easiest & safest way to get started**
|
||||
|
||||
1. Go to [trycua.com/signin](https://www.trycua.com/signin)
|
||||
2. Navigate to **Dashboard > Containers > Create Instance**
|
||||
3. Create a **Medium, Ubuntu 22** container
|
||||
4. Note your container name and API key
|
||||
|
||||
Your cloud container will be automatically configured and ready to use.
|
||||
|
||||
</Tab>
|
||||
<Tab value="Lume (macOS Only)">
|
||||
|
||||
1. Install lume cli
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
|
||||
```
|
||||
|
||||
2. Start a local cua container
|
||||
|
||||
```bash
|
||||
lume run macos-sequoia-cua:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows Sandbox (Windows Only)">
|
||||
|
||||
1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11)
|
||||
2. Install pywinsandbox dependency
|
||||
|
||||
```bash
|
||||
pip install -U git+git://github.com/karkason/pywinsandbox.git
|
||||
```
|
||||
|
||||
3. Windows Sandbox will be automatically configured when you run the CLI
|
||||
|
||||
</Tab>
|
||||
<Tab value="Docker (Cross-Platform)">
|
||||
|
||||
1. Install Docker Desktop or Docker Engine
|
||||
|
||||
2. Pull the CUA Ubuntu container
|
||||
|
||||
```bash
|
||||
docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
</Step>
|
||||
|
||||
<Step>
|
||||
|
||||
## Install and Run cua
|
||||
|
||||
<Accordions type="single" defaultValue="uv">
|
||||
|
||||
<Accordion title="uv (Recommended)" value="uv">
|
||||
|
||||
### Install uv
|
||||
|
||||
<Tabs items={['macOS / Linux', 'Windows']} persist>
|
||||
<Tab value="macOS / Linux">
|
||||
|
||||
```bash
|
||||
# Use curl to download the script and execute it with sh:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# If your system doesn't have curl, you can use wget:
|
||||
# wget -qO- https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
|
||||
```powershell
|
||||
# Use irm to download the script and execute it with iex:
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
### Install Python 3.12
|
||||
|
||||
```bash
|
||||
uv python install 3.12
|
||||
```
|
||||
|
||||
### Run cua
|
||||
|
||||
```bash
|
||||
uv run --with "cua-agent[ui]" -m agent.ui
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="conda" value="conda">
|
||||
|
||||
### Install conda
|
||||
|
||||
<Tabs items={['macOS', 'Linux', 'Windows']} persist>
|
||||
<Tab value="macOS">
|
||||
|
||||
```bash
|
||||
mkdir -p ~/miniconda3
|
||||
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh
|
||||
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
|
||||
rm ~/miniconda3/miniconda.sh
|
||||
source ~/miniconda3/bin/activate
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Linux">
|
||||
|
||||
```bash
|
||||
mkdir -p ~/miniconda3
|
||||
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
|
||||
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
|
||||
rm ~/miniconda3/miniconda.sh
|
||||
source ~/miniconda3/bin/activate
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="Windows">
|
||||
|
||||
```powershell
|
||||
wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile ".\miniconda.exe"
|
||||
Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait
|
||||
del .\miniconda.exe
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
### Create and activate Python 3.12 environment
|
||||
|
||||
```bash
|
||||
conda create -n cua python=3.12
|
||||
conda activate cua
|
||||
```
|
||||
|
||||
### Install and run cua
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[ui]" cua-computer
|
||||
python -m agent.ui
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="pip" value="pip">
|
||||
|
||||
### Install cua
|
||||
|
||||
```bash
|
||||
pip install "cua-agent[ui]" cua-computer
|
||||
```
|
||||
|
||||
### Run the Agent UI
|
||||
|
||||
```bash
|
||||
python -m agent.ui
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
</Accordions>
|
||||
|
||||
### Start Chatting
|
||||
|
||||
Open your browser to the displayed URL and start chatting with your computer-using agent.
|
||||
|
||||
You can ask your agent to perform actions like:
|
||||
|
||||
- "Open Firefox and go to github.com"
|
||||
- "Take a screenshot and tell me what's on the screen"
|
||||
- "Type 'Hello world' into the terminal"
|
||||
|
||||
</Step>
|
||||
</Steps>
|
||||
|
||||
---
|
||||
|
||||
For advanced Python usage, see the [Quickstart for Developers](/quickstart-devs).
|
||||
@@ -16,6 +16,7 @@
|
||||
"mermaid": "^11.8.1",
|
||||
"next": "15.3.3",
|
||||
"next-themes": "^0.4.6",
|
||||
"posthog-js": "^1.276.0",
|
||||
"react": "^19.1.0",
|
||||
"react-dom": "^19.1.0",
|
||||
"remark": "^15.0.1",
|
||||
@@ -42,4 +43,4 @@
|
||||
"sharp"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
47
docs/pnpm-lock.yaml
generated
47
docs/pnpm-lock.yaml
generated
@@ -29,6 +29,9 @@ importers:
|
||||
next-themes:
|
||||
specifier: ^0.4.6
|
||||
version: 0.4.6(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
|
||||
posthog-js:
|
||||
specifier: ^1.276.0
|
||||
version: 1.276.0
|
||||
react:
|
||||
specifier: ^19.1.0
|
||||
version: 19.1.0
|
||||
@@ -489,6 +492,9 @@ packages:
|
||||
resolution: {integrity: sha512-6yB0117ZjsgNevZw3LP+bkrZa9mU/POPVaXgzMPOBbBc35w2P3R+1vMMhEfC06kYCpd5bf0jodBaTkYQW5TVeQ==}
|
||||
engines: {node: '>= 20.0.0'}
|
||||
|
||||
'@posthog/core@1.3.0':
|
||||
resolution: {integrity: sha512-hxLL8kZNHH098geedcxCz8y6xojkNYbmJEW+1vFXsmPcExyCXIUUJ/34X6xa9GcprKxd0Wsx3vfJQLQX4iVPhw==}
|
||||
|
||||
'@radix-ui/number@1.1.1':
|
||||
resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==}
|
||||
|
||||
@@ -1221,6 +1227,9 @@ packages:
|
||||
confbox@0.2.2:
|
||||
resolution: {integrity: sha512-1NB+BKqhtNipMsov4xI/NnhCKp9XG9NamYp5PVm9klAT0fsrNPjaFICsCFhNhwZJKNh7zB/3q8qXz0E9oaMNtQ==}
|
||||
|
||||
core-js@3.46.0:
|
||||
resolution: {integrity: sha512-vDMm9B0xnqqZ8uSBpZ8sNtRtOdmfShrvT6h2TuQGLs0Is+cR0DYbj/KWP6ALVNbWPpqA/qPLoOuppJN07humpA==}
|
||||
|
||||
cose-base@1.0.3:
|
||||
resolution: {integrity: sha512-s9whTXInMSgAp/NVXVNuVxVKzGH2qck3aQlVHxDCdAEPgtMKwc4Wq6/QKhgdEdgbLSi9rBTAcPoRa6JpiG4ksg==}
|
||||
|
||||
@@ -1492,6 +1501,9 @@ packages:
|
||||
picomatch:
|
||||
optional: true
|
||||
|
||||
fflate@0.4.8:
|
||||
resolution: {integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==}
|
||||
|
||||
fumadocs-core@15.5.1:
|
||||
resolution: {integrity: sha512-5eJPJw+BFWFdgrtWPQ9aAZAhhsyuZAwth8OjBd9R77sXoIoae4Y4lJZMq3BeSpJZcuIAOVbSCS+pJhsBAoXJ8g==}
|
||||
peerDependencies:
|
||||
@@ -2012,6 +2024,20 @@ packages:
|
||||
resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==}
|
||||
engines: {node: ^10 || ^12 || >=14}
|
||||
|
||||
posthog-js@1.276.0:
|
||||
resolution: {integrity: sha512-FYZE1037LrAoKKeUU0pUL7u8WwNK2BVeg5TFApwquVPUdj9h7u5Z077A313hPN19Ar+7Y+VHxqYqdHc4VNsVgw==}
|
||||
peerDependencies:
|
||||
'@rrweb/types': 2.0.0-alpha.17
|
||||
rrweb-snapshot: 2.0.0-alpha.17
|
||||
peerDependenciesMeta:
|
||||
'@rrweb/types':
|
||||
optional: true
|
||||
rrweb-snapshot:
|
||||
optional: true
|
||||
|
||||
preact@10.27.2:
|
||||
resolution: {integrity: sha512-5SYSgFKSyhCbk6SrXyMpqjb5+MQBgfvEKE/OC+PujcY34sOpqtr+0AZQtPYx5IA6VxynQ7rUPCtKzyovpj9Bpg==}
|
||||
|
||||
prettier@3.6.2:
|
||||
resolution: {integrity: sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==}
|
||||
engines: {node: '>=14'}
|
||||
@@ -2317,6 +2343,9 @@ packages:
|
||||
vscode-uri@3.0.8:
|
||||
resolution: {integrity: sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==}
|
||||
|
||||
web-vitals@4.2.4:
|
||||
resolution: {integrity: sha512-r4DIlprAGwJ7YM11VZp4R884m0Vmgr6EAKe3P+kO0PPj3Unqyvv59rczf6UiGcb9Z8QxZVcqKNwv/g0WNdWwsw==}
|
||||
|
||||
yallist@5.0.0:
|
||||
resolution: {integrity: sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==}
|
||||
engines: {node: '>=18'}
|
||||
@@ -2642,6 +2671,8 @@ snapshots:
|
||||
|
||||
'@orama/orama@3.1.7': {}
|
||||
|
||||
'@posthog/core@1.3.0': {}
|
||||
|
||||
'@radix-ui/number@1.1.1': {}
|
||||
|
||||
'@radix-ui/primitive@1.1.2': {}
|
||||
@@ -3378,6 +3409,8 @@ snapshots:
|
||||
|
||||
confbox@0.2.2: {}
|
||||
|
||||
core-js@3.46.0: {}
|
||||
|
||||
cose-base@1.0.3:
|
||||
dependencies:
|
||||
layout-base: 1.0.2
|
||||
@@ -3702,6 +3735,8 @@ snapshots:
|
||||
optionalDependencies:
|
||||
picomatch: 4.0.2
|
||||
|
||||
fflate@0.4.8: {}
|
||||
|
||||
fumadocs-core@15.5.1(@types/react@19.1.8)(next@15.3.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0):
|
||||
dependencies:
|
||||
'@formatjs/intl-localematcher': 0.6.1
|
||||
@@ -4566,6 +4601,16 @@ snapshots:
|
||||
picocolors: 1.1.1
|
||||
source-map-js: 1.2.1
|
||||
|
||||
posthog-js@1.276.0:
|
||||
dependencies:
|
||||
'@posthog/core': 1.3.0
|
||||
core-js: 3.46.0
|
||||
fflate: 0.4.8
|
||||
preact: 10.27.2
|
||||
web-vitals: 4.2.4
|
||||
|
||||
preact@10.27.2: {}
|
||||
|
||||
prettier@3.6.2: {}
|
||||
|
||||
property-information@7.1.0: {}
|
||||
@@ -4934,6 +4979,8 @@ snapshots:
|
||||
|
||||
vscode-uri@3.0.8: {}
|
||||
|
||||
web-vitals@4.2.4: {}
|
||||
|
||||
yallist@5.0.0: {}
|
||||
|
||||
zod@3.25.76: {}
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
import {
|
||||
defineConfig,
|
||||
defineDocs,
|
||||
frontmatterSchema,
|
||||
metaSchema,
|
||||
} from 'fumadocs-mdx/config';
|
||||
import { defineConfig, defineDocs, frontmatterSchema, metaSchema } from 'fumadocs-mdx/config';
|
||||
import { z } from 'zod';
|
||||
|
||||
// You can customise Zod schemas for frontmatter and `meta.json` here
|
||||
|
||||
@@ -1,27 +1,17 @@
|
||||
import { getApiVersions, source } from '@/lib/source';
|
||||
import { getMDXComponents } from '@/mdx-components';
|
||||
import { buttonVariants } from 'fumadocs-ui/components/ui/button';
|
||||
import {
|
||||
Popover,
|
||||
PopoverContent,
|
||||
PopoverTrigger,
|
||||
} from 'fumadocs-ui/components/ui/popover';
|
||||
import { Popover, PopoverContent, PopoverTrigger } from 'fumadocs-ui/components/ui/popover';
|
||||
import { createRelativeLink } from 'fumadocs-ui/mdx';
|
||||
import {
|
||||
DocsBody,
|
||||
DocsDescription,
|
||||
DocsPage,
|
||||
DocsTitle,
|
||||
} from 'fumadocs-ui/page';
|
||||
import { DocsBody, DocsDescription, DocsPage, DocsTitle } from 'fumadocs-ui/page';
|
||||
import { cn } from 'fumadocs-ui/utils/cn';
|
||||
import { ChevronDown, CodeXml, ExternalLink } from 'lucide-react';
|
||||
import type { Metadata } from 'next';
|
||||
import Link from 'next/link';
|
||||
import { notFound, redirect } from 'next/navigation';
|
||||
import { PageFeedback } from '@/components/page-feedback';
|
||||
|
||||
export default async function Page(props: {
|
||||
params: Promise<{ slug?: string[] }>;
|
||||
}) {
|
||||
export default async function Page(props: { params: Promise<{ slug?: string[] }> }) {
|
||||
const params = await props.params;
|
||||
const slug = params.slug || [];
|
||||
const page = source.getPage(slug);
|
||||
@@ -65,7 +55,8 @@ export default async function Page(props: {
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="currentColor"
|
||||
className="h-5"
|
||||
viewBox="0 0 448 512">
|
||||
viewBox="0 0 448 512"
|
||||
>
|
||||
<title>Windows</title>
|
||||
<path d="M0 93.7l183.6-25.3v177.4H0V93.7zm0 324.6l183.6 25.3V268.4H0v149.9zm203.8 28L448 480V268.4H203.8v177.9zm0-380.6v180.1H448V32L203.8 65.7z" />
|
||||
</svg>
|
||||
@@ -75,7 +66,8 @@ export default async function Page(props: {
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="currentColor"
|
||||
className="h-5"
|
||||
viewBox="0 0 384 512">
|
||||
viewBox="0 0 384 512"
|
||||
>
|
||||
<title>macOS</title>
|
||||
<path d="M318.7 268.7c-.2-36.7 16.4-64.4 50-84.8-18.8-26.9-47.2-41.7-84.7-44.6-35.5-2.8-74.3 20.7-88.5 20.7-15 0-49.4-19.7-76.4-19.7C63.3 141.2 4 184.8 4 273.5q0 39.3 14.4 81.2c12.8 36.7 59 126.7 107.2 125.2 25.2-.6 43-17.9 75.8-17.9 31.8 0 48.3 17.9 76.4 17.9 48.6-.7 90.4-82.5 102.6-119.3-65.2-30.7-61.7-90-61.7-91.9zm-56.6-164.2c27.3-32.4 24.8-61.9 24-72.5-24.1 1.4-52 16.4-67.9 34.9-17.5 19.8-27.8 44.3-25.6 71.9 26.1 2 49.9-11.4 69.5-34.3z" />
|
||||
</svg>
|
||||
@@ -85,7 +77,8 @@ export default async function Page(props: {
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="currentColor"
|
||||
className="h-5"
|
||||
viewBox="0 0 448 512">
|
||||
viewBox="0 0 448 512"
|
||||
>
|
||||
<title>Linux</title>
|
||||
<path d="M220.8 123.3c1 .5 1.8 1.7 3 1.7 1.1 0 2.8-.4 2.9-1.5 .2-1.4-1.9-2.3-3.2-2.9-1.7-.7-3.9-1-5.5-.1-.4 .2-.8 .7-.6 1.1 .3 1.3 2.3 1.1 3.4 1.7zm-21.9 1.7c1.2 0 2-1.2 3-1.7 1.1-.6 3.1-.4 3.5-1.6 .2-.4-.2-.9-.6-1.1-1.6-.9-3.8-.6-5.5 .1-1.3 .6-3.4 1.5-3.2 2.9 .1 1 1.8 1.5 2.8 1.4zM420 403.8c-3.6-4-5.3-11.6-7.2-19.7-1.8-8.1-3.9-16.8-10.5-22.4-1.3-1.1-2.6-2.1-4-2.9-1.3-.8-2.7-1.5-4.1-2 9.2-27.3 5.6-54.5-3.7-79.1-11.4-30.1-31.3-56.4-46.5-74.4-17.1-21.5-33.7-41.9-33.4-72C311.1 85.4 315.7 .1 234.8 0 132.4-.2 158 103.4 156.9 135.2c-1.7 23.4-6.4 41.8-22.5 64.7-18.9 22.5-45.5 58.8-58.1 96.7-6 17.9-8.8 36.1-6.2 53.3-6.5 5.8-11.4 14.7-16.6 20.2-4.2 4.3-10.3 5.9-17 8.3s-14 6-18.5 14.5c-2.1 3.9-2.8 8.1-2.8 12.4 0 3.9 .6 7.9 1.2 11.8 1.2 8.1 2.5 15.7 .8 20.8-5.2 14.4-5.9 24.4-2.2 31.7 3.8 7.3 11.4 10.5 20.1 12.3 17.3 3.6 40.8 2.7 59.3 12.5 19.8 10.4 39.9 14.1 55.9 10.4 11.6-2.6 21.1-9.6 25.9-20.2 12.5-.1 26.3-5.4 48.3-6.6 14.9-1.2 33.6 5.3 55.1 4.1 .6 2.3 1.4 4.6 2.5 6.7v.1c8.3 16.7 23.8 24.3 40.3 23 16.6-1.3 34.1-11 48.3-27.9 13.6-16.4 36-23.2 50.9-32.2 7.4-4.5 13.4-10.1 13.9-18.3 .4-8.2-4.4-17.3-15.5-29.7zM223.7 87.3c9.8-22.2 34.2-21.8 44-.4 6.5 14.2 3.6 30.9-4.3 40.4-1.6-.8-5.9-2.6-12.6-4.9 1.1-1.2 3.1-2.7 3.9-4.6 4.8-11.8-.2-27-9.1-27.3-7.3-.5-13.9 10.8-11.8 23-4.1-2-9.4-3.5-13-4.4-1-6.9-.3-14.6 2.9-21.8zM183 75.8c10.1 0 20.8 14.2 19.1 33.5-3.5 1-7.1 2.5-10.2 4.6 1.2-8.9-3.3-20.1-9.6-19.6-8.4 .7-9.8 21.2-1.8 28.1 1 .8 1.9-.2-5.9 5.5-15.6-14.6-10.5-52.1 8.4-52.1zm-13.6 60.7c6.2-4.6 13.6-10 14.1-10.5 4.7-4.4 13.5-14.2 27.9-14.2 7.1 0 15.6 2.3 25.9 8.9 6.3 4.1 11.3 4.4 22.6 9.3 8.4 3.5 13.7 9.7 10.5 18.2-2.6 7.1-11 14.4-22.7 18.1-11.1 3.6-19.8 16-38.2 14.9-3.9-.2-7-1-9.6-2.1-8-3.5-12.2-10.4-20-15-8.6-4.8-13.2-10.4-14.7-15.3-1.4-4.9 0-9 4.2-12.3zm3.3 334c-2.7 35.1-43.9 34.4-75.3 18-29.9-15.8-68.6-6.5-76.5-21.9-2.4-4.7-2.4-12.7 2.6-26.4v-.2c2.4-7.6 .6-16-.6-23.9-1.2-7.8-1.8-15 .9-20 3.5-6.7 8.5-9.1 14.8-11.3 10.3-3.7 11.8-3.4 19.6-9.9 5.5-5.7 9.5-12.9 14.3-18 5.1-5.5 10-8.1 
17.7-6.9 8.1 1.2 15.1 6.8 21.9 16l19.6 35.6c9.5 19.9 43.1 48.4 41 68.9zm-1.4-25.9c-4.1-6.6-9.6-13.6-14.4-19.6 7.1 0 14.2-2.2 16.7-8.9 2.3-6.2 0-14.9-7.4-24.9-13.5-18.2-38.3-32.5-38.3-32.5-13.5-8.4-21.1-18.7-24.6-29.9s-3-23.3-.3-35.2c5.2-22.9 18.6-45.2 27.2-59.2 2.3-1.7 .8 3.2-8.7 20.8-8.5 16.1-24.4 53.3-2.6 82.4 .6-20.7 5.5-41.8 13.8-61.5 12-27.4 37.3-74.9 39.3-112.7 1.1 .8 4.6 3.2 6.2 4.1 4.6 2.7 8.1 6.7 12.6 10.3 12.4 10 28.5 9.2 42.4 1.2 6.2-3.5 11.2-7.5 15.9-9 9.9-3.1 17.8-8.6 22.3-15 7.7 30.4 25.7 74.3 37.2 95.7 6.1 11.4 18.3 35.5 23.6 64.6 3.3-.1 7 .4 10.9 1.4 13.8-35.7-11.7-74.2-23.3-84.9-4.7-4.6-4.9-6.6-2.6-6.5 12.6 11.2 29.2 33.7 35.2 59 2.8 11.6 3.3 23.7 .4 35.7 16.4 6.8 35.9 17.9 30.7 34.8-2.2-.1-3.2 0-4.2 0 3.2-10.1-3.9-17.6-22.8-26.1-19.6-8.6-36-8.6-38.3 12.5-12.1 4.2-18.3 14.7-21.4 27.3-2.8 11.2-3.6 24.7-4.4 39.9-.5 7.7-3.6 18-6.8 29-32.1 22.9-76.7 32.9-114.3 7.2zm257.4-11.5c-.9 16.8-41.2 19.9-63.2 46.5-13.2 15.7-29.4 24.4-43.6 25.5s-26.5-4.8-33.7-19.3c-4.7-11.1-2.4-23.1 1.1-36.3 3.7-14.2 9.2-28.8 9.9-40.6 .8-15.2 1.7-28.5 4.2-38.7 2.6-10.3 6.6-17.2 13.7-21.1 .3-.2 .7-.3 1-.5 .8 13.2 7.3 26.6 18.8 29.5 12.6 3.3 30.7-7.5 38.4-16.3 9-.3 15.7-.9 22.6 5.1 9.9 8.5 7.1 30.3 17.1 41.6 10.6 11.6 14 19.5 13.7 24.6zM173.3 148.7c2 1.9 4.7 4.5 8 7.1 6.6 5.2 15.8 10.6 27.3 10.6 11.6 0 22.5-5.9 31.8-10.8 4.9-2.6 10.9-7 14.8-10.4s5.9-6.3 3.1-6.6-2.6 2.6-6 5.1c-4.4 3.2-9.7 7.4-13.9 9.8-7.4 4.2-19.5 10.2-29.9 10.2s-18.7-4.8-24.9-9.7c-3.1-2.5-5.7-5-7.7-6.9-1.5-1.4-1.9-4.6-4.3-4.9-1.4-.1-1.8 3.7 1.7 6.5z" />
|
||||
</svg>
|
||||
@@ -95,10 +88,7 @@ export default async function Page(props: {
|
||||
|
||||
<div className="flex flex-row gap-2 items-left">
|
||||
{pypi && (
|
||||
<a
|
||||
target="_blank"
|
||||
href={`https://pypi.org/project/${pypi}/`}
|
||||
rel="noreferrer">
|
||||
<a target="_blank" href={`https://pypi.org/project/${pypi}/`} rel="noreferrer">
|
||||
<img
|
||||
src={`https://img.shields.io/pypi/v/${pypi}?color=blue`}
|
||||
className="h-5"
|
||||
@@ -107,10 +97,7 @@ export default async function Page(props: {
|
||||
</a>
|
||||
)}
|
||||
{npm && (
|
||||
<a
|
||||
target="_blank"
|
||||
href={`https://www.npmjs.com/package/${npm}`}
|
||||
rel="noreferrer">
|
||||
<a target="_blank" href={`https://www.npmjs.com/package/${npm}`} rel="noreferrer">
|
||||
<img
|
||||
src={`https://img.shields.io/npm/v/${npm}?color=bf4c4b`}
|
||||
className="h-5"
|
||||
@@ -137,7 +124,8 @@ export default async function Page(props: {
|
||||
target="_blank"
|
||||
className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5"
|
||||
aria-label="Source"
|
||||
data-active="false">
|
||||
data-active="false"
|
||||
>
|
||||
<svg role="img" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path>
|
||||
</svg>
|
||||
@@ -161,7 +149,8 @@ export default async function Page(props: {
|
||||
href={link}
|
||||
rel="noreferrer noopener"
|
||||
target="_blank"
|
||||
className="inline-flex gap-2 w-full items-center rounded-md p-2 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground">
|
||||
className="inline-flex gap-2 w-full items-center rounded-md p-2 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground"
|
||||
>
|
||||
{link.includes('python')
|
||||
? 'Python'
|
||||
: link.includes('typescript')
|
||||
@@ -174,14 +163,14 @@ export default async function Page(props: {
|
||||
</PopoverContent>
|
||||
</Popover>
|
||||
))}
|
||||
{slug.includes('libraries') && (
|
||||
{/*slug.includes('libraries') && (
|
||||
<a
|
||||
className="inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5"
|
||||
href={`/api/${page.data.title.toLowerCase()}`}>
|
||||
<CodeXml size={12} />
|
||||
Reference
|
||||
</a>
|
||||
)}
|
||||
)*/}
|
||||
</div>
|
||||
<hr className="my-2 border-t border-fd-border" />
|
||||
</div>
|
||||
@@ -189,10 +178,7 @@ export default async function Page(props: {
|
||||
};
|
||||
|
||||
return (
|
||||
<DocsPage
|
||||
toc={page.data.toc}
|
||||
tableOfContent={{ header: tocHeader() }}
|
||||
full={page.data.full}>
|
||||
<DocsPage toc={page.data.toc} tableOfContent={{ header: tocHeader() }} full={page.data.full}>
|
||||
<div className="flex flex-row w-full items-start">
|
||||
<div className="flex-1">
|
||||
<div className="flex flex-row w-full">
|
||||
@@ -208,15 +194,14 @@ export default async function Page(props: {
|
||||
size: 'sm',
|
||||
className: 'gap-2',
|
||||
})
|
||||
)}>
|
||||
)}
|
||||
>
|
||||
{(() => {
|
||||
// Find the current version label
|
||||
let currentLabel = 'Current';
|
||||
if (apiVersionSlug.length > 0) {
|
||||
const found = versionItems.find(
|
||||
(item) =>
|
||||
item.label !== 'Current' &&
|
||||
apiVersionSlug[0] === item.label
|
||||
(item) => item.label !== 'Current' && apiVersionSlug[0] === item.label
|
||||
);
|
||||
if (found) currentLabel = found.label;
|
||||
}
|
||||
@@ -237,10 +222,8 @@ export default async function Page(props: {
|
||||
: `/api/${apiSection}/${item.label}`;
|
||||
// Highlight current version
|
||||
const isCurrent =
|
||||
(item.label === 'Current' &&
|
||||
apiVersionSlug.length === 0) ||
|
||||
(item.label !== 'Current' &&
|
||||
apiVersionSlug[0] === item.label);
|
||||
(item.label === 'Current' && apiVersionSlug.length === 0) ||
|
||||
(item.label !== 'Current' && apiVersionSlug[0] === item.label);
|
||||
return (
|
||||
<Link
|
||||
key={item.label}
|
||||
@@ -248,7 +231,8 @@ export default async function Page(props: {
|
||||
className={cn(
|
||||
'px-3 py-1 rounded hover:bg-fd-muted',
|
||||
isCurrent && 'font-bold bg-fd-muted'
|
||||
)}>
|
||||
)}
|
||||
>
|
||||
API version: {item.label}
|
||||
</Link>
|
||||
);
|
||||
@@ -258,9 +242,7 @@ export default async function Page(props: {
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<DocsDescription className="text-md mt-1">
|
||||
{page.data.description}
|
||||
</DocsDescription>
|
||||
<DocsDescription className="text-md mt-1">{page.data.description}</DocsDescription>
|
||||
</div>
|
||||
</div>
|
||||
<DocsBody>
|
||||
@@ -270,6 +252,7 @@ export default async function Page(props: {
|
||||
a: createRelativeLink(source, page),
|
||||
})}
|
||||
/>
|
||||
<PageFeedback />
|
||||
</DocsBody>
|
||||
</DocsPage>
|
||||
);
|
||||
@@ -288,8 +271,7 @@ export async function generateMetadata(props: {
|
||||
|
||||
let title = `${page.data.title} | Cua Docs`;
|
||||
if (page.url.includes('api')) title = `${page.data.title} | Cua API Docs`;
|
||||
if (page.url.includes('guide'))
|
||||
title = ` Guide: ${page.data.title} | Cua Docs`;
|
||||
if (page.url.includes('guide')) title = ` Guide: ${page.data.title} | Cua Docs`;
|
||||
|
||||
return {
|
||||
title,
|
||||
|
||||
75
docs/src/app/api/posthog/[...path]/route.ts
Normal file
75
docs/src/app/api/posthog/[...path]/route.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
|
||||
/**
 * Upstream statuses that must be relayed without a body. The Response
 * constructor throws a TypeError when given a non-null body together with one
 * of these statuses, so they are short-circuited before the payload is read.
 */
const NULL_BODY_STATUSES = new Set([204, 205, 304]);

/**
 * Builds the upstream PostHog URL for a proxied request.
 *
 * @param request - Incoming request; only its query string is reused.
 * @param path - Catch-all route segments appended to the PostHog host.
 * @returns The absolute target URL, or `null` when `NEXT_PUBLIC_POSTHOG_HOST` is not set.
 */
function buildTargetUrl(request: NextRequest, path: string[]): string | null {
  const host = process.env.NEXT_PUBLIC_POSTHOG_HOST;
  if (!host) return null;

  const { search } = new URL(request.url);
  // Drop trailing slashes so a host configured as "https://host/" does not produce "//" in the path.
  return `${host.replace(/\/+$/, '')}/${path.join('/')}${search}`;
}

/**
 * Sends the request upstream and mirrors status, body and Content-Type back.
 *
 * @param targetUrl - Absolute upstream URL.
 * @param init - Method, headers and optional body for the upstream fetch.
 * @returns A response carrying the upstream status and payload.
 */
async function forwardToPostHog(targetUrl: string, init: RequestInit): Promise<NextResponse> {
  const response = await fetch(targetUrl, init);

  if (NULL_BODY_STATUSES.has(response.status)) {
    return new NextResponse(null, { status: response.status });
  }

  const data = await response.arrayBuffer();
  return new NextResponse(data, {
    status: response.status,
    headers: {
      'Content-Type': response.headers.get('Content-Type') || 'application/json',
    },
  });
}

/** Logs a proxy failure and returns the generic 500 response sent to the client. */
function proxyFailure(error: unknown): NextResponse {
  console.error('PostHog proxy error:', error);
  return new NextResponse('Error proxying request', { status: 500 });
}

/**
 * Proxies a GET request to PostHog, preserving the path and query string.
 *
 * @param request - Incoming request.
 * @param context - Route context whose `params.path` holds the catch-all segments.
 * @returns The upstream response, or a 500 when the proxy is misconfigured or the fetch fails.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ path: string[] }> }
) {
  const { path } = await params;
  const targetUrl = buildTargetUrl(request, path);
  if (!targetUrl) {
    return proxyFailure(new Error('NEXT_PUBLIC_POSTHOG_HOST is not set'));
  }

  try {
    // `return await` keeps rejections inside this try/catch.
    return await forwardToPostHog(targetUrl, {
      method: 'GET',
      headers: {
        'Content-Type': request.headers.get('Content-Type') || 'application/json',
      },
    });
  } catch (error) {
    return proxyFailure(error);
  }
}

/**
 * Proxies a POST request to PostHog, forwarding the raw body and Content-Type.
 *
 * @param request - Incoming request.
 * @param context - Route context whose `params.path` holds the catch-all segments.
 * @returns The upstream response, or a 500 when the proxy is misconfigured or the fetch fails.
 */
export async function POST(
  request: NextRequest,
  { params }: { params: Promise<{ path: string[] }> }
) {
  const { path } = await params;
  const targetUrl = buildTargetUrl(request, path);
  if (!targetUrl) {
    return proxyFailure(new Error('NEXT_PUBLIC_POSTHOG_HOST is not set'));
  }

  try {
    // Read the body as bytes so the payload is forwarded exactly as received.
    const body = await request.arrayBuffer();
    const contentType = request.headers.get('Content-Type') || 'application/x-www-form-urlencoded';

    return await forwardToPostHog(targetUrl, {
      method: 'POST',
      headers: {
        'Content-Type': contentType,
      },
      body,
    });
  } catch (error) {
    return proxyFailure(error);
  }
}
|
||||
@@ -42,14 +42,14 @@ export const baseOptions: BaseLayoutProps = {
|
||||
links: [
|
||||
{
|
||||
url: 'https://trycua.com',
|
||||
text: 'cua home',
|
||||
text: 'Cua home',
|
||||
type: 'icon',
|
||||
icon: <HomeIcon />,
|
||||
external: false,
|
||||
},
|
||||
{
|
||||
url: 'https://discord.com/invite/mVnXXpdE85',
|
||||
text: 'cua discord',
|
||||
text: 'Cua discord',
|
||||
type: 'icon',
|
||||
icon: (
|
||||
<>
|
||||
|
||||
@@ -2,6 +2,11 @@ import './global.css';
|
||||
import { RootProvider } from 'fumadocs-ui/provider';
|
||||
import { Inter } from 'next/font/google';
|
||||
import type { ReactNode } from 'react';
|
||||
import { PHProvider, PostHogPageView } from '@/providers/posthog-provider';
|
||||
import { AnalyticsTracker } from '@/components/analytics-tracker';
|
||||
import { CookieConsent } from '@/components/cookie-consent';
|
||||
import { Footer } from '@/components/footer';
|
||||
import { Suspense } from 'react';
|
||||
|
||||
const inter = Inter({
|
||||
subsets: ['latin'],
|
||||
@@ -14,9 +19,15 @@ export default function Layout({ children }: { children: ReactNode }) {
|
||||
<link rel="icon" href="/docs/favicon.ico" sizes="any" />
|
||||
</head>
|
||||
<body className="flex min-h-screen flex-col">
|
||||
<RootProvider search={{ options: { api: '/docs/api/search' } }}>
|
||||
{children}
|
||||
</RootProvider>
|
||||
<PHProvider>
|
||||
<Suspense fallback={null}>
|
||||
<PostHogPageView />
|
||||
</Suspense>
|
||||
<AnalyticsTracker />
|
||||
<RootProvider search={{ options: { api: '/docs/api/search' } }}>{children}</RootProvider>
|
||||
<Footer />
|
||||
<CookieConsent />
|
||||
</PHProvider>
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
|
||||
@@ -5,10 +5,7 @@ import { notFound } from 'next/navigation';
|
||||
|
||||
export const revalidate = false;
|
||||
|
||||
export async function GET(
|
||||
_req: NextRequest,
|
||||
{ params }: { params: Promise<{ slug?: string[] }> }
|
||||
) {
|
||||
export async function GET(_req: NextRequest, { params }: { params: Promise<{ slug?: string[] }> }) {
|
||||
const { slug } = await params;
|
||||
const page = source.getPage(slug);
|
||||
if (!page) notFound();
|
||||
|
||||
71
docs/src/components/analytics-tracker.tsx
Normal file
71
docs/src/components/analytics-tracker.tsx
Normal file
@@ -0,0 +1,71 @@
|
||||
'use client';
|
||||
|
||||
import { useEffect } from 'react';
|
||||
import posthog from 'posthog-js';
|
||||
|
||||
/**
 * Invisible client component that reports link clicks to PostHog.
 *
 * A single document-level click listener classifies the clicked anchor by its
 * href and captures one of `github_link_clicked`, `discord_link_clicked`,
 * `main_website_clicked` or `external_link_clicked`. Renders nothing.
 */
export function AnalyticsTracker() {
  useEffect(() => {
    const handleClick = (e: MouseEvent) => {
      // The event target is not guaranteed to be an element; only elements have closest().
      const target = e.target;
      if (!(target instanceof Element)) return;

      const link = target.closest('a');
      if (!link) return;

      const href = link.href;
      const text = link.textContent || link.getAttribute('aria-label') || '';
      const page = window.location.pathname;

      if (href.includes('github.com/trycua')) {
        posthog.capture('github_link_clicked', {
          url: href,
          link_text: text,
          page,
        });
      }

      if (href.includes('discord.com/invite') || href.includes('discord.gg')) {
        posthog.capture('discord_link_clicked', {
          url: href,
          link_text: text,
          page,
        });
      }

      if (
        (href.includes('trycua.com') && !href.includes('trycua.com/docs')) ||
        href.includes('cua.ai')
      ) {
        posthog.capture('main_website_clicked', {
          url: href,
          link_text: text,
          page,
        });
      }

      // Same-host links are not external.
      if (!link.hostname || link.hostname === window.location.hostname) return;

      // Destinations covered by the dedicated events above are not reported a second
      // time as generic external links. discord.gg is listed here because it is
      // captured as discord_link_clicked above.
      const coveredElsewhere =
        href.includes('github.com/trycua') ||
        href.includes('discord.com') ||
        href.includes('discord.gg') ||
        href.includes('trycua.com') ||
        href.includes('cua.ai');
      if (coveredElsewhere) return;

      posthog.capture('external_link_clicked', {
        url: href,
        link_text: text,
        page,
        domain: link.hostname,
      });
    };

    document.addEventListener('click', handleClick);

    return () => {
      document.removeEventListener('click', handleClick);
    };
  }, []);

  return null;
}
|
||||
44
docs/src/components/cookie-consent.tsx
Normal file
44
docs/src/components/cookie-consent.tsx
Normal file
@@ -0,0 +1,44 @@
|
||||
'use client';
|
||||
|
||||
import { useEffect, useState } from 'react';
|
||||
import posthog from 'posthog-js';
|
||||
|
||||
/**
 * Bottom-of-page cookie banner.
 *
 * Shown until the visitor clicks "Okay"; the choice is remembered under the
 * `cookie-consent` localStorage key and reported to PostHog.
 */
export function CookieConsent() {
  const [isVisible, setIsVisible] = useState(false);

  useEffect(() => {
    // Any stored value means the banner was already dismissed.
    if (localStorage.getItem('cookie-consent')) return;
    setIsVisible(true);
  }, []);

  function acceptCookies() {
    localStorage.setItem('cookie-consent', 'accepted');
    setIsVisible(false);

    // Record the acceptance together with the page it happened on.
    posthog.capture('cookie_consent_accepted', {
      page: window.location.pathname,
    });
  }

  return isVisible ? (
    <div className="fixed bottom-0 left-0 right-0 z-50 bg-fd-background border-t border-fd-border shadow-lg">
      <div className="container mx-auto px-4 py-2 flex flex-col sm:flex-row items-center justify-between gap-3">
        <p className="text-xs text-fd-muted-foreground">
          This site uses cookies for website functionality, analytics, and personalized content.
        </p>
        <button
          onClick={acceptCookies}
          className="px-4 py-1 text-xs bg-fd-primary text-fd-primary-foreground rounded hover:opacity-90 transition-opacity whitespace-nowrap"
        >
          Okay
        </button>
      </div>
    </div>
  ) : null;
}
|
||||
310
docs/src/components/editable-code-block.tsx
Normal file
310
docs/src/components/editable-code-block.tsx
Normal file
@@ -0,0 +1,310 @@
|
||||
'use client';
|
||||
|
||||
import React, { createContext, useContext, useState, ReactNode } from 'react';
|
||||
import * as Base from 'fumadocs-ui/components/codeblock';
|
||||
import { cn } from 'fumadocs-ui/utils/cn';
|
||||
|
||||
/**
 * Shape of the state shared between an editable code block and the inputs
 * rendered inside it.
 */
interface EditableCodeContextValue {
  /** Current value of every editable field, keyed by its placeholder. */
  values: Record<string, string>;
  /** Stores `value` under `key`. */
  updateValue: (key: string, value: string) => void;
}

/** Context carrying the editable values; `null` when no provider is mounted. */
const EditableCodeContext = createContext<EditableCodeContextValue | null>(null);

/**
 * Reads the editable code context.
 *
 * @throws Error when no context value is available.
 */
function useEditableCode() {
  const ctx = useContext(EditableCodeContext);
  if (ctx) {
    return ctx;
  }
  throw new Error('useEditableCode must be used within EditableCodeBlock');
}
|
||||
|
||||
/**
 * Props accepted by EditableCodeBlock.
 */
interface EditableCodeBlockProps {
  /** Language name used to build the `language-*` class names. */
  lang?: string;
  /** Starting values for the editable fields, keyed by placeholder. */
  defaultValues?: Record<string, string>;
  /** Code content, including any embedded EditableValue components. */
  children: ReactNode;
  /** Extra CSS classes for the outer code block. */
  className?: string;
  /** Title passed to the code block. */
  title?: string;
}

/**
 * Code block whose content can contain inline editable values.
 *
 * Holds the values in local state and exposes them, with an updater, through
 * EditableCodeContext to everything rendered as `children`.
 */
export function EditableCodeBlock(props: EditableCodeBlockProps) {
  const { lang = 'python', defaultValues = {}, children, className, title } = props;
  const [values, setValues] = useState<Record<string, string>>(defaultValues);

  // Functional update so writes always build on the latest map.
  function setValue(key: string, value: string) {
    setValues((current) => ({ ...current, [key]: value }));
  }

  const languageClass = `language-${lang}`;

  return (
    <EditableCodeContext.Provider value={{ values, updateValue: setValue }}>
      <Base.CodeBlock title={title} className={cn('my-4', className)}>
        <Base.Pre className={cn(languageClass, 'px-3')}>
          <code className={cn(languageClass)} style={{ display: 'block', whiteSpace: 'pre-wrap' }}>
            {children}
          </code>
        </Base.Pre>
      </Base.CodeBlock>
    </EditableCodeContext.Provider>
  );
}
|
||||
|
||||
/**
 * Props accepted by EditableValue.
 */
interface EditableValueProps {
  /** Key of this value in the shared map; also shown as the input's placeholder text. */
  placeholder: string;
  /** Fixed width in `ch` units; when omitted the width follows the measured text. */
  width?: number;
  /** Value used while nothing is stored for this placeholder. */
  defaultValue?: string;
  /** Input type. */
  type?: 'text' | 'password';
}

/** Style shared by the off-screen spans that are only used to measure text width. */
const MEASURING_SPAN_STYLE: React.CSSProperties = {
  position: 'absolute',
  visibility: 'hidden',
  whiteSpace: 'pre',
  fontFamily: 'inherit',
  pointerEvents: 'none',
};

/**
 * Inline input that sits inside code and sizes itself to its content.
 *
 * Two hidden spans mirror the current value and the placeholder; their
 * rendered widths (minimum 80px) drive the input width unless an explicit
 * `width` is given. An "Edit me!" tooltip is shown above the input on hover.
 */
export function EditableValue({
  placeholder,
  width: explicitWidth,
  defaultValue = '',
  type = 'text',
}: EditableValueProps) {
  const { values, updateValue } = useEditableCode();
  const value = values[placeholder] ?? defaultValue;

  const spanRef = React.useRef<HTMLSpanElement>(null);
  const placeholderSpanRef = React.useRef<HTMLSpanElement>(null);
  const inputRef = React.useRef<HTMLInputElement>(null);

  const [measuredWidth, setMeasuredWidth] = useState(0);
  const [placeholderWidth, setPlaceholderWidth] = useState(0);
  const [isHovered, setIsHovered] = useState(false);
  const [tooltipPosition, setTooltipPosition] = useState({ top: 0, left: 0 });
  const [isVisible, setIsVisible] = useState(false);

  // Track whether the input is on screen; the measurements below re-run when this flips.
  React.useEffect(() => {
    const node = inputRef.current;
    if (!node) return;

    const observer = new IntersectionObserver(
      (entries) => {
        for (const entry of entries) {
          setIsVisible(entry.isIntersecting);
        }
      },
      { threshold: 0.01 }
    );
    observer.observe(node);

    return () => {
      observer.disconnect();
    };
  }, []);

  // Width of the current value, taken from its hidden mirror span.
  React.useEffect(() => {
    const mirror = spanRef.current;
    if (mirror && isVisible) {
      setMeasuredWidth(mirror.offsetWidth);
    }
  }, [value, isVisible]);

  // Width of the placeholder text, taken from its hidden mirror span.
  React.useEffect(() => {
    const mirror = placeholderSpanRef.current;
    if (mirror && isVisible) {
      setPlaceholderWidth(mirror.offsetWidth);
    }
  }, [placeholder, isVisible]);

  // Place the tooltip above the horizontal centre of the input when hovering starts.
  React.useEffect(() => {
    const node = inputRef.current;
    if (isHovered && node) {
      const rect = node.getBoundingClientRect();
      setTooltipPosition({
        top: rect.top - 28,
        left: rect.left + rect.width / 2,
      });
    }
  }, [isHovered]);

  let inputWidth: string;
  if (explicitWidth) {
    inputWidth = `${explicitWidth}ch`;
  } else {
    inputWidth = `${Math.max(placeholderWidth, measuredWidth, 80)}px`;
  }

  return (
    <span
      style={{ display: 'inline', whiteSpace: 'nowrap', position: 'relative' }}
      onMouseEnter={() => setIsHovered(true)}
      onMouseLeave={() => setIsHovered(false)}
    >
      {/* Mirror of the current value, used only for measuring */}
      <span ref={spanRef} style={MEASURING_SPAN_STYLE} aria-hidden="true">
        {value}
      </span>

      {/* Mirror of the placeholder, used only for measuring */}
      <span ref={placeholderSpanRef} style={MEASURING_SPAN_STYLE} aria-hidden="true">
        {placeholder}
      </span>

      {/* Hover tooltip */}
      <span
        style={{
          position: 'fixed',
          top: tooltipPosition.top,
          left: tooltipPosition.left,
          transform: 'translateX(-50%)',
          padding: '4px 8px',
          backgroundColor: 'rgba(0, 0, 0, 0.8)',
          color: 'white',
          fontSize: '12px',
          borderRadius: '4px',
          whiteSpace: 'nowrap',
          pointerEvents: 'none',
          opacity: isHovered ? 1 : 0,
          transition: 'opacity 0.2s ease-in-out',
          zIndex: 9999,
        }}
      >
        Edit me!
      </span>

      <input
        ref={inputRef}
        type={type}
        value={value}
        onChange={(event) => updateValue(placeholder, event.target.value)}
        placeholder={placeholder}
        className={cn(type === 'password' && value && 'text-security-disc')}
        style={{
          display: 'inline',
          width: inputWidth,
          verticalAlign: 'baseline',
          lineHeight: 'inherit',
          fontSize: 'inherit',
          fontFamily: 'inherit',
          height: 'auto',
          padding: 0,
          margin: 0,
          background: 'transparent',
          border: 'none',
          borderBottom: '2px dashed rgba(96, 165, 250, 0.5)',
          outline: 'none',
          color: 'inherit',
          transition: 'border-bottom-color 0.2s ease-in-out',
        }}
      />
    </span>
  );
}
|
||||
|
||||
/**
 * Bordered "Configuration" panel that wraps form inputs rendered outside the
 * code block itself.
 */
export function EditableForm(props: { children: ReactNode; className?: string }) {
  const { children, className = '' } = props;
  const panelClasses = cn(
    'p-4 border rounded-lg bg-fd-secondary/50 dark:bg-fd-secondary/30 mb-6',
    className
  );

  return (
    <div className={panelClasses}>
      <h3 className="text-lg font-semibold mb-4">Configuration</h3>
      {children}
    </div>
  );
}
|
||||
|
||||
/**
 * Props for the labelled form input that edits a value outside the code block.
 */
interface EditableInputProps {
  /** Key in the shared values map this input is bound to; also shown as placeholder text. */
  placeholder: string;
  /** Label text */
  label: string;
  /** Input type */
  type?: 'text' | 'email' | 'password';
  /** Custom class name applied to the wrapper */
  className?: string;
}

/**
 * Labelled input bound to one entry of the editable code context.
 *
 * Reads the current value for `placeholder` from the context and writes every
 * change back through `updateValue`.
 */
export function EditableInput({
  placeholder,
  label,
  type = 'text',
  className = '',
}: EditableInputProps) {
  const { values, updateValue } = useEditableCode();
  const value = values[placeholder] || '';
  // Generated id ties the label to the input so clicking the label focuses the
  // field and assistive technology announces the label.
  const inputId = React.useId();

  return (
    <div className={cn('mb-4', className)}>
      <label htmlFor={inputId} className="block text-sm font-medium mb-2">
        {label}
      </label>
      <input
        id={inputId}
        type={type}
        value={value}
        onChange={(e) => updateValue(placeholder, e.target.value)}
        placeholder={placeholder}
        className={cn(
          'w-full px-3 py-2 border rounded-md',
          'focus:outline-none focus:ring-2 focus:ring-blue-500',
          'bg-fd-background border-fd-border'
        )}
      />
    </div>
  );
}
|
||||
16
docs/src/components/footer.tsx
Normal file
16
docs/src/components/footer.tsx
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Site footer containing a single right-aligned link to the cookie policy,
 * opened in a new tab.
 */
export function Footer() {
  const cookiePolicyLink = (
    <a
      href="https://www.cua.ai/cookie-policy"
      target="_blank"
      rel="noopener noreferrer"
      className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
    >
      Cookie Policy
    </a>
  );

  return (
    <footer className="mt-auto border-t border-fd-border py-4">
      <div className="container mx-auto px-4 flex justify-end">{cookiePolicyLink}</div>
    </footer>
  );
}
|
||||
@@ -34,7 +34,7 @@ interface IOUProps {
|
||||
}
|
||||
|
||||
/**
|
||||
* A React component that visualizes and calculates the Intersection over Union (IOU)
|
||||
* A React component that visualizes and calculates the Intersection over Union (IOU)
|
||||
* of two rectangles on a canvas
|
||||
* @param props - The component props
|
||||
* @returns The rendered IOU visualization component
|
||||
@@ -130,12 +130,7 @@ export default function IOU({ title, description, rect1, rect2 }: IOUProps) {
|
||||
<h3 className="text-sm font-semibold ">{title}</h3>
|
||||
<div className="flex items-start gap-6">
|
||||
<div>
|
||||
<canvas
|
||||
ref={canvasRef}
|
||||
width={200}
|
||||
height={150}
|
||||
className="border bg-white rounded-md"
|
||||
/>
|
||||
<canvas ref={canvasRef} width={200} height={150} className="border bg-white rounded-md" />
|
||||
<div className="mt-2 text-sm">
|
||||
<div className="font-mono mb-2">IOU = {actualIOU.toFixed(3)}</div>
|
||||
<span className="">{description}</span>
|
||||
|
||||
@@ -28,10 +28,7 @@ export function Mermaid({ chart }: { chart: string }) {
|
||||
theme: resolvedTheme === 'dark' ? 'dark' : 'default',
|
||||
});
|
||||
|
||||
const { svg, bindFunctions } = await mermaid.render(
|
||||
id,
|
||||
chart.replaceAll('\\n', '\n'),
|
||||
);
|
||||
const { svg, bindFunctions } = await mermaid.render(id, chart.replaceAll('\\n', '\n'));
|
||||
|
||||
bindFunctions?.(container);
|
||||
setSvg(svg);
|
||||
@@ -44,4 +41,4 @@ export function Mermaid({ chart }: { chart: string }) {
|
||||
}, [chart, id, resolvedTheme]);
|
||||
|
||||
return <div ref={containerRef} dangerouslySetInnerHTML={{ __html: svg }} />;
|
||||
}
|
||||
}
|
||||
|
||||
53
docs/src/components/page-feedback.tsx
Normal file
53
docs/src/components/page-feedback.tsx
Normal file
@@ -0,0 +1,53 @@
|
||||
'use client';
|
||||
|
||||
import { useState } from 'react';
|
||||
import posthog from 'posthog-js';
|
||||
import { ThumbsUp, ThumbsDown } from 'lucide-react';
|
||||
|
||||
/**
 * "Was this page helpful?" widget.
 *
 * Shows Yes/No buttons until one is clicked, then replaces them with a
 * thank-you message. Each click sends a `page_feedback_helpful` or
 * `page_feedback_not_helpful` event to PostHog with the current pathname and
 * document title.
 */
export function PageFeedback() {
  const [feedback, setFeedback] = useState<'helpful' | 'not_helpful' | null>(null);

  const handleFeedback = (isHelpful: boolean) => {
    const feedbackType = isHelpful ? 'helpful' : 'not_helpful';
    setFeedback(feedbackType);

    posthog.capture(`page_feedback_${feedbackType}`, {
      page: window.location.pathname,
      page_title: document.title,
    });
  };

  return (
    <div className="mt-8 pt-4 border-t border-fd-border">
      {feedback === null ? (
        <div className="flex flex-col sm:flex-row items-center justify-between gap-3">
          <p className="text-sm text-fd-muted-foreground">Was this page helpful?</p>
          <div className="flex gap-2">
            {/* type="button": a <button> defaults to "submit" and would submit any enclosing form. */}
            <button
              type="button"
              onClick={() => handleFeedback(true)}
              className="flex items-center gap-1.5 px-3 py-1.5 text-sm hover:bg-fd-accent rounded transition-colors"
              aria-label="This page was helpful"
            >
              <ThumbsUp className="w-4 h-4" />
              Yes
            </button>
            <button
              type="button"
              onClick={() => handleFeedback(false)}
              className="flex items-center gap-1.5 px-3 py-1.5 text-sm hover:bg-fd-accent rounded transition-colors"
              aria-label="This page was not helpful"
            >
              <ThumbsDown className="w-4 h-4" />
              No
            </button>
          </div>
        </div>
      ) : (
        // role="status" lets assistive technology announce the confirmation
        // that replaces the buttons.
        <p role="status" className="text-sm text-fd-muted-foreground text-left">
          {feedback === 'helpful'
            ? 'Thanks for your feedback!'
            : "Thanks for your feedback. We'll work on improving this page."}
        </p>
      )}
    </div>
  );
}
|
||||
@@ -34,9 +34,7 @@ export async function getApiVersions(
|
||||
...versions.filter((v) => v.label === 'Current'),
|
||||
...versions
|
||||
.filter((v) => v.label !== 'Current')
|
||||
.sort((a, b) =>
|
||||
b.label.localeCompare(a.label, undefined, { numeric: true })
|
||||
),
|
||||
.sort((a, b) => b.label.localeCompare(a.label, undefined, { numeric: true })),
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,12 @@ import * as TabsComponents from 'fumadocs-ui/components/tabs';
|
||||
import type { MDXComponents } from 'mdx/types';
|
||||
import { Mermaid } from './components/mermaid';
|
||||
import IOU from './components/iou';
|
||||
import {
|
||||
EditableCodeBlock,
|
||||
EditableValue,
|
||||
EditableForm,
|
||||
EditableInput,
|
||||
} from './components/editable-code-block';
|
||||
|
||||
// use this function to get MDX components, you will need it for rendering MDX
|
||||
export function getMDXComponents(components?: MDXComponents): MDXComponents {
|
||||
@@ -10,6 +16,10 @@ export function getMDXComponents(components?: MDXComponents): MDXComponents {
|
||||
...defaultMdxComponents,
|
||||
Mermaid,
|
||||
IOU,
|
||||
EditableCodeBlock,
|
||||
EditableValue,
|
||||
EditableForm,
|
||||
EditableInput,
|
||||
...TabsComponents,
|
||||
...components,
|
||||
};
|
||||
|
||||
40
docs/src/providers/posthog-provider.tsx
Normal file
40
docs/src/providers/posthog-provider.tsx
Normal file
@@ -0,0 +1,40 @@
|
||||
'use client';
|
||||
|
||||
import posthog from 'posthog-js';
|
||||
import { PostHogProvider } from 'posthog-js/react';
|
||||
import { useEffect } from 'react';
|
||||
import { usePathname, useSearchParams } from 'next/navigation';
|
||||
|
||||
// Initialise the shared PostHog client once, in the browser only (this module
// is also evaluated during server rendering, where `window` is undefined).
if (typeof window !== 'undefined') {
  const posthogApiKey = process.env.NEXT_PUBLIC_POSTHOG_API_KEY;

  if (posthogApiKey) {
    posthog.init(posthogApiKey, {
      // NOTE(review): relative path, presumably a same-origin proxy route to PostHog; confirm.
      api_host: '/docs/api/posthog',
      ui_host: process.env.NEXT_PUBLIC_POSTHOG_HOST,
      person_profiles: 'always',
      // Automatic pageview capture is off; PostHogPageView sends `$pageview` itself.
      capture_pageview: false,
      capture_pageleave: true,
    });
  } else {
    // Without a key, skip init instead of passing `undefined` through a
    // non-null assertion; keep the misconfiguration visible in the console.
    console.warn('NEXT_PUBLIC_POSTHOG_API_KEY is not set; PostHog analytics is disabled.');
  }
}
|
||||
|
||||
/**
 * Wraps `children` in a `PostHogProvider` backed by the module-level
 * `posthog` client.
 */
export function PHProvider({ children }: { children: React.ReactNode }) {
  const client = posthog;

  return <PostHogProvider client={client}>{children}</PostHogProvider>;
}
|
||||
|
||||
/**
 * Renders nothing; captures a PostHog `$pageview` event whenever the pathname
 * or the search params change.
 */
export function PostHogPageView(): null {
  const pathname = usePathname();
  const searchParams = useSearchParams();

  useEffect(() => {
    // Nothing to report until a pathname is available.
    if (!pathname) {
      return;
    }

    const query = searchParams ? searchParams.toString() : '';
    const base = window.origin + pathname;
    // Append the query string only when it is non-empty.
    const currentUrl = query ? base + `?${query}` : base;

    posthog.capture('$pageview', { $current_url: currentUrl });
  }, [pathname, searchParams]);

  return null;
}
|
||||
@@ -2,11 +2,7 @@
|
||||
"compilerOptions": {
|
||||
"baseUrl": ".",
|
||||
"target": "ESNext",
|
||||
"lib": [
|
||||
"dom",
|
||||
"dom.iterable",
|
||||
"esnext"
|
||||
],
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
@@ -20,12 +16,8 @@
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"paths": {
|
||||
"@/.source": [
|
||||
"./.source/index.ts"
|
||||
],
|
||||
"@/*": [
|
||||
"./src/*"
|
||||
]
|
||||
"@/.source": ["./.source/index.ts"],
|
||||
"@/*": ["./src/*"]
|
||||
},
|
||||
"plugins": [
|
||||
{
|
||||
@@ -33,13 +25,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"include": [
|
||||
"next-env.d.ts",
|
||||
"**/*.ts",
|
||||
"**/*.tsx",
|
||||
".next/types/**/*.ts"
|
||||
],
|
||||
"exclude": [
|
||||
"node_modules"
|
||||
]
|
||||
}
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
|
||||
@@ -2,16 +2,15 @@
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import traceback
|
||||
import signal
|
||||
|
||||
from computer import Computer, VMProviderType
|
||||
import traceback
|
||||
|
||||
# Import the unified agent class and types
|
||||
from agent import ComputerAgent
|
||||
from computer import Computer, VMProviderType
|
||||
|
||||
# Import utility functions
|
||||
from utils import load_dotenv_files, handle_sigint
|
||||
from utils import handle_sigint, load_dotenv_files
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
@@ -40,25 +39,20 @@ async def run_agent_example():
|
||||
# Create ComputerAgent with new API
|
||||
agent = ComputerAgent(
|
||||
# Supported models:
|
||||
|
||||
# == OpenAI CUA (computer-use-preview) ==
|
||||
model="openai/computer-use-preview",
|
||||
|
||||
# == Anthropic CUA (Claude > 3.5) ==
|
||||
# model="anthropic/claude-opus-4-20250514",
|
||||
# model="anthropic/claude-opus-4-20250514",
|
||||
# model="anthropic/claude-sonnet-4-20250514",
|
||||
# model="anthropic/claude-3-7-sonnet-20250219",
|
||||
# model="anthropic/claude-3-5-sonnet-20241022",
|
||||
|
||||
# == UI-TARS ==
|
||||
# model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
|
||||
# model="mlx/mlx-community/UI-TARS-1.5-7B-6bit",
|
||||
# model="ollama_chat/0000/ui-tars-1.5-7b",
|
||||
|
||||
# == Omniparser + Any LLM ==
|
||||
# model="omniparser+anthropic/claude-opus-4-20250514",
|
||||
# model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M",
|
||||
|
||||
tools=[computer],
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.DEBUG,
|
||||
@@ -79,18 +73,18 @@ async def run_agent_example():
|
||||
|
||||
# Use message-based conversation history
|
||||
history = []
|
||||
|
||||
|
||||
for i, task in enumerate(tasks):
|
||||
print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
|
||||
|
||||
|
||||
# Add user message to history
|
||||
history.append({"role": "user", "content": task})
|
||||
|
||||
|
||||
# Run agent with conversation history
|
||||
async for result in agent.run(history, stream=False):
|
||||
# Add agent outputs to history
|
||||
history += result.get("output", [])
|
||||
|
||||
|
||||
# Print output for debugging
|
||||
for item in result.get("output", []):
|
||||
if item.get("type") == "message":
|
||||
@@ -104,7 +98,7 @@ async def run_agent_example():
|
||||
print(f"Computer Action: {action_type}({action})")
|
||||
elif item.get("type") == "computer_call_output":
|
||||
print("Computer Output: [Screenshot/Result]")
|
||||
|
||||
|
||||
print(f"✅ Task {i+1}/{len(tasks)} completed: {task}")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
73
examples/cloud_api_examples.py
Normal file
73
examples/cloud_api_examples.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from utils import load_dotenv_files
|
||||
|
||||
load_dotenv_files()
|
||||
|
||||
from computer.providers.cloud.provider import CloudProvider
|
||||
|
||||
|
||||
async def main() -> None:
    """Print every VM returned by ``CloudProvider.list_vms()``.

    Reads the API key from the ``CUA_API_KEY`` environment variable, opens the
    provider as an async context manager, and prints the name, status, API URL
    and VNC URL of each VM.

    Raises:
        RuntimeError: If ``CUA_API_KEY`` is not set.
    """
    api_key = os.getenv("CUA_API_KEY")
    if not api_key:
        raise RuntimeError("CUA_API_KEY environment variable is not set")
    api_base = os.getenv("CUA_API_BASE")
    if api_base:
        # NOTE(review): api_base is only reported here, never passed to
        # CloudProvider; presumably the provider reads CUA_API_BASE itself.
        # Confirm against the provider implementation.
        print(f"Using API base: {api_base}")

    provider = CloudProvider(api_key=api_key, verbose=True)
    async with provider:

        # List all VMs
        vms = await provider.list_vms()
        print(f"Found {len(vms)} VM(s)")
        for vm in vms:
            # sep="" because each field already ends in "\n"; the default
            # separator would start every line after the first with a space.
            print(
                f"name: {vm['name']}\n",
                f"status: {vm['status']}\n",  # pending, running, stopped, terminated, failed
                f"api_url: {vm.get('api_url')}\n",
                f"vnc_url: {vm.get('vnc_url')}\n",
                sep="",
            )
|
||||
|
||||
# # --- Additional operations (commented out) ---
|
||||
# # To stop a VM by name:
|
||||
# name = "m-linux-96lcxd2c2k"
|
||||
# resp = await provider.stop_vm(name)
|
||||
# print(
|
||||
# "stop_vm response:\n",
|
||||
# f"name: {resp['name']}\n",
|
||||
# f"status: {resp['status']}\n", # stopping
|
||||
# )
|
||||
|
||||
# # To start a VM by name:
|
||||
# name = "m-linux-96lcxd2c2k"
|
||||
# resp = await provider.run_vm(name)
|
||||
# print(
|
||||
# "run_vm response:\n",
|
||||
# f"name: {resp['name']}\n",
|
||||
# f"status: {resp['status']}\n", # starting
|
||||
# )
|
||||
|
||||
# # To restart a VM by name:
|
||||
# name = "m-linux-96lcxd2c2k"
|
||||
# resp = await provider.restart_vm(name)
|
||||
# print(
|
||||
# "restart_vm response:\n",
|
||||
# f"name: {resp['name']}\n",
|
||||
# f"status: {resp['status']}\n", # restarting
|
||||
# )
|
||||
|
||||
# # To probe a VM's status via its public hostname (if you know the name):
|
||||
# name = "m-linux-96lcxd2c2k"
|
||||
# info = await provider.get_vm(name)
|
||||
# print("get_vm info:\n",
|
||||
# f"name: {info['name']}\n",
|
||||
# f"status: {info['status']}\n", # running
|
||||
# f"api_url: {info.get('api_url')}\n",
|
||||
# f"os_type: {info.get('os_type')}\n",
|
||||
# )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"useTabs": false,
|
||||
"semi": true,
|
||||
"singleQuote": true,
|
||||
"trailingComma": "es5",
|
||||
"bracketSpacing": true
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user