diff --git a/.github/scripts/get_pyproject_version.py b/.github/scripts/get_pyproject_version.py
new file mode 100755
index 00000000..a00ea22c
--- /dev/null
+++ b/.github/scripts/get_pyproject_version.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+"""
+Verifies that the version in pyproject.toml matches the expected version.
+
+Usage:
+    python get_pyproject_version.py <pyproject_path> <expected_version>
+
+Exit codes:
+    0 - Versions match
+    1 - Versions don't match or error occurred
+"""
+
+import sys
+
+try:
+    import tomllib  # standard library on Python 3.11+
+except ImportError:
+    # Python < 3.11: _load_toml() falls back to the third-party "toml" package.
+    tomllib = None
+
+
+def _load_toml(path):
+    """Parse the TOML file at ``path`` and return its contents as a dict.
+
+    Uses ``tomllib`` when available, otherwise the ``toml`` package. Errors
+    (missing file, invalid TOML, missing parser) propagate to the caller.
+    """
+    if tomllib is not None:
+        # tomllib only accepts file objects opened in binary mode.
+        with open(path, 'rb') as f:
+            return tomllib.load(f)
+
+    import toml  # imported lazily so Python 3.11+ never needs it installed
+    return toml.load(path)
+
+
+def main():
+    """Check ``[project].version`` of a pyproject.toml against a version string.
+
+    Reads ``sys.argv[1]`` (path to pyproject.toml) and ``sys.argv[2]``
+    (expected version). Always terminates through ``sys.exit``: status 0 when
+    the versions are equal, status 1 on bad usage, unreadable or unparsable
+    file, missing/empty version, or mismatch. Errors are written to stderr.
+    """
+    if len(sys.argv) != 3:
+        print(
+            "Usage: python get_pyproject_version.py <pyproject_path> <expected_version>",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    pyproject_path = sys.argv[1]
+    expected_version = sys.argv[2]
+
+    try:
+        data = _load_toml(pyproject_path)
+    except FileNotFoundError:
+        print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        # Top-level boundary: invalid TOML, permission problems, or a missing
+        # parser all mean the check cannot run, so report and fail.
+        print(f"❌ ERROR: Failed to parse TOML file: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    actual_version = data.get('project', {}).get('version')
+
+    # An empty string is treated the same as a missing version.
+    if not actual_version:
+        print("❌ ERROR: No version found in pyproject.toml", file=sys.stderr)
+        sys.exit(1)
+
+    if actual_version != expected_version:
+        print("❌ Version mismatch detected!", file=sys.stderr)
+        print(f" pyproject.toml version: {actual_version}", file=sys.stderr)
+        print(f" Expected version: {expected_version}", file=sys.stderr)
+        print("", file=sys.stderr)
+        print("The version in pyproject.toml must match the version being published.", file=sys.stderr)
+        print(f"Please update pyproject.toml to version {expected_version} or use the correct tag.", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"✅ Version consistency check passed: {actual_version}")
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/.github/scripts/tests/README.md b/.github/scripts/tests/README.md
new file mode 100644
index 00000000..2a440065
--- /dev/null
+++ b/.github/scripts/tests/README.md
@@ -0,0 +1,131 @@
+# Tests for .github/scripts
+
+This directory contains comprehensive tests for the GitHub workflow scripts using Python's built-in testing framework.
+
+## Requirements
+
+**No external dependencies required!**
+
+This test suite uses:
+- `unittest` - Python's built-in testing framework
+- `tomllib` - Python 3.11+ built-in TOML parser
+
+For Python < 3.11, the `toml` package is used as a fallback.
+
+## Running Tests
+
+### Run all tests
+```bash
+cd .github/scripts/tests
+python3 -m unittest discover -v
+```
+
+### Run a specific test file
+```bash
+python3 -m unittest test_get_pyproject_version -v
+```
+
+### Run a specific test class
+```bash
+python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion -v
+```
+
+### Run a specific test method
+```bash
+python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion.test_matching_versions -v
+```
+
+### Run tests directly from the test file
+```bash
+python3 test_get_pyproject_version.py
+```
+
+## Test Structure
+
+### test_get_pyproject_version.py
+
+Comprehensive tests for `get_pyproject_version.py` covering:
+
+- ✅ **Version matching**: Tests successful version validation
+- ✅ **Version mismatch**: Tests error handling when versions don't match
+- ✅ **Missing version**: Tests handling of pyproject.toml without version field
+- ✅ **Missing project section**: Tests handling of pyproject.toml without project section
+- ✅ **File not found**: Tests handling of non-existent files
+- ✅ **Malformed TOML**: Tests handling of invalid TOML syntax
+- ✅ **Argument validation**: Tests proper argument count validation
+- ✅ **Semantic versioning**: Tests various 
semantic version formats +- ✅ **Pre-release tags**: Tests versions with alpha, beta, rc tags +- ✅ **Build metadata**: Tests versions with build metadata +- ✅ **Edge cases**: Tests empty versions and other edge cases + +**Total Tests**: 17+ test cases covering all functionality + +## Best Practices Implemented + +1. **Fixture Management**: Uses `setUp()` and `tearDown()` for clean test isolation +2. **Helper Methods**: Provides reusable helpers for creating test fixtures +3. **Temporary Files**: Uses `tempfile` for file creation with proper cleanup +4. **Comprehensive Coverage**: Tests happy paths, error conditions, and edge cases +5. **Clear Documentation**: Each test has a descriptive docstring +6. **Output Capture**: Uses `unittest.mock.patch` and `StringIO` to test stdout/stderr +7. **Exit Code Validation**: Properly tests script exit codes with `assertRaises(SystemExit)` +8. **Type Hints**: Uses type hints in helper methods for clarity +9. **PEP 8 Compliance**: Follows Python style guidelines +10. **Zero External Dependencies**: Uses only Python standard library + +## Continuous Integration + +These tests can be integrated into GitHub Actions workflows with no additional dependencies: + +```yaml +- name: Run .github scripts tests + run: | + cd .github/scripts/tests + python3 -m unittest discover -v +``` + +## Test Output Example + +``` +test_empty_version_string (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of empty version string. ... ok +test_file_not_found (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of non-existent pyproject.toml file. ... ok +test_malformed_toml (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of malformed TOML file. ... ok +test_matching_versions (test_get_pyproject_version.TestGetPyprojectVersion) +Test that matching versions result in success. ... 
ok +test_missing_project_section (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of pyproject.toml without a project section. ... ok +test_missing_version_field (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of pyproject.toml without a version field. ... ok +test_no_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing no arguments results in usage error. ... ok +test_semantic_version_0_0_1 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 0.0.1. ... ok +test_semantic_version_1_0_0 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 1.0.0. ... ok +test_semantic_version_10_20_30 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 10.20.30. ... ok +test_semantic_version_alpha (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with alpha tag. ... ok +test_semantic_version_beta (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with beta tag. ... ok +test_semantic_version_rc_with_build (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with rc and build metadata. ... ok +test_too_few_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing too few arguments results in usage error. ... ok +test_too_many_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing too many arguments results in usage error. ... ok +test_version_mismatch (test_get_pyproject_version.TestGetPyprojectVersion) +Test that mismatched versions result in failure with appropriate error message. ... ok +test_version_with_build_metadata (test_get_pyproject_version.TestGetPyprojectVersion) +Test matching versions with build metadata. ... ok +test_version_with_prerelease_tags (test_get_pyproject_version.TestGetPyprojectVersion) +Test matching versions with pre-release tags like alpha, beta, rc. ... 
ok + +---------------------------------------------------------------------- +Ran 18 tests in 0.XXXs + +OK +``` diff --git a/.github/scripts/tests/__init__.py b/.github/scripts/tests/__init__.py new file mode 100644 index 00000000..cbc9d370 --- /dev/null +++ b/.github/scripts/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for .github/scripts.""" diff --git a/.github/scripts/tests/test_get_pyproject_version.py b/.github/scripts/tests/test_get_pyproject_version.py new file mode 100644 index 00000000..95c980a9 --- /dev/null +++ b/.github/scripts/tests/test_get_pyproject_version.py @@ -0,0 +1,340 @@ +""" +Comprehensive tests for get_pyproject_version.py script using unittest. + +This test suite covers: +- Version matching validation +- Error handling for missing versions +- Invalid input handling +- File not found scenarios +- Malformed TOML handling +""" + +import sys +import unittest +import tempfile +from pathlib import Path +from io import StringIO +from unittest.mock import patch + +# Add parent directory to path to import the module +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Import after path is modified +import get_pyproject_version + + +class TestGetPyprojectVersion(unittest.TestCase): + """Test suite for get_pyproject_version.py functionality.""" + + def setUp(self): + """Reset sys.argv before each test.""" + self.original_argv = sys.argv.copy() + + def tearDown(self): + """Restore sys.argv after each test.""" + sys.argv = self.original_argv + + def create_pyproject_toml(self, version: str) -> Path: + """Helper to create a temporary pyproject.toml file with a given version.""" + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(f""" +[project] +name = "test-project" +version = "{version}" +description = "A test project" +""") + temp_file.close() + return Path(temp_file.name) + + def create_pyproject_toml_no_version(self) -> Path: + """Helper to create a pyproject.toml without a version field.""" + 
temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(""" +[project] +name = "test-project" +description = "A test project without version" +""") + temp_file.close() + return Path(temp_file.name) + + def create_pyproject_toml_no_project(self) -> Path: + """Helper to create a pyproject.toml without a project section.""" + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(""" +[tool.poetry] +name = "test-project" +version = "1.0.0" +""") + temp_file.close() + return Path(temp_file.name) + + def create_malformed_toml(self) -> Path: + """Helper to create a malformed TOML file.""" + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(""" +[project +name = "test-project +version = "1.0.0" +""") + temp_file.close() + return Path(temp_file.name) + + # Test: Successful version match + def test_matching_versions(self): + """Test that matching versions result in success.""" + pyproject_file = self.create_pyproject_toml("1.2.3") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.2.3'] + + # Capture stdout + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Version mismatch + def test_version_mismatch(self): + """Test that mismatched versions result in failure with appropriate error message.""" + pyproject_file = self.create_pyproject_toml("1.2.3") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.2.4'] + + # Capture stderr + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + 
error_output = captured_error.getvalue() + self.assertIn("❌ Version mismatch detected!", error_output) + self.assertIn("pyproject.toml version: 1.2.3", error_output) + self.assertIn("Expected version: 1.2.4", error_output) + self.assertIn("Please update pyproject.toml to version 1.2.4", error_output) + finally: + pyproject_file.unlink() + + # Test: Missing version in pyproject.toml + def test_missing_version_field(self): + """Test handling of pyproject.toml without a version field.""" + pyproject_file = self.create_pyproject_toml_no_version() + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("❌ ERROR: No version found in pyproject.toml", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Missing project section + def test_missing_project_section(self): + """Test handling of pyproject.toml without a project section.""" + pyproject_file = self.create_pyproject_toml_no_project() + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("❌ ERROR: No version found in pyproject.toml", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + # Test: File not found + def test_file_not_found(self): + """Test handling of non-existent pyproject.toml file.""" + sys.argv = ['get_pyproject_version.py', '/nonexistent/pyproject.toml', '1.0.0'] + + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + + # Test: Malformed TOML + def test_malformed_toml(self): + """Test handling of malformed TOML file.""" + 
pyproject_file = self.create_malformed_toml() + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + finally: + pyproject_file.unlink() + + # Test: Incorrect number of arguments - too few + def test_too_few_arguments(self): + """Test that providing too few arguments results in usage error.""" + sys.argv = ['get_pyproject_version.py', 'pyproject.toml'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("Usage: python get_pyproject_version.py ", + captured_error.getvalue()) + + # Test: Incorrect number of arguments - too many + def test_too_many_arguments(self): + """Test that providing too many arguments results in usage error.""" + sys.argv = ['get_pyproject_version.py', 'pyproject.toml', '1.0.0', 'extra'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("Usage: python get_pyproject_version.py ", + captured_error.getvalue()) + + # Test: No arguments + def test_no_arguments(self): + """Test that providing no arguments results in usage error.""" + sys.argv = ['get_pyproject_version.py'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("Usage: python get_pyproject_version.py ", + captured_error.getvalue()) + + # Test: Version with pre-release tags + def test_version_with_prerelease_tags(self): + """Test matching versions with pre-release tags like alpha, beta, rc.""" + pyproject_file = self.create_pyproject_toml("1.2.3-rc.1") + + try: + sys.argv = 
['get_pyproject_version.py', str(pyproject_file), '1.2.3-rc.1'] + + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3-rc.1", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Version with build metadata + def test_version_with_build_metadata(self): + """Test matching versions with build metadata.""" + pyproject_file = self.create_pyproject_toml("1.2.3+build.123") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.2.3+build.123'] + + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3+build.123", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Various semantic version formats + def test_semantic_version_0_0_1(self): + """Test semantic version 0.0.1.""" + self._test_version_format("0.0.1") + + def test_semantic_version_1_0_0(self): + """Test semantic version 1.0.0.""" + self._test_version_format("1.0.0") + + def test_semantic_version_10_20_30(self): + """Test semantic version 10.20.30.""" + self._test_version_format("10.20.30") + + def test_semantic_version_alpha(self): + """Test semantic version with alpha tag.""" + self._test_version_format("1.2.3-alpha") + + def test_semantic_version_beta(self): + """Test semantic version with beta tag.""" + self._test_version_format("1.2.3-beta.1") + + def test_semantic_version_rc_with_build(self): + """Test semantic version with rc and build metadata.""" + self._test_version_format("1.2.3-rc.1+build.456") + + def _test_version_format(self, version: str): + """Helper method to test various semantic version formats.""" + pyproject_file = 
self.create_pyproject_toml(version) + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), version] + + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn(f"✅ Version consistency check passed: {version}", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Empty version string + def test_empty_version_string(self): + """Test handling of empty version string.""" + pyproject_file = self.create_pyproject_toml("") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + # Empty string is falsy, so it should trigger error + self.assertIn("❌", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + +class TestSuiteInfo(unittest.TestCase): + """Test suite metadata.""" + + def test_suite_info(self): + """Display test suite information.""" + print("\n" + "="*70) + print("Test Suite: get_pyproject_version.py") + print("Framework: unittest (Python built-in)") + print("TOML Library: tomllib (Python 3.11+ built-in)") + print("="*70) + self.assertTrue(True) + + +if __name__ == '__main__': + # Run tests with verbose output + unittest.main(verbosity=2) diff --git a/.github/workflows/docker-publish-kasm.yml b/.github/workflows/docker-publish-kasm.yml new file mode 100644 index 00000000..d97dbf6f --- /dev/null +++ b/.github/workflows/docker-publish-kasm.yml @@ -0,0 +1,29 @@ +name: Build and Publish CUA Ubuntu Container + +on: + push: + branches: + - main + tags: + - "docker-kasm-v*.*.*" + paths: + - "libs/kasm/**" + - ".github/workflows/docker-publish-kasm.yml" + - ".github/workflows/docker-reusable-publish.yml" + pull_request: + paths: + - "libs/kasm/**" + - 
".github/workflows/docker-publish-kasm.yml" + - ".github/workflows/docker-reusable-publish.yml" + +jobs: + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-ubuntu + context_dir: libs/kasm + dockerfile_path: Dockerfile + tag_prefix: docker-kasm-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-publish-xfce.yml b/.github/workflows/docker-publish-xfce.yml new file mode 100644 index 00000000..fa64849e --- /dev/null +++ b/.github/workflows/docker-publish-xfce.yml @@ -0,0 +1,29 @@ +name: Build and Publish CUA XFCE Container + +on: + push: + branches: + - main + tags: + - "docker-xfce-v*.*.*" + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + pull_request: + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + +jobs: + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-xfce + context_dir: libs/xfce + dockerfile_path: Dockerfile + tag_prefix: docker-xfce-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-reusable-publish.yml b/.github/workflows/docker-reusable-publish.yml new file mode 100644 index 00000000..3472883f --- /dev/null +++ b/.github/workflows/docker-reusable-publish.yml @@ -0,0 +1,155 @@ +name: Reusable Docker Publish Workflow + +on: + workflow_call: + inputs: + image_name: + description: "Name of the Docker image (e.g. cua-ubuntu, cua-xfce)" + required: true + type: string + context_dir: + description: "Directory containing the Dockerfile relative to workspace root (e.g. libs/kasm, libs/xfce)" + required: true + type: string + dockerfile_path: + description: "Path to Dockerfile relative to context_dir (e.g. 
Dockerfile)" + required: false + type: string + default: "Dockerfile" + tag_prefix: + description: "Prefix for semantic version tags (e.g. docker-kasm-v, docker-xfce-v)" + required: true + type: string + docker_hub_org: + description: "Docker Hub organization name" + required: false + type: string + default: "trycua" + secrets: + DOCKER_HUB_TOKEN: + required: true + +jobs: + build-and-push: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Prepare platform tag + id: platform + run: | + # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) + PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') + echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ inputs.docker_hub_org }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Extract metadata (PR) + if: github.event_name == 'pull_request' + id: meta-pr + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=${{ github.sha }} + + - name: Extract metadata (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + id: meta-main + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=latest + + - name: Extract metadata (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + id: meta-semver + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }} + 
type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }} + type=raw,value=latest + + - name: Build and push Docker image (PR) + if: github.event_name == 'pull_request' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-pr.outputs.tags }} + labels: ${{ steps.meta-pr.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-main.outputs.tags }} + labels: ${{ steps.meta-main.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-semver.outputs.tags }} + labels: ${{ steps.meta-semver.outputs.labels }} + platforms: ${{ 
matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Image digest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" + else + echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" + fi + + - name: print image tags + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" + else + echo "Image tags: ${{ steps.meta-main.outputs.tags }}" + fi diff --git a/.github/workflows/pypi-reusable-publish.yml b/.github/workflows/pypi-reusable-publish.yml index f1eb045e..4a220610 100644 --- a/.github/workflows/pypi-reusable-publish.yml +++ b/.github/workflows/pypi-reusable-publish.yml @@ -71,6 +71,16 @@ jobs: echo "VERSION=${{ inputs.version }}" >> $GITHUB_ENV echo "version=${{ inputs.version }}" >> $GITHUB_OUTPUT + - name: Verify version consistency + run: | + # Install toml parser + pip install toml + + # Verify version matches using script (exits with error if mismatch) + python ${GITHUB_WORKSPACE}/.github/scripts/get_pyproject_version.py \ + ${{ inputs.package_dir }}/pyproject.toml \ + ${{ inputs.version }} + - name: Initialize PDM in package directory run: 
| # Make sure we're working with a properly initialized PDM project @@ -82,21 +92,6 @@ jobs: pdm lock fi - - name: Set version in package - run: | - cd ${{ inputs.package_dir }} - # Replace pdm bump with direct edit of pyproject.toml - if [[ "$OSTYPE" == "darwin"* ]]; then - # macOS version of sed needs an empty string for -i - sed -i '' "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml - else - # Linux version - sed -i "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml - fi - # Verify version was updated - echo "Updated version in pyproject.toml:" - grep "version =" pyproject.toml - # Conditional step for lume binary download (only for pylume package) - name: Download and setup lume binary if: inputs.is_lume_package diff --git a/.github/workflows/test-validation-script.yml b/.github/workflows/test-validation-script.yml new file mode 100644 index 00000000..cc11dda7 --- /dev/null +++ b/.github/workflows/test-validation-script.yml @@ -0,0 +1,36 @@ +name: Test validation script + +on: + pull_request: + paths: + - '.github/scripts/**' + - '.github/workflows/test-validation-script.yml' + push: + branches: + - main + paths: + - '.github/scripts/**' + - '.github/workflows/test-validation-script.yml' + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest toml + + - name: Run tests + run: | + cd .github/scripts + pytest tests/ -v diff --git a/README.md b/README.md index e5451a0f..5f32c316 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ from computer import Computer async with Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) as computer: # Take screenshot @@ -192,7 +192,7 @@ Join our [Discord community](https://discord.com/invite/mVnXXpdE85) 
to discuss i ## License -Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details. +Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE.md) file for details. Portions of this project, specifically components adapted from Kasm Technologies Inc., are also licensed under the MIT License. See [libs/kasm/LICENSE](libs/kasm/LICENSE) for details. @@ -223,3 +223,9 @@ This project is not affiliated with, endorsed by, or sponsored by Apple Inc., Ca Thank you to all our supporters! [![Stargazers over time](https://starchart.cc/trycua/cua.svg?variant=adaptive)](https://starchart.cc/trycua/cua) + +## Sponsors + +Thank you to all our [GitHub Sponsors](https://github.com/sponsors/trycua)! + +coderabbit-cli diff --git a/blog/introducing-cua-cloud-containers.md b/blog/introducing-cua-cloud-containers.md index 86cbd400..1555da43 100644 --- a/blog/introducing-cua-cloud-containers.md +++ b/blog/introducing-cua-cloud-containers.md @@ -1,8 +1,8 @@ -# Introducing Cua Cloud Containers: Computer-Use Agents in the Cloud +# Introducing Cua Cloud Sandbox: Computer-Use Agents in the Cloud *Published on May 28, 2025 by Francesco Bonacci* -Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Containers** – the easiest way to deploy Computer-Use Agents at scale. +Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Sandbox** – the easiest way to deploy Computer-Use Agents at scale.
@@ -10,9 +10,9 @@ Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./bui ## What is Cua Cloud? -Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured cloud containers with a single command. Each container comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs. +Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured Cloud Sandbox instances with a single command. Each sandbox comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs. -## Why Cua Cloud Containers? +## Why Cua Cloud Sandbox? Four months ago, we launched [**Lume**](https://github.com/trycua/cua/tree/main/libs/lume) and [**Cua**](https://github.com/trycua/cua) with the goal to bring sandboxed VMs and Computer-Use Agents on Apple Silicon. The developer's community response was incredible 🎉 @@ -40,7 +40,7 @@ export CUA_API_KEY=your_api_key_here export CUA_CONTAINER_NAME=my-agent-container ``` -### Step 2: Launch Your First Container +### Step 2: Launch Your First Sandbox ```python import asyncio @@ -80,7 +80,7 @@ We're launching with **three compute tiers** to match your workload needs: - **Medium** (2 vCPU, 8GB RAM) - Ideal for most production workloads - **Large** (8 vCPU, 32GB RAM) - Built for complex, resource-intensive operations -Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic cleanup on termination. 
+Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic cleanup on termination for sandboxes. ## How some customers are using Cua Cloud today @@ -202,23 +202,23 @@ asyncio.run(parallel_scraping()) ## Cost Optimization Tips -To optimize your costs, use appropriate container sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize container spin-up time, and always remember to terminate containers when your work is complete. +To optimize your costs, use appropriate sandbox sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize sandbox spin-up time, and always remember to terminate sandboxes when your work is complete. ## Security Considerations -Cua Cloud runs all containers in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories. +Cua Cloud runs all sandboxes in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories. ## What's Next for Cua Cloud We're just getting started! Here's what's coming in the next few months: -### Elastic Autoscaled Container Pools +### Elastic Autoscaled Sandbox Pools -Soon you'll be able to create elastic container pools that automatically scale based on demand. Define minimum and maximum container counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention. +Soon you'll be able to create elastic sandbox pools that automatically scale based on demand. Define minimum and maximum sandbox counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention. 
### Windows and macOS Cloud Support -While we're launching with Linux containers, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API. +While we're launching with Linux sandboxes, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API. Stay tuned for updates and join our [**Discord**](https://discord.gg/cua-ai) to vote on which features you'd like to see first! diff --git a/blog/lume-to-containerization.md b/blog/lume-to-containerization.md index cf468e0e..7a779536 100644 --- a/blog/lume-to-containerization.md +++ b/blog/lume-to-containerization.md @@ -2,7 +2,7 @@ *Published on June 10, 2025 by Francesco Bonacci* -Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Containers](./introducing-cua-cloud-containers). Here's our take on Apple's announcement. +Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Sandbox](./introducing-cua-cloud-containers). Here's our take on Apple's announcement. 
## Our Story @@ -168,7 +168,7 @@ Apple's announcement confirms we're on the right path. Here's what we're looking - [Apple Containerization Framework](https://github.com/apple/containerization) - [Lume - Direct VM Management](https://github.com/trycua/cua/tree/main/libs/lume) - [Lumier - Docker Interface for VMs](https://github.com/trycua/cua/tree/main/libs/lumier) -- [Cua Cloud Containers](https://trycua.com) +- [Cua Cloud Sandbox](https://trycua.com) - [Join our Discord](https://discord.gg/cua-ai) --- diff --git a/blog/sandboxed-python-execution.md b/blog/sandboxed-python-execution.md index 9261e955..c7b115c4 100644 --- a/blog/sandboxed-python-execution.md +++ b/blog/sandboxed-python-execution.md @@ -235,7 +235,7 @@ print(f"Security audit: {audit_result}") ### Desktop automation in the cloud -Here's where things get really interesting. Cua cloud containers come with full desktop environments, so you can automate GUIs: +Here's where things get really interesting. Cua Cloud Sandbox comes with full desktop environments, so you can automate GUIs: ```python @sandboxed("desktop_env") diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx b/docs/content/docs/agent-sdk/agent-loops.mdx index 33bf66e2..db1d8455 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -15,20 +15,34 @@ To run an agent loop simply do: ```python from agent import ComputerAgent +import asyncio from computer import Computer -computer = Computer() # Connect to a cua container -agent = ComputerAgent( - model="anthropic/claude-3-5-sonnet-20241022", - tools=[computer] -) +async def take_screenshot(): + async with Computer( + os_type="linux", + provider_type="cloud", + name="your-sandbox-name", + api_key="your-api-key" + ) as computer: -prompt = "Take a screenshot and tell me what you see" + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + max_trajectory_budget=5.0 + ) -async for result in 
agent.run(prompt): - if result["output"][-1]["type"] == "message": - print("Agent:", result["output"][-1]["content"][0]["text"]) + messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] + + async for result in agent.run(messages): + for item in result["output"]: + if item["type"] == "message": + print(item["content"][0]["text"]) + + +if __name__ == "__main__": + asyncio.run(take_screenshot()) ``` For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page. @@ -170,4 +184,4 @@ except BudgetExceededException: print("Budget limit exceeded") except Exception as e: print(f"Agent error: {e}") -``` \ No newline at end of file +``` diff --git a/docs/content/docs/computer-sdk/cloud-vm-management.mdx b/docs/content/docs/computer-sdk/cloud-vm-management.mdx new file mode 100644 index 00000000..a48984ff --- /dev/null +++ b/docs/content/docs/computer-sdk/cloud-vm-management.mdx @@ -0,0 +1,240 @@ +--- +title: Cloud VM Management +description: Manage your Cua Cloud sandboxes (VMs) via Python SDK or HTTP API +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +Use these concise examples to manage your cloud sandboxes. Pick either the Python SDK or plain HTTP (curl) for each action. + +> You need a CUA Database API key. Set it as an environment variable `CUA_API_KEY`. 
+ +## Status values +- `pending` – VM deployment in progress +- `running` – VM is active and accessible +- `stopped` – VM is stopped but not terminated +- `terminated` – VM has been permanently destroyed +- `failed` – VM deployment or operation failed + +--- + +## List VMs + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + # Optional: point to a different API base + # os.environ["CUA_API_BASE"] = "https://api.cua.ai" + + provider = CloudProvider(api_key=api_key, verbose=False) + async with provider: + vms = await provider.list_vms() + for vm in vms: + print({ + "name": vm["name"], + "status": vm["status"], + "api_url": vm.get("api_url"), + "vnc_url": vm.get("vnc_url"), + }) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms" + ``` + + Example response: + ```json + [ + { + "name": "s-windows-x4snp46ebf", + "status": "running" + } + ] + ``` + + + + +--- + +## Start a VM +Provide the VM name you want to start. + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" # e.g., "m-linux-96lcxd2c2k" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.run_vm(name) + print(resp) # { "name": name, "status": "starting" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/start" -i + ``` + + Example response headers (no body): + ```text + HTTP/1.1 204 No Content + ``` + + + + +--- + +## Stop a VM +Stops the VM asynchronously. 
+ + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.stop_vm(name) + print(resp) # { "name": name, "status": "stopping" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/stop" + ``` + + Example response: + ```json + { "status": "stopping" } + ``` + + + + +--- + +## Restart a VM +Restarts the VM asynchronously. + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.restart_vm(name) + print(resp) # { "name": name, "status": "restarting" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/restart" + ``` + + Example response: + ```json + { "status": "restarting" } + ``` + + + + +--- + +## Query a VM by name +Query the computer-server running on the VM. Useful for checking details like status or OS type. 
+ + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + info = await provider.get_vm(name) + print(info) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl "https://my-vm-name.containers.cloud.cua.ai:8443/status" + ``` + + Example response: + ```json + { "status": "ok", "os_type": "linux", "features": ["agent"] } + ``` + + + diff --git a/docs/content/docs/computer-sdk/computers.mdx b/docs/content/docs/computer-sdk/computers.mdx index 0b11d20d..1c3558da 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -9,9 +9,11 @@ Before we can automate apps using AI, we need to first connect to a Computer Ser Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop. -## cua cloud container +## Cloud Sandbox -This is a cloud container running the Computer Server. This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website. +**Easiest & safest way to get started - works on any host OS** + +This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/). @@ -21,11 +23,11 @@ This is a cloud container running the Computer Server. This is the easiest & saf computer = Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) - await computer.run() # Connect to the container + await computer.run() # Connect to the sandbox ``` @@ -35,33 +37,33 @@ This is a cloud container running the Computer Server. 
This is the easiest & saf const computer = new Computer({ osType: OSType.LINUX, - name: "your-container-name", + name: "your-sandbox-name", apiKey: "your-api-key" }); - await computer.run(); // Connect to the container + await computer.run(); // Connect to the sandbox ``` -## cua local containers +## Linux on Docker -cua provides local containers using different providers depending on your host operating system: +**Run Linux desktop locally on macOS, Windows, or Linux hosts** - - - - 1. Install lume cli +Cua provides two Docker images for running Linux desktops: + + + + + **Recommended for most use cases** - lightweight XFCE desktop with Firefox + + 1. Install Docker Desktop or Docker Engine + + 2. Pull the CUA XFCE image ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua container - - ```bash - lume run macos-sequoia-cua:latest + docker pull --platform=linux/amd64 trycua/cua-xfce:latest ``` 3. Connect with Computer @@ -70,44 +72,23 @@ cua provides local containers using different providers depending on your host o from computer import Computer computer = Computer( - os_type="macos", - provider_type="lume", - name="macos-sequoia-cua:latest" + os_type="linux", + provider_type="docker", + image="trycua/cua-xfce:latest", + name="my-xfce-container" ) - await computer.run() # Launch & connect to the container + await computer.run() # Launch & connect to Docker sandbox ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency + - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` + **Full-featured Ubuntu desktop** with additional applications - 3. 
Windows Sandbox will be automatically configured when you run the CLI - - ```python - from computer import Computer - - computer = Computer( - os_type="windows", - provider_type="winsandbox", - ephemeral=True # Windows Sandbox is always ephemeral - ) - - await computer.run() # Launch & connect to Windows Sandbox - ``` - - - - 1. Install Docker Desktop or Docker Engine - 2. Build or pull the CUA Ubuntu container + 2. Build or pull the CUA KASM image ```bash # Option 1: Pull from Docker Hub @@ -127,15 +108,70 @@ cua provides local containers using different providers depending on your host o os_type="linux", provider_type="docker", image="trycua/cua-ubuntu:latest", - name="my-cua-container" + name="my-kasm-container" ) - await computer.run() # Launch & connect to Docker container + await computer.run() # Launch & connect to Docker sandbox ``` +## Windows Sandbox + +**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + +1. Enable Windows Sandbox +2. Install pywinsandbox dependency + +```bash +pip install -U git+git://github.com/karkason/pywinsandbox.git +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="windows", + provider_type="winsandbox", + ephemeral=True # Windows Sandbox is always ephemeral +) + +await computer.run() # Launch & connect to Windows Sandbox +``` + +## macOS VM + +**macOS hosts only - requires Lume CLI** + +1. Install lume cli + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" +``` + +2. Start a local cua macOS VM + +```bash +lume run macos-sequoia-cua:latest +``` + +3. 
Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" +) + +await computer.run() # Launch & connect to the sandbox +``` + ## Your host desktop You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions. diff --git a/docs/content/docs/computer-sdk/meta.json b/docs/content/docs/computer-sdk/meta.json index 92e14612..f09c6057 100644 --- a/docs/content/docs/computer-sdk/meta.json +++ b/docs/content/docs/computer-sdk/meta.json @@ -3,6 +3,7 @@ "description": "Build computer-using agents with the Computer SDK", "pages": [ "computers", + "cloud-vm-management", "commands", "computer-ui", "sandboxed-python" diff --git a/docs/content/docs/meta.json b/docs/content/docs/meta.json index 9aea034a..bee0e1c8 100644 --- a/docs/content/docs/meta.json +++ b/docs/content/docs/meta.json @@ -5,9 +5,8 @@ "defaultOpen": true, "pages": [ "index", - "quickstart-ui", - "quickstart-cli", "quickstart-devs", + "quickstart-cli", "telemetry", "---[BookCopy]Computer Playbook---", "...computer-sdk", diff --git a/docs/content/docs/quickstart-cli.mdx b/docs/content/docs/quickstart-cli.mdx index a22907ce..7bf53773 100644 --- a/docs/content/docs/quickstart-cli.mdx +++ b/docs/content/docs/quickstart-cli.mdx @@ -23,39 +23,45 @@ cua combines Computer (interface) + Agent (AI) for automating desktop apps. The ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your cua computer. **Cloud Sandbox is recommended** for the easiest setup: + + + + + **Easiest & safest way to get started - works on any host OS** - - - - **Easiest & safest way to get started** - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** 3. 
Create a **Medium, Ubuntu 22** container 4. Note your container name and API key - + Your cloud container will be automatically configured and ready to use. - - - 1. Install lume cli + + + **Run Linux desktop locally on macOS, Windows, or Linux hosts** + + 1. Install Docker Desktop or Docker Engine + + 2. Pull the CUA XFCE container (lightweight desktop) ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + docker pull --platform=linux/amd64 trycua/cua-xfce:latest ``` - 2. Start a local cua container + Or use KASM for a full-featured desktop: ```bash - lume run macos-sequoia-cua:latest + docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) + + + **Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + + 1. Enable Windows Sandbox 2. Install pywinsandbox dependency ```bash @@ -65,14 +71,20 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + **macOS hosts only - requires Lume CLI** + + 1. Install lume cli ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + ``` + + 2. Start a local cua macOS VM + + ```bash + lume run macos-sequoia-cua:latest ``` @@ -300,7 +312,7 @@ python -m agent.cli omniparser+ollama_chat/llama3.2:latest If you haven't set up environment variables, the CLI will guide you through the setup: -1. **Container Name**: Enter your cua container name (or get one at [trycua.com](https://www.trycua.com/)) +1. **Sandbox Name**: Enter your cua sandbox name (or get one at [trycua.com](https://www.trycua.com/)) 2. **CUA API Key**: Enter your cua API key 3. 
**Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.) diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index 37367709..8a9adea7 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -1,61 +1,60 @@ --- -title: Quickstart (for Developers) -description: Get started with cua in 5 steps +title: Quickstart +description: Get started with Cua in three steps icon: Rocket --- import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -Get up and running with cua in 5 simple steps. +This quickstart guides you through setting up your [computer environment](#set-up-your-computer-environment), programmatic control with a [Cua computer](#using-computer), and task automation with a [Cua agent](#using-agent): - - -## Introduction - -cua combines Computer (interface) + Agent (AI) for automating desktop apps. Computer handles clicks/typing, Agent provides the intelligence. - - ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute. + +You can run your Cua computer in the cloud (recommended for easiest setup), locally on macOS with Lume, locally on Windows with a Windows Sandbox, or in a Docker container on any platform. Choose the option that matches your system and needs. + + + + + Cua Cloud Sandbox provides virtual machines that run Ubuntu. - - - - **Easiest & safest way to get started** - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** - 3. Create a **Medium, Ubuntu 22** container - 4. Note your container name and API key - - Your cloud container will be automatically configured and ready to use. + 3. Create a **Medium, Ubuntu 22** sandbox + 4. 
Note your sandbox name and API key + + Your Cloud Sandbox will be automatically configured and ready to use. - + + + Lume containers are macOS virtual machines that run on a macOS host machine. - 1. Install lume cli + 1. Install the Lume CLI: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` - 2. Start a local cua container + 2. Start a local Cua sandbox: ```bash lume run macos-sequoia-cua:latest ``` - + + + Windows Sandbox provides Windows virtual environments that run on a Windows host machine. - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency + 1. Enable [Windows Sandbox](https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/windows-sandbox-install) (requires Windows 10 Pro/Enterprise or Windows 11) + 2. Install the `pywinsandbox` dependency: ```bash pip install -U git+git://github.com/karkason/pywinsandbox.git @@ -64,11 +63,13 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + Docker provides a way to run Ubuntu containers on any host machine. + + 1. Install Docker Desktop or Docker Engine: + + 2. Pull the CUA Ubuntu sandbox: ```bash docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest @@ -81,90 +82,203 @@ Choose how you want to run your cua computer. **Cloud containers are recommended -## Install cua +## Using Computer + +Connect to your Cua computer and perform basic interactions, such as taking screenshots or simulating user input. 
+ Install the Cua computer Python SDK: ```bash - pip install "cua-agent[all]" cua-computer + pip install cua-computer + ``` - # or install specific providers - pip install "cua-agent[openai]" # OpenAI computer-use-preview support - pip install "cua-agent[anthropic]" # Anthropic Claude support - pip install "cua-agent[omni]" # Omniparser + any LLM support - pip install "cua-agent[uitars]" # UI-TARS - pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support - pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support - pip install "cua-agent[glm45v-hf]" # GLM-4.5V + Huggingface support - pip install "cua-agent[ui]" # Gradio UI support + Then, connect to your desired computer environment: + + + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="cloud", + name="your-sandbox-name", + api_key="your-api-key" + ) + await computer.run() # Connect to the sandbox + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" + ) + await computer.run() # Launch & connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="windows", + provider_type="windows_sandbox" + ) + await computer.run() # Launch & connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="docker", + name="trycua/cua-ubuntu:latest" + ) + await computer.run() # Launch & connect to the container + ``` + + + Install and run `cua-computer-server`: + ```bash + pip install cua-computer-server + python -m computer_server + ``` + + Then, use the `Computer` object to connect: + ```python + from computer import Computer + + computer = Computer(use_host_computer_server=True) + await computer.run() # Connect to the host desktop + ``` + + + + Once connected, you can perform interactions: + ```python + try: + # Take a screenshot of 
the computer's current display + screenshot = await computer.interface.screenshot() + # Simulate a left-click at coordinates (100, 100) + await computer.interface.left_click(100, 100) + # Type "Hello!" into the active application + await computer.interface.type("Hello!") + finally: + await computer.close() ``` + Install the Cua computer TypeScript SDK: ```bash npm install @trycua/computer ``` - - - + Then, connect to your desired computer environment: - + + + ```typescript + import { Computer, OSType } from '@trycua/computer'; -## Using Computer + const computer = new Computer({ + osType: OSType.LINUX, + name: "your-sandbox-name", + apiKey: "your-api-key" + }); + await computer.run(); // Connect to the sandbox + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - - - ```python - from computer import Computer + const computer = new Computer({ + osType: OSType.MACOS, + providerType: ProviderType.LUME, + name: "macos-sequoia-cua:latest" + }); + await computer.run(); // Launch & connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - async with Computer( - os_type="linux", - provider_type="cloud", - name="your-container-name", - api_key="your-api-key" - ) as computer: - # Take screenshot - screenshot = await computer.interface.screenshot() + const computer = new Computer({ + osType: OSType.WINDOWS, + providerType: ProviderType.WINDOWS_SANDBOX + }); + await computer.run(); // Launch & connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - # Click and type - await computer.interface.left_click(100, 100) - await computer.interface.type("Hello!") - ``` + const computer = new Computer({ + osType: OSType.LINUX, + providerType: ProviderType.DOCKER, + name: "trycua/cua-ubuntu:latest" + }); + await computer.run(); // Launch & connect to the container + ``` + + + First, install and run 
`cua-computer-server`: + ```bash + pip install cua-computer-server + python -m computer_server + ``` - - + Then, use the `Computer` object to connect: + ```typescript + import { Computer } from '@trycua/computer'; + + const computer = new Computer({ useHostComputerServer: true }); + await computer.run(); // Connect to the host desktop + ``` + + + + Once connected, you can perform interactions: ```typescript - import { Computer, OSType } from '@trycua/computer'; - - const computer = new Computer({ - osType: OSType.LINUX, - name: "your-container-name", - apiKey: "your-api-key" - }); - - await computer.run(); - try { - // Take screenshot + // Take a screenshot of the computer's current display const screenshot = await computer.interface.screenshot(); - - // Click and type + // Simulate a left-click at coordinates (100, 100) await computer.interface.leftClick(100, 100); + // Type "Hello!" into the active application await computer.interface.typeText("Hello!"); } finally { await computer.close(); } ``` - +Learn more about computers in the [Cua computers documentation](/computer-sdk/computers). You will see how to automate computers with agents in the next step. + ## Using Agent +Utilize an Agent to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment. + +Install the Cua agent Python SDK: +```bash +pip install "cua-agent[all]" +``` + +Then, use the `ComputerAgent` object: ```python from agent import ComputerAgent @@ -182,12 +296,13 @@ async for result in agent.run(messages): print(item["content"][0]["text"]) ``` +Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available models in [Supported Models](/agent-sdk/supported-model-providers/). 
+ ## Next Steps -{/* - Explore the [SDK documentation](/sdk) for advanced features */} - -- Learn about [trajectory tracking](/agent-sdk/callbacks/trajectories) and [callbacks](/agent-sdk/callbacks/agent-lifecycle) -- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for support +- Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands) +- Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/) +- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help diff --git a/docs/content/docs/quickstart-ui.mdx b/docs/content/docs/quickstart-ui.mdx deleted file mode 100644 index 72bac935..00000000 --- a/docs/content/docs/quickstart-ui.mdx +++ /dev/null @@ -1,216 +0,0 @@ ---- -title: Quickstart (GUI) -description: Get started with the cua Agent UI in 3 steps -icon: Rocket ---- - -import { Step, Steps } from 'fumadocs-ui/components/steps'; -import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; - -Get up and running with the cua Agent UI in 3 simple steps. - - - - -## Introduction - -cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent UI provides a simple chat interface to control your remote computer using natural language. - - - - - -## Set Up Your Computer Environment - -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: - - - - - **Easiest & safest way to get started** - - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) - 2. Navigate to **Dashboard > Containers > Create Instance** - 3. Create a **Medium, Ubuntu 22** container - 4. Note your container name and API key - - Your cloud container will be automatically configured and ready to use. - - - - - 1. 
Install lume cli - - ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua container - - ```bash - lume run macos-sequoia-cua:latest - ``` - - - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency - - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` - - 3. Windows Sandbox will be automatically configured when you run the CLI - - - - - 1. Install Docker Desktop or Docker Engine - - 2. Pull the CUA Ubuntu container - - ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest - ``` - - - - - - - - -## Install and Run cua - - - - - -### Install uv - - - - -```bash -# Use curl to download the script and execute it with sh: -curl -LsSf https://astral.sh/uv/install.sh | sh - -# If your system doesn't have curl, you can use wget: -# wget -qO- https://astral.sh/uv/install.sh | sh -``` - - - - -```powershell -# Use irm to download the script and execute it with iex: -powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" -``` - - - - -### Install Python 3.12 - -```bash -uv python install 3.12 -``` - -### Run cua - -```bash -uv run --with "cua-agent[ui]" -m agent.ui -``` - - - - - -### Install conda - - - - -```bash -mkdir -p ~/miniconda3 -curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh -bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 -rm ~/miniconda3/miniconda.sh -source ~/miniconda3/bin/activate -``` - - - - -```bash -mkdir -p ~/miniconda3 -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh -bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 -rm ~/miniconda3/miniconda.sh -source ~/miniconda3/bin/activate -``` - - - - -```powershell -wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile 
".\miniconda.exe" -Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait -del .\miniconda.exe -``` - - - - -### Create and activate Python 3.12 environment - -```bash -conda create -n cua python=3.12 -conda activate cua -``` - -### Install and run cua - -```bash -pip install "cua-agent[ui]" cua-computer -python -m agent.ui -``` - - - - - -### Install cua - -```bash -pip install "cua-agent[ui]" cua-computer -``` - -### Run the Agent UI - -```bash -python -m agent.ui -``` - - - - - -### Start Chatting - -Open your browser to the displayed URL and start chatting with your computer-using agent. - -You can ask your agent to perform actions like: - -- "Open Firefox and go to github.com" -- "Take a screenshot and tell me what's on the screen" -- "Type 'Hello world' into the terminal" - - - - ---- - -For advanced Python usage, see the [Quickstart for Developers](/quickstart-devs). diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py new file mode 100644 index 00000000..88b1ea66 --- /dev/null +++ b/examples/cloud_api_examples.py @@ -0,0 +1,70 @@ +import asyncio +import os +from utils import load_dotenv_files + +load_dotenv_files() + +from computer.providers.cloud.provider import CloudProvider + +async def main() -> None: + api_key = os.getenv("CUA_API_KEY") + if not api_key: + raise RuntimeError("CUA_API_KEY environment variable is not set") + api_base = os.getenv("CUA_API_BASE") + if api_base: + print(f"Using API base: {api_base}") + + provider = CloudProvider(api_key=api_key, verbose=True) + async with provider: + + # List all VMs + vms = await provider.list_vms() + print(f"Found {len(vms)} VM(s)") + for vm in vms: + print( + f"name: {vm['name']}\n", + f"status: {vm['status']}\n", # pending, running, stopped, terminated, failed + f"api_url: {vm.get('api_url')}\n", + f"vnc_url: {vm.get('vnc_url')}\n", + ) + + # # --- Additional operations (commented out) --- + # # To stop a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await 
provider.stop_vm(name) + # print( + # "stop_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # stopping + # ) + + # # To start a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.run_vm(name) + # print( + # "run_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # starting + # ) + + # # To restart a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.restart_vm(name) + # print( + # "restart_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # restarting + # ) + + # # To probe a VM's status via its public hostname (if you know the name): + # name = "m-linux-96lcxd2c2k" + # info = await provider.get_vm(name) + # print("get_vm info:\n", + # f"name: {info['name']}\n", + # f"status: {info['status']}\n", # running + # f"api_url: {info.get('api_url')}\n", + # f"os_type: {info.get('os_type')}\n", + # ) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/computer-example-ts/README.md b/examples/computer-example-ts/README.md index 500362c9..7e7fc81e 100644 --- a/examples/computer-example-ts/README.md +++ b/examples/computer-example-ts/README.md @@ -1,13 +1,13 @@ # cua-cloud-openai Example -This example demonstrates how to control a cua Cloud container using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. +This example demonstrates how to control a Cua Cloud Sandbox using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. ## Overview -- Connects to a cua Cloud container via the `@trycua/computer` library +- Connects to a Cua Cloud Sandbox via the `@trycua/computer` library - Sends screenshots and instructions to OpenAI's computer-use model -- Executes AI-generated actions (clicks, typing, etc.) inside the container -- Designed for Linux containers, but can be adapted for other OS types +- Executes AI-generated actions (clicks, typing, etc.) 
inside the sandbox +- Designed for Linux sandboxes, but can be adapted for other OS types ## Getting Started @@ -20,8 +20,8 @@ This example demonstrates how to control a cua Cloud container using the OpenAI 2. **Set up environment variables:** Create a `.env` file with the following variables: - `OPENAI_API_KEY` — your OpenAI API key - - `CUA_API_KEY` — your cua Cloud API key - - `CUA_CONTAINER_NAME` — the name of your provisioned container + - `CUA_API_KEY` — your Cua Cloud API key + - `CUA_CONTAINER_NAME` — the name of your provisioned sandbox 3. **Run the example:** @@ -38,7 +38,7 @@ This example demonstrates how to control a cua Cloud container using the OpenAI For a step-by-step tutorial and more detailed explanation, see the accompanying blog post: -➡️ [Controlling a cua Cloud Container with JavaScript](https://placeholder-url-to-blog-post.com) +➡️ [Controlling a Cua Cloud Sandbox with JavaScript](https://placeholder-url-to-blog-post.com) _(This link will be updated once the article is published.)_ diff --git a/libs/python/agent/agent/cli.py b/libs/python/agent/agent/cli.py index 0ea840d2..7edc99e1 100644 --- a/libs/python/agent/agent/cli.py +++ b/libs/python/agent/agent/cli.py @@ -226,6 +226,13 @@ Examples: help="Model string (e.g., 'openai/computer-use-preview', 'anthropic/claude-3-5-sonnet-20241022')" ) + parser.add_argument( + "--provider", + choices=["cloud", "lume", "winsandbox", "docker"], + default="cloud", + help="Computer provider to use: cloud (default), lume, winsandbox, or docker" + ) + parser.add_argument( "--images", type=int, @@ -257,6 +264,12 @@ Examples: help="Initial prompt to send to the agent. Leave blank for interactive mode." ) + parser.add_argument( + "--prompt-file", + type=Path, + help="Path to a UTF-8 text file whose contents will be used as the initial prompt. If provided, overrides --prompt." 
+ ) + parser.add_argument( "--predict-click", dest="predict_click", @@ -289,33 +302,35 @@ Examples: container_name = os.getenv("CUA_CONTAINER_NAME") cua_api_key = os.getenv("CUA_API_KEY") - # Prompt for missing environment variables + # Prompt for missing environment variables (container name always required) if not container_name: - print_colored("CUA_CONTAINER_NAME not set.", dim=True) - print_colored("You can get a CUA container at https://www.trycua.com/", dim=True) - container_name = input("Enter your CUA container name: ").strip() - if not container_name: - print_colored("❌ Container name is required.") - sys.exit(1) - - if not cua_api_key: + if args.provider == "cloud": + print_colored("CUA_CONTAINER_NAME not set.", dim=True) + print_colored("You can get a CUA container at https://www.trycua.com/", dim=True) + container_name = input("Enter your CUA container name: ").strip() + if not container_name: + print_colored("❌ Container name is required.") + sys.exit(1) + else: + container_name = "cli-sandbox" + + # Only require API key for cloud provider + if args.provider == "cloud" and not cua_api_key: print_colored("CUA_API_KEY not set.", dim=True) cua_api_key = input("Enter your CUA API key: ").strip() if not cua_api_key: - print_colored("❌ API key is required.") + print_colored("❌ API key is required for cloud provider.") sys.exit(1) # Check for provider-specific API keys based on model provider_api_keys = { "openai/": "OPENAI_API_KEY", "anthropic/": "ANTHROPIC_API_KEY", - "omniparser+": "OPENAI_API_KEY", - "omniparser+": "ANTHROPIC_API_KEY", } # Find matching provider and check for API key for prefix, env_var in provider_api_keys.items(): - if args.model.startswith(prefix): + if prefix in args.model: if not os.getenv(env_var): print_colored(f"{env_var} not set.", dim=True) api_key = input(f"Enter your {env_var.replace('_', ' ').title()}: ").strip() @@ -335,13 +350,25 @@ Examples: print_colored("Make sure agent and computer libraries are installed.", 
Colors.YELLOW) sys.exit(1) + # Resolve provider -> os_type, provider_type, api key requirement + provider_map = { + "cloud": ("linux", "cloud", True), + "lume": ("macos", "lume", False), + "winsandbox": ("windows", "winsandbox", False), + "docker": ("linux", "docker", False), + } + os_type, provider_type, needs_api_key = provider_map[args.provider] + + computer_kwargs = { + "os_type": os_type, + "provider_type": provider_type, + "name": container_name, + } + if needs_api_key: + computer_kwargs["api_key"] = cua_api_key # type: ignore + # Create computer instance - async with Computer( - os_type="linux", - provider_type="cloud", - name=container_name, - api_key=cua_api_key - ) as computer: + async with Computer(**computer_kwargs) as computer: # type: ignore # Create agent agent_kwargs = { @@ -442,8 +469,17 @@ Examples: # Done sys.exit(0) + # Resolve initial prompt from --prompt-file or --prompt + initial_prompt = args.prompt or "" + if args.prompt_file: + try: + initial_prompt = args.prompt_file.read_text(encoding="utf-8") + except Exception as e: + print_colored(f"❌ Failed to read --prompt-file: {e}", Colors.RED, bold=True) + sys.exit(1) + # Start chat loop (default interactive mode) - await chat_loop(agent, args.model, container_name, args.prompt, args.usage) + await chat_loop(agent, args.model, container_name, initial_prompt, args.usage) diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index db057570..d8ee534b 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -68,7 +68,7 @@ cli = [ "yaspin>=3.1.0", ] hud = [ - "hud-python==0.4.26", + "hud-python==0.4.52", ] gemini = [ "google-genai>=1.41.0", @@ -91,7 +91,7 @@ all = [ # cli requirements "yaspin>=3.1.0", # hud requirements - "hud-python==0.4.26", + "hud-python==0.4.52", # gemini requirements "google-genai>=1.41.0", ] diff --git a/libs/python/computer-server/pyproject.toml b/libs/python/computer-server/pyproject.toml index 6e9e7240..941f43c5 100644 
--- a/libs/python/computer-server/pyproject.toml +++ b/libs/python/computer-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer-server" -version = "0.1.0" +version = "0.1.24" description = "Server component for the Computer-Use Interface (CUI) framework powering Cua" authors = [ { name = "TryCua", email = "gh@trycua.com" } diff --git a/libs/python/computer/computer/computer.py b/libs/python/computer/computer/computer.py index 6841f2f2..0d132ab0 100644 --- a/libs/python/computer/computer/computer.py +++ b/libs/python/computer/computer/computer.py @@ -1,3 +1,4 @@ +import traceback from typing import Optional, List, Literal, Dict, Any, Union, TYPE_CHECKING, cast import asyncio from .models import Computer as ComputerConfig, Display @@ -451,6 +452,7 @@ class Computer: raise RuntimeError(f"VM failed to become ready: {wait_error}") except Exception as e: self.logger.error(f"Failed to initialize computer: {e}") + self.logger.error(traceback.format_exc()) raise RuntimeError(f"Failed to initialize computer: {e}") try: @@ -558,6 +560,102 @@ class Computer: self.logger.debug(f"Computer stop process took {duration_ms:.2f}ms") return + async def start(self) -> None: + """Start the computer.""" + await self.run() + + async def restart(self) -> None: + """Restart the computer. + + If using a VM provider that supports restart, this will issue a restart + without tearing down the provider context, then reconnect the interface. + Falls back to stop()+run() when a provider restart is not available. 
+ """ + # Host computer server: just disconnect and run again + if self.use_host_computer_server: + try: + await self.disconnect() + finally: + await self.run() + return + + # If no VM provider context yet, fall back to full run + if not getattr(self, "_provider_context", None) or self.config.vm_provider is None: + self.logger.info("No provider context active; performing full restart via run()") + await self.run() + return + + # Gracefully close current interface connection if present + if self._interface: + try: + self._interface.close() + except Exception as e: + self.logger.debug(f"Error closing interface prior to restart: {e}") + + # Attempt provider-level restart if implemented + try: + storage_param = "ephemeral" if self.ephemeral else self.storage + if hasattr(self.config.vm_provider, "restart_vm"): + self.logger.info(f"Restarting VM {self.config.name} via provider...") + await self.config.vm_provider.restart_vm(name=self.config.name, storage=storage_param) + else: + # Fallback: stop then start without leaving provider context + self.logger.info(f"Provider has no restart_vm; performing stop+start for {self.config.name}...") + await self.config.vm_provider.stop_vm(name=self.config.name, storage=storage_param) + await self.config.vm_provider.run_vm(image=self.image, name=self.config.name, run_opts={}, storage=storage_param) + except Exception as e: + self.logger.error(f"Failed to restart VM via provider: {e}") + # As a last resort, do a full stop (with provider context exit) and run + try: + await self.stop() + finally: + await self.run() + return + + # Wait for VM to be ready and reconnect interface + try: + self.logger.info("Waiting for VM to be ready after restart...") + if self.provider_type == VMProviderType.LUMIER: + max_retries = 60 + retry_delay = 3 + else: + max_retries = 30 + retry_delay = 2 + ip_address = await self.get_ip(max_retries=max_retries, retry_delay=retry_delay) + + self.logger.info(f"Re-initializing interface for {self.os_type} at 
{ip_address}") + from .interface.base import BaseComputerInterface + + if self.provider_type == VMProviderType.CLOUD and self.api_key and self.config.name: + self._interface = cast( + BaseComputerInterface, + InterfaceFactory.create_interface_for_os( + os=self.os_type, + ip_address=ip_address, + api_key=self.api_key, + vm_name=self.config.name, + ), + ) + else: + self._interface = cast( + BaseComputerInterface, + InterfaceFactory.create_interface_for_os( + os=self.os_type, + ip_address=ip_address, + ), + ) + + self.logger.info("Connecting to WebSocket interface after restart...") + await self._interface.wait_for_ready(timeout=30) + self.logger.info("Computer reconnected and ready after restart") + except Exception as e: + self.logger.error(f"Failed to reconnect after restart: {e}") + # Try a full reset if reconnection failed + try: + await self.stop() + finally: + await self.run() + # @property async def get_ip(self, max_retries: int = 15, retry_delay: int = 3) -> str: """Get the IP address of the VM or localhost if using host computer server. diff --git a/libs/python/computer/computer/providers/base.py b/libs/python/computer/computer/providers/base.py index 23526097..0c36c913 100644 --- a/libs/python/computer/computer/providers/base.py +++ b/libs/python/computer/computer/providers/base.py @@ -2,7 +2,9 @@ import abc from enum import StrEnum -from typing import Dict, List, Optional, Any, AsyncContextManager +from typing import Dict, Optional, Any, AsyncContextManager + +from .types import ListVMsResponse class VMProviderType(StrEnum): @@ -42,8 +44,13 @@ class BaseVMProvider(AsyncContextManager): pass @abc.abstractmethod - async def list_vms(self) -> List[Dict[str, Any]]: - """List all available VMs.""" + async def list_vms(self) -> ListVMsResponse: + """List all available VMs. + + Returns: + ListVMsResponse: A list of minimal VM objects as defined in + `computer.providers.types.MinimalVM`. 
+ """ pass @abc.abstractmethod @@ -76,6 +83,20 @@ class BaseVMProvider(AsyncContextManager): """ pass + @abc.abstractmethod + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + """Restart a VM by name. + + Args: + name: Name of the VM to restart + storage: Optional storage path override. If provided, this will be used + instead of the provider's default storage path. + + Returns: + Dictionary with VM restart status and information + """ + pass + @abc.abstractmethod async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: """Update VM configuration. diff --git a/libs/python/computer/computer/providers/cloud/provider.py b/libs/python/computer/computer/providers/cloud/provider.py index 1cfba161..5e4e7c51 100644 --- a/libs/python/computer/computer/providers/cloud/provider.py +++ b/libs/python/computer/computer/providers/cloud/provider.py @@ -1,12 +1,18 @@ -"""Cloud VM provider implementation. +"""Cloud VM provider implementation using CUA Public API. -This module contains a stub implementation for a future cloud VM provider. 
+Implements the following public API endpoints: + +- GET /v1/vms +- POST /v1/vms/:name/start +- POST /v1/vms/:name/stop +- POST /v1/vms/:name/restart """ import logging from typing import Dict, List, Optional, Any from ..base import BaseVMProvider, VMProviderType +from ..types import ListVMsResponse, MinimalVM # Setup logging logger = logging.getLogger(__name__) @@ -14,6 +20,10 @@ logger = logging.getLogger(__name__) import asyncio import aiohttp from urllib.parse import urlparse +import os + + +DEFAULT_API_BASE = os.getenv("CUA_API_BASE", "https://api.cua.ai") class CloudProvider(BaseVMProvider): """Cloud VM Provider implementation.""" @@ -21,6 +31,7 @@ class CloudProvider(BaseVMProvider): self, api_key: str, verbose: bool = False, + api_base: Optional[str] = None, **kwargs, ): """ @@ -32,6 +43,7 @@ class CloudProvider(BaseVMProvider): assert api_key, "api_key required for CloudProvider" self.api_key = api_key self.verbose = verbose + self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/") @property def provider_type(self) -> VMProviderType: @@ -44,24 +56,162 @@ class CloudProvider(BaseVMProvider): pass async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: - """Get VM VNC URL by name using the cloud API.""" - return {"name": name, "hostname": f"{name}.containers.cloud.trycua.com"} + """Get VM information by querying the VM status endpoint. 
- async def list_vms(self) -> List[Dict[str, Any]]: - logger.warning("CloudProvider.list_vms is not implemented") - return [] + - Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com" + - Probe https://{hostname}:8443/status with a short timeout + - If JSON contains a "status" field, return it; otherwise infer + - Fallback to DNS resolve check to distinguish unknown vs not_found + """ + hostname = await self.get_ip(name=name) - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: - # logger.warning("CloudProvider.run_vm is not implemented") - return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"} + # Try HTTPS probe to the computer-server status endpoint (8443) + try: + timeout = aiohttp.ClientTimeout(total=3) + async with aiohttp.ClientSession(timeout=timeout) as session: + url = f"https://{hostname}:8443/status" + async with session.get(url, allow_redirects=False) as resp: + status_code = resp.status + vm_status: str + vm_os_type: Optional[str] = None + if status_code == 200: + try: + data = await resp.json(content_type=None) + vm_status = str(data.get("status", "ok")) + vm_os_type = str(data.get("os_type")) + except Exception: + vm_status = "unknown" + elif status_code < 500: + vm_status = "unknown" + else: + vm_status = "unknown" + return { + "name": name, + "status": "running" if vm_status == "ok" else vm_status, + "api_url": f"https://{hostname}:8443", + "os_type": vm_os_type, + } + except Exception: + return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:8443"} + + async def list_vms(self) -> ListVMsResponse: + url = f"{self.api_base}/v1/vms" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as resp: + if resp.status == 200: + try: + data = await 
resp.json(content_type=None) + except Exception: + text = await resp.text() + logger.error(f"Failed to parse list_vms JSON: {text}") + return [] + if isinstance(data, list): + # Enrich with convenience URLs when possible. + enriched: List[Dict[str, Any]] = [] + for item in data: + vm = dict(item) if isinstance(item, dict) else {} + name = vm.get("name") + password = vm.get("password") + if isinstance(name, str) and name: + host = f"{name}.containers.cloud.trycua.com" + # api_url: always set if missing + if not vm.get("api_url"): + vm["api_url"] = f"https://{host}:8443" + # vnc_url: only when password available + if not vm.get("vnc_url") and isinstance(password, str) and password: + vm[ + "vnc_url" + ] = f"https://{host}/vnc.html?autoconnect=true&password={password}" + enriched.append(vm) + return enriched # type: ignore[return-value] + logger.warning("Unexpected response for list_vms; expected list") + return [] + elif resp.status == 401: + logger.error("Unauthorized: invalid CUA API key for list_vms") + return [] + else: + text = await resp.text() + logger.error(f"list_vms failed: HTTP {resp.status} - {text}") + return [] + + async def run_vm(self, name: str, image: Optional[str] = None, run_opts: Optional[Dict[str, Any]] = None, storage: Optional[str] = None) -> Dict[str, Any]: + """Start a VM via public API. 
Returns a minimal status.""" + url = f"{self.api_base}/v1/vms/{name}/start" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 201, 202, 204): + return {"name": name, "status": "starting"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: - logger.warning("CloudProvider.stop_vm is not implemented. To clean up resources, please use Computer.disconnect()") - return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"} + """Stop a VM via public API.""" + url = f"{self.api_base}/v1/vms/{name}/stop" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 202): + # Spec says 202 with {"status":"stopping"} + body_status: Optional[str] = None + try: + data = await resp.json(content_type=None) + body_status = data.get("status") if isinstance(data, dict) else None + except Exception: + body_status = None + return {"name": name, "status": body_status or "stopping"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + """Restart a VM via public API.""" + url = f"{self.api_base}/v1/vms/{name}/restart" + headers = { + "Authorization": f"Bearer 
{self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 202): + # Spec says 202 with {"status":"restarting"} + body_status: Optional[str] = None + try: + data = await resp.json(content_type=None) + body_status = data.get("status") if isinstance(data, dict) else None + except Exception: + body_status = None + return {"name": name, "status": body_status or "restarting"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: - logger.warning("CloudProvider.update_vm is not implemented") - return {"name": name, "status": "unchanged", "message": "CloudProvider is not implemented"} + logger.warning("CloudProvider.update_vm is not implemented via public API") + return {"name": name, "status": "unchanged", "message": "update_vm not supported by public API"} async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str: """ diff --git a/libs/python/computer/computer/providers/docker/provider.py b/libs/python/computer/computer/providers/docker/provider.py index 82ad411c..850300b6 100644 --- a/libs/python/computer/computer/providers/docker/provider.py +++ b/libs/python/computer/computer/providers/docker/provider.py @@ -36,7 +36,7 @@ class DockerProvider(BaseVMProvider): """ def __init__( - self, + self, port: Optional[int] = 8000, host: str = "localhost", storage: Optional[str] = None, @@ -47,13 +47,16 @@ class DockerProvider(BaseVMProvider): vnc_port: Optional[int] = 6901, ): """Initialize the Docker VM Provider. 
- + Args: port: Currently unused (VM provider port) host: Hostname for the API server (default: localhost) storage: Path for persistent VM storage shared_path: Path for shared folder between host and container image: Docker image to use (default: "trycua/cua-ubuntu:latest") + Supported images: + - "trycua/cua-ubuntu:latest" (Kasm-based) + - "trycua/cua-docker-xfce:latest" (vanilla XFCE) verbose: Enable verbose logging ephemeral: Use ephemeral (temporary) storage vnc_port: Port for VNC interface (default: 6901) @@ -62,19 +65,35 @@ class DockerProvider(BaseVMProvider): self.api_port = 8000 self.vnc_port = vnc_port self.ephemeral = ephemeral - + # Handle ephemeral storage (temporary directory) if ephemeral: self.storage = "ephemeral" else: self.storage = storage - + self.shared_path = shared_path self.image = image self.verbose = verbose self._container_id = None self._running_containers = {} # Track running containers by name + + # Detect image type and configure user directory accordingly + self._detect_image_config() + def _detect_image_config(self): + """Detect image type and configure paths accordingly.""" + # Detect if this is a docker-xfce image or Kasm image + if "docker-xfce" in self.image.lower() or "xfce" in self.image.lower(): + self._home_dir = "/home/cua" + self._image_type = "docker-xfce" + logger.info(f"Detected docker-xfce image: using {self._home_dir}") + else: + # Default to Kasm configuration + self._home_dir = "/home/kasm-user" + self._image_type = "kasm" + logger.info(f"Detected Kasm image: using {self._home_dir}") + @property def provider_type(self) -> VMProviderType: """Return the provider type.""" @@ -277,12 +296,13 @@ class DockerProvider(BaseVMProvider): # Add volume mounts if storage is specified storage_path = storage or self.storage if storage_path and storage_path != "ephemeral": - # Mount storage directory - cmd.extend(["-v", f"{storage_path}:/home/kasm-user/storage"]) - + # Mount storage directory using detected home directory + 
cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"]) + # Add shared path if specified if self.shared_path: - cmd.extend(["-v", f"{self.shared_path}:/home/kasm-user/shared"]) + # Mount shared directory using detected home directory + cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"]) # Add environment variables cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password @@ -405,6 +425,9 @@ class DockerProvider(BaseVMProvider): "provider": "docker" } + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("DockerProvider does not support restarting VMs.") + async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: """Update VM configuration. diff --git a/libs/python/computer/computer/providers/lume/provider.py b/libs/python/computer/computer/providers/lume/provider.py index 5816e53e..d651d2eb 100644 --- a/libs/python/computer/computer/providers/lume/provider.py +++ b/libs/python/computer/computer/providers/lume/provider.py @@ -486,6 +486,9 @@ class LumeProvider(BaseVMProvider): """Update VM configuration.""" return self._lume_api_update(name, update_opts, debug=self.verbose) + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("LumeProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. 
diff --git a/libs/python/computer/computer/providers/lumier/provider.py b/libs/python/computer/computer/providers/lumier/provider.py index 67f348be..9b3e8c4d 100644 --- a/libs/python/computer/computer/providers/lumier/provider.py +++ b/libs/python/computer/computer/providers/lumier/provider.py @@ -836,6 +836,9 @@ class LumierProvider(BaseVMProvider): logger.error(error_msg) return error_msg + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("LumierProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. diff --git a/libs/python/computer/computer/providers/types.py b/libs/python/computer/computer/providers/types.py new file mode 100644 index 00000000..2db44230 --- /dev/null +++ b/libs/python/computer/computer/providers/types.py @@ -0,0 +1,36 @@ +"""Shared provider type definitions for VM metadata and responses. + +These base types describe the common shape of objects returned by provider +methods like `list_vms()`. +""" +from __future__ import annotations + +from typing import Literal, TypedDict, NotRequired + +# Core status values per product docs +VMStatus = Literal[ + "pending", # VM deployment in progress + "running", # VM is active and accessible + "stopped", # VM is stopped but not terminated + "terminated", # VM has been permanently destroyed + "failed", # VM deployment or operation failed +] + +OSType = Literal["macos", "linux", "windows"] + +class MinimalVM(TypedDict): + """Minimal VM object shape returned by list calls. + + Providers may include additional fields. Optional fields below are + common extensions some providers expose or that callers may compute. 
+ """ + name: str + status: VMStatus + # Not always included by all providers + password: NotRequired[str] + vnc_url: NotRequired[str] + api_url: NotRequired[str] + + +# Convenience alias for list_vms() responses +ListVMsResponse = list[MinimalVM] diff --git a/libs/python/computer/computer/providers/winsandbox/provider.py b/libs/python/computer/computer/providers/winsandbox/provider.py index e072d900..1c9aec3c 100644 --- a/libs/python/computer/computer/providers/winsandbox/provider.py +++ b/libs/python/computer/computer/providers/winsandbox/provider.py @@ -390,6 +390,9 @@ class WinSandboxProvider(BaseVMProvider): "error": "Windows Sandbox does not support runtime configuration updates. " "Please stop and restart the sandbox with new configuration." } + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("WinSandboxProvider does not support restarting VMs.") async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. 
diff --git a/libs/python/computer/pyproject.toml b/libs/python/computer/pyproject.toml index 4a9b41bb..3cf06f41 100644 --- a/libs/python/computer/pyproject.toml +++ b/libs/python/computer/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer" -version = "0.4.0" +version = "0.4.8" description = "Computer-Use Interface (CUI) framework powering Cua" readme = "README.md" authors = [ diff --git a/libs/xfce/.dockerignore b/libs/xfce/.dockerignore new file mode 100644 index 00000000..d4352f88 --- /dev/null +++ b/libs/xfce/.dockerignore @@ -0,0 +1,5 @@ +README.md +.git +.gitignore +*.md +LICENSE diff --git a/libs/xfce/.gitignore b/libs/xfce/.gitignore new file mode 100644 index 00000000..0a2449a1 --- /dev/null +++ b/libs/xfce/.gitignore @@ -0,0 +1,4 @@ +storage/ +shared/ +*.log +.DS_Store diff --git a/libs/xfce/Dockerfile b/libs/xfce/Dockerfile new file mode 100644 index 00000000..d44bdb95 --- /dev/null +++ b/libs/xfce/Dockerfile @@ -0,0 +1,141 @@ +# CUA Docker XFCE Container +# Vanilla XFCE desktop with noVNC and computer-server + +FROM ubuntu:22.04 + +# Avoid prompts from apt +ENV DEBIAN_FRONTEND=noninteractive + +# Set environment variables +ENV HOME=/home/cua +ENV DISPLAY=:1 +ENV VNC_PORT=5901 +ENV NOVNC_PORT=6901 +ENV API_PORT=8000 +ENV VNC_RESOLUTION=1024x768 +ENV VNC_COL_DEPTH=24 + +# Install system dependencies first (including sudo) +RUN apt-get update && apt-get install -y \ + # System utilities + sudo \ + # Desktop environment + xfce4 \ + xfce4-terminal \ + dbus-x11 \ + # VNC server + tigervnc-standalone-server \ + tigervnc-common \ + # noVNC dependencies + python3 \ + python3-pip \ + python3-numpy \ + git \ + net-tools \ + netcat \ + supervisor \ + # Computer-server dependencies + python3-tk \ + python3-dev \ + gnome-screenshot \ + wmctrl \ + ffmpeg \ + socat \ + xclip \ + # Browser + wget \ + software-properties-common \ + # Build tools + build-essential \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + tk-dev \ + 
libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + libffi-dev \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Remove screensavers and power manager to avoid popups and lock screens +RUN apt-get remove -y \ + xfce4-power-manager \ + xfce4-power-manager-data \ + xfce4-power-manager-plugins \ + xfce4-screensaver \ + light-locker \ + xscreensaver \ + xscreensaver-data || true + +# Create user after sudo is installed +RUN useradd -m -s /bin/bash -G sudo cua && \ + echo "cua:cua" | chpasswd && \ + echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues +RUN apt-get update && \ + add-apt-repository -y ppa:mozillateam/ppa && \ + echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ + apt-get update && \ + apt-get install -y firefox && \ + echo 'pref("datareporting.policy.firstRunURL", "");\npref("datareporting.policy.dataSubmissionEnabled", false);\npref("datareporting.healthreport.service.enabled", false);\npref("datareporting.healthreport.uploadEnabled", false);\npref("trailhead.firstrun.branches", "nofirstrun-empty");\npref("browser.aboutwelcome.enabled", false);' > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \ + update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \ + update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \ + rm -rf /var/lib/apt/lists/* + +# Install noVNC +RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ + git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ + ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html + +# Pre-create cache directory with correct ownership before pip install +RUN mkdir -p /home/cua/.cache && \ + chown -R cua:cua /home/cua/.cache + +# Install computer-server +RUN pip3 install cua-computer-server + +# Fix any cache files created by pip +RUN chown -R 
cua:cua /home/cua/.cache + +# Copy startup scripts +COPY src/supervisor/ /etc/supervisor/conf.d/ +COPY src/scripts/ /usr/local/bin/ + +# Make scripts executable +RUN chmod +x /usr/local/bin/*.sh + +# Setup VNC +USER cua +WORKDIR /home/cua + +# Create VNC directory (no password needed with SecurityTypes None) +RUN mkdir -p $HOME/.vnc + +# Configure XFCE for first start +RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart + +# Copy XFCE config to disable browser launching and welcome screens +COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc +COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml +COPY --chown=cua:cua src/xfce-config/xfce4-power-manager.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-power-manager.xml + +# Disable autostart for screensaver, lock screen, and power manager +RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-screensaver.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/light-locker.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-power-manager.desktop && \ + chown -R cua:cua $HOME/.config + +# Create storage and shared directories, and Firefox cache directory +RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \ + chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla $HOME/.vnc + +USER root + +# Expose ports +EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT + +# Start services via supervisor +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] diff --git a/libs/xfce/README.md b/libs/xfce/README.md new file mode 100644 index 00000000..9ecdff00 --- /dev/null +++ b/libs/xfce/README.md @@ -0,0 +1,261 @@ +# CUA Docker XFCE Container + +Vanilla XFCE 
desktop container for Computer-Using Agents (CUA) with noVNC and computer-server. This is a lightweight alternative to the Kasm-based container with minimal dependencies. + +## Features + +- Ubuntu 22.04 (Jammy) with vanilla XFCE desktop environment +- TigerVNC server for remote desktop access +- noVNC for web-based VNC access (no client required) +- Pre-installed computer-server for remote computer control +- Python 3.10 (Ubuntu 22.04 system Python) with necessary libraries +- Screen capture tools (gnome-screenshot, wmctrl, ffmpeg) +- Clipboard utilities (xclip, socat) +- Firefox browser with telemetry disabled + +## Architecture + +``` +┌─────────────────────────────────────────┐ +│ Docker Container (Ubuntu 22.04) │ +├─────────────────────────────────────────┤ +│ XFCE Desktop Environment │ +│ ├── Firefox │ +│ ├── XFCE Terminal │ +│ └── Desktop utilities │ +├─────────────────────────────────────────┤ +│ TigerVNC Server (Port 5901) │ +│ └── X11 Display :1 │ +├─────────────────────────────────────────┤ +│ noVNC Web Interface (Port 6901) │ +│ └── WebSocket proxy to VNC │ +├─────────────────────────────────────────┤ +│ CUA Computer Server (Port 8000) │ +│ └── WebSocket API for automation │ +└─────────────────────────────────────────┘ +``` + +## Building the Container + +```bash +docker build -t cua-docker-xfce:latest .
+``` + +## Pushing to Registry + +```bash +# Tag for Docker Hub (replace 'trycua' with your Docker Hub username) +docker tag cua-docker-xfce:latest trycua/cua-docker-xfce:latest + +# Login to Docker Hub +docker login + +# Push to Docker Hub +docker push trycua/cua-docker-xfce:latest +``` + +## Running the Container Manually + +### Basic Usage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce:latest +``` + +### With Custom Resolution + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -e VNC_RESOLUTION=1280x720 \ + cua-docker-xfce:latest +``` + +### With Persistent Storage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -v $(pwd)/storage:/home/cua/storage \ + cua-docker-xfce:latest +``` + +## Accessing the Container + +- **noVNC Web Interface**: Open `http://localhost:6901` in your browser (no password required) +- **VNC Client**: Connect to `localhost:5901` (no password required) +- **Computer Server API**: Available at `http://localhost:8000` + +## Using with CUA Docker Provider + +This container is designed to work with the CUA Docker provider. 
Simply specify the docker-xfce image: + +```python +from computer import Computer + +# Create computer with docker-xfce container +computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # Use docker-xfce instead of Kasm + display="1024x768", + memory="4GB", + cpu="2" +) + +# Use the computer +async with computer: + # Take a screenshot + screenshot = await computer.interface.screenshot() + + # Click and type + await computer.interface.left_click(100, 100) + await computer.interface.type_text("Hello from CUA!") + + # Run commands + result = await computer.interface.run_command("ls -la") + print(result.stdout) +``` + +### Switching between Kasm and docker-xfce + +The Docker provider automatically detects which image you're using: + +```python +# Use Kasm-based container (default for Linux) +computer_kasm = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-ubuntu:latest", # Kasm image +) + +# Use docker-xfce container (vanilla XFCE) +computer_xfce = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # docker-xfce image +) +``` + +Both provide the same API and functionality - the provider automatically configures the correct paths and settings based on the image. 
+ +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `VNC_RESOLUTION` | `1024x768` | Screen resolution | +| `VNC_COL_DEPTH` | `24` | Color depth | +| `VNC_PORT` | `5901` | VNC server port | +| `NOVNC_PORT` | `6901` | noVNC web interface port | +| `API_PORT` | `8000` | Computer-server API port | +| `DISPLAY` | `:1` | X11 display number | + +## Exposed Ports + +- **5901**: TigerVNC server +- **6901**: noVNC web interface +- **8000**: Computer-server WebSocket API + +## Volume Mount Points + +- `/home/cua/storage`: Persistent storage mount point +- `/home/cua/shared`: Shared folder mount point + +## User Credentials + +- **Username**: `cua` +- **Password**: `cua` (for shell login only) +- **Sudo access**: Enabled without password +- **VNC access**: No password required + +## Creating Snapshots + +### Filesystem Snapshot +```bash +docker commit <container_id> cua-docker-xfce-snapshot:latest +``` + +### Running from Snapshot +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce-snapshot:latest +``` + +## Comparison with Kasm Container + +| Feature | Kasm Container | Docker XFCE Container | +|---------|---------------|----------------------| +| Base Image | KasmWeb Ubuntu | Vanilla Ubuntu | +| VNC Server | KasmVNC | TigerVNC | +| Dependencies | Higher | Lower | +| Configuration | Pre-configured | Minimal | +| Size | Larger | Smaller | +| Maintenance | Depends on Kasm | Independent | + +## Process Management + +The container uses `supervisord` to manage three main processes: + +1. **VNC Server** (Priority 10): TigerVNC with XFCE desktop +2. **noVNC** (Priority 20): WebSocket proxy for browser access +3. **Computer Server** (Priority 30): CUA automation API + +All processes are automatically restarted on failure.
+ +## Troubleshooting + +### VNC server won't start +Check if X11 lock files exist: +```bash +docker exec <container_id> rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 +``` + +### noVNC shows black screen +Ensure VNC server is running: +```bash +docker exec <container_id> supervisorctl status vncserver +``` + +### Computer-server not responding +Check if X server is accessible: +```bash +docker exec <container_id> env DISPLAY=:1 xdpyinfo +``` + +### View logs +```bash +docker exec <container_id> tail -f /var/log/supervisor/supervisord.log +docker exec <container_id> supervisorctl status +``` + +## Integration with CUA System + +This container provides the same functionality as the Kasm container but with: +- **Reduced dependencies**: No reliance on KasmWeb infrastructure +- **Smaller image size**: Minimal base configuration +- **Full control**: Direct access to all components +- **Easy customization**: Simple to modify and extend + +The container integrates seamlessly with: +- CUA Computer library (via WebSocket API) +- Docker provider for lifecycle management +- Standard VNC clients for debugging +- Web browsers for visual monitoring + +## License + +MIT License - See LICENSE file for details diff --git a/libs/xfce/src/scripts/resize-display.sh b/libs/xfce/src/scripts/resize-display.sh new file mode 100644 index 00000000..ea663dce --- /dev/null +++ b/libs/xfce/src/scripts/resize-display.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Dynamic display resolution script +# Usage: resize-display.sh [WIDTHxHEIGHT] (default 1920x1080); exits 1 if the resolution cannot be applied + +RESOLUTION=${1:-1920x1080} + +# Wait up to ~10 seconds for display :1 to answer; proceed regardless so xrandr reports the real error +for i in {1..10}; do + if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then + break + fi + sleep 1 +done + +# Try a named mode on VNC-0 first, then a raw framebuffer resize; bail out if both fail +DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \ +DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null || \ +{ echo "Failed to set resolution to $RESOLUTION" >&2; exit 1; } + +echo "Display resolution set to: $RESOLUTION" diff --git a/libs/xfce/src/scripts/start-computer-server.sh b/libs/xfce/src/scripts/start-computer-server.sh new file mode
100644 index 00000000..bc27a3db --- /dev/null +++ b/libs/xfce/src/scripts/start-computer-server.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +# Wait for X server to be ready +echo "Waiting for X server to start..." +while ! xdpyinfo -display :1 >/dev/null 2>&1; do + sleep 1 +done +echo "X server is ready" + +# Start computer-server +export DISPLAY=:1 +python3 -m computer_server --port ${API_PORT:-8000} diff --git a/libs/xfce/src/scripts/start-novnc.sh b/libs/xfce/src/scripts/start-novnc.sh new file mode 100644 index 00000000..07894acb --- /dev/null +++ b/libs/xfce/src/scripts/start-novnc.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +# Give VNC a moment to start (supervisor starts it with priority 10, this is priority 20) +echo "Waiting for VNC server to start..." +sleep 5 + +# Start noVNC +cd /opt/noVNC +/opt/noVNC/utils/novnc_proxy \ + --vnc localhost:${VNC_PORT:-5901} \ + --listen ${NOVNC_PORT:-6901} diff --git a/libs/xfce/src/scripts/start-vnc.sh b/libs/xfce/src/scripts/start-vnc.sh new file mode 100644 index 00000000..934e6d3c --- /dev/null +++ b/libs/xfce/src/scripts/start-vnc.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Clean up any existing VNC lock files +rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 + +# Start VNC server without password authentication +vncserver :1 \ + -geometry ${VNC_RESOLUTION:-1920x1080} \ + -depth ${VNC_COL_DEPTH:-24} \ + -rfbport ${VNC_PORT:-5901} \ + -localhost no \ + -SecurityTypes None \ + -AlwaysShared \ + -AcceptPointerEvents \ + -AcceptKeyEvents \ + -AcceptCutText \ + -SendCutText \ + -xstartup /usr/local/bin/xstartup.sh \ + --I-KNOW-THIS-IS-INSECURE + +# Keep the process running +tail -f /home/cua/.vnc/*.log diff --git a/libs/xfce/src/scripts/xstartup.sh b/libs/xfce/src/scripts/xstartup.sh new file mode 100644 index 00000000..49bb46a2 --- /dev/null +++ b/libs/xfce/src/scripts/xstartup.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e + +# Start D-Bus +if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then + eval $(dbus-launch --sh-syntax 
--exit-with-session) +fi + +# Start XFCE +startxfce4 & + +# Wait for XFCE to start +sleep 2 + +# Disable screensaver and power management +xset s off +xset -dpms +xset s noblank + +# Wait for the session +wait diff --git a/libs/xfce/src/supervisor/supervisord.conf b/libs/xfce/src/supervisor/supervisord.conf new file mode 100644 index 00000000..fb367c4f --- /dev/null +++ b/libs/xfce/src/supervisor/supervisord.conf @@ -0,0 +1,30 @@ +[supervisord] +nodaemon=true +user=root +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +childlogdir=/var/log/supervisor + +[program:vncserver] +command=/usr/local/bin/start-vnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/vncserver.log +stderr_logfile=/var/log/supervisor/vncserver.error.log +priority=10 + +[program:novnc] +command=/usr/local/bin/start-novnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/novnc.log +stderr_logfile=/var/log/supervisor/novnc.error.log +priority=20 + +[program:computer-server] +command=/usr/local/bin/start-computer-server.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/computer-server.log +stderr_logfile=/var/log/supervisor/computer-server.error.log +priority=30 diff --git a/libs/xfce/src/xfce-config/helpers.rc b/libs/xfce/src/xfce-config/helpers.rc new file mode 100644 index 00000000..b2270633 --- /dev/null +++ b/libs/xfce/src/xfce-config/helpers.rc @@ -0,0 +1,2 @@ +# XFCE preferred applications - set Firefox as default browser +WebBrowser=firefox diff --git a/libs/xfce/src/xfce-config/xfce4-power-manager.xml b/libs/xfce/src/xfce-config/xfce4-power-manager.xml new file mode 100644 index 00000000..56447c1e --- /dev/null +++ b/libs/xfce/src/xfce-config/xfce4-power-manager.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/libs/xfce/src/xfce-config/xfce4-session.xml b/libs/xfce/src/xfce-config/xfce4-session.xml new file mode 100644 index 00000000..5af36711 --- /dev/null +++ 
b/libs/xfce/src/xfce-config/xfce4-session.xml @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/notebooks/README.md b/notebooks/README.md index 0a7f4890..25978415 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -5,7 +5,7 @@ This folder contains Jupyter notebooks that demonstrate the core functionality o ## Available Notebooks ### Core Components -- **`computer_nb.ipynb`** - Demonstrates the Computer API for programmatically operating sandbox VMs using either Cua Cloud Containers or local Lume VMs on Apple Silicon macOS systems +- **`computer_nb.ipynb`** - Demonstrates the Computer API for programmatically operating sandbox VMs using either Cua Cloud Sandbox or local Lume VMs on Apple Silicon macOS systems - **`agent_nb.ipynb`** - Shows how to use CUA's Agent to run automated workflows in virtual sandboxes with various AI models (OpenAI, Anthropic, local models) - **`pylume_nb.ipynb`** - Quickstart guide for the pylume Python library, which handles VM creation, management, and image operations - **`computer_server_nb.ipynb`** - Demonstrates how to host and configure the Computer server that powers the Computer API diff --git a/notebooks/agent_nb.ipynb b/notebooks/agent_nb.ipynb index 30746780..7523e25d 100644 --- a/notebooks/agent_nb.ipynb +++ b/notebooks/agent_nb.ipynb @@ -3,11 +3,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Agent\n", - "\n", - "This notebook demonstrates how to use Cua's Agent to run workflows in virtual sandboxes, either using Cua Cloud Containers or local VMs on Apple Silicon Macs." - ] + "source": "## Agent\n\nThis notebook demonstrates how to use Cua's Agent to run workflows in virtual sandboxes, either using Cua Cloud Sandbox or local VMs on Apple Silicon Macs." 
}, { "cell_type": "markdown", @@ -61,9 +57,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Agent allows you to run an agentic workflow in virtual sandbox instances. You can choose between cloud containers or local VMs." - ] + "source": "Agent allows you to run an agentic workflow in virtual sandbox instances. You can choose between Cloud Sandbox or local VMs." }, { "cell_type": "code", @@ -96,32 +90,17 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Option 1: Agent with Cua Cloud Containers\n", - "\n", - "Use cloud containers for running agents from any system without local setup." - ] + "source": "## Option 1: Agent with Cua Cloud Sandbox\n\nUse Cloud Sandbox for running agents from any system without local setup." }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Prerequisites for Cloud Containers\n", - "\n", - "To use Cua Cloud Containers, you need to:\n", - "1. Sign up at https://trycua.com\n", - "2. Create a Cloud Container\n", - "3. Generate an API Key\n", - "\n", - "Once you have these, you can connect to your cloud container and run agents on it." - ] + "source": "### Prerequisites for Cloud Sandbox\n\nTo use Cua Cloud Sandbox, you need to:\n1. Sign up at https://trycua.com\n2. Create a Cloud Sandbox\n3. Generate an API Key\n\nOnce you have these, you can connect to your Cloud Sandbox and run agents on it." 
}, { "cell_type": "markdown", "metadata": {}, - "source": [ - "Get Cua API credentials and container details" - ] + "source": "Get Cua API credentials and sandbox details" }, { "cell_type": "code", @@ -138,60 +117,31 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Choose the OS type for your container (linux or macos)" - ] + "source": "Choose the OS type for your sandbox (linux or macos)" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\"" - ] + "source": "os_type = input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\"" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Create an agent with cloud container" - ] + "source": "### Create an agent with Cloud Sandbox" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import logging\n", - "from pathlib import Path\n", - "\n", - "# Connect to your existing cloud container\n", - "computer = Computer(\n", - " os_type=os_type,\n", - " api_key=cua_api_key,\n", - " name=container_name,\n", - " provider_type=VMProviderType.CLOUD,\n", - " verbosity=logging.INFO\n", - ")\n", - "\n", - "# Create agent\n", - "agent = ComputerAgent(\n", - " model=\"openai/computer-use-preview\",\n", - " tools=[computer],\n", - " trajectory_dir=str(Path(\"trajectories\")),\n", - " only_n_most_recent_images=3,\n", - " verbosity=logging.INFO\n", - ")\n" - ] + "source": "import logging\nfrom pathlib import Path\n\n# Connect to your existing Cloud Sandbox\ncomputer = Computer(\n os_type=os_type,\n api_key=cua_api_key,\n name=container_name,\n provider_type=VMProviderType.CLOUD,\n verbosity=logging.INFO\n)\n\n# Create agent\nagent = ComputerAgent(\n model=\"openai/computer-use-preview\",\n tools=[computer],\n trajectory_dir=str(Path(\"trajectories\")),\n 
only_n_most_recent_images=3,\n verbosity=logging.INFO\n)\n" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "Run tasks on cloud container" - ] + "source": "Run tasks on Cloud Sandbox" }, { "cell_type": "code", @@ -565,4 +515,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/notebooks/computer_nb.ipynb b/notebooks/computer_nb.ipynb index 0cf35175..740bad2c 100644 --- a/notebooks/computer_nb.ipynb +++ b/notebooks/computer_nb.ipynb @@ -3,11 +3,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Computer\n", - "\n", - "This notebook demonstrates how to use Computer to operate sandbox VMs programmatically, either using Cua Cloud Containers or local Lume VMs on Apple Silicon macOS systems." - ] + "source": "## Computer\n\nThis notebook demonstrates how to use Computer to operate sandbox VMs programmatically, either using Cua Cloud Sandbox or local Lume VMs on Apple Silicon macOS systems." }, { "cell_type": "markdown", @@ -513,25 +509,12 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Option 1: Cua Cloud Containers\n", - "\n", - "Cua Cloud Containers provide remote VMs that can be accessed from any system without local setup." - ] + "source": "## Option 1: Cua Cloud Sandbox\n\nCua Cloud Sandbox provides remote VMs that can be accessed from any system without local setup." }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Prerequisites for Cloud Containers\n", - "\n", - "To use Cua Cloud Containers, you need to:\n", - "1. Sign up at https://trycua.com\n", - "2. Create a Cloud Container\n", - "3. Generate an API Key\n", - "\n", - "Once you have these, you can connect to your cloud container using its name." - ] + "source": "### Prerequisites for Cloud Sandbox\n\nTo use Cua Cloud Sandbox, you need to:\n1. Sign up at https://trycua.com\n2. Create a Cloud Sandbox\n3. Generate an API Key\n\nOnce you have these, you can connect to your Cloud Sandbox using its name." 
}, { "cell_type": "code", @@ -551,25 +534,19 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Choose the OS type for your container (linux or macos)" - ] + "source": "Choose the OS type for your sandbox (linux or macos)" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\"" - ] + "source": "os_type = input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\"" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Connect to your Cloud Container" - ] + "source": "### Connect to your Cloud Sandbox" }, { "cell_type": "code", @@ -583,23 +560,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Connect to your existing Cua Cloud Container" - ] + "source": "Connect to your existing Cua Cloud Sandbox" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "computer = Computer(\n", - " os_type=os_type, # Must match the OS type of your cloud container\n", - " api_key=cua_api_key,\n", - " name=container_name,\n", - " provider_type=VMProviderType.CLOUD,\n", - ")" - ] + "source": "computer = Computer(\n os_type=os_type, # Must match the OS type of your Cloud Sandbox\n api_key=cua_api_key,\n name=container_name,\n provider_type=VMProviderType.CLOUD,\n)" }, { "cell_type": "markdown", @@ -1106,4 +1074,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index 1bc58e48..27a56020 100644 --- a/notebooks/eval_osworld.ipynb +++ b/notebooks/eval_osworld.ipynb @@ -39,6 +39,7 @@ "outputs": [], "source": [ "from dotenv import load_dotenv\n", + "import os\n", "\n", "# Load environment variables from ../.env\n", "load_dotenv(dotenv_path='../.env')\n", @@ -47,6 +48,8 @@ "# - HUD_API_KEY (for HUD access)\n", "# - 
ANTHROPIC_API_KEY (for Claude models)\n", "# - OPENAI_API_KEY (for OpenAI models)\n", + "assert os.getenv('HUD_API_KEY') is not None\n", + "assert os.getenv('ANTHROPIC_API_KEY') is not None or os.getenv('OPENAI_API_KEY') is not None\n", "\n", "from pprint import pprint" ] @@ -72,7 +75,7 @@ "# You can swap \"hud-evals/OSWorld-Verified\" -> \"hud-evals/SheetBench-V2\" to test SheetBench.\n", "await run_single_task(\n", " dataset=\"hud-evals/OSWorld-Verified\",\n", - " model=\"openai/computer-use-preview+openai/gpt-5-nano\", # or any supported model string\n", + " model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n", " task_id=155 # open last tab task (easy)\n", ")" ] diff --git a/notebooks/sota_hackathon_cloud.ipynb b/notebooks/sota_hackathon_cloud.ipynb index d6298e94..575d15ea 100644 --- a/notebooks/sota_hackathon_cloud.ipynb +++ b/notebooks/sota_hackathon_cloud.ipynb @@ -56,12 +56,7 @@ "cell_type": "markdown", "id": "47171dc3", "metadata": {}, - "source": [ - "1. Create a Cua account at https://www.trycua.com/\n", - "2. Start a small Cua container at https://www.trycua.com/dashboard/containers (If you need credits, ask us!)\n", - "3. Create a HUD account at https://www.hud.so/\n", - "4. Create a .env file:" - ] + "source": "1. Create a Cua account at https://www.trycua.com/\n2. Start a small Cua sandbox at https://www.trycua.com/dashboard/containers (If you need credits, ask us!)\n3. Create a HUD account at https://www.hud.so/\n4. Create a .env file:" }, { "cell_type": "code", @@ -151,21 +146,13 @@ "cell_type": "markdown", "id": "a07b09ee", "metadata": {}, - "source": [ - "## 🖱️ Test your agent\n", - "\n", - "Run your agent on a test scenario in a Cua cloud container." - ] + "source": "## 🖱️ Test your agent\n\nRun your agent on a test scenario in a Cua Cloud Sandbox." 
}, { "cell_type": "markdown", "id": "12b9c22c", "metadata": {}, - "source": [ - "Connect to an existing cloud container through the Cua SDK.\n", - "\n", - "You can access the computer through VNC on the [Cua Dashboard](https://www.trycua.com/dashboard)." - ] + "source": "Connect to an existing Cloud Sandbox through the Cua SDK.\n\nYou can access the computer through VNC on the [Cua Dashboard](https://www.trycua.com/dashboard)." }, { "cell_type": "code", @@ -173,20 +160,7 @@ "id": "a210e959", "metadata": {}, "outputs": [], - "source": [ - "from computer import Computer, VMProviderType\n", - "\n", - "# Connect to your existing cloud container\n", - "computer = Computer(\n", - " os_type=\"linux\",\n", - " provider_type=VMProviderType.CLOUD,\n", - " name=os.getenv(\"CUA_CONTAINER_NAME\") or \"\",\n", - " api_key=os.getenv(\"CUA_API_KEY\"),\n", - " verbosity=logging.INFO\n", - ")\n", - "\n", - "agent_config[\"tools\"] = [ computer ]" - ] + "source": "from computer import Computer, VMProviderType\n\n# Connect to your existing Cloud Sandbox\ncomputer = Computer(\n os_type=\"linux\",\n provider_type=VMProviderType.CLOUD,\n name=os.getenv(\"CUA_CONTAINER_NAME\") or \"\",\n api_key=os.getenv(\"CUA_API_KEY\"),\n verbosity=logging.INFO\n)\n\nagent_config[\"tools\"] = [ computer ]" }, { "cell_type": "markdown", @@ -283,4 +257,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index baa2567a..874d9ed2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dev = [ "mypy>=1.10.0", "ruff>=0.9.2", "types-requests>=2.31.0", - "hud-python[agent]==0.4.26" + "hud-python[agent]==0.4.52" ] docs = ["mkdocs-material>=9.2.0", "mkdocs>=1.5.0"] test = [ diff --git a/scripts/playground-docker.sh b/scripts/playground-docker.sh index 5cfae574..8a798464 100644 --- a/scripts/playground-docker.sh +++ b/scripts/playground-docker.sh @@ -80,16 +80,16 @@ trap cleanup EXIT echo "" echo "Choose your Cua setup:" -echo "1) ☁️ 
Cua Cloud Containers (works on any system)" +echo "1) ☁️ Cua Cloud Sandbox (works on any system)" echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)" echo "3) 🖥️ Local Windows VMs (requires Windows 10 / 11)" echo "" read -p "Enter your choice (1, 2, or 3): " CHOICE if [[ "$CHOICE" == "1" ]]; then - # Cua Cloud Container setup + # Cua Cloud Sandbox setup echo "" - print_info "Setting up Cua Cloud Containers..." + print_info "Setting up Cua Cloud Sandbox..." echo "" # Check if existing .env.local already has CUA_API_KEY @@ -116,15 +116,15 @@ if [[ "$CHOICE" == "1" ]]; then # If no valid API key found, prompt for one if [[ -z "$CUA_API_KEY" ]]; then - echo "To use Cua Cloud Containers, you need to:" + echo "To use Cua Cloud Sandbox, you need to:" echo "1. Sign up at https://trycua.com" - echo "2. Create a Cloud Container" + echo "2. Create a Cloud Sandbox" echo "3. Generate an Api Key" echo "" read -p "Enter your Cua Api Key: " CUA_API_KEY if [[ -z "$CUA_API_KEY" ]]; then - print_error "Cua Api Key is required for Cloud Containers." + print_error "Cua Api Key is required for Cloud Sandbox." exit 1 fi else @@ -142,7 +142,7 @@ elif [[ "$CHOICE" == "2" ]]; then # Check for Apple Silicon Mac if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then print_error "Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -150,7 +150,7 @@ elif [[ "$CHOICE" == "2" ]]; then OSVERSION=$(sw_vers -productVersion) if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then print_error "Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." 
exit 1 fi @@ -165,7 +165,7 @@ elif [[ "$CHOICE" == "3" ]]; then # Check if we're on Windows if [[ $(uname -s) != MINGW* && $(uname -s) != CYGWIN* && $(uname -s) != MSYS* ]]; then print_error "Local Windows VMs require Windows 10 or 11." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." echo "" echo "🔗 If you are using WSL, refer to the blog post to get started: https://www.trycua.com/blog/windows-sandbox" exit 1 @@ -303,7 +303,7 @@ chmod +x "$DEMO_DIR/start_ui.sh" print_success "Setup complete!" if [[ "$USE_CLOUD" == "true" ]]; then - echo "☁️ Cua Cloud Container setup complete!" + echo "☁️ Cua Cloud Sandbox setup complete!" else echo "🖥️ Cua Local VM setup complete!" fi diff --git a/scripts/playground.sh b/scripts/playground.sh index 0cde5a25..58bc2da2 100755 --- a/scripts/playground.sh +++ b/scripts/playground.sh @@ -22,18 +22,18 @@ TMP_DIR=$(mktemp -d) cd "$TMP_DIR" trap cleanup EXIT -# Ask user to choose between local macOS VMs or Cua Cloud Containers +# Ask user to choose between local macOS VMs or Cua Cloud Sandbox echo "" echo "Choose your Cua setup:" -echo "1) ☁️ Cua Cloud Containers (works on any system)" +echo "1) ☁️ Cua Cloud Sandbox (works on any system)" echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)" echo "" read -p "Enter your choice (1 or 2): " CHOICE if [[ "$CHOICE" == "1" ]]; then - # Cua Cloud Container setup + # Cua Cloud Sandbox setup echo "" - echo "☁️ Setting up Cua Cloud Containers..." + echo "☁️ Setting up Cua Cloud Sandbox..." echo "" # Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir) @@ -61,15 +61,15 @@ if [[ "$CHOICE" == "1" ]]; then # If no valid API key found, prompt for one if [[ -z "$CUA_API_KEY" ]]; then - echo "To use Cua Cloud Containers, you need to:" + echo "To use Cua Cloud Sandbox, you need to:" echo "1. Sign up at https://trycua.com" - echo "2. Create a Cloud Container" + echo "2. 
Create a Cloud Sandbox" echo "3. Generate an Api Key" echo "" read -p "Enter your Cua Api Key: " CUA_API_KEY if [[ -z "$CUA_API_KEY" ]]; then - echo "❌ Cua Api Key is required for Cloud Containers." + echo "❌ Cua Api Key is required for Cloud Sandbox." exit 1 fi fi @@ -84,7 +84,7 @@ elif [[ "$CHOICE" == "2" ]]; then # Check for Apple Silicon Mac if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then echo "❌ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -92,7 +92,7 @@ elif [[ "$CHOICE" == "2" ]]; then OSVERSION=$(sw_vers -productVersion) if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then echo "❌ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -249,7 +249,7 @@ chmod +x "$DEMO_DIR/start_ui.sh" echo "✅ Setup complete!" if [[ "$USE_CLOUD" == "true" ]]; then - # Create run_demo.py for cloud containers + # Create run_demo.py for cloud sandbox cat > "$DEMO_DIR/run_demo.py" << 'EOF' import asyncio import os @@ -276,7 +276,7 @@ if not openai_key and not anthropic_key: print("\n⚠️ No OpenAI or Anthropic API keys found in .env.local.") print("Please add at least one API key to use AI agents.") -print("🚀 Starting CUA playground with Cloud Containers...") +print("🚀 Starting CUA playground with Cloud Sandbox...") print("📝 Edit .env.local to update your API keys") # Launch the Gradio UI and open it in the browser @@ -314,7 +314,7 @@ app.launch(share=False, inbrowser=True) EOF fi -echo "☁️ CUA Cloud Container setup complete!" +echo "☁️ CUA Cloud Sandbox setup complete!" echo "📝 Edit $DEMO_DIR/.env.local to update your API keys" echo "🖥️ Start the playground by running: $DEMO_DIR/start_ui.sh"