diff --git a/.devcontainer/README.md b/.devcontainer/README.md deleted file mode 100644 index cabc2356..00000000 --- a/.devcontainer/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# Dev Container Setup - -This repository includes a Dev Container configuration that simplifies the development setup to just 3 steps: - -## Quick Start - -![Clipboard-20250611-180809-459](https://github.com/user-attachments/assets/447eaeeb-0eec-4354-9a82-44446e202e06) - -1. **Install the Dev Containers extension ([VS Code](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))** -2. **Open the repository in the Dev Container:** - - Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS) - - Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: `https://github.com/trycua/cua.git` (if not cloned) or `Dev Containers: Open Folder in Container...` (if git cloned). - > **Note**: On WindSurf, the post install hook might not run automatically. If so, run `/bin/bash .devcontainer/post-install.sh` manually. -3. **Open the VS Code workspace:** Once the post-install.sh is done running, open the `.vscode/py.code-workspace` workspace and press ![Open Workspace](https://github.com/user-attachments/assets/923bdd43-8c8f-4060-8d78-75bfa302b48c) -. -4. **Run the Agent UI example:** Click ![Run Agent UI](https://github.com/user-attachments/assets/7a61ef34-4b22-4dab-9864-f86bf83e290b) - to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** to enable remote debugging, select 'Yes' to proceed. -5. **Access the Gradio UI:** The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. - -## What's Included - -The dev container automatically: - -- ✅ Sets up Python 3.11 environment -- ✅ Installs all system dependencies (build tools, OpenGL, etc.) -- ✅ Configures Python paths for all packages -- ✅ Installs Python extensions (Black, Ruff, Pylance) -- ✅ Forwards port 7860 for the Gradio web UI -- ✅ Mounts your source code for live editing -- ✅ Creates the required `.env.local` file - -## Running Examples - -After the container is built, you can run examples directly: - -```bash -# Run the agent UI (Gradio web interface) -python examples/agent_ui_examples.py - -# Run computer examples -python examples/computer_examples.py - -# Run computer UI examples -python examples/computer_ui_examples.py -``` - -The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. 
- -## Environment Variables - -You'll need to add your API keys to `.env.local`: - -```bash -# Required for Anthropic provider -ANTHROPIC_API_KEY=your_anthropic_key_here - -# Required for OpenAI provider -OPENAI_API_KEY=your_openai_key_here -``` - -## Notes - -- The container connects to `host.docker.internal:7777` for Lume server communication -- All Python packages are pre-installed and configured -- Source code changes are reflected immediately (no rebuild needed) -- The container uses the same Dockerfile as the regular Docker development environment diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 88113b84..00000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "Cua - OSS", - "build": { - "dockerfile": "../Dockerfile" - }, - "containerEnv": { - "DISPLAY": "", - "PYLUME_HOST": "host.docker.internal" - }, - "forwardPorts": [7860], - "portsAttributes": { - "7860": { - "label": "Cua web client (Gradio)", - "onAutoForward": "silent" - } - }, - "postCreateCommand": "/bin/bash .devcontainer/post-install.sh" -} diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh deleted file mode 100755 index 1738e635..00000000 --- a/.devcontainer/post-install.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -WORKSPACE="/workspaces/cua" - -# Setup .env.local -echo "PYTHON_BIN=python" > /workspaces/cua/.env.local - -# Run /scripts/build.sh -./scripts/build.sh - -# --- -# Build is complete. Show user a clear message to open the workspace manually. -# --- - -cat << 'EOM' - -============================================ - 🚀 Build complete! - - 👉 Next steps: - - 1. Open '.vscode/py.code-workspace' - 2. Press 'Open Workspace' - - Happy coding! -============================================ - -EOM diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..86e25293 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +root = true + +[*] +indent_style = space +indent_size = 4 +charset = utf-8 +end_of_line = lf +insert_final_newline = false +trim_trailing_whitespace = true + +[*.{js,ts,jsx,tsx,json,css,scss,html,md}] +indent_size = 2 diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..5d5400e2 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,15 @@ +# These are supported funding model platforms + +github: trycua +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +polar: # Replace with a single Polar username +buy_me_a_coffee: # Replace with a single Buy Me a Coffee username +thanks_dev: # Replace with a single thanks.dev username +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/scripts/get_pyproject_version.py b/.github/scripts/get_pyproject_version.py new file mode 100755 index 00000000..74bf09dd --- /dev/null +++ b/.github/scripts/get_pyproject_version.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Verifies that the version in pyproject.toml matches the expected version. 
+
+Usage:
+    python get_pyproject_version.py <pyproject_path> <expected_version>
+
+Exit codes:
+    0 - Versions match
+    1 - Versions don't match or error occurred
+"""
+
+import sys
+
+try:
+    import tomllib
+except ImportError:
+    # Fallback for Python < 3.11
+    import toml as tomllib
+
+
+def main():
+    if len(sys.argv) != 3:
+        print(
+            "Usage: python get_pyproject_version.py <pyproject_path> <expected_version>",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    pyproject_path = sys.argv[1]
+    expected_version = sys.argv[2]
+
+    # tomllib requires binary mode
+    try:
+        with open(pyproject_path, "rb") as f:
+            data = tomllib.load(f)
+    except FileNotFoundError:
+        print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        # Fallback to toml if using the old library or handle other errors
+        try:
+            import toml
+
+            data = toml.load(pyproject_path)
+        except FileNotFoundError:
+            print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr)
+            sys.exit(1)
+        except Exception as toml_err:
+            print(f"❌ ERROR: Failed to parse TOML file: {toml_err}", file=sys.stderr)
+            sys.exit(1)
+
+    actual_version = data.get("project", {}).get("version")
+
+    if not actual_version:
+        print("❌ ERROR: No version found in pyproject.toml", file=sys.stderr)
+        sys.exit(1)
+
+    if actual_version != expected_version:
+        print("❌ Version mismatch detected!", file=sys.stderr)
+        print(f"   pyproject.toml version: {actual_version}", file=sys.stderr)
+        print(f"   Expected version: {expected_version}", file=sys.stderr)
+        print("", file=sys.stderr)
+        print(
+            "The version in pyproject.toml must match the version being published.", file=sys.stderr
+        )
+        print(
+            f"Please update pyproject.toml to version {expected_version} or use the correct tag.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    print(f"✅ Version consistency check passed: {actual_version}")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/tests/README.md b/.github/scripts/tests/README.md
new file mode 100644
index 00000000..a4fd2857
--- /dev/null
+++ b/.github/scripts/tests/README.md
@@ -0,0 +1,137 @@
+# Tests for .github/scripts
+
+This directory contains comprehensive tests for the GitHub workflow scripts using Python's built-in testing framework.
+
+## Requirements
+
+**No external dependencies required!**
+
+This test suite uses:
+
+- `unittest` - Python's built-in testing framework
+- `tomllib` - Python 3.11+ built-in TOML parser
+
+For Python < 3.11, the `toml` package is used as a fallback.
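+
+A minimal sketch of the fallback import this relies on, mirroring the pattern in `get_pyproject_version.py` above:
+
+```python
+try:
+    import tomllib  # Python 3.11+ built-in TOML parser
+except ImportError:
+    # Assumes the third-party `toml` package is installed on Python < 3.11
+    import toml as tomllib
+```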
+ +## Running Tests + +### Run all tests + +```bash +cd .github/scripts/tests +python3 -m unittest discover -v +``` + +### Run a specific test file + +```bash +python3 -m unittest test_get_pyproject_version -v +``` + +### Run a specific test class + +```bash +python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion -v +``` + +### Run a specific test method + +```bash +python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion.test_matching_versions -v +``` + +### Run tests directly from the test file + +```bash +python3 test_get_pyproject_version.py +``` + +## Test Structure + +### test_get_pyproject_version.py + +Comprehensive tests for `get_pyproject_version.py` covering: + +- ✅ **Version matching**: Tests successful version validation +- ✅ **Version mismatch**: Tests error handling when versions don't match +- ✅ **Missing version**: Tests handling of pyproject.toml without version field +- ✅ **Missing project section**: Tests handling of pyproject.toml without project section +- ✅ **File not found**: Tests handling of non-existent files +- ✅ **Malformed TOML**: Tests handling of invalid TOML syntax +- ✅ **Argument validation**: Tests proper argument count validation +- ✅ **Semantic versioning**: Tests various semantic version formats +- ✅ **Pre-release tags**: Tests versions with alpha, beta, rc tags +- ✅ **Build metadata**: Tests versions with build metadata +- ✅ **Edge cases**: Tests empty versions and other edge cases + +**Total Tests**: 17+ test cases covering all functionality + +## Best Practices Implemented + +1. **Fixture Management**: Uses `setUp()` and `tearDown()` for clean test isolation +2. **Helper Methods**: Provides reusable helpers for creating test fixtures +3. **Temporary Files**: Uses `tempfile` for file creation with proper cleanup +4. **Comprehensive Coverage**: Tests happy paths, error conditions, and edge cases +5. **Clear Documentation**: Each test has a descriptive docstring +6. **Output Capture**: Uses `unittest.mock.patch` and `StringIO` to test stdout/stderr +7. **Exit Code Validation**: Properly tests script exit codes with `assertRaises(SystemExit)` +8. **Type Hints**: Uses type hints in helper methods for clarity +9. **PEP 8 Compliance**: Follows Python style guidelines +10. **Zero External Dependencies**: Uses only Python standard library + +## Continuous Integration + +These tests can be integrated into GitHub Actions workflows with no additional dependencies: + +```yaml +- name: Run .github scripts tests + run: | + cd .github/scripts/tests + python3 -m unittest discover -v +``` + +## Test Output Example + +``` +test_empty_version_string (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of empty version string. ... ok +test_file_not_found (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of non-existent pyproject.toml file. ... ok +test_malformed_toml (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of malformed TOML file. ... ok +test_matching_versions (test_get_pyproject_version.TestGetPyprojectVersion) +Test that matching versions result in success. ... ok +test_missing_project_section (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of pyproject.toml without a project section. ... ok +test_missing_version_field (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of pyproject.toml without a version field. ... 
ok +test_no_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing no arguments results in usage error. ... ok +test_semantic_version_0_0_1 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 0.0.1. ... ok +test_semantic_version_1_0_0 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 1.0.0. ... ok +test_semantic_version_10_20_30 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 10.20.30. ... ok +test_semantic_version_alpha (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with alpha tag. ... ok +test_semantic_version_beta (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with beta tag. ... ok +test_semantic_version_rc_with_build (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with rc and build metadata. ... ok +test_too_few_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing too few arguments results in usage error. ... ok +test_too_many_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing too many arguments results in usage error. ... ok +test_version_mismatch (test_get_pyproject_version.TestGetPyprojectVersion) +Test that mismatched versions result in failure with appropriate error message. ... ok +test_version_with_build_metadata (test_get_pyproject_version.TestGetPyprojectVersion) +Test matching versions with build metadata. ... ok +test_version_with_prerelease_tags (test_get_pyproject_version.TestGetPyprojectVersion) +Test matching versions with pre-release tags like alpha, beta, rc. ... ok + +---------------------------------------------------------------------- +Ran 18 tests in 0.XXXs + +OK +``` diff --git a/.github/scripts/tests/__init__.py b/.github/scripts/tests/__init__.py new file mode 100644 index 00000000..cbc9d370 --- /dev/null +++ b/.github/scripts/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for .github/scripts.""" diff --git a/.github/scripts/tests/test_get_pyproject_version.py b/.github/scripts/tests/test_get_pyproject_version.py new file mode 100644 index 00000000..6b8fbd65 --- /dev/null +++ b/.github/scripts/tests/test_get_pyproject_version.py @@ -0,0 +1,360 @@ +""" +Comprehensive tests for get_pyproject_version.py script using unittest. 
+ +This test suite covers: +- Version matching validation +- Error handling for missing versions +- Invalid input handling +- File not found scenarios +- Malformed TOML handling +""" + +import sys +import tempfile +import unittest +from io import StringIO +from pathlib import Path +from unittest.mock import patch + +# Add parent directory to path to import the module +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Import after path is modified +import get_pyproject_version + + +class TestGetPyprojectVersion(unittest.TestCase): + """Test suite for get_pyproject_version.py functionality.""" + + def setUp(self): + """Reset sys.argv before each test.""" + self.original_argv = sys.argv.copy() + + def tearDown(self): + """Restore sys.argv after each test.""" + sys.argv = self.original_argv + + def create_pyproject_toml(self, version: str) -> Path: + """Helper to create a temporary pyproject.toml file with a given version.""" + temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".toml", delete=False) + temp_file.write( + f""" +[project] +name = "test-project" +version = "{version}" +description = "A test project" +""" + ) + temp_file.close() + return Path(temp_file.name) + + def create_pyproject_toml_no_version(self) -> Path: + """Helper to create a pyproject.toml without a version field.""" + temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".toml", delete=False) + temp_file.write( + """ +[project] +name = "test-project" +description = "A test project without version" +""" + ) + temp_file.close() + return Path(temp_file.name) + + def create_pyproject_toml_no_project(self) -> Path: + """Helper to create a pyproject.toml without a project section.""" + temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".toml", delete=False) + temp_file.write( + """ +[tool.poetry] +name = "test-project" +version = "1.0.0" +""" + ) + temp_file.close() + return Path(temp_file.name) + + def create_malformed_toml(self) -> Path: + """Helper to create a malformed TOML file.""" + temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".toml", delete=False) + temp_file.write( + """ +[project +name = "test-project +version = "1.0.0" +""" + ) + temp_file.close() + return Path(temp_file.name) + + # Test: Successful version match + def test_matching_versions(self): + """Test that matching versions result in success.""" + pyproject_file = self.create_pyproject_toml("1.2.3") + + try: + sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.2.3"] + + # Capture stdout + captured_output = StringIO() + with patch("sys.stdout", captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Version mismatch + def test_version_mismatch(self): + """Test that mismatched versions result in failure with appropriate error message.""" + pyproject_file = self.create_pyproject_toml("1.2.3") + + try: + sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.2.4"] + + # Capture stderr + captured_error = StringIO() + with patch("sys.stderr", captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + error_output = captured_error.getvalue() + self.assertIn("❌ Version mismatch detected!", error_output) + self.assertIn("pyproject.toml version: 1.2.3", error_output) + self.assertIn("Expected version: 1.2.4", 
error_output)
+            self.assertIn("Please update pyproject.toml to version 1.2.4", error_output)
+        finally:
+            pyproject_file.unlink()
+
+    # Test: Missing version in pyproject.toml
+    def test_missing_version_field(self):
+        """Test handling of pyproject.toml without a version field."""
+        pyproject_file = self.create_pyproject_toml_no_version()
+
+        try:
+            sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.0.0"]
+
+            captured_error = StringIO()
+            with patch("sys.stderr", captured_error):
+                with self.assertRaises(SystemExit) as cm:
+                    get_pyproject_version.main()
+
+            self.assertEqual(cm.exception.code, 1)
+            self.assertIn("❌ ERROR: No version found in pyproject.toml", captured_error.getvalue())
+        finally:
+            pyproject_file.unlink()
+
+    # Test: Missing project section
+    def test_missing_project_section(self):
+        """Test handling of pyproject.toml without a project section."""
+        pyproject_file = self.create_pyproject_toml_no_project()
+
+        try:
+            sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.0.0"]
+
+            captured_error = StringIO()
+            with patch("sys.stderr", captured_error):
+                with self.assertRaises(SystemExit) as cm:
+                    get_pyproject_version.main()
+
+            self.assertEqual(cm.exception.code, 1)
+            self.assertIn("❌ ERROR: No version found in pyproject.toml", captured_error.getvalue())
+        finally:
+            pyproject_file.unlink()
+
+    # Test: File not found
+    def test_file_not_found(self):
+        """Test handling of non-existent pyproject.toml file."""
+        sys.argv = ["get_pyproject_version.py", "/nonexistent/pyproject.toml", "1.0.0"]
+
+        with self.assertRaises(SystemExit) as cm:
+            get_pyproject_version.main()
+
+        self.assertEqual(cm.exception.code, 1)
+
+    # Test: Malformed TOML
+    def test_malformed_toml(self):
+        """Test handling of malformed TOML file."""
+        pyproject_file = self.create_malformed_toml()
+
+        try:
+            sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.0.0"]
+
+            with self.assertRaises(SystemExit) as cm:
+                get_pyproject_version.main()
+
+            self.assertEqual(cm.exception.code, 1)
+        finally:
+            pyproject_file.unlink()
+
+    # Test: Incorrect number of arguments - too few
+    def test_too_few_arguments(self):
+        """Test that providing too few arguments results in usage error."""
+        sys.argv = ["get_pyproject_version.py", "pyproject.toml"]
+
+        captured_error = StringIO()
+        with patch("sys.stderr", captured_error):
+            with self.assertRaises(SystemExit) as cm:
+                get_pyproject_version.main()
+
+        self.assertEqual(cm.exception.code, 1)
+        self.assertIn(
+            "Usage: python get_pyproject_version.py <pyproject_path> <expected_version>",
+            captured_error.getvalue(),
+        )
+
+    # Test: Incorrect number of arguments - too many
+    def test_too_many_arguments(self):
+        """Test that providing too many arguments results in usage error."""
+        sys.argv = ["get_pyproject_version.py", "pyproject.toml", "1.0.0", "extra"]
+
+        captured_error = StringIO()
+        with patch("sys.stderr", captured_error):
+            with self.assertRaises(SystemExit) as cm:
+                get_pyproject_version.main()
+
+        self.assertEqual(cm.exception.code, 1)
+        self.assertIn(
+            "Usage: python get_pyproject_version.py <pyproject_path> <expected_version>",
+            captured_error.getvalue(),
+        )
+
+    # Test: No arguments
+    def test_no_arguments(self):
+        """Test that providing no arguments results in usage error."""
+        sys.argv = ["get_pyproject_version.py"]
+
+        captured_error = StringIO()
+        with patch("sys.stderr", captured_error):
+            with self.assertRaises(SystemExit) as cm:
+                get_pyproject_version.main()
+
+        self.assertEqual(cm.exception.code, 1)
+        self.assertIn(
+            "Usage: python get_pyproject_version.py <pyproject_path> <expected_version>",
+            captured_error.getvalue(),
+        )
+
+    # 
Test: Version with pre-release tags + def test_version_with_prerelease_tags(self): + """Test matching versions with pre-release tags like alpha, beta, rc.""" + pyproject_file = self.create_pyproject_toml("1.2.3-rc.1") + + try: + sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.2.3-rc.1"] + + captured_output = StringIO() + with patch("sys.stdout", captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn( + "✅ Version consistency check passed: 1.2.3-rc.1", captured_output.getvalue() + ) + finally: + pyproject_file.unlink() + + # Test: Version with build metadata + def test_version_with_build_metadata(self): + """Test matching versions with build metadata.""" + pyproject_file = self.create_pyproject_toml("1.2.3+build.123") + + try: + sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.2.3+build.123"] + + captured_output = StringIO() + with patch("sys.stdout", captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn( + "✅ Version consistency check passed: 1.2.3+build.123", captured_output.getvalue() + ) + finally: + pyproject_file.unlink() + + # Test: Various semantic version formats + def test_semantic_version_0_0_1(self): + """Test semantic version 0.0.1.""" + self._test_version_format("0.0.1") + + def test_semantic_version_1_0_0(self): + """Test semantic version 1.0.0.""" + self._test_version_format("1.0.0") + + def test_semantic_version_10_20_30(self): + """Test semantic version 10.20.30.""" + self._test_version_format("10.20.30") + + def test_semantic_version_alpha(self): + """Test semantic version with alpha tag.""" + self._test_version_format("1.2.3-alpha") + + def test_semantic_version_beta(self): + """Test semantic version with beta tag.""" + self._test_version_format("1.2.3-beta.1") + + def test_semantic_version_rc_with_build(self): + """Test semantic version with rc and build metadata.""" + self._test_version_format("1.2.3-rc.1+build.456") + + def _test_version_format(self, version: str): + """Helper method to test various semantic version formats.""" + pyproject_file = self.create_pyproject_toml(version) + + try: + sys.argv = ["get_pyproject_version.py", str(pyproject_file), version] + + captured_output = StringIO() + with patch("sys.stdout", captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn( + f"✅ Version consistency check passed: {version}", captured_output.getvalue() + ) + finally: + pyproject_file.unlink() + + # Test: Empty version string + def test_empty_version_string(self): + """Test handling of empty version string.""" + pyproject_file = self.create_pyproject_toml("") + + try: + sys.argv = ["get_pyproject_version.py", str(pyproject_file), "1.0.0"] + + captured_error = StringIO() + with patch("sys.stderr", captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + # Empty string is falsy, so it should trigger error + self.assertIn("❌", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + +class TestSuiteInfo(unittest.TestCase): + """Test suite metadata.""" + + def test_suite_info(self): + """Display test suite information.""" + print("\n" + "=" * 70) + print("Test Suite: get_pyproject_version.py") + print("Framework: unittest (Python built-in)") + print("TOML Library: 
tomllib (Python 3.11+ built-in)") + print("=" * 70) + self.assertTrue(True) + + +if __name__ == "__main__": + # Run tests with verbose output + unittest.main(verbosity=2) diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml new file mode 100644 index 00000000..e628cf42 --- /dev/null +++ b/.github/workflows/bump-version.yml @@ -0,0 +1,91 @@ +name: Bump Version + +on: + workflow_dispatch: + inputs: + service: + description: "Service/Package to bump" + required: true + type: choice + options: + - cua-agent + - cua-computer + - cua-computer-server + - cua-core + - cua-mcp-server + - cua-som + - pylume + bump_type: + description: "Version bump type" + required: true + type: choice + options: + - patch + - minor + - major + +permissions: + contents: write + +jobs: + bump-version: + runs-on: ubuntu-latest + steps: + - name: Set package directory + id: package + run: | + case "${{ inputs.service }}" in + "cua-agent") + echo "directory=libs/python/agent" >> $GITHUB_OUTPUT + ;; + "cua-computer") + echo "directory=libs/python/computer" >> $GITHUB_OUTPUT + ;; + "cua-computer-server") + echo "directory=libs/python/computer-server" >> $GITHUB_OUTPUT + ;; + "cua-core") + echo "directory=libs/python/core" >> $GITHUB_OUTPUT + ;; + "cua-mcp-server") + echo "directory=libs/python/mcp-server" >> $GITHUB_OUTPUT + ;; + "cua-som") + echo "directory=libs/python/som" >> $GITHUB_OUTPUT + ;; + "pylume") + echo "directory=libs/python/pylume" >> $GITHUB_OUTPUT + ;; + *) + echo "Unknown service: ${{ inputs.service }}" + exit 1 + ;; + esac + + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install bump2version + run: pip install bump2version + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Run bump2version + run: | + cd ${{ steps.package.outputs.directory }} + bump2version ${{ inputs.bump_type }} + + - name: Push changes + run: | + git push origin main --follow-tags diff --git a/.github/workflows/docker-publish-kasm.yml b/.github/workflows/docker-publish-kasm.yml new file mode 100644 index 00000000..d97dbf6f --- /dev/null +++ b/.github/workflows/docker-publish-kasm.yml @@ -0,0 +1,29 @@ +name: Build and Publish CUA Ubuntu Container + +on: + push: + branches: + - main + tags: + - "docker-kasm-v*.*.*" + paths: + - "libs/kasm/**" + - ".github/workflows/docker-publish-kasm.yml" + - ".github/workflows/docker-reusable-publish.yml" + pull_request: + paths: + - "libs/kasm/**" + - ".github/workflows/docker-publish-kasm.yml" + - ".github/workflows/docker-reusable-publish.yml" + +jobs: + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-ubuntu + context_dir: libs/kasm + dockerfile_path: Dockerfile + tag_prefix: docker-kasm-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-publish-xfce.yml b/.github/workflows/docker-publish-xfce.yml new file mode 100644 index 00000000..fa64849e --- /dev/null +++ b/.github/workflows/docker-publish-xfce.yml @@ -0,0 +1,29 @@ +name: Build and Publish CUA XFCE Container + +on: + push: + branches: + - main + tags: + - "docker-xfce-v*.*.*" + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + 
pull_request: + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + +jobs: + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-xfce + context_dir: libs/xfce + dockerfile_path: Dockerfile + tag_prefix: docker-xfce-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-reusable-publish.yml b/.github/workflows/docker-reusable-publish.yml new file mode 100644 index 00000000..3472883f --- /dev/null +++ b/.github/workflows/docker-reusable-publish.yml @@ -0,0 +1,155 @@ +name: Reusable Docker Publish Workflow + +on: + workflow_call: + inputs: + image_name: + description: "Name of the Docker image (e.g. cua-ubuntu, cua-xfce)" + required: true + type: string + context_dir: + description: "Directory containing the Dockerfile relative to workspace root (e.g. libs/kasm, libs/xfce)" + required: true + type: string + dockerfile_path: + description: "Path to Dockerfile relative to context_dir (e.g. Dockerfile)" + required: false + type: string + default: "Dockerfile" + tag_prefix: + description: "Prefix for semantic version tags (e.g. docker-kasm-v, docker-xfce-v)" + required: true + type: string + docker_hub_org: + description: "Docker Hub organization name" + required: false + type: string + default: "trycua" + secrets: + DOCKER_HUB_TOKEN: + required: true + +jobs: + build-and-push: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Prepare platform tag + id: platform + run: | + # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) + PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') + echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ inputs.docker_hub_org }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Extract metadata (PR) + if: github.event_name == 'pull_request' + id: meta-pr + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=${{ github.sha }} + + - name: Extract metadata (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + id: meta-main + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=latest + + - name: Extract metadata (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + id: meta-semver + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }} + type=raw,value=latest + + - name: Build and push Docker image (PR) + if: github.event_name == 'pull_request' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-pr.outputs.tags }} + labels: ${{ steps.meta-pr.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ 
inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-main.outputs.tags }} + labels: ${{ steps.meta-main.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-semver.outputs.tags }} + labels: ${{ steps.meta-semver.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Image digest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" + else + echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" + fi + + - name: print image tags + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" + else + echo "Image tags: ${{ steps.meta-main.outputs.tags }}" + fi diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..b17bcd74 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,60 @@ +name: Lint & Format Check + +on: + pull_request: + branches: + - main + push: + branches: + - main + +jobs: + lint: + name: Lint & Format + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: 20 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + with: + version: 10 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.12 + + - name: Install Python dependencies + run: | + pip install uv + uv sync + + - name: Install Node dependencies + 
run: |
+          pnpm install --frozen-lockfile
+          pnpm -C libs/typescript install --frozen-lockfile
+
+      # Python checks (isort, black, ruff, mypy)
+      - name: Python lint & typecheck
+        run: |
+          uv run isort --check-only .
+          uv run black --check .
+          uv run ruff check .
+          # Temporarily disabled due to untyped codebase
+          # uv run mypy .
+
+      # TypeScript type check
+      - name: TypeScript typecheck
+        run: node ./scripts/typescript-typecheck.js
+
+      # JS/TS/Markdown/YAML checks
+      - name: Prettier check
+        run: pnpm prettier --check "**/*.{ts,tsx,js,jsx,json,md,yaml,yml}"
diff --git a/.github/workflows/pypi-reusable-publish.yml b/.github/workflows/pypi-reusable-publish.yml
index f1eb045e..4a220610 100644
--- a/.github/workflows/pypi-reusable-publish.yml
+++ b/.github/workflows/pypi-reusable-publish.yml
@@ -71,6 +71,16 @@ jobs:
           echo "VERSION=${{ inputs.version }}" >> $GITHUB_ENV
           echo "version=${{ inputs.version }}" >> $GITHUB_OUTPUT
 
+      - name: Verify version consistency
+        run: |
+          # Install toml parser
+          pip install toml
+
+          # Verify version matches using script (exits with error if mismatch)
+          python ${GITHUB_WORKSPACE}/.github/scripts/get_pyproject_version.py \
+            ${{ inputs.package_dir }}/pyproject.toml \
+            ${{ inputs.version }}
+
       - name: Initialize PDM in package directory
         run: |
           # Make sure we're working with a properly initialized PDM project
@@ -82,21 +92,6 @@
             pdm lock
           fi
 
-      - name: Set version in package
-        run: |
-          cd ${{ inputs.package_dir }}
-          # Replace pdm bump with direct edit of pyproject.toml
-          if [[ "$OSTYPE" == "darwin"* ]]; then
-            # macOS version of sed needs an empty string for -i
-            sed -i '' "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml
-          else
-            # Linux version
-            sed -i "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml
-          fi
-          # Verify version was updated
-          echo "Updated version in pyproject.toml:"
-          grep "version =" pyproject.toml
-
       # Conditional step for lume binary download (only for pylume package)
       - name: Download and setup lume binary
         if: inputs.is_lume_package
diff --git a/.github/workflows/test-validation-script.yml b/.github/workflows/test-validation-script.yml
new file mode 100644
index 00000000..15f5b7ec
--- /dev/null
+++ b/.github/workflows/test-validation-script.yml
@@ -0,0 +1,36 @@
+name: Test validation script
+
+on:
+  pull_request:
+    paths:
+      - ".github/scripts/**"
+      - ".github/workflows/test-validation-script.yml"
+  push:
+    branches:
+      - main
+    paths:
+      - ".github/scripts/**"
+      - ".github/workflows/test-validation-script.yml"
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest toml
+
+      - name: Run tests
+        run: |
+          cd .github/scripts
+          pytest tests/ -v
diff --git a/.gitignore b/.gitignore
index e623dda8..8cae22ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -111,6 +111,9 @@ ENV/
 env.bak/
 venv.bak/
 
+# Git worktrees
+.worktrees/
+
 # Spyder project settings
 .spyderproject
 .spyproject
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..e1523f92
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,48 @@
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.0.0
+    hooks:
+      - id: prettier
+        name: Prettier (TS/JS/JSON/Markdown/YAML)
+        entry: prettier --write
+        language: node
+        additional_dependencies: ["prettier@3.6.2"]
+        files: \.(ts|tsx|js|jsx|json|md|yaml|yml)$
+
+  
- repo: local + hooks: + - id: tsc + name: TypeScript type check + entry: node ./scripts/typescript-typecheck.js + language: node + + - repo: https://github.com/PyCQA/isort + rev: 7.0.0 + hooks: + - id: isort + name: isort code formatter + args: ["--profile", "black"] + files: \.(py)$ + + - repo: https://github.com/psf/black + rev: 25.9.0 + hooks: + - id: black + name: Black code formatter + files: \.(py)$ + + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.14.1 + hooks: + - id: ruff + name: ruff linter + args: ["--fix"] + files: \.(py)$ + + # Temporarily disabled due to untyped codebase + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v1.5.1 + # hooks: + # - id: mypy + # name: mypy type checker + # files: \.(py)$ diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..7a28e2a7 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,32 @@ +# Node / JS +node_modules/ +dist/ +build/ +out/ +.next/ +*.min.js + +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.venv/ +venv/ +.env +.env.local + +# Logs +*.log +*.tmp + +# VSCode / editor files +.vscode/ +.idea/ + +# Other generated files +*.lock +*.db +*.sqlite +pnpm-lock.yaml +uv.lock \ No newline at end of file diff --git a/.prettierrc.yaml b/.prettierrc.yaml new file mode 100644 index 00000000..1815ee00 --- /dev/null +++ b/.prettierrc.yaml @@ -0,0 +1,12 @@ +semi: true +singleQuote: true +trailingComma: es5 +tabWidth: 2 +printWidth: 100 +arrowParens: always +bracketSpacing: true + +overrides: + - files: "*.{yml,yaml}" + options: + singleQuote: false diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..a28f3b28 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,10 @@ +{ + "recommendations": [ + "esbenp.prettier-vscode", + "charliermarsh.ruff", + "ms-python.black-formatter", + "ms-python.mypy-type-checker", + "ms-python.vscode-pylance", + "ms-python.isort" + ] +} \ No newline at end of file diff --git a/.vscode/libs-ts.code-workspace b/.vscode/libs-ts.code-workspace index 732316f2..ccff6b06 100644 --- a/.vscode/libs-ts.code-workspace +++ b/.vscode/libs-ts.code-workspace @@ -7,7 +7,7 @@ ], "extensions": { "recommendations": [ - "biomejs.biome", + "esbenp.prettier-vscode" ] } } \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..ab4deb49 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,25 @@ +{ + "python-envs.pythonProjects": [], + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit", + "source.fixAll": "explicit" + }, + "extensions.ignoreRecommendations": false, + "python.formatting.provider": "black", + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + }, + "[javascript][typescript][typescriptreact][javascriptreact]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" + }, + "ruff.configuration": "${workspaceFolder}/pyproject.toml", + "mypy-type-checker.args": [ + "--config-file", + "${workspaceFolder}/pyproject.toml" + ], + "mypy-type-checker.path": [ + "${workspaceFolder}" + ] +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed05c47b..69819ef6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ -# Contributing to cua +# Contributing to Cua -We deeply appreciate your interest in contributing to cua! 
Whether you're reporting bugs, suggesting enhancements, improving docs, or submitting pull requests, your contributions help improve the project for everyone.
+We deeply appreciate your interest in contributing to Cua! Whether you're reporting bugs, suggesting enhancements, improving docs, or submitting pull requests, your contributions help improve the project for everyone.
 
 ## Reporting Bugs
 
@@ -35,17 +35,20 @@ We follow strict code formatting guidelines to ensure consistency across the cod
 3. **Run Formatting Tools**: Always run the formatting tools before submitting a PR:
    ```bash
    # For Python code
-   pdm run black .
-   pdm run ruff check --fix .
+   uv run black .
+   uv run isort .
+   uv run ruff check --fix .
    ```
 4. **Validate Your Code**: Ensure your code passes all checks:
    ```bash
-   pdm run mypy .
+   uv run mypy .
    ```
+5. Every time you commit code, a pre-commit hook automatically runs the formatting and validation tools. If any issues are found, the commit is blocked until they are resolved; fix the reported issues and commit again.
 
 ## Documentation
 
 Documentation improvements are always welcome. You can:
+
 - Fix typos or unclear explanations
 - Add examples and use cases
 - Improve API documentation
 
 For detailed instructions on setting up your development environment and submitting code contributions, please see our [Developer-Guide](Development.md).
 
-Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions.
\ No newline at end of file
+Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions.
diff --git a/Development.md b/Development.md
index be281dc8..4b18fad7 100644
--- a/Development.md
+++ b/Development.md
@@ -10,44 +10,71 @@ The project is organized as a monorepo with these main packages:
 - `libs/som/` - Set-of-Mark parser
 - `libs/computer-server/` - Server component for VM
 - `libs/lume/` - Lume CLI
-- `libs/pylume/` - Python bindings for Lume
 
-Each package has its own virtual environment and dependencies, managed through PDM.
+These packages are part of a uv workspace, which manages a shared virtual environment and dependencies.
 
 ## Local Development Setup
 
 1. Install Lume CLI:
 
-   ```bash
-   /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
-   ```
+   ```bash
+   /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
+   ```
 
 2. Clone the repository:
 
-   ```bash
-   git clone https://github.com/trycua/cua.git
-   cd cua
-   ```
+   ```bash
+   git clone https://github.com/trycua/cua.git
+   cd cua
+   ```
 
 3. Create a `.env.local` file in the root directory with your API keys:
 
-   ```bash
-   # Required for Anthropic provider
-   ANTHROPIC_API_KEY=your_anthropic_key_here
+   ```bash
+   # Required for Anthropic provider
+   ANTHROPIC_API_KEY=your_anthropic_key_here
 
-   # Required for OpenAI provider
-   OPENAI_API_KEY=your_openai_key_here
-   ```
+   # Required for OpenAI provider
+   OPENAI_API_KEY=your_openai_key_here
+   ```
 
-4. Open the workspace in VSCode or Cursor:
+4. 
Install Node.js dependencies for Prettier and other scripts: - ```bash - # For Cua Python development - code .vscode/py.code-workspace + ```bash + # Install pnpm if you don't have it + npm install -g pnpm - # For Lume (Swift) development - code .vscode/lume.code-workspace - ``` + # Install all JS/TS dependencies + pnpm install + ``` + +5. Install Python dependencies and workspace packages: + + ```bash + # First install uv if you don't have it + pip install uv + + # Then install all Python dependencies + uv sync + ``` + +6. Open the workspace in VSCode or Cursor: + + ```bash + # For Cua Python development + code .vscode/py.code-workspace + + # For Lume (Swift) development + code .vscode/lume.code-workspace + ``` + +7. Install Pre-commit hooks: + + This ensures code formatting and validation run automatically on each commit. + + ```bash + uv run pre-commit install + ``` Using the workspace file is strongly recommended as it: @@ -62,39 +89,33 @@ Refer to the [Lume README](./libs/lume/Development.md) for instructions on how t ## Python Development -There are two ways to install Lume: +### Setup -### Run the build script - -Run the build script to set up all packages: +Install all of workspace dependencies with a single command: ```bash -./scripts/build.sh +uv sync ``` -The build script creates a shared virtual environment for all packages. The workspace configuration automatically handles import paths with the correct Python path settings. +This installs all dependencies in the virtual environment `.venv`. -This will: +Each Cua package is installed in editable mode, which means changes to the source code are immediately reflected in the installed package. -- Create a virtual environment for the project -- Install all packages in development mode -- Set up the correct Python path -- Install development tools +The `.venv` environment is also configured as the default VS Code Python interpreter in `.vscode/settings.json`. -### Install with PDM +### Running Python Scripts -If PDM is not already installed, you can follow the installation instructions [here](https://pdm-project.org/en/latest/#installation). +To run Python scripts in the workspace, use the `uv run` command: -To install with PDM, simply run: - -```console -pdm install -G:all +```bash +uv run python examples/agent_examples.py ``` -This installs all the dependencies for development, testing, and building the docs. If you'd only like development dependencies, you can run: +Or activate the virtual environment manually: -```console -pdm install -d +```bash +source .venv/bin/activate +python examples/agent_examples.py ``` ## Running Examples @@ -114,71 +135,9 @@ The workspace also includes compound launch configurations: - "Run Computer Examples + Server" - Runs both the Computer Examples and Server simultaneously -## Docker Development Environment - -As an alternative to installing directly on your host machine, you can use Docker for development. This approach has several advantages: - -### Prerequisites - -- Docker installed on your machine -- Lume server running on your host (port 7777): `lume serve` - -### Setup and Usage - -1. Build the development Docker image: - - ```bash - ./scripts/run-docker-dev.sh build - ``` - -2. Run an example in the container: - - ```bash - ./scripts/run-docker-dev.sh run computer_examples.py - ``` - -3. Get an interactive shell in the container: - - ```bash - ./scripts/run-docker-dev.sh run --interactive - ``` - -4. 
Stop any running containers: - - ```bash - ./scripts/run-docker-dev.sh stop - ``` - -### How it Works - -The Docker development environment: - -- Installs all required Python dependencies in the container -- Mounts your source code from the host at runtime -- Automatically configures the connection to use host.docker.internal:7777 for accessing the Lume server on your host machine -- Preserves your code changes without requiring rebuilds (source code is mounted as a volume) - -> **Note**: The Docker container doesn't include the macOS-specific Lume executable. Instead, it connects to the Lume server running on your host machine via host.docker.internal:7777. Make sure to start the Lume server on your host before running examples in the container. - -## Cleanup and Reset - -If you need to clean up the environment (non-docker) and start fresh: - -```bash -./scripts/cleanup.sh -``` - -This will: - -- Remove all virtual environments -- Clean Python cache files and directories -- Remove build artifacts -- Clean PDM-related files -- Reset environment configurations - ## Code Formatting Standards -The cua project follows strict code formatting standards to ensure consistency across all packages. +The Cua project follows strict code formatting standards to ensure consistency across all packages. ### Python Code Formatting @@ -187,10 +146,11 @@ The cua project follows strict code formatting standards to ensure consistency a The project uses the following tools for code formatting and linting: - **[Black](https://black.readthedocs.io/)**: Code formatter +- **[isort](https://pycqa.github.io/isort/)**: Import sorter - **[Ruff](https://beta.ruff.rs/docs/)**: Fast linter and formatter - **[MyPy](https://mypy.readthedocs.io/)**: Static type checker -These tools are automatically installed when you set up the development environment using the `./scripts/build.sh` script. +These tools are automatically installed when you set up the development environment. #### Configuration @@ -202,23 +162,34 @@ line-length = 100 target-version = ["py311"] [tool.ruff] +fix = true line-length = 100 target-version = "py311" + +[tool.ruff.lint] select = ["E", "F", "B", "I"] +ignore = [ + "E501", "E402", "I001", "I002", "B007", "B023", "B024", "B027", "B028", + "B904", "B905", "E711", "E712", "E722", "E731", "F401", "F403", "F405", + "F811", "F821", "F841" +] fix = true [tool.ruff.format] docstring-code-format = true [tool.mypy] -strict = true -python_version = "3.11" -ignore_missing_imports = true -disallow_untyped_defs = true check_untyped_defs = true -warn_return_any = true +disallow_untyped_defs = true +ignore_missing_imports = true +python_version = "3.11" show_error_codes = true +strict = true +warn_return_any = true warn_unused_ignores = false + +[tool.isort] +profile = "black" ``` #### Key Formatting Rules @@ -232,23 +203,48 @@ warn_unused_ignores = false The repository includes VSCode workspace configurations that enable automatic formatting. When you open the workspace files (as recommended in the setup instructions), the correct formatting settings are automatically applied. 
-Python-specific settings in the workspace files:
+##### Python-specific settings
+
+These are configured in `.vscode/settings.json`:
 
 ```json
-"[python]": {
-  "editor.formatOnSave": true,
-  "editor.defaultFormatter": "ms-python.black-formatter",
-  "editor.codeActionsOnSave": {
-    "source.organizeImports": "explicit"
-  }
+{
+  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
+  "editor.formatOnSave": true,
+  "editor.codeActionsOnSave": {
+    "source.organizeImports": "explicit",
+    "source.fixAll": "explicit"
+  },
+  "[python]": {
+    "editor.defaultFormatter": "ms-python.black-formatter"
+  },
+  "python.formatting.provider": "black",
+  "ruff.configuration": "${workspaceFolder}/pyproject.toml",
+  "mypy-type-checker.args": ["--config-file", "${workspaceFolder}/pyproject.toml"],
+  "mypy-type-checker.path": ["${workspaceFolder}"]
 }
 ```
 
-Recommended VS Code extensions:
+##### JS/TS-specific settings
 
-- Black Formatter (ms-python.black-formatter)
-- Ruff (charliermarsh.ruff)
-- Pylance (ms-python.vscode-pylance)
+```json
+"[javascript][typescript][typescriptreact][javascriptreact]": {
+  "editor.defaultFormatter": "esbenp.prettier-vscode"
+}
+```
+
+- Ensures Prettier is used for all JS/TS files for consistent formatting.
+
+Recommended VS Code Extensions
+
+- **Black Formatter** – `ms-python.black-formatter`
+- **Ruff** – `charliermarsh.ruff`
+- **Pylance** – `ms-python.vscode-pylance`
+- **isort** – `ms-python.isort`
+- **Prettier** – `esbenp.prettier-vscode`
+- **Mypy Type Checker** – `ms-python.mypy-type-checker`
+
+> VSCode will automatically suggest installing the recommended extensions when you open the workspace.
 
 #### Manual Formatting
 
 To manually format code:
 
 ```bash
 # Format all Python files using Black
-pdm run black .
+uv run black .
+
+# Sort imports using isort
+uv run isort .
 
 # Run Ruff linter with auto-fix
-pdm run ruff check --fix .
+uv run ruff check --fix .
 
 # Run type checking with MyPy
-pdm run mypy .
+uv run mypy .
 ```
 
 #### Pre-commit Validation
 
 Before submitting a pull request, ensure your code passes all formatting checks:
 
+**Option 1: Run all hooks via pre-commit (all in a single command)**
+
 ```bash
-# Run all checks
-pdm run black --check .
-pdm run ruff check .
-pdm run mypy .
+# Run hooks on staged files (recommended for quick checks)
+uv run pre-commit run
 ```
+
+- Automatically runs Black, Ruff, isort, Prettier, and any other configured hooks.
+
+**Option 2: Run individual tools manually**
+
+```bash
+# Python checks
+uv run black --check .
+uv run isort --check .
+uv run ruff check .
+uv run mypy .
+
+# JavaScript/TypeScript checks
+pnpm prettier --check "**/*.{ts,tsx,js,jsx,json,md,yaml,yml}"
+
+# TypeScript typecheck
+node ./scripts/typescript-typecheck.js
+```
+
+### JavaScript / TypeScript Formatting (Prettier)
+
+The project uses **Prettier** to ensure consistent formatting across all JS/TS/JSON/Markdown/YAML files.
+
+#### Installation
+
+All Node.js dependencies are managed via `pnpm`. Make sure you have run:
+
+```bash
+# Install pnpm if you don't have it
+npm install -g pnpm
+
+# Install project dependencies
+pnpm install
+```
+
+This installs Prettier and other JS/TS dependencies defined in `package.json`.
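+
+The `prettier:check` and `prettier:format` commands used below are assumed to be defined as `package.json` scripts, roughly like this (a sketch; the actual entries in the repository may differ):
+
+```json
+{
+  "scripts": {
+    "prettier:check": "prettier --check .",
+    "prettier:format": "prettier --write ."
+  }
+}
+```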
+ +#### Usage + +- **Check formatting** (without making changes): + +```bash +pnpm prettier:check +``` + +- **Automatically format files**: + +```bash +pnpm prettier:format +``` + +#### Type Checking (TypeScript) + +- Run the TypeScript type checker: + +```bash +node ./scripts/typescript-typecheck.js +``` + +#### VSCode Integration + +- The workspace config ensures Prettier is used automatically for JS/TS/JSON/Markdown/YAML files. +- Recommended extension: Prettier – Code Formatter +- Ensure `editor.formatOnSave` is enabled in VSCode for automatic formatting. + ### Swift Code (Lume) For Swift code in the `libs/lume` directory: @@ -283,3 +346,101 @@ For Swift code in the `libs/lume` directory: - Follow the [Swift API Design Guidelines](https://www.swift.org/documentation/api-design-guidelines/) - Use SwiftFormat for consistent formatting - Code will be automatically formatted on save when using the lume workspace + +## Releasing Packages + +Cua uses an automated GitHub Actions workflow to bump package versions. + +> **Note:** The main branch is currently not protected. If branch protection is enabled in the future, the github-actions bot must be added to the bypass list for these workflows to commit directly. + +### Version Bump Workflow + +All packages are managed through a single consolidated workflow: [Bump Version](https://github.com/trycua/cua/actions/workflows/bump-version.yml) + +**Supported packages:** + +- cua-agent +- cua-computer +- cua-computer-server +- cua-core +- cua-mcp-server +- cua-som +- pylume + +**How to use:** + +1. Navigate to the [Bump Version workflow](https://github.com/trycua/cua/actions/workflows/bump-version.yml) +2. Click the "Run workflow" button in the GitHub UI +3. Select the **service/package** you want to bump from the first dropdown +4. Select the **bump type** (patch/minor/major) from the second dropdown +5. Click "Run workflow" to start the version bump +6. 
The workflow will automatically commit changes and push to main.
+
+### Rolling Back a Version Bump
+
+If you need to revert a version bump, follow these steps:
+
+**Step 1: Find the version bump commit**
+
+```bash
+# List recent commits
+git log --oneline | grep "Bump"
+
+# Example output:
+# a1b2c3d Bump cua-core to v0.1.9
+```
+
+**Step 2: Revert the commit**
+
+```bash
+# Revert the specific commit
+git revert <commit-hash>
+
+# Example:
+# git revert a1b2c3d
+```
+
+**Step 3: Delete the git tag**
+
+```bash
+# List tags to find the version tag
+git tag -l
+
+# Delete the tag locally (use the correct package-specific format)
+git tag -d core-v0.1.9
+
+# Delete the tag remotely
+git push origin :refs/tags/core-v0.1.9
+```
+
+**Step 4: Push the revert**
+
+```bash
+git push origin main
+```
+
+**Per-package tag patterns:**
+
+Each package uses its own tag format defined in `.bumpversion.cfg`:
+
+- **cua-core**: `core-v{version}` (e.g., `core-v0.1.9`)
+- **cua-computer**: `computer-v{version}` (e.g., `computer-v0.4.7`)
+- **cua-agent**: `agent-v{version}` (e.g., `agent-v0.4.35`)
+- **cua-som**: `som-v{version}` (e.g., `som-v0.1.3`)
+- **pylume**: `pylume-v{version}` (e.g., `pylume-v0.2.1`)
+- **cua-computer-server**: `computer-server-v{version}` (e.g., `computer-server-v0.1.27`)
+- **cua-mcp-server**: `mcp-server-v{version}` (e.g., `mcp-server-v0.1.14`)
+
+### Local Testing (Advanced)
+
+The Makefile targets are kept for local testing only:
+
+```bash
+# Test version bump locally (dry run)
+make dry-run-patch-core
+
+# View current versions
+make show-versions
+```
+
+**Note:** For production releases, always use the GitHub Actions workflows above instead of running Makefile commands directly.
diff --git a/LICENSE.md b/LICENSE.md
index 207f2665..b8b198ce 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..2339ea73
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,40 @@
+# Python Package Release Makefile
+# Version bumps are managed via GitHub Actions workflows (see Development.md)
+# This Makefile provides utility targets for checking versions and dry-run testing
+
+.PHONY: help
+
+help: ## Show this help message
+	@echo "Python Package Release Utilities"
+	@echo ""
+	@echo "Usage: make <target>"
+	@echo ""
+	@echo "Available targets:"
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  %-25s %s\n", $$1, $$2}'
+	@echo ""
+	@echo "⚠️  For production version bumps, use GitHub Actions:"
+	@echo "   https://github.com/trycua/cua/actions/workflows/bump-version.yml"
+
+# Dry run targets (test without making changes)
+dry-run-patch-%: ## Dry run for patch version bump (e.g., make dry-run-patch-core)
+	@echo "Dry run: Bumping $* patch version..."
+	cd libs/python/$* && bump2version --dry-run --verbose patch
+
+dry-run-minor-%: ## Dry run for minor version bump (e.g., make dry-run-minor-core)
+	@echo "Dry run: Bumping $* minor version..."
+	cd libs/python/$* && bump2version --dry-run --verbose minor
+
+dry-run-major-%: ## Dry run for major version bump (e.g., make dry-run-major-core)
+	@echo "Dry run: Bumping $* major version..."
+ cd libs/python/$* && bump2version --dry-run --verbose major + +# Show current versions +show-versions: ## Show current versions of all packages + @echo "Current Python package versions:" + @echo " cua-core: $$(grep 'current_version' libs/python/core/.bumpversion.cfg | cut -d' ' -f3)" + @echo " pylume: $$(grep 'current_version' libs/python/pylume/.bumpversion.cfg | cut -d' ' -f3)" + @echo " cua-computer: $$(grep 'current_version' libs/python/computer/.bumpversion.cfg | cut -d' ' -f3)" + @echo " cua-som: $$(grep 'current_version' libs/python/som/.bumpversion.cfg | cut -d' ' -f3)" + @echo " cua-agent: $$(grep 'current_version' libs/python/agent/.bumpversion.cfg | cut -d' ' -f3)" + @echo " cua-computer-server: $$(grep 'current_version' libs/python/computer-server/.bumpversion.cfg | cut -d' ' -f3)" + @echo " cua-mcp-server: $$(grep 'current_version' libs/python/mcp-server/.bumpversion.cfg | cut -d' ' -f3)" diff --git a/README.md b/README.md index e03d9bb3..2a43f3b7 100644 --- a/README.md +++ b/README.md @@ -5,70 +5,115 @@ Cua logo - [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) - [![Swift](https://img.shields.io/badge/Swift-F05138?logo=swift&logoColor=white)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) -
- trycua%2Fcua | Trendshift +[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) +[![Swift](https://img.shields.io/badge/Swift-F05138?logo=swift&logoColor=white)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +
+trycua%2Fcua | Trendshift + -> We’re hosting the **Computer-Use Agents SOTA Challenge** at [Hack the North](https://hackthenorth.com) and online! ->> **Track A (On-site @ UWaterloo)**: Reserved for participants accepted to Hack the North. 🏆 Prize: **YC interview guaranteed**. ->> **Track B (Remote)**: Open to everyone worldwide. 🏆 Prize: **Cash award**. ->>> 👉 Sign up here: [trycua.com/hackathon](https://www.trycua.com/hackathon) - -**cua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud. +**Cua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
-With the Computer SDK, you can: -- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions) -- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [cua cloud](https://www.trycua.com/) +With the [Computer SDK](#computer-sdk), you can: + +- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions) +- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [Cua cloud](https://www.trycua.com/) + +With the [Agent SDK](#agent-sdk), you can: -With the Agent SDK, you can: - run computer-use models with a [consistent schema](https://docs.trycua.com/docs/agent-sdk/message-format) - benchmark on OSWorld-Verified, SheetBench-V2, and more [with a single line of code using HUD](https://docs.trycua.com/docs/agent-sdk/integrations/hud) ([Notebook](https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb)) - combine UI grounding models with any LLM using [composed agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) - use new UI agent models and UI grounding models from the Model Zoo below with just a model string (e.g., `ComputerAgent(model="openai/computer-use-preview")`) - use API or local inference by changing a prefix (e.g., `openai/`, `openrouter/`, `ollama/`, `huggingface-local/`, `mlx/`, [etc.](https://docs.litellm.ai/docs/providers)) -### CUA Model Zoo 🐨 +# Modules -| [All-in-one CUAs](https://docs.trycua.com/docs/agent-sdk/supported-agents/computer-use-agents) | [UI Grounding Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) | [UI Planning Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) | -|---|---|---| -| `anthropic/claude-opus-4-1-20250805` | `huggingface-local/xlangai/OpenCUA-{7B,32B}` | any all-in-one CUA | -| `openai/computer-use-preview` | `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}` | any VLM (using liteLLM, requires `tools` parameter) | -| `openrouter/z-ai/glm-4.5v` | `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}` | | -| `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` | any all-in-one CUA | | -| `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` | | -| `omniparser+{ui planning}` | | | -| `{ui grounding}+{ui planning}` | | | + + + + + + + +
-- `human/human` → [Human-in-the-Loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop) +[**Agent**](#agent-sdk)
+AI agent framework for automating tasks -Missing a model? [Raise a feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)! +
-
+**[Computer](#computer-sdk)**
+TypeScript/Python SDK for controlling Cua environments -# Quick Start +
-- [Get started with a Computer-Use Agent UI](https://docs.trycua.com/docs/quickstart-ui) -- [Get started with the Computer-Use Agent CLI](https://docs.trycua.com/docs/quickstart-cli) -- [Get started with the Python SDKs](https://docs.trycua.com/docs/quickstart-devs) +**[MCP Server](#mcp-server)**
+MCP server for using Cua agents and computers -
+
-# Usage ([Docs](https://docs.trycua.com/docs)) +**[Computer Server](#computer-server)**
+Server component that runs on Cua environments + +
+ + + + + + + + +
+ +**[Lume](#lume)**
+VM management for macOS + +
+ +**[Lumier](#lumier)**
+Docker interface for macOS/Linux VMs + +
+ +**[SOM](#som)**
+Set-of-Mark library for Agent + +
+ +**[Core](#core)**
+Core utilities for Cua + +
+ +# Quick Start + +- [Clone a starter template and run the code in <1 min](https://github.com/trycua/agent-template) +- [Get started with the Cua SDKs](https://docs.trycua.com/docs/quickstart-devs) +- [Get started with the Cua CLI](https://docs.trycua.com/docs/quickstart-cli) + +# Agent SDK + +Install the agent SDK: ```bash pip install cua-agent[all] ``` + +Initialize a computer agent using a [model configuration string](#model-configuration) and a [computer instance](#computer-usage): + ```python from agent import ComputerAgent +# ComputerAgent works with any computer initialized with the Computer SDK + agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], @@ -83,121 +128,229 @@ async for result in agent.run(messages): print(item["content"][0]["text"]) ``` -### Output format (OpenAI Agent Responses Format): +## Output format + +Cua uses the OpenAI Agent response format. + +
+Example + ```json -{ +{ "output": [ - # user input { - "role": "user", - "content": "go to trycua on gh" - }, - # first agent turn adds the model output to the history - { - "summary": [ - { - "text": "Searching Firefox for Trycua GitHub", - "type": "summary_text" - } - ], - "type": "reasoning" + "role": "user", + "content": "go to trycua on gh" }, { - "action": { - "text": "Trycua GitHub", - "type": "type" - }, - "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", - "status": "completed", - "type": "computer_call" - }, - # second agent turn adds the computer output to the history - { - "type": "computer_call_output", - "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", - "output": { - "type": "input_image", - "image_url": "data:image/png;base64,..." + "summary": [ + { + "text": "Searching Firefox for Trycua GitHub", + "type": "summary_text" } + ], + "type": "reasoning" }, - # final agent turn adds the agent output text to the history { - "type": "message", - "role": "assistant", - "content": [ - { - "text": "Success! The Trycua GitHub page has been opened.", - "type": "output_text" - } - ] + "action": { + "text": "Trycua GitHub", + "type": "type" + }, + "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", + "status": "completed", + "type": "computer_call" + }, + { + "type": "computer_call_output", + "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", + "output": { + "type": "input_image", + "image_url": "data:image/png;base64,..." + } + }, + { + "type": "message", + "role": "assistant", + "content": [ + { + "text": "Success! The Trycua GitHub page has been opened.", + "type": "output_text" + } + ] } - ], + ], "usage": { - "prompt_tokens": 150, - "completion_tokens": 75, - "total_tokens": 225, - "response_cost": 0.01, + "prompt_tokens": 150, + "completion_tokens": 75, + "total_tokens": 225, + "response_cost": 0.01 } } ``` -# Computer ([Docs](https://docs.trycua.com/docs/computer-sdk/computers)) +
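+
+As a minimal sketch, the items in `output` can be consumed by type, reusing the `agent` and `messages` from the example above (only fields shown in the JSON are used):
+
+```python
+async for result in agent.run(messages):
+    for item in result["output"]:
+        if item["type"] == "computer_call":
+            # A UI action the agent took (click, type, ...)
+            print("action:", item["action"]["type"])
+        elif item["type"] == "message":
+            # Final assistant text for this turn
+            print(item["content"][0]["text"])
+```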
+ +## Model Configuration + +These are the valid model configurations for `ComputerAgent(model="...")`: + +| Configuration | Description | +| ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `{computer-use-model}` | A single model to perform all computer-use tasks | +| `{grounding-model}+{any-vlm-with-tools}` | [Composed](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) with VLM for captioning and grounding LLM for element detection | +| `moondream3+{any-llm-with-tools}` | [Composed](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) with Moondream3 for captioning and UI element detection | +| `human/human` | A [human-in-the-loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop) in place of a model | + +### Model Capabilities + +The following table shows which capabilities are supported by each model: + +| Model | Computer-Use | Grounding | Tools | VLM | +| -------------------------------------------------------------------------------------------------------------------------------- | :----------: | :-------: | :---: | :-: | +| [Claude Sonnet/Haiku](https://docs.claude.com/en/docs/agents-and-tools/tool-use/computer-use-tool#how-to-implement-computer-use) | ✓ | ✓ | ✓ | ✓ | +| [OpenAI CU Preview](https://platform.openai.com/docs/models/computer-use-preview) | ✓ | ✓ | | ✓ | +| [GLM-V](https://huggingface.co/THUDM/glm-4v-9b) | ✓ | ✓ | ✓ | ✓ | +| [Gemini CU Preview](https://ai.google.dev/gemini-api/docs/computer-use) | ✓ | ✓ | | ✓ | +| [InternVL](https://huggingface.co/OpenGVLab/InternVL3_5-1B) | ✓ | ✓ | ✓ | ✓ | +| [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | ✓ | ✓ | ✓ | ✓ | +| [OpenCUA](https://huggingface.co/xlangai/OpenCUA-7B) | | ✓ | | | +| [GTA](https://huggingface.co/HelloKKMe/GTA1-7B) | | ✓ | | | +| [Holo](https://huggingface.co/Hcompany/Holo1.5-3B) | | ✓ | | | +| [Moondream](https://huggingface.co/moondream/moondream3-preview) | | ✓ | | | +| [OmniParser](https://github.com/microsoft/OmniParser) | | ✓ | | | + +### Model IDs + +
+Examples of valid model IDs + +| Model | Model IDs | +| -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------- | +| [Claude Sonnet/Haiku](https://docs.claude.com/en/docs/agents-and-tools/tool-use/computer-use-tool#how-to-implement-computer-use) | `anthropic/claude-sonnet-4-5`, `anthropic/claude-haiku-4-5` | +| [OpenAI CU Preview](https://platform.openai.com/docs/models/computer-use-preview) | `openai/computer-use-preview` | +| [GLM-V](https://huggingface.co/THUDM/glm-4v-9b) | `openrouter/z-ai/glm-4.5v`, `huggingface-local/zai-org/GLM-4.5V` | +| [Gemini CU Preview](https://ai.google.dev/gemini-api/docs/computer-use) | `gemini-2.5-computer-use-preview` | +| [InternVL](https://huggingface.co/OpenGVLab/InternVL3_5-1B) | `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` | +| [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` | +| [OpenCUA](https://huggingface.co/xlangai/OpenCUA-7B) | `huggingface-local/xlangai/OpenCUA-{7B,32B}` | +| [GTA](https://huggingface.co/HelloKKMe/GTA1-7B) | `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}` | +| [Holo](https://huggingface.co/Hcompany/Holo1.5-3B) | `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}` | +| [Moondream](https://huggingface.co/moondream/moondream3-preview) | `moondream3` | +| [OmniParser](https://github.com/microsoft/OmniParser) | `omniparser` | + +
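+
+As a minimal sketch, a grounding model ID can be composed with a tool-capable VLM by joining them with `+` (this particular pairing is illustrative; see the capability table above for valid combinations):
+
+```python
+from agent import ComputerAgent
+
+# Grounding model for UI element detection + VLM for planning
+agent = ComputerAgent(
+    model="huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5",
+    tools=[computer],  # a Computer instance from the Computer SDK
+)
+```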
+ +Missing a model? Create a [feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)! + +Learn more in the [Agent SDK documentation](./libs/python/agent/README.md). + +# Computer SDK + +Install the computer SDK: ```bash -pip install cua-computer[all] +pip install cua-computer ``` + +Initialize a computer: + ```python from computer import Computer -async with Computer( - os_type="linux", - provider_type="cloud", - name="your-container-name", - api_key="your-api-key" -) as computer: - # Take screenshot +computer = Computer( + os_type="linux", # or "macos", "windows" + provider_type="cloud", # or "lume", "docker", "windows_sandbox" + name="your-sandbox-name", + api_key="your-api-key" # only for cloud + # or use_host_computer_server=True for host desktop +) + +try: + await computer.run() + + # Take a screenshot screenshot = await computer.interface.screenshot() # Click and type await computer.interface.left_click(100, 100) await computer.interface.type("Hello!") +finally: + await computer.close() ``` +Learn more in the [Computer SDK documentation](./libs/python/computer/README.md). + +# MCP Server + +Install the MCP server: + +```bash +pip install cua-mcp-server +``` + +Learn more in the [MCP Server documentation](./libs/python/mcp-server/README.md). + +# Computer Server + +Install the Computer Server: + +```bash +pip install cua-computer-server +python -m computer_server +``` + +Learn more in the [Computer Server documentation](./libs/python/computer-server/README.md). + +# Lume + +Install Lume: + +```bash +curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash +``` + +Learn more in the [Lume documentation](./libs/lume/README.md). + +# Lumier + +Install Lumier: + +```bash +docker pull trycua/lumier:latest +``` + +Learn more in the [Lumier documentation](./libs/lumier/README.md). + +# SOM + +Install SOM: + +```bash +pip install cua-som +``` + +Learn more in the [SOM documentation](./libs/python/som/README.md). 
+ # Resources -- [How to use the MCP Server with Claude Desktop or other MCP clients](./libs/python/mcp-server/README.md) - One of the easiest ways to get started with Cua -- [How to use OpenAI Computer-Use, Anthropic, OmniParser, or UI-TARS for your Computer-Use Agent](./libs/python/agent/README.md) -- [How to use Lume CLI for managing desktops](./libs/lume/README.md) -- [Training Computer-Use Models: Collecting Human Trajectories with Cua (Part 1)](https://www.trycua.com/blog/training-computer-use-models-trajectories-1) +- [Cua Blog](https://www.trycua.com/blog) +- [Cua Docs](https://docs.trycua.com) -## Modules +# Community and Contributions -| Module | Description | Installation | -|--------|-------------|---------------| -| [**Lume**](./libs/lume/README.md) | VM management for macOS/Linux using Apple's Virtualization.Framework | `curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh \| bash` | -| [**Lumier**](./libs/lumier/README.md) | Docker interface for macOS and Linux VMs | `docker pull trycua/lumier:latest` | -| [**Computer (Python)**](./libs/python/computer/README.md) | Python Interface for controlling virtual machines | `pip install "cua-computer[all]"` | -| [**Computer (Typescript)**](./libs/typescript/computer/README.md) | Typescript Interface for controlling virtual machines | `npm install @trycua/computer` | -| [**Agent**](./libs/python/agent/README.md) | AI agent framework for automating tasks | `pip install "cua-agent[all]"` | -| [**MCP Server**](./libs/python/mcp-server/README.md) | MCP server for using CUA with Claude Desktop | `pip install cua-mcp-server` | -| [**SOM**](./libs/python/som/README.md) | Self-of-Mark library for Agent | `pip install cua-som` | -| [**Computer Server**](./libs/python/computer-server/README.md) | Server component for Computer | `pip install cua-computer-server` | -| [**Core (Python)**](./libs/python/core/README.md) | Python Core utilities | `pip install cua-core` | -| [**Core (Typescript)**](./libs/typescript/core/README.md) | Typescript Core utilities | `npm install @trycua/core` | - -## Community +We welcome contributions to Cua! Please refer to our [Contributing Guidelines](CONTRIBUTING.md) for details. Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas, get assistance, or share your demos! -## License +# License -Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details. +Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE.md) file for details. Portions of this project, specifically components adapted from Kasm Technologies Inc., are also licensed under the MIT License. See [libs/kasm/LICENSE](libs/kasm/LICENSE) for details. Microsoft's OmniParser, which is used in this project, is licensed under the Creative Commons Attribution 4.0 International License (CC-BY-4.0). See the [OmniParser LICENSE](https://github.com/microsoft/OmniParser/blob/master/LICENSE) for details. -### Third-Party Licenses and Optional Components +## Third-Party Licenses and Optional Components Some optional extras for this project depend on third-party packages that are licensed under terms different from the MIT License. @@ -205,52 +358,22 @@ Some optional extras for this project depend on third-party packages that are li When you choose to install and use such optional extras, your use, modification, and distribution of those third-party components are governed by their respective licenses (e.g., AGPL-3.0 for `ultralytics`). 
-## Contributing - -We welcome contributions to Cua! Please refer to our [Contributing Guidelines](CONTRIBUTING.md) for details. - ## Trademarks Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonical are registered trademarks of Canonical Ltd. -Microsoft is a registered trademark of Microsoft Corporation. +Microsoft is a registered trademark of Microsoft Corporation. This project is not affiliated with, endorsed by, or sponsored by Apple Inc., Canonical Ltd., Microsoft Corporation, or Kasm Technologies. -## Stargazers +# Stargazers Thank you to all our supporters! [![Stargazers over time](https://starchart.cc/trycua/cua.svg?variant=adaptive)](https://starchart.cc/trycua/cua) -## Contributors +# Sponsors - - - - - - - - - - - - - - - - - - - - - - - -
f-trycua
f-trycua

💻
Pedro Piñera Buendía
Pedro Piñera Buendía

💻
Amit Kumar
Amit Kumar

💻
Dung Duc Huynh (Kaka)
Dung Duc Huynh (Kaka)

💻
Zayd Krunz
Zayd Krunz

💻
Prashant Raj
Prashant Raj

💻
Leland Takamine
Leland Takamine

💻
ddupont
ddupont

💻
Ethan Gutierrez
Ethan Gutierrez

💻
Ricter Zheng
Ricter Zheng

💻
Rahul Karajgikar
Rahul Karajgikar

💻
trospix
trospix

💻
Evan smith
Evan smith

💻
+Thank you to all our [GitHub Sponsors](https://github.com/sponsors/trycua)! - - - - +coderabbit-cli diff --git a/blog/app-use.md b/blog/app-use.md index 2bd8e058..68cf9c9b 100644 --- a/blog/app-use.md +++ b/blog/app-use.md @@ -1,6 +1,6 @@ # App-Use: Control Individual Applications with Cua Agents -*Published on May 31, 2025 by The Cua Team* +_Published on May 31, 2025 by The Cua Team_ Today, we are excited to introduce a new experimental feature landing in the [Cua GitHub repository](https://github.com/trycua/cua): **App-Use**. App-Use allows you to create lightweight virtual desktops that limit agent access to specific applications, improving precision of your agent's trajectory. Perfect for parallel workflows, and focused task execution. @@ -33,9 +33,11 @@ agent = ComputerAgent( ## Key Benefits ### 1. Lightweight and Fast + App-Use creates visual filters, not new processes. Your apps continue running normally - we just control what the agent can see and click on. The virtual desktops are composited views that require no additional compute resources beyond the existing window manager operations. ### 2. Run Multiple Agents in Parallel + Deploy a team of specialized agents, each focused on their own apps: ```python @@ -46,7 +48,7 @@ computer = Computer(experiments=["app-use"]) research_desktop = computer.create_desktop_from_apps(["Safari"]) research_agent = ComputerAgent(tools=[research_desktop], ...) -# Writing agent focuses on documents +# Writing agent focuses on documents writing_desktop = computer.create_desktop_from_apps(["Pages", "Notes"]) writing_agent = ComputerAgent(tools=[writing_desktop], ...) @@ -66,6 +68,7 @@ await asyncio.gather( ### Requirements To get started with App-Use, you'll need: + - Python 3.11+ - macOS Sequoia (15.0) or later @@ -85,21 +88,21 @@ from agent import ComputerAgent async def main(): computer = Computer() await computer.run() - + # Create app-specific desktop sessions desktop = computer.create_desktop_from_apps(["Notes"]) - + # Initialize an agent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[desktop] ) - + # Take a screenshot (returns bytes by default) screenshot = await desktop.interface.screenshot() with open("app_screenshot.png", "wb") as f: f.write(screenshot) - + # Run an agent task async for result in agent.run("Create a new note titled 'Meeting Notes' and add today's agenda items"): print(f"Agent: {result.get('text', '')}") @@ -113,6 +116,7 @@ if __name__ == "__main__": ### ⚠️ Important Warning Computer-use agents are powerful tools that can interact with your devices. This guide involves using your own macOS and iPhone instead of a VM. **Proceed at your own risk.** Always: + - Review agent actions before running - Start with non-critical tasks - Monitor agent behavior closely @@ -150,20 +154,20 @@ async def automate_iphone(): # Connect to your local computer server my_mac = Computer(use_host_computer_server=True, os_type="macos", experiments=["app-use"]) await my_mac.run() - + # Create a desktop focused on iPhone Mirroring my_iphone = my_mac.create_desktop_from_apps(["iPhone Mirroring"]) - + # Initialize an agent for iPhone automation agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[my_iphone] ) - + # Example: Send a message async for result in agent.run("Open Messages and send 'Hello from Cua!' 
to John"): print(f"Agent: {result.get('text', '')}") - + # Example: Set a reminder async for result in agent.run("Create a reminder to call mom at 5 PM today"): print(f"Agent: {result.get('text', '')}") @@ -175,6 +179,7 @@ if __name__ == "__main__": ### iPhone Automation Use Cases With Cua's iPhone automation, you can: + - **Automate messaging**: Send texts, respond to messages, manage conversations - **Control apps**: Navigate any iPhone app using natural language - **Manage settings**: Adjust iPhone settings programmatically @@ -191,6 +196,7 @@ With Cua's iPhone automation, you can: ## When to Use What: App-Use vs Multiple Cua Containers ### Use App-Use within the same macOS Cua Container: + - ✅ You need lightweight, fast agent focusing (macOS only) - ✅ You want to run multiple agents on one desktop - ✅ You're automating personal devices like iPhones @@ -198,6 +204,7 @@ With Cua's iPhone automation, you can: - ✅ You want low computational overhead ### Use Multiple Cua Containers: + - ✅ You need maximum isolation between agents - ✅ You require cross-platform support (Mac/Linux/Windows) - ✅ You need guaranteed resource allocation @@ -215,6 +222,7 @@ With Cua's iPhone automation, you can: ### How It Works When you create a desktop session with `create_desktop_from_apps()`, App Use: + - Filters the visual output to show only specified application windows - Routes input events only to those applications - Maintains window layout isolation between different sessions diff --git a/blog/assets/hack-booth.png b/blog/assets/hack-booth.png new file mode 100644 index 00000000..6da6cae1 Binary files /dev/null and b/blog/assets/hack-booth.png differ diff --git a/blog/assets/hack-closing-ceremony.jpg b/blog/assets/hack-closing-ceremony.jpg new file mode 100644 index 00000000..f5796f57 Binary files /dev/null and b/blog/assets/hack-closing-ceremony.jpg differ diff --git a/blog/assets/hack-cua-ollama-hud.jpeg b/blog/assets/hack-cua-ollama-hud.jpeg new file mode 100644 index 00000000..c9067104 Binary files /dev/null and b/blog/assets/hack-cua-ollama-hud.jpeg differ diff --git a/blog/assets/hack-leaderboard.png b/blog/assets/hack-leaderboard.png new file mode 100644 index 00000000..3d11cda5 Binary files /dev/null and b/blog/assets/hack-leaderboard.png differ diff --git a/blog/assets/hack-winners.jpeg b/blog/assets/hack-winners.jpeg new file mode 100644 index 00000000..60059d97 Binary files /dev/null and b/blog/assets/hack-winners.jpeg differ diff --git a/blog/assets/hack-workshop.jpeg b/blog/assets/hack-workshop.jpeg new file mode 100644 index 00000000..94f913ff Binary files /dev/null and b/blog/assets/hack-workshop.jpeg differ diff --git a/blog/bringing-computer-use-to-the-web.md b/blog/bringing-computer-use-to-the-web.md index a5593c12..b753b454 100644 --- a/blog/bringing-computer-use-to-the-web.md +++ b/blog/bringing-computer-use-to-the-web.md @@ -1,10 +1,10 @@ # Bringing Computer-Use to the Web -*Published on August 5, 2025 by Morgan Dean* +_Published on August 5, 2025 by Morgan Dean_ -In one of our original posts, we explored building Computer-Use Operators on macOS - first with a [manual implementation](build-your-own-operator-on-macos-1.md) using OpenAI's `computer-use-preview` model, then with our [cua-agent framework](build-your-own-operator-on-macos-2.md) for Python developers. 
While these tutorials have been incredibly popular, we've received consistent feedback from our community: **"Can we use C/ua with JavaScript and TypeScript?"** +In one of our original posts, we explored building Computer-Use Operators on macOS - first with a [manual implementation](build-your-own-operator-on-macos-1.md) using OpenAI's `computer-use-preview` model, then with our [cua-agent framework](build-your-own-operator-on-macos-2.md) for Python developers. While these tutorials have been incredibly popular, we've received consistent feedback from our community: **"Can we use Cua with JavaScript and TypeScript?"** -Today, we're excited to announce the release of the **`@trycua/computer` Web SDK** - a new library that allows you to control your C/ua cloud containers from any JavaScript or TypeScript project. With this library, you can click, type, and grab screenshots from your cloud containers - no extra servers required. +Today, we're excited to announce the release of the **`@trycua/computer` Web SDK** - a new library that allows you to control your Cua cloud containers from any JavaScript or TypeScript project. With this library, you can click, type, and grab screenshots from your cloud containers - no extra servers required. With this new SDK, you can easily develop CUA experiences like the one below, which we will release soon as open source. @@ -19,7 +19,7 @@ Let’s see how it works. By the end of this tutorial, you'll be able to: - Set up the `@trycua/computer` npm library in any JavaScript/TypeScript project -- Connect OpenAI's computer-use model to C/ua cloud containers from web applications +- Connect OpenAI's computer-use model to Cua cloud containers from web applications - Build computer-use agents that work in Node.js, React, Vue, or any web framework - Handle different types of computer actions (clicking, typing, scrolling) from web code - Implement the complete computer-use loop in JavaScript/TypeScript @@ -30,7 +30,7 @@ By the end of this tutorial, you'll be able to: - Node.js 16+ and npm/yarn/pnpm - Basic JavaScript or TypeScript knowledge - OpenAI API access (Tier 3+ for computer-use-preview) -- C/ua cloud container credits ([get started here](https://trycua.com/pricing)) +- Cua cloud container credits ([get started here](https://trycua.com/pricing)) **Estimated Time:** 45-60 minutes @@ -47,9 +47,9 @@ At the time of writing, the **computer-use-preview** model has limited availabil Luckily, the `@trycua/computer` library can be used in conjunction with other models, like [Anthropic’s Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/computer-use-tool) or [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B). You’ll just have to write your own handler to parse the model output for interfacing with the container. -### C/ua Cloud Containers +### Cua Cloud Containers -To follow this guide, you’ll need access to a C/ua cloud container. +To follow this guide, you’ll need access to a Cua cloud container. Getting access is simple: purchase credits from our [pricing page](https://trycua.com/pricing), then create and provision a new container instance from the [dashboard](https://trycua.com/dashboard/containers). With your container running, you'll be ready to leverage the web SDK and bring automation to your JavaScript or TypeScript applications. 
@@ -96,7 +96,7 @@ const res = await openai.responses.create({ ], }, ], - truncation: 'auto' + truncation: 'auto', }); ``` @@ -142,32 +142,32 @@ Each response contains: ## Implementation Guide -### Provision a C/ua Cloud Container +### Provision a Cua Cloud Container - 1. Visit [trycua.com](https://trycua.com), sign up, purchase [credits](https://trycua.com/pricing), and create a new container instance from the [dashboard](https://trycua.com/dashboard). - 2. Create an API key from the dashboard — be sure to save it in a secure location before continuing. - 3. Start the cloud container from the dashboard. +1. Visit [trycua.com](https://trycua.com), sign up, purchase [credits](https://trycua.com/pricing), and create a new container instance from the [dashboard](https://trycua.com/dashboard). +2. Create an API key from the dashboard — be sure to save it in a secure location before continuing. +3. Start the cloud container from the dashboard. ### Environment Setup - 1. Install required packages with your preferred package manager: +1. Install required packages with your preferred package manager: - ```bash - npm install --save @trycua/computer # or yarn, pnpm, bun - npm install --save openai # or yarn, pnpm, bun - ``` + ```bash + npm install --save @trycua/computer # or yarn, pnpm, bun + npm install --save openai # or yarn, pnpm, bun + ``` - Works with any JavaScript/TypeScript project setup - whether you're using Create React App, Next.js, Vue, Angular, or plain JavaScript. + Works with any JavaScript/TypeScript project setup - whether you're using Create React App, Next.js, Vue, Angular, or plain JavaScript. - 2. Save your OpenAI API key, C/ua API key, and container name to a `.env` file: +2. Save your OpenAI API key, Cua API key, and container name to a `.env` file: - ```bash - OPENAI_API_KEY=openai-api-key - CUA_API_KEY=cua-api-key - CUA_CONTAINER_NAME=cua-cloud-container-name - ``` + ```bash + OPENAI_API_KEY=openai-api-key + CUA_API_KEY=cua-api-key + CUA_CONTAINER_NAME=cua-cloud-container-name + ``` - These environment variables work the same whether you're using vanilla JavaScript, TypeScript, or any web framework. + These environment variables work the same whether you're using vanilla JavaScript, TypeScript, or any web framework. ## Building the Agent diff --git a/blog/build-your-own-operator-on-macos-1.md b/blog/build-your-own-operator-on-macos-1.md index 1e15ae02..dd075e01 100644 --- a/blog/build-your-own-operator-on-macos-1.md +++ b/blog/build-your-own-operator-on-macos-1.md @@ -1,6 +1,6 @@ # Build Your Own Operator on macOS - Part 1 -*Published on March 31, 2025 by Francesco Bonacci* +_Published on March 31, 2025 by Francesco Bonacci_ In this first blogpost, we'll learn how to build our own Computer-Use Operator using OpenAI's `computer-use-preview` model. 
But first, let's understand what some common terms mean: @@ -19,6 +19,7 @@ Check out what it looks like to use your own Operator from a Gradio app: ## What You'll Learn By the end of this tutorial, you'll be able to: + - Set up a macOS virtual machine for AI automation - Connect OpenAI's computer-use model to your VM - Create a basic loop for the AI to interact with your VM @@ -26,6 +27,7 @@ By the end of this tutorial, you'll be able to: - Implement safety checks and error handling **Prerequisites:** + - macOS Sonoma (14.0) or later - 8GB RAM minimum (16GB recommended) - OpenAI API access (Tier 3+) @@ -41,15 +43,17 @@ Last March OpenAI released a fine-tuned version of GPT-4o, namely [CUA](https:// Professor Ethan Mollick provides an excellent explanation of computer-use agents in this article: [When you give a Claude a mouse](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse). ### ChatGPT Operator + OpenAI's computer-use model powers [ChatGPT Operator](https://openai.com/index/introducing-operator), a Chromium-based interface exclusively available to ChatGPT Pro subscribers. Users leverage this functionality to automate web-based tasks such as online shopping, expense report submission, and booking reservations by interacting with websites in a human-like manner. ## Benefits of Custom Operators ### Why Build Your Own? + While OpenAI's Operator uses a controlled Chromium VM instance, there are scenarios where you may want to use your own VM with full desktop capabilities. Here are some examples: - Automating native macOS apps like Finder, Xcode -- Managing files, changing settings, and running terminal commands +- Managing files, changing settings, and running terminal commands - Testing desktop software and applications - Creating workflows that combine web and desktop tasks - Automating media editing in apps like Final Cut Pro and Blender @@ -59,7 +63,9 @@ This gives you more control and flexibility to automate tasks beyond just web br ## Access Requirements ### Model Availability + As we speak, the **computer-use-preview** model has limited availability: + - Only accessible to OpenAI tier 3+ users - Additional application process may be required even for eligible users - Cannot be used in the OpenAI Playground @@ -68,15 +74,18 @@ As we speak, the **computer-use-preview** model has limited availability: ## Understanding the OpenAI API ### Responses API Overview + Let's start with the basics. In our case, we'll use OpenAI's Responses API to communicate with their computer-use model. Think of it like this: + 1. We send the model a screenshot of our VM and tell it what we want it to do 2. The model looks at the screenshot and decides what actions to take 3. It sends back instructions (like "click here" or "type this") 4. We execute those instructions in our VM The [Responses API](https://platform.openai.com/docs/guides/responses) is OpenAI's newest way to interact with their AI models. It comes with several built-in tools: + - **Web search**: Let the AI search the internet - **File search**: Help the AI find documents - **Computer use**: Allow the AI to control a computer (what we'll be using) @@ -84,9 +93,11 @@ The [Responses API](https://platform.openai.com/docs/guides/responses) is OpenAI As we speak, the computer-use model is only available through the Responses API. ### Responses API Examples + Let's look at some simple examples. We'll start with the traditional way of using OpenAI's API with Chat Completions, then show the new Responses API primitive. 
Chat Completions: + ```python # The old way required managing conversation history manually messages = [{"role": "user", "content": "Hello"}] @@ -98,13 +109,14 @@ messages.append(response.choices[0].message) # Manual message tracking ``` Responses API: + ```python # Example 1: Simple web search # The API handles all the complexity for us response = client.responses.create( model="gpt-4", input=[{ - "role": "user", + "role": "user", "content": "What's the latest news about AI?" }], tools=[{ @@ -118,7 +130,7 @@ response = client.responses.create( response = client.responses.create( model="gpt-4", input=[{ - "role": "user", + "role": "user", "content": "Find documents about project X" }], tools=[{ @@ -130,6 +142,7 @@ response = client.responses.create( ``` ### Computer-Use Model Setup + For our operator, we'll use the computer-use model. Here's how we set it up: ```python @@ -144,7 +157,7 @@ response = client.responses.create( }], input=[ { - "role": "user", + "role": "user", "content": [ # What we want the AI to do {"type": "input_text", "text": "Open Safari and go to google.com"}, @@ -158,6 +171,7 @@ response = client.responses.create( ``` ### Understanding the Response + When we send a request, the API sends back a response that looks like this: ```json @@ -189,6 +203,7 @@ When we send a request, the API sends back a response that looks like this: ``` Each response contains: + 1. **Reasoning**: The AI's explanation of what it's doing 2. **Action**: The specific computer action to perform 3. **Safety Checks**: Any potential risks to review @@ -197,6 +212,7 @@ Each response contains: ## CUA-Computer Interface ### Architecture Overview + Let's break down the main components of our system and how they work together: 1. **The Virtual Machine (VM)** @@ -238,7 +254,7 @@ sequenceDiagram VM-->>CUI: Return current screen CUI->>AI: Send screenshot + instructions AI-->>CUI: Return next action - + Note over CUI,VM: Execute the action alt Mouse Click CUI->>VM: Move and click mouse @@ -259,6 +275,7 @@ sequenceDiagram ``` The diagram above shows how information flows through our system: + 1. You start the operator 2. The Computer Interface creates a virtual macOS 3. Then it enters a loop: @@ -290,17 +307,19 @@ This design keeps everything organized and safe. The AI can only interact with t - Cached images are stored in `~/.lume/cache` You can check your downloaded VM images anytime: + ```bash lume ls ``` Example output: - | name | os | cpu | memory | disk | display | status | ip | vnc | - |--------------------------|---------|-------|---------|----------------|-----------|-----------|----------------|---------------------------------------------------| - | macos-sequoia-cua:latest | macOS | 12 | 16.00G | 64.5GB/80.0GB | 1024x768 | running | 192.168.64.78 | vnc://:kind-forest-zulu-island@127.0.0.1:56085 | + | name | os | cpu | memory | disk | display | status | ip | vnc | + | ------------------------ | ----- | --- | ------ | ------------- | -------- | ------- | ------------- | ---------------------------------------------- | + | macos-sequoia-cua:latest | macOS | 12 | 16.00G | 64.5GB/80.0GB | 1024x768 | running | 192.168.64.78 | vnc://:kind-forest-zulu-island@127.0.0.1:56085 | After checking your available images, you can run the VM to ensure everything is working correctly: + ```bash lume run macos-sequoia-cua:latest ``` @@ -309,12 +328,14 @@ This design keeps everything organized and safe. The AI can only interact with t **Note**: The `cua-computer` package requires Python 3.10 or later. 
We recommend creating a dedicated Python environment: **Using venv:** + ```bash python -m venv cua-env source cua-env/bin/activate ``` **Using conda:** + ```bash conda create -n cua-env python=3.10 conda activate cua-env @@ -332,6 +353,7 @@ This design keeps everything organized and safe. The AI can only interact with t ### Building the Operator #### Importing Required Modules + With the prerequisites installed and configured, we're ready to build our first operator. The following example uses asynchronous Python (async/await). You can run it either in a VS Code Notebook or as a standalone Python script. @@ -344,12 +366,13 @@ from computer import Computer ``` #### Mapping API Actions to CUA Methods + The following helper function converts a `computer_call` action from the OpenAI Responses API into corresponding commands on the CUI interface. For example, if the API instructs a `click` action, we move the cursor and perform a left click on the lume VM Sandbox. We will use the computer interface to execute the actions. ```python async def execute_action(computer, action): action_type = action.type - + if action_type == "click": x = action.x y = action.y @@ -360,12 +383,12 @@ async def execute_action(computer, action): await computer.interface.right_click() else: await computer.interface.left_click() - + elif action_type == "type": text = action.text print(f"Typing text: {text}") await computer.interface.type_text(text) - + elif action_type == "scroll": x = action.x y = action.y @@ -374,7 +397,7 @@ async def execute_action(computer, action): print(f"Scrolling at ({x}, {y}) with offsets (scroll_x={scroll_x}, scroll_y={scroll_y})") await computer.interface.move_cursor(x, y) await computer.interface.scroll(scroll_y) # Using vertical scroll only - + elif action_type == "keypress": keys = action.keys for key in keys: @@ -386,23 +409,24 @@ async def execute_action(computer, action): await computer.interface.press_key("space") else: await computer.interface.press_key(key) - + elif action_type == "wait": wait_time = action.time print(f"Waiting for {wait_time} seconds") await asyncio.sleep(wait_time) - + elif action_type == "screenshot": print("Taking screenshot") # This is handled automatically in the main loop, but we can take an extra one if requested screenshot = await computer.interface.screenshot() return screenshot - + else: print(f"Unrecognized action: {action_type}") ``` #### Implementing the Computer-Use Loop + This section defines a loop that: 1. Initializes the cua-computer instance (connecting to a macOS sandbox). @@ -423,7 +447,7 @@ async def cua_openai_loop(): os_type="macos" ) as computer: await computer.run() # Start the lume VM - + # Capture the initial screenshot screenshot = await computer.interface.screenshot() screenshot_base64 = base64.b64encode(screenshot).decode('utf-8') @@ -438,8 +462,8 @@ async def cua_openai_loop(): "environment": "mac" }], input=[ - { - "role": "user", + { + "role": "user", "content": [ {"type": "input_text", "text": "Open Safari, download and install Cursor."}, {"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_base64}"} @@ -488,7 +512,7 @@ async def cua_openai_loop(): "display_height": 768, "environment": "mac" }], - input=[{ + input=[{ "type": "computer_call_output", "call_id": last_call_id, "acknowledged_safety_checks": acknowledged_checks, @@ -511,12 +535,15 @@ if __name__ == "__main__": You can find the full code in our [notebook](https://github.com/trycua/cua/blob/main/notebooks/blog/build-your-own-operator-on-macos-1.ipynb). 
#### Request Handling Differences + The first request to the OpenAI Responses API is special in that it includes the initial screenshot and prompt. Subsequent requests are handled differently, using the `computer_call_output` type to provide feedback on the executed action. ##### Initial Request Format + - We use `role: "user"` with `content` that contains both `input_text` (the prompt) and `input_image` (the screenshot) ##### Subsequent Request Format + - We use `type: "computer_call_output"` instead of the user role - We include the `call_id` to link the output to the specific previous action that was executed - We provide any `acknowledged_safety_checks` that were approved @@ -529,6 +556,7 @@ This structured approach allows the API to maintain context and continuity throu ## Conclusion ### Summary + This blogpost demonstrates a single iteration of a OpenAI Computer-Use loop where: - A macOS sandbox is controlled using the CUA interface. @@ -538,9 +566,11 @@ This blogpost demonstrates a single iteration of a OpenAI Computer-Use loop wher In a production setting, you would wrap the action-response cycle in a loop, handling multiple actions and safety checks as needed. ### Next Steps + In the next blogpost, we'll introduce our Agent framework which abstracts away all these tedious implementation steps. This framework provides a higher-level API that handles the interaction loop between OpenAI's computer-use model and the macOS sandbox, allowing you to focus on building sophisticated applications rather than managing the low-level details we've explored here. Can't wait? Check out the [cua-agent](https://github.com/trycua/cua/tree/main/libs/agent) package! ### Resources + - [OpenAI Computer-Use docs](https://platform.openai.com/docs/guides/tools-computer-use) - [cua-computer](https://github.com/trycua/cua/tree/main/libs/computer) - [lume](https://github.com/trycua/cua/tree/main/libs/lume) diff --git a/blog/build-your-own-operator-on-macos-2.md b/blog/build-your-own-operator-on-macos-2.md index 59844456..bf521b75 100644 --- a/blog/build-your-own-operator-on-macos-2.md +++ b/blog/build-your-own-operator-on-macos-2.md @@ -1,6 +1,6 @@ # Build Your Own Operator on macOS - Part 2 -*Published on April 27, 2025 by Francesco Bonacci* +_Published on April 27, 2025 by Francesco Bonacci_ In our [previous post](build-your-own-operator-on-macos-1.md), we built a basic Computer-Use Operator from scratch using OpenAI's `computer-use-preview` model and our [cua-computer](https://pypi.org/project/cua-computer) package. While educational, implementing the control loop manually can be tedious and error-prone. @@ -13,12 +13,14 @@ In this follow-up, we'll explore our [cua-agent](https://pypi.org/project/cua-ag ## What You'll Learn By the end of this tutorial, you'll be able to: + - Set up the `cua-agent` framework with various agent loop types and model providers - Understand the different agent loop types and their capabilities - Work with local models for cost-effective workflows - Use a simple UI for your operator **Prerequisites:** + - Completed setup from Part 1 ([lume CLI installed](https://github.com/trycua/cua?tab=readme-ov-file#option-2-full-computer-use-agent-capabilities), macOS CUA image already pulled) - Python 3.10+. We recommend using Conda (or Anaconda) to create an ad hoc Python environment. - API keys for OpenAI and/or Anthropic (optional for local models) @@ -58,6 +60,7 @@ pip install "cua-agent[ui]" # Gradio UI Before running any code examples, let's set up a proper environment: 1. 
**Create a new directory** for your project: + ```bash mkdir cua-agent-tutorial cd cua-agent-tutorial @@ -66,12 +69,13 @@ Before running any code examples, let's set up a proper environment: 2. **Set up a Python environment** using one of these methods: **Option A: Using conda command line** + ```bash # Using conda conda create -n cua-agent python=3.10 conda activate cua-agent ``` - + **Option B: Using Anaconda Navigator UI** - Open Anaconda Navigator - Click on "Environments" in the left sidebar @@ -80,35 +84,39 @@ Before running any code examples, let's set up a proper environment: - Select Python 3.10 - Click "Create" - Once created, select the environment and click "Open Terminal" to activate it - + **Option C: Using venv** + ```bash python -m venv cua-env source cua-env/bin/activate # On macOS/Linux ``` 3. **Install the cua-agent package**: + ```bash pip install "cua-agent[all]" ``` 4. **Set up your API keys as environment variables**: + ```bash # For OpenAI models export OPENAI_API_KEY=your_openai_key_here - + # For Anthropic models (if needed) export ANTHROPIC_API_KEY=your_anthropic_key_here ``` 5. **Create a Python file or notebook**: - + **Option A: Create a Python script** + ```bash # For a Python script touch cua_agent_example.py ``` - + **Option B: Use VS Code notebooks** - Open VS Code - Install the Python extension if you haven't already @@ -120,9 +128,10 @@ Now you're ready to run the code examples! ## Understanding Agent Loops -If you recall from Part 1, we had to implement a custom interaction loop to interact with the compute-use-preview model. +If you recall from Part 1, we had to implement a custom interaction loop to interact with the compute-use-preview model. In the `cua-agent` framework, an **Agent Loop** is the core abstraction that implements the continuous interaction cycle between an AI model and the computer environment. It manages the flow of: + 1. Capturing screenshots of the computer's state 2. Processing these screenshots (with or without UI element detection) 3. Sending this visual context to an AI model along with the task instructions @@ -141,6 +150,7 @@ While the core concept remains the same across all agent loops, different AI mod | `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`
• `claude-3-7-sonnet-20250219`
• `gpt-4.5-preview`
• `gpt-4o`
• `gpt-4`
• `phi4`
• `phi4-mini`
• `gemma3`
• `...`
• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser | Each loop handles the same basic pattern we implemented manually in Part 1: + 1. Take a screenshot of the VM 2. Send the screenshot and task to the AI model 3. Receive an action to perform @@ -169,13 +179,13 @@ Choosing the right agent loop depends not only on your API access and technical The performance of different Computer-Use models varies significantly across tasks. These benchmark evaluations measure an agent's ability to follow instructions and complete real-world tasks in different computing environments. -| Benchmark type | Benchmark | UI-TARS-1.5 | OpenAI CUA | Claude 3.7 | Previous SOTA | Human | -|----------------|--------------------------------------------------------------------------------------------------------------------------------------------------|-------------|-------------|-------------|----------------------|-------------| -| **Computer Use** | [OSworld](https://arxiv.org/abs/2404.07972) (100 steps) | **42.5** | 36.4 | 28 | 38.1 (200 step) | 72.4 | -| | [Windows Agent Arena](https://arxiv.org/abs/2409.08264) (50 steps) | **42.1** | - | - | 29.8 | - | -| **Browser Use** | [WebVoyager](https://arxiv.org/abs/2401.13919) | 84.8 | **87** | 84.1 | 87 | - | -| | [Online-Mind2web](https://arxiv.org/abs/2504.01382) | **75.8** | 71 | 62.9 | 71 | - | -| **Phone Use** | [Android World](https://arxiv.org/abs/2405.14573) | **64.2** | - | - | 59.5 | - | +| Benchmark type | Benchmark | UI-TARS-1.5 | OpenAI CUA | Claude 3.7 | Previous SOTA | Human | +| ---------------- | ------------------------------------------------------------------ | ----------- | ---------- | ---------- | --------------- | ----- | +| **Computer Use** | [OSworld](https://arxiv.org/abs/2404.07972) (100 steps) | **42.5** | 36.4 | 28 | 38.1 (200 step) | 72.4 | +| | [Windows Agent Arena](https://arxiv.org/abs/2409.08264) (50 steps) | **42.1** | - | - | 29.8 | - | +| **Browser Use** | [WebVoyager](https://arxiv.org/abs/2401.13919) | 84.8 | **87** | 84.1 | 87 | - | +| | [Online-Mind2web](https://arxiv.org/abs/2504.01382) | **75.8** | 71 | 62.9 | 71 | - | +| **Phone Use** | [Android World](https://arxiv.org/abs/2405.14573) | **64.2** | - | - | 59.5 | - | ### When to Use Each Loop @@ -210,10 +220,10 @@ async def run_simple_task(): model="openai/computer-use-preview", tools=[macos_computer] ) - + # Define a simple task task = "Open Safari and search for 'Python tutorials'" - + # Run the task and process responses async for result in agent.run(task): print(f"Action: {result.get('text')}") @@ -225,6 +235,7 @@ if __name__ == "__main__": 3. Save the file 4. Open a terminal, navigate to your project directory, and run: + ```bash python simple_task.py ``` @@ -232,6 +243,7 @@ if __name__ == "__main__": 5. The code will initialize the macOS virtual machine, create an agent, and execute the task of opening Safari and searching for Python tutorials. You can also run this in a VS Code notebook: + 1. Create a new notebook in VS Code (.ipynb file) 2. Copy the code into a cell (without the `if __name__ == "__main__":` part) 3. 
Run the cell to execute the code @@ -259,7 +271,7 @@ async def run_multi_task_workflow(): model="anthropic/claude-3-5-sonnet-20241022", tools=[macos_computer] ) - + tasks = [ "Open Safari and go to github.com", "Search for 'trycua/cua'", @@ -267,7 +279,7 @@ async def run_multi_task_workflow(): "Click on the 'Issues' tab", "Read the first open issue" ] - + for i, task in enumerate(tasks): print(f"\nTask {i+1}/{len(tasks)}: {task}") async for result in agent.run(task): @@ -301,13 +313,13 @@ async for result in agent.run(task): # Basic information print(f"Response ID: {result.get('id')}") print(f"Response Text: {result.get('text')}") - + # Detailed token usage statistics usage = result.get('usage') if usage: print(f"Input Tokens: {usage.get('input_tokens')}") print(f"Output Tokens: {usage.get('output_tokens')}") - + # Reasoning and actions for output in result.get('output', []): if output.get('type') == 'reasoning': @@ -318,6 +330,7 @@ async for result in agent.run(task): ``` This structured format allows you to: + - Log detailed information about agent actions - Provide real-time feedback to users - Track token usage for cost monitoring @@ -350,9 +363,9 @@ async def run_with_local_model(): model="omniparser+ollama_chat/gemma3", tools=[macos_computer] ) - + task = "Open the Calculator app and perform a simple calculation" - + async for result in agent.run(task): print(f"Action: {result.get('text')}") @@ -379,12 +392,14 @@ agent = ComputerAgent( ``` Common local endpoints include: + - LM Studio: `http://localhost:1234/v1` - vLLM: `http://localhost:8000/v1` - LocalAI: `http://localhost:8080/v1` - Ollama with OpenAI compat: `http://localhost:11434/v1` This approach is perfect for: + - Development and testing without incurring API costs - Offline or air-gapped environments where API access isn't possible - Privacy-sensitive applications where data can't leave your network @@ -406,8 +421,8 @@ UI-TARS is ByteDance's Computer-Use model designed for navigating OS-level inter ```python agent = ComputerAgent( model=LLM( - provider=LLMProvider.OAICOMPAT, - name="tgi", + provider=LLMProvider.OAICOMPAT, + name="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1" ), tools=[macos_computer] @@ -475,11 +490,13 @@ if __name__ == "__main__": ``` 2. Install the UI dependencies if you haven't already: + ```bash pip install "cua-agent[ui]" ``` 3. 
Run the script: + ```bash python launch_ui.py ``` @@ -498,12 +515,14 @@ if __name__ == "__main__": ``` When you run this, Gradio will display both a local URL and a public URL like: + ``` Running on local URL: http://127.0.0.1:7860 Running on public URL: https://abcd1234.gradio.live ``` **Security Note:** Be cautious when sharing your Gradio UI publicly: + - The public URL gives anyone with the link full access to your agent - Consider using basic authentication for additional protection: ```python @@ -513,6 +532,7 @@ Running on public URL: https://abcd1234.gradio.live - The temporary link expires when you stop the Gradio application This provides: + - Model provider selection - Agent loop selection - Task input field @@ -566,7 +586,7 @@ async def github_workflow(): verbosity=logging.INFO, tools=[macos_computer] ) - + tasks = [ "Look for a repository named trycua/cua on GitHub.", "Check the open issues, open the most recent one and read it.", @@ -575,7 +595,7 @@ async def github_workflow(): "From Cursor, open Composer if not already open.", "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.", ] - + for i, task in enumerate(tasks): print(f"\nExecuting task {i+1}/{len(tasks)}: {task}") async for result in agent.run(task): @@ -587,11 +607,13 @@ if __name__ == "__main__": ``` 2. Make sure your OpenAI API key is set: + ```bash export OPENAI_API_KEY=your_openai_key_here ``` 3. Run the script: + ```bash python github_workflow.py ``` @@ -604,6 +626,7 @@ if __name__ == "__main__": - Use Cursor's AI features to work on a solution This example: + 1. Searches GitHub for a repository 2. Reads an issue 3. Clones the repository @@ -615,6 +638,7 @@ This example: Let's compare our manual implementation from Part 1 with the framework approach: ### Manual Implementation (Part 1) + - Required writing custom code for the interaction loop - Needed explicit handling of different action types - Required direct management of the OpenAI API calls @@ -622,6 +646,7 @@ Let's compare our manual implementation from Part 1 with the framework approach: - Limited to OpenAI's computer-use model ### Framework Implementation (Part 2) + - Abstracts the interaction loop - Handles all action types automatically - Manages API calls internally @@ -634,17 +659,21 @@ Let's compare our manual implementation from Part 1 with the framework approach: The `cua-agent` framework transforms what was a complex implementation task into a simple, high-level interface for building Computer-Use Agents. By abstracting away the technical details, it lets you focus on defining the tasks rather than the machinery. 
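To see how little machinery remains, here's the framework version of Part 1's entire loop in one self-contained sketch (model string and APIs as used throughout this post; the bare `Computer(os_type="macos")` setup assumes the local VM configuration described earlier):

```python
import asyncio

from agent import ComputerAgent
from computer import Computer

async def main():
    # Local macOS VM, as configured earlier in this post
    macos_computer = Computer(os_type="macos")
    await macos_computer.run()

    agent = ComputerAgent(
        model="openai/computer-use-preview",
        tools=[macos_computer],
    )

    # The whole screenshot -> model -> action loop from Part 1 is this one call
    async for result in agent.run("Open Safari and search for 'Python tutorials'"):
        print(result.get("text"))

if __name__ == "__main__":
    asyncio.run(main())
```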
### When to Use Each Approach + - **Manual Implementation (Part 1)**: When you need complete control over the interaction loop or are implementing a custom solution - **Framework (Part 2)**: For most applications where you want to quickly build and deploy Computer-Use Agents ### Next Steps + With the basics covered, you might want to explore: + - Customizing the agent's behavior with additional parameters - Building more complex workflows spanning multiple applications - Integrating your agent into other applications - Contributing to the open-source project on GitHub ### Resources + - [cua-agent GitHub repository](https://github.com/trycua/cua/tree/main/libs/agent) - [Agent Notebook Examples](https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb) - [OpenAI Agent SDK Specification](https://platform.openai.com/docs/api-reference/responses) diff --git a/blog/composite-agents.md b/blog/composite-agents.md index 84d0efbf..383fb035 100644 --- a/blog/composite-agents.md +++ b/blog/composite-agents.md @@ -1,6 +1,6 @@ # Announcing Cua Agent framework 0.4 and Composite Agents -*Published on August 26, 2025 by Dillon DuPont* +_Published on August 26, 2025 by Dillon DuPont_ Composite Agents @@ -12,7 +12,7 @@ This is the kind of problem that makes you wonder if we're building the future o ## What we fixed -Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language. +Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language. Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-3-5-sonnet-20241022"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to. @@ -42,7 +42,7 @@ agent = ComputerAgent( This creates a composite agent where one model (the "grounding" model) handles the visual understanding and precise UI interactions, while the other (the "planning" model) handles the high-level reasoning and task orchestration. It's like having a pilot and a navigator, except they're both AI models and they're trying to help you star a GitHub repository. - You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model: +You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model: ```python agent = ComputerAgent( @@ -63,12 +63,11 @@ We're building integration with HUD evals, allowing us to curate and benchmark m If you try out version 0.4.x, we'd love to hear how it goes. Join us on Discord to share your results and let us know what model combinations work best for your projects. - --- ## Links -* **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) -* **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai) +- **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) +- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai) -Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build. 
\ No newline at end of file +Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build. diff --git a/blog/cua-hackathon.md b/blog/cua-hackathon.md index ac40f488..26e59cf7 100644 --- a/blog/cua-hackathon.md +++ b/blog/cua-hackathon.md @@ -1,6 +1,6 @@ # Computer-Use Agents SOTA Challenge: Hack the North + Global Online -*Published on August 25, 2025 by Francesco Bonacci* +_Published on August 25, 2025 by Francesco Bonacci_ We’re bringing something new to [Hack the North](https://hackthenorth.com), Canada’s largest hackathon, this year: a head-to-head competition for **Computer-Use Agents** - on-site at Waterloo and a **Global online challenge**. From September 12–14, 2025, teams build on the **Cua Agent Framework** and are scored in **HUD’s OSWorld-Verified** environment to push past today’s SOTA on [OS-World](https://os-world.github.io). @@ -14,7 +14,8 @@ There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Age **Cua** and [**Ollama**](https://ollama.com) organize a global hackathon to find the **most creative uses of local and hybrid computer-use agents**. There are no geographic restrictions on who can join — this is a worldwide competition focused on **originality, impact, and inventive applications** that showcase what's possible with local and hybrid inference. -**Prizes:** +**Prizes:** + - 1st **MacBook Air M4 (or equivalent value)** + features in Cua & Ollama channels - 2nd **$500 CAD + swag** - 3rd **swag + public feature** @@ -26,36 +27,42 @@ There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Age Two different tracks, two different processes: ### On-site (Track A) + Build during the weekend and submit a repo with a one-line start command. **HUD** executes your command in a clean environment and runs **OSWorld-Verified**. Scores come from official benchmark results; ties break by median, then wall-clock time, then earliest submission. Any model setup is allowed (cloud or local). **HUD** runs official evaluations immediately after submission. Winners are announced at the **closing ceremony**. ### Rules + - Fork and star the [Cua repo](https://github.com/trycua/cua). - Add your agent and instructions in `samples/community/hack-the-north/`. -- Include a README with details on the approach and any required notes. -- Submit a PR. +- Include a README with details on the approach and any required notes. +- Submit a PR. **Deadline: Sept 15, 8:00 AM EDT** ### Global Online (Track B) + Open to anyone, anywhere. Build on your own timeline and submit through the **Cua Discord form** by the deadline. **Project Requirements:** + - Your agent must integrate **Cua and Ollama** in some way - Your agent must be **easily runnable by judges** -Judged by **Cua** and **Ollama** teams on: -- **Creativity (30%)** – originality, usefulness, surprise factor -- **Technical Depth (30%)** – quality of engineering and agent design -- **Use of Ollama (30%)** – effective integration of local/hybrid inference -- **Polish (10%)** – presentation, clarity, demo readiness +Judged by **Cua** and **Ollama** teams on: + +- **Creativity (30%)** – originality, usefulness, surprise factor +- **Technical Depth (30%)** – quality of engineering and agent design +- **Use of Ollama (30%)** – effective integration of local/hybrid inference +- **Polish (10%)** – presentation, clarity, demo readiness ### Submission Process + Submissions will be collected via a **form link provided in the Cua Discord**. 
Your submission must contain: - **GitHub repo** containing the agent source code and a clear README with instructions on how to use the agent -- **Explanation** of the models and tools used, and what's local or hybrid about your design +- **Explanation** of the models and tools used, and what's local or hybrid about your design - **Short demo video** (up to two minutes) A **commit freeze** will be used to ensure that no changes are made after the deadline. Winners will be announced after judging is complete. @@ -68,12 +75,13 @@ A **commit freeze** will be used to ensure that no changes are made after the de Bring a team, pick a model stack, and push what agents can do on real computers. We can’t wait to see what you build at **Hack the North 2025**. -**Discord channels** +**Discord channels** + - Join the Discord first: https://discord.gg/cua-ai -- **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342 -- **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226 +- **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342 +- **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226 **Contact** Questions on Hack the North? Email **hackthenorth@trycua.com**. -*P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels.* \ No newline at end of file +_P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels._ diff --git a/blog/hack-the-north.md b/blog/hack-the-north.md new file mode 100644 index 00000000..574af62c --- /dev/null +++ b/blog/hack-the-north.md @@ -0,0 +1,169 @@ +# What happens when hackathon judging is a public benchmark (Hack the North edition) + +_Written by Francesco Bonacci — Reviewed by Parth Patel (HUD W25) — Sept 25, 2025_ + +## Prologue + +Hack the North ran Sept 12–14 at the University of Waterloo. Official count this year: **1,778 hackers**, and a [Guinness World Record for the most people building interlocking plastic brick sculptures simultaneously](https://uwaterloo.ca/news/eweal-making-hackathons-fun-again-breaking-guinness-world-record). + +Our team arrived from Europe and the US one day before the hackathon, after a summer scattered post–YC X25, waiting for our O-1 visas. **HUD**’s founders Parth and Jay flew in from SF to help us run evaluations, and Michael and Parth from **Ollama** joined as co-sponsors. + +Our plan was ambitious: run the **first state-of-the-art Computer-Use Agents track**, score it on a public benchmark, and give the top performer a guaranteed YC interview. (Interview ≠ offer. YC didn’t judge.) + +The rest, as they say, was a 36h story worth telling—and a playbook worth sharing for anyone thinking about running or sponsoring this type of hackathon track. + +![hack-cua-ollama-hud](./assets/hack-cua-ollama-hud.jpeg) + +## The sign-up problem we had to invent + +We joined as a sponsor at the last minute, thanks to a push from our friend @Michael Chiang at Ollama—Waterloo alum, naturally. It’s kind of an open secret that UWaterloo turns out some of the sharpest hackers around (_no pun intended, HackMIT_). 
It was a bit of a scramble, but also great timing—our Agent framework had just finished a major refactor, with support for **100+ VLM configurations** now live. Naturally, we wanted to stress-test it at scale—and see whether teams could come up with SOTA-level setups. _This wasn’t a blank-slate, build-whatever-you-want kind of track._ + +From day one, though, we knew we’d have to fight for sign-ups. This was a niche track, and a guaranteed YC interview alone wouldn’t be enough to pull people in. + +Unfortunately, Hack the North (HTN) didn’t offer an interest form to help us estimate demand, which made capacity planning tricky—especially with early-stage infra. Stress-testing takes foresight, and multimodal language model usage is still costly (~1.5× to 3–4× the price of comparable text-only models). + +On top of that, we were discouraged from external promotion on [lu.ma](http://lu.ma). So we spun up our own sign-up page at **trycua.com/hackathon** and built ad-hoc Discord channels to share track details. We emphasized—repeatedly—that only students already accepted to Hack the North should register. + +_(Moral: the “measure-zero effect”—no matter how many times you say it, some people won’t see it. Plenty of invalid sign-ups still slipped through.)_ + +Even so, having your own form is absolutely worth it: it gives you an **early funnel**, surfaces demand signals ahead of time, and—crucially—**lets you require platform sign-up before kickoff**. In our case, Hack the North didn’t provide Devpost access until the very end, so our form was the only way to build a working roster. + +Only a small trickle of sign-ups came through by the time the event kicked off—too few to plan around, but clearly the right kind of crowd. Several were already familiar with computer-use agents; one was even interning at Shopify, working on this space. + +## At the Sponsor Booth + +Day 0 on campus made the difference. We arrived a couple of hours early to collect swag shipments (around 1,200 stickers of our new **Cua-la** mascot, plus t-shirts and hats—always plan ~1.5× the estimated number of hackers!). After walking the sponsor floor and explaining the track at our booth, ~40 hackers signed up. + +**Moral:** sponsor booths are still the most effective way to recruit for a track. + +**Suggestions to maximize booth time (for HTN this is only ~24 of the total 36 hours):** + +- **Be unmistakable.** Run a mini-challenge and a visible giveaway. We offered 5 × $200 Anthropic credits as a lightning raffle and constantly advertised in HTN Slack. Shout-out to our neighbors at **Mintlify**, who dressed their teammate as a mint plant - memorable and effective. +- **Create multiple touchpoints.** Hand out flyers and QR codes, and ask nearby booths to cross-refer. Big thanks to the YC team for flyer space and student connections - and to Michael (Ollama) for pointing visitors our way. +- **Never leave the booth empty.** Keep someone at the booth at all times and rotate shifts. With four founding engineers on-site, coverage was easy. Even after hacking kicked off, the booth stayed a point of reference - and even then multiple participants DM’d us asking where to meet up. +- **Students are organic DevRel.** Our runner-up, Adam, hung out with us at the booth, pulling more people in. Peer-to-peer energy creates the network effect you need! + +![hack-booth](./assets/hack-booth.png) + +_(Our Founding Engineer, Morgan, hangs out with students at the stand, while Adam (runner-up) hacks on the side.)_ + +## 02:30 a.m. 
is still prime time at a hackathon + +Hack the North gives sponsors a 30-minute API Workshop during the early hours of the event—a perfect moment to shift from talking to building. + +Our slot landed at **2:30 a.m.** (_perks of the cheapest sponsor tier_). Thirty students showed up, energy surprisingly high. James, our new Founding DevRel Engineer, led the session and nailed it. + +**Our track rules were simple:** + +1. Build a Computer-Use Agent with the [Cua framework](https://github.com/trycua/cua) +2. Benchmark the agent on [HUD](https://www.hud.so) +3. Use [OSWorld-Tiny](https://huggingface.co/datasets/ddupont/OSWorld-Tiny-Public): a 14-task distillation of the full benchmark (~360 tasks, >1h) + +**Suggestions:** + +- **Leave something tangible.** We provided a Jupyter Notebook teams could run immediately. +- **Narrow scope, strong starts.** The more focused the challenge, the more **robust starting points** you should provide. +- **Want the details?** [Here’s the notebook we left participants](https://github.com/trycua/cua/blob/main/notebooks/sota_hackathon.ipynb). + +![hack-booth](./assets/hack-workshop.jpeg) + +_(Our CUA Workshop at 2:30 AM.)_ + +## Making it possible to focus on the work + +If you’re an OSS framework, it’s tempting to have hackers self-host on laptops. **Don’t.** You’ll spend the workshop debugging setups instead of reviewing ideas. + +**Lesson learned:** within hours, we shifted to **cloud-only Sandboxes**. Payoff: consistent environments, faster starts, far less tech support. + +We provided: + +- **Credits:** $200 Cua Cloud + $200 HUD per team (manual top-ups for visible progress) +- **LLMs/VLMs:** Anthropic assigned $50 per participant—tight for VLM iteration—so we added capped access under our org +- **Pre-kickoff provisioning:** Platform sign-up auto-created projects, keys, and sandboxes + +**Takeaway:** every minute not spent on setup is a minute gained for iterating. + +## 12 Hours in the Hackathon + +**After the workshop buzz.** Morning interest was high, but Docker setup + requiring focus on a single track thinned the crowd. Most sponsor prizes are broad (“use our product and you qualify”), letting students stack tracks. Ours required commitment. Upside: those who stayed shipped sharper, higher-quality submissions. + +**The bell curve of submissions.** Most entries used _claude-sonnet-4-20250514_—proof that docs and public leaderboards ([OSWorld](https://os-world.github.io/#benchmark)) guide choices. Results clustered around the safe pick, with fewer pushing boundaries. + +**Who went beyond the baseline.** A few tried multi-agent/tool graphs. One standout—[**cuala**](https://github.com/YeIIcw/cuala)—was a clean reference: deterministic actions, verifiable state changes, callbacks for saving images and trajectories. + +**Bottom line:** Early excitement is easy; keeping teams engaged requires reducing friction and offering multiple entry points. + +### What broke (and why) + +We skipped a full end-to-end **Cua × HUD** dry-run. It showed. + +- Hackers ran out of inference credits. Desktop tasks are token-heavy. A full OSWorld run (200 max steps) for _computer-use-preview_ (OpenAI Operator API) can cost >$600. Serious attempts: ~400k tokens × 14 tasks. +- Python version/build mismatches surfaced, requiring debug time across both OSS repos. +- Our Cua framework lacked a **Response Agent** to complete evaluation loops. Some runs stalled until patched. 
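To put numbers on the credit burn, here's a back-of-the-envelope estimate for a single serious attempt using the figures above; the per-token price is an illustrative assumption, not a quoted rate:

```python
# Rough inference cost for one serious OSWorld-Tiny attempt
tokens_per_task = 400_000   # observed: serious attempts used ~400k tokens per task
tasks = 14                  # OSWorld-Tiny task count
usd_per_million = 3.00      # assumed blended price for a Sonnet-class VLM (illustrative)

total_tokens = tokens_per_task * tasks             # 5,600,000 tokens
cost = total_tokens / 1_000_000 * usd_per_million  # ~= $16.80
print(f"{total_tokens:,} tokens ~ ${cost:.2f} per attempt")
# At $50 of credits per participant, that's roughly three full attempts
```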
+ +## Scoring and Results + +### Participation & Outcomes + +- ~**30** hackers gave the track a serious try; **5** crossed the finish line +- All submissions were **solo**, mostly undergrads +- Judging: OSWorld-Tiny on HUD, with Cua + HUD reruns to verify scores +- Final leaderboard: [HUD Leaderboard](https://www.hud.so/leaderboards/ddupont/OSWorld-Tiny-Public) + +![hack-leaderboard](./assets/hack-leaderboard.png) + +_(Leaderboard on HUD)_ + +### Winners + +**🥇 Winner — Ram** + +- Devpost: https://devpost.com/software/sota-computer-use-agent-challenge +- Code: https://github.com/Ram-Raghav-S/cua/tree/ram +- Score: 68.3% + +**🥈 Runner-up — Aryan** + +- Devpost: https://devpost.com/software/loopdeloop-computer-use-agent-sota-attempt +- Code: https://github.com/Tumph/cua +- Score: 55.9% + +**🥉 Special Mention — Adam** + +- Devpost: https://devpost.com/software/cuala +- Code: https://github.com/YeIIcw/cuala +- Score: 42.1% + +![hack-winners](./assets/hack-winners.jpeg) + +_(Our finalists before the award ceremony)_ + +## What We’d Keep + +- **Sponsor Hack the North again** +- **Keep a visible, staffed booth** +- **Publish a compact FAQ** +- **Simple, transparent scoring** + +## What We’d Change + +- **Run a full Cua × HUD dry-run under load** +- **Offer multiple on-ramps (evals, creative, RL)** +- **Keep a private eval set for judging** +- **Default to cloud sandboxes** +- **Handle ops earlier (swag, signage, QR codes)** +- **Reward generalization, not lucky runs** + +## Closing Thoughts + +Our first outing as sponsors wasn’t perfect, but it gave us a working playbook: **provision cloud early, keep scoring simple, always dry-run infra, and make the booth unforgettable**. + +If more hackathon tracks leaned on **public benchmarks**, weekends like this would produce fewer demos-for-show and more measurable progress. + +**P.S.** Huge thanks to the Ollama and HUD teams for co-sponsoring the track, and to our YC Partner Diana for offering a **guaranteed YC interview** as first prize. + +Whether you’re a hacker who wants to participate, or a company looking to sponsor, let’s talk — we’re especially excited to support benchmark-first hackathon tracks in the Bay Area this year. + +![hack-closing-ceremony](./assets/hack-closing-ceremony.jpg) + +_(HTN Closing Ceremony — Cua Track Winner Announcement)_ diff --git a/blog/hud-agent-evals.md b/blog/hud-agent-evals.md index 93c8b388..43a120e9 100644 --- a/blog/hud-agent-evals.md +++ b/blog/hud-agent-evals.md @@ -1,6 +1,6 @@ # Cua × HUD - Evaluate Any Computer-Use Agent -*Published on August 27, 2025 by Dillon DuPont* +_Published on August 27, 2025 by Dillon DuPont_ You can now benchmark any GUI-capable agent on real computer-use tasks through our new integration with [HUD](https://hud.so), the evaluation platform for computer-use agents. @@ -70,9 +70,9 @@ Watch your agent work in real-time. Example output: ```md Starting full dataset run... 
╔═════════════════════════════════════════════════════════════════╗ -║ 🚀 See your agent live at: ║ +║ 🚀 See your agent live at: ║ ╟─────────────────────────────────────────────────────────────────╢ -║ https://app.hud.so/jobs/fe05805d-4da9-4fc6-84b5-5c518528fd3c ║ +║ https://app.hud.so/jobs/fe05805d-4da9-4fc6-84b5-5c518528fd3c ║ ╚═════════════════════════════════════════════════════════════════╝ ``` @@ -90,4 +90,4 @@ Customize your evaluation with these options: - Notebook with end‑to‑end examples: https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb - Docs: https://docs.trycua.com/docs/agent-sdk/integrations/hud -- Live traces: https://app.hud.so \ No newline at end of file +- Live traces: https://app.hud.so diff --git a/blog/human-in-the-loop.md b/blog/human-in-the-loop.md index 055ad73a..dd14b27b 100644 --- a/blog/human-in-the-loop.md +++ b/blog/human-in-the-loop.md @@ -1,10 +1,10 @@ # When Agents Need Human Wisdom - Introducing Human-In-The-Loop Support -*Published on August 29, 2025 by Francesco Bonacci* +_Published on August 29, 2025 by Francesco Bonacci_ Sometimes the best AI agent is a human. Whether you're creating training demonstrations, evaluating complex scenarios, or need to intervene when automation hits a wall, our new Human-In-The-Loop integration puts you directly in control. -With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could benchmark any agent at scale. Today's update lets you *become* the agent when it matters most—seamlessly switching between automated intelligence and human judgment. +With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could benchmark any agent at scale. Today's update lets you _become_ the agent when it matters most—seamlessly switching between automated intelligence and human judgment.
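Becoming the agent is a one-line model swap. A minimal sketch, assuming the same `Computer` setup as the other examples in this post:

```python
import asyncio

from agent import ComputerAgent
from computer import Computer

async def main():
    computer = Computer()  # the sandbox you'll drive by hand
    await computer.run()

    # "human/human" routes every action request to a human instead of a model
    agent = ComputerAgent("human/human", tools=[computer])
    async for _ in agent.run("Fill out the signup form as a careful user would"):
        pass

if __name__ == "__main__":
    asyncio.run(main())
```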
@@ -20,11 +20,12 @@ With yesterday's [HUD evaluation integration](hud-agent-evals.md), you could ben ## Why Human-In-The-Loop? -Even the most sophisticated agents encounter edge cases, ambiguous interfaces, or tasks requiring human judgment. Rather than failing gracefully, they can now fail *intelligently*—by asking for human help. +Even the most sophisticated agents encounter edge cases, ambiguous interfaces, or tasks requiring human judgment. Rather than failing gracefully, they can now fail _intelligently_—by asking for human help. This approach bridges the gap between fully automated systems and pure manual control, letting you: + - **Demonstrate complex workflows** that agents can learn from -- **Evaluate tricky scenarios** where ground truth requires human assessment +- **Evaluate tricky scenarios** where ground truth requires human assessment - **Intervene selectively** when automated agents need guidance - **Test and debug** your tools and environments manually @@ -64,7 +65,7 @@ Combine model intelligence with human precision—let AI plan, then execute manu ```python agent = ComputerAgent( - "huggingface-local/HelloKKMe/GTA1-7B+human/human", + "huggingface-local/HelloKKMe/GTA1-7B+human/human", tools=[computer] ) @@ -81,7 +82,7 @@ Start automated, escalate to human when needed: # Primary automated agent primary_agent = ComputerAgent("openai/computer-use-preview", tools=[computer]) -# Human fallback agent +# Human fallback agent fallback_agent = ComputerAgent("human/human", tools=[computer]) try: @@ -101,22 +102,26 @@ except Exception: The human-in-the-loop interface provides a rich, responsive experience: ### **Visual Environment** + - **Screenshot display** with live updates as you work -- **Click handlers** for direct interaction with UI elements +- **Click handlers** for direct interaction with UI elements - **Zoom and pan** to see details clearly ### **Action Controls** + - **Click actions** - precise cursor positioning and clicking - **Keyboard input** - type text naturally or send specific key combinations - **Action history** - see the sequence of actions taken - **Undo support** - step back when needed -### **Tool Integration** +### **Tool Integration** + - **Full OpenAI compatibility** - standard tool call format - **Custom tools** - integrate your own tools seamlessly - **Real-time feedback** - see tool responses immediately ### **Smart Polling** + - **Responsive updates** - UI refreshes when new completions arrive - **Background processing** - continue working while waiting for tasks - **Session persistence** - resume interrupted sessions @@ -124,6 +129,7 @@ The human-in-the-loop interface provides a rich, responsive experience: ## Real-World Use Cases ### **Training Data Generation** + Create perfect demonstrations for fine-tuning: ```python @@ -132,7 +138,7 @@ demo_agent = ComputerAgent("human/human", tools=[computer]) tasks = [ "Create a budget spreadsheet with income and expense categories", - "Apply conditional formatting to highlight overbudget items", + "Apply conditional formatting to highlight overbudget items", "Generate a pie chart showing expense distribution" ] @@ -143,6 +149,7 @@ for task in tasks: ``` ### **Evaluation and Ground Truth** + Validate agent performance on complex scenarios: ```python @@ -154,6 +161,7 @@ async for _ in evaluator.run("Review this completed form and rate accuracy (1-10 ``` ### **Interactive Debugging** + Step through agent behavior manually: ```python @@ -165,6 +173,7 @@ async for _ in debug_agent.run("Reproduce the agent's failed 
login sequence"): ``` ### **Edge Case Handling** + Handle scenarios that break automated agents: ```python @@ -180,26 +189,26 @@ async for _ in edge_case_agent.run("Navigate this CAPTCHA-protected form"): Customize the human agent experience: - **UI refresh rate**: Adjust polling frequency for your workflow -- **Image quality**: Balance detail vs. performance for screenshots +- **Image quality**: Balance detail vs. performance for screenshots - **Action logging**: Save detailed traces for analysis and training - **Session timeout**: Configure idle timeouts for security - **Tool permissions**: Restrict which tools humans can access ## When to Use Human-In-The-Loop -| **Scenario** | **Why Human Control** | -|--------------|----------------------| -| **Creating training data** | Perfect demonstrations for model fine-tuning | -| **Evaluating complex tasks** | Human judgment for subjective or nuanced assessment | -| **Handling edge cases** | CAPTCHAs, unusual UIs, context-dependent decisions | -| **Debugging workflows** | Step through failures to identify breaking points | -| **High-stakes operations** | Critical tasks requiring human oversight and approval | -| **Testing new environments** | Validate tools and environments work as expected | +| **Scenario** | **Why Human Control** | +| ---------------------------- | ----------------------------------------------------- | +| **Creating training data** | Perfect demonstrations for model fine-tuning | +| **Evaluating complex tasks** | Human judgment for subjective or nuanced assessment | +| **Handling edge cases** | CAPTCHAs, unusual UIs, context-dependent decisions | +| **Debugging workflows** | Step through failures to identify breaking points | +| **High-stakes operations** | Critical tasks requiring human oversight and approval | +| **Testing new environments** | Validate tools and environments work as expected | ## Learn More - **Interactive examples**: Try human-in-the-loop control with sample tasks -- **Training data pipelines**: Learn how to convert human demonstrations into model training data +- **Training data pipelines**: Learn how to convert human demonstrations into model training data - **Evaluation frameworks**: Build human-validated test suites for your agents - **API documentation**: Full reference for human agent configuration @@ -207,4 +216,4 @@ Ready to put humans back in the loop? The most sophisticated AI system knows whe --- -*Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop).* +_Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop)._ diff --git a/blog/introducing-cua-cloud-containers.md b/blog/introducing-cua-cloud-containers.md index 86cbd400..87ef62b0 100644 --- a/blog/introducing-cua-cloud-containers.md +++ b/blog/introducing-cua-cloud-containers.md @@ -1,8 +1,8 @@ -# Introducing Cua Cloud Containers: Computer-Use Agents in the Cloud +# Introducing Cua Cloud Sandbox: Computer-Use Agents in the Cloud -*Published on May 28, 2025 by Francesco Bonacci* +_Published on May 28, 2025 by Francesco Bonacci_ -Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. 
In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Containers** – the easiest way to deploy Computer-Use Agents at scale. +Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Sandbox** – the easiest way to deploy Computer-Use Agents at scale.
@@ -10,13 +10,13 @@ Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./bui ## What is Cua Cloud? -Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured cloud containers with a single command. Each container comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs. +Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured Cloud Sandbox instances with a single command. Each sandbox comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs. -## Why Cua Cloud Containers? +## Why Cua Cloud Sandbox? -Four months ago, we launched [**Lume**](https://github.com/trycua/cua/tree/main/libs/lume) and [**Cua**](https://github.com/trycua/cua) with the goal to bring sandboxed VMs and Computer-Use Agents on Apple Silicon. The developer's community response was incredible 🎉 +Four months ago, we launched [**Lume**](https://github.com/trycua/cua/tree/main/libs/lume) and [**Cua**](https://github.com/trycua/cua) with the goal to bring sandboxed VMs and Computer-Use Agents on Apple Silicon. The developer's community response was incredible 🎉 -Going from prototype to production revealed a problem though: **local macOS VMs don't scale**, neither are they easily portable. +Going from prototype to production revealed a problem though: **local macOS VMs don't scale**, neither are they easily portable. Our Discord community, YC peers, and early pilot customers kept hitting the same issues. Storage constraints meant **20-40GB per VM** filled laptops fast. Different hardware architectures (Apple Silicon ARM vs Intel x86) prevented portability of local workflows. Every new user lost a day to setup and configuration. @@ -40,7 +40,7 @@ export CUA_API_KEY=your_api_key_here export CUA_CONTAINER_NAME=my-agent-container ``` -### Step 2: Launch Your First Container +### Step 2: Launch Your First Sandbox ```python import asyncio @@ -55,7 +55,7 @@ async def run_cloud_agent(): name=os.getenv("CUA_CONTAINER_NAME"), provider_type=VMProviderType.CLOUD, ) - + # Create an agent with your preferred loop agent = ComputerAgent( model="openai/gpt-4o", @@ -63,7 +63,7 @@ async def run_cloud_agent(): verbosity=logging.INFO, tools=[computer] ) - + # Run a task async for result in agent.run("Open Chrome and search for AI news"): print(f"Response: {result.get('text')}") @@ -80,7 +80,7 @@ We're launching with **three compute tiers** to match your workload needs: - **Medium** (2 vCPU, 8GB RAM) - Ideal for most production workloads - **Large** (8 vCPU, 32GB RAM) - Built for complex, resource-intensive operations -Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic cleanup on termination. +Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic cleanup on termination for sandboxes. 
## How some customers are using Cua Cloud today @@ -102,14 +102,14 @@ async def github_automation(): name="github-automation", provider_type=VMProviderType.CLOUD, ) - + agent = ComputerAgent( model="openai/gpt-4o", save_trajectory=True, verbosity=logging.INFO, tools=[computer] ) - + tasks = [ "Look for a repository named trycua/cua on GitHub.", "Check the open issues, open the most recent one and read it.", @@ -119,17 +119,17 @@ async def github_automation(): "Commit the changes with a descriptive message.", "Create a pull request." ] - + for i, task in enumerate(tasks): print(f"\nExecuting task {i+1}/{len(tasks)}: {task}") async for result in agent.run(task): print(f"Response: {result.get('text')}") - + # Check if any tools were used tools = result.get('tools') if tools: print(f"Tools used: {tools}") - + print(f"Task {i+1} completed") # Run the automation @@ -153,13 +153,13 @@ async def scrape_website(site_name, url): name=f"scraper-{site_name}", provider_type=VMProviderType.CLOUD, ) - + agent = ComputerAgent( model="openai/gpt-4o", save_trajectory=True, tools=[computer] ) - + results = [] tasks = [ f"Navigate to {url}", @@ -167,7 +167,7 @@ async def scrape_website(site_name, url): "Take a screenshot of the page", "Save the extracted data to a file" ] - + for task in tasks: async for result in agent.run(task): results.append({ @@ -175,7 +175,7 @@ async def scrape_website(site_name, url): 'task': task, 'response': result.get('text') }) - + return results async def parallel_scraping(): @@ -185,11 +185,11 @@ async def parallel_scraping(): ("HackerNews", "https://news.ycombinator.com"), ("TechCrunch", "https://techcrunch.com") ] - + # Run all scraping tasks in parallel tasks = [scrape_website(name, url) for name, url in sites] results = await asyncio.gather(*tasks) - + # Process results for site_results in results: print(f"\nResults from {site_results[0]['site']}:") @@ -202,23 +202,23 @@ asyncio.run(parallel_scraping()) ## Cost Optimization Tips -To optimize your costs, use appropriate container sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize container spin-up time, and always remember to terminate containers when your work is complete. +To optimize your costs, use appropriate sandbox sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize sandbox spin-up time, and always remember to terminate sandboxes when your work is complete. ## Security Considerations -Cua Cloud runs all containers in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories. +Cua Cloud runs all sandboxes in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories. ## What's Next for Cua Cloud We're just getting started! Here's what's coming in the next few months: -### Elastic Autoscaled Container Pools +### Elastic Autoscaled Sandbox Pools -Soon you'll be able to create elastic container pools that automatically scale based on demand. Define minimum and maximum container counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention. +Soon you'll be able to create elastic sandbox pools that automatically scale based on demand. Define minimum and maximum sandbox counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention. 
### Windows and macOS Cloud Support -While we're launching with Linux containers, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API. +While we're launching with Linux sandboxes, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API. Stay tuned for updates and join our [**Discord**](https://discord.gg/cua-ai) to vote on which features you'd like to see first! diff --git a/blog/lume-to-containerization.md b/blog/lume-to-containerization.md index cf468e0e..ca74286d 100644 --- a/blog/lume-to-containerization.md +++ b/blog/lume-to-containerization.md @@ -1,8 +1,8 @@ # From Lume to Containerization: Our Journey Meets Apple's Vision -*Published on June 10, 2025 by Francesco Bonacci* +_Published on June 10, 2025 by Francesco Bonacci_ -Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Containers](./introducing-cua-cloud-containers). Here's our take on Apple's announcement. +Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Sandbox](./introducing-cua-cloud-containers). Here's our take on Apple's announcement. ## Our Story @@ -40,6 +40,7 @@ How Apple's Framework Works: ``` Why is this better? + - **Better security**: Each container is completely separate - **Better performance**: Each container gets its own resources - **Real isolation**: If one container has problems, others aren't affected @@ -71,6 +72,7 @@ While Apple's new framework focuses on containers, we've been building VM manage [Lume](https://github.com/trycua/cua/tree/main/libs/lume) is our command-line tool for creating and managing VMs on Apple Silicon. We built it because setting up VMs on macOS was too complicated. What Lume does: + - **Direct control**: Works directly with Apple's Virtualization framework - **Ready-to-use images**: Start a macOS or Linux VM with one command - **API server**: Control VMs from other programs (runs on port 7777) @@ -91,6 +93,7 @@ lume run macos-sequoia-vanilla:latest [Lumier](https://github.com/trycua/lumier) works differently. It lets you use Docker commands to manage VMs. But here's the key: **Docker is just for packaging, not for isolation**. 
What makes Lumier useful: + - **Familiar commands**: If you know Docker, you know Lumier - **Web access**: Connect to your VM through a browser - **Save your work**: VMs remember their state @@ -127,6 +130,7 @@ Docker → Lume → Full VM → Mac Hardware ### When to Use What **Apple's Containerization** + - ✅ Perfect for: Running containers with maximum security - ✅ Starts in under a second - ✅ Uses less memory and CPU @@ -134,6 +138,7 @@ Docker → Lume → Full VM → Mac Hardware - ❌ Only for containers, not full VMs **Lume** + - ✅ Perfect for: Development and testing - ✅ Full control over macOS/Linux VMs - ✅ Works on current macOS versions @@ -141,6 +146,7 @@ Docker → Lume → Full VM → Mac Hardware - ❌ Uses more resources than containers **Lumier** + - ✅ Perfect for: Teams already using Docker - ✅ Easy to share and deploy - ✅ Access through your browser @@ -168,9 +174,9 @@ Apple's announcement confirms we're on the right path. Here's what we're looking - [Apple Containerization Framework](https://github.com/apple/containerization) - [Lume - Direct VM Management](https://github.com/trycua/cua/tree/main/libs/lume) - [Lumier - Docker Interface for VMs](https://github.com/trycua/cua/tree/main/libs/lumier) -- [Cua Cloud Containers](https://trycua.com) +- [Cua Cloud Sandbox](https://trycua.com) - [Join our Discord](https://discord.gg/cua-ai) --- -*Questions about virtualization on Apple Silicon? Come chat with us on Discord!* \ No newline at end of file +_Questions about virtualization on Apple Silicon? Come chat with us on Discord!_ diff --git a/blog/sandboxed-python-execution.md b/blog/sandboxed-python-execution.md index 9261e955..e0eb8391 100644 --- a/blog/sandboxed-python-execution.md +++ b/blog/sandboxed-python-execution.md @@ -1,6 +1,6 @@ # Sandboxed Python Execution: Run Code Safely in Cua Containers -*Published on June 23, 2025 by Dillon DuPont* +_Published on June 23, 2025 by Dillon DuPont_ Cua's computer-use capabilities that we touched on in [Building your own Operator on macOS - Part 2](build-your-own-operator-on-macos-2.md) – your AI agents can click, scroll, type, and interact with any desktop application. But what if your agent needs to do more than just UI automation? What if it needs to process data, make API calls, analyze images, or run complex logic alongside those UI interactions, within the same virtual environment? @@ -49,15 +49,19 @@ What's happening here? When you call `greet_and_print()`, Cua extracts the funct Cua's sandboxed execution system employs several key architectural components: ### 1. Source Code Extraction + Cua uses Python's `inspect.getsource()` to extract your function's source code and reconstruct the function definition in the remote environment. ### 2. Virtual Environment Isolation + Each sandboxed function runs in a named virtual environment within the container. This provides complete dependency isolation between different functions and their respective environments. ### 3. Data Serialization and Transport + Arguments and return values are serialized as JSON and transported between the host and container. This ensures compatibility across different Python versions and execution environments. ### 4. Comprehensive Error Handling + The system captures both successful results and exceptions, preserving stack traces and error information for debugging purposes. 
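To make those four components concrete, here is a deliberately tiny, self-contained sketch of the same mechanism (an illustration, not Cua's actual transport code): extract the function's source with `inspect.getsource()`, JSON-serialize the arguments, execute in a separate interpreter, and propagate errors back:

```python
import inspect
import json
import subprocess
import sys

def run_remotely(func, *args):
    """Toy illustration of sandboxed dispatch."""
    source = inspect.getsource(func)   # 1. source code extraction
    payload = json.dumps(list(args))   # 3. serialize arguments as JSON
    driver = (
        f"{source}\n"
        "import json\n"
        f"args = json.loads({payload!r})\n"
        f"print(json.dumps({func.__name__}(*args)))\n"
    )
    # 2. a fresh interpreter stands in for the container's named virtual environment
    result = subprocess.run([sys.executable, "-c", driver], capture_output=True, text=True)
    if result.returncode != 0:         # 4. surface remote exceptions with their traceback
        raise RuntimeError(result.stderr)
    return json.loads(result.stdout)

def add(a, b):
    return a + b

print(run_remotely(add, 2, 3))  # -> 5
```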
## Getting your sandbox ready @@ -73,10 +77,10 @@ async def main(): # Fire up the computer computer = Computer() await computer.run() - + # Make it the default for all sandboxed functions set_default_computer(computer) - + # Install some packages in a virtual environment await computer.venv_install("demo_venv", ["requests", "beautifulsoup4"]) ``` @@ -104,7 +108,7 @@ def automate_browser_with_playwright(): import time import base64 from datetime import datetime - + try: with sync_playwright() as p: # Launch browser (visible, because why not?) @@ -112,68 +116,68 @@ def automate_browser_with_playwright(): headless=False, args=['--no-sandbox', '--disable-dev-shm-usage'] ) - + page = browser.new_page() page.set_viewport_size({"width": 1280, "height": 720}) - + actions = [] screenshots = {} - + # Let's visit example.com and poke around page.goto("https://example.com") actions.append("Navigated to example.com") - + # Grab a screenshot because screenshots are cool screenshot_bytes = page.screenshot(full_page=True) screenshots["initial"] = base64.b64encode(screenshot_bytes).decode() - + # Get some basic info title = page.title() actions.append(f"Page title: {title}") - + # Find links and headings try: links = page.locator("a").all() link_texts = [link.text_content() for link in links[:5]] actions.append(f"Found {len(links)} links: {link_texts}") - + headings = page.locator("h1, h2, h3").all() heading_texts = [h.text_content() for h in headings[:3]] actions.append(f"Found headings: {heading_texts}") - + except Exception as e: actions.append(f"Element interaction error: {str(e)}") - + # Let's try a form for good measure try: page.goto("https://httpbin.org/forms/post") actions.append("Navigated to form page") - + # Fill out the form page.fill('input[name="custname"]', "Test User from Sandboxed Environment") page.fill('input[name="custtel"]', "555-0123") page.fill('input[name="custemail"]', "test@example.com") page.select_option('select[name="size"]', "large") - + actions.append("Filled out form fields") - + # Submit and see what happens page.click('input[type="submit"]') page.wait_for_load_state("networkidle") - + actions.append("Submitted form") - + except Exception as e: actions.append(f"Form interaction error: {str(e)}") - + browser.close() - + return { "actions_performed": actions, "screenshots": screenshots, "success": True } - + except Exception as e: return {"error": f"Browser automation failed: {str(e)}"} @@ -196,9 +200,9 @@ def security_audit_tool(code_snippet): """Analyze code for potential security issues""" import ast import re - + issues = [] - + # Check for the usual suspects dangerous_patterns = [ (r'eval\s*\(', "Use of eval() function"), @@ -207,11 +211,11 @@ def security_audit_tool(code_snippet): (r'subprocess\.', "Subprocess usage"), (r'os\.system\s*\(', "OS system call"), ] - + for pattern, description in dangerous_patterns: if re.search(pattern, code_snippet): issues.append(description) - + # Get fancy with AST analysis try: tree = ast.parse(code_snippet) @@ -222,7 +226,7 @@ def security_audit_tool(code_snippet): issues.append(f"Dangerous function call: {node.func.id}") except SyntaxError: issues.append("Syntax error in code") - + return { "security_issues": issues, "risk_level": "HIGH" if len(issues) > 2 else "MEDIUM" if issues else "LOW" @@ -235,7 +239,7 @@ print(f"Security audit: {audit_result}") ### Desktop automation in the cloud -Here's where things get really interesting. 
Cua cloud containers come with full desktop environments, so you can automate GUIs: +Here's where things get really interesting. Cua Cloud Sandbox comes with full desktop environments, so you can automate GUIs: ```python @sandboxed("desktop_env") @@ -245,34 +249,34 @@ def take_screenshot_and_analyze(): import base64 from PIL import ImageGrab from datetime import datetime - + try: # Grab the screen screenshot = ImageGrab.grab() - + # Convert to base64 for easy transport buffer = io.BytesIO() screenshot.save(buffer, format='PNG') screenshot_data = base64.b64encode(buffer.getvalue()).decode() - + # Get some basic info screen_info = { "size": screenshot.size, "mode": screenshot.mode, "timestamp": datetime.now().isoformat() } - + # Analyze the colors (because why not?) colors = screenshot.getcolors(maxcolors=256*256*256) dominant_color = max(colors, key=lambda x: x[0])[1] if colors else None - + return { "screenshot_base64": screenshot_data, "screen_info": screen_info, "dominant_color": dominant_color, "unique_colors": len(colors) if colors else 0 } - + except Exception as e: return {"error": f"Screenshot failed: {str(e)}"} @@ -287,6 +291,7 @@ print("Desktop analysis complete!") ## Pro tips for sandboxed success ### Keep it self-contained + Always put your imports inside the function. Trust us on this one: ```python @@ -294,12 +299,13 @@ Always put your imports inside the function. Trust us on this one: def good_function(): import os # Import inside the function import json - + # Your code here return {"result": "success"} ``` ### Install dependencies first + Don't forget to install packages before using them: ```python @@ -314,13 +320,14 @@ def data_analysis(): ``` ### Use descriptive environment names + Future you will thank you: ```python @sandboxed("data_processing_env") def process_data(): pass -@sandboxed("web_scraping_env") +@sandboxed("web_scraping_env") def scrape_site(): pass @sandboxed("ml_training_env") @@ -328,6 +335,7 @@ def train_model(): pass ``` ### Always handle errors gracefully + Things break. Plan for it: ```python @@ -345,6 +353,7 @@ def robust_function(data): Let's be honest – there's some overhead here. Code needs to be serialized, sent over the network, and executed remotely. But for most use cases, the benefits far outweigh the costs. If you're building something performance-critical, consider: + - Batching multiple operations into a single sandboxed function - Minimizing data transfer between host and container - Using persistent virtual environments @@ -369,4 +378,4 @@ Happy coding (safely)! --- -*Want to dive deeper? Check out our [sandboxed functions examples](https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py) and [virtual environment tests](https://github.com/trycua/cua/blob/main/tests/venv.py) on GitHub. Questions? Come chat with us on Discord!* +_Want to dive deeper? Check out our [sandboxed functions examples](https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py) and [virtual environment tests](https://github.com/trycua/cua/blob/main/tests/venv.py) on GitHub. Questions? 
Come chat with us on Discord!_ diff --git a/blog/training-computer-use-models-trajectories-1.md b/blog/training-computer-use-models-trajectories-1.md index b5cafb6f..040eaea4 100644 --- a/blog/training-computer-use-models-trajectories-1.md +++ b/blog/training-computer-use-models-trajectories-1.md @@ -1,6 +1,6 @@ # Training Computer-Use Models: Creating Human Trajectories with Cua -*Published on May 1, 2025 by Dillon DuPont* +_Published on May 1, 2025 by Dillon DuPont_ In our previous posts, we covered [building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [using the Agent framework](build-your-own-operator-on-macos-2) to simplify development. Today, we'll focus on a critical aspect of improving computer-use agents and models: gathering high-quality demonstration data using Cua's Computer-Use Interface (CUI) and its Gradio UI to create and share human-generated trajectories. @@ -8,10 +8,10 @@ Why is this important? Underlying models used by Computer-use agents need exampl - ## What You'll Learn By the end of this tutorial, you'll be able to: + - Set up the Computer-Use Interface (CUI) with Gradio UI support - Record your own computer interaction trajectories - Organize and tag your demonstrations @@ -19,6 +19,7 @@ By the end of this tutorial, you'll be able to: - Contribute to improving computer-use AI for everyone **Prerequisites:** + - macOS Sonoma (14.0) or later - Python 3.10+ - Basic familiarity with Python and terminal commands @@ -38,6 +39,7 @@ Human trajectories, in the context of Computer-use AI Agents, are recordings of - Time spent on different elements These trajectories serve as examples for AI models to learn from, helping them understand the relationship between: + 1. The visual state of the screen 2. The user's goal or task 3. The most appropriate action to take @@ -59,17 +61,19 @@ By contributing high-quality demonstrations, you're helping to create more capab The Computer-Use Interface includes an optional Gradio UI specifically designed to make recording and sharing demonstrations easy. Let's set it up: 1. **Create a Python environment** (optional but recommended): + ```bash # Using conda conda create -n cua-trajectories python=3.10 conda activate cua-trajectories - + # Using venv python -m venv cua-trajectories source cua-trajectories/bin/activate # On macOS/Linux ``` 2. 
**Install the CUI package with UI support**: + ```bash pip install "cua-computer[ui]" ``` @@ -145,6 +149,7 @@ Effective tagging and organization make your demonstrations more valuable to res ### Task-Based Tags Describe what the demonstration accomplishes: + - `web-browsing` - `document-editing` - `file-management` @@ -154,6 +159,7 @@ Describe what the demonstration accomplishes: ### Application Tags Identify the applications used: + - `finder` - `safari` - `notes` @@ -163,6 +169,7 @@ Identify the applications used: ### Complexity Tags Indicate the difficulty level: + - `beginner` - `intermediate` - `advanced` @@ -171,6 +178,7 @@ Indicate the difficulty level: ### UI Element Tags Highlight specific UI interactions: + - `drag-and-drop` - `menu-navigation` - `form-filling` @@ -229,11 +237,11 @@ from computer import Computer computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4") try: await computer.run() - + screenshot = await computer.interface.screenshot() with open("screenshot.png", "wb") as f: f.write(screenshot) - + await computer.interface.move_cursor(100, 100) await computer.interface.left_click() await computer.interface.right_click(300, 300) @@ -280,6 +288,7 @@ You can also learn from existing trajectory datasets contributed by the communit ### Summary In this guide, we've covered how to: + - Set up the Computer-Use Interface with Gradio UI - Record high-quality human demonstrations - Organize and tag your trajectories diff --git a/blog/trajectory-viewer.md b/blog/trajectory-viewer.md index 16b78d95..20003708 100644 --- a/blog/trajectory-viewer.md +++ b/blog/trajectory-viewer.md @@ -1,6 +1,6 @@ # Trajectory Viewer for Cua -*Published on May 13, 2025 by Dillon DuPont* +_Published on May 13, 2025 by Dillon DuPont_ Don’t forget to check out [Part 1: Building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [Part 2: Using the Agent framework](build-your-own-operator-on-macos-2) for setting up your Cua environment and basic tips and tricks! @@ -18,7 +18,7 @@ Think of a trajectory as a detailed video recording of your agent’s journey: - **Observations**: What did the agent see (the exact screen content) at each point in time? - **Actions**: What clicks, keystrokes, or commands did it perform in response? - **Decisions**: Which options did it choose, and why? -Especially for longer and more complex tasks, your agent will make multiple steps, take multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong. + Especially for longer and more complex tasks, your agent will make multiple steps, take multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong. ## So, what’s Cua’s Trajectory Viewer and why use it? @@ -40,10 +40,6 @@ The viewer allows you to see exactly what your agent observed and how it interac ## Recording a Trajectory -### Using the Gradio UI - -The simplest way to create agent trajectories is through the [Cua Agent Gradio UI](https://www.trycua.com/docs/quickstart-ui) by checking the "Save Trajectory" option. 
- ### Using the ComputerAgent API Trajectories are saved by default when using the ComputerAgent API: diff --git a/blog/ubuntu-docker-support.md b/blog/ubuntu-docker-support.md index bd3c4a4d..69b6b09e 100644 --- a/blog/ubuntu-docker-support.md +++ b/blog/ubuntu-docker-support.md @@ -1,6 +1,6 @@ # Ubuntu Docker Support in Cua with Kasm -*Published Aug 26, 2025 by Francesco Bonacci* +_Published Aug 26, 2025 by Francesco Bonacci_ Today we’re shipping **Ubuntu Docker support** in Cua. You get a full Linux desktop inside a Docker container, viewable right in your browser—no VM spin-up, no extra clients. It behaves the same on macOS, Windows, and Linux. @@ -16,17 +16,17 @@ We wanted something lightweight, isolated, and identical across machines. So we Short answer: **portability, startup time, and ops friction.** -* **Runs everywhere, no hypervisor drama.** KVM needs Linux; Hyper-V/Virtualization.Framework setups vary by host and policy. Docker is ubiquitous across macOS/Windows/Linux and allowed in most CI runners—so your GUI env actually runs where your team works. -* **Faster boot & smaller footprints.** Containers cold-start in seconds and images are GB-scale; VMs tend to be minutes and tens of GB. That matters for parallel agents, CI, and local iteration. -* **Lower ops overhead.** No nested virt, kernel modules, or privileged host tweaks that many orgs (and cloud runners) block. Pull → run → browser. -* **Same image, everywhere.** One Docker image gives you an identical desktop on every dev laptop and in CI. -* **Web-first access out of the box.** KasmVNC serves the desktop over HTTP—no extra VNC/RDP clients or SPICE config. +- **Runs everywhere, no hypervisor drama.** KVM needs Linux; Hyper-V/Virtualization.Framework setups vary by host and policy. Docker is ubiquitous across macOS/Windows/Linux and allowed in most CI runners—so your GUI env actually runs where your team works. +- **Faster boot & smaller footprints.** Containers cold-start in seconds and images are GB-scale; VMs tend to be minutes and tens of GB. That matters for parallel agents, CI, and local iteration. +- **Lower ops overhead.** No nested virt, kernel modules, or privileged host tweaks that many orgs (and cloud runners) block. Pull → run → browser. +- **Same image, everywhere.** One Docker image gives you an identical desktop on every dev laptop and in CI. +- **Web-first access out of the box.** KasmVNC serves the desktop over HTTP—no extra VNC/RDP clients or SPICE config. -**When we *do* reach for QEMU/KVM:** +**When we _do_ reach for QEMU/KVM:** -* You need **true OS isolation** or to run **non-Linux** guests. -* You want **kernel-level features** or **device/GPU passthrough** (VFIO). -* You’re optimizing for **hardware realism** over startup speed and density. +- You need **true OS isolation** or to run **non-Linux** guests. +- You want **kernel-level features** or **device/GPU passthrough** (VFIO). +- You’re optimizing for **hardware realism** over startup speed and density. For this release, the goal was a **cross-platform Linux desktop that feels instant and identical** across local dev and CI. Containers + KasmVNC hit that sweet spot. 
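Here is what that looks like in practice: a minimal sketch that launches the Kasm-based Ubuntu desktop through the Computer SDK's Docker provider, using the image and parameters shown in the Cua docs (the container name is illustrative).

```python
import asyncio
from computer import Computer

async def main():
    # Launch (or attach to) the Kasm-based Ubuntu desktop in Docker,
    # then drive it through the same interface as any other Cua computer.
    async with Computer(
        os_type="linux",
        provider_type="docker",
        image="trycua/cua-ubuntu:latest",
        name="my-kasm-container",  # illustrative container name
    ) as computer:
        # Grab a screenshot of the containerized desktop as a quick sanity check
        screenshot = await computer.interface.screenshot()
        with open("desktop.png", "wb") as f:
            f.write(screenshot)

asyncio.run(main())
```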
@@ -174,10 +174,10 @@ await computer.run() ## Links -* **Docker Provider Docs:** [https://docs.trycua.com/computers/docker](https://docs.trycua.com/computers/docker) -* **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC) -* **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm) -* **Computer SDK:** [https://docs.trycua.com/docs/computer-sdk/computers](https://docs.trycua.com/docs/computer-sdk/computers) -* **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai) +- **Docker Provider Docs:** [https://docs.trycua.com/computers/docker](https://docs.trycua.com/computers/docker) +- **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC) +- **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm) +- **Computer SDK:** [https://docs.trycua.com/docs/computer-sdk/computers](https://docs.trycua.com/docs/computer-sdk/computers) +- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai) -Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build. \ No newline at end of file +Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build. diff --git a/blog/windows-sandbox.md b/blog/windows-sandbox.md index f8926804..ef577611 100644 --- a/blog/windows-sandbox.md +++ b/blog/windows-sandbox.md @@ -1,10 +1,10 @@ # Your Windows PC is Already the Perfect Development Environment for Computer-Use Agents -*Published on June 18, 2025 by Dillon DuPont* +_Published on June 18, 2025 by Dillon DuPont_ -Over the last few months, our enterprise users kept asking the same type of question: *"When are you adding support for AutoCAD?"* *"What about SAP integration?"* *"Can you automate our MES system?"* - each request was for different enterprise applications we'd never heard of. +Over the last few months, our enterprise users kept asking the same type of question: _"When are you adding support for AutoCAD?"_ _"What about SAP integration?"_ _"Can you automate our MES system?"_ - each request was for different enterprise applications we'd never heard of. -At first, we deflected. We've been building Cua to work across different environments - from [Lume for macOS VMs](./lume-to-containerization) to cloud containers. But these requests kept piling up. AutoCAD automation. SAP integration. Specialized manufacturing systems. +At first, we deflected. We've been building Cua to work across different environments - from [Lume for macOS VMs](./lume-to-containerization) to cloud containers. But these requests kept piling up. AutoCAD automation. SAP integration. Specialized manufacturing systems. Then it hit us: **they all ran exclusively on Windows**. 
@@ -80,6 +80,7 @@ python -m agent.ui ``` **What you get**: + - Visual interface in your browser - Real-time agent action viewing - Natural language task instructions @@ -101,21 +102,21 @@ async def test_windows_agent(): os_type="windows", memory="4GB", ) - + # Start the VM (~35s) await computer.run() - + # Create agent with your preferred model agent = ComputerAgent( model="openai/computer-use-preview", save_trajectory=True, tools=[computer] ) - + # Give it a task async for result in agent.run("Open Calculator and compute 15% tip on $47.50"): print(f"Agent action: {result}") - + # Shutdown the VM await computer.stop() @@ -123,6 +124,7 @@ asyncio.run(test_windows_agent()) ``` **What you get**: + - Full programmatic control - Custom agent workflows - Integration with your existing code @@ -141,6 +143,7 @@ asyncio.run(test_windows_agent()) Let's see how different testing approaches stack up: ### Windows Sandbox + Cua + - **Perfect for**: Quick testing and development - **Cost**: Free (built into Windows) - **Setup time**: Under 5 minutes @@ -149,6 +152,7 @@ Let's see how different testing approaches stack up: - **Requires**: Windows 10/11 with 4GB+ RAM ### Traditional VMs + - **Perfect for**: Complex testing scenarios - **Full customization**: Any Windows version - **Heavy resource usage**: Slow to start/stop @@ -160,6 +164,7 @@ Let's see how different testing approaches stack up: Here's what our enterprise users are building with Windows Sandbox: ### CAD and Engineering Automation + ```python # Example: AutoCAD drawing automation task = """ @@ -172,6 +177,7 @@ task = """ ``` ### Manufacturing and ERP Integration + ```python # Example: SAP workflow automation task = """ @@ -184,6 +190,7 @@ task = """ ``` ### Financial Software Automation + ```python # Example: Trading platform automation task = """ @@ -196,6 +203,7 @@ task = """ ``` ### Legacy Windows Application Integration + ```python # Example: Custom Windows application automation task = """ @@ -210,12 +218,14 @@ task = """ ## System Requirements and Performance ### What You Need + - **Windows 10/11**: Any edition that supports Windows Sandbox - **Memory**: 4GB minimum (8GB recommended for CAD/professional software) - **CPU**: Virtualization support (enabled by default on modern systems) - **Storage**: A few GB free space ### Performance Tips + - **Close unnecessary applications** before starting Windows Sandbox - **Allocate appropriate memory** based on your RPA workflow complexity - **Use SSD storage** for faster sandbox startup @@ -234,4 +244,4 @@ But for development, prototyping, and learning Windows RPA workflows, **Windows --- -*Ready to see AI agents control your Windows applications? Come share your testing experiences on Discord!* +_Ready to see AI agents control your Windows applications? 
Come share your testing experiences on Discord!_ diff --git a/docs/.env.example b/docs/.env.example new file mode 100644 index 00000000..01693726 --- /dev/null +++ b/docs/.env.example @@ -0,0 +1,2 @@ +NEXT_PUBLIC_POSTHOG_API_KEY= +NEXT_PUBLIC_POSTHOG_HOST= \ No newline at end of file diff --git a/docs/.prettierrc b/docs/.prettierrc deleted file mode 100644 index 03af2a8b..00000000 --- a/docs/.prettierrc +++ /dev/null @@ -1,8 +0,0 @@ -{ - "useTabs": false, - "semi": true, - "singleQuote": true, - "trailingComma": "es5", - "bracketSpacing": true, - "jsxBracketSameLine": true -} \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index b6f494e5..18ddff9a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -34,6 +34,14 @@ A `source.config.ts` config file has been included, you can customise different Read the [Introduction](https://fumadocs.dev/docs/mdx) for further details. +## Setup Telemetry + +We use PostHog for telemetry to improve the clarity and structure of our documentation. Start by copying the `.env.example` and adding in your PostHog API key and host. + +```bash +cp .env.example .env +``` + ## Learn More To learn more about Next.js and Fumadocs, take a look at the following diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx b/docs/content/docs/agent-sdk/agent-loops.mdx index 33bf66e2..08dcf07b 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -15,20 +15,34 @@ To run an agent loop simply do: ```python from agent import ComputerAgent +import asyncio from computer import Computer -computer = Computer() # Connect to a cua container -agent = ComputerAgent( - model="anthropic/claude-3-5-sonnet-20241022", - tools=[computer] -) +async def take_screenshot(): + async with Computer( + os_type="linux", + provider_type="cloud", + name="your-sandbox-name", + api_key="your-api-key" + ) as computer: -prompt = "Take a screenshot and tell me what you see" + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + max_trajectory_budget=5.0 + ) -async for result in agent.run(prompt): - if result["output"][-1]["type"] == "message": - print("Agent:", result["output"][-1]["content"][0]["text"]) + messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] + + async for result in agent.run(messages): + for item in result["output"]: + if item["type"] == "message": + print(item["content"][0]["text"]) + + +if __name__ == "__main__": + asyncio.run(take_screenshot()) ``` For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page. @@ -122,6 +136,8 @@ The `ComputerAgent` constructor provides a wide range of options for customizing Enables prompt caching for repeated prompts (mainly for Anthropic models). - `max_trajectory_budget` (`float` | `dict`): If set (float or dict), adds a budget manager callback that tracks usage costs and stops execution if the budget is exceeded. Dict allows advanced options (e.g., `{ "max_budget": 5.0, "raise_error": True }`). +- `instructions` (`str` | `list[str]`): + System instructions for the agent. Can be a single string or multiple strings in a tuple/list for readability; they are concatenated into one system prompt. - `**kwargs` (`any`): Any additional keyword arguments are passed through to the agent loop or model provider. 
@@ -142,7 +158,11 @@ agent = ComputerAgent( max_retries=5, screenshot_delay=1.0, use_prompt_caching=True, - max_trajectory_budget={"max_budget": 5.0, "raise_error": True} + max_trajectory_budget={"max_budget": 5.0, "raise_error": True}, + instructions=( + "You are a helpful computer-using agent.", + "Output computer calls until you complete the given task.", + ) ) ``` @@ -170,4 +190,4 @@ except BudgetExceededException: print("Budget limit exceeded") except Exception as e: print(f"Agent error: {e}") -``` \ No newline at end of file +``` diff --git a/docs/content/docs/agent-sdk/benchmarks/introduction.mdx b/docs/content/docs/agent-sdk/benchmarks/introduction.mdx index 3f2251f8..7f15b6a8 100644 --- a/docs/content/docs/agent-sdk/benchmarks/introduction.mdx +++ b/docs/content/docs/agent-sdk/benchmarks/introduction.mdx @@ -1,9 +1,9 @@ --- title: Introduction -description: Overview of benchmarking in the c/ua agent framework +description: Overview of benchmarking in the Cua agent framework --- -The c/ua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks. +The Cua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks. ## Benchmark Types @@ -13,7 +13,7 @@ Computer-Agent benchmarks evaluate two key capabilities: ## Using State-of-the-Art Models -Let's see how to use the SOTA vision-language models in the c/ua agent framework. +Let's see how to use the SOTA vision-language models in the Cua agent framework. ### Plan Generation + Coordinate Generation @@ -46,7 +46,7 @@ agent.predict_click("find the button to open the settings") # (27, 450) ### Composed Agent -The c/ua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model. +The Cua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model.
```python # It can be paired with any LLM to form a composed agent: diff --git a/docs/content/docs/agent-sdk/benchmarks/meta.json b/docs/content/docs/agent-sdk/benchmarks/meta.json index 3573a892..8067bb01 100644 --- a/docs/content/docs/agent-sdk/benchmarks/meta.json +++ b/docs/content/docs/agent-sdk/benchmarks/meta.json @@ -1,9 +1,3 @@ { - "pages": [ - "introduction", - "screenspot-v2", - "screenspot-pro", - "interactive", - "osworld-verified" - ] -} \ No newline at end of file + "pages": ["introduction", "screenspot-v2", "screenspot-pro", "interactive", "osworld-verified"] +} diff --git a/docs/content/docs/agent-sdk/callbacks/meta.json b/docs/content/docs/agent-sdk/callbacks/meta.json index c9a072b0..b1b7085a 100644 --- a/docs/content/docs/agent-sdk/callbacks/meta.json +++ b/docs/content/docs/agent-sdk/callbacks/meta.json @@ -1,11 +1,5 @@ { - "title": "Callbacks", - "description": "Extending agents with callback hooks and built-in handlers", - "pages": [ - "agent-lifecycle", - "trajectories", - "logging", - "cost-saving", - "pii-anonymization" - ] + "title": "Callbacks", + "description": "Extending agents with callback hooks and built-in handlers", + "pages": ["agent-lifecycle", "trajectories", "logging", "cost-saving", "pii-anonymization"] } diff --git a/docs/content/docs/agent-sdk/meta.json b/docs/content/docs/agent-sdk/meta.json index 1083fc25..b2b75fa3 100644 --- a/docs/content/docs/agent-sdk/meta.json +++ b/docs/content/docs/agent-sdk/meta.json @@ -1,20 +1,20 @@ { - "title": "Agent SDK", - "description": "Build computer-using agents with the Agent SDK", - "pages": [ - "agent-loops", - "supported-agents", - "supported-model-providers", - "chat-history", - "message-format", - "customizing-computeragent", - "callbacks", - "custom-tools", - "custom-computer-handlers", - "prompt-caching", - "usage-tracking", - "benchmarks", - "migration-guide", - "integrations" - ] + "title": "Agent SDK", + "description": "Build computer-using agents with the Agent SDK", + "pages": [ + "agent-loops", + "supported-agents", + "supported-model-providers", + "chat-history", + "message-format", + "customizing-computeragent", + "callbacks", + "custom-tools", + "custom-computer-handlers", + "prompt-caching", + "usage-tracking", + "benchmarks", + "migration-guide", + "integrations" + ] } diff --git a/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx index 485074e2..593ca84b 100644 --- a/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx @@ -23,6 +23,7 @@ Any model that supports `predict_click()` can be used as the grounding component - InternVL 3.5 family: `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}` - UI‑TARS 1.5: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` (also supports full CU) - OmniParser (OCR): `omniparser` (requires combination with a LiteLLM vision model) +- Moondream3: `moondream3` (requires combination with a LiteLLM vision/text model) ## Supported Planning Models @@ -83,6 +84,23 @@ async for _ in agent.run("Help me fill out this form with my personal informatio pass ``` +### Moondream3 + GPT-4o + +Use the built-in Moondream3 grounding with any planning model. Moondream3 will detect UI elements on the latest screenshot, label them, and provide a user message listing detected element names. 
+ +```python +from agent import ComputerAgent +from computer import Computer +computer = Computer() +agent = ComputerAgent( + "moondream3+openai/gpt-4o", + tools=[computer] +) + +async for _ in agent.run("Close the settings window, then open the Downloads folder"): + pass +``` + ## Benefits of Composed Agents - **Specialized Grounding**: Use models optimized for click prediction accuracy diff --git a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx index b2487a7c..a3384b21 100644 --- a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx @@ -7,12 +7,25 @@ These models support complete computer-use agent functionality through `Computer All agent loops are compatible with any LLM provider supported by LiteLLM. -See [Running Models Locally](../local-models) for how to use Hugging Face and MLX models on your own machine. +See [Running Models Locally](/agent-sdk/supported-model-providers/local-models) for how to use Hugging Face and MLX models on your own machine. + +## Gemini CUA + +Gemini models with computer-use capabilities: + +- Gemini 2.5 CUA: `gemini-2.5-computer-use-preview-10-2025` + +```python +agent = ComputerAgent("gemini-2.5-computer-use-preview-10-2025", tools=[computer]) +async for _ in agent.run("Open Firefox and navigate to github.com"): + pass +``` ## Anthropic CUAs Claude models with computer-use capabilities: +- Claude 4.5: `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001` - Claude 4.1: `claude-opus-4-1-20250805` - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514` - Claude 3.7: `claude-3-7-sonnet-20250219` @@ -60,6 +73,17 @@ async for _ in agent.run("Open Firefox and navigate to github.com"): pass ``` +## Qwen3 VL + +Qwen3 VL family: +- `openrouter/qwen/qwen3-vl-235b-a22b-instruct` + +```python +agent = ComputerAgent("openrouter/qwen/qwen3-vl-235b-a22b-instruct", tools=[computer]) +async for _ in agent.run("Open Firefox and navigate to github.com"): + pass +``` + ## UI-TARS 1.5 Unified vision-language model for computer-use: diff --git a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx index 9270f183..20e95ddb 100644 --- a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx @@ -45,6 +45,12 @@ OCR-focused set-of-marks model that requires an LLM for click prediction: - `omniparser` (requires combination with any LiteLLM vision model) +### Moondream3 (Local Grounding) + +Moondream3 is a powerful small model that can perform UI grounding and click prediction.
+ +- `moondream3` + ## Usage Examples ```python diff --git a/docs/content/docs/agent-sdk/supported-agents/meta.json b/docs/content/docs/agent-sdk/supported-agents/meta.json index 5d50b124..e812c4fb 100644 --- a/docs/content/docs/agent-sdk/supported-agents/meta.json +++ b/docs/content/docs/agent-sdk/supported-agents/meta.json @@ -1,10 +1,5 @@ { - "title": "Supported Agents", - "description": "Models and configurations supported by the Agent SDK", - "pages": [ - "computer-use-agents", - "grounding-models", - "composed-agents", - "human-in-the-loop" - ] + "title": "Supported Agents", + "description": "Models and configurations supported by the Agent SDK", + "pages": ["computer-use-agents", "grounding-models", "composed-agents", "human-in-the-loop"] } diff --git a/docs/content/docs/agent-sdk/supported-model-providers/local-models.mdx b/docs/content/docs/agent-sdk/supported-model-providers/local-models.mdx index 0ba9d1e6..b147a538 100644 --- a/docs/content/docs/agent-sdk/supported-model-providers/local-models.mdx +++ b/docs/content/docs/agent-sdk/supported-model-providers/local-models.mdx @@ -2,7 +2,7 @@ title: Running Models Locally --- -You can run open-source LLMs and vision models on your own machine using cua, without relying on cloud APIs. This is ideal for development, privacy, or running on air-gapped systems. +You can run open-source LLMs and vision models on your own machine using Cua, without relying on cloud APIs. This is ideal for development, privacy, or running on air-gapped systems. ## Hugging Face (transformers) diff --git a/docs/content/docs/computer-sdk/cloud-vm-management.mdx b/docs/content/docs/computer-sdk/cloud-vm-management.mdx new file mode 100644 index 00000000..2c8f09db --- /dev/null +++ b/docs/content/docs/computer-sdk/cloud-vm-management.mdx @@ -0,0 +1,260 @@ +--- +title: Cloud VM Management +description: Manage your Cua Cloud sandboxes (VMs) via Python SDK or HTTP API +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + + +Using the Cua Cloud API, you can manage your Cua Cloud sandboxes (VMs) with Python or HTTP (curl). + +All examples require a CUA API key. You can obtain one from the [Dashboard](https://www.cua.ai/dashboard/keys). + +--- + +## List VMs + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + # Optional: point to a different API base + # os.environ["CUA_API_BASE"] = "https://api.cua.ai" + + provider = CloudProvider(api_key=api_key, verbose=False) + async with provider: + vms = await provider.list_vms() + for vm in vms: + print({ + "name": vm["name"], + "status": vm["status"], + "api_url": vm.get("api_url"), + "vnc_url": vm.get("vnc_url"), + }) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms" + ``` + + Responses: + - 200: Array of minimal VM objects with fields `{ name, password, status }` + - 401: Unauthorized (missing/invalid API key) + + ```json + [ + { + "name": "s-windows-x4snp46ebf", + "password": "49b8daa3", + "status": "running" + } + ] + ``` + + Status values: + + - `pending`: VM deployment in progress + - `running`: VM is active and accessible + - `stopped`: VM is stopped but not terminated + - `terminated`: VM has been permanently destroyed + - `failed`: VM deployment or operation failed + + + + +--- + +## Start a VM +Provide the VM name you want to start. 
+ + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" # e.g., "m-linux-96lcxd2c2k" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.run_vm(name) + print(resp) # { "name": name, "status": "starting" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/start" -i + ``` + + Responses: + - 204: No Content (start accepted) + - 401: Unauthorized (missing/invalid API key) + - 404: VM not found or not owned by the user + + ```text + HTTP/1.1 204 No Content + ``` + + + + +--- + +## Stop a VM +Stops the VM asynchronously. + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.stop_vm(name) + print(resp) # { "name": name, "status": "stopping" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/stop" + ``` + + Responses: + - 202: Accepted with `{ "status": "stopping" }` + - 401: Unauthorized (missing/invalid API key) + - 404: VM not found or not owned by the user + + ```json + { "status": "stopping" } + ``` + + + + +--- + +## Restart a VM +Restarts the VM asynchronously. + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.restart_vm(name) + print(resp) # { "name": name, "status": "restarting" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/restart" + ``` + + Responses: + - 202: Accepted with `{ "status": "restarting" }` + - 401: Unauthorized (missing/invalid API key) + - 404: VM not found or not owned by the user + + ```json + { "status": "restarting" } + ``` + + + + +--- + +## Query a VM by name +Query the computer-server running on the VM. Useful for checking details like status or OS type. 
+ + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + info = await provider.get_vm(name) + print(info) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl "https://my-vm-name.containers.cloud.cua.ai:8443/status" + ``` + + Responses: + - 200: Server available + + ```json + { "status": "ok", "os_type": "linux", "features": ["agent"] } + ``` + + + diff --git a/docs/content/docs/computer-sdk/computers.mdx b/docs/content/docs/computer-sdk/computers.mdx index 0b11d20d..d666bd99 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -1,6 +1,6 @@ --- title: Cua Computers -description: Understanding cua computer types and connection methods +description: Understanding Cua computer types and connection methods --- A corresponding Jupyter Notebook and NodeJS project are available for this documentation. @@ -9,9 +9,11 @@ Before we can automate apps using AI, we need to first connect to a Computer Ser Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop. -## cua cloud container +## Cloud Sandbox -This is a cloud container running the Computer Server. This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website. +**Easiest & safest way to get started - works on any host OS** + +This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/). @@ -21,11 +23,11 @@ This is a cloud container running the Computer Server. This is the easiest & saf computer = Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) - await computer.run() # Connect to the container + await computer.run() # Connect to the sandbox ``` @@ -35,33 +37,33 @@ This is a cloud container running the Computer Server. This is the easiest & saf const computer = new Computer({ osType: OSType.LINUX, - name: "your-container-name", + name: "your-sandbox-name", apiKey: "your-api-key" }); - await computer.run(); // Connect to the container + await computer.run(); // Connect to the sandbox ``` -## cua local containers +## Linux on Docker -cua provides local containers using different providers depending on your host operating system: +**Run Linux desktop locally on macOS, Windows, or Linux hosts** - - - - 1. Install lume cli +Cua provides two Docker images for running Linux desktops: + + + + + **Recommended for most use cases** - lightweight XFCE desktop with Firefox + + 1. Install Docker Desktop or Docker Engine + + 2. Pull the CUA XFCE image ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua container - - ```bash - lume run macos-sequoia-cua:latest + docker pull --platform=linux/amd64 trycua/cua-xfce:latest ``` 3. 
Connect with Computer @@ -70,44 +72,23 @@ cua provides local containers using different providers depending on your host o from computer import Computer computer = Computer( - os_type="macos", - provider_type="lume", - name="macos-sequoia-cua:latest" + os_type="linux", + provider_type="docker", + image="trycua/cua-xfce:latest", + name="my-xfce-container" ) - await computer.run() # Launch & connect to the container + await computer.run() # Launch & connect to Docker sandbox ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency + - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` + **Full-featured Ubuntu desktop** with additional applications - 3. Windows Sandbox will be automatically configured when you run the CLI - - ```python - from computer import Computer - - computer = Computer( - os_type="windows", - provider_type="winsandbox", - ephemeral=True # Windows Sandbox is always ephemeral - ) - - await computer.run() # Launch & connect to Windows Sandbox - ``` - - - - 1. Install Docker Desktop or Docker Engine - 2. Build or pull the CUA Ubuntu container + 2. Build or pull the CUA KASM image ```bash # Option 1: Pull from Docker Hub @@ -127,15 +108,70 @@ cua provides local containers using different providers depending on your host o os_type="linux", provider_type="docker", image="trycua/cua-ubuntu:latest", - name="my-cua-container" + name="my-kasm-container" ) - await computer.run() # Launch & connect to Docker container + await computer.run() # Launch & connect to Docker sandbox ``` +## Windows Sandbox + +**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + +1. Enable Windows Sandbox +2. Install pywinsandbox dependency + +```bash +pip install -U git+git://github.com/karkason/pywinsandbox.git +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="windows", + provider_type="winsandbox", + ephemeral=True # Windows Sandbox is always ephemeral +) + +await computer.run() # Launch & connect to Windows Sandbox +``` + +## macOS VM + +**macOS hosts only - requires Lume CLI** + +1. Install lume cli + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" +``` + +2. Start a local Cua macOS VM + +```bash +lume run macos-sequoia-cua:latest +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" +) + +await computer.run() # Launch & connect to the sandbox +``` + ## Your host desktop You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions. 
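A minimal sketch of the host-desktop flow, mirroring the quickstart: install and start the Computer Server, then point `Computer` at the host.

```bash
pip install cua-computer-server
python -m computer_server
```

```python
from computer import Computer

# Connects to the Computer Server running on this machine;
# agent actions will affect your real desktop.
computer = Computer(use_host_computer_server=True)
await computer.run()
```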
diff --git a/docs/content/docs/computer-sdk/meta.json index 92e14612..ab69fa17 100644 --- a/docs/content/docs/computer-sdk/meta.json +++ b/docs/content/docs/computer-sdk/meta.json @@ -1,10 +1,5 @@ { - "title": "Computer SDK", - "description": "Build computer-using agents with the Computer SDK", - "pages": [ - "computers", - "commands", - "computer-ui", - "sandboxed-python" - ] + "title": "Computer SDK", + "description": "Build computer-using agents with the Computer SDK", + "pages": ["computers", "cloud-vm-management", "commands", "computer-ui", "sandboxed-python"] } diff --git a/docs/content/docs/example-usecases/form-filling.mdx b/docs/content/docs/example-usecases/form-filling.mdx new file mode 100644 index 00000000..e819502e --- /dev/null +++ b/docs/content/docs/example-usecases/form-filling.mdx @@ -0,0 +1,491 @@ +--- +title: Form Filling +description: Enhance and Automate Interactions Between Form Filling and Local File Systems +--- + +import { EditableCodeBlock, EditableValue, S } from '@/components/editable-code-block'; +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +## Overview + +Cua can be used to automate interactions between form filling and local file systems on any operating system. Cua lets you interact with all the elements of a web page and local file systems to integrate between the two. + +This preset use case uses [Cua Computer](/computer-sdk/computers) to interact with a web page and local file systems along with [Agent Loops](/agent-sdk/agent-loops) to run the agent in a loop with message history. + +## Quickstart + +Create a `requirements.txt` file with the following dependencies: +```text +cua-agent +cua-computer +python-dotenv>=1.0.0 +``` + +And install: + +```bash +pip install -r requirements.txt +``` + +Create a `.env` file with the following environment variables: + +```text +ANTHROPIC_API_KEY=your-api-key +CUA_API_KEY=sk_cua-api01... +``` + +Select the environment you want to run the code in (*click on the underlined values in the code to edit them directly!*): + + + + + +{`import asyncio +import logging +import os +import signal +import traceback + +from agent import ComputerAgent +from computer import Computer, VMProviderType +from dotenv import load_dotenv + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def handle_sigint(sig, frame): + print("\\n\\nExecution interrupted by user. Exiting gracefully...") + exit(0) + + +async def fill_application(): + try: + async with Computer( + os_type="linux", + provider_type=VMProviderType.CLOUD, + name="`}{`", + api_key="`}{`", + verbosity=logging.INFO, + ) as computer: + + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + only_n_most_recent_images=3, + verbosity=logging.INFO, + trajectory_dir="trajectories", + use_prompt_caching=True, + max_trajectory_budget=5.0, + ) + + tasks = [ + "Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.", + "Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf."
+ ] + + history = [] + + for i, task in enumerate(tasks, 1): + print(f"\\n[Task {i}/{len(tasks)}] {task}") + + # Add user message to history + history.append({"role": "user", "content": task}) + + # Run agent with conversation history + async for result in agent.run(history, stream=False): + history += result.get("output", []) + + # Print output for debugging + for item in result.get("output", []): + if item.get("type") == "message": + content = item.get("content", []) + for content_part in content: + if content_part.get("text"): + logger.info(f"Agent: {content_part.get('text')}") + elif item.get("type") == "computer_call": + action = item.get("action", {}) + action_type = action.get("type", "") + logger.debug(f"Computer Action: {action_type}") + + print(f"✅ Task {i}/{len(tasks)} completed") + + print("\\n🎉 All tasks completed successfully!") + + except Exception as e: + logger.error(f"Error in fill_application: {e}") + traceback.print_exc() + raise + + +def main(): + try: + load_dotenv() + + if "ANTHROPIC_API_KEY" not in os.environ: + raise RuntimeError( + "Please set the ANTHROPIC_API_KEY environment variable.\\n" + "You can add it to a .env file in the project root." + ) + + if "CUA_API_KEY" not in os.environ: + raise RuntimeError( + "Please set the CUA_API_KEY environment variable.\\n" + "You can add it to a .env file in the project root." + ) + + signal.signal(signal.SIGINT, handle_sigint) + + asyncio.run(fill_application()) + + except Exception as e: + logger.error(f"Error running automation: {e}") + traceback.print_exc() + + +if __name__ == "__main__": + main()`} + + + + + + +{`import asyncio +import logging +import os +import signal +import traceback + +from agent import ComputerAgent +from computer import Computer, VMProviderType +from dotenv import load_dotenv + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def handle_sigint(sig, frame): + print("\\n\\nExecution interrupted by user. Exiting gracefully...") + exit(0) + + +async def fill_application(): + try: + async with Computer( + os_type="macos", + provider_type=VMProviderType.LUME, + name="`}{`", + verbosity=logging.INFO, + ) as computer: + + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + only_n_most_recent_images=3, + verbosity=logging.INFO, + trajectory_dir="trajectories", + use_prompt_caching=True, + max_trajectory_budget=5.0, + ) + + tasks = [ + "Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.", + "Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf." 
+ ] + + history = [] + + for i, task in enumerate(tasks, 1): + print(f"\\n[Task {i}/{len(tasks)}] {task}") + + # Add user message to history + history.append({"role": "user", "content": task}) + + # Run agent with conversation history + async for result in agent.run(history, stream=False): + history += result.get("output", []) + + # Print output for debugging + for item in result.get("output", []): + if item.get("type") == "message": + content = item.get("content", []) + for content_part in content: + if content_part.get("text"): + logger.info(f"Agent: {content_part.get('text')}") + elif item.get("type") == "computer_call": + action = item.get("action", {}) + action_type = action.get("type", "") + logger.debug(f"Computer Action: {action_type}") + + print(f"✅ Task {i}/{len(tasks)} completed") + + print("\\n🎉 All tasks completed successfully!") + + except Exception as e: + logger.error(f"Error in fill_application: {e}") + traceback.print_exc() + raise + + +def main(): + try: + load_dotenv() + + if "ANTHROPIC_API_KEY" not in os.environ: + raise RuntimeError( + "Please set the ANTHROPIC_API_KEY environment variable.\\n" + "You can add it to a .env file in the project root." + ) + + signal.signal(signal.SIGINT, handle_sigint) + + asyncio.run(fill_application()) + + except Exception as e: + logger.error(f"Error running automation: {e}") + traceback.print_exc() + + +if __name__ == "__main__": + main()`} + + + + + + +{`import asyncio +import logging +import os +import signal +import traceback + +from agent import ComputerAgent +from computer import Computer, VMProviderType +from dotenv import load_dotenv + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def handle_sigint(sig, frame): + print("\\n\\nExecution interrupted by user. Exiting gracefully...") + exit(0) + + +async def fill_application(): + try: + async with Computer( + os_type="windows", + provider_type=VMProviderType.WINDOWS_SANDBOX, + verbosity=logging.INFO, + ) as computer: + + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + only_n_most_recent_images=3, + verbosity=logging.INFO, + trajectory_dir="trajectories", + use_prompt_caching=True, + max_trajectory_budget=5.0, + ) + + tasks = [ + "Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.", + "Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf." 
+ ] + + history = [] + + for i, task in enumerate(tasks, 1): + print(f"\\n[Task {i}/{len(tasks)}] {task}") + + # Add user message to history + history.append({"role": "user", "content": task}) + + # Run agent with conversation history + async for result in agent.run(history, stream=False): + history += result.get("output", []) + + # Print output for debugging + for item in result.get("output", []): + if item.get("type") == "message": + content = item.get("content", []) + for content_part in content: + if content_part.get("text"): + logger.info(f"Agent: {content_part.get('text')}") + elif item.get("type") == "computer_call": + action = item.get("action", {}) + action_type = action.get("type", "") + logger.debug(f"Computer Action: {action_type}") + + print(f"✅ Task {i}/{len(tasks)} completed") + + print("\\n🎉 All tasks completed successfully!") + + except Exception as e: + logger.error(f"Error in fill_application: {e}") + traceback.print_exc() + raise + + +def main(): + try: + load_dotenv() + + if "ANTHROPIC_API_KEY" not in os.environ: + raise RuntimeError( + "Please set the ANTHROPIC_API_KEY environment variable.\\n" + "You can add it to a .env file in the project root." + ) + + signal.signal(signal.SIGINT, handle_sigint) + + asyncio.run(fill_application()) + + except Exception as e: + logger.error(f"Error running automation: {e}") + traceback.print_exc() + + +if __name__ == "__main__": + main()`} + + + + + + +{`import asyncio +import logging +import os +import signal +import traceback + +from agent import ComputerAgent +from computer import Computer, VMProviderType +from dotenv import load_dotenv + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def handle_sigint(sig, frame): + print("\\n\\nExecution interrupted by user. Exiting gracefully...") + exit(0) + + +async def fill_application(): + try: + async with Computer( + os_type="linux", + provider_type=VMProviderType.DOCKER, + name="`}{`", + verbosity=logging.INFO, + ) as computer: + + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + only_n_most_recent_images=3, + verbosity=logging.INFO, + trajectory_dir="trajectories", + use_prompt_caching=True, + max_trajectory_budget=5.0, + ) + + tasks = [ + "Visit https://www.overleaf.com/latex/templates/jakes-resume/syzfjbzwjncs.pdf and download the pdf.", + "Visit https://form.jotform.com/252881246782264 and fill the form from the information in the pdf." 
+ ] + + history = [] + + for i, task in enumerate(tasks, 1): + print(f"\\n[Task {i}/{len(tasks)}] {task}") + + # Add user message to history + history.append({"role": "user", "content": task}) + + # Run agent with conversation history + async for result in agent.run(history, stream=False): + history += result.get("output", []) + + # Print output for debugging + for item in result.get("output", []): + if item.get("type") == "message": + content = item.get("content", []) + for content_part in content: + if content_part.get("text"): + logger.info(f"Agent: {content_part.get('text')}") + elif item.get("type") == "computer_call": + action = item.get("action", {}) + action_type = action.get("type", "") + logger.debug(f"Computer Action: {action_type}") + + print(f"✅ Task {i}/{len(tasks)} completed") + + print("\\n🎉 All tasks completed successfully!") + + except Exception as e: + logger.error(f"Error in fill_application: {e}") + traceback.print_exc() + raise + + +def main(): + try: + load_dotenv() + + if "ANTHROPIC_API_KEY" not in os.environ: + raise RuntimeError( + "Please set the ANTHROPIC_API_KEY environment variable.\\n" + "You can add it to a .env file in the project root." + ) + + signal.signal(signal.SIGINT, handle_sigint) + + asyncio.run(fill_application()) + + except Exception as e: + logger.error(f"Error running automation: {e}") + traceback.print_exc() + + +if __name__ == "__main__": + main()`} + + + + + +## Next Steps + +- Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands) +- Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/) +- Experiment with different [Models and Providers](/agent-sdk/supported-model-providers/) diff --git a/docs/content/docs/example-usecases/meta.json b/docs/content/docs/example-usecases/meta.json new file mode 100644 index 00000000..60bba1c3 --- /dev/null +++ b/docs/content/docs/example-usecases/meta.json @@ -0,0 +1,5 @@ +{ + "title": "Example Use Cases", + "description": "Real-world examples of building with Cua", + "pages": ["form-filling"] +} diff --git a/docs/content/docs/index.mdx b/docs/content/docs/index.mdx index 22d5986e..9e4bf2ff 100644 --- a/docs/content/docs/index.mdx +++ b/docs/content/docs/index.mdx @@ -12,18 +12,13 @@ Cua is a framework for automating Windows, Mac, and Linux apps powered by comput Cua makes every stage of computer-using agent development simple: - **Development**: Use any LLM provider with liteLLM. The agent SDK makes multiple agent loop providers, trajectory tracing, caching, and budget management easy -- **Containerization**: cua offers Docker containers pre-installed with everything needed for AI-powered RPA -- **Deployment**: cua cloud gives you a production-ready cloud environment for your assistants +- **Containerization**: Cua offers Docker containers pre-installed with everything needed for AI-powered RPA +- **Deployment**: Cua cloud gives you a production-ready cloud environment for your assistants
- } href="/quickstart-ui" title="Quickstart (UI)"> - Try the cua Agent UI in your browser—no coding required. - - } href="/quickstart-devs" title="Quickstart (Developers)"> + } href="/quickstart-devs" title="Quickstart (Developers)"> Build with Python—full SDK and agent code examples. -
-
} href="/libraries/agent" title="API Reference"> Explore the agent SDK and APIs diff --git a/docs/content/docs/libraries/lume/faq.md b/docs/content/docs/libraries/lume/faq.md index 98d6b766..4009a94e 100644 --- a/docs/content/docs/libraries/lume/faq.md +++ b/docs/content/docs/libraries/lume/faq.md @@ -17,10 +17,12 @@ Lume follows the XDG Base Directory specification for the configuration file: - Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`) By default, other data is stored in: + - VM data: `~/.lume` - Cache files: `~/.lume/cache` The config file contains settings for: + - VM storage locations and the default location - Cache directory location - Whether caching is enabled @@ -88,6 +90,7 @@ lume delete ### How to Install macOS from an IPSW Image #### Create a new macOS VM using the latest supported IPSW image: + Run the following command to create a new macOS virtual machine using the latest available IPSW image: ```bash @@ -95,6 +98,7 @@ lume create --os macos --ipsw latest ``` #### Create a new macOS VM using a specific IPSW image: + To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source. Then, use the downloaded IPSW path: diff --git a/docs/content/docs/libraries/lume/meta.json b/docs/content/docs/libraries/lume/meta.json index 5f4d907a..42aeac1d 100644 --- a/docs/content/docs/libraries/lume/meta.json +++ b/docs/content/docs/libraries/lume/meta.json @@ -1,9 +1,3 @@ { - "pages": [ - "installation", - "prebuilt-images", - "cli-reference", - "http-api", - "faq" - ] + "pages": ["installation", "prebuilt-images", "cli-reference", "http-api", "faq"] } diff --git a/docs/content/docs/libraries/lumier/docker.mdx b/docs/content/docs/libraries/lumier/docker.mdx index 4ecd15d6..a14d0599 100644 --- a/docs/content/docs/libraries/lumier/docker.mdx +++ b/docs/content/docs/libraries/lumier/docker.mdx @@ -16,7 +16,7 @@ docker run -it --rm \ -e RAM_SIZE=8192 \ trycua/lumier:latest ``` -Access the VM in your browser at [http://localhost:8006](http://localhost:8006). +Access the VM in your browser at **http://localhost:8006**. After running the command above, you can access your macOS VM through a web browser (e.g., http://localhost:8006). 
diff --git a/docs/content/docs/libraries/lumier/meta.json b/docs/content/docs/libraries/lumier/meta.json index f6a8946c..bfe25724 100644 --- a/docs/content/docs/libraries/lumier/meta.json +++ b/docs/content/docs/libraries/lumier/meta.json @@ -1,8 +1,3 @@ { - "pages": [ - "installation", - "docker", - "docker-compose", - "building-lumier" - ] + "pages": ["installation", "docker", "docker-compose", "building-lumier"] } diff --git a/docs/content/docs/libraries/mcp-server/meta.json b/docs/content/docs/libraries/mcp-server/meta.json index 45fa4ba9..f5ccb9ba 100644 --- a/docs/content/docs/libraries/mcp-server/meta.json +++ b/docs/content/docs/libraries/mcp-server/meta.json @@ -1,10 +1,10 @@ { - "pages": [ - "installation", - "configuration", - "usage", - "tools", - "client-integrations", - "llm-integrations" - ] -} \ No newline at end of file + "pages": [ + "installation", + "configuration", + "usage", + "tools", + "client-integrations", + "llm-integrations" + ] +} diff --git a/docs/content/docs/meta.json b/docs/content/docs/meta.json index 9aea034a..c3517f0a 100644 --- a/docs/content/docs/meta.json +++ b/docs/content/docs/meta.json @@ -1,19 +1,19 @@ { - "title": "Home", - "description": "Documentation Home", - "root": true, - "defaultOpen": true, - "pages": [ - "index", - "quickstart-ui", - "quickstart-cli", - "quickstart-devs", - "telemetry", - "---[BookCopy]Computer Playbook---", - "...computer-sdk", - "---[BookCopy]Agent Playbook---", - "...agent-sdk", - "---[CodeXml]API Reference---", - "...libraries" - ] -} \ No newline at end of file + "title": "Home", + "description": "Documentation Home", + "root": true, + "defaultOpen": true, + "pages": [ + "index", + "quickstart-devs", + "quickstart-cli", + "telemetry", + "example-usecases", + "---[BookCopy]Computer Playbook---", + "...computer-sdk", + "---[BookCopy]Agent Playbook---", + "...agent-sdk", + "---[CodeXml]API Reference---", + "...libraries" + ] +} diff --git a/docs/content/docs/quickstart-cli.mdx b/docs/content/docs/quickstart-cli.mdx index 668ec49d..19073dea 100644 --- a/docs/content/docs/quickstart-cli.mdx +++ b/docs/content/docs/quickstart-cli.mdx @@ -1,6 +1,6 @@ --- title: Quickstart (CLI) -description: Get started with the cua Agent CLI in 4 steps +description: Get started with the Cua Agent CLI in 4 steps icon: Rocket --- @@ -8,14 +8,14 @@ import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; -Get up and running with the cua Agent CLI in 4 simple steps. +Get up and running with the Cua Agent CLI in 4 simple steps. ## Introduction -cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent CLI provides a clean terminal interface to control your remote computer using natural language commands. +Cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent CLI provides a clean terminal interface to control your remote computer using natural language commands. @@ -23,39 +23,45 @@ cua combines Computer (interface) + Agent (AI) for automating desktop apps. The ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your Cua computer. **Cloud Sandbox is recommended** for the easiest setup: + + + + + **Easiest & safest way to get started - works on any host OS** - - - - **Easiest & safest way to get started** - 1. 
Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** 3. Create a **Medium, Ubuntu 22** container 4. Note your container name and API key - + Your cloud container will be automatically configured and ready to use. - - - 1. Install lume cli + + + **Run Linux desktop locally on macOS, Windows, or Linux hosts** + + 1. Install Docker Desktop or Docker Engine + + 2. Pull the CUA XFCE container (lightweight desktop) ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + docker pull --platform=linux/amd64 trycua/cua-xfce:latest ``` - 2. Start a local cua container + Or use KASM for a full-featured desktop: ```bash - lume run macos-sequoia-cua:latest + docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) + + + **Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + + 1. Enable Windows Sandbox 2. Install pywinsandbox dependency ```bash @@ -65,14 +71,20 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + **macOS hosts only - requires Lume CLI** + + 1. Install lume cli ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + ``` + + 2. Start a local Cua macOS VM + + ```bash + lume run macos-sequoia-cua:latest ``` @@ -82,7 +94,7 @@ Choose how you want to run your cua computer. **Cloud containers are recommended -## Install cua +## Install Cua @@ -116,7 +128,7 @@ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | ie ```bash uv python install 3.12 -# uv will install cua dependencies automatically when you use --with "cua-agent[cli]" +# uv will install Cua dependencies automatically when you use --with "cua-agent[cli]" ``` @@ -166,7 +178,7 @@ conda create -n cua python=3.12 conda activate cua ``` -### Install cua +### Install Cua ```bash pip install "cua-agent[cli]" cua-computer @@ -176,7 +188,7 @@ pip install "cua-agent[cli]" cua-computer -### Install cua +### Install Cua ```bash pip install "cua-agent[cli]" cua-computer @@ -190,7 +202,7 @@ pip install "cua-agent[cli]" cua-computer -## Run cua CLI +## Run Cua CLI Choose your preferred AI model: @@ -219,6 +231,7 @@ python -m agent.cli openai/computer-use-preview ```bash +uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-5-20250929 uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-20250514 uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-1-20250805 uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-20250514 @@ -229,6 +242,7 @@ uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022 ```bash +python -m agent.cli anthropic/claude-sonnet-4-5-20250929 python -m agent.cli anthropic/claude-opus-4-1-20250805 python -m agent.cli anthropic/claude-opus-4-20250514 python -m agent.cli anthropic/claude-sonnet-4-20250514 @@ -298,8 +312,8 @@ python -m agent.cli omniparser+ollama_chat/llama3.2:latest If you haven't set up environment variables, the CLI will guide you through the setup: -1. **Container Name**: Enter your cua container name (or get one at [trycua.com](https://www.trycua.com/)) -2. 
**CUA API Key**: Enter your cua API key +1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [trycua.com](https://www.trycua.com/)) +2. **CUA API Key**: Enter your Cua API key 3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.) ### Start Chatting @@ -326,6 +340,4 @@ You can ask your agent to perform actions like: --- -For advanced Python usage and GUI interface, see the [Quickstart (GUI)](/quickstart-ui) and [Quickstart for Developers](/quickstart-devs). - -For running models locally, see [Running Models Locally](/agent-sdk/local-models). +For running models locally, see [Running Models Locally](/agent-sdk/supported-model-providers/local-models). diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index 37367709..4bd5b9ab 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -1,61 +1,60 @@ --- -title: Quickstart (for Developers) -description: Get started with cua in 5 steps +title: Quickstart +description: Get started with Cua in three steps icon: Rocket --- import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -Get up and running with cua in 5 simple steps. +This quickstart guides you through setting up your [computer environment](#set-up-your-computer-environment), programmatic control with a [Cua computer](#using-computer), and task automation with a [Cua agent](#using-agent): - - -## Introduction - -cua combines Computer (interface) + Agent (AI) for automating desktop apps. Computer handles clicks/typing, Agent provides the intelligence. - - ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute. + +You can run your Cua computer in the cloud (recommended for easiest setup), locally on macOS with Lume, locally on Windows with a Windows Sandbox, or in a Docker container on any platform. Choose the option that matches your system and needs. + + + + + Cua Cloud Sandbox provides virtual machines that run Ubuntu. - - - - **Easiest & safest way to get started** - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** - 3. Create a **Medium, Ubuntu 22** container - 4. Note your container name and API key - - Your cloud container will be automatically configured and ready to use. + 3. Create a **Medium, Ubuntu 22** sandbox + 4. Note your sandbox name and API key + + Your Cloud Sandbox will be automatically configured and ready to use. - + + + Lume containers are macOS virtual machines that run on a macOS host machine. - 1. Install lume cli + 1. Install the Lume CLI: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` - 2. Start a local cua container + 2. Start a local Cua sandbox: ```bash lume run macos-sequoia-cua:latest ``` - + + + Windows Sandbox provides Windows virtual environments that run on a Windows host machine. - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency + 1. Enable [Windows Sandbox](https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/windows-sandbox-install) (requires Windows 10 Pro/Enterprise or Windows 11) + 2. 
Install the `pywinsandbox` dependency: ```bash pip install -U git+git://github.com/karkason/pywinsandbox.git @@ -64,11 +63,13 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + Docker provides a way to run Ubuntu containers on any host machine. + + 1. Install Docker Desktop or Docker Engine: + + 2. Pull the CUA Ubuntu sandbox: ```bash docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest @@ -81,90 +82,203 @@ Choose how you want to run your cua computer. **Cloud containers are recommended -## Install cua +## Using Computer + +Connect to your Cua computer and perform basic interactions, such as taking screenshots or simulating user input. + Install the Cua computer Python SDK: ```bash - pip install "cua-agent[all]" cua-computer + pip install cua-computer + ``` - # or install specific providers - pip install "cua-agent[openai]" # OpenAI computer-use-preview support - pip install "cua-agent[anthropic]" # Anthropic Claude support - pip install "cua-agent[omni]" # Omniparser + any LLM support - pip install "cua-agent[uitars]" # UI-TARS - pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support - pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support - pip install "cua-agent[glm45v-hf]" # GLM-4.5V + Huggingface support - pip install "cua-agent[ui]" # Gradio UI support + Then, connect to your desired computer environment: + + + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="cloud", + name="your-sandbox-name", + api_key="your-api-key" + ) + await computer.run() # Connect to the sandbox + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" + ) + await computer.run() # Launch & connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="windows", + provider_type="windows_sandbox" + ) + await computer.run() # Launch & connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="docker", + name="trycua/cua-ubuntu:latest" + ) + await computer.run() # Launch & connect to the container + ``` + + + Install and run `cua-computer-server`: + ```bash + pip install cua-computer-server + python -m computer_server + ``` + + Then, use the `Computer` object to connect: + ```python + from computer import Computer + + computer = Computer(use_host_computer_server=True) + await computer.run() # Connect to the host desktop + ``` + + + + Once connected, you can perform interactions: + ```python + try: + # Take a screenshot of the computer's current display + screenshot = await computer.interface.screenshot() + # Simulate a left-click at coordinates (100, 100) + await computer.interface.left_click(100, 100) + # Type "Hello!" 
into the active application + await computer.interface.type("Hello!") + finally: + await computer.close() ``` + Install the Cua computer TypeScript SDK: ```bash npm install @trycua/computer ``` - - - + Then, connect to your desired computer environment: - + + + ```typescript + import { Computer, OSType } from '@trycua/computer'; -## Using Computer + const computer = new Computer({ + osType: OSType.LINUX, + name: "your-sandbox-name", + apiKey: "your-api-key" + }); + await computer.run(); // Connect to the sandbox + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - - - ```python - from computer import Computer + const computer = new Computer({ + osType: OSType.MACOS, + providerType: ProviderType.LUME, + name: "macos-sequoia-cua:latest" + }); + await computer.run(); // Launch & connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - async with Computer( - os_type="linux", - provider_type="cloud", - name="your-container-name", - api_key="your-api-key" - ) as computer: - # Take screenshot - screenshot = await computer.interface.screenshot() + const computer = new Computer({ + osType: OSType.WINDOWS, + providerType: ProviderType.WINDOWS_SANDBOX + }); + await computer.run(); // Launch & connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - # Click and type - await computer.interface.left_click(100, 100) - await computer.interface.type("Hello!") - ``` + const computer = new Computer({ + osType: OSType.LINUX, + providerType: ProviderType.DOCKER, + name: "trycua/cua-ubuntu:latest" + }); + await computer.run(); // Launch & connect to the container + ``` + + + First, install and run `cua-computer-server`: + ```bash + pip install cua-computer-server + python -m computer_server + ``` - - + Then, use the `Computer` object to connect: + ```typescript + import { Computer } from '@trycua/computer'; + + const computer = new Computer({ useHostComputerServer: true }); + await computer.run(); // Connect to the host desktop + ``` + + + + Once connected, you can perform interactions: ```typescript - import { Computer, OSType } from '@trycua/computer'; - - const computer = new Computer({ - osType: OSType.LINUX, - name: "your-container-name", - apiKey: "your-api-key" - }); - - await computer.run(); - try { - // Take screenshot + // Take a screenshot of the computer's current display const screenshot = await computer.interface.screenshot(); - - // Click and type + // Simulate a left-click at coordinates (100, 100) await computer.interface.leftClick(100, 100); + // Type "Hello!" into the active application await computer.interface.typeText("Hello!"); } finally { await computer.close(); } ``` - +Learn more about computers in the [Cua computers documentation](/computer-sdk/computers). You will see how to automate computers with agents in the next step. + ## Using Agent +Utilize an Agent to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment. + +Install the Cua agent Python SDK: +```bash +pip install "cua-agent[all]" +``` + +Then, use the `ComputerAgent` object: ```python from agent import ComputerAgent @@ -182,12 +296,14 @@ async for result in agent.run(messages): print(item["content"][0]["text"]) ``` +Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available models in [Supported Models](/agent-sdk/supported-model-providers/). 
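The two steps above compose naturally. As a minimal end-to-end sketch (not part of this diff), the Cloud Sandbox `Computer` from "Using Computer" can be wired into a `ComputerAgent`; the sandbox name, API key, and task string are placeholders:

```python
import asyncio

from agent import ComputerAgent
from computer import Computer


async def main():
    # Cloud Sandbox connection, as in "Using Computer"; name/api_key are placeholders.
    computer = Computer(
        os_type="linux",
        provider_type="cloud",
        name="your-sandbox-name",
        api_key="your-api-key",
    )
    await computer.run()  # Connect to the sandbox
    try:
        agent = ComputerAgent(
            model="openai/computer-use-preview",
            tools=[computer],             # let the agent drive this sandbox
            only_n_most_recent_images=3,  # trim screenshot history sent to the model
        )
        messages = [{"role": "user", "content": "Open Firefox and go to trycua.com"}]
        async for result in agent.run(messages):
            for item in result.get("output", []):
                if item.get("type") == "message":
                    print(item["content"][0]["text"])
    finally:
        await computer.close()


asyncio.run(main())
```

The same loop works unchanged with any of the other providers shown above; only the `Computer(...)` constructor arguments differ.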
+
 ## Next Steps

-{/* - Explore the [SDK documentation](/sdk) for advanced features */}
-
-- Learn about [trajectory tracking](/agent-sdk/callbacks/trajectories) and [callbacks](/agent-sdk/callbacks/agent-lifecycle)
-- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for support
+- Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands)
+- Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/)
+- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help
+- Try out the [Form Filling](/example-usecases/form-filling) preset use case
diff --git a/docs/content/docs/quickstart-ui.mdx b/docs/content/docs/quickstart-ui.mdx
deleted file mode 100644
index 72bac935..00000000
--- a/docs/content/docs/quickstart-ui.mdx
+++ /dev/null
@@ -1,216 +0,0 @@
----
-title: Quickstart (GUI)
-description: Get started with the cua Agent UI in 3 steps
-icon: Rocket
----
-
-import { Step, Steps } from 'fumadocs-ui/components/steps';
-import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
-import { Accordion, Accordions } from 'fumadocs-ui/components/accordion';
-
-Get up and running with the cua Agent UI in 3 simple steps.
-
-
-## Introduction
-
-cua combines Computer (interface) + Agent (AI) for automating desktop apps. The Agent UI provides a simple chat interface to control your remote computer using natural language.
-
-
-## Set Up Your Computer Environment
-
-Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup:
-
-    **Easiest & safest way to get started**
-
-    1. Go to [trycua.com/signin](https://www.trycua.com/signin)
-    2. Navigate to **Dashboard > Containers > Create Instance**
-    3. Create a **Medium, Ubuntu 22** container
-    4. Note your container name and API key
-
-    Your cloud container will be automatically configured and ready to use.
-
-    1. Install lume cli
-
-    ```bash
-    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
-    ```
-
-    2. Start a local cua container
-
-    ```bash
-    lume run macos-sequoia-cua:latest
-    ```
-
-    1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11)
-    2. Install pywinsandbox dependency
-
-    ```bash
-    pip install -U git+git://github.com/karkason/pywinsandbox.git
-    ```
-
-    3. Windows Sandbox will be automatically configured when you run the CLI
-
-    1. Install Docker Desktop or Docker Engine
-
-    2. 
Pull the CUA Ubuntu container - - ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest - ``` - - - - - - - - -## Install and Run cua - - - - - -### Install uv - - - - -```bash -# Use curl to download the script and execute it with sh: -curl -LsSf https://astral.sh/uv/install.sh | sh - -# If your system doesn't have curl, you can use wget: -# wget -qO- https://astral.sh/uv/install.sh | sh -``` - - - - -```powershell -# Use irm to download the script and execute it with iex: -powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" -``` - - - - -### Install Python 3.12 - -```bash -uv python install 3.12 -``` - -### Run cua - -```bash -uv run --with "cua-agent[ui]" -m agent.ui -``` - - - - - -### Install conda - - - - -```bash -mkdir -p ~/miniconda3 -curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh -bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 -rm ~/miniconda3/miniconda.sh -source ~/miniconda3/bin/activate -``` - - - - -```bash -mkdir -p ~/miniconda3 -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh -bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 -rm ~/miniconda3/miniconda.sh -source ~/miniconda3/bin/activate -``` - - - - -```powershell -wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile ".\miniconda.exe" -Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait -del .\miniconda.exe -``` - - - - -### Create and activate Python 3.12 environment - -```bash -conda create -n cua python=3.12 -conda activate cua -``` - -### Install and run cua - -```bash -pip install "cua-agent[ui]" cua-computer -python -m agent.ui -``` - - - - - -### Install cua - -```bash -pip install "cua-agent[ui]" cua-computer -``` - -### Run the Agent UI - -```bash -python -m agent.ui -``` - - - - - -### Start Chatting - -Open your browser to the displayed URL and start chatting with your computer-using agent. - -You can ask your agent to perform actions like: - -- "Open Firefox and go to github.com" -- "Take a screenshot and tell me what's on the screen" -- "Type 'Hello world' into the terminal" - - - - ---- - -For advanced Python usage, see the [Quickstart for Developers](/quickstart-devs). 
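Both quickstarts above ultimately need the same credentials. The sketch below (not part of this diff) seeds them up front so the CLI can skip its interactive prompts; the variable names `CUA_API_KEY` and `CUA_CONTAINER_NAME` mirror the examples later in this diff, though whether the CLI reads exactly these names is an assumption:

```python
import os
import subprocess

# Assumed variable names (mirrored from the examples/ scripts in this diff);
# replace the placeholder values with your real keys and sandbox name.
os.environ.setdefault("CUA_API_KEY", "your-cua-api-key")
os.environ.setdefault("CUA_CONTAINER_NAME", "your-sandbox-name")
os.environ.setdefault("ANTHROPIC_API_KEY", "your-anthropic-api-key")

# Launch the chat CLI with any model from the lists above.
subprocess.run(
    ["python", "-m", "agent.cli", "anthropic/claude-sonnet-4-5-20250929"],
    check=True,
)
```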
diff --git a/docs/package.json b/docs/package.json index c6c083c9..232e4f47 100644 --- a/docs/package.json +++ b/docs/package.json @@ -16,6 +16,7 @@ "mermaid": "^11.8.1", "next": "15.3.3", "next-themes": "^0.4.6", + "posthog-js": "^1.276.0", "react": "^19.1.0", "react-dom": "^19.1.0", "remark": "^15.0.1", @@ -42,4 +43,4 @@ "sharp" ] } -} \ No newline at end of file +} diff --git a/docs/pnpm-lock.yaml b/docs/pnpm-lock.yaml index 21945e6b..21932097 100644 --- a/docs/pnpm-lock.yaml +++ b/docs/pnpm-lock.yaml @@ -29,6 +29,9 @@ importers: next-themes: specifier: ^0.4.6 version: 0.4.6(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + posthog-js: + specifier: ^1.276.0 + version: 1.276.0 react: specifier: ^19.1.0 version: 19.1.0 @@ -489,6 +492,9 @@ packages: resolution: {integrity: sha512-6yB0117ZjsgNevZw3LP+bkrZa9mU/POPVaXgzMPOBbBc35w2P3R+1vMMhEfC06kYCpd5bf0jodBaTkYQW5TVeQ==} engines: {node: '>= 20.0.0'} + '@posthog/core@1.3.0': + resolution: {integrity: sha512-hxLL8kZNHH098geedcxCz8y6xojkNYbmJEW+1vFXsmPcExyCXIUUJ/34X6xa9GcprKxd0Wsx3vfJQLQX4iVPhw==} + '@radix-ui/number@1.1.1': resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==} @@ -1221,6 +1227,9 @@ packages: confbox@0.2.2: resolution: {integrity: sha512-1NB+BKqhtNipMsov4xI/NnhCKp9XG9NamYp5PVm9klAT0fsrNPjaFICsCFhNhwZJKNh7zB/3q8qXz0E9oaMNtQ==} + core-js@3.46.0: + resolution: {integrity: sha512-vDMm9B0xnqqZ8uSBpZ8sNtRtOdmfShrvT6h2TuQGLs0Is+cR0DYbj/KWP6ALVNbWPpqA/qPLoOuppJN07humpA==} + cose-base@1.0.3: resolution: {integrity: sha512-s9whTXInMSgAp/NVXVNuVxVKzGH2qck3aQlVHxDCdAEPgtMKwc4Wq6/QKhgdEdgbLSi9rBTAcPoRa6JpiG4ksg==} @@ -1492,6 +1501,9 @@ packages: picomatch: optional: true + fflate@0.4.8: + resolution: {integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==} + fumadocs-core@15.5.1: resolution: {integrity: sha512-5eJPJw+BFWFdgrtWPQ9aAZAhhsyuZAwth8OjBd9R77sXoIoae4Y4lJZMq3BeSpJZcuIAOVbSCS+pJhsBAoXJ8g==} peerDependencies: @@ -2012,6 +2024,20 @@ packages: resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} engines: {node: ^10 || ^12 || >=14} + posthog-js@1.276.0: + resolution: {integrity: sha512-FYZE1037LrAoKKeUU0pUL7u8WwNK2BVeg5TFApwquVPUdj9h7u5Z077A313hPN19Ar+7Y+VHxqYqdHc4VNsVgw==} + peerDependencies: + '@rrweb/types': 2.0.0-alpha.17 + rrweb-snapshot: 2.0.0-alpha.17 + peerDependenciesMeta: + '@rrweb/types': + optional: true + rrweb-snapshot: + optional: true + + preact@10.27.2: + resolution: {integrity: sha512-5SYSgFKSyhCbk6SrXyMpqjb5+MQBgfvEKE/OC+PujcY34sOpqtr+0AZQtPYx5IA6VxynQ7rUPCtKzyovpj9Bpg==} + prettier@3.6.2: resolution: {integrity: sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==} engines: {node: '>=14'} @@ -2317,6 +2343,9 @@ packages: vscode-uri@3.0.8: resolution: {integrity: sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==} + web-vitals@4.2.4: + resolution: {integrity: sha512-r4DIlprAGwJ7YM11VZp4R884m0Vmgr6EAKe3P+kO0PPj3Unqyvv59rczf6UiGcb9Z8QxZVcqKNwv/g0WNdWwsw==} + yallist@5.0.0: resolution: {integrity: sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==} engines: {node: '>=18'} @@ -2642,6 +2671,8 @@ snapshots: '@orama/orama@3.1.7': {} + '@posthog/core@1.3.0': {} + '@radix-ui/number@1.1.1': {} '@radix-ui/primitive@1.1.2': {} @@ -3378,6 +3409,8 @@ snapshots: confbox@0.2.2: {} + core-js@3.46.0: {} + cose-base@1.0.3: 
dependencies: layout-base: 1.0.2 @@ -3702,6 +3735,8 @@ snapshots: optionalDependencies: picomatch: 4.0.2 + fflate@0.4.8: {} + fumadocs-core@15.5.1(@types/react@19.1.8)(next@15.3.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0): dependencies: '@formatjs/intl-localematcher': 0.6.1 @@ -4566,6 +4601,16 @@ snapshots: picocolors: 1.1.1 source-map-js: 1.2.1 + posthog-js@1.276.0: + dependencies: + '@posthog/core': 1.3.0 + core-js: 3.46.0 + fflate: 0.4.8 + preact: 10.27.2 + web-vitals: 4.2.4 + + preact@10.27.2: {} + prettier@3.6.2: {} property-information@7.1.0: {} @@ -4934,6 +4979,8 @@ snapshots: vscode-uri@3.0.8: {} + web-vitals@4.2.4: {} + yallist@5.0.0: {} zod@3.25.76: {} diff --git a/docs/source.config.ts b/docs/source.config.ts index c3b9e719..dd019e8f 100644 --- a/docs/source.config.ts +++ b/docs/source.config.ts @@ -1,9 +1,4 @@ -import { - defineConfig, - defineDocs, - frontmatterSchema, - metaSchema, -} from 'fumadocs-mdx/config'; +import { defineConfig, defineDocs, frontmatterSchema, metaSchema } from 'fumadocs-mdx/config'; import { z } from 'zod'; // You can customise Zod schemas for frontmatter and `meta.json` here diff --git a/docs/src/app/(home)/[[...slug]]/page.tsx b/docs/src/app/(home)/[[...slug]]/page.tsx index a9a36b4f..dc283204 100644 --- a/docs/src/app/(home)/[[...slug]]/page.tsx +++ b/docs/src/app/(home)/[[...slug]]/page.tsx @@ -1,27 +1,17 @@ import { getApiVersions, source } from '@/lib/source'; import { getMDXComponents } from '@/mdx-components'; import { buttonVariants } from 'fumadocs-ui/components/ui/button'; -import { - Popover, - PopoverContent, - PopoverTrigger, -} from 'fumadocs-ui/components/ui/popover'; +import { Popover, PopoverContent, PopoverTrigger } from 'fumadocs-ui/components/ui/popover'; import { createRelativeLink } from 'fumadocs-ui/mdx'; -import { - DocsBody, - DocsDescription, - DocsPage, - DocsTitle, -} from 'fumadocs-ui/page'; +import { DocsBody, DocsDescription, DocsPage, DocsTitle } from 'fumadocs-ui/page'; import { cn } from 'fumadocs-ui/utils/cn'; import { ChevronDown, CodeXml, ExternalLink } from 'lucide-react'; import type { Metadata } from 'next'; import Link from 'next/link'; import { notFound, redirect } from 'next/navigation'; +import { PageFeedback } from '@/components/page-feedback'; -export default async function Page(props: { - params: Promise<{ slug?: string[] }>; -}) { +export default async function Page(props: { params: Promise<{ slug?: string[] }> }) { const params = await props.params; const slug = params.slug || []; const page = source.getPage(slug); @@ -65,7 +55,8 @@ export default async function Page(props: { xmlns="http://www.w3.org/2000/svg" fill="currentColor" className="h-5" - viewBox="0 0 448 512"> + viewBox="0 0 448 512" + > Windows @@ -75,7 +66,8 @@ export default async function Page(props: { xmlns="http://www.w3.org/2000/svg" fill="currentColor" className="h-5" - viewBox="0 0 384 512"> + viewBox="0 0 384 512" + > macOS @@ -85,7 +77,8 @@ export default async function Page(props: { xmlns="http://www.w3.org/2000/svg" fill="currentColor" className="h-5" - viewBox="0 0 448 512"> + viewBox="0 0 448 512" + > Linux @@ -95,10 +88,7 @@ export default async function Page(props: {
@@ -189,10 +178,7 @@ export default async function Page(props: {
   };
   return (
-    [JSX lost in extraction: a wrapper element split across multiple lines]
+    [the same wrapper element collapsed onto one line]
@@ -208,15 +194,14 @@ export default async function Page(props: { size: 'sm', className: 'gap-2', }) - )}> + )} + > {(() => { // Find the current version label let currentLabel = 'Current'; if (apiVersionSlug.length > 0) { const found = versionItems.find( - (item) => - item.label !== 'Current' && - apiVersionSlug[0] === item.label + (item) => item.label !== 'Current' && apiVersionSlug[0] === item.label ); if (found) currentLabel = found.label; } @@ -237,10 +222,8 @@ export default async function Page(props: { : `/api/${apiSection}/${item.label}`; // Highlight current version const isCurrent = - (item.label === 'Current' && - apiVersionSlug.length === 0) || - (item.label !== 'Current' && - apiVersionSlug[0] === item.label); + (item.label === 'Current' && apiVersionSlug.length === 0) || + (item.label !== 'Current' && apiVersionSlug[0] === item.label); return ( + )} + > API version: {item.label} ); @@ -258,9 +242,7 @@ export default async function Page(props: { )}
-      [JSX lost in extraction: an opening wrapper element]
-        {page.data.description}
-      [its closing tag]
+      [the same element rendering {page.data.description} on a single line]
@@ -270,6 +252,7 @@ export default async function Page(props: { a: createRelativeLink(source, page), })} /> + ); @@ -288,8 +271,7 @@ export async function generateMetadata(props: { let title = `${page.data.title} | Cua Docs`; if (page.url.includes('api')) title = `${page.data.title} | Cua API Docs`; - if (page.url.includes('guide')) - title = ` Guide: ${page.data.title} | Cua Docs`; + if (page.url.includes('guide')) title = ` Guide: ${page.data.title} | Cua Docs`; return { title, diff --git a/docs/src/app/api/posthog/[...path]/route.ts b/docs/src/app/api/posthog/[...path]/route.ts new file mode 100644 index 00000000..24e2e751 --- /dev/null +++ b/docs/src/app/api/posthog/[...path]/route.ts @@ -0,0 +1,75 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ path: string[] }> } +) { + const { path } = await params; + const url = new URL(request.url); + + const targetUrl = `${process.env.NEXT_PUBLIC_POSTHOG_HOST}/${path.join('/')}${url.search}`; + + try { + const response = await fetch(targetUrl, { + method: 'GET', + headers: { + 'Content-Type': request.headers.get('Content-Type') || 'application/json', + }, + }); + + // Handle 204 No Content responses + if (response.status === 204) { + return new NextResponse(null, { status: 204 }); + } + + const data = await response.arrayBuffer(); + return new NextResponse(data, { + status: response.status, + headers: { + 'Content-Type': response.headers.get('Content-Type') || 'application/json', + }, + }); + } catch (error) { + console.error('PostHog proxy error:', error); + return new NextResponse('Error proxying request', { status: 500 }); + } +} + +export async function POST( + request: NextRequest, + { params }: { params: Promise<{ path: string[] }> } +) { + const { path } = await params; + const url = new URL(request.url); + + const targetUrl = `${process.env.NEXT_PUBLIC_POSTHOG_HOST}/${path.join('/')}${url.search}`; + + try { + const body = await request.arrayBuffer(); + const contentType = request.headers.get('Content-Type') || 'application/x-www-form-urlencoded'; + + const response = await fetch(targetUrl, { + method: 'POST', + headers: { + 'Content-Type': contentType, + }, + body, + }); + + // Handle 204 No Content responses + if (response.status === 204) { + return new NextResponse(null, { status: 204 }); + } + + const data = await response.arrayBuffer(); + return new NextResponse(data, { + status: response.status, + headers: { + 'Content-Type': response.headers.get('Content-Type') || 'application/json', + }, + }); + } catch (error) { + console.error('PostHog proxy error:', error); + return new NextResponse('Error proxying request', { status: 500 }); + } +} diff --git a/docs/src/app/layout.config.tsx b/docs/src/app/layout.config.tsx index f29509bd..d43acae6 100644 --- a/docs/src/app/layout.config.tsx +++ b/docs/src/app/layout.config.tsx @@ -42,14 +42,14 @@ export const baseOptions: BaseLayoutProps = { links: [ { url: 'https://trycua.com', - text: 'cua home', + text: 'Cua home', type: 'icon', icon: , external: false, }, { url: 'https://discord.com/invite/mVnXXpdE85', - text: 'cua discord', + text: 'Cua discord', type: 'icon', icon: ( <> diff --git a/docs/src/app/layout.tsx b/docs/src/app/layout.tsx index 2fdc4fea..97b60870 100644 --- a/docs/src/app/layout.tsx +++ b/docs/src/app/layout.tsx @@ -2,6 +2,11 @@ import './global.css'; import { RootProvider } from 'fumadocs-ui/provider'; import { Inter } from 'next/font/google'; import type { ReactNode } from 
'react'; +import { PHProvider, PostHogPageView } from '@/providers/posthog-provider'; +import { AnalyticsTracker } from '@/components/analytics-tracker'; +import { CookieConsent } from '@/components/cookie-consent'; +import { Footer } from '@/components/footer'; +import { Suspense } from 'react'; const inter = Inter({ subsets: ['latin'], @@ -14,9 +19,15 @@ export default function Layout({ children }: { children: ReactNode }) { - - {children} - + + + + + + {children} +
+ + ); diff --git a/docs/src/app/llms.mdx/[[...slug]]/route.ts b/docs/src/app/llms.mdx/[[...slug]]/route.ts index 15b8e678..2a87ed97 100644 --- a/docs/src/app/llms.mdx/[[...slug]]/route.ts +++ b/docs/src/app/llms.mdx/[[...slug]]/route.ts @@ -5,10 +5,7 @@ import { notFound } from 'next/navigation'; export const revalidate = false; -export async function GET( - _req: NextRequest, - { params }: { params: Promise<{ slug?: string[] }> } -) { +export async function GET(_req: NextRequest, { params }: { params: Promise<{ slug?: string[] }> }) { const { slug } = await params; const page = source.getPage(slug); if (!page) notFound(); diff --git a/docs/src/components/analytics-tracker.tsx b/docs/src/components/analytics-tracker.tsx new file mode 100644 index 00000000..61f3d5cd --- /dev/null +++ b/docs/src/components/analytics-tracker.tsx @@ -0,0 +1,71 @@ +'use client'; + +import { useEffect } from 'react'; +import posthog from 'posthog-js'; + +export function AnalyticsTracker() { + useEffect(() => { + const handleClick = (e: MouseEvent) => { + const target = e.target as HTMLElement; + const link = target.closest('a'); + + if (!link) return; + + const href = link.href; + const text = link.textContent || link.getAttribute('aria-label') || ''; + + if (href.includes('github.com/trycua')) { + posthog.capture('github_link_clicked', { + url: href, + link_text: text, + page: window.location.pathname, + }); + } + + if (href.includes('discord.com/invite') || href.includes('discord.gg')) { + posthog.capture('discord_link_clicked', { + url: href, + link_text: text, + page: window.location.pathname, + }); + } + + if ( + (href.includes('trycua.com') && !href.includes('trycua.com/docs')) || + href.includes('cua.ai') + ) { + posthog.capture('main_website_clicked', { + url: href, + link_text: text, + page: window.location.pathname, + }); + } + + if (link.hostname && link.hostname !== window.location.hostname) { + if ( + href.includes('github.com/trycua') || + href.includes('discord.com') || + href.includes('trycua.com') || + href.includes('cua.ai') + ) { + return; + } + + posthog.capture('external_link_clicked', { + url: href, + link_text: text, + page: window.location.pathname, + domain: link.hostname, + }); + } + }; + + document.addEventListener('click', handleClick); + + return () => { + document.removeEventListener('click', handleClick); + }; + }, []); + + return null; +} diff --git a/docs/src/components/cookie-consent.tsx b/docs/src/components/cookie-consent.tsx new file mode 100644 index 00000000..7e37733c --- /dev/null +++ b/docs/src/components/cookie-consent.tsx @@ -0,0 +1,44 @@ +'use client'; + +import { useEffect, useState } from 'react'; +import posthog from 'posthog-js'; + +export function CookieConsent() { + const [isVisible, setIsVisible] = useState(false); + + useEffect(() => { + // Check if user has already accepted cookies + const hasAccepted = localStorage.getItem('cookie-consent'); + if (!hasAccepted) { + setIsVisible(true); + } + }, []); + + const handleAccept = () => { + localStorage.setItem('cookie-consent', 'accepted'); + setIsVisible(false); + + // Track cookie acceptance + posthog.capture('cookie_consent_accepted', { + page: window.location.pathname, + }); + }; + + if (!isVisible) return null; + + return ( +
+    [JSX lost in extraction: a fixed consent banner <div> containing the text
+    "This site uses cookies for website functionality, analytics, and personalized content."
+    and an Accept <button> with onClick={handleAccept}]
+ ); +} diff --git a/docs/src/components/editable-code-block.tsx b/docs/src/components/editable-code-block.tsx new file mode 100644 index 00000000..6da1ded6 --- /dev/null +++ b/docs/src/components/editable-code-block.tsx @@ -0,0 +1,310 @@ +'use client'; + +import React, { createContext, useContext, useState, ReactNode } from 'react'; +import * as Base from 'fumadocs-ui/components/codeblock'; +import { cn } from 'fumadocs-ui/utils/cn'; + +/** + * Context for managing editable values within code blocks + */ +interface EditableCodeContextValue { + values: Record; + updateValue: (key: string, value: string) => void; +} + +const EditableCodeContext = createContext(null); + +/** + * Hook to access the editable code context + */ +function useEditableCode() { + const context = useContext(EditableCodeContext); + if (!context) { + throw new Error('useEditableCode must be used within EditableCodeBlock'); + } + return context; +} + +/** + * Props for EditableCodeBlock component + */ +interface EditableCodeBlockProps { + /** Programming language for styling */ + lang?: string; + /** Initial values for placeholders */ + defaultValues?: Record; + /** Code content with embedded EditableValue components */ + children: ReactNode; + /** Additional CSS classes */ + className?: string; + /** Title for the code block */ + title?: string; +} + +/** + * Code block component that supports inline editable values + * Uses fumadocs-ui styling with interactive input fields + */ +export function EditableCodeBlock({ + lang = 'python', + defaultValues = {}, + children, + className, + title, +}: EditableCodeBlockProps) { + const [values, setValues] = useState>(defaultValues); + + const updateValue = (key: string, value: string) => { + setValues((prev) => ({ ...prev, [key]: value })); + }; + + return ( + + + + + {children} + + + + + ); +} + +/** + * Props for EditableValue component + */ +interface EditableValueProps { + /** Unique identifier for this value */ + placeholder: string; + /** Display width in characters (default: auto) */ + width?: number; + /** Optional default value */ + defaultValue?: string; + /** Input type */ + type?: 'text' | 'password'; +} + +/** + * Inline editable input that blends with code styling + * Appears as an underlined, hoverable value within code + */ +export function EditableValue({ + placeholder, + width: explicitWidth, + defaultValue = '', + type = 'text', +}: EditableValueProps) { + const { values, updateValue } = useEditableCode(); + const value = values[placeholder] ?? 
defaultValue; + const spanRef = React.useRef(null); + const placeholderSpanRef = React.useRef(null); + const inputRef = React.useRef(null); + const [measuredWidth, setMeasuredWidth] = React.useState(0); + const [placeholderWidth, setPlaceholderWidth] = React.useState(0); + const [isHovered, setIsHovered] = React.useState(false); + const [tooltipPosition, setTooltipPosition] = React.useState({ top: 0, left: 0 }); + const [isVisible, setIsVisible] = React.useState(false); + + // Observe visibility changes to trigger remeasurement + React.useEffect(() => { + if (!inputRef.current) return; + + const observer = new IntersectionObserver( + (entries) => { + entries.forEach((entry) => { + setIsVisible(entry.isIntersecting); + }); + }, + { threshold: 0.01 } + ); + + observer.observe(inputRef.current); + + return () => { + observer.disconnect(); + }; + }, []); + + // Measure the actual text width using a hidden span + React.useEffect(() => { + if (spanRef.current && isVisible) { + setMeasuredWidth(spanRef.current.offsetWidth); + } + }, [value, isVisible]); + + // Measure placeholder width when visible + React.useEffect(() => { + if (placeholderSpanRef.current && isVisible) { + setPlaceholderWidth(placeholderSpanRef.current.offsetWidth); + } + }, [placeholder, isVisible]); + + // Update tooltip position when hovered + React.useEffect(() => { + if (isHovered && inputRef.current) { + const rect = inputRef.current.getBoundingClientRect(); + setTooltipPosition({ + top: rect.top - 28, + left: rect.left + rect.width / 2, + }); + } + }, [isHovered]); + + const inputWidth = explicitWidth + ? `${explicitWidth}ch` + : `${Math.max(placeholderWidth, measuredWidth, 80)}px`; + + return ( + setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + > + {/* Hidden span to measure current value width */} + + + {/* Hidden span to measure placeholder width */} + + + {/* Tooltip */} + + Edit me! + + + updateValue(placeholder, e.target.value)} + placeholder={placeholder} + className={cn(type === 'password' && value && 'text-security-disc')} + style={{ + display: 'inline', + width: inputWidth, + verticalAlign: 'baseline', + lineHeight: 'inherit', + fontSize: 'inherit', + fontFamily: 'inherit', + height: 'auto', + padding: 0, + margin: 0, + background: 'transparent', + border: 'none', + borderBottom: '2px dashed rgba(96, 165, 250, 0.5)', + outline: 'none', + color: 'inherit', + transition: 'border-bottom-color 0.2s ease-in-out', + }} + /> + + ); +} + +/** + * Container for form inputs outside the code block + */ +export function EditableForm({ + children, + className = '', +}: { + children: ReactNode; + className?: string; +}) { + return ( +
+    [JSX lost in extraction: a container <div> with a small "Configuration" heading,
+    wrapping {children}, then the closing tags]
+ ); +} + +/** + * Form input for editing values outside code block + */ +interface EditableInputProps { + /** Placeholder key to bind to */ + placeholder: string; + /** Label text */ + label: string; + /** Input type */ + type?: 'text' | 'email' | 'password'; + /** Custom class name */ + className?: string; +} + +export function EditableInput({ + placeholder, + label, + type = 'text', + className = '', +}: EditableInputProps) { + const { values, updateValue } = useEditableCode(); + const value = values[placeholder] || ''; + + return ( +
+      [JSX lost in extraction: a <label> rendering {label}, and an <input> whose surviving
+      attributes are onChange={(e) => updateValue(placeholder, e.target.value)},
+      placeholder={placeholder}, and
+      className={cn(
+        'w-full px-3 py-2 border rounded-md',
+        'focus:outline-none focus:ring-2 focus:ring-blue-500',
+        'bg-fd-background border-fd-border'
+      )} />]
+ ); +} diff --git a/docs/src/components/footer.tsx b/docs/src/components/footer.tsx new file mode 100644 index 00000000..2aafa33b --- /dev/null +++ b/docs/src/components/footer.tsx @@ -0,0 +1,16 @@ +export function Footer() { + return ( + + ); +} diff --git a/docs/src/components/iou.tsx b/docs/src/components/iou.tsx index ad342304..05123f58 100644 --- a/docs/src/components/iou.tsx +++ b/docs/src/components/iou.tsx @@ -34,7 +34,7 @@ interface IOUProps { } /** - * A React component that visualizes and calculates the Intersection over Union (IOU) + * A React component that visualizes and calculates the Intersection over Union (IOU) * of two rectangles on a canvas * @param props - The component props * @returns The rendered IOU visualization component @@ -130,12 +130,7 @@ export default function IOU({ title, description, rect1, rect2 }: IOUProps) {

      [JSX lost in extraction: the render block: a heading showing {title}, the canvas
      visualization (the -/+ pair in this hunk collapses that element onto one line), and a
      readout of IOU = {actualIOU.toFixed(3)}, followed by]
{description} diff --git a/docs/src/components/mermaid.tsx b/docs/src/components/mermaid.tsx index e22c31bd..b4ec6a97 100644 --- a/docs/src/components/mermaid.tsx +++ b/docs/src/components/mermaid.tsx @@ -28,10 +28,7 @@ export function Mermaid({ chart }: { chart: string }) { theme: resolvedTheme === 'dark' ? 'dark' : 'default', }); - const { svg, bindFunctions } = await mermaid.render( - id, - chart.replaceAll('\\n', '\n'), - ); + const { svg, bindFunctions } = await mermaid.render(id, chart.replaceAll('\\n', '\n')); bindFunctions?.(container); setSvg(svg); @@ -44,4 +41,4 @@ export function Mermaid({ chart }: { chart: string }) { }, [chart, id, resolvedTheme]); return
; -} \ No newline at end of file +} diff --git a/docs/src/components/page-feedback.tsx b/docs/src/components/page-feedback.tsx new file mode 100644 index 00000000..02dda336 --- /dev/null +++ b/docs/src/components/page-feedback.tsx @@ -0,0 +1,53 @@ +'use client'; + +import { useState } from 'react'; +import posthog from 'posthog-js'; +import { ThumbsUp, ThumbsDown } from 'lucide-react'; + +export function PageFeedback() { + const [feedback, setFeedback] = useState<'helpful' | 'not_helpful' | null>(null); + + const handleFeedback = (isHelpful: boolean) => { + const feedbackType = isHelpful ? 'helpful' : 'not_helpful'; + setFeedback(feedbackType); + + posthog.capture(`page_feedback_${feedbackType}`, { + page: window.location.pathname, + page_title: document.title, + }); + }; + + return ( +
+    [JSX lost in extraction: an enclosing <div>, then]
+      {feedback === null ? (
+        [a "Was this page helpful?" prompt with <ThumbsUp /> and <ThumbsDown /> buttons
+        wired to handleFeedback(true) and handleFeedback(false)]
+      ) : (
+        [a confirmation message rendering
+        {feedback === 'helpful'
+          ? 'Thanks for your feedback!'
+          : "Thanks for your feedback. We'll work on improving this page."}]
+      )}
+    [closing </div>]
+ ); +} diff --git a/docs/src/lib/source.ts b/docs/src/lib/source.ts index a202cf80..4841cf65 100644 --- a/docs/src/lib/source.ts +++ b/docs/src/lib/source.ts @@ -34,9 +34,7 @@ export async function getApiVersions( ...versions.filter((v) => v.label === 'Current'), ...versions .filter((v) => v.label !== 'Current') - .sort((a, b) => - b.label.localeCompare(a.label, undefined, { numeric: true }) - ), + .sort((a, b) => b.label.localeCompare(a.label, undefined, { numeric: true })), ]; } diff --git a/docs/src/mdx-components.tsx b/docs/src/mdx-components.tsx index 3b27cf8c..4e73fd57 100644 --- a/docs/src/mdx-components.tsx +++ b/docs/src/mdx-components.tsx @@ -3,6 +3,12 @@ import * as TabsComponents from 'fumadocs-ui/components/tabs'; import type { MDXComponents } from 'mdx/types'; import { Mermaid } from './components/mermaid'; import IOU from './components/iou'; +import { + EditableCodeBlock, + EditableValue, + EditableForm, + EditableInput, +} from './components/editable-code-block'; // use this function to get MDX components, you will need it for rendering MDX export function getMDXComponents(components?: MDXComponents): MDXComponents { @@ -10,6 +16,10 @@ export function getMDXComponents(components?: MDXComponents): MDXComponents { ...defaultMdxComponents, Mermaid, IOU, + EditableCodeBlock, + EditableValue, + EditableForm, + EditableInput, ...TabsComponents, ...components, }; diff --git a/docs/src/providers/posthog-provider.tsx b/docs/src/providers/posthog-provider.tsx new file mode 100644 index 00000000..eb9b1d9c --- /dev/null +++ b/docs/src/providers/posthog-provider.tsx @@ -0,0 +1,40 @@ +'use client'; + +import posthog from 'posthog-js'; +import { PostHogProvider } from 'posthog-js/react'; +import { useEffect } from 'react'; +import { usePathname, useSearchParams } from 'next/navigation'; + +if (typeof window !== 'undefined') { + posthog.init(process.env.NEXT_PUBLIC_POSTHOG_API_KEY!, { + api_host: '/docs/api/posthog', + ui_host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + person_profiles: 'always', + capture_pageview: false, + capture_pageleave: true, + }); +} + +export function PHProvider({ children }: { children: React.ReactNode }) { + return {children}; +} + +export function PostHogPageView(): null { + const pathname = usePathname(); + const searchParams = useSearchParams(); + + useEffect(() => { + if (pathname) { + let url = window.origin + pathname; + if (searchParams && searchParams.toString()) { + url = url + `?${searchParams.toString()}`; + } + + posthog.capture('$pageview', { + $current_url: url, + }); + } + }, [pathname, searchParams]); + + return null; +} diff --git a/docs/tsconfig.json b/docs/tsconfig.json index 504b2911..8730cf88 100644 --- a/docs/tsconfig.json +++ b/docs/tsconfig.json @@ -2,11 +2,7 @@ "compilerOptions": { "baseUrl": ".", "target": "ESNext", - "lib": [ - "dom", - "dom.iterable", - "esnext" - ], + "lib": ["dom", "dom.iterable", "esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, @@ -20,12 +16,8 @@ "jsx": "preserve", "incremental": true, "paths": { - "@/.source": [ - "./.source/index.ts" - ], - "@/*": [ - "./src/*" - ] + "@/.source": ["./.source/index.ts"], + "@/*": ["./src/*"] }, "plugins": [ { @@ -33,13 +25,6 @@ } ] }, - "include": [ - "next-env.d.ts", - "**/*.ts", - "**/*.tsx", - ".next/types/**/*.ts" - ], - "exclude": [ - "node_modules" - ] -} \ No newline at end of file + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/examples/agent_examples.py b/examples/agent_examples.py 
index 816c1851..42fa5c7f 100644 --- a/examples/agent_examples.py +++ b/examples/agent_examples.py @@ -2,16 +2,15 @@ import asyncio import logging -import traceback import signal - -from computer import Computer, VMProviderType +import traceback # Import the unified agent class and types from agent import ComputerAgent +from computer import Computer, VMProviderType # Import utility functions -from utils import load_dotenv_files, handle_sigint +from utils import handle_sigint, load_dotenv_files # Set up logging logging.basicConfig(level=logging.INFO) @@ -40,25 +39,20 @@ async def run_agent_example(): # Create ComputerAgent with new API agent = ComputerAgent( # Supported models: - # == OpenAI CUA (computer-use-preview) == model="openai/computer-use-preview", - # == Anthropic CUA (Claude > 3.5) == - # model="anthropic/claude-opus-4-20250514", + # model="anthropic/claude-opus-4-20250514", # model="anthropic/claude-sonnet-4-20250514", # model="anthropic/claude-3-7-sonnet-20250219", # model="anthropic/claude-3-5-sonnet-20241022", - # == UI-TARS == # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", # model="mlx/mlx-community/UI-TARS-1.5-7B-6bit", # model="ollama_chat/0000/ui-tars-1.5-7b", - # == Omniparser + Any LLM == # model="omniparser+anthropic/claude-opus-4-20250514", # model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M", - tools=[computer], only_n_most_recent_images=3, verbosity=logging.DEBUG, @@ -79,18 +73,18 @@ async def run_agent_example(): # Use message-based conversation history history = [] - + for i, task in enumerate(tasks): print(f"\nExecuting task {i+1}/{len(tasks)}: {task}") - + # Add user message to history history.append({"role": "user", "content": task}) - + # Run agent with conversation history async for result in agent.run(history, stream=False): # Add agent outputs to history history += result.get("output", []) - + # Print output for debugging for item in result.get("output", []): if item.get("type") == "message": @@ -104,7 +98,7 @@ async def run_agent_example(): print(f"Computer Action: {action_type}({action})") elif item.get("type") == "computer_call_output": print("Computer Output: [Screenshot/Result]") - + print(f"✅ Task {i+1}/{len(tasks)} completed: {task}") except Exception as e: diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py new file mode 100644 index 00000000..dd7d95ee --- /dev/null +++ b/examples/cloud_api_examples.py @@ -0,0 +1,73 @@ +import asyncio +import os + +from utils import load_dotenv_files + +load_dotenv_files() + +from computer.providers.cloud.provider import CloudProvider + + +async def main() -> None: + api_key = os.getenv("CUA_API_KEY") + if not api_key: + raise RuntimeError("CUA_API_KEY environment variable is not set") + api_base = os.getenv("CUA_API_BASE") + if api_base: + print(f"Using API base: {api_base}") + + provider = CloudProvider(api_key=api_key, verbose=True) + async with provider: + + # List all VMs + vms = await provider.list_vms() + print(f"Found {len(vms)} VM(s)") + for vm in vms: + print( + f"name: {vm['name']}\n", + f"status: {vm['status']}\n", # pending, running, stopped, terminated, failed + f"api_url: {vm.get('api_url')}\n", + f"vnc_url: {vm.get('vnc_url')}\n", + ) + + # # --- Additional operations (commented out) --- + # # To stop a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.stop_vm(name) + # print( + # "stop_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # stopping + # ) + + # # To start a VM by name: + # name = 
"m-linux-96lcxd2c2k" + # resp = await provider.run_vm(name) + # print( + # "run_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # starting + # ) + + # # To restart a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.restart_vm(name) + # print( + # "restart_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # restarting + # ) + + # # To probe a VM's status via its public hostname (if you know the name): + # name = "m-linux-96lcxd2c2k" + # info = await provider.get_vm(name) + # print("get_vm info:\n", + # f"name: {info['name']}\n", + # f"status: {info['status']}\n", # running + # f"api_url: {info.get('api_url')}\n", + # f"os_type: {info.get('os_type')}\n", + # ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/computer-example-ts/.prettierrc b/examples/computer-example-ts/.prettierrc deleted file mode 100644 index 23eaef29..00000000 --- a/examples/computer-example-ts/.prettierrc +++ /dev/null @@ -1,7 +0,0 @@ -{ - "useTabs": false, - "semi": true, - "singleQuote": true, - "trailingComma": "es5", - "bracketSpacing": true -} \ No newline at end of file diff --git a/examples/computer-example-ts/README.md b/examples/computer-example-ts/README.md index 500362c9..7e7fc81e 100644 --- a/examples/computer-example-ts/README.md +++ b/examples/computer-example-ts/README.md @@ -1,13 +1,13 @@ # cua-cloud-openai Example -This example demonstrates how to control a cua Cloud container using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. +This example demonstrates how to control a Cua Cloud Sandbox using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. ## Overview -- Connects to a cua Cloud container via the `@trycua/computer` library +- Connects to a Cua Cloud Sandbox via the `@trycua/computer` library - Sends screenshots and instructions to OpenAI's computer-use model -- Executes AI-generated actions (clicks, typing, etc.) inside the container -- Designed for Linux containers, but can be adapted for other OS types +- Executes AI-generated actions (clicks, typing, etc.) inside the sandbox +- Designed for Linux sandboxes, but can be adapted for other OS types ## Getting Started @@ -20,8 +20,8 @@ This example demonstrates how to control a cua Cloud container using the OpenAI 2. **Set up environment variables:** Create a `.env` file with the following variables: - `OPENAI_API_KEY` — your OpenAI API key - - `CUA_API_KEY` — your cua Cloud API key - - `CUA_CONTAINER_NAME` — the name of your provisioned container + - `CUA_API_KEY` — your Cua Cloud API key + - `CUA_CONTAINER_NAME` — the name of your provisioned sandbox 3. 
**Run the example:** @@ -38,7 +38,7 @@ This example demonstrates how to control a cua Cloud container using the OpenAI For a step-by-step tutorial and more detailed explanation, see the accompanying blog post: -➡️ [Controlling a cua Cloud Container with JavaScript](https://placeholder-url-to-blog-post.com) +➡️ [Controlling a Cua Cloud Sandbox with JavaScript](https://placeholder-url-to-blog-post.com) _(This link will be updated once the article is published.)_ diff --git a/examples/computer-example-ts/package.json b/examples/computer-example-ts/package.json index 65210e18..afa90c2f 100644 --- a/examples/computer-example-ts/package.json +++ b/examples/computer-example-ts/package.json @@ -6,7 +6,9 @@ "main": "index.js", "scripts": { "dev": "tsx watch src/index.ts", - "start": "tsx src/index.ts" + "start": "tsx src/index.ts", + "format": "prettier --write .", + "format:check": "prettier --check ." }, "keywords": [], "author": "", @@ -22,4 +24,4 @@ "tsx": "^4.20.3", "typescript": "^5.8.3" } -} \ No newline at end of file +} diff --git a/examples/computer-example-ts/src/helpers.ts b/examples/computer-example-ts/src/helpers.ts index adad2347..7f914f0d 100644 --- a/examples/computer-example-ts/src/helpers.ts +++ b/examples/computer-example-ts/src/helpers.ts @@ -1,63 +1,63 @@ -import type { Computer } from "@trycua/computer"; -import type OpenAI from "openai"; +import type { Computer } from '@trycua/computer'; +import type OpenAI from 'openai'; export async function executeAction( - computer: Computer, - action: OpenAI.Responses.ResponseComputerToolCall["action"], + computer: Computer, + action: OpenAI.Responses.ResponseComputerToolCall['action'] ) { - switch (action.type) { - case "click": { - const { x, y, button } = action; - console.log(`Executing click at (${x}, ${y}) with button '${button}'.`); - await computer.interface.moveCursor(x, y); - if (button === "right") await computer.interface.rightClick(); - else await computer.interface.leftClick(); - break; - } - case "type": - { - const { text } = action; - console.log(`Typing text: ${text}`); - await computer.interface.typeText(text); - } - break; - case "scroll": { - const { x: locX, y: locY, scroll_x, scroll_y } = action; - console.log( - `Scrolling at (${locX}, ${locY}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y}).`, - ); - await computer.interface.moveCursor(locX, locY); - await computer.interface.scroll(scroll_x, scroll_y); - break; - } - case "keypress": { - const { keys } = action; - for (const key of keys) { - console.log(`Pressing key: ${key}.`); - // Map common key names to CUA equivalents - if (key.toLowerCase() === "enter") { - await computer.interface.pressKey("return"); - } else if (key.toLowerCase() === "space") { - await computer.interface.pressKey("space"); - } else { - await computer.interface.pressKey(key); - } - } - break; - } - case "wait": { - console.log(`Waiting for 3 seconds.`); - await new Promise((resolve) => setTimeout(resolve, 3 * 1000)); - break; - } - case "screenshot": { - console.log("Taking screenshot."); - // This is handled automatically in the main loop, but we can take an extra one if requested - const screenshot = await computer.interface.screenshot(); - return screenshot; - } - default: - console.log(`Unrecognized action: ${action.type}`); - break; - } + switch (action.type) { + case 'click': { + const { x, y, button } = action; + console.log(`Executing click at (${x}, ${y}) with button '${button}'.`); + await computer.interface.moveCursor(x, y); + if (button === 'right') await 
computer.interface.rightClick(); + else await computer.interface.leftClick(); + break; + } + case 'type': + { + const { text } = action; + console.log(`Typing text: ${text}`); + await computer.interface.typeText(text); + } + break; + case 'scroll': { + const { x: locX, y: locY, scroll_x, scroll_y } = action; + console.log( + `Scrolling at (${locX}, ${locY}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y}).` + ); + await computer.interface.moveCursor(locX, locY); + await computer.interface.scroll(scroll_x, scroll_y); + break; + } + case 'keypress': { + const { keys } = action; + for (const key of keys) { + console.log(`Pressing key: ${key}.`); + // Map common key names to CUA equivalents + if (key.toLowerCase() === 'enter') { + await computer.interface.pressKey('return'); + } else if (key.toLowerCase() === 'space') { + await computer.interface.pressKey('space'); + } else { + await computer.interface.pressKey(key); + } + } + break; + } + case 'wait': { + console.log(`Waiting for 3 seconds.`); + await new Promise((resolve) => setTimeout(resolve, 3 * 1000)); + break; + } + case 'screenshot': { + console.log('Taking screenshot.'); + // This is handled automatically in the main loop, but we can take an extra one if requested + const screenshot = await computer.interface.screenshot(); + return screenshot; + } + default: + console.log(`Unrecognized action: ${action.type}`); + break; + } } diff --git a/examples/computer-example-ts/src/index.ts b/examples/computer-example-ts/src/index.ts index d58a9843..29ff926d 100644 --- a/examples/computer-example-ts/src/index.ts +++ b/examples/computer-example-ts/src/index.ts @@ -1,104 +1,103 @@ -import { Computer, OSType } from "@trycua/computer"; -import OpenAI from "openai"; -import { executeAction } from "./helpers"; +import { Computer, OSType } from '@trycua/computer'; +import OpenAI from 'openai'; +import { executeAction } from './helpers'; -import "dotenv/config"; +import 'dotenv/config'; const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); -const COMPUTER_USE_PROMPT = "Open firefox and go to trycua.com"; +const COMPUTER_USE_PROMPT = 'Open firefox and go to trycua.com'; // Initialize the Computer Connection const computer = new Computer({ - apiKey: process.env.CUA_API_KEY!, - name: process.env.CUA_CONTAINER_NAME!, - osType: OSType.LINUX, + apiKey: process.env.CUA_API_KEY!, + name: process.env.CUA_CONTAINER_NAME!, + osType: OSType.LINUX, }); await computer.run(); // Take the initial screenshot const screenshot = await computer.interface.screenshot(); -const screenshotBase64 = screenshot.toString("base64"); +const screenshotBase64 = screenshot.toString('base64'); // Setup openai config for computer use const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = { - model: "computer-use-preview", - tools: [ - { - type: "computer_use_preview", - display_width: 1024, - display_height: 768, - environment: "linux", // we're using a linux vm - }, - ], - truncation: "auto", + model: 'computer-use-preview', + tools: [ + { + type: 'computer_use_preview', + display_width: 1024, + display_height: 768, + environment: 'linux', // we're using a linux vm + }, + ], + truncation: 'auto', }; // Send initial screenshot to the openai computer use model let res = await openai.responses.create({ - ...computerUseConfig, - input: [ - { - role: "user", - content: [ - // what we want the ai to do - { type: "input_text", text: COMPUTER_USE_PROMPT }, - // current screenshot of the vm - { - type: "input_image", - image_url: 
`data:image/png;base64,${screenshotBase64}`, - detail: "auto", - }, - ], - }, - ], + ...computerUseConfig, + input: [ + { + role: 'user', + content: [ + // what we want the ai to do + { type: 'input_text', text: COMPUTER_USE_PROMPT }, + // current screenshot of the vm + { + type: 'input_image', + image_url: `data:image/png;base64,${screenshotBase64}`, + detail: 'auto', + }, + ], + }, + ], }); // Loop until there are no more computer use actions. while (true) { - const computerCalls = res.output.filter((o) => o.type === "computer_call"); - if (computerCalls.length < 1) { - console.log("No more computer calls. Loop complete."); - break; - } - // Get the first call - const call = computerCalls[0]; - const action = call.action; - console.log("Received action from OpenAI Responses API:", action); - let ackChecks: OpenAI.Responses.ResponseComputerToolCall.PendingSafetyCheck[] = - []; - if (call.pending_safety_checks.length > 0) { - console.log("Safety checks pending:", call.pending_safety_checks); - // In a real implementation, you would want to get user confirmation here - ackChecks = call.pending_safety_checks; - } + const computerCalls = res.output.filter((o) => o.type === 'computer_call'); + if (computerCalls.length < 1) { + console.log('No more computer calls. Loop complete.'); + break; + } + // Get the first call + const call = computerCalls[0]; + const action = call.action; + console.log('Received action from OpenAI Responses API:', action); + let ackChecks: OpenAI.Responses.ResponseComputerToolCall.PendingSafetyCheck[] = []; + if (call.pending_safety_checks.length > 0) { + console.log('Safety checks pending:', call.pending_safety_checks); + // In a real implementation, you would want to get user confirmation here + ackChecks = call.pending_safety_checks; + } - // Execute the action in the container - await executeAction(computer, action); - // Wait for changes to process within the container (1sec) - await new Promise((resolve) => setTimeout(resolve, 1000)); + // Execute the action in the container + await executeAction(computer, action); + // Wait for changes to process within the container (1sec) + await new Promise((resolve) => setTimeout(resolve, 1000)); - // Capture new screenshot - const newScreenshot = await computer.interface.screenshot(); - const newScreenshotBase64 = newScreenshot.toString("base64"); + // Capture new screenshot + const newScreenshot = await computer.interface.screenshot(); + const newScreenshotBase64 = newScreenshot.toString('base64'); - // Screenshot back as computer_call_output + // Screenshot back as computer_call_output - res = await openai.responses.create({ - ...computerUseConfig, - previous_response_id: res.id, - input: [ - { - type: "computer_call_output", - call_id: call.call_id, - acknowledged_safety_checks: ackChecks, - output: { - type: "computer_screenshot", - image_url: `data:image/png;base64,${newScreenshotBase64}`, - }, - }, - ], - }); + res = await openai.responses.create({ + ...computerUseConfig, + previous_response_id: res.id, + input: [ + { + type: 'computer_call_output', + call_id: call.call_id, + acknowledged_safety_checks: ackChecks, + output: { + type: 'computer_screenshot', + image_url: `data:image/png;base64,${newScreenshotBase64}`, + }, + }, + ], + }); } process.exit(); diff --git a/examples/computer-example-ts/tsconfig.json b/examples/computer-example-ts/tsconfig.json index c606e279..9d3e969a 100644 --- a/examples/computer-example-ts/tsconfig.json +++ b/examples/computer-example-ts/tsconfig.json @@ -1,17 +1,13 @@ { "compilerOptions": 
{ "target": "esnext", - "lib": [ - "es2023" - ], + "lib": ["es2023"], "moduleDetection": "force", "module": "preserve", "moduleResolution": "bundler", "allowImportingTsExtensions": true, "resolveJsonModule": true, - "types": [ - "node" - ], + "types": ["node"], "allowSyntheticDefaultImports": true, "strict": true, "noUnusedLocals": true, @@ -21,9 +17,7 @@ "isolatedModules": true, "verbatimModuleSyntax": true, "skipLibCheck": true, - "outDir": "build", + "outDir": "build" }, - "include": [ - "src" - ] -} \ No newline at end of file + "include": ["src"] +} diff --git a/examples/computer_examples.py b/examples/computer_examples.py index bb9e7ad9..7e990e8b 100644 --- a/examples/computer_examples.py +++ b/examples/computer_examples.py @@ -1,8 +1,8 @@ -import os import asyncio -from pathlib import Path +import os import sys import traceback +from pathlib import Path # Load environment variables from .env file project_root = Path(__file__).parent.parent @@ -20,8 +20,9 @@ for path in pythonpath.split(":"): print(f"Added to sys.path: {path}") from computer.computer import Computer -from computer.providers.base import VMProviderType from computer.logger import LogLevel +from computer.providers.base import VMProviderType + async def main(): try: @@ -29,17 +30,15 @@ async def main(): # Create a local macOS computer computer = Computer( - display="1024x768", - memory="8GB", - cpu="4", + display="1024x768", + memory="8GB", + cpu="4", os_type="macos", name="macos", verbosity=LogLevel.VERBOSE, provider_type=VMProviderType.LUME, storage="/Users//repos/trycua/computer/examples/storage", - shared_directories=[ - "/Users//repos/trycua/computer/examples/shared" - ], + shared_directories=["/Users//repos/trycua/computer/examples/shared"], ephemeral=False, ) @@ -50,22 +49,22 @@ async def main(): # name=os.getenv("CONTAINER_NAME"), # provider_type=VMProviderType.CLOUD, # ) - + try: # Run the computer with default parameters await computer.run() - + screenshot = await computer.interface.screenshot() - + # Create output directory if it doesn't exist output_dir = Path("./output") output_dir.mkdir(exist_ok=True) - + screenshot_path = output_dir / "screenshot.png" with open(screenshot_path, "wb") as f: f.write(screenshot) print(f"Screenshot saved to: {screenshot_path.absolute()}") - + # await computer.interface.hotkey("command", "space") # res = await computer.interface.run_command("touch ./Downloads/empty_file") diff --git a/examples/computer_examples_windows.py b/examples/computer_examples_windows.py new file mode 100644 index 00000000..19d228bf --- /dev/null +++ b/examples/computer_examples_windows.py @@ -0,0 +1,145 @@ +import asyncio +import os +import sys +import traceback +from pathlib import Path + +# Load environment variables from .env file +project_root = Path(__file__).parent.parent +env_file = project_root / ".env" +print(f"Loading environment from: {env_file}") +from computer.helpers import sandboxed +from dotenv import load_dotenv + +load_dotenv(env_file) + +# Add paths to sys.path if needed +pythonpath = os.environ.get("PYTHONPATH", "") +for path in pythonpath.split(":"): + if path and path not in sys.path: + sys.path.insert(0, path) # Insert at beginning to prioritize + print(f"Added to sys.path: {path}") + +from computer.computer import Computer +from computer.logger import LogLevel +from computer.providers.base import VMProviderType + +# ANSI color codes +RED = "\033[91m" +RESET = "\033[0m" + + +async def main(): + try: + print("\n=== Using direct initialization ===") + + # Create a remote Windows 
computer with Cua + computer = Computer( + os_type="windows", + api_key=os.getenv("CUA_API_KEY"), + name=os.getenv("CONTAINER_NAME") or "", + provider_type=VMProviderType.CLOUD, + ) + + try: + # Run the computer with default parameters + await computer.run() + + # Create output directory if it doesn't exist + output_dir = Path("./output") + output_dir.mkdir(exist_ok=True) + + # Keyboard Actions Examples + print("\n=== Keyboard Actions ===") + await computer.interface.type_text("Hello, World!") + await computer.interface.press_key("enter") + + # Mouse Actions Examples + print("\n=== Mouse Actions ===") + await computer.interface.move_cursor(100, 100) + await computer.interface.left_click() + await computer.interface.double_click(400, 400) + await computer.interface.right_click(300, 300) + + print("\n=== RPC ===") + await computer.venv_install("demo_venv", ["mss"]) + + @sandboxed("demo_venv") + def greet_and_print(name): + import os + + from mss import mss + + # get username + username = os.getlogin() + print(f"Hello from inside the container, {name}!") + print("Username:", username) + print("Screens:", mss().monitors) + + # take a screenshot + with mss() as sct: + filename = sct.shot(mon=-1, output="C:/Users/azureuser/Desktop/fullscreen.png") + print(filename) + + return {"greeted": name, "username": username} + + # Call with args and kwargs + result = await greet_and_print("John Doe") + print("Result from sandboxed function:", result) + + # Command Actions Examples + print("\n=== Command Actions ===") + result = await computer.interface.run_command("notepad") + print("Result from command:", result) + + screenshot = await computer.interface.screenshot() + screenshot_path = output_dir / "screenshot.png" + with open(screenshot_path, "wb") as f: + f.write(screenshot) + print(f"Screenshot saved to: {screenshot_path.absolute()}") + + # Clipboard Actions Examples + print("\n=== Clipboard Actions ===") + await computer.interface.set_clipboard("Test clipboard") + content = await computer.interface.copy_to_clipboard() + print(f"Clipboard content: {content}") + + # Simple REPL Loop + print("\n=== Command REPL ===") + print("Enter commands to run on the remote computer.") + print("Type 'exit' or 'quit' to leave the REPL.\n") + + while True: + try: + # Get command from user + command = input("command> ").strip() + + # Check for exit commands + if command.lower() in ["exit", "quit", ""]: + if command.lower() in ["exit", "quit"]: + print("Exiting REPL...") + break + + # Run the command + result = await computer.interface.run_command(command) + + print(result.stdout) + if result.stderr: + print(f"{RED}{result.stderr}{RESET}") + except KeyboardInterrupt: + print("\nExiting REPL...") + break + except Exception as e: + print(f"{RED}Error running command: {e}{RESET}") + + finally: + # Important to clean up resources + # await computer.stop() + pass + except Exception as e: + print(f"Error in main: {e}") + traceback.print_exc() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/computer_ui_examples.py b/examples/computer_ui_examples.py index 0c1d0974..2151b9d8 100644 --- a/examples/computer_ui_examples.py +++ b/examples/computer_ui_examples.py @@ -23,9 +23,9 @@ if __name__ == "__main__": server_name="0.0.0.0", server_port=7860, ) - + # Optional: Using the saved dataset # import datasets # from computer.ui.utils import convert_to_unsloth # ds = datasets.load_dataset("ddupont/highquality-cua-demonstrations") - # ds = convert_to_unsloth(ds) \ No newline at end of file + # ds = 
convert_to_unsloth(ds) diff --git a/examples/docker_examples.py b/examples/docker_examples.py index 42dcf3ac..e84aa747 100644 --- a/examples/docker_examples.py +++ b/examples/docker_examples.py @@ -1,8 +1,10 @@ import asyncio -from computer.providers.factory import VMProviderFactory -from computer import Computer, VMProviderType import os +from computer import Computer, VMProviderType +from computer.providers.factory import VMProviderFactory + + async def main(): # # Create docker provider # provider = VMProviderFactory.create_provider( @@ -39,5 +41,6 @@ async def main(): with open("screenshot_docker.png", "wb") as f: f.write(screenshot) + if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/evals/hud_eval_examples.py b/examples/evals/hud_eval_examples.py index 86da50df..f49d7b84 100644 --- a/examples/evals/hud_eval_examples.py +++ b/examples/evals/hud_eval_examples.py @@ -6,7 +6,7 @@ hud_eval_examples.py — minimal HUD evaluation runner - No Docker/local computer usage """ -#imports +# imports import asyncio import logging import os @@ -14,13 +14,15 @@ import uuid from pathlib import Path from pprint import pprint -from dotenv import load_dotenv, find_dotenv from agent import ComputerAgent from agent.integrations.hud import run_full_dataset +from dotenv import find_dotenv, load_dotenv """ Loading env """ + + def load_env_or_fail() -> None: # Walk up from CWD / file dir to find nearest .env env_path = find_dotenv(usecwd=False) @@ -32,17 +34,19 @@ def load_env_or_fail() -> None: if not os.getenv("HUD_API_KEY"): raise EnvironmentError("❌ HUD_API_KEY is missing in the loaded environment") + """ Build Agent Config - customize agent behavior, tool integration, callbacks, resource management, and more - https://docs.trycua.com/docs/agent-sdk/agent-loops#parameters - https://docs.trycua.com/docs/agent-sdk/supported-model-providers """ + + def build_agent_config() -> dict: instruction = "You are a computer-using agent graded by deterministic checkers." - return { "model": "openai/computer-use-preview", "trajectory_dir": str(Path("trajectories")), @@ -51,21 +55,25 @@ def build_agent_config() -> dict: "instruction": instruction, } + """ Hud Eval """ + + async def run_hud_eval() -> None: - #load env and agent config + # load env and agent config load_env_or_fail() agent_config = build_agent_config() # Initialize to ensure config is valid (tools, verbosity, etc.) 
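    # (A hedged aside: constructing the agent eagerly here surfaces bad model
    # names or tool settings before the HUD job is created; the instance
    # itself is discarded.)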
_ = ComputerAgent(**agent_config) - job_name = f"osworld-test-{str(uuid.uuid4())[:4]}" #job name (each run of your task is a job on hud) + job_name = ( + f"osworld-test-{str(uuid.uuid4())[:4]}" # job name (each run of your task is a job on hud) + ) print(f"🚀 Running HUD eval: {job_name}") - """ Customize your hud eval below, check the doc for additional params - https://docs.trycua.com/docs/agent-sdk/integrations/hud#parameters-1 diff --git a/examples/pylume_examples.py b/examples/pylume_examples.py index 37dead88..35e89ed6 100644 --- a/examples/pylume_examples.py +++ b/examples/pylume_examples.py @@ -1,5 +1,6 @@ import asyncio -from pylume import PyLume, ImageRef, VMRunOpts, SharedDirectory, VMConfig, VMUpdateOpts + +from pylume import ImageRef, PyLume, SharedDirectory, VMConfig, VMRunOpts, VMUpdateOpts async def main(): diff --git a/examples/sandboxed_functions_examples.py b/examples/sandboxed_functions_examples.py index 93d93021..a983f95d 100644 --- a/examples/sandboxed_functions_examples.py +++ b/examples/sandboxed_functions_examples.py @@ -1,6 +1,6 @@ -from pathlib import Path import os import sys +from pathlib import Path # Load environment variables from .env file project_root = Path(__file__).parent.parent @@ -18,14 +18,16 @@ for path in pythonpath.split(":"): print(f"Added to sys.path: {path}") import asyncio + from computer.computer import Computer from computer.helpers import sandboxed + async def main(): # Initialize the computer in a Cua Container computer = Computer() await computer.run() - + # Install a package in a virtual environment in the container await computer.venv_install("demo_venv", ["requests", "macos-pyxa"]) @@ -39,6 +41,7 @@ async def main(): def greet_and_print(name): # get .html of the current Safari tab import PyXA + safari = PyXA.Application("Safari") current_doc = safari.current_document html = current_doc.source() @@ -50,5 +53,6 @@ async def main(): result = await greet_and_print("Cua") print("Result from sandboxed function:", result) + if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/som_examples.py b/examples/som_examples.py index 4dc3e38b..5cb4567f 100644 --- a/examples/som_examples.py +++ b/examples/som_examples.py @@ -9,17 +9,18 @@ This script shows how to: """ import argparse -import logging -import sys -from pathlib import Path -import time -from PIL import Image -from typing import Dict, Any, List, Optional -import numpy as np -import io import base64 import glob +import io +import logging import os +import sys +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +import numpy as np +from PIL import Image # Load environment variables from .env file project_root = Path(__file__).parent.parent @@ -42,8 +43,8 @@ if str(libs_path) not in sys.path: sys.path.append(str(libs_path)) print(f"Added to sys.path: {libs_path}") -from som import OmniParser, ParseResult, IconElement, TextElement -from som.models import UIElement, ParserMetadata, BoundingBox +from som import IconElement, OmniParser, ParseResult, TextElement +from som.models import BoundingBox, ParserMetadata, UIElement # Configure logging logging.basicConfig( @@ -361,7 +362,7 @@ def run_experiments(input_path: str, output_dir: Path, use_ocr: bool = False): # Update timing totals total_time += t.elapsed_time - + # Write summary for this combination avg_time = total_time / len(image_files) f.write( diff --git a/examples/utils.py b/examples/utils.py index 759b0b6e..7ad94e5c 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -1,8 +1,8 @@ 
"""Utility functions for example scripts.""" import os -import sys import signal +import sys from pathlib import Path from typing import Optional diff --git a/examples/winsandbox_example.py b/examples/winsandbox_example.py index 9cf1269a..98fa0f44 100644 --- a/examples/winsandbox_example.py +++ b/examples/winsandbox_example.py @@ -4,11 +4,13 @@ Learn more at: https://learn.microsoft.com/en-us/windows/security/application-se """ import asyncio + from computer import Computer + async def main(): """Test the Windows Sandbox provider.""" - + # Create a computer instance using Windows Sandbox computer = Computer( provider_type="winsandbox", @@ -16,36 +18,38 @@ async def main(): memory="4GB", # ephemeral=True, # Always true for Windows Sandbox ) - + try: print("Starting Windows Sandbox...") await computer.run() - + print("Windows Sandbox is ready!") print(f"IP Address: {await computer.get_ip()}") - + # Test basic functionality print("Testing basic functionality...") screenshot = await computer.interface.screenshot() print(f"Screenshot taken: {len(screenshot)} bytes") - + # Test running a command print("Testing command execution...") - stdout, stderr = await computer.interface.run_command("echo Hello from Windows Sandbox!") - print(f"Command output: {stdout}") + result = await computer.interface.run_command("echo Hello from Windows Sandbox!") + print(f"Command output: {result.stdout}") print("Press any key to continue...") input() - + except Exception as e: print(f"Error: {e}") import traceback + traceback.print_exc() - + finally: print("Stopping Windows Sandbox...") await computer.stop() print("Windows Sandbox stopped.") + if __name__ == "__main__": asyncio.run(main()) diff --git a/libs/kasm/Dockerfile b/libs/kasm/Dockerfile index 526d2a7f..89d447cf 100644 --- a/libs/kasm/Dockerfile +++ b/libs/kasm/Dockerfile @@ -32,6 +32,11 @@ RUN sed -i 's/-sslOnly//g' /dockerstartup/vnc_startup.sh RUN echo "/usr/bin/python3 -m computer_server" > $STARTUPDIR/custom_startup.sh \ && chmod +x $STARTUPDIR/custom_startup.sh +# Enable sudo support for kasm-user +RUN echo "kasm-user:password" | chpasswd +RUN usermod -aG sudo kasm-user +RUN echo "kasm-user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + ######### End Customizations ########### RUN chown 1000:0 $HOME diff --git a/libs/kasm/README.md b/libs/kasm/README.md index 2a383535..00c02019 100644 --- a/libs/kasm/README.md +++ b/libs/kasm/README.md @@ -13,23 +13,20 @@ Containerized virtual desktop for Computer-Using Agents (CUA). Utilizes Kasm's M ## Usage -### Building the Container +### Build and Push (multi-arch) + +Use Docker Buildx to build and push a multi-architecture image for both `linux/amd64` and `linux/arm64` in a single command. Replace `trycua` with your Docker Hub username or your registry namespace as needed. ```bash -docker build -t cua-ubuntu:latest . -``` - -### Pushing to Registry - -```bash -# Tag for Docker Hub (replace 'trycua' with your Docker Hub username) -docker tag cua-ubuntu:latest trycua/cua-ubuntu:latest - -# Login to Docker Hub +# Login to your registry first (Docker Hub shown here) docker login -# Push to Docker Hub -docker push trycua/cua-ubuntu:latest +# Build and push for amd64 and arm64 in one step +docker buildx build \ + --platform linux/amd64,linux/arm64 \ + -t trycua/cua-ubuntu:latest \ + --push \ + . 
``` ### Running the Container Manually @@ -73,14 +70,17 @@ async with provider: ## Container Configuration ### Ports + - **6901**: VNC web interface (noVNC) - **8080**: Computer-server API endpoint ### Environment Variables + - `VNC_PW`: VNC password (default: "password") - `DISPLAY`: X11 display (set to ":0") ### Volumes + - `/home/kasm-user/storage`: Persistent storage mount point - `/home/kasm-user/shared`: Shared folder mount point diff --git a/libs/lume/CONTRIBUTING.md b/libs/lume/CONTRIBUTING.md index 6c51a416..edbe6c8a 100644 --- a/libs/lume/CONTRIBUTING.md +++ b/libs/lume/CONTRIBUTING.md @@ -29,6 +29,7 @@ We're always looking for suggestions to make lume better. If you have an idea: ## Documentation Documentation improvements are always welcome. You can: + - Fix typos or unclear explanations - Add examples and use cases - Improve API documentation @@ -36,4 +37,4 @@ Documentation improvements are always welcome. You can: For detailed instructions on setting up your development environment and submitting code contributions, please see our [Development.md](docs/Development.md) guide. -Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions. \ No newline at end of file +Feel free to join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get help with your contributions. diff --git a/libs/lume/Development.md b/libs/lume/Development.md index 0ddf8c5e..f991a49e 100644 --- a/libs/lume/Development.md +++ b/libs/lume/Development.md @@ -5,6 +5,7 @@ This guide will help you set up your development environment and understand the ## Environment Setup Lume development requires: + - Swift 6 or higher - Xcode 15 or higher - macOS Sequoia 15.2 or higher @@ -16,12 +17,13 @@ If you're working on Lume in the context of the Cua monorepo, we recommend using # Open VS Code workspace from the root of the monorepo code .vscode/lume.code-workspace ``` + This workspace is preconfigured with Swift language support, build tasks, and debug configurations. ## Setting Up the Repository Locally 1. **Fork the Repository**: Create your own fork of lume -2. **Clone the Repository**: +2. **Clone the Repository**: ```bash git clone https://github.com/trycua/lume.git cd lume diff --git a/libs/lume/README.md b/libs/lume/README.md index c90c250a..1ea563c9 100644 --- a/libs/lume/README.md +++ b/libs/lume/README.md @@ -8,13 +8,13 @@
- [![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +
- **lume** is a lightweight Command Line Interface and local API server to create, run and manage macOS and Linux virtual machines (VMs) with near-native performance on Apple Silicon, using Apple's `Virtualization.Framework`. ### Run prebuilt macOS images in just 1 step @@ -43,6 +43,7 @@ All prebuilt images use the default password `lume`. Change this immediately aft **System Requirements**: + - Apple Silicon Mac (M1, M2, M3, etc.) - macOS 13.0 or later - At least 8GB of RAM (16GB recommended) diff --git a/libs/lume/src/Server/Server.swift b/libs/lume/src/Server/Server.swift index 6f279a42..37d32484 100644 --- a/libs/lume/src/Server/Server.swift +++ b/libs/lume/src/Server/Server.swift @@ -294,7 +294,7 @@ final class Server { return try await self.handleSetDefaultLocation(name) }), Route( - method: "POST", path: "/vms/push", + method: "POST", path: "/lume/vms/push", handler: { [weak self] request in guard let self else { throw HTTPError.internalError } return try await self.handlePush(request.body) diff --git a/libs/lume/tests/VM/VMDetailsPrinterTests.swift b/libs/lume/tests/VM/VMDetailsPrinterTests.swift index 42de5f9f..05e0ac5c 100644 --- a/libs/lume/tests/VM/VMDetailsPrinterTests.swift +++ b/libs/lume/tests/VM/VMDetailsPrinterTests.swift @@ -76,12 +76,13 @@ struct VMDetailsPrinterTests { let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace) #expect( headerParts == [ - "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "ip", "vnc", + "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "shared_dirs", "ip", "vnc", ]) #expect( printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == [ "name", "os", "2", "0.00G", "24.0B/30.0B", "1024x768", "status", "mockLocation", + "-", "0.0.0.0", "vncUrl", ]) diff --git a/libs/lumier/README.md b/libs/lumier/README.md index 92cfc559..61825867 100644 --- a/libs/lumier/README.md +++ b/libs/lumier/README.md @@ -8,9 +8,10 @@
- [![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +
@@ -21,6 +22,7 @@ macOS and Linux virtual machines in a Docker container.
## What is Lumier? + **Lumier** is an interface for running macOS virtual machines with minimal setup. It uses Docker as a packaging system to deliver a pre-configured environment that connects to the `lume` virtualization service running on your host machine. With Lumier, you get: - A ready-to-use macOS or Linux virtual machine in minutes @@ -35,6 +37,7 @@ Before using Lumier, make sure you have: 1. **Docker for Apple Silicon** - download it [here](https://desktop.docker.com/mac/main/arm64/Docker.dmg) and follow the installation instructions. 2. **Lume** - This is the virtualization CLI that powers Lumier. Install it with this command: + ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` @@ -69,6 +72,7 @@ After running the command above, you can access your macOS VM through a web brow This project was inspired by [dockur/windows](https://github.com/dockur/windows) and [dockur/macos](https://github.com/dockur/macos), which pioneered the approach of running Windows and macOS VMs in Docker containers. Main differences with dockur/macos: + - Lumier is specifically designed for macOS virtualization - Lumier supports Apple Silicon (M1/M2/M3/M4) while dockur/macos only supports Intel - Lumier uses the Apple Virtualization Framework (Vz) through the `lume` CLI to create true virtual machines, while dockur relies on KVM. diff --git a/libs/python/agent/.bumpversion.cfg b/libs/python/agent/.bumpversion.cfg new file mode 100644 index 00000000..b6bb6583 --- /dev/null +++ b/libs/python/agent/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.4.35 +commit = True +tag = True +tag_name = agent-v{new_version} +message = Bump cua-agent to v{new_version} + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" diff --git a/libs/python/agent/README.md b/libs/python/agent/README.md index ed90076b..69adc242 100644 --- a/libs/python/agent/README.md +++ b/libs/python/agent/README.md @@ -8,10 +8,11 @@ - [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) - [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/) +[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/) + @@ -47,7 +48,7 @@ async def main(): name=os.getenv("CUA_CONTAINER_NAME"), api_key=os.getenv("CUA_API_KEY") ) as computer: - + # Create agent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", @@ -56,10 +57,10 @@ async def main(): trajectory_dir="trajectories", max_trajectory_budget=5.0 # $5 budget limit ) - + # Run agent messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] - + async for result in agent.run(messages): for item in result["output"]: if item["type"] == "message": @@ -84,4 +85,4 @@ if __name__ == "__main__": ## License -MIT License - see LICENSE file 
for details. \ No newline at end of file +MIT License - see LICENSE file for details. diff --git a/libs/python/agent/agent/__init__.py b/libs/python/agent/agent/__init__.py index f48297c4..12460458 100644 --- a/libs/python/agent/agent/__init__.py +++ b/libs/python/agent/agent/__init__.py @@ -5,19 +5,13 @@ agent - Decorator-based Computer Use Agent with liteLLM integration import logging import sys -from .decorators import register_agent -from .agent import ComputerAgent -from .types import Messages, AgentResponse - # Import loops to register them from . import loops +from .agent import ComputerAgent +from .decorators import register_agent +from .types import AgentResponse, Messages -__all__ = [ - "register_agent", - "ComputerAgent", - "Messages", - "AgentResponse" -] +__all__ = ["register_agent", "ComputerAgent", "Messages", "AgentResponse"] __version__ = "0.4.0" diff --git a/libs/python/agent/agent/__main__.py b/libs/python/agent/agent/__main__.py index 1b4d6697..96ff779e 100644 --- a/libs/python/agent/agent/__main__.py +++ b/libs/python/agent/agent/__main__.py @@ -5,8 +5,9 @@ Usage: python -m agent.cli """ -import sys import asyncio +import sys + from .cli import main if __name__ == "__main__": diff --git a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py index 3ecba641..63e860e1 100644 --- a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py +++ b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py @@ -2,27 +2,30 @@ import asyncio import functools import warnings from concurrent.futures import ThreadPoolExecutor -from typing import Iterator, AsyncIterator, Dict, List, Any, Optional -from litellm.types.utils import GenericStreamingChunk, ModelResponse +from typing import Any, AsyncIterator, Dict, Iterator, List, Optional + +from litellm import acompletion, completion from litellm.llms.custom_llm import CustomLLM -from litellm import completion, acompletion +from litellm.types.utils import GenericStreamingChunk, ModelResponse # Try to import HuggingFace dependencies try: import torch from transformers import AutoModelForImageTextToText, AutoProcessor + HF_AVAILABLE = True except ImportError: HF_AVAILABLE = False from .models import load_model as load_model_handler + class HuggingFaceLocalAdapter(CustomLLM): """HuggingFace Local Adapter for running vision-language models locally.""" - + def __init__(self, device: str = "auto", trust_remote_code: bool = False, **kwargs): """Initialize the adapter. - + Args: device: Device to load model on ("auto", "cuda", "cpu", etc.) trust_remote_code: Whether to trust remote code @@ -34,129 +37,120 @@ class HuggingFaceLocalAdapter(CustomLLM): # Cache for model handlers keyed by model_name self._handlers: Dict[str, Any] = {} self._executor = ThreadPoolExecutor(max_workers=1) # Single thread pool - + def _get_handler(self, model_name: str): """Get or create a model handler for the given model name.""" if model_name not in self._handlers: - self._handlers[model_name] = load_model_handler(model_name=model_name, device=self.device, trust_remote_code=self.trust_remote_code) + self._handlers[model_name] = load_model_handler( + model_name=model_name, device=self.device, trust_remote_code=self.trust_remote_code + ) return self._handlers[model_name] - + def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Convert OpenAI format messages to HuggingFace format. 
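        Text parts pass through unchanged; `image_url` parts (plain URLs or
        base64 data URIs) are rewritten as `{"type": "image", "image": <url>}` items.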
- + Args: messages: Messages in OpenAI format - + Returns: Messages in HuggingFace format """ converted_messages = [] - + for message in messages: - converted_message = { - "role": message["role"], - "content": [] - } - + converted_message = {"role": message["role"], "content": []} + content = message.get("content", []) if isinstance(content, str): # Simple text content - converted_message["content"].append({ - "type": "text", - "text": content - }) + converted_message["content"].append({"type": "text", "text": content}) elif isinstance(content, list): # Multi-modal content for item in content: if item.get("type") == "text": - converted_message["content"].append({ - "type": "text", - "text": item.get("text", "") - }) + converted_message["content"].append( + {"type": "text", "text": item.get("text", "")} + ) elif item.get("type") == "image_url": # Convert image_url format to image format image_url = item.get("image_url", {}).get("url", "") - converted_message["content"].append({ - "type": "image", - "image": image_url - }) - + converted_message["content"].append({"type": "image", "image": image_url}) + converted_messages.append(converted_message) - + return converted_messages - + def _generate(self, **kwargs) -> str: """Generate response using the local HuggingFace model. - + Args: **kwargs: Keyword arguments containing messages and model info - + Returns: Generated text response """ if not HF_AVAILABLE: raise ImportError( "HuggingFace transformers dependencies not found. " - "Please install with: pip install \"cua-agent[uitars-hf]\"" + 'Please install with: pip install "cua-agent[uitars-hf]"' ) - + # Extract messages and model from kwargs - messages = kwargs.get('messages', []) - model_name = kwargs.get('model', 'ByteDance-Seed/UI-TARS-1.5-7B') - max_new_tokens = kwargs.get('max_tokens', 128) - + messages = kwargs.get("messages", []) + model_name = kwargs.get("model", "ByteDance-Seed/UI-TARS-1.5-7B") + max_new_tokens = kwargs.get("max_tokens", 128) + # Warn about ignored kwargs - ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'} + ignored_kwargs = set(kwargs.keys()) - {"messages", "model", "max_tokens"} if ignored_kwargs: warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}") - + # Convert messages to HuggingFace format hf_messages = self._convert_messages(messages) - + # Delegate to model handler handler = self._get_handler(model_name) generated_text = handler.generate(hf_messages, max_new_tokens=max_new_tokens) return generated_text - + def completion(self, *args, **kwargs) -> ModelResponse: """Synchronous completion method. - + Returns: ModelResponse with generated text """ generated_text = self._generate(**kwargs) - + return completion( model=f"huggingface-local/{kwargs['model']}", mock_response=generated_text, ) - + async def acompletion(self, *args, **kwargs) -> ModelResponse: """Asynchronous completion method. - + Returns: ModelResponse with generated text """ # Run _generate in thread pool to avoid blocking loop = asyncio.get_event_loop() generated_text = await loop.run_in_executor( - self._executor, - functools.partial(self._generate, **kwargs) + self._executor, functools.partial(self._generate, **kwargs) ) - + return await acompletion( model=f"huggingface-local/{kwargs['model']}", mock_response=generated_text, ) - + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: """Synchronous streaming method. 
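        Not incremental: the full response is generated first, then emitted
        as a single terminal chunk with finish_reason "stop".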
- + Returns: Iterator of GenericStreamingChunk """ generated_text = self._generate(**kwargs) - + generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, @@ -165,22 +159,21 @@ class HuggingFaceLocalAdapter(CustomLLM): "tool_use": None, "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}, } - + yield generic_streaming_chunk - + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: """Asynchronous streaming method. - + Returns: AsyncIterator of GenericStreamingChunk """ # Run _generate in thread pool to avoid blocking loop = asyncio.get_event_loop() generated_text = await loop.run_in_executor( - self._executor, - functools.partial(self._generate, **kwargs) + self._executor, functools.partial(self._generate, **kwargs) ) - + generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, @@ -189,5 +182,5 @@ class HuggingFaceLocalAdapter(CustomLLM): "tool_use": None, "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}, } - - yield generic_streaming_chunk \ No newline at end of file + + yield generic_streaming_chunk diff --git a/libs/python/agent/agent/adapters/human_adapter.py b/libs/python/agent/agent/adapters/human_adapter.py index 0cd4fe02..e5452acd 100644 --- a/libs/python/agent/agent/adapters/human_adapter.py +++ b/libs/python/agent/agent/adapters/human_adapter.py @@ -1,22 +1,23 @@ -import os import asyncio +import os +from typing import Any, AsyncIterator, Dict, Iterator, List + import requests -from typing import List, Dict, Any, Iterator, AsyncIterator -from litellm.types.utils import GenericStreamingChunk, ModelResponse +from litellm import acompletion, completion from litellm.llms.custom_llm import CustomLLM -from litellm import completion, acompletion +from litellm.types.utils import GenericStreamingChunk, ModelResponse class HumanAdapter(CustomLLM): """Human Adapter for human-in-the-loop completions. - + This adapter sends completion requests to a human completion server where humans can review and respond to AI requests. """ - + def __init__(self, base_url: str | None = None, timeout: float = 300.0, **kwargs): """Initialize the human adapter. - + Args: base_url: Base URL for the human completion server. Defaults to HUMAN_BASE_URL environment variable or http://localhost:8002 @@ -24,60 +25,58 @@ class HumanAdapter(CustomLLM): **kwargs: Additional arguments """ super().__init__() - self.base_url = base_url or os.getenv('HUMAN_BASE_URL', 'http://localhost:8002') + self.base_url = base_url or os.getenv("HUMAN_BASE_URL", "http://localhost:8002") self.timeout = timeout - + # Ensure base_url doesn't end with slash - self.base_url = self.base_url.rstrip('/') - + self.base_url = self.base_url.rstrip("/") + def _queue_completion(self, messages: List[Dict[str, Any]], model: str) -> str: """Queue a completion request and return the call ID. - + Args: messages: Messages in OpenAI format model: Model name - + Returns: Call ID for tracking the request - + Raises: Exception: If queueing fails """ try: response = requests.post( - f"{self.base_url}/queue", - json={"messages": messages, "model": model}, - timeout=10 + f"{self.base_url}/queue", json={"messages": messages, "model": model}, timeout=10 ) response.raise_for_status() return response.json()["id"] except requests.RequestException as e: raise Exception(f"Failed to queue completion request: {e}") - + def _wait_for_completion(self, call_id: str) -> Dict[str, Any]: """Wait for human to complete the call. 
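        Polls GET {base_url}/status/{call_id} roughly once per second until the
        status is "completed" or "failed", or until self.timeout elapses.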
- + Args: call_id: ID of the queued completion call - + Returns: Dict containing response and/or tool_calls - + Raises: TimeoutError: If timeout is exceeded Exception: If completion fails """ import time - + start_time = time.time() - + while True: try: # Check status status_response = requests.get(f"{self.base_url}/status/{call_id}") status_response.raise_for_status() status_data = status_response.json() - + if status_data["status"] == "completed": result = {} if "response" in status_data and status_data["response"]: @@ -88,38 +87,41 @@ class HumanAdapter(CustomLLM): elif status_data["status"] == "failed": error_msg = status_data.get("error", "Unknown error") raise Exception(f"Completion failed: {error_msg}") - + # Check timeout if time.time() - start_time > self.timeout: - raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds") - + raise TimeoutError( + f"Timeout waiting for human response after {self.timeout} seconds" + ) + # Wait before checking again time.sleep(1.0) - + except requests.RequestException as e: if time.time() - start_time > self.timeout: raise TimeoutError(f"Timeout waiting for human response: {e}") # Continue trying if we haven't timed out time.sleep(1.0) - + async def _async_wait_for_completion(self, call_id: str) -> Dict[str, Any]: """Async version of wait_for_completion. - + Args: call_id: ID of the queued completion call - + Returns: Dict containing response and/or tool_calls - + Raises: TimeoutError: If timeout is exceeded Exception: If completion fails """ - import aiohttp import time - + + import aiohttp + start_time = time.time() - + async with aiohttp.ClientSession() as session: while True: try: @@ -127,7 +129,7 @@ class HumanAdapter(CustomLLM): async with session.get(f"{self.base_url}/status/{call_id}") as response: response.raise_for_status() status_data = await response.json() - + if status_data["status"] == "completed": result = {} if "response" in status_data and status_data["response"]: @@ -138,166 +140,158 @@ class HumanAdapter(CustomLLM): elif status_data["status"] == "failed": error_msg = status_data.get("error", "Unknown error") raise Exception(f"Completion failed: {error_msg}") - + # Check timeout if time.time() - start_time > self.timeout: - raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds") - + raise TimeoutError( + f"Timeout waiting for human response after {self.timeout} seconds" + ) + # Wait before checking again await asyncio.sleep(1.0) - + except Exception as e: if time.time() - start_time > self.timeout: raise TimeoutError(f"Timeout waiting for human response: {e}") # Continue trying if we haven't timed out await asyncio.sleep(1.0) - + def _generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]: """Generate a human response for the given messages. - + Args: messages: Messages in OpenAI format model: Model name - + Returns: Dict containing response and/or tool_calls """ # Queue the completion request call_id = self._queue_completion(messages, model) - + # Wait for human response response = self._wait_for_completion(call_id) - + return response - - async def _async_generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]: + + async def _async_generate_response( + self, messages: List[Dict[str, Any]], model: str + ) -> Dict[str, Any]: """Async version of _generate_response. 
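        The queueing step itself remains synchronous (a blocking requests call);
        only the status polling is awaited.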
- + Args: messages: Messages in OpenAI format model: Model name - + Returns: Dict containing response and/or tool_calls """ # Queue the completion request (sync operation) call_id = self._queue_completion(messages, model) - + # Wait for human response (async) response = await self._async_wait_for_completion(call_id) - + return response - + def completion(self, *args, **kwargs) -> ModelResponse: """Synchronous completion method. - + Returns: ModelResponse with human-generated text or tool calls """ - messages = kwargs.get('messages', []) - model = kwargs.get('model', 'human') - + messages = kwargs.get("messages", []) + model = kwargs.get("model", "human") + # Generate human response human_response_data = self._generate_response(messages, model) - + # Create ModelResponse with proper structure - from litellm.types.utils import ModelResponse, Choices, Message - import uuid import time - + import uuid + + from litellm.types.utils import Choices, Message, ModelResponse + # Create message content based on response type if "tool_calls" in human_response_data and human_response_data["tool_calls"]: # Tool calls response message = Message( role="assistant", content=human_response_data.get("response", ""), - tool_calls=human_response_data["tool_calls"] + tool_calls=human_response_data["tool_calls"], ) else: # Text response - message = Message( - role="assistant", - content=human_response_data.get("response", "") - ) - - choice = Choices( - finish_reason="stop", - index=0, - message=message - ) - + message = Message(role="assistant", content=human_response_data.get("response", "")) + + choice = Choices(finish_reason="stop", index=0, message=message) + result = ModelResponse( id=f"human-{uuid.uuid4()}", choices=[choice], created=int(time.time()), model=f"human/{model}", - object="chat.completion" + object="chat.completion", ) - + return result - + async def acompletion(self, *args, **kwargs) -> ModelResponse: """Asynchronous completion method. - + Returns: ModelResponse with human-generated text or tool calls """ - messages = kwargs.get('messages', []) - model = kwargs.get('model', 'human') - + messages = kwargs.get("messages", []) + model = kwargs.get("model", "human") + # Generate human response human_response_data = await self._async_generate_response(messages, model) - + # Create ModelResponse with proper structure - from litellm.types.utils import ModelResponse, Choices, Message - import uuid import time - + import uuid + + from litellm.types.utils import Choices, Message, ModelResponse + # Create message content based on response type if "tool_calls" in human_response_data and human_response_data["tool_calls"]: # Tool calls response message = Message( role="assistant", content=human_response_data.get("response", ""), - tool_calls=human_response_data["tool_calls"] + tool_calls=human_response_data["tool_calls"], ) else: # Text response - message = Message( - role="assistant", - content=human_response_data.get("response", "") - ) - - choice = Choices( - finish_reason="stop", - index=0, - message=message - ) - + message = Message(role="assistant", content=human_response_data.get("response", "")) + + choice = Choices(finish_reason="stop", index=0, message=message) + result = ModelResponse( id=f"human-{uuid.uuid4()}", choices=[choice], created=int(time.time()), model=f"human/{model}", - object="chat.completion" + object="chat.completion", ) - + return result - + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: """Synchronous streaming method. 
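        Blocks until the human reply arrives, then yields it as a single chunk
        (tool-call replies get their own single-chunk branch below).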
- + Yields: Streaming chunks with human-generated text or tool calls """ - messages = kwargs.get('messages', []) - model = kwargs.get('model', 'human') - + messages = kwargs.get("messages", []) + model = kwargs.get("model", "human") + # Generate human response human_response_data = self._generate_response(messages, model) - + import time - + # Handle tool calls vs text response if "tool_calls" in human_response_data and human_response_data["tool_calls"]: # Stream tool calls as a single chunk @@ -319,22 +313,26 @@ class HumanAdapter(CustomLLM): "is_finished": True, "text": response_text, "tool_use": None, - "usage": {"completion_tokens": len(response_text.split()), "prompt_tokens": 0, "total_tokens": len(response_text.split())}, + "usage": { + "completion_tokens": len(response_text.split()), + "prompt_tokens": 0, + "total_tokens": len(response_text.split()), + }, } yield generic_chunk - + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: """Asynchronous streaming method. - + Yields: Streaming chunks with human-generated text or tool calls """ - messages = kwargs.get('messages', []) - model = kwargs.get('model', 'human') - + messages = kwargs.get("messages", []) + model = kwargs.get("model", "human") + # Generate human response human_response = await self._async_generate_response(messages, model) - + # Return as single streaming chunk generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", @@ -342,7 +340,11 @@ class HumanAdapter(CustomLLM): "is_finished": True, "text": human_response, "tool_use": None, - "usage": {"completion_tokens": len(human_response.split()), "prompt_tokens": 0, "total_tokens": len(human_response.split())}, + "usage": { + "completion_tokens": len(human_response.split()), + "prompt_tokens": 0, + "total_tokens": len(human_response.split()), + }, } - - yield generic_streaming_chunk \ No newline at end of file + + yield generic_streaming_chunk diff --git a/libs/python/agent/agent/adapters/mlxvlm_adapter.py b/libs/python/agent/agent/adapters/mlxvlm_adapter.py index 8255725b..6caae278 100644 --- a/libs/python/agent/agent/adapters/mlxvlm_adapter.py +++ b/libs/python/agent/agent/adapters/mlxvlm_adapter.py @@ -1,24 +1,26 @@ import asyncio -import functools -import warnings -import io import base64 +import functools +import io import math import re +import warnings from concurrent.futures import ThreadPoolExecutor -from typing import Iterator, AsyncIterator, Dict, List, Any, Optional, Tuple, cast -from PIL import Image -from litellm.types.utils import GenericStreamingChunk, ModelResponse +from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Tuple, cast + +from litellm import acompletion, completion from litellm.llms.custom_llm import CustomLLM -from litellm import completion, acompletion +from litellm.types.utils import GenericStreamingChunk, ModelResponse +from PIL import Image # Try to import MLX dependencies try: import mlx.core as mx - from mlx_vlm import load, generate + from mlx_vlm import generate, load from mlx_vlm.prompt_utils import apply_chat_template from mlx_vlm.utils import load_config from transformers.tokenization_utils import PreTrainedTokenizer + MLX_AVAILABLE = True except ImportError: MLX_AVAILABLE = False @@ -29,20 +31,28 @@ MIN_PIXELS = 100 * 28 * 28 MAX_PIXELS = 16384 * 28 * 28 MAX_RATIO = 200 + def round_by_factor(number: float, factor: int) -> int: """Returns the closest integer to 'number' that is divisible by 'factor'.""" return round(number / factor) * factor + def 
ceil_by_factor(number: float, factor: int) -> int: """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.""" return math.ceil(number / factor) * factor + def floor_by_factor(number: float, factor: int) -> int: """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'.""" return math.floor(number / factor) * factor + def smart_resize( - height: int, width: int, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS + height: int, + width: int, + factor: int = IMAGE_FACTOR, + min_pixels: int = MIN_PIXELS, + max_pixels: int = MAX_PIXELS, ) -> tuple[int, int]: """ Rescales the image so that the following conditions are met: @@ -70,61 +80,62 @@ def smart_resize( class MLXVLMAdapter(CustomLLM): """MLX VLM Adapter for running vision-language models locally using MLX.""" - + def __init__(self, **kwargs): """Initialize the adapter. - + Args: **kwargs: Additional arguments """ super().__init__() - + self.models = {} # Cache for loaded models self.processors = {} # Cache for loaded processors self.configs = {} # Cache for loaded configs self._executor = ThreadPoolExecutor(max_workers=1) # Single thread pool - + def _load_model_and_processor(self, model_name: str): """Load model and processor if not already cached. - + Args: model_name: Name of the model to load - + Returns: Tuple of (model, processor, config) """ if not MLX_AVAILABLE: raise ImportError("MLX VLM dependencies not available. Please install mlx-vlm.") - + if model_name not in self.models: # Load model and processor model_obj, processor = load( - model_name, - processor_kwargs={"min_pixels": MIN_PIXELS, "max_pixels": MAX_PIXELS} + model_name, processor_kwargs={"min_pixels": MIN_PIXELS, "max_pixels": MAX_PIXELS} ) config = load_config(model_name) - + # Cache them self.models[model_name] = model_obj self.processors[model_name] = processor self.configs[model_name] = config - + return self.models[model_name], self.processors[model_name], self.configs[model_name] - - def _process_coordinates(self, text: str, original_size: Tuple[int, int], model_size: Tuple[int, int]) -> str: + + def _process_coordinates( + self, text: str, original_size: Tuple[int, int], model_size: Tuple[int, int] + ) -> str: """Process coordinates in box tokens based on image resizing using smart_resize approach. - + Args: text: Text containing box tokens original_size: Original image size (width, height) model_size: Model processed image size (width, height) - + Returns: Text with processed coordinates """ # Find all box tokens box_pattern = r"<\|box_start\|>\((\d+),\s*(\d+)\)<\|box_end\|>" - + def process_coords(match): model_x, model_y = int(match.group(1)), int(match.group(2)) # Scale coordinates from model space to original image space @@ -132,15 +143,20 @@ class MLXVLMAdapter(CustomLLM): new_x = int(model_x * original_size[0] / model_size[0]) # Width new_y = int(model_y * original_size[1] / model_size[1]) # Height return f"<|box_start|>({new_x},{new_y})<|box_end|>" - + return re.sub(box_pattern, process_coords, text) - - def _convert_messages(self, messages: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Image.Image], Dict[int, Tuple[int, int]], Dict[int, Tuple[int, int]]]: + + def _convert_messages(self, messages: List[Dict[str, Any]]) -> Tuple[ + List[Dict[str, Any]], + List[Image.Image], + Dict[int, Tuple[int, int]], + Dict[int, Tuple[int, int]], + ]: """Convert OpenAI format messages to MLX VLM format and extract images. 
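        Each image is resized via smart_resize, and both its original and resized
        dimensions are recorded so box-token coordinates can later be mapped
        between model space and the original screenshot.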
- + Args: messages: Messages in OpenAI format - + Returns: Tuple of (processed_messages, images, original_sizes, model_sizes) """ @@ -149,13 +165,10 @@ class MLXVLMAdapter(CustomLLM): original_sizes = {} # Track original sizes of images for coordinate mapping model_sizes = {} # Track model processed sizes image_index = 0 - + for message in messages: - processed_message = { - "role": message["role"], - "content": [] - } - + processed_message = {"role": message["role"], "content": []} + content = message.get("content", []) if isinstance(content, str): # Simple text content @@ -165,164 +178,163 @@ class MLXVLMAdapter(CustomLLM): processed_content = [] for item in content: if item.get("type") == "text": - processed_content.append({ - "type": "text", - "text": item.get("text", "") - }) + processed_content.append({"type": "text", "text": item.get("text", "")}) elif item.get("type") == "image_url": image_url = item.get("image_url", {}).get("url", "") pil_image = None - + if image_url.startswith("data:image/"): # Extract base64 data - base64_data = image_url.split(',')[1] + base64_data = image_url.split(",")[1] # Convert base64 to PIL Image image_data = base64.b64decode(base64_data) pil_image = Image.open(io.BytesIO(image_data)) else: # Handle file path or URL pil_image = Image.open(image_url) - + # Store original image size for coordinate mapping original_size = pil_image.size original_sizes[image_index] = original_size - + # Use smart_resize to determine model size # Note: smart_resize expects (height, width) but PIL gives (width, height) height, width = original_size[1], original_size[0] new_height, new_width = smart_resize(height, width) # Store model size in (width, height) format for consistent coordinate processing model_sizes[image_index] = (new_width, new_height) - + # Resize the image using the calculated dimensions from smart_resize resized_image = pil_image.resize((new_width, new_height)) images.append(resized_image) - + # Add image placeholder to content - processed_content.append({ - "type": "image" - }) - + processed_content.append({"type": "image"}) + image_index += 1 - + processed_message["content"] = processed_content - + processed_messages.append(processed_message) - + return processed_messages, images, original_sizes, model_sizes - + def _generate(self, **kwargs) -> str: """Generate response using the local MLX VLM model. 
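        Box tokens in the user's text are first mapped into model space (the
        inverse transform), and box tokens in the generated output are mapped
        back to original-image coordinates before returning.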
- + Args: **kwargs: Keyword arguments containing messages and model info - + Returns: Generated text response """ - messages = kwargs.get('messages', []) - model_name = kwargs.get('model', 'mlx-community/UI-TARS-1.5-7B-4bit') - max_tokens = kwargs.get('max_tokens', 128) - + messages = kwargs.get("messages", []) + model_name = kwargs.get("model", "mlx-community/UI-TARS-1.5-7B-4bit") + max_tokens = kwargs.get("max_tokens", 128) + # Warn about ignored kwargs - ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'} + ignored_kwargs = set(kwargs.keys()) - {"messages", "model", "max_tokens"} if ignored_kwargs: warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}") - + # Load model and processor model, processor, config = self._load_model_and_processor(model_name) - + # Convert messages and extract images processed_messages, images, original_sizes, model_sizes = self._convert_messages(messages) - + # Process user text input with box coordinates after image processing # Swap original_size and model_size arguments for inverse transformation for msg_idx, msg in enumerate(processed_messages): if msg.get("role") == "user" and isinstance(msg.get("content"), str): content = msg.get("content", "") - if "<|box_start|>" in content and original_sizes and model_sizes and 0 in original_sizes and 0 in model_sizes: + if ( + "<|box_start|>" in content + and original_sizes + and model_sizes + and 0 in original_sizes + and 0 in model_sizes + ): orig_size = original_sizes[0] model_size = model_sizes[0] # Swap arguments to perform inverse transformation for user input - processed_messages[msg_idx]["content"] = self._process_coordinates(content, model_size, orig_size) - + processed_messages[msg_idx]["content"] = self._process_coordinates( + content, model_size, orig_size + ) + try: # Format prompt according to model requirements using the processor directly prompt = processor.apply_chat_template( - processed_messages, - tokenize=False, - add_generation_prompt=True, - return_tensors='pt' + processed_messages, tokenize=False, add_generation_prompt=True, return_tensors="pt" ) tokenizer = cast(PreTrainedTokenizer, processor) - + # Generate response text_content, usage = generate( - model, - tokenizer, - str(prompt), - images, # type: ignore + model, + tokenizer, + str(prompt), + images, # type: ignore verbose=False, - max_tokens=max_tokens + max_tokens=max_tokens, ) - + except Exception as e: raise RuntimeError(f"Error generating response: {str(e)}") from e - + # Process coordinates in the response back to original image space if original_sizes and model_sizes and 0 in original_sizes and 0 in model_sizes: # Get original image size and model size (using the first image) orig_size = original_sizes[0] model_size = model_sizes[0] - + # Check if output contains box tokens that need processing if "<|box_start|>" in text_content: # Process coordinates from model space back to original image space text_content = self._process_coordinates(text_content, orig_size, model_size) - + return text_content - + def completion(self, *args, **kwargs) -> ModelResponse: """Synchronous completion method. - + Returns: ModelResponse with generated text """ generated_text = self._generate(**kwargs) - + result = completion( model=f"mlx/{kwargs.get('model', 'mlx-community/UI-TARS-1.5-7B-4bit')}", mock_response=generated_text, ) return cast(ModelResponse, result) - + async def acompletion(self, *args, **kwargs) -> ModelResponse: """Asynchronous completion method. 
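        Runs the blocking _generate call on the adapter's single-worker thread
        pool via run_in_executor so the event loop is not stalled.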
- + Returns: ModelResponse with generated text """ # Run _generate in thread pool to avoid blocking loop = asyncio.get_event_loop() generated_text = await loop.run_in_executor( - self._executor, - functools.partial(self._generate, **kwargs) + self._executor, functools.partial(self._generate, **kwargs) ) - + result = await acompletion( model=f"mlx/{kwargs.get('model', 'mlx-community/UI-TARS-1.5-7B-4bit')}", mock_response=generated_text, ) return cast(ModelResponse, result) - + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: """Synchronous streaming method. - + Returns: Iterator of GenericStreamingChunk """ generated_text = self._generate(**kwargs) - + generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, @@ -331,22 +343,21 @@ class MLXVLMAdapter(CustomLLM): "tool_use": None, "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}, } - + yield generic_streaming_chunk - + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: """Asynchronous streaming method. - + Returns: AsyncIterator of GenericStreamingChunk """ # Run _generate in thread pool to avoid blocking loop = asyncio.get_event_loop() generated_text = await loop.run_in_executor( - self._executor, - functools.partial(self._generate, **kwargs) + self._executor, functools.partial(self._generate, **kwargs) ) - + generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, @@ -355,5 +366,5 @@ class MLXVLMAdapter(CustomLLM): "tool_use": None, "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}, } - - yield generic_streaming_chunk \ No newline at end of file + + yield generic_streaming_chunk diff --git a/libs/python/agent/agent/adapters/models/__init__.py b/libs/python/agent/agent/adapters/models/__init__.py index 3ed48404..a7948d79 100644 --- a/libs/python/agent/agent/adapters/models/__init__.py +++ b/libs/python/agent/agent/adapters/models/__init__.py @@ -2,32 +2,40 @@ from typing import Optional try: from transformers import AutoConfig + HF_AVAILABLE = True except ImportError: HF_AVAILABLE = False from .generic import GenericHFModel +from .internvl import InternVLModel from .opencua import OpenCUAModel from .qwen2_5_vl import Qwen2_5_VLModel -from .internvl import InternVLModel + def load_model(model_name: str, device: str = "auto", trust_remote_code: bool = False): """Factory function to load and return the right model handler instance. - + - If the underlying transformers config class matches OpenCUA, return OpenCUAModel - Otherwise, return GenericHFModel """ if not HF_AVAILABLE: raise ImportError( - "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\"" + 'HuggingFace transformers dependencies not found. 
Install with: pip install "cua-agent[uitars-hf]"' ) cfg = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code) cls = cfg.__class__.__name__ print(f"cls: {cls}") if "OpenCUA" in cls: - return OpenCUAModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) + return OpenCUAModel( + model_name=model_name, device=device, trust_remote_code=trust_remote_code + ) elif "Qwen2_5_VL" in cls: - return Qwen2_5_VLModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) + return Qwen2_5_VLModel( + model_name=model_name, device=device, trust_remote_code=trust_remote_code + ) elif "InternVL" in cls: - return InternVLModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) + return InternVLModel( + model_name=model_name, device=device, trust_remote_code=trust_remote_code + ) return GenericHFModel(model_name=model_name, device=device, trust_remote_code=trust_remote_code) diff --git a/libs/python/agent/agent/adapters/models/generic.py b/libs/python/agent/agent/adapters/models/generic.py index aefbaa7f..01c8c3bc 100644 --- a/libs/python/agent/agent/adapters/models/generic.py +++ b/libs/python/agent/agent/adapters/models/generic.py @@ -1,9 +1,10 @@ -from typing import List, Dict, Any, Optional +from typing import Any, Dict, List, Optional # Hugging Face imports are local to avoid hard dependency at module import try: import torch # type: ignore from transformers import AutoModel, AutoProcessor # type: ignore + HF_AVAILABLE = True except Exception: HF_AVAILABLE = False @@ -14,10 +15,12 @@ class GenericHFModel: Loads an AutoModelForImageTextToText and AutoProcessor and generates text. """ - def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None: + def __init__( + self, model_name: str, device: str = "auto", trust_remote_code: bool = False + ) -> None: if not HF_AVAILABLE: raise ImportError( - "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\"" + 'HuggingFace transformers dependencies not found. 
Install with: pip install "cua-agent[uitars-hf]"' ) self.model_name = model_name self.device = device @@ -64,7 +67,7 @@ class GenericHFModel: generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens) # Trim prompt tokens from output generated_ids_trimmed = [ - out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) ] # Decode output_text = self.processor.batch_decode( diff --git a/libs/python/agent/agent/adapters/models/internvl.py b/libs/python/agent/agent/adapters/models/internvl.py index b9aa4243..c0a7c84f 100644 --- a/libs/python/agent/agent/adapters/models/internvl.py +++ b/libs/python/agent/agent/adapters/models/internvl.py @@ -1,19 +1,22 @@ from __future__ import annotations -from typing import List, Dict, Any, Optional + +from typing import Any, Dict, List, Optional # Hugging Face imports are local to avoid hard dependency at module import try: - import torch # type: ignore - from transformers import AutoModel, AutoTokenizer # type: ignore - # Attempt to import InternVL's model dependencies - import einops as _ # type: ignore - import timm as _ # type: ignore - from PIL import Image # type: ignore - import torchvision.transforms as T # type: ignore - from torchvision.transforms.functional import InterpolationMode # type: ignore import base64 # type: ignore from io import BytesIO # type: ignore + + # Attempt to import InternVL's model dependencies + import einops as _ # type: ignore import requests # type: ignore + import timm as _ # type: ignore + import torch # type: ignore + import torchvision.transforms as T # type: ignore + from PIL import Image # type: ignore + from torchvision.transforms.functional import InterpolationMode # type: ignore + from transformers import AutoModel, AutoTokenizer # type: ignore + HF_AVAILABLE = True except Exception: HF_AVAILABLE = False @@ -25,10 +28,12 @@ class InternVLModel: Provides preprocessing to support multi-turn conversations with multiple images. """ - def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None: + def __init__( + self, model_name: str, device: str = "auto", trust_remote_code: bool = False + ) -> None: if not HF_AVAILABLE: raise ImportError( - "InternVL dependencies not found. Install with: pip install \"cua-agent[internvl-hf]\"" + 'InternVL dependencies not found. 
Install with: pip install "cua-agent[internvl-hf]"' ) self.model_name = model_name self.device = device @@ -60,16 +65,25 @@ class InternVLModel: def _build_transform(self, input_size: int) -> T.Compose: MEAN, STD = self.IMAGENET_MEAN, self.IMAGENET_STD - transform = T.Compose([ - T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img), - T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC), - T.ToTensor(), - T.Normalize(mean=MEAN, std=STD) - ]) + transform = T.Compose( + [ + T.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img), + T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC), + T.ToTensor(), + T.Normalize(mean=MEAN, std=STD), + ] + ) return transform - def _find_closest_aspect_ratio(self, aspect_ratio: float, target_ratios: List[tuple], width: int, height: int, image_size: int): - best_ratio_diff = float('inf') + def _find_closest_aspect_ratio( + self, + aspect_ratio: float, + target_ratios: List[tuple], + width: int, + height: int, + image_size: int, + ): + best_ratio_diff = float("inf") best_ratio = (1, 1) area = width * height for ratio in target_ratios: @@ -83,17 +97,29 @@ class InternVLModel: best_ratio = ratio return best_ratio - def _dynamic_preprocess(self, image: Image.Image, min_num: int = 1, max_num: int = 12, image_size: int = 448, use_thumbnail: bool = True) -> List[Image.Image]: + def _dynamic_preprocess( + self, + image: Image.Image, + min_num: int = 1, + max_num: int = 12, + image_size: int = 448, + use_thumbnail: bool = True, + ) -> List[Image.Image]: orig_width, orig_height = image.size aspect_ratio = orig_width / orig_height target_ratios = set( - (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if - i * j <= max_num and i * j >= min_num) + (i, j) + for n in range(min_num, max_num + 1) + for i in range(1, n + 1) + for j in range(1, n + 1) + if i * j <= max_num and i * j >= min_num + ) target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1]) target_aspect_ratio = self._find_closest_aspect_ratio( - aspect_ratio, target_ratios, orig_width, orig_height, image_size) + aspect_ratio, target_ratios, orig_width, orig_height, image_size + ) target_width = image_size * target_aspect_ratio[0] target_height = image_size * target_aspect_ratio[1] @@ -106,7 +132,7 @@ class InternVLModel: (i % (target_width // image_size)) * image_size, (i // (target_width // image_size)) * image_size, ((i % (target_width // image_size)) + 1) * image_size, - ((i // (target_width // image_size)) + 1) * image_size + ((i // (target_width // image_size)) + 1) * image_size, ) split_img = resized_img.crop(box) processed_images.append(split_img) @@ -122,20 +148,24 @@ class InternVLModel: # data URL base64 header, b64data = src.split(",", 1) img_bytes = base64.b64decode(b64data) - return Image.open(BytesIO(img_bytes)).convert('RGB') + return Image.open(BytesIO(img_bytes)).convert("RGB") if src.startswith("http://") or src.startswith("https://"): resp = requests.get(src, timeout=10) resp.raise_for_status() - return Image.open(BytesIO(resp.content)).convert('RGB') + return Image.open(BytesIO(resp.content)).convert("RGB") # Assume local file path - return Image.open(src).convert('RGB') + return Image.open(src).convert("RGB") - def _images_to_pixel_values(self, images: List[Image.Image], input_size: int = 448, max_num: int = 12): + def _images_to_pixel_values( + self, images: List[Image.Image], input_size: int = 448, max_num: int = 12 + ): transform = 
self._build_transform(input_size=input_size) pixel_values_list = [] num_patches_list: List[int] = [] for img in images: - tiles = self._dynamic_preprocess(img, image_size=input_size, use_thumbnail=True, max_num=max_num) + tiles = self._dynamic_preprocess( + img, image_size=input_size, use_thumbnail=True, max_num=max_num + ) pv = [transform(tile) for tile in tiles] pv = torch.stack(pv) num_patches_list.append(pv.shape[0]) @@ -191,7 +221,9 @@ class InternVLModel: last_user_text_parts = parts_text or last_user_text_parts elif role == "assistant": # Only keep text content for history - parts_text = [item.get("text", "") for item in content_items if item.get("type") == "text"] + parts_text = [ + item.get("text", "") for item in content_items if item.get("type") == "text" + ] text = "\n".join(parts_text).strip() if text: context_lines.append(f"Assistant: {text}") @@ -200,7 +232,9 @@ class InternVLModel: pixel_values = None num_patches_list: List[int] = [] if all_images: - pixel_values, num_patches_list = self._images_to_pixel_values(all_images, input_size=448, max_num=12) + pixel_values, num_patches_list = self._images_to_pixel_values( + all_images, input_size=448, max_num=12 + ) if pixel_values is not None: # Convert dtype/device as in docs pixel_values = pixel_values.to(torch.bfloat16) @@ -246,7 +280,9 @@ class InternVLModel: num_patches_list=num_patches_list, ) else: - response = self.model.chat(self.tokenizer, pixel_values, question, generation_config) + response = self.model.chat( + self.tokenizer, pixel_values, question, generation_config + ) except Exception as e: # Fallback: return empty string to avoid crashing the adapter return "" diff --git a/libs/python/agent/agent/adapters/models/opencua.py b/libs/python/agent/agent/adapters/models/opencua.py index 32c73134..a2831d4d 100644 --- a/libs/python/agent/agent/adapters/models/opencua.py +++ b/libs/python/agent/agent/adapters/models/opencua.py @@ -1,13 +1,18 @@ -from typing import List, Dict, Any -import re import base64 +import re from io import BytesIO +from typing import Any, Dict, List try: + import blobfile as _ # assert blobfile is installed import torch # type: ignore - from transformers import AutoTokenizer, AutoModel, AutoImageProcessor # type: ignore from PIL import Image # type: ignore - import blobfile as _ # assert blobfile is installed + from transformers import ( # type: ignore + AutoImageProcessor, + AutoModel, + AutoTokenizer, + ) + OPENCUA_AVAILABLE = True except Exception: OPENCUA_AVAILABLE = False @@ -16,10 +21,12 @@ except Exception: class OpenCUAModel: """OpenCUA model handler using AutoTokenizer, AutoModel and AutoImageProcessor.""" - def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None: + def __init__( + self, model_name: str, device: str = "auto", trust_remote_code: bool = False + ) -> None: if not OPENCUA_AVAILABLE: raise ImportError( - "OpenCUA requirements not found. Install with: pip install \"cua-agent[opencua-hf]\"" + 'OpenCUA requirements not found. 
Install with: pip install "cua-agent[opencua-hf]"' ) self.model_name = model_name self.device = device @@ -56,7 +63,11 @@ class OpenCUAModel: return "" def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 512) -> str: - assert self.model is not None and self.tokenizer is not None and self.image_processor is not None + assert ( + self.model is not None + and self.tokenizer is not None + and self.image_processor is not None + ) # Tokenize text side using chat template input_ids = self.tokenizer.apply_chat_template( @@ -74,7 +85,11 @@ class OpenCUAModel: pixel_values = torch.tensor(image_info["pixel_values"]).to( dtype=torch.bfloat16, device=self.model.device ) - grid_thws = torch.tensor(image_info["image_grid_thw"]) if "image_grid_thw" in image_info else None + grid_thws = ( + torch.tensor(image_info["image_grid_thw"]) + if "image_grid_thw" in image_info + else None + ) gen_kwargs: Dict[str, Any] = { "max_new_tokens": max_new_tokens, diff --git a/libs/python/agent/agent/adapters/models/qwen2_5_vl.py b/libs/python/agent/agent/adapters/models/qwen2_5_vl.py index 17b25f8a..8c8045c0 100644 --- a/libs/python/agent/agent/adapters/models/qwen2_5_vl.py +++ b/libs/python/agent/agent/adapters/models/qwen2_5_vl.py @@ -1,9 +1,10 @@ -from typing import List, Dict, Any, Optional +from typing import Any, Dict, List, Optional # Hugging Face imports are local to avoid hard dependency at module import try: import torch # type: ignore from transformers import AutoModelForImageTextToText, AutoProcessor # type: ignore + HF_AVAILABLE = True except Exception: HF_AVAILABLE = False @@ -14,10 +15,12 @@ class Qwen2_5_VLModel: Loads an AutoModelForImageTextToText and AutoProcessor and generates text. """ - def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None: + def __init__( + self, model_name: str, device: str = "auto", trust_remote_code: bool = False + ) -> None: if not HF_AVAILABLE: raise ImportError( - "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\"" + 'HuggingFace transformers dependencies not found. 
Install with: pip install "cua-agent[uitars-hf]"' ) self.model_name = model_name self.device = device @@ -64,7 +67,7 @@ class Qwen2_5_VLModel: generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens) # Trim prompt tokens from output generated_ids_trimmed = [ - out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) ] # Decode output_text = self.processor.batch_decode( diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 9339e9a8..370af997 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -3,76 +3,83 @@ ComputerAgent - Main agent class that selects and runs agent loops """ import asyncio -from pathlib import Path -from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set, Tuple - -from litellm.responses.utils import Usage - -from .types import ( - Messages, - AgentCapability, - ToolError, - IllegalArgumentError -) -from .responses import make_tool_error_item, replace_failed_computer_calls_with_function_calls -from .decorators import find_agent_config +import inspect import json +from pathlib import Path +from typing import ( + Any, + AsyncGenerator, + Callable, + Dict, + List, + Optional, + Set, + Tuple, + Union, + cast, +) + import litellm import litellm.utils -import inspect +from litellm.responses.utils import Usage + from .adapters import ( HuggingFaceLocalAdapter, HumanAdapter, MLXVLMAdapter, ) from .callbacks import ( - ImageRetentionCallback, - LoggingCallback, - TrajectorySaverCallback, BudgetManagerCallback, - TelemetryCallback, + ImageRetentionCallback, + LoggingCallback, OperatorNormalizerCallback, PromptInstructionsCallback, + TelemetryCallback, + TrajectorySaverCallback, ) -from .computers import ( - AsyncComputerHandler, - is_agent_computer, - make_computer_handler +from .computers import AsyncComputerHandler, is_agent_computer, make_computer_handler +from .decorators import find_agent_config +from .responses import ( + make_tool_error_item, + replace_failed_computer_calls_with_function_calls, ) +from .types import AgentCapability, IllegalArgumentError, Messages, ToolError + def assert_callable_with(f, *args, **kwargs): - """Check if function can be called with given arguments.""" - try: - inspect.signature(f).bind(*args, **kwargs) - return True - except TypeError as e: - sig = inspect.signature(f) - raise IllegalArgumentError(f"Expected {sig}, got args={args} kwargs={kwargs}") from e + """Check if function can be called with given arguments.""" + try: + inspect.signature(f).bind(*args, **kwargs) + return True + except TypeError as e: + sig = inspect.signature(f) + raise IllegalArgumentError(f"Expected {sig}, got args={args} kwargs={kwargs}") from e + def get_json(obj: Any, max_depth: int = 10) -> Any: def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any: if seen is None: seen = set() - + # Use model_dump() if available - if hasattr(o, 'model_dump'): + if hasattr(o, "model_dump"): return o.model_dump() - + # Check depth limit if depth > max_depth: return f"" - + # Check for circular references using object id obj_id = id(o) if obj_id in seen: return f"" - + # Handle Computer objects - if hasattr(o, '__class__') and 'computer' in getattr(o, '__class__').__name__.lower(): + if hasattr(o, "__class__") and "computer" in o.__class__.__name__.lower(): return f"" # Handle objects with __dict__ - if hasattr(o, '__dict__'): 
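# A minimal sketch of the cycle guard in get_json: object ids are tracked in
# a `seen` set, so self-referencing structures serialize to JSON-compatible
# data instead of recursing forever.
node = {"name": "root"}
node["self"] = node        # deliberately introduce a cycle
safe = get_json(node)      # no RecursionError; the cyclic entry is replaced inline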
+ if hasattr(o, "__dict__"): seen.add(obj_id) try: result = {} @@ -84,7 +91,7 @@ def get_json(obj: Any, max_depth: int = 10) -> Any: return result finally: seen.discard(obj_id) - + # Handle common types that might contain nested objects elif isinstance(o, dict): seen.add(obj_id) @@ -96,7 +103,7 @@ def get_json(obj: Any, max_depth: int = 10) -> Any: } finally: seen.discard(obj_id) - + elif isinstance(o, (list, tuple, set)): seen.add(obj_id) try: @@ -107,32 +114,33 @@ def get_json(obj: Any, max_depth: int = 10) -> Any: ] finally: seen.discard(obj_id) - + # For basic types that json.dumps can handle elif isinstance(o, (str, int, float, bool)) or o is None: return o - + # Fallback to string representation else: return str(o) - + def remove_nones(obj: Any) -> Any: if isinstance(obj, dict): return {k: remove_nones(v) for k, v in obj.items() if v is not None} elif isinstance(obj, list): return [remove_nones(item) for item in obj if item is not None] return obj - + # Serialize with circular reference and depth protection serialized = custom_serializer(obj) - + # Convert to JSON string and back to ensure JSON compatibility json_str = json.dumps(serialized) parsed = json.loads(json_str) - + # Final cleanup of any remaining None values return remove_nones(parsed) + def sanitize_message(msg: Any) -> Any: """Return a copy of the message with image_url omitted for computer_call_output messages.""" if msg.get("type") == "computer_call_output": @@ -143,19 +151,24 @@ def sanitize_message(msg: Any) -> Any: return sanitized return msg + def get_output_call_ids(messages: List[Dict[str, Any]]) -> List[str]: call_ids = [] for message in messages: - if message.get("type") == "computer_call_output" or message.get("type") == "function_call_output": + if ( + message.get("type") == "computer_call_output" + or message.get("type") == "function_call_output" + ): call_ids.append(message.get("call_id")) return call_ids + class ComputerAgent: """ Main agent class that automatically selects the appropriate agent loop based on the model and executes tool calls. """ - + def __init__( self, model: str, @@ -172,11 +185,11 @@ class ComputerAgent: max_trajectory_budget: Optional[float | dict] = None, telemetry_enabled: Optional[bool] = True, trust_remote_code: Optional[bool] = False, - **kwargs + **kwargs, ): """ Initialize ComputerAgent. - + Args: model: Model name (e.g., "claude-3-5-sonnet-20241022", "computer-use-preview", "omni+vertex_ai/gemini-pro") tools: List of tools (computer objects, decorated functions, etc.) @@ -193,11 +206,11 @@ class ComputerAgent: telemetry_enabled: If set, adds TelemetryCallback to track anonymized usage data. Enabled by default. trust_remote_code: If set, trust remote code when loading local models. Disabled by default. 
**kwargs: Additional arguments passed to the agent loop - """ + """ # If the loop is "human/human", we need to prefix a grounding model fallback if model in ["human/human", "human"]: model = "openai/computer-use-preview+human/human" - + self.model = model self.tools = tools or [] self.custom_loop = custom_loop @@ -236,34 +249,33 @@ class ComputerAgent: # Add image retention callback if only_n_most_recent_images is set if self.only_n_most_recent_images: self.callbacks.append(ImageRetentionCallback(self.only_n_most_recent_images)) - + # Add trajectory saver callback if trajectory_dir is set if self.trajectory_dir: if isinstance(self.trajectory_dir, dict): self.callbacks.append(TrajectorySaverCallback(**self.trajectory_dir)) elif isinstance(self.trajectory_dir, (str, Path)): self.callbacks.append(TrajectorySaverCallback(str(self.trajectory_dir))) - + # Add budget manager if max_trajectory_budget is set if max_trajectory_budget: if isinstance(max_trajectory_budget, dict): self.callbacks.append(BudgetManagerCallback(**max_trajectory_budget)) else: self.callbacks.append(BudgetManagerCallback(max_trajectory_budget)) - + # == Enable local model providers w/ LiteLLM == # Register local model providers hf_adapter = HuggingFaceLocalAdapter( - device="auto", - trust_remote_code=self.trust_remote_code or False + device="auto", trust_remote_code=self.trust_remote_code or False ) human_adapter = HumanAdapter() mlx_adapter = MLXVLMAdapter() litellm.custom_provider_map = [ {"provider": "huggingface-local", "custom_handler": hf_adapter}, {"provider": "human", "custom_handler": human_adapter}, - {"provider": "mlx", "custom_handler": mlx_adapter} + {"provider": "mlx", "custom_handler": mlx_adapter}, ] litellm.suppress_debug_info = True @@ -280,16 +292,16 @@ class ComputerAgent: # Instantiate the agent config class self.agent_loop = config_info.agent_class() self.agent_config_info = config_info - + self.tool_schemas = [] self.computer_handler = None - + async def _initialize_computers(self): """Initialize computer objects""" if not self.tool_schemas: # Process tools and create tool schemas self.tool_schemas = self._process_tools() - + # Find computer tool and create interface adapter computer_handler = None for schema in self.tool_schemas: @@ -297,7 +309,7 @@ class ComputerAgent: computer_handler = await make_computer_handler(schema["computer"]) break self.computer_handler = computer_handler - + def _process_input(self, input: Messages) -> List[Dict[str, Any]]: """Process input messages and create schemas for the agent loop""" if isinstance(input, str): @@ -307,69 +319,73 @@ class ComputerAgent: def _process_tools(self) -> List[Dict[str, Any]]: """Process tools and create schemas for the agent loop""" schemas = [] - + for tool in self.tools: # Check if it's a computer object (has interface attribute) if is_agent_computer(tool): # This is a computer tool - will be handled by agent loop - schemas.append({ - "type": "computer", - "computer": tool - }) + schemas.append({"type": "computer", "computer": tool}) elif callable(tool): # Use litellm.utils.function_to_dict to extract schema from docstring try: function_schema = litellm.utils.function_to_dict(tool) - schemas.append({ - "type": "function", - "function": function_schema - }) + schemas.append({"type": "function", "function": function_schema}) except Exception as e: print(f"Warning: Could not process tool {tool}: {e}") else: print(f"Warning: Unknown tool type: {tool}") - + return schemas - + def _get_tool(self, name: str) -> Optional[Callable]: """Get a tool 
by name""" for tool in self.tools: - if hasattr(tool, '__name__') and tool.__name__ == name: + if hasattr(tool, "__name__") and tool.__name__ == name: return tool - elif hasattr(tool, 'func') and tool.func.__name__ == name: + elif hasattr(tool, "func") and tool.func.__name__ == name: return tool return None - + # ============================================================================ # AGENT RUN LOOP LIFECYCLE HOOKS # ============================================================================ - + async def _on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: """Initialize run tracking by calling callbacks.""" for callback in self.callbacks: - if hasattr(callback, 'on_run_start'): + if hasattr(callback, "on_run_start"): await callback.on_run_start(kwargs, old_items) - - async def _on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: + + async def _on_run_end( + self, + kwargs: Dict[str, Any], + old_items: List[Dict[str, Any]], + new_items: List[Dict[str, Any]], + ) -> None: """Finalize run tracking by calling callbacks.""" for callback in self.callbacks: - if hasattr(callback, 'on_run_end'): + if hasattr(callback, "on_run_end"): await callback.on_run_end(kwargs, old_items, new_items) - - async def _on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool: + + async def _on_run_continue( + self, + kwargs: Dict[str, Any], + old_items: List[Dict[str, Any]], + new_items: List[Dict[str, Any]], + ) -> bool: """Check if run should continue by calling callbacks.""" for callback in self.callbacks: - if hasattr(callback, 'on_run_continue'): + if hasattr(callback, "on_run_continue"): should_continue = await callback.on_run_continue(kwargs, old_items, new_items) if not should_continue: return False return True - + async def _on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Prepare messages for the LLM call by applying callbacks.""" result = messages for callback in self.callbacks: - if hasattr(callback, 'on_llm_start'): + if hasattr(callback, "on_llm_start"): result = await callback.on_llm_start(result) return result @@ -377,82 +393,91 @@ class ComputerAgent: """Postprocess messages after the LLM call by applying callbacks.""" result = messages for callback in self.callbacks: - if hasattr(callback, 'on_llm_end'): + if hasattr(callback, "on_llm_end"): result = await callback.on_llm_end(result) return result async def _on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None: """Called when responses are received.""" for callback in self.callbacks: - if hasattr(callback, 'on_responses'): + if hasattr(callback, "on_responses"): await callback.on_responses(get_json(kwargs), get_json(responses)) - + async def _on_computer_call_start(self, item: Dict[str, Any]) -> None: """Called when a computer call is about to start.""" for callback in self.callbacks: - if hasattr(callback, 'on_computer_call_start'): + if hasattr(callback, "on_computer_call_start"): await callback.on_computer_call_start(get_json(item)) - - async def _on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: + + async def _on_computer_call_end( + self, item: Dict[str, Any], result: List[Dict[str, Any]] + ) -> None: """Called when a computer call has completed.""" for callback in self.callbacks: - if hasattr(callback, 'on_computer_call_end'): + if hasattr(callback, "on_computer_call_end"): await 
callback.on_computer_call_end(get_json(item), get_json(result)) - + async def _on_function_call_start(self, item: Dict[str, Any]) -> None: """Called when a function call is about to start.""" for callback in self.callbacks: - if hasattr(callback, 'on_function_call_start'): + if hasattr(callback, "on_function_call_start"): await callback.on_function_call_start(get_json(item)) - - async def _on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: + + async def _on_function_call_end( + self, item: Dict[str, Any], result: List[Dict[str, Any]] + ) -> None: """Called when a function call has completed.""" for callback in self.callbacks: - if hasattr(callback, 'on_function_call_end'): + if hasattr(callback, "on_function_call_end"): await callback.on_function_call_end(get_json(item), get_json(result)) - + async def _on_text(self, item: Dict[str, Any]) -> None: """Called when a text message is encountered.""" for callback in self.callbacks: - if hasattr(callback, 'on_text'): + if hasattr(callback, "on_text"): await callback.on_text(get_json(item)) - + async def _on_api_start(self, kwargs: Dict[str, Any]) -> None: """Called when an LLM API call is about to start.""" for callback in self.callbacks: - if hasattr(callback, 'on_api_start'): + if hasattr(callback, "on_api_start"): await callback.on_api_start(get_json(kwargs)) - + async def _on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None: """Called when an LLM API call has completed.""" for callback in self.callbacks: - if hasattr(callback, 'on_api_end'): + if hasattr(callback, "on_api_end"): await callback.on_api_end(get_json(kwargs), get_json(result)) async def _on_usage(self, usage: Dict[str, Any]) -> None: """Called when usage information is received.""" for callback in self.callbacks: - if hasattr(callback, 'on_usage'): + if hasattr(callback, "on_usage"): await callback.on_usage(get_json(usage)) async def _on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None: """Called when a screenshot is taken.""" for callback in self.callbacks: - if hasattr(callback, 'on_screenshot'): + if hasattr(callback, "on_screenshot"): await callback.on_screenshot(screenshot, name) # ============================================================================ # AGENT OUTPUT PROCESSING # ============================================================================ - - async def _handle_item(self, item: Any, computer: Optional[AsyncComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]: + + async def _handle_item( + self, + item: Any, + computer: Optional[AsyncComputerHandler] = None, + ignore_call_ids: Optional[List[str]] = None, + ) -> List[Dict[str, Any]]: """Handle each item; may cause a computer action + screenshot.""" call_id = item.get("call_id") if ignore_call_ids and call_id and call_id in ignore_call_ids: return [] - + item_type = item.get("type", None) - + if item_type == "message": await self._on_text(item) # # Print messages @@ -461,7 +486,7 @@ class ComputerAgent: # if content_item.get("text"): # print(content_item.get("text")) return [] - + try: if item_type == "computer_call": await self._on_computer_call_start(item) @@ -472,14 +497,16 @@ class ComputerAgent: action = item.get("action") action_type = action.get("type") if action_type is None: - print(f"Action type cannot be `None`: action={action}, action_type={action_type}") + print( + f"Action type cannot be `None`: action={action}, action_type={action_type}" + ) return [] - + # Extract action 
arguments (all fields except 'type') action_args = {k: v for k, v in action.items() if k != "type"} - + # print(f"{action_type}({action_args})") - + # Execute the computer action computer_method = getattr(computer, action_type, None) if computer_method: @@ -487,13 +514,13 @@ class ComputerAgent: await computer_method(**action_args) else: raise ToolError(f"Unknown computer action: {action_type}") - + # Take screenshot after action if self.screenshot_delay and self.screenshot_delay > 0: await asyncio.sleep(self.screenshot_delay) screenshot_base64 = await computer.screenshot() await self._on_screenshot(screenshot_base64, "screenshot_after") - + # Handle safety checks pending_checks = item.get("pending_safety_checks", []) acknowledged_checks = [] @@ -505,7 +532,7 @@ class ComputerAgent: # acknowledged_checks.append(check) # else: # raise ValueError(f"Safety check failed: {check_message}") - + # Create call output call_output = { "type": "computer_call_output", @@ -516,25 +543,25 @@ class ComputerAgent: "image_url": f"data:image/png;base64,{screenshot_base64}", }, } - + # # Additional URL safety checks for browser environments # if await computer.get_environment() == "browser": # current_url = await computer.get_current_url() # call_output["output"]["current_url"] = current_url # # TODO: implement a callback for URL safety checks # # check_blocklisted_url(current_url) - + result = [call_output] await self._on_computer_call_end(item, result) return result - + if item_type == "function_call": await self._on_function_call_start(item) # Perform function call function = self._get_tool(item.get("name")) if not function: - raise ToolError(f"Function {item.get("name")} not found") - + raise ToolError(f"Function {item.get('name')} not found") + args = json.loads(item.get("arguments")) # Validate arguments before execution @@ -545,14 +572,14 @@ class ComputerAgent: result = await function(**args) else: result = await asyncio.to_thread(function, **args) - + # Create function call output call_output = { "type": "function_call_output", "call_id": item.get("call_id"), "output": str(result), } - + result = [call_output] await self._on_function_call_end(item, result) return result @@ -564,36 +591,35 @@ class ComputerAgent: # ============================================================================ # MAIN AGENT LOOP # ============================================================================ - + async def run( - self, - messages: Messages, - stream: bool = False, - **kwargs + self, messages: Messages, stream: bool = False, **kwargs ) -> AsyncGenerator[Dict[str, Any], None]: """ Run the agent with the given messages using Computer protocol handler pattern. 
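# A minimal usage sketch, assuming `ComputerAgent` is importable from the
# `agent` package and using the model name from the docstring above:
import asyncio

from agent import ComputerAgent

async def main() -> None:
    computer_agent = ComputerAgent(
        model="claude-3-5-sonnet-20241022",
        only_n_most_recent_images=3,   # installs ImageRetentionCallback
        max_trajectory_budget=1.0,     # installs BudgetManagerCallback
    )
    # run() is an async generator; each chunk carries "output" and "usage".
    async for chunk in computer_agent.run("Open the settings app"):
        for item in chunk.get("output", []):
            print(item.get("type"))

asyncio.run(main())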
- + Args: messages: List of message dictionaries stream: Whether to stream the response **kwargs: Additional arguments - + Returns: AsyncGenerator that yields response chunks """ if not self.agent_config_info: raise ValueError("Agent configuration not found") - + capabilities = self.get_capabilities() if "step" not in capabilities: - raise ValueError(f"Agent loop {self.agent_config_info.agent_class.__name__} does not support step predictions") + raise ValueError( + f"Agent loop {self.agent_config_info.agent_class.__name__} does not support step predictions" + ) await self._initialize_computers() - + # Merge kwargs merged_kwargs = {**self.kwargs, **kwargs} - + old_items = self._process_input(messages) new_items = [] @@ -603,7 +629,7 @@ class ComputerAgent: "stream": stream, "model": self.model, "agent_loop": self.agent_config_info.agent_class.__name__, - **merged_kwargs + **merged_kwargs, } await self._on_run_start(run_kwargs, old_items) @@ -620,7 +646,7 @@ class ComputerAgent: combined_messages = old_items + new_items combined_messages = replace_failed_computer_calls_with_function_calls(combined_messages) preprocessed_messages = await self._on_llm_start(combined_messages) - + loop_kwargs = { "messages": preprocessed_messages, "model": self.model, @@ -629,7 +655,7 @@ class ComputerAgent: "computer_handler": self.computer_handler, "max_retries": self.max_retries, "use_prompt_caching": self.use_prompt_caching, - **merged_kwargs + **merged_kwargs, } # Run agent loop iteration @@ -641,13 +667,13 @@ class ComputerAgent: _on_screenshot=self._on_screenshot, ) result = get_json(result) - + # Lifecycle hook: Postprocess messages after the LLM call # Use cases: # - PII deanonymization (if you want tool calls to see PII) result["output"] = await self._on_llm_end(result.get("output", [])) await self._on_responses(loop_kwargs, result) - + # Yield agent response yield result @@ -659,7 +685,9 @@ class ComputerAgent: # Handle computer actions for item in result.get("output"): - partial_items = await self._handle_item(item, self.computer_handler, ignore_call_ids=output_call_ids) + partial_items = await self._handle_item( + item, self.computer_handler, ignore_call_ids=output_call_ids + ) new_items += partial_items # Yield partial response @@ -669,54 +697,52 @@ class ComputerAgent: prompt_tokens=0, completion_tokens=0, total_tokens=0, - ) + ), } - + await self._on_run_end(loop_kwargs, old_items, new_items) - + async def predict_click( - self, - instruction: str, - image_b64: Optional[str] = None + self, instruction: str, image_b64: Optional[str] = None ) -> Optional[Tuple[int, int]]: """ Predict click coordinates based on image and instruction. 
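# A minimal sketch, inside an async function: predict_click returns (x, y)
# or None. image_b64 is optional when a computer tool is attached (a
# screenshot is taken automatically); screenshot_b64 here is an assumed
# pre-captured base64 PNG.
coords = await computer_agent.predict_click(
    instruction="Click the Submit button",
    image_b64=screenshot_b64,
)
if coords is not None:
    x, y = coords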
- + Args: instruction: Instruction for where to click image_b64: Base64 encoded image (optional, will take screenshot if not provided) - + Returns: None or tuple with (x, y) coordinates """ if not self.agent_config_info: raise ValueError("Agent configuration not found") - + capabilities = self.get_capabilities() if "click" not in capabilities: - raise ValueError(f"Agent loop {self.agent_config_info.agent_class.__name__} does not support click predictions") - if hasattr(self.agent_loop, 'predict_click'): + raise ValueError( + f"Agent loop {self.agent_config_info.agent_class.__name__} does not support click predictions" + ) + if hasattr(self.agent_loop, "predict_click"): if not image_b64: if not self.computer_handler: raise ValueError("Computer tool or image_b64 is required for predict_click") image_b64 = await self.computer_handler.screenshot() return await self.agent_loop.predict_click( - model=self.model, - image_b64=image_b64, - instruction=instruction + model=self.model, image_b64=image_b64, instruction=instruction ) return None - + def get_capabilities(self) -> List[AgentCapability]: """ Get list of capabilities supported by the current agent config. - + Returns: List of capability strings (e.g., ["step", "click"]) """ if not self.agent_config_info: raise ValueError("Agent configuration not found") - - if hasattr(self.agent_loop, 'get_capabilities'): + + if hasattr(self.agent_loop, "get_capabilities"): return self.agent_loop.get_capabilities() - return ["step"] # Default capability \ No newline at end of file + return ["step"] # Default capability diff --git a/libs/python/agent/agent/callbacks/__init__.py b/libs/python/agent/agent/callbacks/__init__.py index eca40173..06987593 100644 --- a/libs/python/agent/agent/callbacks/__init__.py +++ b/libs/python/agent/agent/callbacks/__init__.py @@ -3,17 +3,17 @@ Callback system for ComputerAgent preprocessing and postprocessing hooks. 
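# A minimal sketch of a custom callback, assuming the hooks shown in this
# patch keep their default no-op implementations so subclasses override only
# the events they care about:
from agent.callbacks import AsyncCallbackHandler

class StepCounterCallback(AsyncCallbackHandler):
    """Counts computer calls issued during a run."""

    def __init__(self) -> None:
        self.computer_calls = 0

    async def on_computer_call_start(self, item) -> None:
        self.computer_calls += 1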
""" from .base import AsyncCallbackHandler +from .budget_manager import BudgetManagerCallback from .image_retention import ImageRetentionCallback from .logging import LoggingCallback -from .trajectory_saver import TrajectorySaverCallback -from .budget_manager import BudgetManagerCallback -from .telemetry import TelemetryCallback from .operator_validator import OperatorNormalizerCallback from .prompt_instructions import PromptInstructionsCallback +from .telemetry import TelemetryCallback +from .trajectory_saver import TrajectorySaverCallback __all__ = [ "AsyncCallbackHandler", - "ImageRetentionCallback", + "ImageRetentionCallback", "LoggingCallback", "TrajectorySaverCallback", "BudgetManagerCallback", diff --git a/libs/python/agent/agent/callbacks/base.py b/libs/python/agent/agent/callbacks/base.py index 01688077..d7c2a56f 100644 --- a/libs/python/agent/agent/callbacks/base.py +++ b/libs/python/agent/agent/callbacks/base.py @@ -3,7 +3,7 @@ Base callback handler interface for ComputerAgent preprocessing and postprocessi """ from abc import ABC, abstractmethod -from typing import List, Dict, Any, Optional, Union +from typing import Any, Dict, List, Optional, Union class AsyncCallbackHandler(ABC): @@ -16,42 +16,52 @@ class AsyncCallbackHandler(ABC): """Called at the start of an agent run loop.""" pass - async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: + async def on_run_end( + self, + kwargs: Dict[str, Any], + old_items: List[Dict[str, Any]], + new_items: List[Dict[str, Any]], + ) -> None: """Called at the end of an agent run loop.""" pass - - async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool: + + async def on_run_continue( + self, + kwargs: Dict[str, Any], + old_items: List[Dict[str, Any]], + new_items: List[Dict[str, Any]], + ) -> bool: """Called during agent run loop to determine if execution should continue. - + Args: kwargs: Run arguments old_items: Original messages new_items: New messages generated during run - + Returns: True to continue execution, False to stop """ return True - + async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Called before messages are sent to the agent loop. - + Args: messages: List of message dictionaries to preprocess - + Returns: List of preprocessed message dictionaries """ return messages - + async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Called after the agent loop returns output. - + Args: output: List of output message dictionaries to postprocess - + Returns: List of postprocessed output dictionaries """ @@ -60,63 +70,67 @@ class AsyncCallbackHandler(ABC): async def on_computer_call_start(self, item: Dict[str, Any]) -> None: """ Called when a computer call is about to start. - + Args: item: The computer call item dictionary """ pass - - async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: + + async def on_computer_call_end( + self, item: Dict[str, Any], result: List[Dict[str, Any]] + ) -> None: """ Called when a computer call has completed. - + Args: item: The computer call item dictionary result: The result of the computer call """ pass - + async def on_function_call_start(self, item: Dict[str, Any]) -> None: """ Called when a function call is about to start. 
- + Args: item: The function call item dictionary """ pass - - async def on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None: + + async def on_function_call_end( + self, item: Dict[str, Any], result: List[Dict[str, Any]] + ) -> None: """ Called when a function call has completed. - + Args: item: The function call item dictionary result: The result of the function call """ pass - + async def on_text(self, item: Dict[str, Any]) -> None: """ Called when a text message is encountered. - + Args: item: The message item dictionary """ pass - + async def on_api_start(self, kwargs: Dict[str, Any]) -> None: """ Called when an API call is about to start. - + Args: kwargs: The kwargs being passed to the API call """ pass - + async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None: """ Called when an API call has completed. - + Args: kwargs: The kwargs that were passed to the API call result: The result of the API call @@ -126,7 +140,7 @@ class AsyncCallbackHandler(ABC): async def on_usage(self, usage: Dict[str, Any]) -> None: """ Called when usage information is received. - + Args: usage: The usage information """ @@ -135,7 +149,7 @@ class AsyncCallbackHandler(ABC): async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None: """ Called when a screenshot is taken. - + Args: screenshot: The screenshot image name: The name of the screenshot @@ -145,9 +159,9 @@ class AsyncCallbackHandler(ABC): async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None: """ Called when responses are received. - + Args: kwargs: The kwargs being passed to the agent loop responses: The responses received """ - pass \ No newline at end of file + pass diff --git a/libs/python/agent/agent/callbacks/budget_manager.py b/libs/python/agent/agent/callbacks/budget_manager.py index bc17c695..ff45b3fd 100644 --- a/libs/python/agent/agent/callbacks/budget_manager.py +++ b/libs/python/agent/agent/callbacks/budget_manager.py @@ -1,17 +1,23 @@ -from typing import Dict, List, Any +from typing import Any, Dict, List + from .base import AsyncCallbackHandler + class BudgetExceededError(Exception): """Exception raised when budget is exceeded.""" + pass + class BudgetManagerCallback(AsyncCallbackHandler): """Budget manager callback that tracks usage costs and can stop execution when budget is exceeded.""" - - def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False): + + def __init__( + self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False + ): """ Initialize BudgetManagerCallback. 
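# A minimal sketch: stop the run once the accumulated response_cost reaches
# $2.00; with raise_error=True, BudgetExceededError is raised instead of
# on_run_continue quietly returning False.
budget_cb = BudgetManagerCallback(max_budget=2.0, reset_after_each_run=True, raise_error=True)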
- + Args: max_budget: Maximum budget allowed reset_after_each_run: Whether to reset budget after each run @@ -21,24 +27,30 @@ class BudgetManagerCallback(AsyncCallbackHandler): self.reset_after_each_run = reset_after_each_run self.raise_error = raise_error self.total_cost = 0.0 - + async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: """Reset budget if configured to do so.""" if self.reset_after_each_run: self.total_cost = 0.0 - + async def on_usage(self, usage: Dict[str, Any]) -> None: """Track usage costs.""" if "response_cost" in usage: self.total_cost += usage["response_cost"] - - async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool: + + async def on_run_continue( + self, + kwargs: Dict[str, Any], + old_items: List[Dict[str, Any]], + new_items: List[Dict[str, Any]], + ) -> bool: """Check if budget allows continuation.""" if self.total_cost >= self.max_budget: if self.raise_error: - raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}") + raise BudgetExceededError( + f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}" + ) else: print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}") return False return True - \ No newline at end of file diff --git a/libs/python/agent/agent/callbacks/image_retention.py b/libs/python/agent/agent/callbacks/image_retention.py index 320a3f43..e95f17d3 100644 --- a/libs/python/agent/agent/callbacks/image_retention.py +++ b/libs/python/agent/agent/callbacks/image_retention.py @@ -2,7 +2,8 @@ Image retention callback handler that limits the number of recent images in message history. """ -from typing import List, Dict, Any, Optional +from typing import Any, Dict, List, Optional + from .base import AsyncCallbackHandler @@ -11,40 +12,40 @@ class ImageRetentionCallback(AsyncCallbackHandler): Callback handler that applies image retention policy to limit the number of recent images in message history to prevent context window overflow. """ - + def __init__(self, only_n_most_recent_images: Optional[int] = None): """ Initialize the image retention callback. - + Args: only_n_most_recent_images: If set, only keep the N most recent images in message history """ self.only_n_most_recent_images = only_n_most_recent_images - + async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Apply image retention policy to messages before sending to agent loop. - + Args: messages: List of message dictionaries - + Returns: List of messages with image retention policy applied """ if self.only_n_most_recent_images is None: return messages - + return self._apply_image_retention(messages) - + def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Apply image retention policy to keep only the N most recent images. - + Removes computer_call_output items with image_url and their corresponding computer_call items, keeping only the most recent N image pairs based on only_n_most_recent_images setting. 
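# A minimal sketch: keep only the three most recent screenshots. Older
# computer_call_output items carrying an image_url are pruned together with
# their paired computer_call (and a directly preceding reasoning item).
retention_cb = ImageRetentionCallback(only_n_most_recent_images=3)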
- + Args: messages: List of message dictionaries - + Returns: Filtered list of messages with image retention applied """ @@ -78,7 +79,11 @@ class ImageRetentionCallback(AsyncCallbackHandler): # Remove the immediately preceding computer_call with matching call_id (if present) call_id = messages[idx].get("call_id") prev_idx = idx - 1 - if prev_idx >= 0 and messages[prev_idx].get("type") == "computer_call" and messages[prev_idx].get("call_id") == call_id: + if ( + prev_idx >= 0 + and messages[prev_idx].get("type") == "computer_call" + and messages[prev_idx].get("call_id") == call_id + ): to_remove.add(prev_idx) # Check a single reasoning immediately before that computer_call r_idx = prev_idx - 1 @@ -87,4 +92,4 @@ class ImageRetentionCallback(AsyncCallbackHandler): # Construct filtered list filtered = [m for i, m in enumerate(messages) if i not in to_remove] - return filtered \ No newline at end of file + return filtered diff --git a/libs/python/agent/agent/callbacks/logging.py b/libs/python/agent/agent/callbacks/logging.py index af171925..c9674c7e 100644 --- a/libs/python/agent/agent/callbacks/logging.py +++ b/libs/python/agent/agent/callbacks/logging.py @@ -4,17 +4,18 @@ Logging callback for ComputerAgent that provides configurable logging of agent l import json import logging -from typing import Dict, List, Any, Optional, Union +from typing import Any, Dict, List, Optional, Union + from .base import AsyncCallbackHandler def sanitize_image_urls(data: Any) -> Any: """ Recursively search for 'image_url' keys and set their values to '[omitted]'. - + Args: data: Any data structure (dict, list, or primitive type) - + Returns: A deep copy of the data with all 'image_url' values replaced with '[omitted]' """ @@ -28,11 +29,11 @@ def sanitize_image_urls(data: Any) -> Any: # Recursively sanitize the value sanitized[key] = sanitize_image_urls(value) return sanitized - + elif isinstance(data, list): # Recursively sanitize each item in the list return [sanitize_image_urls(item) for item in data] - + else: # For primitive types (str, int, bool, None, etc.), return as-is return data @@ -41,37 +42,36 @@ def sanitize_image_urls(data: Any) -> Any: class LoggingCallback(AsyncCallbackHandler): """ Callback handler that logs agent lifecycle events with configurable verbosity. - + Logging levels: - DEBUG: All events including API calls, message preprocessing, and detailed outputs - - INFO: Major lifecycle events (start/end, messages, outputs) + - INFO: Major lifecycle events (start/end, messages, outputs) - WARNING: Only warnings and errors - ERROR: Only errors """ - + def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO): """ Initialize the logging callback. - + Args: logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent' level: Logging level (logging.DEBUG, logging.INFO, etc.) 
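# A minimal sketch: at DEBUG level the full API payloads are logged, but
# only after sanitize_image_urls has replaced every image_url value with
# "[omitted]".
import logging

logging_cb = LoggingCallback(level=logging.DEBUG)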
""" - self.logger = logger or logging.getLogger('agent.ComputerAgent') + self.logger = logger or logging.getLogger("agent.ComputerAgent") self.level = level - + # Set up logger if it doesn't have handlers if not self.logger.handlers: handler = logging.StreamHandler() - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - ) + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") handler.setFormatter(formatter) self.logger.addHandler(handler) self.logger.setLevel(level) - + def _update_usage(self, usage: Dict[str, Any]) -> None: """Update total usage statistics.""" + def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None: for key, value in source.items(): if isinstance(value, dict): @@ -82,18 +82,25 @@ class LoggingCallback(AsyncCallbackHandler): if key not in target: target[key] = 0 target[key] += value + add_dicts(self.total_usage, usage) - + async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: """Called before the run starts.""" self.total_usage = {} - + async def on_usage(self, usage: Dict[str, Any]) -> None: """Called when usage information is received.""" self._update_usage(usage) - async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: + async def on_run_end( + self, + kwargs: Dict[str, Any], + old_items: List[Dict[str, Any]], + new_items: List[Dict[str, Any]], + ) -> None: """Called after the run ends.""" + def format_dict(d, indent=0): lines = [] prefix = f" - {' ' * indent}" @@ -106,10 +113,10 @@ class LoggingCallback(AsyncCallbackHandler): else: lines.append(f"{prefix}{key}: {value}") return lines - + formatted_output = "\n".join(format_dict(self.total_usage)) self.logger.info(f"Total usage:\n{formatted_output}") - + async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Called before LLM processing starts.""" if self.logger.isEnabledFor(logging.INFO): @@ -118,27 +125,27 @@ class LoggingCallback(AsyncCallbackHandler): sanitized_messages = [sanitize_image_urls(msg) for msg in messages] self.logger.debug(f"LLM input messages: {json.dumps(sanitized_messages, indent=2)}") return messages - + async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Called after LLM processing ends.""" if self.logger.isEnabledFor(logging.DEBUG): sanitized_messages = [sanitize_image_urls(msg) for msg in messages] self.logger.debug(f"LLM output: {json.dumps(sanitized_messages, indent=2)}") return messages - + async def on_computer_call_start(self, item: Dict[str, Any]) -> None: """Called when a computer call starts.""" action = item.get("action", {}) action_type = action.get("type", "unknown") action_args = {k: v for k, v in action.items() if k != "type"} - + # INFO level logging for the action self.logger.info(f"Computer: {action_type}({action_args})") - + # DEBUG level logging for full details if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug(f"Computer call started: {json.dumps(action, indent=2)}") - + async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None: """Called when a computer call ends.""" if self.logger.isEnabledFor(logging.DEBUG): @@ -147,48 +154,52 @@ class LoggingCallback(AsyncCallbackHandler): if result: sanitized_result = sanitize_image_urls(result) self.logger.debug(f"Computer call result: {json.dumps(sanitized_result, indent=2)}") - + async def on_function_call_start(self, item: Dict[str, Any]) 
-> None: """Called when a function call starts.""" name = item.get("name", "unknown") arguments = item.get("arguments", "{}") - + # INFO level logging for the function call self.logger.info(f"Function: {name}({arguments})") - + # DEBUG level logging for full details if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug(f"Function call started: {name}") - + async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None: """Called when a function call ends.""" # INFO level logging for function output (similar to function_call_output) if result: # Handle both list and direct result formats if isinstance(result, list) and len(result) > 0: - output = result[0].get("output", str(result)) if isinstance(result[0], dict) else str(result[0]) + output = ( + result[0].get("output", str(result)) + if isinstance(result[0], dict) + else str(result[0]) + ) else: output = str(result) - + # Truncate long outputs if len(output) > 100: output = output[:100] + "..." - + self.logger.info(f"Output: {output}") - + # DEBUG level logging for full details if self.logger.isEnabledFor(logging.DEBUG): name = item.get("name", "unknown") self.logger.debug(f"Function call completed: {name}") if result: self.logger.debug(f"Function call result: {json.dumps(result, indent=2)}") - + async def on_text(self, item: Dict[str, Any]) -> None: """Called when a text message is encountered.""" # Get the role to determine if it's Agent or User role = item.get("role", "unknown") content_items = item.get("content", []) - + # Process content items to build display text text_parts = [] for content_item in content_items: @@ -206,10 +217,10 @@ class LoggingCallback(AsyncCallbackHandler): else: # Non-text content, show as [type] text_parts.append(f"[{content_type}]") - + # Join all text parts - display_text = ''.join(text_parts) if text_parts else "[empty]" - + display_text = "".join(text_parts) if text_parts else "[empty]" + # Log with appropriate level and format if role == "assistant": self.logger.info(f"Agent: {display_text}") @@ -219,7 +230,7 @@ class LoggingCallback(AsyncCallbackHandler): # Fallback for unknown roles, use debug level if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug(f"Text message ({role}): {display_text}") - + async def on_api_start(self, kwargs: Dict[str, Any]) -> None: """Called when an API call is about to start.""" if self.logger.isEnabledFor(logging.DEBUG): @@ -232,16 +243,18 @@ class LoggingCallback(AsyncCallbackHandler): elif "input" in kwargs: sanitized_input = sanitize_image_urls(kwargs["input"]) self.logger.debug(f"API call input: {json.dumps(sanitized_input, indent=2)}") - + async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None: """Called when an API call has completed.""" if self.logger.isEnabledFor(logging.DEBUG): model = kwargs.get("model", "unknown") self.logger.debug(f"API call completed for model: {model}") - self.logger.debug(f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}") + self.logger.debug( + f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}" + ) async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None: """Called when a screenshot is taken.""" if self.logger.isEnabledFor(logging.DEBUG): image_size = len(item) / 1024 - self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB") \ No newline at end of file + self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB") diff --git a/libs/python/agent/agent/callbacks/operator_validator.py 
b/libs/python/agent/agent/callbacks/operator_validator.py index 56a9c280..28a6fd37 100644 --- a/libs/python/agent/agent/callbacks/operator_validator.py +++ b/libs/python/agent/agent/callbacks/operator_validator.py @@ -9,6 +9,7 @@ Ensures agent output actions conform to expected schemas by fixing common issues This runs in on_llm_end, which receives the output array (AgentMessage[] as dicts). The purpose is to avoid spending another LLM call to fix broken computer call syntax when possible. """ + from __future__ import annotations from typing import Any, Dict, List @@ -48,6 +49,7 @@ class OperatorNormalizerCallback(AsyncCallbackHandler): action["type"] = "type" action_type = action.get("type") + def _keep_keys(action: Dict[str, Any], keys_to_keep: List[str]): """Keep only the provided keys on action; delete everything else. Always ensures required 'type' is present if listed in keys_to_keep. @@ -55,6 +57,7 @@ class OperatorNormalizerCallback(AsyncCallbackHandler): for key in list(action.keys()): if key not in keys_to_keep: del action[key] + # rename "coordinate" to "x", "y" if "coordinate" in action: action["x"] = action["coordinate"][0] @@ -100,7 +103,6 @@ class OperatorNormalizerCallback(AsyncCallbackHandler): keep = required_keys_by_type.get(action_type or "") if keep: _keep_keys(action, keep) - # # Second pass: if an assistant message is immediately followed by a computer_call, # # replace the assistant message itself with a reasoning message with summary text. diff --git a/libs/python/agent/agent/callbacks/pii_anonymization.py b/libs/python/agent/agent/callbacks/pii_anonymization.py index 68f4b2fc..167f74f2 100644 --- a/libs/python/agent/agent/callbacks/pii_anonymization.py +++ b/libs/python/agent/agent/callbacks/pii_anonymization.py @@ -2,38 +2,41 @@ PII anonymization callback handler using Microsoft Presidio for text and image redaction. """ -from typing import List, Dict, Any, Optional, Tuple -from .base import AsyncCallbackHandler import base64 import io import logging +from typing import Any, Dict, List, Optional, Tuple + +from .base import AsyncCallbackHandler try: # TODO: Add Presidio dependencies from PIL import Image + PRESIDIO_AVAILABLE = True except ImportError: PRESIDIO_AVAILABLE = False logger = logging.getLogger(__name__) + class PIIAnonymizationCallback(AsyncCallbackHandler): """ Callback handler that anonymizes PII in text and images using Microsoft Presidio. - + This handler: 1. Anonymizes PII in messages before sending to the agent loop 2. Deanonymizes PII in tool calls and message outputs after the agent loop 3. Redacts PII from images in computer_call_output messages """ - + def __init__( self, # TODO: Any extra kwargs if needed ): """ Initialize the PII anonymization callback. - + Args: anonymize_text: Whether to anonymize text content anonymize_images: Whether to redact images @@ -46,16 +49,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): "Presidio is not available. Install with: " "pip install cua-agent[pii-anonymization]" ) - + # TODO: Implement __init__ - + async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Anonymize PII in messages before sending to agent loop. 
diff --git a/libs/python/agent/agent/callbacks/pii_anonymization.py b/libs/python/agent/agent/callbacks/pii_anonymization.py
index 68f4b2fc..167f74f2 100644
--- a/libs/python/agent/agent/callbacks/pii_anonymization.py
+++ b/libs/python/agent/agent/callbacks/pii_anonymization.py
@@ -2,38 +2,41 @@
 PII anonymization callback handler using Microsoft Presidio for text and image redaction.
 """
 
-from typing import List, Dict, Any, Optional, Tuple
-from .base import AsyncCallbackHandler
 import base64
 import io
 import logging
+from typing import Any, Dict, List, Optional, Tuple
+
+from .base import AsyncCallbackHandler
 
 try:
     # TODO: Add Presidio dependencies
     from PIL import Image
+
     PRESIDIO_AVAILABLE = True
 except ImportError:
     PRESIDIO_AVAILABLE = False
 
 logger = logging.getLogger(__name__)
 
+
 class PIIAnonymizationCallback(AsyncCallbackHandler):
     """
     Callback handler that anonymizes PII in text and images using Microsoft Presidio.
-    
+
     This handler:
     1. Anonymizes PII in messages before sending to the agent loop
     2. Deanonymizes PII in tool calls and message outputs after the agent loop
     3. Redacts PII from images in computer_call_output messages
     """
-    
+
     def __init__(
         self,
         # TODO: Any extra kwargs if needed
     ):
         """
         Initialize the PII anonymization callback.
-        
+
         Args:
             anonymize_text: Whether to anonymize text content
             anonymize_images: Whether to redact images
@@ -46,16 +49,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
                 "Presidio is not available. Install with: "
                 "pip install cua-agent[pii-anonymization]"
             )
-        
+
         # TODO: Implement __init__
-    
+
     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Anonymize PII in messages before sending to agent loop.
-        
+
         Args:
             messages: List of message dictionaries
-        
+
         Returns:
             List of messages with PII anonymized
         """
@@ -63,16 +66,16 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
         for msg in messages:
             anonymized_msg = await self._anonymize_message(msg)
             anonymized_messages.append(anonymized_msg)
-        
+
         return anonymized_messages
-    
+
     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Deanonymize PII in tool calls and message outputs after agent loop.
-        
+
         Args:
             output: List of output dictionaries
-        
+
         Returns:
             List of output with PII deanonymized for tool calls
         """
@@ -84,13 +87,13 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
                 deanonymized_output.append(deanonymized_item)
             else:
                 deanonymized_output.append(item)
-        
+
         return deanonymized_output
-    
+
     async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: Implement _anonymize_message
         return message
-    
+
     async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: Implement _deanonymize_item
         return item
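Since the Presidio calls are still `# TODO` stubs here, the following is a hedged sketch of the text path `_anonymize_message` could take. `AnalyzerEngine` and `AnonymizerEngine` are Presidio's real entry points (installed via the `cua-agent[pii-anonymization]` extra named in the error message), but wiring them into the message schema is an assumption:

```python
# Sketch only, not the repository's implementation.
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

def anonymize_text(text: str) -> str:
    # Detect PII spans, then replace them with entity placeholders.
    results = analyzer.analyze(text=text, language="en")
    return anonymizer.anonymize(text=text, analyzer_results=results).text

print(anonymize_text("Contact John Smith at john@example.com"))
# e.g. "Contact <PERSON> at <EMAIL_ADDRESS>"
```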
diff --git a/libs/python/agent/agent/callbacks/telemetry.py b/libs/python/agent/agent/callbacks/telemetry.py
index bdb3fd4c..d8e77e1d 100644
--- a/libs/python/agent/agent/callbacks/telemetry.py
+++ b/libs/python/agent/agent/callbacks/telemetry.py
@@ -2,17 +2,17 @@
 Telemetry callback handler for Computer-Use Agent (cua-agent)
 """
 
+import platform
 import time
 import uuid
-from typing import List, Dict, Any, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
-from .base import AsyncCallbackHandler
 from core.telemetry import (
-    record_event,
     is_telemetry_enabled,
+    record_event,
 )
-import platform
+
+from .base import AsyncCallbackHandler
 
 SYSTEM_INFO = {
     "os": platform.system().lower(),
@@ -20,32 +20,29 @@ SYSTEM_INFO = {
     "python_version": platform.python_version(),
 }
 
+
 class TelemetryCallback(AsyncCallbackHandler):
     """
     Telemetry callback handler for Computer-Use Agent (cua-agent)
-    
+
     Tracks agent usage, performance metrics, and optionally trajectory data.
     """
-    
-    def __init__(
-        self,
-        agent,
-        log_trajectory: bool = False
-    ):
+
+    def __init__(self, agent, log_trajectory: bool = False):
         """
         Initialize telemetry callback.
-        
+
         Args:
             agent: The ComputerAgent instance
             log_trajectory: Whether to log full trajectory items (opt-in)
         """
         self.agent = agent
         self.log_trajectory = log_trajectory
-        
+
         # Generate session/run IDs
         self.session_id = str(uuid.uuid4())
         self.run_id = None
-        
+
         # Track timing and metrics
         self.run_start_time = None
         self.step_count = 0
@@ -54,126 +51,133 @@
             "prompt_tokens": 0,
             "completion_tokens": 0,
             "total_tokens": 0,
-            "response_cost": 0.0
+            "response_cost": 0.0,
         }
-        
+
         # Record agent initialization
         if is_telemetry_enabled():
             self._record_agent_initialization()
-    
+
     def _record_agent_initialization(self) -> None:
         """Record agent type/model and session initialization."""
         agent_info = {
             "session_id": self.session_id,
-            "agent_type": self.agent.agent_loop.__name__ if hasattr(self.agent, 'agent_loop') else 'unknown',
-            "model": getattr(self.agent, 'model', 'unknown'),
-            **SYSTEM_INFO
+            "agent_type": (
+                self.agent.agent_loop.__name__ if hasattr(self.agent, "agent_loop") else "unknown"
+            ),
+            "model": getattr(self.agent, "model", "unknown"),
+            **SYSTEM_INFO,
         }
-        
+
         record_event("agent_session_start", agent_info)
-    
+
     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
         """Called at the start of an agent run loop."""
         if not is_telemetry_enabled():
             return
-        
+
         self.run_id = str(uuid.uuid4())
         self.run_start_time = time.time()
         self.step_count = 0
-        
+
         # Calculate input context size
         input_context_size = self._calculate_context_size(old_items)
-        
+
         run_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "start_time": self.run_start_time,
             "input_context_size": input_context_size,
-            "num_existing_messages": len(old_items)
+            "num_existing_messages": len(old_items),
         }
-        
+
         # Log trajectory if opted in
         if self.log_trajectory:
             trajectory = self._extract_trajectory(old_items)
             if trajectory:
                 run_data["uploaded_trajectory"] = trajectory
-        
+
         record_event("agent_run_start", run_data)
-    
-    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+
+    async def on_run_end(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> None:
         """Called at the end of an agent run loop."""
         if not is_telemetry_enabled() or not self.run_start_time:
             return
-        
+
         run_duration = time.time() - self.run_start_time
-        
+
         run_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "end_time": time.time(),
             "duration_seconds": run_duration,
             "num_steps": self.step_count,
-            "total_usage": self.total_usage.copy()
+            "total_usage": self.total_usage.copy(),
         }
-        
+
         # Log trajectory if opted in
         if self.log_trajectory:
             trajectory = self._extract_trajectory(new_items)
             if trajectory:
                 run_data["uploaded_trajectory"] = trajectory
-        
+
         record_event("agent_run_end", run_data)
-    
+
     async def on_usage(self, usage: Dict[str, Any]) -> None:
         """Called when usage information is received."""
         if not is_telemetry_enabled():
             return
-        
+
         # Accumulate usage stats
         self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
-        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0) 
+        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
         self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
         self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
-        
+
         # Record individual usage event
         usage_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "step": self.step_count,
-            **usage
+            **usage,
         }
-        
+
         record_event("agent_usage", usage_data)
-    
+
     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
         """Called when responses are received."""
         if not is_telemetry_enabled():
             return
-        
+
         self.step_count += 1
         step_duration = None
-        
+
         if self.step_start_time:
             step_duration = time.time() - self.step_start_time
-        
+
         self.step_start_time = time.time()
-        
+
         step_data = {
             "session_id": self.session_id,
             "run_id": self.run_id,
             "step": self.step_count,
-            "timestamp": self.step_start_time
+            "timestamp": self.step_start_time,
         }
-        
+
         if step_duration is not None:
             step_data["duration_seconds"] = step_duration
-        
+
         record_event("agent_step", step_data)
-    
+
     def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
         """Calculate approximate context size in tokens/characters."""
         total_size = 0
-        
+
         for item in items:
             if item.get("type") == "message" and "content" in item:
                 content = item["content"]
@@ -185,25 +189,27 @@ class TelemetryCallback(AsyncCallbackHandler):
                         total_size += len(part["text"])
             elif "content" in item and isinstance(item["content"], str):
                 total_size += len(item["content"])
-        
+
         return total_size
-    
+
     def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Extract trajectory items that should be logged."""
         trajectory = []
-        
+
         for item in items:
             # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
             if (
-                item.get("role") == "user" or  # User inputs
-                (item.get("type") == "message" and item.get("role") == "assistant") or  # Model outputs
-                item.get("type") == "reasoning" or  # Reasoning traces
-                item.get("type") == "computer_call" or  # Computer actions
-                item.get("type") == "computer_call_output"  # Computer outputs
+                item.get("role") == "user"  # User inputs
+                or (
+                    item.get("type") == "message" and item.get("role") == "assistant"
+                )  # Model outputs
+                or item.get("type") == "reasoning"  # Reasoning traces
+                or item.get("type") == "computer_call"  # Computer actions
+                or item.get("type") == "computer_call_output"  # Computer outputs
             ):
                 # Create a copy of the item with timestamp
                 trajectory_item = item.copy()
                 trajectory_item["logged_at"] = time.time()
                 trajectory.append(trajectory_item)
-        
-        return trajectory
\ No newline at end of file
+
+        return trajectory
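Note that `_calculate_context_size` measures characters rather than true tokens. A simplified re-run of the same accumulation on hypothetical items shows the arithmetic:

```python
items = [
    {"type": "message", "content": "Open the settings page"},
    {"type": "message", "content": [{"text": "Done."}, {"text": " Anything else?"}]},
]

total = 0
for item in items:
    content = item.get("content")
    if isinstance(content, str):
        total += len(content)
    elif isinstance(content, list):
        # Only dict parts carrying a "text" key contribute, as in the diff.
        total += sum(len(p["text"]) for p in content if isinstance(p, dict) and "text" in p)

print(total)  # 22 + 5 + 15 = 42 characters, used as a rough context-size proxy
```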
diff --git a/libs/python/agent/agent/callbacks/trajectory_saver.py b/libs/python/agent/agent/callbacks/trajectory_saver.py
index 7ea708db..cb08d1a8 100644
--- a/libs/python/agent/agent/callbacks/trajectory_saver.py
+++ b/libs/python/agent/agent/callbacks/trajectory_saver.py
@@ -2,26 +2,28 @@
 Trajectory saving callback handler for ComputerAgent.
 """
 
-import os
-import json
-import uuid
-from datetime import datetime
 import base64
-from pathlib import Path
-from typing import List, Dict, Any, Optional, Union, override
-from PIL import Image, ImageDraw
 import io
+import json
+import os
+import uuid
 from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union, override
+
+from PIL import Image, ImageDraw
 
 from .base import AsyncCallbackHandler
 
+
 def sanitize_image_urls(data: Any) -> Any:
     """
     Recursively search for 'image_url' keys and set their values to '[omitted]'.
-    
+
     Args:
         data: Any data structure (dict, list, or primitive type)
-    
+
     Returns:
         A deep copy of the data with all 'image_url' values replaced with '[omitted]'
     """
@@ -35,17 +37,19 @@ def sanitize_image_urls(data: Any) -> Any:
             # Recursively sanitize the value
             sanitized[key] = sanitize_image_urls(value)
         return sanitized
-    
+
     elif isinstance(data, list):
         # Recursively sanitize each item in the list
         return [sanitize_image_urls(item) for item in data]
-    
+
     else:
         # For primitive types (str, int, bool, None, etc.), return as-is
         return data
 
 
-def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: Optional[Path]) -> List[Dict[str, Any]]:
+def extract_computer_call_outputs(
+    items: List[Dict[str, Any]], screenshot_dir: Optional[Path]
+) -> List[Dict[str, Any]]:
     """
     Save any base64-encoded screenshots from computer_call_output entries to files
     and replace their image_url with the saved file path when a call_id is present.
@@ -103,18 +107,21 @@ def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: O
         updated.append(msg)
     return updated
 
+
 class TrajectorySaverCallback(AsyncCallbackHandler):
     """
     Callback handler that saves agent trajectories to disk.
-    
+
     Saves each run as a separate trajectory with unique ID, and each turn
     within the trajectory gets its own folder with screenshots and responses.
     """
-    
-    def __init__(self, trajectory_dir: str, reset_on_run: bool = True, screenshot_dir: Optional[str] = None):
+
+    def __init__(
+        self, trajectory_dir: str, reset_on_run: bool = True, screenshot_dir: Optional[str] = None
+    ):
         """
         Initialize trajectory saver.
-        
+
         Args:
             trajectory_dir: Base directory to save trajectories
             reset_on_run: If True, reset trajectory_id/turn/artifact on each run.
@@ -129,7 +136,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
         self.reset_on_run = reset_on_run
         # Optional directory to store extracted screenshots from metadata/new_items
         self.screenshot_dir: Optional[Path] = Path(screenshot_dir) if screenshot_dir else None
-        
+
         # Ensure trajectory directory exists
         self.trajectory_dir.mkdir(parents=True, exist_ok=True)
 
@@ -137,7 +144,7 @@
         """Get the directory for the current turn."""
         if not self.trajectory_id:
             raise ValueError("Trajectory not initialized - call _on_run_start first")
-        
+
         # format: trajectory_id/turn_000
         turn_dir = self.trajectory_dir / self.trajectory_id / f"turn_{self.current_turn:03d}"
         turn_dir.mkdir(parents=True, exist_ok=True)
@@ -166,6 +173,7 @@
 
     def _update_usage(self, usage: Dict[str, Any]) -> None:
         """Update total usage statistics."""
+
         def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
             for key, value in source.items():
                 if isinstance(value, dict):
@@ -176,20 +184,21 @@
                     if key not in target:
                         target[key] = 0
                     target[key] += value
+
         add_dicts(self.total_usage, usage)
-    
+
     @override
     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
         """Initialize trajectory tracking for a new run."""
         model = kwargs.get("model", "unknown")
-        
+
         # Only reset trajectory state if reset_on_run is True or no trajectory exists
         if self.reset_on_run or not self.trajectory_id:
             model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16]
             if "+" in model:
                 model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short
             # strip non-alphanumeric characters from model_name_short
-            model_name_short = ''.join(c for c in model_name_short if c.isalnum() or c == '_')
+            model_name_short = "".join(c for c in model_name_short if c.isalnum() or c == "_")
 
             # id format: yyyy-mm-dd_model_hhmmss_uuid[:4]
             now = datetime.now()
@@ -198,11 +207,11 @@
             self.current_artifact = 0
             self.model = model
             self.total_usage = {}
-            
+
             # Create trajectory directory
             trajectory_path = self.trajectory_dir / self.trajectory_id
             trajectory_path.mkdir(parents=True, exist_ok=True)
-            
+
             # Save trajectory metadata (optionally extract screenshots to screenshot_dir)
             kwargs_to_save = kwargs.copy()
             try:
@@ -219,7 +228,7 @@
                 "status": "running",
                 "kwargs": kwargs_to_save,
             }
-            
+
             with open(trajectory_path / "metadata.json", "w") as f:
                 json.dump(metadata, f, indent=2)
         else:
@@ -227,22 +236,27 @@
             self.model = model
 
     @override
-    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+    async def on_run_end(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> None:
         """Finalize run tracking by updating metadata with completion status, usage, and new items."""
         if not self.trajectory_id:
             return
-        
+
         # Update metadata with completion status, total usage, and new items
         trajectory_path = self.trajectory_dir / self.trajectory_id
         metadata_path = trajectory_path / "metadata.json"
-        
+
         # Read existing metadata
         if metadata_path.exists():
             with open(metadata_path, "r") as f:
                 metadata = json.load(f)
         else:
             metadata = {}
-        
+
         # Update metadata with completion info
         # Optionally extract screenshots from new_items before persisting
         new_items_to_save = new_items
@@ -251,32 +265,34 @@
         except Exception:
             pass
 
-        metadata.update({
-            "status": "completed",
-            "completed_at": str(uuid.uuid1().time),
-            "total_usage": self.total_usage,
-            "new_items": new_items_to_save,
-            "total_turns": self.current_turn
-        })
-        
+        metadata.update(
+            {
+                "status": "completed",
+                "completed_at": str(uuid.uuid1().time),
+                "total_usage": self.total_usage,
+                "new_items": new_items_to_save,
+                "total_turns": self.current_turn,
+            }
+        )
+
         # Save updated metadata
         with open(metadata_path, "w") as f:
             json.dump(metadata, f, indent=2)
-    
-    @override 
+
+    @override
     async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
         if not self.trajectory_id:
             return
-        
-        self._save_artifact("api_start", { "kwargs": kwargs })
-    
+
+        self._save_artifact("api_start", {"kwargs": kwargs})
+
     @override
     async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
         """Save API call result."""
         if not self.trajectory_id:
             return
-        
-        self._save_artifact("api_result", { "kwargs": kwargs, "result": result })
+
+        self._save_artifact("api_result", {"kwargs": kwargs, "result": result})
 
     @override
     async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
@@ -295,77 +311,83 @@
         """Save responses to the current turn directory and update usage statistics."""
         if not self.trajectory_id:
             return
-        
+
         # Save responses
         turn_dir = self._get_turn_dir()
         response_data = {
             "timestamp": str(uuid.uuid1().time),
             "model": self.model,
             "kwargs": kwargs,
-            "response": responses
+            "response": responses,
         }
-        
+
         self._save_artifact("agent_response", response_data)
-        
+
         # Increment turn counter
         self.current_turn += 1
 
     def _draw_crosshair_on_image(self, image_bytes: bytes, x: int, y: int) -> bytes:
         """
         Draw a red dot and crosshair at the specified coordinates on the image.
-        
+
         Args:
             image_bytes: The original image as bytes
             x: X coordinate for the crosshair
             y: Y coordinate for the crosshair
-        
+
         Returns:
             Modified image as bytes with red dot and crosshair
         """
         # Open the image
         image = Image.open(io.BytesIO(image_bytes))
         draw = ImageDraw.Draw(image)
-        
+
         # Draw crosshair lines (red, 2px thick)
         crosshair_size = 20
         line_width = 2
         color = "red"
-        
+
         # Horizontal line
         draw.line([(x - crosshair_size, y), (x + crosshair_size, y)], fill=color, width=line_width)
         # Vertical line
         draw.line([(x, y - crosshair_size), (x, y + crosshair_size)], fill=color, width=line_width)
-        
+
         # Draw center dot (filled circle)
         dot_radius = 3
-        draw.ellipse([(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color)
-        
+        draw.ellipse(
+            [(x - dot_radius, y - dot_radius), (x + dot_radius, y + dot_radius)], fill=color
+        )
+
         # Convert back to bytes
         output = io.BytesIO()
-        image.save(output, format='PNG')
+        image.save(output, format="PNG")
         return output.getvalue()
 
     @override
-    async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
+    async def on_computer_call_end(
+        self, item: Dict[str, Any], result: List[Dict[str, Any]]
+    ) -> None:
         """
         Called when a computer call has completed. Saves screenshots and computer call output.
         """
         if not self.trajectory_id:
             return
-        
-        self._save_artifact("computer_call_result", { "item": item, "result": result })
-        
+
+        self._save_artifact("computer_call_result", {"item": item, "result": result})
+
         # Check if action has x/y coordinates and there's a screenshot in the result
         action = item.get("action", {})
         if "x" in action and "y" in action:
             # Look for screenshot in the result
             for result_item in result:
-                if (result_item.get("type") == "computer_call_output" and
-                    result_item.get("output", {}).get("type") == "input_image"):
-                    
+                if (
+                    result_item.get("type") == "computer_call_output"
+                    and result_item.get("output", {}).get("type") == "input_image"
+                ):
+
                     image_url = result_item["output"]["image_url"]
-                    
+
                     # Extract base64 image data
                     if image_url.startswith("data:image/"):
                         # Format: data:image/png;base64,
@@ -373,26 +395,24 @@
                     else:
                         # Assume it's just base64 data
                         base64_data = image_url
-                    
+
                     try:
                         # Decode the image
                         image_bytes = base64.b64decode(base64_data)
-                        
+
                         # Draw crosshair at the action coordinates
                         annotated_image = self._draw_crosshair_on_image(
-                            image_bytes,
-                            int(action["x"]),
-                            int(action["y"])
+                            image_bytes, int(action["x"]), int(action["y"])
                         )
-                        
+
                         # Save as screenshot_action
                         self._save_artifact("screenshot_action", annotated_image)
-                        
+
                     except Exception as e:
                         # If annotation fails, just log and continue
                         print(f"Failed to annotate screenshot: {e}")
-                    
+
                     break  # Only process the first screenshot found
 
         # Increment turn counter
-        self.current_turn += 1
\ No newline at end of file
+        self.current_turn += 1
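From the path logic above, each run lands in a folder named `yyyy-mm-dd_model_hhmmss_uuid[:4]`, with `metadata.json` at the root and one `turn_NNN` directory per turn. A wiring sketch; the `TrajectorySaverCallback` arguments match this diff, but the `callbacks=` parameter on `ComputerAgent` is an assumption:

```python
from agent import ComputerAgent
from agent.callbacks.trajectory_saver import TrajectorySaverCallback  # module path assumed

agent = ComputerAgent(
    model="openai/computer-use-preview",
    tools=[],
    callbacks=[TrajectorySaverCallback("trajectories", screenshot_dir="screenshots")],
)
# Expected layout after a run (trajectory id shown is illustrative):
# trajectories/2025-01-01_cupr_120000_ab12/
#   metadata.json   <- status, kwargs, total_usage, new_items
#   turn_000/       <- api_start / api_result / agent_response artifacts, plus
#                      screenshot_action images annotated with a red crosshair
```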
diff --git a/libs/python/agent/agent/cli.py b/libs/python/agent/agent/cli.py
index c0434d02..7669b255 100644
--- a/libs/python/agent/agent/cli.py
+++ b/libs/python/agent/agent/cli.py
@@ -3,7 +3,7 @@
 CLI chat interface for agent - Computer Use Agent
 
 Usage:
     python -m agent.cli
-    
+
 Examples:
     python -m agent.cli openai/computer-use-preview
     python -m agent.cli anthropic/claude-3-5-sonnet-20241022
@@ -11,19 +11,22 @@ Examples:
 """
 
 try:
-    import asyncio
     import argparse
-    import os
-    import sys
-    import json
-    from typing import List, Dict, Any
-    import dotenv
+    import asyncio
     import base64
-    import time
+    import json
+    import os
     import platform
+    import sys
+    import time
     from pathlib import Path
+    from typing import Any, Dict, List
+
+    import dotenv
+
     try:
         from PIL import Image, ImageDraw
+
         PIL_AVAILABLE = True
     except Exception:
         PIL_AVAILABLE = False
@@ -31,36 +34,44 @@ try:
 except ImportError:
     if __name__ == "__main__":
         raise ImportError(
-            "CLI dependencies not found. "
-            "Please install with: pip install \"cua-agent[cli]\""
+            "CLI dependencies not found. " 'Please install with: pip install "cua-agent[cli]"'
         )
 
 # Load environment variables
 dotenv.load_dotenv()
 
+
 # Color codes for terminal output
 class Colors:
-    RESET = '\033[0m'
-    BOLD = '\033[1m'
-    DIM = '\033[2m'
-    
-    # Text colors
-    RED = '\033[31m'
-    GREEN = '\033[32m'
-    YELLOW = '\033[33m'
-    BLUE = '\033[34m'
-    MAGENTA = '\033[35m'
-    CYAN = '\033[36m'
-    WHITE = '\033[37m'
-    GRAY = '\033[90m'
-    
-    # Background colors
-    BG_RED = '\033[41m'
-    BG_GREEN = '\033[42m'
-    BG_YELLOW = '\033[43m'
-    BG_BLUE = '\033[44m'
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+    DIM = "\033[2m"
 
-def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n", right: str = ""):
+    # Text colors
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    BLUE = "\033[34m"
+    MAGENTA = "\033[35m"
+    CYAN = "\033[36m"
+    WHITE = "\033[37m"
+    GRAY = "\033[90m"
+
+    # Background colors
+    BG_RED = "\033[41m"
+    BG_GREEN = "\033[42m"
+    BG_YELLOW = "\033[43m"
+    BG_BLUE = "\033[44m"
+
+
+def print_colored(
+    text: str,
+    color: str = "",
+    bold: bool = False,
+    dim: bool = False,
+    end: str = "\n",
+    right: str = "",
+):
     """Print colored text to terminal with optional right-aligned text."""
     prefix = ""
     if bold:
@@ -69,24 +80,25 @@ def print_colored(text: str, color: str = "", bold: bool = Fa
         prefix += Colors.DIM
     if color:
         prefix += color
-    
+
     if right:
         # Get terminal width (default to 80 if unable to determine)
         try:
             import shutil
+
             terminal_width = shutil.get_terminal_size().columns
         except:
             terminal_width = 80
         # Add right margin
         terminal_width -= 1
-        
+
         # Calculate padding needed
         # Account for ANSI escape codes not taking visual space
         visible_left_len = len(text)
         visible_right_len = len(right)
         padding = terminal_width - visible_left_len - visible_right_len
-        
+
         if padding > 0:
             output = f"{prefix}{text}{' ' * padding}{right}{Colors.RESET}"
         else:
@@ -94,7 +106,7 @@ def print_colored(text: str, color: str = "", bold: bool = Fa
             output = f"{prefix}{text} {right}{Colors.RESET}"
     else:
         output = f"{prefix}{text}{Colors.RESET}"
-    
+
     print(output, end=end)
 
 
@@ -113,29 +125,34 @@
         args_str = f"('{details['text']}')"
     elif action_type == "scroll" and "x" in details and "y" in details:
         args_str = f"({details['x']}, {details['y']})"
-    
+
     if total_cost > 0:
         print_colored(f"🛠️ {action_type}{args_str}", dim=True, right=f"💸 ${total_cost:.2f}")
     else:
         print_colored(f"🛠️ {action_type}{args_str}", dim=True)
 
+
 def print_welcome(model: str, agent_loop: str, container_name: str):
     """Print welcome message."""
     print_colored(f"Connected to {container_name} ({model}, {agent_loop})")
     print_colored("Type 'exit' to quit.", dim=True)
 
+
 async def ainput(prompt: str = ""):
     return await asyncio.to_thread(input, prompt)
 
-async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = "", show_usage: bool = True):
+
+async def chat_loop(
+    agent, model: str, container_name: str, initial_prompt: str = "", show_usage: bool = True
+):
     """Main chat loop with the agent."""
     print_welcome(model, agent.agent_config_info.agent_class.__name__, container_name)
-    
+
     history = []
-    
+
     if initial_prompt:
         history.append({"role": "user", "content": initial_prompt})
-    
+
     total_cost = 0
 
     while True:
@@ -143,31 +160,31 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
         # Get user input with prompt
         print_colored("> ", end="")
         user_input = await ainput()
-        
-        if user_input.lower() in ['exit', 'quit', 'q']:
+
+        if user_input.lower() in ["exit", "quit", "q"]:
             print_colored("\n👋 Goodbye!")
             break
-        
+
         if not user_input:
             continue
-        
+
         # Add user message to history
         history.append({"role": "user", "content": user_input})
-        
+
         # Stream responses from the agent with spinner
         with yaspin(text="Thinking...", spinner="line", attrs=["dark"]) as spinner:
             spinner.hide()
-            
+
             async for result in agent.run(history):
                 # Add agent responses to history
                 history.extend(result.get("output", []))
 
                 if show_usage:
                     total_cost += result.get("usage", {}).get("response_cost", 0)
-                
+
                 # Process and display the output
                 for item in result.get("output", []):
-                    if item.get("type") == "message":
+                    if item.get("type") == "message" and item.get("role") == "assistant":
                         # Display agent text response
                         content = item.get("content", [])
                         for content_part in content:
@@ -176,7 +193,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
                             if text:
                                 spinner.hide()
                                 print_colored(text)
-                    
+
                     elif item.get("type") == "computer_call":
                         # Display computer action
                         action = item.get("action", {})
@@ -186,7 +203,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
                             print_action(action_type, action, total_cost)
                             spinner.text = f"Performing {action_type}..."
                             spinner.show()
-                    
+
                     elif item.get("type") == "function_call":
                         # Display function call
                         function_name = item.get("name", "")
@@ -194,18 +211,18 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
                         print_colored(f"🔧 Calling function: {function_name}", dim=True)
                         spinner.text = f"Calling {function_name}..."
                         spinner.show()
-                    
+
                     elif item.get("type") == "function_call_output":
                         # Display function output (dimmed)
                         output = item.get("output", "")
                         if output and len(output.strip()) > 0:
                             spinner.hide()
                             print_colored(f"📤 {output}", dim=True)
-            
+
             spinner.hide()
 
     if show_usage and total_cost > 0:
         print_colored(f"Total cost: ${total_cost:.2f}", dim=True)
-    
+
 
 async def main():
     """Main CLI function."""
@@ -218,104 +235,103 @@ Examples:
   python -m agent.cli anthropic/claude-3-5-sonnet-20241022
   python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
   python -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
-        """
+        """,
     )
-    
+
     parser.add_argument(
         "model",
-        help="Model string (e.g., 'openai/computer-use-preview', 'anthropic/claude-3-5-sonnet-20241022')"
+        help="Model string (e.g., 'openai/computer-use-preview', 'anthropic/claude-3-5-sonnet-20241022')",
     )
-    
+
+    parser.add_argument(
+        "--provider",
+        choices=["cloud", "lume", "winsandbox", "docker"],
+        default="cloud",
+        help="Computer provider to use: cloud (default), lume, winsandbox, or docker",
+    )
+
     parser.add_argument(
         "--images",
         type=int,
         default=3,
-        help="Number of recent images to keep in context (default: 3)"
+        help="Number of recent images to keep in context (default: 3)",
     )
-    
+
+    parser.add_argument("--trajectory", action="store_true", help="Save trajectory for debugging")
+
+    parser.add_argument("--budget", type=float, help="Maximum budget for the session (in dollars)")
+
+    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+
     parser.add_argument(
-        "--trajectory",
-        action="store_true",
-        help="Save trajectory for debugging"
-    )
-    
-    parser.add_argument(
-        "--budget",
-        type=float,
-        help="Maximum budget for the session (in dollars)"
-    )
-    
-    parser.add_argument(
-        "--verbose",
-        action="store_true",
-        help="Enable verbose logging"
+        "-p",
+        "--prompt",
+        type=str,
+        help="Initial prompt to send to the agent. Leave blank for interactive mode.",
     )
 
     parser.add_argument(
-        "-p", "--prompt",
-        type=str,
-        help="Initial prompt to send to the agent. Leave blank for interactive mode."
+        "--prompt-file",
+        type=Path,
+        help="Path to a UTF-8 text file whose contents will be used as the initial prompt. If provided, overrides --prompt.",
     )
 
     parser.add_argument(
         "--predict-click",
         dest="predict_click",
         type=str,
-        help="Instruction for click prediction. If set, runs predict_click, draws crosshair on a fresh screenshot, saves and opens it."
+        help="Instruction for click prediction. If set, runs predict_click, draws crosshair on a fresh screenshot, saves and opens it.",
+    )
+
+    parser.add_argument("-c", "--cache", action="store_true", help="Tell the API to enable caching")
+
+    parser.add_argument(
+        "-u", "--usage", action="store_true", help="Show total cost of the agent runs"
     )
 
     parser.add_argument(
-        "-c", "--cache",
-        action="store_true",
-        help="Tell the API to enable caching"
-    )
-    
-    parser.add_argument(
-        "-u", "--usage",
-        action="store_true",
-        help="Show total cost of the agent runs"
-    )
-    
-    parser.add_argument(
-        "-r", "--max-retries",
+        "-r",
+        "--max-retries",
         type=int,
         default=3,
-        help="Maximum number of retries for the LLM API calls"
+        help="Maximum number of retries for the LLM API calls",
     )
-    
+
     args = parser.parse_args()
-    
+
     # Check for required environment variables
     container_name = os.getenv("CUA_CONTAINER_NAME")
    cua_api_key = os.getenv("CUA_API_KEY")
-    
-    # Prompt for missing environment variables
+
+    # Prompt for missing environment variables (container name always required)
     if not container_name:
-        print_colored("CUA_CONTAINER_NAME not set.", dim=True)
-        print_colored("You can get a CUA container at https://www.trycua.com/", dim=True)
-        container_name = input("Enter your CUA container name: ").strip()
-        if not container_name:
-            print_colored("❌ Container name is required.")
-            sys.exit(1)
-    
-    if not cua_api_key:
+        if args.provider == "cloud":
+            print_colored("CUA_CONTAINER_NAME not set.", dim=True)
+            print_colored("You can get a CUA container at https://www.trycua.com/", dim=True)
+            container_name = input("Enter your CUA container name: ").strip()
+            if not container_name:
+                print_colored("❌ Container name is required.")
+                sys.exit(1)
+        else:
+            container_name = "cli-sandbox"
+
+    # Only require API key for cloud provider
+    if args.provider == "cloud" and not cua_api_key:
         print_colored("CUA_API_KEY not set.", dim=True)
         cua_api_key = input("Enter your CUA API key: ").strip()
         if not cua_api_key:
-            print_colored("❌ API key is required.")
+            print_colored("❌ API key is required for cloud provider.")
             sys.exit(1)
-    
+
     # Check for provider-specific API keys based on model
     provider_api_keys = {
         "openai/": "OPENAI_API_KEY",
         "anthropic/": "ANTHROPIC_API_KEY",
-        "omniparser+": "OPENAI_API_KEY",
-        "omniparser+": "ANTHROPIC_API_KEY",
     }
-    
+
     # Find matching provider and check for API key
     for prefix, env_var in provider_api_keys.items():
-        if args.model.startswith(prefix):
+        if prefix in args.model:
             if not os.getenv(env_var):
                 print_colored(f"{env_var} not set.", dim=True)
                 api_key = input(f"Enter your {env_var.replace('_', ' ').title()}: ").strip()
@@ -325,7 +341,7 @@ Examples:
                 # Set the environment variable for the session
                 os.environ[env_var] = api_key
             break
-    
+
     # Import here to avoid import errors if dependencies are missing
     try:
         from agent import ComputerAgent
@@ -334,46 +350,62 @@ Examples:
         print_colored(f"❌ Import error: {e}", Colors.RED, bold=True)
         print_colored("Make sure agent and computer libraries are installed.", Colors.YELLOW)
         sys.exit(1)
-    
+
+    # Resolve provider -> os_type, provider_type, api key requirement
+    provider_map = {
+        "cloud": ("linux", "cloud", True),
+        "lume": ("macos", "lume", False),
+        "winsandbox": ("windows", "winsandbox", False),
+        "docker": ("linux", "docker", False),
+    }
+    os_type, provider_type, needs_api_key = provider_map[args.provider]
+
+    computer_kwargs = {
+        "os_type": os_type,
+        "provider_type": provider_type,
+        "name": container_name,
+    }
+    if needs_api_key:
+        computer_kwargs["api_key"] = cua_api_key  # type: ignore
+
     # Create computer instance
-    async with Computer(
-        os_type="linux",
-        provider_type="cloud",
-        name=container_name,
-        api_key=cua_api_key
-    ) as computer:
-        
+    async with Computer(**computer_kwargs) as computer:  # type: ignore
+
         # Create agent
         agent_kwargs = {
             "model": args.model,
             "tools": [computer],
-            "trust_remote_code": True, # needed for some local models (e.g., InternVL, OpenCUA)
+            "trust_remote_code": True,  # needed for some local models (e.g., InternVL, OpenCUA)
             "verbosity": 20 if args.verbose else 30,  # DEBUG vs WARNING
-            "max_retries": args.max_retries
+            "max_retries": args.max_retries,
         }
 
         if args.images > 0:
             agent_kwargs["only_n_most_recent_images"] = args.images
-        
+
         if args.trajectory:
             agent_kwargs["trajectory_dir"] = "trajectories"
-        
+
         if args.budget:
             agent_kwargs["max_trajectory_budget"] = {
                 "max_budget": args.budget,
                 "raise_error": True,
-                "reset_after_each_run": False
+                "reset_after_each_run": False,
             }
 
         if args.cache:
             agent_kwargs["use_prompt_caching"] = True
-        
+
         agent = ComputerAgent(**agent_kwargs)
-        
+
         # If predict-click mode is requested, run once and exit
        if args.predict_click:
            if not PIL_AVAILABLE:
-                print_colored("❌ Pillow (PIL) is required for --predict-click visualization. Install with: pip install pillow", Colors.RED, bold=True)
+                print_colored(
+                    "❌ Pillow (PIL) is required for --predict-click visualization. Install with: pip install pillow",
+                    Colors.RED,
+                    bold=True,
+                )
                 sys.exit(1)
 
             instruction = args.predict_click
@@ -408,6 +440,7 @@ Examples:
 
             try:
                 from io import BytesIO
+
                 with Image.open(BytesIO(img_bytes)) as img:
                     img = img.convert("RGB")
                     draw = ImageDraw.Draw(img)
@@ -430,9 +463,9 @@ Examples:
                 if system == "windows":
                     os.startfile(str(out_path))  # type: ignore[attr-defined]
                 elif system == "darwin":
-                    os.system(f"open \"{out_path}\"")
+                    os.system(f'open "{out_path}"')
                 else:
-                    os.system(f"xdg-open \"{out_path}\"")
+                    os.system(f'xdg-open "{out_path}"')
             except Exception:
                 pass
         except Exception as e:
@@ -442,13 +475,21 @@ Examples:
 
         # Done
         sys.exit(0)
 
-        # Start chat loop (default interactive mode)
-        await chat_loop(agent, args.model, container_name, args.prompt, args.usage)
+        # Resolve initial prompt from --prompt-file or --prompt
+        initial_prompt = args.prompt or ""
+        if args.prompt_file:
+            try:
+                initial_prompt = args.prompt_file.read_text(encoding="utf-8")
+            except Exception as e:
+                print_colored(f"❌ Failed to read --prompt-file: {e}", Colors.RED, bold=True)
+                sys.exit(1)
 
+        # Start chat loop (default interactive mode)
+        await chat_loop(agent, args.model, container_name, initial_prompt, args.usage)
 
 if __name__ == "__main__":
     try:
         asyncio.run(main())
     except (KeyboardInterrupt, EOFError) as _:
-        print_colored("\n\n👋 Goodbye!")
\ No newline at end of file
+        print_colored("\n\n👋 Goodbye!")
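The new `--provider`, `--prompt-file`, and `--predict-click` flags compose as follows; the commands mirror the module docstring's usage style, and the model strings are just examples:

```
# Local Lume VM instead of a cloud container (no CUA_API_KEY prompt)
python -m agent.cli anthropic/claude-3-5-sonnet-20241022 --provider lume

# Seed the chat with a prompt read from a file, reporting cost as it goes
python -m agent.cli openai/computer-use-preview -u --prompt-file task.txt

# One-shot click grounding; saves and opens an annotated screenshot, then exits
python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022 --predict-click "the Submit button"
```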
""" +from computer import Computer as cuaComputer + from .base import AsyncComputerHandler from .cua import cuaComputerHandler from .custom import CustomComputerHandler -from computer import Computer as cuaComputer + def is_agent_computer(computer): """Check if the given computer is a ComputerHandler or CUA Computer.""" - return isinstance(computer, AsyncComputerHandler) or \ - isinstance(computer, cuaComputer) or \ - (isinstance(computer, dict)) #and "screenshot" in computer) + return ( + isinstance(computer, AsyncComputerHandler) + or isinstance(computer, cuaComputer) + or (isinstance(computer, dict)) + ) # and "screenshot" in computer) + async def make_computer_handler(computer): """ Create a computer handler from a computer interface. - + Args: computer: Either a ComputerHandler instance, Computer instance, or dict of functions - + Returns: ComputerHandler: A computer handler instance - + Raises: ValueError: If the computer type is not supported """ @@ -38,4 +43,4 @@ async def make_computer_handler(computer): return computer_handler if isinstance(computer, dict): return CustomComputerHandler(computer) - raise ValueError(f"Unsupported computer type: {type(computer)}") \ No newline at end of file + raise ValueError(f"Unsupported computer type: {type(computer)}") diff --git a/libs/python/agent/agent/computers/base.py b/libs/python/agent/agent/computers/base.py index 358fbbf4..673c6693 100644 --- a/libs/python/agent/agent/computers/base.py +++ b/libs/python/agent/agent/computers/base.py @@ -2,23 +2,32 @@ Base computer interface protocol for agent interactions. """ -from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_checkable +from typing import ( + Any, + Dict, + List, + Literal, + Optional, + Protocol, + Union, + runtime_checkable, +) @runtime_checkable class AsyncComputerHandler(Protocol): """Protocol defining the interface for computer interactions.""" - - # ==== Computer-Use-Preview Action Space ==== + + # ==== Computer-Use-Preview Action Space ==== async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: """Get the current environment type.""" ... - + async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" ... - + async def screenshot(self, text: Optional[str] = None) -> str: """Take a screenshot and return as base64 string. @@ -26,49 +35,49 @@ class AsyncComputerHandler(Protocol): text: Optional descriptive text (for compatibility with GPT-4o models, ignored) """ ... - + async def click(self, x: int, y: int, button: str = "left") -> None: """Click at coordinates with specified button.""" ... - + async def double_click(self, x: int, y: int) -> None: """Double click at coordinates.""" ... - + async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: """Scroll at coordinates with specified scroll amounts.""" ... - + async def type(self, text: str) -> None: """Type text.""" ... - + async def wait(self, ms: int = 1000) -> None: """Wait for specified milliseconds.""" ... - + async def move(self, x: int, y: int) -> None: """Move cursor to coordinates.""" ... - + async def keypress(self, keys: Union[List[str], str]) -> None: """Press key combination.""" ... - + async def drag(self, path: List[Dict[str, int]]) -> None: """Drag along specified path.""" ... - + async def get_current_url(self) -> str: """Get current URL (for browser environments).""" ... 
diff --git a/libs/python/agent/agent/computers/base.py b/libs/python/agent/agent/computers/base.py
index 358fbbf4..673c6693 100644
--- a/libs/python/agent/agent/computers/base.py
+++ b/libs/python/agent/agent/computers/base.py
@@ -2,23 +2,32 @@
 Base computer interface protocol for agent interactions.
 """
 
-from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_checkable
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Protocol,
+    Union,
+    runtime_checkable,
+)
 
 
 @runtime_checkable
 class AsyncComputerHandler(Protocol):
     """Protocol defining the interface for computer interactions."""
-    
-    # ==== Computer-Use-Preview Action Space ==== 
+
+    # ==== Computer-Use-Preview Action Space ====
 
     async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
         """Get the current environment type."""
         ...
-    
+
     async def get_dimensions(self) -> tuple[int, int]:
         """Get screen dimensions as (width, height)."""
         ...
-    
+
     async def screenshot(self, text: Optional[str] = None) -> str:
         """Take a screenshot and return as base64 string.
 
@@ -26,49 +35,49 @@ class AsyncComputerHandler(Protocol):
             text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
         """
         ...
-    
+
     async def click(self, x: int, y: int, button: str = "left") -> None:
         """Click at coordinates with specified button."""
         ...
-    
+
     async def double_click(self, x: int, y: int) -> None:
         """Double click at coordinates."""
         ...
-    
+
     async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
         """Scroll at coordinates with specified scroll amounts."""
         ...
-    
+
     async def type(self, text: str) -> None:
         """Type text."""
         ...
-    
+
     async def wait(self, ms: int = 1000) -> None:
         """Wait for specified milliseconds."""
         ...
-    
+
     async def move(self, x: int, y: int) -> None:
         """Move cursor to coordinates."""
         ...
-    
+
     async def keypress(self, keys: Union[List[str], str]) -> None:
         """Press key combination."""
         ...
-    
+
     async def drag(self, path: List[Dict[str, int]]) -> None:
         """Drag along specified path."""
         ...
-    
+
     async def get_current_url(self) -> str:
         """Get current URL (for browser environments)."""
         ...
-    
-    # ==== Anthropic Action Space ==== 
+
+    # ==== Anthropic Action Space ====
 
     async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         """Left mouse down at coordinates."""
         ...
-    
+
     async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         """Left mouse up at coordinates."""
         ...
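One property of the `runtime_checkable` protocol worth noting: `isinstance` (as used by `is_agent_computer` above) only verifies that every protocol method *name* exists on the object; signatures and return types are not checked, and a partial implementation fails the check. A quick illustration:

```python
from agent.computers.base import AsyncComputerHandler  # module path from this diff

class Partial:
    async def screenshot(self, text=None):
        return ""

# False: runtime_checkable protocols require *all* members to be present,
# so implementing only screenshot() is not enough to pass the check.
print(isinstance(Partial(), AsyncComputerHandler))
```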
diff --git a/libs/python/agent/agent/computers/cua.py b/libs/python/agent/agent/computers/cua.py
index 40337950..00b0720f 100644
--- a/libs/python/agent/agent/computers/cua.py
+++ b/libs/python/agent/agent/computers/cua.py
@@ -3,24 +3,27 @@ Computer handler implementation for OpenAI computer-use-preview protocol.
 """
 
 import base64
-from typing import Dict, List, Any, Literal, Union, Optional
-from .base import AsyncComputerHandler
+from typing import Any, Dict, List, Literal, Optional, Union
+
 from computer import Computer
 
+from .base import AsyncComputerHandler
+
+
 class cuaComputerHandler(AsyncComputerHandler):
     """Computer handler that implements the Computer protocol using the computer interface."""
-    
+
     def __init__(self, cua_computer: Computer):
         """Initialize with a computer interface (from tool schema)."""
         self.cua_computer = cua_computer
         self.interface = None
 
     async def _initialize(self):
-        if hasattr(self.cua_computer, '_initialized') and not self.cua_computer._initialized:
+        if hasattr(self.cua_computer, "_initialized") and not self.cua_computer._initialized:
             await self.cua_computer.run()
         self.interface = self.cua_computer.interface
-    
-    # ==== Computer-Use-Preview Action Space ==== 
+
+    # ==== Computer-Use-Preview Action Space ====
 
     async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
         """Get the current environment type."""
@@ -32,7 +35,7 @@ class cuaComputerHandler(AsyncComputerHandler):
         assert self.interface is not None
         screen_size = await self.interface.get_screen_size()
         return screen_size["width"], screen_size["height"]
-    
+
     async def screenshot(self, text: Optional[str] = None) -> str:
         """Take a screenshot and return as base64 string.
 
@@ -41,8 +44,8 @@ class cuaComputerHandler(AsyncComputerHandler):
         """
         assert self.interface is not None
         screenshot_bytes = await self.interface.screenshot()
-        return base64.b64encode(screenshot_bytes).decode('utf-8')
-    
+        return base64.b64encode(screenshot_bytes).decode("utf-8")
+
     async def click(self, x: int, y: int, button: str = "left") -> None:
         """Click at coordinates with specified button."""
         assert self.interface is not None
@@ -53,34 +56,35 @@ class cuaComputerHandler(AsyncComputerHandler):
         else:
             # Default to left click for unknown buttons
             await self.interface.left_click(x, y)
-    
+
     async def double_click(self, x: int, y: int) -> None:
         """Double click at coordinates."""
         assert self.interface is not None
         await self.interface.double_click(x, y)
-    
+
     async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
         """Scroll at coordinates with specified scroll amounts."""
         assert self.interface is not None
         await self.interface.move_cursor(x, y)
         await self.interface.scroll(scroll_x, scroll_y)
-    
+
     async def type(self, text: str) -> None:
         """Type text."""
         assert self.interface is not None
         await self.interface.type_text(text)
-    
+
     async def wait(self, ms: int = 1000) -> None:
         """Wait for specified milliseconds."""
         assert self.interface is not None
         import asyncio
+
         await asyncio.sleep(ms / 1000.0)
-    
+
     async def move(self, x: int, y: int) -> None:
         """Move cursor to coordinates."""
         assert self.interface is not None
         await self.interface.move_cursor(x, y)
-    
+
     async def keypress(self, keys: Union[List[str], str]) -> None:
         """Press key combination."""
         assert self.interface is not None
@@ -91,38 +95,38 @@ class cuaComputerHandler(AsyncComputerHandler):
         else:
             # Handle key combinations
             await self.interface.hotkey(*keys)
-    
+
     async def drag(self, path: List[Dict[str, int]]) -> None:
         """Drag along specified path."""
         assert self.interface is not None
         if not path:
             return
-        
+
         # Start drag from first point
         start = path[0]
         await self.interface.mouse_down(start["x"], start["y"])
-        
+
         # Move through path
         for point in path[1:]:
             await self.interface.move_cursor(point["x"], point["y"])
-        
+
         # End drag at last point
         end = path[-1]
         await self.interface.mouse_up(end["x"], end["y"])
-    
+
     async def get_current_url(self) -> str:
         """Get current URL (for browser environments)."""
         # This would need to be implemented based on the specific browser interface
         # For now, return empty string
         return ""
 
-    # ==== Anthropic Computer Action Space ==== 
+    # ==== Anthropic Computer Action Space ====
 
     async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         """Left mouse down at coordinates."""
         assert self.interface is not None
         await self.interface.mouse_down(x, y, button="left")
-    
+
     async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         """Left mouse up at coordinates."""
         assert self.interface is not None
-        await self.interface.mouse_up(x, y, button="left")
\ No newline at end of file
+        await self.interface.mouse_up(x, y, button="left")
""" import base64 -from typing import Dict, List, Any, Literal, Union, Optional, Callable -from PIL import Image import io +from typing import Any, Callable, Dict, List, Literal, Optional, Union + +from PIL import Image + from .base import AsyncComputerHandler class CustomComputerHandler(AsyncComputerHandler): """Computer handler that implements the Computer protocol using a dictionary of custom functions.""" - + def __init__(self, functions: Dict[str, Callable]): """ Initialize with a dictionary of functions. - + Args: functions: Dictionary where keys are method names and values are callable functions. Only 'screenshot' is required, all others are optional. - + Raises: ValueError: If required 'screenshot' function is not provided. """ - if 'screenshot' not in functions: + if "screenshot" not in functions: raise ValueError("'screenshot' function is required in functions dictionary") - + self.functions = functions self._last_screenshot_size: Optional[tuple[int, int]] = None - + async def _call_function(self, func, *args, **kwargs): """ Call a function, handling both async and sync functions. - + Args: func: The function to call *args: Positional arguments to pass to the function **kwargs: Keyword arguments to pass to the function - + Returns: The result of the function call """ import asyncio import inspect - + if callable(func): if inspect.iscoroutinefunction(func): return await func(*args, **kwargs) @@ -51,14 +53,14 @@ class CustomComputerHandler(AsyncComputerHandler): return func(*args, **kwargs) else: return func - + async def _get_value(self, attribute: str): """ Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys. - + Args: attribute: The attribute name to look for - + Returns: The value from the functions dict, called if callable, returned directly if not """ @@ -66,20 +68,20 @@ class CustomComputerHandler(AsyncComputerHandler): get_key = f"get_{attribute}" if get_key in self.functions: return await self._call_function(self.functions[get_key]) - - # Check for '{attribute}' + + # Check for '{attribute}' if attribute in self.functions: return await self._call_function(self.functions[attribute]) - + return None - + def _to_b64_str(self, img: Union[bytes, Image.Image, str]) -> str: """ Convert image to base64 string. 
- + Args: img: Image as bytes, PIL Image, or base64 string - + Returns: str: Base64 encoded image string """ @@ -88,47 +90,47 @@ class CustomComputerHandler(AsyncComputerHandler): return img elif isinstance(img, bytes): # Raw bytes - return base64.b64encode(img).decode('utf-8') + return base64.b64encode(img).decode("utf-8") elif isinstance(img, Image.Image): # PIL Image buffer = io.BytesIO() - img.save(buffer, format='PNG') - return base64.b64encode(buffer.getvalue()).decode('utf-8') + img.save(buffer, format="PNG") + return base64.b64encode(buffer.getvalue()).decode("utf-8") else: raise ValueError(f"Unsupported image type: {type(img)}") - - # ==== Computer-Use-Preview Action Space ==== + + # ==== Computer-Use-Preview Action Space ==== async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: """Get the current environment type.""" - result = await self._get_value('environment') + result = await self._get_value("environment") if result is None: return "linux" assert result in ["windows", "mac", "linux", "browser"] - return result # type: ignore + return result # type: ignore async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" - result = await self._get_value('dimensions') + result = await self._get_value("dimensions") if result is not None: - return result # type: ignore - + return result # type: ignore + # Fallback: use last screenshot size if available if not self._last_screenshot_size: await self.screenshot() assert self._last_screenshot_size is not None, "Failed to get screenshot size" - + return self._last_screenshot_size - + async def screenshot(self, text: Optional[str] = None) -> str: """Take a screenshot and return as base64 string. Args: text: Optional descriptive text (for compatibility with GPT-4o models, ignored) """ - result = await self._call_function(self.functions['screenshot']) - b64_str = self._to_b64_str(result) # type: ignore - + result = await self._call_function(self.functions["screenshot"]) + b64_str = self._to_b64_str(result) # type: ignore + # Try to extract dimensions for fallback use try: if isinstance(result, Image.Image): @@ -140,74 +142,75 @@ class CustomComputerHandler(AsyncComputerHandler): except Exception: # If we can't get dimensions, that's okay pass - + return b64_str - + async def click(self, x: int, y: int, button: str = "left") -> None: """Click at coordinates with specified button.""" - if 'click' in self.functions: - await self._call_function(self.functions['click'], x, y, button) + if "click" in self.functions: + await self._call_function(self.functions["click"], x, y, button) # No-op if not implemented - + async def double_click(self, x: int, y: int) -> None: """Double click at coordinates.""" - if 'double_click' in self.functions: - await self._call_function(self.functions['double_click'], x, y) + if "double_click" in self.functions: + await self._call_function(self.functions["double_click"], x, y) # No-op if not implemented - + async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: """Scroll at coordinates with specified scroll amounts.""" - if 'scroll' in self.functions: - await self._call_function(self.functions['scroll'], x, y, scroll_x, scroll_y) + if "scroll" in self.functions: + await self._call_function(self.functions["scroll"], x, y, scroll_x, scroll_y) # No-op if not implemented - + async def type(self, text: str) -> None: """Type text.""" - if 'type' in self.functions: - await self._call_function(self.functions['type'], text) + if "type" in 
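Because `_call_function` dispatches on `inspect.iscoroutinefunction`, the functions dict can mix sync and async callables freely, and unimplemented actions fall through to safe no-ops. A runnable sketch against the code in this diff:

```python
import asyncio

from PIL import Image

from agent.computers.custom import CustomComputerHandler

async def log_click(x: int, y: int, button: str = "left") -> None:
    print(f"click {x},{y} ({button})")

handler = CustomComputerHandler(
    {
        "screenshot": lambda: Image.new("RGB", (800, 600)),  # sync callable is fine
        "click": log_click,  # async callable is fine too
    }
)

async def main() -> None:
    await handler.screenshot()             # caches (800, 600) as the fallback size
    print(await handler.get_dimensions())  # -> (800, 600) via the screenshot fallback
    await handler.click(100, 200)          # -> click 100,200 (left)
    await handler.type("ignored")          # no 'type' key: silent no-op

asyncio.run(main())
```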
diff --git a/libs/python/agent/agent/decorators.py b/libs/python/agent/agent/decorators.py
index 7fba0443..a4e5cd93 100644
--- a/libs/python/agent/agent/decorators.py
+++ b/libs/python/agent/agent/decorators.py
@@ -3,47 +3,56 @@ Decorators for agent - agent_loop decorator
 """
 
 from typing import List, Optional
+
 from .types import AgentConfigInfo
 
 # Global registry
 _agent_configs: List[AgentConfigInfo] = []
 
+
 def register_agent(models: str, priority: int = 0):
     """
     Decorator to register an AsyncAgentConfig class.
-    
+
     Args:
         models: Regex pattern to match supported models
         priority: Priority for agent selection (higher = more priority)
     """
+
     def decorator(agent_class: type):
         # Validate that the class implements AsyncAgentConfig protocol
-        if not hasattr(agent_class, 'predict_step'):
-            raise ValueError(f"Agent class {agent_class.__name__} must implement predict_step method")
-        if not hasattr(agent_class, 'predict_click'):
-            raise ValueError(f"Agent class {agent_class.__name__} must implement predict_click method")
-        if not hasattr(agent_class, 'get_capabilities'):
-            raise ValueError(f"Agent class {agent_class.__name__} must implement get_capabilities method")
-        
+        if not hasattr(agent_class, "predict_step"):
+            raise ValueError(
+                f"Agent class {agent_class.__name__} must implement predict_step method"
+            )
+        if not hasattr(agent_class, "predict_click"):
+            raise ValueError(
+                f"Agent class {agent_class.__name__} must implement predict_click method"
+            )
+        if not hasattr(agent_class, "get_capabilities"):
+            raise ValueError(
+                f"Agent class {agent_class.__name__} must implement get_capabilities method"
+            )
+
         # Register the agent config
         config_info = AgentConfigInfo(
-            agent_class=agent_class,
-            models_regex=models,
-            priority=priority
+            agent_class=agent_class, models_regex=models, priority=priority
         )
         _agent_configs.append(config_info)
-        
+
         # Sort by priority (highest first)
         _agent_configs.sort(key=lambda x: x.priority, reverse=True)
-        
+
         return agent_class
-    
+
     return decorator
 
+
 def get_agent_configs() -> List[AgentConfigInfo]:
     """Get all registered agent configs"""
     return _agent_configs.copy()
 
+
 def find_agent_config(model: str) -> Optional[AgentConfigInfo]:
     """Find the best matching agent config for a model"""
     for config_info in _agent_configs:
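The validation in `register_agent` is purely attribute-based (`hasattr`), so any class exposing the three methods registers cleanly. A hedged sketch; the method bodies and the regex are illustrative, and the matching rule inside `find_agent_config` is assumed to be a regex match on `models_regex`:

```python
from agent.decorators import find_agent_config, register_agent  # paths from this diff

@register_agent(models=r"acme/.*", priority=10)
class AcmeConfig:
    async def predict_step(self, *args, **kwargs): ...   # stub
    async def predict_click(self, *args, **kwargs): ...  # stub
    def get_capabilities(self): return ["step"]          # stub

match = find_agent_config("acme/vision-1")
print(match.agent_class.__name__ if match else None)  # expected: AcmeConfig
```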
diff --git a/libs/python/agent/agent/human_tool/__init__.py b/libs/python/agent/agent/human_tool/__init__.py
index f57fb305..90064332 100644
--- a/libs/python/agent/agent/human_tool/__init__.py
+++ b/libs/python/agent/agent/human_tool/__init__.py
@@ -12,7 +12,7 @@ Components:
 Usage:
     # Run the server and UI
     python -m agent.human_tool
-    
+
     # Or run components separately
     python -m agent.human_tool.server  # API server only
     python -m agent.human_tool.ui      # UI only
@@ -21,9 +21,4 @@ Usage:
 from .server import CompletionQueue, completion_queue
 from .ui import HumanCompletionUI, create_ui
 
-__all__ = [
-    "CompletionQueue",
-    "completion_queue",
-    "HumanCompletionUI",
-    "create_ui"
-]
+__all__ = ["CompletionQueue", "completion_queue", "HumanCompletionUI", "create_ui"]
diff --git a/libs/python/agent/agent/human_tool/__main__.py b/libs/python/agent/agent/human_tool/__main__.py
index e1ceed50..bca0b3f2 100644
--- a/libs/python/agent/agent/human_tool/__main__.py
+++ b/libs/python/agent/agent/human_tool/__main__.py
@@ -8,6 +8,7 @@ with a Gradio UI for human interaction.
 
 import gradio as gr
 from fastapi import FastAPI
+
 from .server import app as fastapi_app
 from .ui import create_ui
 
@@ -18,6 +19,7 @@ gradio_demo = create_ui()
 CUSTOM_PATH = "/gradio"
 app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH)
 
+
 # Add a redirect from root to Gradio UI
 @fastapi_app.get("/")
 async def redirect_to_ui():
@@ -25,14 +27,16 @@ async def redirect_to_ui():
     return {
         "message": "Human Completion Server is running",
         "ui_url": "/gradio",
-        "api_docs": "/docs"
+        "api_docs": "/docs",
     }
 
+
 if __name__ == "__main__":
     import uvicorn
+
     print("🚀 Starting Human-in-the-Loop Completion Server...")
     print("📊 API Server: http://localhost:8002")
     print("🎨 Gradio UI: http://localhost:8002/gradio")
     print("📚 API Docs: http://localhost:8002/docs")
-    
+
     uvicorn.run(app, host="0.0.0.0", port=8002)
tool_calls: Optional[List[Dict[str, Any]]] = None, + ) -> bool: """Mark a completion call as completed with a response or tool calls.""" async with self._lock: if call_id not in self._queue: return False - + call = self._queue[call_id] if call.status != CompletionStatus.PENDING: return False - + call.status = CompletionStatus.COMPLETED call.completed_at = datetime.now() call.response = response call.tool_calls = tool_calls - + # Remove from pending order if call_id in self._pending_order: self._pending_order.remove(call_id) - + return True - + async def fail_call(self, call_id: str, error: str) -> bool: """Mark a completion call as failed with an error.""" async with self._lock: if call_id not in self._queue: return False - + call = self._queue[call_id] if call.status != CompletionStatus.PENDING: return False - + call.status = CompletionStatus.FAILED call.completed_at = datetime.now() call.error = error - + # Remove from pending order if call_id in self._pending_order: self._pending_order.remove(call_id) - + return True - + async def wait_for_completion(self, call_id: str, timeout: float = 300.0) -> Optional[str]: """Wait for a completion call to be completed and return the response.""" start_time = asyncio.get_event_loop().time() - + while True: status = await self.get_call_status(call_id) if not status: return None - + if status["status"] == CompletionStatus.COMPLETED.value: return status.get("response") elif status["status"] == CompletionStatus.FAILED.value: raise Exception(f"Completion failed: {status.get('error', 'Unknown error')}") - + # Check timeout if asyncio.get_event_loop().time() - start_time > timeout: await self.fail_call(call_id, "Timeout waiting for human response") raise TimeoutError("Timeout waiting for human response") - + # Wait a bit before checking again await asyncio.sleep(0.5) @@ -204,9 +214,7 @@ async def get_status(call_id: str): async def complete_call(call_id: str, response: CompletionResponse): """Complete a call with a human response.""" success = await completion_queue.complete_call( - call_id, - response=response.response, - tool_calls=response.tool_calls + call_id, response=response.response, tool_calls=response.tool_calls ) if success: return {"status": "success", "message": "Call completed"} @@ -219,7 +227,9 @@ async def fail_call(call_id: str, error: Dict[str, str]): """Mark a call as failed.""" success = await completion_queue.fail_call(call_id, error.get("error", "Unknown error")) if not success: - raise HTTPException(status_code=404, detail="Completion call not found or already completed") + raise HTTPException( + status_code=404, detail="Completion call not found or already completed" + ) return {"status": "failed"} @@ -231,4 +241,5 @@ async def root(): if __name__ == "__main__": import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8002) diff --git a/libs/python/agent/agent/human_tool/ui.py b/libs/python/agent/agent/human_tool/ui.py index 12366477..ae6b5405 100644 --- a/libs/python/agent/agent/human_tool/ui.py +++ b/libs/python/agent/agent/human_tool/ui.py @@ -1,14 +1,17 @@ -import gradio as gr -import json -import time -from typing import List, Dict, Any, Optional -from datetime import datetime -import requests -from .server import completion_queue import base64 import io +import json +import time +from datetime import datetime +from typing import Any, Dict, List, Optional + +import gradio as gr +import requests from PIL import Image +from .server import completion_queue + + class HumanCompletionUI: def __init__(self, server_url: str = 
"http://localhost:8002"): self.server_url = server_url @@ -20,7 +23,7 @@ class HumanCompletionUI: self.current_button: str = "left" self.current_scroll_x: int = 0 self.current_scroll_y: int = -120 - + def format_messages_for_chatbot(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Format messages for display in gr.Chatbot with type='messages'.""" formatted = [] @@ -28,7 +31,7 @@ class HumanCompletionUI: role = msg.get("role", "user") content = msg.get("content", "") tool_calls = msg.get("tool_calls", []) - + # Handle different content formats if isinstance(content, list): # Multi-modal content - can include text and images @@ -55,7 +58,7 @@ class HumanCompletionUI: else: # For URL images, create gr.Image with URL formatted_content.append(gr.Image(value=image_url)) - + # Determine final content format if len(formatted_content) == 1: content = formatted_content[0] @@ -63,28 +66,28 @@ class HumanCompletionUI: content = formatted_content else: content = "[Empty content]" - + # Ensure role is valid for Gradio Chatbot if role not in ["user", "assistant"]: role = "assistant" if role == "system" else "user" - + # Invert roles for better display in human UI context # (what the AI says becomes "user", what human should respond becomes "assistant") if role == "user": role = "assistant" else: role = "user" - + # Add the main message if it has content if content and str(content).strip(): formatted.append({"role": role, "content": content}) - + # Handle tool calls - create separate messages for each tool call if tool_calls: for tool_call in tool_calls: function_name = tool_call.get("function", {}).get("name", "unknown") arguments_str = tool_call.get("function", {}).get("arguments", "{}") - + try: # Parse arguments to format them nicely arguments = json.loads(arguments_str) @@ -92,18 +95,20 @@ class HumanCompletionUI: except json.JSONDecodeError: # If parsing fails, use the raw string formatted_args = arguments_str - + # Create a formatted message for the tool call tool_call_content = f"```json\n{formatted_args}\n```" - - formatted.append({ - "role": role, - "content": tool_call_content, - "metadata": {"title": f"🛠️ Used {function_name}"} - }) - + + formatted.append( + { + "role": role, + "content": tool_call_content, + "metadata": {"title": f"🛠️ Used {function_name}"}, + } + ) + return formatted - + def get_pending_calls(self) -> List[Dict[str, Any]]: """Get pending calls from the server.""" try: @@ -113,38 +118,39 @@ class HumanCompletionUI: except Exception as e: print(f"Error fetching pending calls: {e}") return [] - + def complete_call_with_response(self, call_id: str, response: str) -> bool: """Complete a call with a text response.""" try: response_data = {"response": response} response_obj = requests.post( - f"{self.server_url}/complete/{call_id}", - json=response_data, - timeout=10 + f"{self.server_url}/complete/{call_id}", json=response_data, timeout=10 ) response_obj.raise_for_status() return True except requests.RequestException as e: print(f"Error completing call: {e}") return False - + def complete_call_with_tool_calls(self, call_id: str, tool_calls: List[Dict[str, Any]]) -> bool: """Complete a call with tool calls.""" try: response_data = {"tool_calls": tool_calls} response_obj = requests.post( - f"{self.server_url}/complete/{call_id}", - json=response_data, - timeout=10 + f"{self.server_url}/complete/{call_id}", json=response_data, timeout=10 ) response_obj.raise_for_status() return True except requests.RequestException as e: print(f"Error completing call: {e}") return 
False - - def complete_call(self, call_id: str, response: Optional[str] = None, tool_calls: Optional[List[Dict[str, Any]]] = None) -> bool: + + def complete_call( + self, + call_id: str, + response: Optional[str] = None, + tool_calls: Optional[List[Dict[str, Any]]] = None, + ) -> bool: """Complete a call with either a response or tool calls.""" try: response_data = {} @@ -152,25 +158,23 @@ class HumanCompletionUI: response_data["response"] = response if tool_calls: response_data["tool_calls"] = tool_calls - + response_obj = requests.post( - f"{self.server_url}/complete/{call_id}", - json=response_data, - timeout=10 + f"{self.server_url}/complete/{call_id}", json=response_data, timeout=10 ) response_obj.raise_for_status() return True except requests.RequestException as e: print(f"Error completing call: {e}") return False - + def get_last_image_from_messages(self, messages: List[Dict[str, Any]]) -> Optional[Any]: """Extract the last image from the messages for display above conversation.""" last_image = None - + for msg in reversed(messages): # Start from the last message content = msg.get("content", "") - + if isinstance(content, list): for item in reversed(content): # Get the last image in the message if item.get("type") == "image_url": @@ -189,13 +193,13 @@ class HumanCompletionUI: else: # For URL images, return the URL return image_url - + return last_image - + def refresh_pending_calls(self): """Refresh the list of pending calls.""" pending_calls = self.get_pending_calls() - + if not pending_calls: return ( gr.update(choices=["latest"], value="latest"), # dropdown @@ -205,27 +209,27 @@ class HumanCompletionUI: gr.update(visible=False), # click_actions_group hidden gr.update(visible=False), # actions_group hidden ) - + # Sort pending calls by created_at to get oldest first sorted_calls = sorted(pending_calls, key=lambda x: x.get("created_at", "")) - + # Create choices for dropdown choices = [("latest", "latest")] # Add "latest" option first - + for call in sorted_calls: call_id = call["id"] model = call.get("model", "unknown") created_at = call.get("created_at", "") # Format timestamp try: - dt = datetime.fromisoformat(created_at.replace('Z', '+00:00')) + dt = datetime.fromisoformat(created_at.replace("Z", "+00:00")) time_str = dt.strftime("%H:%M:%S") except: time_str = created_at - + choice_label = f"{call_id[:8]}... 
({model}) - {time_str}" choices.append((choice_label, call_id)) - + # Default to "latest" which shows the oldest pending conversation selected_call_id = "latest" if selected_call_id == "latest" and sorted_calls: @@ -239,7 +243,7 @@ class HumanCompletionUI: conversation = [] self.current_call_id = None self.last_image = None - + return ( gr.update(choices=choices, value="latest"), gr.update(value=self.last_image), @@ -248,7 +252,7 @@ class HumanCompletionUI: gr.update(visible=True), # click_actions_group visible when there is a call gr.update(visible=True), # actions_group visible when there is a call ) - + def on_call_selected(self, selected_choice): """Handle when a call is selected from the dropdown.""" if not selected_choice: @@ -259,7 +263,7 @@ class HumanCompletionUI: gr.update(visible=False), # click_actions_group hidden gr.update(visible=False), # actions_group hidden ) - + pending_calls = self.get_pending_calls() if not pending_calls: return ( @@ -269,7 +273,7 @@ class HumanCompletionUI: gr.update(visible=False), # click_actions_group hidden gr.update(visible=False), # actions_group hidden ) - + # Handle "latest" option if selected_choice == "latest": # Sort calls by created_at to get oldest first @@ -284,17 +288,17 @@ class HumanCompletionUI: if call_id_short in selected_choice: call_id = call["id"] break - + if not call_id: return ( gr.update(value=None), # no image gr.update(value=[]), # empty chatbot - gr.update(interactive=False) + gr.update(interactive=False), ) - + # Find the selected call selected_call = next((c for c in pending_calls if c["id"] == call_id), None) - + if not selected_call: return ( gr.update(value=None), # no image @@ -303,12 +307,12 @@ class HumanCompletionUI: gr.update(visible=False), # click_actions_group hidden gr.update(visible=False), # actions_group hidden ) - + conversation = self.format_messages_for_chatbot(selected_call.get("messages", [])) self.current_call_id = call_id # Get the last image from messages self.last_image = self.get_last_image_from_messages(selected_call.get("messages", [])) - + return ( gr.update(value=self.last_image), gr.update(value=conversation), @@ -316,110 +320,111 @@ class HumanCompletionUI: gr.update(visible=True), # click_actions_group visible gr.update(visible=True), # actions_group visible ) - + def submit_response(self, response_text: str): """Submit a text response to the current call.""" if not self.current_call_id: return ( gr.update(value=response_text), # keep response text - gr.update(value="❌ No call selected") # status + gr.update(value="❌ No call selected"), # status ) - + if not response_text.strip(): return ( gr.update(value=response_text), # keep response text - gr.update(value="❌ Response cannot be empty") # status + gr.update(value="❌ Response cannot be empty"), # status ) - + success = self.complete_call_with_response(self.current_call_id, response_text) - + if success: status_msg = "✅ Response submitted successfully!" 
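Tying together the `CompletionQueue` pieces from `server.py` above, a minimal sketch of enqueueing a request and blocking on a human answer might look like this. The message content and the `model` label are placeholders; `add_completion` and `wait_for_completion` are the methods shown earlier in this diff.

```python
# Sketch: enqueue a request for a human and wait for the UI to answer it.
import asyncio

from agent.human_tool.server import completion_queue

async def ask_human() -> None:
    call_id = await completion_queue.add_completion(
        messages=[{"role": "user", "content": "Which button should I click?"}],
        model="human",  # placeholder model label
    )
    # Polls every 0.5 s; raises TimeoutError (and fails the call) after the
    # default 300 s timeout if no human responds via the UI.
    answer = await completion_queue.wait_for_completion(call_id, timeout=300.0)
    print(answer)

asyncio.run(ask_human())
```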
return ( gr.update(value=""), # clear response text - gr.update(value=status_msg) # status + gr.update(value=status_msg), # status ) else: return ( gr.update(value=response_text), # keep response text - gr.update(value="❌ Failed to submit response") # status + gr.update(value="❌ Failed to submit response"), # status ) - + def submit_action(self, action_type: str, **kwargs) -> str: """Submit a computer action as a tool call.""" if not self.current_call_id: return "❌ No call selected" - + import uuid - + # Create tool call structure action_data = {"type": action_type, **kwargs} tool_call = { "id": f"call_{uuid.uuid4().hex[:24]}", "type": "function", - "function": { - "name": "computer", - "arguments": json.dumps(action_data) - } + "function": {"name": "computer", "arguments": json.dumps(action_data)}, } - + success = self.complete_call_with_tool_calls(self.current_call_id, [tool_call]) - + if success: return f"✅ {action_type.capitalize()} action submitted as tool call" else: return f"❌ Failed to submit {action_type} action" - - def submit_click_action(self, x: int, y: int, action_type: str = "click", button: str = "left") -> str: + + def submit_click_action( + self, x: int, y: int, action_type: str = "click", button: str = "left" + ) -> str: """Submit a coordinate-based action.""" if action_type == "click": return self.submit_action(action_type, x=x, y=y, button=button) else: return self.submit_action(action_type, x=x, y=y) - + def submit_type_action(self, text: str) -> str: """Submit a type action.""" return self.submit_action("type", text=text) - + def submit_hotkey_action(self, keys: str) -> str: """Submit a hotkey action.""" return self.submit_action("keypress", keys=keys) - + def submit_wait_action(self) -> str: """Submit a wait action with no kwargs.""" return self.submit_action("wait") - - def submit_description_click(self, description: str, action_type: str = "click", button: str = "left") -> str: + + def submit_description_click( + self, description: str, action_type: str = "click", button: str = "left" + ) -> str: """Submit a description-based action.""" if action_type == "click": return self.submit_action(action_type, element_description=description, button=button) else: return self.submit_action(action_type, element_description=description) - + def wait_for_pending_calls(self, max_seconds: float = 10.0, check_interval: float = 0.2): """Wait for pending calls to appear or until max_seconds elapsed. - + This method loops and checks for pending calls at regular intervals, returning as soon as a pending call is found or the maximum wait time is reached. 
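The `submit_action` helper above wraps every computer action in an OpenAI-style tool call before posting it to the server. As a concrete illustration (coordinates and button are placeholder values), the payload for a left click at (100, 200) is built like this:

```python
# Sketch of the tool-call dict submit_action() builds for a click action.
import json
import uuid

action_data = {"type": "click", "x": 100, "y": 200, "button": "left"}
tool_call = {
    "id": f"call_{uuid.uuid4().hex[:24]}",
    "type": "function",
    "function": {"name": "computer", "arguments": json.dumps(action_data)},
}
# This single-element list is what complete_call_with_tool_calls() submits.
print([tool_call])
```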
- + Args: max_seconds: Maximum number of seconds to wait check_interval: How often to check for pending calls (in seconds) """ import time - + start_time = time.time() - + while time.time() - start_time < max_seconds: # Check if there are any pending calls pending_calls = self.get_pending_calls() if pending_calls: # Found pending calls, return immediately return self.refresh_pending_calls() - + # Wait before checking again time.sleep(check_interval) - + # Max wait time reached, return current state return self.refresh_pending_calls() @@ -427,79 +432,73 @@ class HumanCompletionUI: def create_ui(): """Create the Gradio interface.""" ui_handler = HumanCompletionUI() - + with gr.Blocks(title="Human-in-the-Loop Agent Tool", fill_width=True) as demo: gr.Markdown("# 🤖 Human-in-the-Loop Agent Tool") gr.Markdown("Review AI conversation requests and provide human responses.") - + with gr.Row(): with gr.Column(scale=2): with gr.Group(): screenshot_image = gr.Image( - label="Interactive Screenshot", - interactive=False, - height=600 + label="Interactive Screenshot", interactive=False, height=600 ) - + # Action type selection for image clicks (wrapped for visibility control) with gr.Group(visible=False) as click_actions_group: with gr.Row(): action_type_radio = gr.Dropdown( label="Interactive Action", - choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down", "scroll"], + choices=[ + "click", + "double_click", + "move", + "left_mouse_up", + "left_mouse_down", + "scroll", + ], value="click", - scale=2 + scale=2, ) action_button_radio = gr.Dropdown( label="Button", choices=["left", "right", "wheel", "back", "forward"], value="left", visible=True, - scale=1 + scale=1, ) scroll_x_input = gr.Number( - label="scroll_x", - value=0, - visible=False, - scale=1 + label="scroll_x", value=0, visible=False, scale=1 ) scroll_y_input = gr.Number( - label="scroll_y", - value=-120, - visible=False, - scale=1 + label="scroll_y", value=-120, visible=False, scale=1 ) - + conversation_chatbot = gr.Chatbot( - label="Conversation", - type="messages", - height=500, - show_copy_button=True + label="Conversation", type="messages", height=500, show_copy_button=True ) - + with gr.Column(scale=1): with gr.Group(): call_dropdown = gr.Dropdown( label="Select a pending conversation request", choices=["latest"], interactive=True, - value="latest" + value="latest", ) refresh_btn = gr.Button("🔄 Refresh", variant="secondary") status_display = gr.Textbox( - label="Status", - interactive=False, - value="Ready to receive requests..." + label="Status", interactive=False, value="Ready to receive requests..." ) with gr.Group(): response_text = gr.Textbox( - label="Message", - lines=3, - placeholder="Enter your message here..." + label="Message", lines=3, placeholder="Enter your message here..." 
) - submit_btn = gr.Button("📤 Submit Message", variant="primary", interactive=False) - + submit_btn = gr.Button( + "📤 Submit Message", variant="primary", interactive=False + ) + # Action Accordions (wrapped for visibility control) with gr.Group(visible=False) as actions_group: with gr.Tabs(): @@ -507,58 +506,73 @@ def create_ui(): with gr.Group(): description_text = gr.Textbox( label="Element Description", - placeholder="e.g., 'Privacy and security option in left sidebar'" + placeholder="e.g., 'Privacy and security option in left sidebar'", ) with gr.Row(): description_action_type = gr.Dropdown( label="Action", - choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"], - value="click" + choices=[ + "click", + "double_click", + "move", + "left_mouse_up", + "left_mouse_down", + ], + value="click", ) description_button = gr.Dropdown( label="Button", choices=["left", "right", "wheel", "back", "forward"], - value="left" + value="left", ) description_submit_btn = gr.Button("Submit Click Action") - + with gr.Tab("📝 Type Action"): with gr.Group(): type_text = gr.Textbox( - label="Text to Type", - placeholder="Enter text to type..." + label="Text to Type", placeholder="Enter text to type..." ) type_submit_btn = gr.Button("Submit Type") - + with gr.Tab("⌨️ Keypress Action"): with gr.Group(): keypress_text = gr.Textbox( - label="Keys", - placeholder="e.g., ctrl+c, alt+tab" + label="Keys", placeholder="e.g., ctrl+c, alt+tab" ) keypress_submit_btn = gr.Button("Submit Keypress") - + with gr.Tab("🧰 Misc Actions"): with gr.Group(): misc_action_dropdown = gr.Dropdown( - label="Action", - choices=["wait"], - value="wait" + label="Action", choices=["wait"], value="wait" ) misc_submit_btn = gr.Button("Submit Action") - + # Event handlers refresh_btn.click( fn=ui_handler.refresh_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + call_dropdown.change( fn=ui_handler.on_call_selected, inputs=[call_dropdown], - outputs=[screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + def handle_image_click(evt: gr.SelectData): if evt.index is not None: x, y = evt.index @@ -568,31 +582,44 @@ def create_ui(): sx_i = int(ui_handler.current_scroll_x or 0) sy_i = int(ui_handler.current_scroll_y or 0) # Submit a scroll action with x,y position and scroll deltas - result = ui_handler.submit_action("scroll", x=x, y=y, scroll_x=sx_i, scroll_y=sy_i) + result = ui_handler.submit_action( + "scroll", x=x, y=y, scroll_x=sx_i, scroll_y=sy_i + ) else: result = ui_handler.submit_click_action(x, y, action_type, button) ui_handler.wait_for_pending_calls() return result return "No coordinates selected" - screenshot_image.select( - fn=handle_image_click, - outputs=[status_display] - ).then( + screenshot_image.select(fn=handle_image_click, outputs=[status_display]).then( fn=ui_handler.wait_for_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) # Response submission submit_btn.click( fn=ui_handler.submit_response, inputs=[response_text], - 
outputs=[response_text, status_display] + outputs=[response_text, status_display], ).then( fn=ui_handler.refresh_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + # Toggle visibility of controls based on action type def toggle_action_controls(action_type): # Button visible only for click @@ -603,59 +630,63 @@ def create_ui(): # Update state ui_handler.current_action_type = action_type or "click" return button_vis, scroll_x_vis, scroll_y_vis - + action_type_radio.change( fn=toggle_action_controls, inputs=[action_type_radio], - outputs=[action_button_radio, scroll_x_input, scroll_y_input] + outputs=[action_button_radio, scroll_x_input, scroll_y_input], ) # Keep other control values in ui_handler state def on_button_change(val): - ui_handler.current_button = (val or "left") - action_button_radio.change( - fn=on_button_change, - inputs=[action_button_radio] - ) + ui_handler.current_button = val or "left" + + action_button_radio.change(fn=on_button_change, inputs=[action_button_radio]) def on_scroll_x_change(val): try: ui_handler.current_scroll_x = int(val) if val is not None else 0 except Exception: ui_handler.current_scroll_x = 0 - scroll_x_input.change( - fn=on_scroll_x_change, - inputs=[scroll_x_input] - ) + + scroll_x_input.change(fn=on_scroll_x_change, inputs=[scroll_x_input]) def on_scroll_y_change(val): try: ui_handler.current_scroll_y = int(val) if val is not None else 0 except Exception: ui_handler.current_scroll_y = 0 - scroll_y_input.change( - fn=on_scroll_y_change, - inputs=[scroll_y_input] - ) - + + scroll_y_input.change(fn=on_scroll_y_change, inputs=[scroll_y_input]) + type_submit_btn.click( - fn=ui_handler.submit_type_action, - inputs=[type_text], - outputs=[status_display] + fn=ui_handler.submit_type_action, inputs=[type_text], outputs=[status_display] ).then( fn=ui_handler.wait_for_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + keypress_submit_btn.click( - fn=ui_handler.submit_hotkey_action, - inputs=[keypress_text], - outputs=[status_display] + fn=ui_handler.submit_hotkey_action, inputs=[keypress_text], outputs=[status_display] ).then( fn=ui_handler.wait_for_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + def handle_description_submit(description, action_type, button): if description: result = ui_handler.submit_description_click(description, action_type, button) @@ -666,12 +697,19 @@ def create_ui(): description_submit_btn.click( fn=handle_description_submit, inputs=[description_text, description_action_type, description_button], - outputs=[status_display] + outputs=[status_display], ).then( fn=ui_handler.wait_for_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + # Misc action handler def 
handle_misc_submit(selected_action): if selected_action == "wait": @@ -681,20 +719,32 @@ def create_ui(): return f"Unsupported misc action: {selected_action}" misc_submit_btn.click( - fn=handle_misc_submit, - inputs=[misc_action_dropdown], - outputs=[status_display] + fn=handle_misc_submit, inputs=[misc_action_dropdown], outputs=[status_display] ).then( fn=ui_handler.wait_for_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + # Load initial data demo.load( fn=ui_handler.refresh_pending_calls, - outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group] + outputs=[ + call_dropdown, + screenshot_image, + conversation_chatbot, + submit_btn, + click_actions_group, + actions_group, + ], ) - + return demo diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py index e27060ff..2dd64ac4 100644 --- a/libs/python/agent/agent/integrations/hud/__init__.py +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -8,21 +8,22 @@ Exports: - run_full_dataset(dataset, ...) - MCPComputerAgent """ + import time from typing import Any, Optional from agent.computers import is_agent_computer -from datasets import load_dataset, Dataset -from hud.datasets import Task, run_dataset +from datasets import Dataset, load_dataset from hud import trace +from hud.datasets import Task, run_dataset from .agent import MCPComputerAgent - # --------------------------------------------------------------------------- # Single-task runner # --------------------------------------------------------------------------- + async def run_single_task( dataset: str | Dataset | list[dict[str, Any]], *, @@ -47,24 +48,20 @@ async def run_single_task( # Load dataset and pick a sample if isinstance(dataset, str): - dataset = load_dataset(dataset, split="train") # type: ignore[arg-type] + dataset = load_dataset(dataset, split="train") # type: ignore[arg-type] elif isinstance(dataset, list): dataset = dataset else: dataset = dataset["train"] - + sample_task = dataset[task_id] # type: ignore[index] task_prompt = sample_task.get("prompt", f"Task {sample_task.get('id', 0)}") # type: ignore[attr-defined] # Filter any existing Computer tools # The eval framework will add its own Computer tool per task if tools: - tools = [ - tool - for tool in tools - if not is_agent_computer(tool) - ] - + tools = [tool for tool in tools if not is_agent_computer(tool)] + with trace(name=task_prompt): task = Task(**sample_task) # type: ignore[arg-type] @@ -87,13 +84,14 @@ async def run_single_task( ) print(f"Running: {task_prompt}") result = await agent.run(task, max_steps=10) - print(f"✅ Reward: {getattr(result, 'reward')}") + print(f"✅ Reward: {result.reward}") # --------------------------------------------------------------------------- # Full-dataset runner # --------------------------------------------------------------------------- + async def run_full_dataset( dataset: str | Dataset | list[dict[str, Any]], *, @@ -121,9 +119,9 @@ async def run_full_dataset( # Run with our MCP-based agent class. 
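As a usage sketch for the single-task runner defined above: the dataset id and model string below are illustrative placeholders, and the `model` keyword is an assumption inferred from how `MCPComputerAgent` is constructed; only `task_id` is directly visible in the function body.

```python
# Sketch: run one task from a dataset with the HUD integration above.
import asyncio

from agent.integrations.hud import run_single_task

asyncio.run(
    run_single_task(
        "my-org/my-eval-dataset",   # hypothetical HF dataset id
        model="anthropic/claude-sonnet-4",  # assumed keyword and model id
        task_id=0,
    )
)
```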
if isinstance(dataset, str): - dataset_name = dataset.split('/')[-1] + dataset_name = dataset.split("/")[-1] job_name = job_name or f"Evaluation {dataset_name}" - dataset = load_dataset(dataset, split=split) # type: ignore[arg-type] + dataset = load_dataset(dataset, split=split) # type: ignore[arg-type] else: dataset_name = "custom" job_name = job_name or f"Evaluation {time.strftime('%H:%M %Y-%m-%d')}" @@ -131,12 +129,8 @@ async def run_full_dataset( # Filter any existing Computer tools # The eval framework will add its own Computer tool per task if tools: - tools = [ - tool - for tool in tools - if not is_agent_computer(tool) - ] - + tools = [tool for tool in tools if not is_agent_computer(tool)] + # Execute evaluation return await run_dataset( name=job_name, @@ -170,4 +164,4 @@ __all__ = [ "run_single_task", "run_full_dataset", "MCPComputerAgent", -] \ No newline at end of file +] diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index c1465ee6..e830d53c 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -9,26 +9,26 @@ Key differences from the OpenAI OperatorAgent variant: - Planning is executed via `ComputerAgent.run(messages)`. - The first yielded result per step is returned as the agent response. """ + from __future__ import annotations +import base64 import io +import uuid +from pathlib import Path from typing import Any, ClassVar, Optional +import hud +import mcp.types as types from agent.agent import ComputerAgent as BaseComputerAgent from agent.callbacks import PromptInstructionsCallback from agent.callbacks.trajectory_saver import TrajectorySaverCallback +from agent.computers import is_agent_computer +from agent.responses import make_failed_tool_call_items from hud.agents import MCPAgent from hud.tools.computer.settings import computer_settings from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace - -from agent.responses import make_failed_tool_call_items -from agent.computers import is_agent_computer from PIL import Image -import mcp.types as types -import hud -import uuid -import base64 -from pathlib import Path class MCPComputerAgent(MCPAgent): @@ -114,8 +114,10 @@ class MCPComputerAgent(MCPAgent): self.last_screenshot_b64 = None buffer = io.BytesIO() - Image.new('RGB', (self.metadata["display_width"], self.metadata["display_height"])).save(buffer, format='PNG') - self.last_screenshot_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + Image.new("RGB", (self.metadata["display_width"], self.metadata["display_height"])).save( + buffer, format="PNG" + ) + self.last_screenshot_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8") # Ensure a computer shim is present so width/height/environment are known computer_shim = { @@ -128,12 +130,8 @@ class MCPComputerAgent(MCPAgent): } agent_tools: list[Any] = [computer_shim] if tools: - agent_tools.extend([ - tool - for tool in tools - if not is_agent_computer(tool) - ]) - + agent_tools.extend([tool for tool in tools if not is_agent_computer(tool)]) + agent_kwargs = { "model": self.model, "trajectory_dir": trajectory_dir, @@ -150,9 +148,7 @@ class MCPComputerAgent(MCPAgent): "telemetry_enabled": telemetry_enabled, } - self.computer_agent = BaseComputerAgent( - **agent_kwargs - ) + self.computer_agent = BaseComputerAgent(**agent_kwargs) async def get_system_messages(self) -> list[Any]: """Create initial messages. 
@@ -161,9 +157,7 @@ class MCPComputerAgent(MCPAgent): """ return [] - async def format_blocks( - self, blocks: list[types.ContentBlock] - ) -> list[dict[str, Any]]: + async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[dict[str, Any]]: """ Format blocks for OpenAI input format. @@ -200,42 +194,49 @@ class MCPComputerAgent(MCPAgent): # Call the ComputerAgent LLM API async for result in self.computer_agent.run(messages): # type: ignore[arg-type] - items = result['output'] + items = result["output"] if not items or tool_calls: break for item in items: - if item['type'] in ['reasoning', 'message', 'computer_call', 'function_call', 'function_call_output']: + if item["type"] in [ + "reasoning", + "message", + "computer_call", + "function_call", + "function_call_output", + ]: agent_result.append(item) - + # Add messages to output text - if item['type'] == 'reasoning': + if item["type"] == "reasoning": output_text.extend( - f"Reasoning: {summary['text']}" - for summary in item['summary'] + f"Reasoning: {summary['text']}" for summary in item["summary"] ) - elif item['type'] == 'message': - if isinstance(item['content'], list): + elif item["type"] == "message": + if isinstance(item["content"], list): output_text.extend( - item['text'] - for item in item['content'] - if item['type'] == 'output_text' + item["text"] + for item in item["content"] + if item["type"] == "output_text" ) - elif isinstance(item['content'], str): - output_text.append(item['content']) - + elif isinstance(item["content"], str): + output_text.append(item["content"]) + # If we get a tool call, we're not done - if item['type'] == 'computer_call': + if item["type"] == "computer_call": id = item["call_id"] - tool_calls.append(MCPToolCall( - name="openai_computer", - arguments=item["action"], - id=id, - )) + tool_calls.append( + MCPToolCall( + name="openai_computer", + arguments=item["action"], + id=id, + ) + ) is_done = False self.tool_call_inputs[id] = agent_result break - + # if we have tool calls, we should exit the loop if tool_calls: break @@ -247,7 +248,7 @@ class MCPComputerAgent(MCPAgent): tool_calls=tool_calls, done=is_done, ) - + def _log_image(self, image_b64: str): callbacks = self.computer_agent.callbacks for callback in callbacks: @@ -257,9 +258,7 @@ class MCPComputerAgent(MCPAgent): callback._save_artifact("screenshot_after", image_bytes) async def format_tool_results( - self, - tool_calls: list[MCPToolCall], - tool_results: list[MCPToolResult] + self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult] ) -> list[dict[str, Any]]: """Extract latest screenshot from tool results in dict form. 
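To make the mapping in `get_response` above concrete: each `computer_call` output item becomes an `MCPToolCall` whose `arguments` are the raw action dict, keyed back to the originating items via `call_id`. The item values below are placeholders; the constructor call mirrors the diff.

```python
# Sketch: the computer_call -> MCPToolCall conversion performed above.
from hud.types import MCPToolCall

item = {
    "type": "computer_call",
    "call_id": "call_abc123",  # placeholder id
    "action": {"type": "click", "x": 100, "y": 200, "button": "left"},
}
tool_call = MCPToolCall(
    name="openai_computer",
    arguments=item["action"],
    id=item["call_id"],
)
```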
@@ -274,45 +273,60 @@ class MCPComputerAgent(MCPAgent): previous_output = self.previous_output.copy() or [] # First we need to remove any pending computer_calls from the end of previous_output - while previous_output and previous_output[-1]['type'] == 'computer_call': + while previous_output and previous_output[-1]["type"] == "computer_call": previous_output.pop() messages.extend(previous_output) # If the call is a 'response', don't add the result - if call.name == 'response': + if call.name == "response": continue # Otherwise, if we have a result, we should add it to the messages content = [ - { "type": "input_text", "text": content.text } if isinstance(content, types.TextContent) - else { "type": "input_image", "image_url": f"data:image/png;base64,{content.data}" } if isinstance(content, types.ImageContent) - else { "type": "input_text", "text": "" } + ( + {"type": "input_text", "text": content.text} + if isinstance(content, types.TextContent) + else ( + { + "type": "input_image", + "image_url": f"data:image/png;base64,{content.data}", + } + if isinstance(content, types.ImageContent) + else {"type": "input_text", "text": ""} + ) + ) for content in result.content ] - messages.append({ - "role": "user", - "content": content, - }) + messages.append( + { + "role": "user", + "content": content, + } + ) continue - + # Add the assistant's computer call messages.extend(self.tool_call_inputs[call.id]) - + if result.isError: - error_text = "".join([ - content.text - for content in result.content - if isinstance(content, types.TextContent) - ]) + error_text = "".join( + [ + content.text + for content in result.content + if isinstance(content, types.TextContent) + ] + ) # Replace computer call with failed tool call messages.pop() - messages.extend(make_failed_tool_call_items( - tool_name=call.name, - tool_kwargs=call.arguments or {}, - error_message=error_text, - call_id=call.id, - )) + messages.extend( + make_failed_tool_call_items( + tool_name=call.name, + tool_kwargs=call.arguments or {}, + error_message=error_text, + call_id=call.id, + ) + ) else: # Get the latest screenshot screenshots = [ @@ -325,23 +339,27 @@ class MCPComputerAgent(MCPAgent): if screenshots: self._log_image(screenshots[0]) self.last_screenshot_b64 = screenshots[0] - messages.append({ - "type": "computer_call_output", - "call_id": call.id, - "output": { - "type": "input_image", - "image_url": f"data:image/png;base64,{screenshots[0]}" - }, - }) + messages.append( + { + "type": "computer_call_output", + "call_id": call.id, + "output": { + "type": "input_image", + "image_url": f"data:image/png;base64,{screenshots[0]}", + }, + } + ) else: # Otherwise, replace computer call with failed tool call messages.pop() - messages.extend(make_failed_tool_call_items( - tool_name=call.name, - tool_kwargs=call.arguments or {}, - error_message="No screenshots returned.", - call_id=call.id, - )) + messages.extend( + make_failed_tool_call_items( + tool_name=call.name, + tool_kwargs=call.arguments or {}, + error_message="No screenshots returned.", + call_id=call.id, + ) + ) return messages diff --git a/libs/python/agent/agent/integrations/hud/proxy.py b/libs/python/agent/agent/integrations/hud/proxy.py index 9087d1c9..3b062196 100644 --- a/libs/python/agent/agent/integrations/hud/proxy.py +++ b/libs/python/agent/agent/integrations/hud/proxy.py @@ -7,30 +7,33 @@ OpenAI-like response blocks. We intentionally only support a single-step call by consuming the first yielded result from `ComputerAgent.run()`. 
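For reference, the success path of `format_tool_results` above appends a `computer_call_output` item carrying the latest screenshot as a data URL. A minimal sketch of that message shape (the base64 payload and call id are placeholders):

```python
# Sketch of the computer_call_output message appended on success above.
screenshot_b64 = "iVBORw0KGgo..."  # placeholder: base64-encoded PNG
message = {
    "type": "computer_call_output",
    "call_id": "call_abc123",  # placeholder call id
    "output": {
        "type": "input_image",
        "image_url": f"data:image/png;base64,{screenshot_b64}",
    },
}
```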
""" -import traceback import time +import traceback import uuid from typing import Any, Dict, List, Optional from agent.agent import ComputerAgent as BaseComputerAgent from agent.callbacks import PromptInstructionsCallback -from hud.tools.computer.settings import computer_settings -from PIL import Image from hud.agents import OperatorAgent +from hud.tools.computer.settings import computer_settings # OpenAI Responses typed models (required) from openai.types.responses import ( Response, + ResponseComputerToolCall, ResponseInputParam, ResponseOutputItem, - ResponseComputerToolCall, ResponseOutputMessage, ResponseOutputText, ResponseReasoningItem, ResponseUsage, ) +from PIL import Image -def _map_agent_output_to_openai_blocks(output_items: List[Dict[str, Any]]) -> List[ResponseOutputItem]: + +def _map_agent_output_to_openai_blocks( + output_items: List[Dict[str, Any]], +) -> List[ResponseOutputItem]: """Map our agent output items to OpenAI ResponseOutputItem typed models. Only a subset is supported: computer_call, assistant message (text), and reasoning. @@ -40,14 +43,16 @@ def _map_agent_output_to_openai_blocks(output_items: List[Dict[str, Any]]) -> Li for item in output_items or []: t = item.get("type") if t == "computer_call": - comp = ResponseComputerToolCall.model_validate({ - "id": item.get("id") or f"cu_{uuid.uuid4().hex}", - "type": "computer_call", - "call_id": item["call_id"], - "action": item["action"], - "pending_safety_checks": item.get("pending_safety_checks", []), - "status": "completed", - }) + comp = ResponseComputerToolCall.model_validate( + { + "id": item.get("id") or f"cu_{uuid.uuid4().hex}", + "type": "computer_call", + "call_id": item["call_id"], + "action": item["action"], + "pending_safety_checks": item.get("pending_safety_checks", []), + "status": "completed", + } + ) blocks.append(comp) # we will exit early here as the responses api only supports a single step break @@ -55,31 +60,38 @@ def _map_agent_output_to_openai_blocks(output_items: List[Dict[str, Any]]) -> Li content_blocks: List[ResponseOutputText] = [] for c in item.get("content", []) or []: content_blocks.append( - ResponseOutputText.model_validate({ - "type": "output_text", - "text": c["text"], - "annotations": [], - }) + ResponseOutputText.model_validate( + { + "type": "output_text", + "text": c["text"], + "annotations": [], + } + ) ) if content_blocks: - msg = ResponseOutputMessage.model_validate({ - "id": item.get("id") or f"msg_{uuid.uuid4()}", - "type": "message", - "role": "assistant", - "status": "completed", - "content": [ct.model_dump() for ct in content_blocks], - }) + msg = ResponseOutputMessage.model_validate( + { + "id": item.get("id") or f"msg_{uuid.uuid4()}", + "type": "message", + "role": "assistant", + "status": "completed", + "content": [ct.model_dump() for ct in content_blocks], + } + ) blocks.append(msg) elif t == "reasoning": - reasoning = ResponseReasoningItem.model_validate({ - "id": item.get("id") or f"rsn_{uuid.uuid4()}", - "type": "reasoning", - "summary": item["summary"], - }) + reasoning = ResponseReasoningItem.model_validate( + { + "id": item.get("id") or f"rsn_{uuid.uuid4()}", + "type": "reasoning", + "summary": item["summary"], + } + ) blocks.append(reasoning) # Unhandled types are ignored return blocks + def _to_plain_dict_list(items: Any) -> List[Dict[str, Any]]: out: List[Dict[str, Any]] = [] for it in list(items): @@ -92,6 +104,7 @@ def _to_plain_dict_list(items: Any) -> List[Dict[str, Any]]: out.append(dict(it)) # may raise if not mapping return out + class 
FakeAsyncOpenAI: """Minimal fake OpenAI client with only `responses.create` implemented. @@ -132,10 +145,12 @@ class FakeAsyncOpenAI: # Pre-pend instructions message effective_input = full_input if instructions: - effective_input = [{ - "role": "user", - "content": instructions, - }] + full_input + effective_input = [ + { + "role": "user", + "content": instructions, + } + ] + full_input # Run a single iteration of the ComputerAgent agent_result: Optional[Dict[str, Any]] = None @@ -152,32 +167,43 @@ class FakeAsyncOpenAI: blocks_to_cache = full_input + output for b in blocks_to_cache: bid = getattr(b, "id", None) or f"tmp-{hash(repr(b))}" - self.blocks_cache[bid] = b # type: ignore[assignment] + self.blocks_cache[bid] = b # type: ignore[assignment] block_ids.append(bid) response_id = agent_result.get("id") or f"fake-{int(time.time()*1000)}" self.context_cache[response_id] = block_ids try: - return Response.model_validate({ - "id": response_id, - "created_at": time.time(), - "object": "response", - "model": model, - "output": output, - "parallel_tool_calls": False, - "tool_choice": "auto", - "tools": [], - "previous_response_id": previous_response_id, - "usage": ResponseUsage.model_validate({ - "input_tokens": usage.get("input_tokens", 0), - "output_tokens": usage.get("output_tokens", 0), - "total_tokens": usage.get("total_tokens", 0), - "input_tokens_details": usage.get("input_tokens_details", { "cached_tokens": 0 }), - "output_tokens_details": usage.get("output_tokens_details", { "reasoning_tokens": 0 }), - }), - }) + return Response.model_validate( + { + "id": response_id, + "created_at": time.time(), + "object": "response", + "model": model, + "output": output, + "parallel_tool_calls": False, + "tool_choice": "auto", + "tools": [], + "previous_response_id": previous_response_id, + "usage": ResponseUsage.model_validate( + { + "input_tokens": usage.get("input_tokens", 0), + "output_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), + "input_tokens_details": usage.get( + "input_tokens_details", {"cached_tokens": 0} + ), + "output_tokens_details": usage.get( + "output_tokens_details", {"reasoning_tokens": 0} + ), + } + ), + } + ) except Exception as e: - print(f"Error while validating agent response (attempt {attempt + 1}/{max_retries}): ", e) + print( + f"Error while validating agent response (attempt {attempt + 1}/{max_retries}): ", + e, + ) if attempt == max_retries - 1: print(traceback.format_exc()) raise e @@ -221,9 +247,15 @@ class ProxyOperatorAgent(OperatorAgent): allowed_tools = allowed_tools or ["openai_computer"] computer_shim = { - 'screenshot': lambda: Image.new('RGB', (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT)), - 'environment': 'linux', - 'dimensions': (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT) + "screenshot": lambda: Image.new( + "RGB", + (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT), + ), + "environment": "linux", + "dimensions": ( + computer_settings.OPENAI_COMPUTER_WIDTH, + computer_settings.OPENAI_COMPUTER_HEIGHT, + ), } # Build tools ensuring the computer_shim is included agent_tools: list[Any] = [computer_shim] @@ -258,6 +290,7 @@ class ProxyOperatorAgent(OperatorAgent): **kwargs, ) + __all__ = [ "FakeAsyncOpenAI", "ProxyOperatorAgent", diff --git a/libs/python/agent/agent/loops/__init__.py b/libs/python/agent/agent/loops/__init__.py index 406f14ca..ab23ac27 100644 --- a/libs/python/agent/agent/loops/__init__.py +++ 
b/libs/python/agent/agent/loops/__init__.py @@ -3,26 +3,34 @@ Agent loops for agent """ # Import the loops to register them -from . import anthropic -from . import openai -from . import uitars -from . import omniparser -from . import gta1 -from . import composed_grounded -from . import glm45v -from . import opencua -from . import internvl -from . import holo +from . import ( + anthropic, + composed_grounded, + gemini, + glm45v, + gta1, + holo, + internvl, + moondream3, + omniparser, + openai, + opencua, + qwen, + uitars, +) __all__ = [ - "anthropic", - "openai", - "uitars", - "omniparser", - "gta1", - "composed_grounded", - "glm45v", + "anthropic", + "openai", + "uitars", + "omniparser", + "gta1", + "composed_grounded", + "glm45v", "opencua", "internvl", "holo", -] \ No newline at end of file + "moondream3", + "gemini", + "qwen", +] diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py index cd616d38..9dd77eb4 100644 --- a/libs/python/agent/agent/loops/anthropic.py +++ b/libs/python/agent/agent/loops/anthropic.py @@ -4,69 +4,68 @@ Anthropic hosted tools agent loop implementation using liteLLM import asyncio import json -from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union + import litellm -from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig +from litellm.responses.litellm_completion_transformation.transformation import ( + LiteLLMCompletionResponsesConfig, +) from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..loops.base import AsyncAgentConfig from ..responses import ( - make_reasoning_item, - make_output_text_item, make_click_item, make_double_click_item, make_drag_item, + make_failed_tool_call_items, + make_input_image_item, make_keypress_item, + make_left_mouse_down_item, + make_left_mouse_up_item, make_move_item, + make_output_text_item, + make_reasoning_item, + make_screenshot_item, make_scroll_item, make_type_item, make_wait_item, - make_input_image_item, - make_screenshot_item, - make_failed_tool_call_items, - make_left_mouse_down_item, - make_left_mouse_up_item ) +from ..types import AgentCapability, AgentResponse, Messages, Tools # Model version mapping to tool version and beta flag MODEL_TOOL_MAPPING = [ # Claude 4 models { - "pattern": r"claude-4|claude-opus-4|claude-sonnet-4", + "pattern": r"claude-4|claude-opus-4|claude-sonnet-4|claude-haiku-4", "tool_version": "computer_20250124", - "beta_flag": "computer-use-2025-01-24" + "beta_flag": "computer-use-2025-01-24", }, # Claude 3.7 models { "pattern": r"claude-3\.?7|claude-3-7", "tool_version": "computer_20250124", - "beta_flag": "computer-use-2025-01-24" + "beta_flag": "computer-use-2025-01-24", }, # Claude 3.5 models (fallback) { "pattern": r"claude-3\.?5|claude-3-5", "tool_version": "computer_20241022", - "beta_flag": "computer-use-2024-10-22" - } + "beta_flag": "computer-use-2024-10-22", + }, ] + def _get_tool_config_for_model(model: str) -> Dict[str, str]: """Get tool version and beta flag for the given model.""" import re - + for mapping in MODEL_TOOL_MAPPING: if re.search(mapping["pattern"], model, re.IGNORECASE): - return { - "tool_version": mapping["tool_version"], - "beta_flag": mapping["beta_flag"] - } - + return {"tool_version": mapping["tool_version"], "beta_flag": mapping["beta_flag"]} + # Default to Claude 3.5 configuration - return { - 
"tool_version": "computer_20241022", - "beta_flag": "computer-use-2024-10-22" - } + return {"tool_version": "computer_20241022", "beta_flag": "computer-use-2024-10-22"} + async def _map_computer_tool_to_anthropic(computer_tool: Any, tool_version: str) -> Dict[str, Any]: """Map a computer tool to Anthropic's hosted tool schema.""" @@ -76,7 +75,7 @@ async def _map_computer_tool_to_anthropic(computer_tool: Any, tool_version: str) except Exception: # Fallback to default dimensions if method fails width, height = 1024, 768 - + return { "type": tool_version, "function": { @@ -89,32 +88,37 @@ async def _map_computer_tool_to_anthropic(computer_tool: Any, tool_version: str) }, } + async def _prepare_tools_for_anthropic(tool_schemas: List[Dict[str, Any]], model: str) -> Tools: """Prepare tools for Anthropic API format.""" tool_config = _get_tool_config_for_model(model) anthropic_tools = [] - + for schema in tool_schemas: if schema["type"] == "computer": # Map computer tool to Anthropic format - anthropic_tools.append(await _map_computer_tool_to_anthropic( - schema["computer"], - tool_config["tool_version"] - )) + anthropic_tools.append( + await _map_computer_tool_to_anthropic( + schema["computer"], tool_config["tool_version"] + ) + ) elif schema["type"] == "function": # Function tools - convert to Anthropic format function_schema = schema["function"] - anthropic_tools.append({ - "type": "function", - "function": { - "name": function_schema["name"], - "description": function_schema.get("description", ""), - "parameters": function_schema.get("parameters", {}) + anthropic_tools.append( + { + "type": "function", + "function": { + "name": function_schema["name"], + "description": function_schema.get("description", ""), + "parameters": function_schema.get("parameters", {}), + }, } - }) - + ) + return anthropic_tools + def _convert_responses_items_to_completion_messages(messages: Messages) -> List[Dict[str, Any]]: """Convert responses_items message format to liteLLM completion format.""" completion_messages = [] @@ -123,7 +127,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ for message in messages: msg_type = message.get("type") role = message.get("role") - + # Handle user messages (both with and without explicit type) if role == "user" or msg_type == "user": content = message.get("content", "") @@ -135,51 +139,38 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # Convert input_image to OpenAI image format image_url = item.get("image_url", "") if image_url and image_url != "[omitted]": - converted_content.append({ - "type": "image_url", - "image_url": { - "url": image_url - } - }) + converted_content.append( + {"type": "image_url", "image_url": {"url": image_url}} + ) elif isinstance(item, dict) and item.get("type") == "input_text": # Convert input_text to OpenAI text format text = item.get("text", "") - converted_content.append({ - "type": "text", - "text": text - }) + converted_content.append({"type": "text", "text": text}) else: # Keep other content types as-is converted_content.append(item) - - completion_messages.append({ - "role": "user", - "content": converted_content if converted_content else content - }) + + completion_messages.append( + {"role": "user", "content": converted_content if converted_content else content} + ) else: # Text content - completion_messages.append({ - "role": "user", - "content": content - }) - + completion_messages.append({"role": "user", "content": content}) + # Handle assistant messages elif role == 
"assistant": content = message.get("content", []) if isinstance(content, str): - content = [{ "type": "output_text", "text": content }] - + content = [{"type": "output_text", "text": content}] + content = "\n".join(item.get("text", "") for item in content) - completion_messages.append({ - "role": "assistant", - "content": content - }) - + completion_messages.append({"role": "assistant", "content": content}) + elif msg_type == "reasoning": # Reasoning becomes part of assistant message summary = message.get("summary", []) reasoning_text = "" - + if isinstance(summary, list) and summary: # Extract text from summary items for item in summary: @@ -189,58 +180,54 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ else: # Fallback to direct reasoning field reasoning_text = message.get("reasoning", "") - + if reasoning_text: - completion_messages.append({ - "role": "assistant", - "content": reasoning_text - }) - + completion_messages.append({"role": "assistant", "content": reasoning_text}) + elif msg_type == "function_call": fn_name = message.get("name") fn_args = message.get("arguments", "{}") call_id = message.get("call_id", "call_1") call_id_to_fn_name[call_id] = fn_name - openai_tool_calls = [{ - "id": call_id, - "type": "function", - "function": { - "name": fn_name, - "arguments": fn_args + openai_tool_calls = [ + { + "id": call_id, + "type": "function", + "function": {"name": fn_name, "arguments": fn_args}, } - }] # If the last completion message is an assistant message, extend the tool_calls + ] # If the last completion message is an assistant message, extend the tool_calls if completion_messages and completion_messages[-1].get("role") == "assistant": if "tool_calls" not in completion_messages[-1]: completion_messages[-1]["tool_calls"] = [] completion_messages[-1]["tool_calls"].extend(openai_tool_calls) else: # Create new assistant message with tool calls - completion_messages.append({ - "role": "assistant", - "content": None, - "tool_calls": openai_tool_calls - }) - + completion_messages.append( + {"role": "assistant", "content": None, "tool_calls": openai_tool_calls} + ) + elif msg_type == "function_call_output": call_id = message.get("call_id", "call_1") fn_output = message.get("output", "") fn_name = call_id_to_fn_name.get(call_id, "computer") - completion_messages.append({ - "role": "function", - "name": fn_name, - "tool_call_id": call_id, - "content": str(fn_output) - }) - + completion_messages.append( + { + "role": "function", + "name": fn_name, + "tool_call_id": call_id, + "content": str(fn_output), + } + ) + elif msg_type == "computer_call": # Computer call becomes tool use in assistant message action = message.get("action", {}) action_type = action.get("type") call_id = message.get("call_id", "call_1") - + tool_use_content = [] - + # Basic actions (all versions) if action_type == "click": # Input: @@ -253,7 +240,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "y": 200 # } # } - + # Output: # { # "function": { @@ -267,16 +254,22 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "type": "function" # } button = action.get("button", "left") - action_name = "right_click" if button == "right" else "middle_click" if button == "wheel" else "left_click" - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": action_name, - "coordinate": [action.get("x", 0), action.get("y", 0)] + action_name = ( + "right_click" + if button == 
"right" + else "middle_click" if button == "wheel" else "left_click" + ) + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": action_name, + "coordinate": [action.get("x", 0), action.get("y", 0)], + }, } - }) + ) elif action_type == "double_click": # Input: # { @@ -288,7 +281,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "y": 240 # } # } - + # Output: # { # "function": { @@ -301,15 +294,17 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "id": "call_1", # "type": "function" # } - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "double_click", - "coordinate": [action.get("x", 0), action.get("y", 0)] + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "double_click", + "coordinate": [action.get("x", 0), action.get("y", 0)], + }, } - }) + ) elif action_type == "type": # Input: # { @@ -320,7 +315,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "text": "Hello World" # } # } - + # Output: # { # "function": { @@ -333,15 +328,14 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "id": "call_1", # "type": "function" # } - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "type", - "text": action.get("text", "") + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": {"action": "type", "text": action.get("text", "")}, } - }) + ) elif action_type == "keypress": # Input: # { @@ -352,7 +346,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "keys": ["ctrl", "c"] # } # } - + # Output: # { # "function": { @@ -365,15 +359,14 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "id": "call_1", # "type": "function" # } - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "key", - "text": "+".join(action.get("keys", [])) + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": {"action": "key", "text": "+".join(action.get("keys", []))}, } - }) + ) elif action_type in ["mouse_move", "move"]: # Input: # { @@ -385,7 +378,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "y": 250 # } # } - + # Output: # { # "function": { @@ -398,15 +391,17 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "id": "call_1", # "type": "function" # } - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "mouse_move", - "coordinate": [action.get("x", 0), action.get("y", 0)] + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "mouse_move", + "coordinate": [action.get("x", 0), action.get("y", 0)], + }, } - }) + ) elif action_type == "scroll": # Input: # { @@ -420,7 +415,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "scroll_y": -5 # } # } - + # Output: # { # "function": { @@ -453,18 +448,20 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ else: direction = "down" amount = 3 - - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - 
"name": "computer", - "input": { - "action": "scroll", - "coordinate": [action.get("x", 0), action.get("y", 0)], - "scroll_direction": direction, - "scroll_amount": amount + + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "scroll", + "coordinate": [action.get("x", 0), action.get("y", 0)], + "scroll_direction": direction, + "scroll_amount": amount, + }, } - }) + ) elif action_type == "drag": # Input: # { @@ -478,7 +475,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # ] # } # } - + # Output: # { # "function": { @@ -498,17 +495,19 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ if isinstance(path, list) and len(path) >= 2: start_coord = [path[0].get("x", 0), path[0].get("y", 0)] end_coord = [path[-1].get("x", 0), path[-1].get("y", 0)] - - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "left_click_drag", - "start_coordinate": start_coord, - "end_coordinate": end_coord + + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "left_click_drag", + "start_coordinate": start_coord, + "end_coordinate": end_coord, + }, } - }) + ) elif action_type == "wait": # Input: # { @@ -518,7 +517,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "type": "wait" # } # } - + # Output: # { # "function": { @@ -530,14 +529,14 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "id": "call_1", # "type": "function" # } - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "wait" + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": {"action": "wait"}, } - }) + ) elif action_type == "screenshot": # Input: # { @@ -547,7 +546,7 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "type": "screenshot" # } # } - + # Output: # { # "function": { @@ -559,47 +558,53 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ # "id": "call_1", # "type": "function" # } - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "screenshot" + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": {"action": "screenshot"}, } - }) + ) elif action_type == "left_mouse_down": - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "left_mouse_down", - "coordinate": [action.get("x", None), action.get("y", None)] + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "left_mouse_down", + "coordinate": [action.get("x", None), action.get("y", None)], + }, } - }) + ) elif action_type == "left_mouse_up": - tool_use_content.append({ - "type": "tool_use", - "id": call_id, - "name": "computer", - "input": { - "action": "left_mouse_up", - "coordinate": [action.get("x", None), action.get("y", None)] + tool_use_content.append( + { + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "left_mouse_up", + "coordinate": [action.get("x", None), action.get("y", None)], + }, } - }) - + ) + # Convert tool_use_content to OpenAI tool_calls format openai_tool_calls = [] for tool_use in tool_use_content: - 
openai_tool_calls.append({ - "id": tool_use["id"], - "type": "function", - "function": { - "name": tool_use["name"], - "arguments": json.dumps(tool_use["input"]) + openai_tool_calls.append( + { + "id": tool_use["id"], + "type": "function", + "function": { + "name": tool_use["name"], + "arguments": json.dumps(tool_use["input"]), + }, } - }) - + ) + # If the last completion message is an assistant message, extend the tool_calls if completion_messages and completion_messages[-1].get("role") == "assistant": if "tool_calls" not in completion_messages[-1]: @@ -607,54 +612,52 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ completion_messages[-1]["tool_calls"].extend(openai_tool_calls) else: # Create new assistant message with tool calls - completion_messages.append({ - "role": "assistant", - "content": None, - "tool_calls": openai_tool_calls - }) - + completion_messages.append( + {"role": "assistant", "content": None, "tool_calls": openai_tool_calls} + ) + elif msg_type == "computer_call_output": # Computer call output becomes OpenAI function result output = message.get("output", {}) call_id = message.get("call_id", "call_1") - + if output.get("type") == "input_image": # Screenshot result - convert to OpenAI format with image_url content image_url = output.get("image_url", "") - completion_messages.append({ - "role": "function", - "name": "computer", - "tool_call_id": call_id, - "content": [{ - "type": "image_url", - "image_url": { - "url": image_url - } - }] - }) + completion_messages.append( + { + "role": "function", + "name": "computer", + "tool_call_id": call_id, + "content": [{"type": "image_url", "image_url": {"url": image_url}}], + } + ) else: # Text result - convert to OpenAI format - completion_messages.append({ - "role": "function", - "name": "computer", - "tool_call_id": call_id, - "content": str(output) - }) - + completion_messages.append( + { + "role": "function", + "name": "computer", + "tool_call_id": call_id, + "content": str(output), + } + ) + return completion_messages + def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]]: """Convert liteLLM completion response to responses_items message format.""" responses_items = [] - - if not response or not hasattr(response, 'choices') or not response.choices: + + if not response or not hasattr(response, "choices") or not response.choices: return responses_items - + choice = response.choices[0] message = choice.message - + # Handle text content - if hasattr(message, 'content') and message.content: + if hasattr(message, "content") and message.content: if isinstance(message.content, str): responses_items.append(make_output_text_item(message.content)) elif isinstance(message.content, list): @@ -667,31 +670,36 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] tool_input = content_item.get("input", {}) action_type = tool_input.get("action") call_id = content_item.get("id") - + # Action reference: # https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/computer-use-tool#available-actions - + try: # Basic actions (all versions) if action_type == "screenshot": responses_items.append(make_screenshot_item(call_id=call_id)) elif action_type in ["click", "left_click"]: coordinate = tool_input.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) + responses_items.append( + make_click_item( + x=coordinate[0] if 
len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id, + ) + ) elif action_type in ["type", "type_text"]: - responses_items.append(make_type_item( - text=tool_input.get("text", ""), - call_id=call_id - )) + responses_items.append( + make_type_item(text=tool_input.get("text", ""), call_id=call_id) + ) elif action_type in ["key", "keypress", "hotkey"]: - responses_items.append(make_keypress_item( - keys=tool_input.get("text", "").replace("+", "-").split("-"), - call_id=call_id - )) + responses_items.append( + make_keypress_item( + keys=tool_input.get("text", "") + .replace("+", "-") + .split("-"), + call_id=call_id, + ) + ) elif action_type in ["mouse_move", "move_cursor", "move"]: # Mouse move - create a custom action item coordinate = tool_input.get("coordinate", [0, 0]) @@ -699,64 +707,88 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] make_move_item( x=coordinate[0] if len(coordinate) > 0 else 0, y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id + call_id=call_id, ) ) - + # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7 elif action_type == "scroll": coordinate = tool_input.get("coordinate", [0, 0]) scroll_amount = tool_input.get("scroll_amount", 3) - scroll_x = scroll_amount if tool_input.get("scroll_direction", "down") == "right" else \ - -scroll_amount if tool_input.get("scroll_direction", "down") == "left" else 0 - scroll_y = scroll_amount if tool_input.get("scroll_direction", "down") == "down" else \ - -scroll_amount if tool_input.get("scroll_direction", "down") == "up" else 0 - responses_items.append(make_scroll_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - scroll_x=scroll_x, - scroll_y=scroll_y, - call_id=call_id - )) + scroll_x = ( + scroll_amount + if tool_input.get("scroll_direction", "down") == "right" + else ( + -scroll_amount + if tool_input.get("scroll_direction", "down") == "left" + else 0 + ) + ) + scroll_y = ( + scroll_amount + if tool_input.get("scroll_direction", "down") == "down" + else ( + -scroll_amount + if tool_input.get("scroll_direction", "down") == "up" + else 0 + ) + ) + responses_items.append( + make_scroll_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + scroll_x=scroll_x, + scroll_y=scroll_y, + call_id=call_id, + ) + ) elif action_type in ["left_click_drag", "drag"]: start_coord = tool_input.get("start_coordinate", [0, 0]) end_coord = tool_input.get("end_coordinate", [0, 0]) - responses_items.append(make_drag_item( - path=[ - { - "x": start_coord[0] if len(start_coord) > 0 else 0, - "y": start_coord[1] if len(start_coord) > 1 else 0 - }, - { - "x": end_coord[0] if len(end_coord) > 0 else 0, - "y": end_coord[1] if len(end_coord) > 1 else 0 - } - ], - call_id=call_id - )) + responses_items.append( + make_drag_item( + path=[ + { + "x": start_coord[0] if len(start_coord) > 0 else 0, + "y": start_coord[1] if len(start_coord) > 1 else 0, + }, + { + "x": end_coord[0] if len(end_coord) > 0 else 0, + "y": end_coord[1] if len(end_coord) > 1 else 0, + }, + ], + call_id=call_id, + ) + ) elif action_type == "right_click": coordinate = tool_input.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - button="right", - call_id=call_id - )) + responses_items.append( + make_click_item( + x=coordinate[0] if len(coordinate) > 0 
else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + button="right", + call_id=call_id, + ) + ) elif action_type == "middle_click": coordinate = tool_input.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - button="wheel", - call_id=call_id - )) + responses_items.append( + make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + button="wheel", + call_id=call_id, + ) + ) elif action_type == "double_click": coordinate = tool_input.get("coordinate", [0, 0]) - responses_items.append(make_double_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) + responses_items.append( + make_double_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id, + ) + ) elif action_type == "triple_click": # coordinate = tool_input.get("coordinate", [0, 0]) # responses_items.append({ @@ -782,11 +814,13 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # }) coordinate = tool_input.get("coordinate", [None, None]) - responses_items.append(make_left_mouse_down_item( - x=coordinate[0] if len(coordinate) > 0 else None, - y=coordinate[1] if len(coordinate) > 1 else None, - call_id=call_id - )) + responses_items.append( + make_left_mouse_down_item( + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, + call_id=call_id, + ) + ) elif action_type == "left_mouse_up": # coordinate = tool_input.get("coordinate", [0, 0]) # responses_items.append({ @@ -800,11 +834,13 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # }) coordinate = tool_input.get("coordinate", [None, None]) - responses_items.append(make_left_mouse_up_item( - x=coordinate[0] if len(coordinate) > 0 else None, - y=coordinate[1] if len(coordinate) > 1 else None, - call_id=call_id - )) + responses_items.append( + make_left_mouse_up_item( + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, + call_id=call_id, + ) + ) elif action_type == "hold_key": # responses_items.append({ # "type": "computer_call", @@ -816,21 +852,21 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # }) raise NotImplementedError("hold_key") elif action_type == "wait": - responses_items.append(make_wait_item( - call_id=call_id - )) + responses_items.append(make_wait_item(call_id=call_id)) else: raise ValueError(f"Unknown action type: {action_type}") except Exception as e: - responses_items.extend(make_failed_tool_call_items( - tool_name="computer", - tool_kwargs=tool_input, - error_message=repr(e), - call_id=call_id - )) - + responses_items.extend( + make_failed_tool_call_items( + tool_name="computer", + tool_kwargs=tool_input, + error_message=repr(e), + call_id=call_id, + ) + ) + # Handle tool calls (alternative format) - if hasattr(message, 'tool_calls') and message.tool_calls: + if hasattr(message, "tool_calls") and message.tool_calls: for tool_call in message.tool_calls: if tool_call.function.name == "computer": try: @@ -852,7 +888,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -861,9 +897,7 @@ def 
_convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "type": "screenshot" # } # } - responses_items.append(make_screenshot_item( - call_id=call_id - )) + responses_items.append(make_screenshot_item(call_id=call_id)) elif action_type in ["click", "left_click"]: # Input: # { @@ -877,7 +911,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -889,11 +923,13 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) + responses_items.append( + make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id, + ) + ) elif action_type in ["type", "type_text"]: # Input: # { @@ -907,7 +943,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -917,10 +953,9 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "text": "Hello World" # } # } - responses_items.append(make_type_item( - text=args.get("text", ""), - call_id=call_id - )) + responses_items.append( + make_type_item(text=args.get("text", ""), call_id=call_id) + ) elif action_type in ["key", "keypress", "hotkey"]: # Input: # { @@ -934,7 +969,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -944,10 +979,12 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "keys": ["ctrl", "c"] # } # } - responses_items.append(make_keypress_item( - keys=args.get("text", "").replace("+", "-").split("-"), - call_id=call_id - )) + responses_items.append( + make_keypress_item( + keys=args.get("text", "").replace("+", "-").split("-"), + call_id=call_id, + ) + ) elif action_type in ["mouse_move", "move_cursor", "move"]: # Input: # { @@ -961,7 +998,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -973,12 +1010,14 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_move_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) - + responses_items.append( + make_move_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id, + ) + ) + # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7 elif action_type == "scroll": # Input: @@ -995,7 +1034,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1011,17 +1050,25 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] coordinate = args.get("coordinate", [0, 0]) direction = args.get("scroll_direction", "down") amount = args.get("scroll_amount", 3) - scroll_x = amount if direction == "left" else \ - -amount if 
direction == "right" else 0 - scroll_y = amount if direction == "up" else \ - -amount if direction == "down" else 0 - responses_items.append(make_scroll_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - scroll_x=scroll_x, - scroll_y=scroll_y, - call_id=call_id - )) + scroll_x = ( + amount + if direction == "left" + else -amount if direction == "right" else 0 + ) + scroll_y = ( + amount + if direction == "up" + else -amount if direction == "down" else 0 + ) + responses_items.append( + make_scroll_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + scroll_x=scroll_x, + scroll_y=scroll_y, + call_id=call_id, + ) + ) elif action_type in ["left_click_drag", "drag"]: # Input: # { @@ -1036,7 +1083,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1051,19 +1098,21 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } start_coord = args.get("start_coordinate", [0, 0]) end_coord = args.get("end_coordinate", [0, 0]) - responses_items.append(make_drag_item( - path=[ - { - "x": start_coord[0] if len(start_coord) > 0 else 0, - "y": start_coord[1] if len(start_coord) > 1 else 0 - }, - { - "x": end_coord[0] if len(end_coord) > 0 else 0, - "y": end_coord[1] if len(end_coord) > 1 else 0 - } - ], - call_id=call_id - )) + responses_items.append( + make_drag_item( + path=[ + { + "x": start_coord[0] if len(start_coord) > 0 else 0, + "y": start_coord[1] if len(start_coord) > 1 else 0, + }, + { + "x": end_coord[0] if len(end_coord) > 0 else 0, + "y": end_coord[1] if len(end_coord) > 1 else 0, + }, + ], + call_id=call_id, + ) + ) elif action_type == "right_click": # Input: # { @@ -1077,7 +1126,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1090,12 +1139,14 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - button="right", - call_id=call_id - )) + responses_items.append( + make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + button="right", + call_id=call_id, + ) + ) elif action_type == "middle_click": # Input: # { @@ -1109,7 +1160,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1122,12 +1173,14 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - button="wheel", - call_id=call_id - )) + responses_items.append( + make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + button="wheel", + call_id=call_id, + ) + ) elif action_type == "double_click": # Input: # { @@ -1141,7 +1194,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # 
"type": "computer_call", @@ -1153,11 +1206,13 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_double_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) + responses_items.append( + make_double_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id, + ) + ) elif action_type == "triple_click": # Input: # { @@ -1171,7 +1226,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1196,7 +1251,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1209,11 +1264,13 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [None, None]) - responses_items.append(make_left_mouse_down_item( - x=coordinate[0] if len(coordinate) > 0 else None, - y=coordinate[1] if len(coordinate) > 1 else None, - call_id=call_id - )) + responses_items.append( + make_left_mouse_down_item( + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, + call_id=call_id, + ) + ) elif action_type == "left_mouse_up": # Input: # { @@ -1227,7 +1284,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1240,11 +1297,13 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # } # } coordinate = args.get("coordinate", [None, None]) - responses_items.append(make_left_mouse_up_item( - x=coordinate[0] if len(coordinate) > 0 else None, - y=coordinate[1] if len(coordinate) > 1 else None, - call_id=call_id - )) + responses_items.append( + make_left_mouse_up_item( + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, + call_id=call_id, + ) + ) elif action_type == "hold_key": # Input: # { @@ -1258,7 +1317,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1281,7 +1340,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "id": "call_1", # "type": "function" # } - + # Output: # { # "type": "computer_call", @@ -1290,74 +1349,77 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "type": "wait" # } # } - responses_items.append(make_wait_item( - call_id=call_id - )) + responses_items.append(make_wait_item(call_id=call_id)) except Exception as e: - responses_items.extend(make_failed_tool_call_items( - tool_name="computer", - tool_kwargs=args, - error_message=repr(e), - call_id=call_id - )) + responses_items.extend( + make_failed_tool_call_items( + tool_name="computer", + tool_kwargs=args, + error_message=repr(e), + call_id=call_id, + ) + ) except json.JSONDecodeError: print("Failed to decode tool call arguments") # Skip malformed tool calls continue - + return responses_items + def _add_cache_control(completion_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Add cache control to completion messages""" num_writes = 0 for 
message in completion_messages: - message["cache_control"] = { "type": "ephemeral" } + message["cache_control"] = {"type": "ephemeral"} num_writes += 1 # Cache control has a maximum of 4 blocks if num_writes >= 4: break - + return completion_messages + def _combine_completion_messages(completion_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Combine completion messages with the same role""" if not completion_messages: return completion_messages - + combined_messages = [] - + for message in completion_messages: # If this is the first message or role is different from last, add as new message if not combined_messages or combined_messages[-1]["role"] != message["role"]: # Ensure content is a list format and normalize text content new_message = message.copy() new_message["content"] = _normalize_content(message.get("content", "")) - + # Copy tool_calls if present if "tool_calls" in message: new_message["tool_calls"] = message["tool_calls"].copy() - + combined_messages.append(new_message) else: # Same role as previous message, combine them last_message = combined_messages[-1] - + # Combine content current_content = _normalize_content(message.get("content", "")) last_message["content"].extend(current_content) - + # Combine tool_calls if present if "tool_calls" in message: if "tool_calls" not in last_message: last_message["tool_calls"] = [] last_message["tool_calls"].extend(message["tool_calls"]) - + # Post-process to merge consecutive text blocks for message in combined_messages: message["content"] = _merge_consecutive_text(message["content"]) - + return combined_messages + def _normalize_content(content) -> List[Dict[str, Any]]: """Normalize content to list format""" if isinstance(content, str): @@ -1370,28 +1432,28 @@ def _normalize_content(content) -> List[Dict[str, Any]]: else: return [] + def _merge_consecutive_text(content_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Merge consecutive text blocks with newlines""" if not content_list: return content_list - + merged = [] - + for item in content_list: - if (item.get("type") == "text" and - merged and - merged[-1].get("type") == "text"): + if item.get("type") == "text" and merged and merged[-1].get("type") == "text": # Merge with previous text block merged[-1]["text"] += "\n" + item["text"] else: merged.append(item.copy()) - + return merged + @register_agent(models=r".*claude-.*") class AnthropicHostedToolsConfig(AsyncAgentConfig): """Anthropic hosted tools agent configuration implementing AsyncAgentConfig protocol.""" - + async def predict_step( self, messages: Messages, @@ -1405,21 +1467,21 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ Anthropic hosted tools agent loop using liteLLM acompletion. - + Supports Anthropic's computer use models with hosted tools. 
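As a quick, hedged illustration of the two prompt-caching helpers introduced above (`_combine_completion_messages` merges consecutive same-role messages; `_add_cache_control` marks at most four of them), assuming the module imports as `agent.loops.anthropic`:

```python
# Hedged example: exercises _combine_completion_messages and
# _add_cache_control as defined in this file; the import path is assumed.
from agent.loops.anthropic import _add_cache_control, _combine_completion_messages

messages = [
    {"role": "assistant", "content": "Clicking the submit button."},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {"name": "computer", "arguments": '{"action": "screenshot"}'},
            }
        ],
    },
]

combined = _combine_completion_messages(messages)
# Consecutive same-role messages merge: text is normalized into
# [{"type": "text", ...}] blocks and tool_calls lists are concatenated.
assert len(combined) == 1
assert combined[0]["tool_calls"][0]["id"] == "call_1"

cached = _add_cache_control(combined)
# At most 4 messages receive {"cache_control": {"type": "ephemeral"}},
# matching Anthropic's four-block cache limit noted in the loop above.
assert cached[0]["cache_control"] == {"type": "ephemeral"}
```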
""" tools = tools or [] - + # Get tool configuration for this model tool_config = _get_tool_config_for_model(model) - + # Prepare tools for Anthropic API anthropic_tools = await _prepare_tools_for_anthropic(tools, model) - + # Convert responses_items messages to completion format completion_messages = _convert_responses_items_to_completion_messages(messages) if use_prompt_caching: @@ -1427,7 +1489,7 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): completion_messages = _combine_completion_messages(completion_messages) # Then add cache control, anthropic requires explicit "cache_control" dicts completion_messages = _add_cache_control(completion_messages) - + # Prepare API call kwargs api_kwargs = { "model": model, @@ -1435,80 +1497,74 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): "tools": anthropic_tools if anthropic_tools else None, "stream": stream, "num_retries": max_retries, - **kwargs + **kwargs, } - + # Add beta header for computer use if anthropic_tools: - api_kwargs["headers"] = { - "anthropic-beta": tool_config["beta_flag"] - } - + api_kwargs["headers"] = {"anthropic-beta": tool_config["beta_flag"]} + # Call API start hook if _on_api_start: await _on_api_start(api_kwargs) - + # Use liteLLM acompletion response = await litellm.acompletion(**api_kwargs) - + # Call API end hook if _on_api_end: await _on_api_end(api_kwargs, response) - + # Convert response to responses_items format responses_items = _convert_completion_to_responses_items(response) # Extract usage information - responses_usage = { - **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), + responses_usage = { + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage( + response.usage + ).model_dump(), "response_cost": response._hidden_params.get("response_cost", 0.0), } if _on_usage: await _on_usage(responses_usage) # Return in AsyncAgentConfig format - return { - "output": responses_items, - "usage": responses_usage - } - + return {"output": responses_items, "usage": responses_usage} + async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[int, int]]: """ Predict click coordinates based on image and instruction. - + Uses Anthropic's computer use models with a custom prompt that instructs the agent to only output clicks. - + Args: model: Model name to use image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ # Get image dimensions from base64 data try: import base64 - from PIL import Image from io import BytesIO - + + from PIL import Image + image_data = base64.b64decode(image_b64) image = Image.open(BytesIO(image_data)) display_width, display_height = image.size except Exception: # Fallback to default dimensions if image parsing fails display_width, display_height = 1024, 768 - + # Get tool configuration for this model tool_config = _get_tool_config_for_model(model) - + # Prepare computer tool for Anthropic format computer_tool = { "type": tool_config["tool_version"], @@ -1521,7 +1577,7 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): }, }, } - + # Construct messages in OpenAI chat completion format for liteLLM messages = [ { @@ -1540,18 +1596,16 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): 7. Be decisive and action-oriented. Complete the requested task fully. 
Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked. -Task: Click {instruction}. Output ONLY a click action on the target element.""" +Task: Click {instruction}. Output ONLY a click action on the target element.""", }, { "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{image_b64}" - } - } - ] + "image_url": {"url": f"data:image/png;base64,{image_b64}"}, }, ], } ] - + # Prepare API call kwargs api_kwargs = { "model": model, @@ -1559,32 +1613,31 @@ Task: Click {instruction}. Output ONLY a click action on the target element.""" "tools": [computer_tool], "stream": False, "max_tokens": 100, # Keep response short for click prediction - "headers": { - "anthropic-beta": tool_config["beta_flag"] - } + "headers": {"anthropic-beta": tool_config["beta_flag"]}, } - + # Use liteLLM acompletion response = await litellm.acompletion(**api_kwargs) - + # Convert response to responses_items format to extract click coordinates responses_items = _convert_completion_to_responses_items(response) - + # Look for computer_call with click action for item in responses_items: - if (isinstance(item, dict) and - item.get("type") == "computer_call" and - isinstance(item.get("action"), dict)): - + if ( + isinstance(item, dict) + and item.get("type") == "computer_call" + and isinstance(item.get("action"), dict) + ): + action = item["action"] - if action.get("type") == "click": + if action.get("x") is not None and action.get("y") is not None: x = action.get("x") y = action.get("y") - if x is not None and y is not None: - return (int(x), int(y)) + return (int(x), int(y)) - + return None - + def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" return ["click", "step"] diff --git a/libs/python/agent/agent/loops/base.py b/libs/python/agent/agent/loops/base.py index 887605b1..b764fb6f 100644 --- a/libs/python/agent/agent/loops/base.py +++ b/libs/python/agent/agent/loops/base.py @@ -2,13 +2,15 @@ Base protocol for async agent configurations """ -from typing import Protocol, List, Dict, Any, Optional, Tuple, Union from abc import abstractmethod +from typing import Any, Dict, List, Optional, Protocol, Tuple, Union + from ..types import AgentCapability + class AsyncAgentConfig(Protocol): """Protocol defining the interface for async agent configurations.""" - + @abstractmethod async def predict_step( self, @@ -22,11 +24,11 @@ class AsyncAgentConfig(Protocol): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ Predict the next step based on input items. - + Args: messages: Input items following Responses format (message, function_call, computer_call) model: Model name to use @@ -39,37 +41,34 @@ class AsyncAgentConfig(Protocol): _on_usage: Callback for usage tracking _on_screenshot: Callback for screenshot events **kwargs: Additional arguments - + Returns: Dictionary with "output" (output items) and "usage" array """ ... - + @abstractmethod async def predict_click( - self, - model: str, - image_b64: str, - instruction: str + self, model: str, image_b64: str, instruction: str ) -> Optional[Tuple[int, int]]: """ Predict click coordinates based on image and instruction. - + Args: model: Model name to use image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: None or tuple with (x, y) coordinates """ ... - + @abstractmethod def get_capabilities(self) -> List[AgentCapability]: """ Get list of capabilities supported by this agent config. 
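For orientation, a hypothetical end-to-end driver for this protocol, using the Anthropic implementation registered above for any model matching `r".*claude-.*"` (the screenshot file, model string, and import path are placeholders, not part of the diff):

```python
# Hedged usage sketch: consumes the AsyncAgentConfig protocol via the
# Anthropic implementation above. Screenshot file and model name are
# placeholders; ANTHROPIC_API_KEY is assumed in the environment.
import asyncio
import base64

from agent.loops.anthropic import AnthropicHostedToolsConfig  # path assumed


async def main() -> None:
    config = AnthropicHostedToolsConfig()
    assert "click" in config.get_capabilities()

    with open("screen.png", "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode()

    coords = await config.predict_click(
        model="claude-sonnet-4-20250514",  # any model matching r".*claude-.*"
        image_b64=image_b64,
        instruction="the blue Submit button",
    )
    print(coords)  # (x, y) pixel tuple, or None if no coordinates came back


asyncio.run(main())
```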
- + Returns: List of capability strings (e.g., ["step", "click"]) """ diff --git a/libs/python/agent/agent/loops/composed_grounded.py b/libs/python/agent/agent/loops/composed_grounded.py index 8d8a1501..5e187e83 100644 --- a/libs/python/agent/agent/loops/composed_grounded.py +++ b/libs/python/agent/agent/loops/composed_grounded.py @@ -3,122 +3,117 @@ Composed-grounded agent loop implementation that combines grounding and thinking Uses a two-stage approach: grounding model for element detection, thinking model for reasoning. """ -import uuid import asyncio -import json import base64 -from typing import Dict, List, Any, Optional, Tuple +import json +import uuid from io import BytesIO -from PIL import Image -import litellm +from typing import Any, Dict, List, Optional, Tuple +import litellm +from PIL import Image + +from ..agent import find_agent_config from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..loops.base import AsyncAgentConfig from ..responses import ( - convert_computer_calls_xy2desc, - convert_responses_items_to_completion_messages, convert_completion_messages_to_responses_items, convert_computer_calls_desc2xy, - get_all_element_descriptions + convert_computer_calls_xy2desc, + convert_responses_items_to_completion_messages, + get_all_element_descriptions, ) -from ..agent import find_agent_config +from ..types import AgentCapability, AgentResponse, Messages, Tools GROUNDED_COMPUTER_TOOL_SCHEMA = { - "type": "function", - "function": { - "name": "computer", - "description": "Control a computer by taking screenshots and interacting with UI elements. This tool uses element descriptions to locate and interact with UI elements on the screen (e.g., 'red submit button', 'search text field', 'hamburger menu icon', 'close button in top right corner').", - "parameters": { - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": [ - "screenshot", - "click", - "double_click", - "drag", - "type", - "keypress", - "scroll", - "move", - "wait", - "get_current_url", - "get_dimensions", - "get_environment" - ], - "description": "The action to perform (required for all actions)" - }, - "element_description": { - "type": "string", - "description": "Description of the element to interact with (required for click, double_click, move, scroll actions)" - }, - "start_element_description": { - "type": "string", - "description": "Description of the element to start dragging from (required for drag action)" - }, - "end_element_description": { - "type": "string", - "description": "Description of the element to drag to (required for drag action)" - }, - "text": { - "type": "string", - "description": "The text to type (required for type action)" - }, - "keys": { - "type": "array", - "items": { - "type": "string" + "type": "function", + "function": { + "name": "computer", + "description": "Control a computer by taking screenshots and interacting with UI elements. 
This tool uses element descriptions to locate and interact with UI elements on the screen (e.g., 'red submit button', 'search text field', 'hamburger menu icon', 'close button in top right corner').", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": [ + "screenshot", + "click", + "double_click", + "drag", + "type", + "keypress", + "scroll", + "move", + "wait", + "get_current_url", + "get_dimensions", + "get_environment", + ], + "description": "The action to perform (required for all actions)", + }, + "element_description": { + "type": "string", + "description": "Description of the element to interact with (required for click, double_click, move, scroll actions)", + }, + "start_element_description": { + "type": "string", + "description": "Description of the element to start dragging from (required for drag action)", + }, + "end_element_description": { + "type": "string", + "description": "Description of the element to drag to (required for drag action)", + }, + "text": { + "type": "string", + "description": "The text to type (required for type action)", + }, + "keys": { + "type": "array", + "items": {"type": "string"}, + "description": "Key(s) to press (required for keypress action)", + }, + "button": { + "type": "string", + "enum": ["left", "right", "wheel", "back", "forward"], + "description": "The mouse button to use for click action (required for click and double_click action)", + }, + "scroll_x": { + "type": "integer", + "description": "Horizontal scroll amount for scroll action (required for scroll action)", + }, + "scroll_y": { + "type": "integer", + "description": "Vertical scroll amount for scroll action (required for scroll action)", + }, }, - "description": "Key(s) to press (required for keypress action)" + "required": ["action"], }, - "button": { - "type": "string", - "enum": [ - "left", - "right", - "wheel", - "back", - "forward" - ], - "description": "The mouse button to use for click action (required for click and double_click action)", - }, - "scroll_x": { - "type": "integer", - "description": "Horizontal scroll amount for scroll action (required for scroll action)", - }, - "scroll_y": { - "type": "integer", - "description": "Vertical scroll amount for scroll action (required for scroll action)", - }, - }, - "required": [ - "action" - ] - } - } + }, } + def _prepare_tools_for_grounded(tool_schemas: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Prepare tools for grounded API format""" grounded_tools = [] - + for schema in tool_schemas: if schema["type"] == "computer": grounded_tools.append(GROUNDED_COMPUTER_TOOL_SCHEMA) else: grounded_tools.append(schema) - + return grounded_tools + def get_last_computer_call_image(messages: List[Dict[str, Any]]) -> Optional[str]: """Get the last computer call output image from messages.""" for message in reversed(messages): - if (isinstance(message, dict) and - message.get("type") == "computer_call_output" and - isinstance(message.get("output"), dict) and - message["output"].get("type") == "input_image"): + if ( + isinstance(message, dict) + and message.get("type") == "computer_call_output" + and isinstance(message.get("output"), dict) + and message["output"].get("type") == "input_image" + ): image_url = message["output"].get("image_url", "") if image_url.startswith("data:image/png;base64,"): return image_url.split(",", 1)[1] @@ -129,14 +124,14 @@ def get_last_computer_call_image(messages: List[Dict[str, Any]]) -> Optional[str class ComposedGroundedConfig(AsyncAgentConfig): """ 
Composed-grounded agent configuration that uses both grounding and thinking models. - + The model parameter should be in format: "grounding_model+thinking_model" e.g., "huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro" """ - + def __init__(self): self.desc2xy: Dict[str, Tuple[float, float]] = {} - + async def predict_step( self, messages: List[Dict[str, Any]], @@ -150,11 +145,11 @@ class ComposedGroundedConfig(AsyncAgentConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ Composed-grounded predict step implementation. - + Process: 0. Store last computer call image, if none then take a screenshot 1. Convert computer calls from xy to descriptions @@ -167,18 +162,20 @@ class ComposedGroundedConfig(AsyncAgentConfig): """ # Parse the composed model if "+" not in model: - raise ValueError(f"Composed model must be in format 'grounding_model+thinking_model', got: {model}") + raise ValueError( + f"Composed model must be in format 'grounding_model+thinking_model', got: {model}" + ) grounding_model, thinking_model = model.split("+", 1) - + pre_output_items = [] - + # Step 0: Store last computer call image, if none then take a screenshot last_image_b64 = get_last_computer_call_image(messages) if last_image_b64 is None: # Take a screenshot - screenshot_b64 = await computer_handler.screenshot() # type: ignore + screenshot_b64 = await computer_handler.screenshot() # type: ignore if screenshot_b64: - + call_id = uuid.uuid4().hex pre_output_items += [ { @@ -187,45 +184,42 @@ class ComposedGroundedConfig(AsyncAgentConfig): "content": [ { "type": "output_text", - "text": "Taking a screenshot to see the current computer screen." + "text": "Taking a screenshot to see the current computer screen.", } - ] + ], }, { - "action": { - "type": "screenshot" - }, + "action": {"type": "screenshot"}, "call_id": call_id, "status": "completed", - "type": "computer_call" + "type": "computer_call", }, { "type": "computer_call_output", "call_id": call_id, "output": { "type": "input_image", - "image_url": f"data:image/png;base64,{screenshot_b64}" - } + "image_url": f"data:image/png;base64,{screenshot_b64}", + }, }, ] last_image_b64 = screenshot_b64 - + # Call screenshot callback if provided if _on_screenshot: await _on_screenshot(screenshot_b64) - - tool_schemas = _prepare_tools_for_grounded(tools) # type: ignore + + tool_schemas = _prepare_tools_for_grounded(tools) # type: ignore # Step 1: Convert computer calls from xy to descriptions input_messages = messages + pre_output_items messages_with_descriptions = convert_computer_calls_xy2desc(input_messages, self.desc2xy) - + # Step 2: Convert responses items to completion messages completion_messages = convert_responses_items_to_completion_messages( - messages_with_descriptions, - allow_images_in_tool_results=False + messages_with_descriptions, allow_images_in_tool_results=False ) - + # Step 3: Call thinking model with litellm.acompletion api_kwargs = { "model": thinking_model, @@ -233,98 +227,90 @@ class ComposedGroundedConfig(AsyncAgentConfig): "tools": tool_schemas, "max_retries": max_retries, "stream": stream, - **kwargs + **kwargs, } if use_prompt_caching: api_kwargs["use_prompt_caching"] = use_prompt_caching - + # Call API start hook if _on_api_start: await _on_api_start(api_kwargs) - + # Make the completion call response = await litellm.acompletion(**api_kwargs) - + # Call API end hook if _on_api_end: await _on_api_end(api_kwargs, response) - + # Extract usage information usage = { - 
**response.usage.model_dump(), # type: ignore + **response.usage.model_dump(), # type: ignore "response_cost": response._hidden_params.get("response_cost", 0.0), } if _on_usage: await _on_usage(usage) - + # Step 4: Convert completion messages back to responses items format - response_dict = response.model_dump() # type: ignore + response_dict = response.model_dump() # type: ignore choice_messages = [choice["message"] for choice in response_dict["choices"]] thinking_output_items = [] - + for choice_message in choice_messages: - thinking_output_items.extend(convert_completion_messages_to_responses_items([choice_message])) - + thinking_output_items.extend( + convert_completion_messages_to_responses_items([choice_message]) + ) + # Step 5: Get all element descriptions and populate desc2xy mapping element_descriptions = get_all_element_descriptions(thinking_output_items) - + if element_descriptions and last_image_b64: # Use grounding model to predict coordinates for each description grounding_agent_conf = find_agent_config(grounding_model) if grounding_agent_conf: grounding_agent = grounding_agent_conf.agent_class() - + for desc in element_descriptions: - for _ in range(3): # try 3 times + for _ in range(3): # try 3 times coords = await grounding_agent.predict_click( - model=grounding_model, - image_b64=last_image_b64, - instruction=desc + model=grounding_model, image_b64=last_image_b64, instruction=desc ) if coords: self.desc2xy[desc] = coords break - + # Step 6: Convert computer calls from descriptions back to xy coordinates final_output_items = convert_computer_calls_desc2xy(thinking_output_items, self.desc2xy) - + # Step 7: Return output and usage - return { - "output": pre_output_items + final_output_items, - "usage": usage - } - + return {"output": pre_output_items + final_output_items, "usage": usage} + async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[int, int]]: """ Predict click coordinates using the grounding model. - + For composed models, uses only the grounding model part for click prediction. """ # Parse the composed model to get grounding model if "+" not in model: - raise ValueError(f"Composed model must be in format 'grounding_model+thinking_model', got: {model}") + raise ValueError( + f"Composed model must be in format 'grounding_model+thinking_model', got: {model}" + ) grounding_model, thinking_model = model.split("+", 1) - + # Find and use the grounding agent grounding_agent_conf = find_agent_config(grounding_model) if grounding_agent_conf: grounding_agent = grounding_agent_conf.agent_class() return await grounding_agent.predict_click( - model=grounding_model, - image_b64=image_b64, - instruction=instruction, - **kwargs + model=grounding_model, image_b64=image_b64, instruction=instruction, **kwargs ) - + return None - + def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" return ["click", "step"] diff --git a/libs/python/agent/agent/loops/gemini.py b/libs/python/agent/agent/loops/gemini.py new file mode 100644 index 00000000..3760876b --- /dev/null +++ b/libs/python/agent/agent/loops/gemini.py @@ -0,0 +1,410 @@ +""" +Gemini 2.5 Computer Use agent loop + +Maps internal Agent SDK message format to Google's Gemini Computer Use API and back. 
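One convention worth spelling out before the implementation: Gemini Computer Use returns x/y on a 0-999 normalized grid, so every coordinate must be rescaled against the real screenshot dimensions. A minimal sketch of that rescaling (it mirrors the `_denormalize` helper defined below rather than replacing it):

```python
# Sketch of the 0-999 -> pixel rescaling used throughout this loop;
# mirrors _denormalize() below, clamping to the screen bounds.
def denormalize(v: int, size: int) -> int:
    return max(0, min(size - 1, round(v / 1000 * size)))


# A click_at(x=500, y=500) on a 1920x1080 screenshot lands mid-screen:
print(denormalize(500, 1920), denormalize(500, 1080))  # 960 540
```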
+ +Key features: +- Lazy import of google.genai +- Configure Computer Use tool with excluded browser-specific predefined functions +- Optional custom function declarations hook for computer-call specific functions +- Convert Gemini function_call parts into internal computer_call actions +""" + +from __future__ import annotations + +import base64 +import io +import uuid +from typing import Any, Dict, List, Optional, Tuple + +from PIL import Image + +from ..decorators import register_agent +from ..loops.base import AsyncAgentConfig +from ..types import AgentCapability + + +def _lazy_import_genai(): + """Import google.genai lazily to avoid hard dependency unless used.""" + try: + from google import genai # type: ignore + from google.genai import types # type: ignore + + return genai, types + except Exception as e: # pragma: no cover + raise RuntimeError( + "google.genai is required for the Gemini Computer Use loop. Install the Google Gemini SDK." + ) from e + + +def _data_url_to_bytes(data_url: str) -> Tuple[bytes, str]: + """Convert a data URL to raw bytes and mime type.""" + if not data_url.startswith("data:"): + # Assume it's base64 png payload + try: + return base64.b64decode(data_url), "image/png" + except Exception: + return b"", "application/octet-stream" + header, b64 = data_url.split(",", 1) + mime = "image/png" + if ";" in header: + mime = header.split(";")[0].split(":", 1)[1] or "image/png" + return base64.b64decode(b64), mime + + +def _bytes_image_size(img_bytes: bytes) -> Tuple[int, int]: + try: + img = Image.open(io.BytesIO(img_bytes)) + return img.size + except Exception: + return (1024, 768) + + +def _find_last_user_text(messages: List[Dict[str, Any]]) -> List[str]: + texts: List[str] = [] + for msg in reversed(messages): + if msg.get("type") in (None, "message") and msg.get("role") == "user": + content = msg.get("content") + if isinstance(content, str): + return [content] + elif isinstance(content, list): + for c in content: + if c.get("type") in ("input_text", "output_text") and c.get("text"): + texts.append(c["text"]) # newest first + if texts: + return list(reversed(texts)) + return [] + + +def _find_last_screenshot(messages: List[Dict[str, Any]]) -> Optional[bytes]: + for msg in reversed(messages): + if msg.get("type") == "computer_call_output": + out = msg.get("output", {}) + if isinstance(out, dict) and out.get("type") in ("input_image", "computer_screenshot"): + image_url = out.get("image_url", "") + if image_url: + data, _ = _data_url_to_bytes(image_url) + return data + return None + + +def _denormalize(v: int, size: int) -> int: + # Gemini returns 0-999 normalized + try: + return max(0, min(size - 1, int(round(v / 1000 * size)))) + except Exception: + return 0 + + +def _map_gemini_fc_to_computer_call( + fc: Dict[str, Any], + screen_w: int, + screen_h: int, +) -> Optional[Dict[str, Any]]: + name = fc.get("name") + args = fc.get("args", {}) or {} + + action: Dict[str, Any] = {} + if name == "click_at": + x = _denormalize(int(args.get("x", 0)), screen_w) + y = _denormalize(int(args.get("y", 0)), screen_h) + action = {"type": "click", "x": x, "y": y, "button": "left"} + elif name == "type_text_at": + x = _denormalize(int(args.get("x", 0)), screen_w) + y = _denormalize(int(args.get("y", 0)), screen_h) + text = args.get("text", "") + if args.get("press_enter") == True: + text += "\n" + action = {"type": "type", "x": x, "y": y, "text": text} + elif name == "hover_at": + x = _denormalize(int(args.get("x", 0)), screen_w) + y = _denormalize(int(args.get("y", 0)), screen_h) + 
action = {"type": "move", "x": x, "y": y} + elif name == "key_combination": + keys = str(args.get("keys", "")) + action = {"type": "keypress", "keys": keys} + elif name == "scroll_document": + direction = args.get("direction", "down") + magnitude = 800 + dx, dy = 0, 0 + if direction == "down": + dy = magnitude + elif direction == "up": + dy = -magnitude + elif direction == "right": + dx = magnitude + elif direction == "left": + dx = -magnitude + action = { + "type": "scroll", + "scroll_x": dx, + "scroll_y": dy, + "x": int(screen_w / 2), + "y": int(screen_h / 2), + } + elif name == "scroll_at": + x = _denormalize(int(args.get("x", 500)), screen_w) + y = _denormalize(int(args.get("y", 500)), screen_h) + direction = args.get("direction", "down") + magnitude = int(args.get("magnitude", 800)) + dx, dy = 0, 0 + if direction == "down": + dy = magnitude + elif direction == "up": + dy = -magnitude + elif direction == "right": + dx = magnitude + elif direction == "left": + dx = -magnitude + action = {"type": "scroll", "scroll_x": dx, "scroll_y": dy, "x": x, "y": y} + elif name == "drag_and_drop": + x = _denormalize(int(args.get("x", 0)), screen_w) + y = _denormalize(int(args.get("y", 0)), screen_h) + dx = _denormalize(int(args.get("destination_x", x)), screen_w) + dy = _denormalize(int(args.get("destination_y", y)), screen_h) + action = { + "type": "drag", + "start_x": x, + "start_y": y, + "end_x": dx, + "end_y": dy, + "button": "left", + } + elif name == "wait_5_seconds": + action = {"type": "wait"} + else: + # Unsupported / excluded browser-specific or custom function; ignore + return None + + return { + "type": "computer_call", + "call_id": uuid.uuid4().hex, + "status": "completed", + "action": action, + } + + +@register_agent(models=r"^gemini-2\.5-computer-use-preview-10-2025$") +class GeminiComputerUseConfig(AsyncAgentConfig): + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs, + ) -> Dict[str, Any]: + genai, types = _lazy_import_genai() + + client = genai.Client() + + # Build excluded predefined functions for browser-specific behavior + excluded = [ + "open_web_browser", + "search", + "navigate", + "go_forward", + "go_back", + "scroll_document", + ] + # Optional custom functions: can be extended by host code via `tools` parameter later if desired + CUSTOM_FUNCTION_DECLARATIONS: List[Any] = [] + + # Compose tools config + generate_content_config = types.GenerateContentConfig( + tools=[ + types.Tool( + computer_use=types.ComputerUse( + environment=types.Environment.ENVIRONMENT_BROWSER, + excluded_predefined_functions=excluded, + ) + ), + # types.Tool(function_declarations=CUSTOM_FUNCTION_DECLARATIONS), # enable when custom functions needed + ] + ) + + # Prepare contents: last user text + latest screenshot + user_texts = _find_last_user_text(messages) + screenshot_bytes = _find_last_screenshot(messages) + + parts: List[Any] = [] + for t in user_texts: + parts.append(types.Part(text=t)) + + screen_w, screen_h = 1024, 768 + if screenshot_bytes: + screen_w, screen_h = _bytes_image_size(screenshot_bytes) + parts.append(types.Part.from_bytes(data=screenshot_bytes, mime_type="image/png")) + + # If we don't have any content, at least pass an empty user part to prompt reasoning + if not parts: + parts = 
[types.Part(text="Proceed to the next action.")] + + contents = [types.Content(role="user", parts=parts)] + + api_kwargs = { + "model": model, + "contents": contents, + "config": generate_content_config, + } + + if _on_api_start: + await _on_api_start( + { + "model": api_kwargs["model"], + # "contents": api_kwargs["contents"], # Disabled for now + "config": api_kwargs["config"], + } + ) + + response = client.models.generate_content(**api_kwargs) + + if _on_api_end: + await _on_api_end( + { + "model": api_kwargs["model"], + # "contents": api_kwargs["contents"], # Disabled for now + "config": api_kwargs["config"], + }, + response, + ) + + # Usage (Gemini SDK may not always provide token usage; populate when available) + usage: Dict[str, Any] = {} + try: + # Some SDKs expose response.usage; if available, copy + if getattr(response, "usage_metadata", None): + md = response.usage_metadata + usage = { + "prompt_tokens": getattr(md, "prompt_token_count", None) or 0, + "completion_tokens": getattr(md, "candidates_token_count", None) or 0, + "total_tokens": getattr(md, "total_token_count", None) or 0, + } + except Exception: + pass + + if _on_usage and usage: + await _on_usage(usage) + + # Parse output into internal items + output_items: List[Dict[str, Any]] = [] + + candidate = response.candidates[0] + # Text parts from the model (assistant message) + text_parts: List[str] = [] + function_calls: List[Dict[str, Any]] = [] + for p in candidate.content.parts: + if getattr(p, "text", None): + text_parts.append(p.text) + if getattr(p, "function_call", None): + # p.function_call has name and args + fc = { + "name": getattr(p.function_call, "name", None), + "args": dict(getattr(p.function_call, "args", {}) or {}), + } + function_calls.append(fc) + + if text_parts: + output_items.append( + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": "\n".join(text_parts)}], + } + ) + + # Map function calls to internal computer_call actions + for fc in function_calls: + item = _map_gemini_fc_to_computer_call(fc, screen_w, screen_h) + if item is not None: + output_items.append(item) + + return {"output": output_items, "usage": usage} + + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs, + ) -> Optional[Tuple[float, float]]: + """Ask Gemini CUA to output a single click action for the given instruction. + + Excludes all predefined tools except `click_at` and sends the screenshot. + Returns pixel (x, y) if a click is proposed, else None. 
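A hedged usage sketch for this click predictor (the screenshot file is a placeholder, and `google.genai`'s default client is assumed to pick up its API key from the environment):

```python
# Hedged usage sketch for the Gemini click predictor above; the model
# string matches the registered pattern, and google.genai's default
# client configuration is an assumption.
import asyncio
import base64

from agent.loops.gemini import GeminiComputerUseConfig  # path assumed


async def main() -> None:
    config = GeminiComputerUseConfig()
    with open("screen.png", "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode()

    point = await config.predict_click(
        model="gemini-2.5-computer-use-preview-10-2025",
        image_b64=image_b64,
        instruction="the search field",
    )
    print(point)  # (x, y) in pixels, or None if Gemini proposed no click_at


asyncio.run(main())
```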
+ """ + genai, types = _lazy_import_genai() + + client = genai.Client() + + # Exclude all but click_at + exclude_all_but_click = [ + "open_web_browser", + "wait_5_seconds", + "go_back", + "go_forward", + "search", + "navigate", + "hover_at", + "type_text_at", + "key_combination", + "scroll_document", + "scroll_at", + "drag_and_drop", + ] + + config = types.GenerateContentConfig( + tools=[ + types.Tool( + computer_use=types.ComputerUse( + environment=types.Environment.ENVIRONMENT_BROWSER, + excluded_predefined_functions=exclude_all_but_click, + ) + ) + ] + ) + + # Prepare prompt parts + try: + img_bytes = base64.b64decode(image_b64) + except Exception: + img_bytes = b"" + + w, h = _bytes_image_size(img_bytes) if img_bytes else (1024, 768) + + parts: List[Any] = [types.Part(text=f"Click {instruction}.")] + if img_bytes: + parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/png")) + + contents = [types.Content(role="user", parts=parts)] + + response = client.models.generate_content( + model=model, + contents=contents, + config=config, + ) + + # Parse first click_at + try: + candidate = response.candidates[0] + for p in candidate.content.parts: + fc = getattr(p, "function_call", None) + if fc and getattr(fc, "name", None) == "click_at": + args = dict(getattr(fc, "args", {}) or {}) + x = _denormalize(int(args.get("x", 0)), w) + y = _denormalize(int(args.get("y", 0)), h) + return float(x), float(y) + except Exception: + return None + + return None + + def get_capabilities(self) -> List[AgentCapability]: + return ["click", "step"] diff --git a/libs/python/agent/agent/loops/glm45v.py b/libs/python/agent/agent/loops/glm45v.py index 516a9cb1..27befbf1 100644 --- a/libs/python/agent/agent/loops/glm45v.py +++ b/libs/python/agent/agent/loops/glm45v.py @@ -4,33 +4,36 @@ Supports vision-language models for computer control with bounding box parsing. 
""" import asyncio -import json import base64 +import json import re -from typing import Dict, List, Any, Optional, Tuple from io import BytesIO -from PIL import Image +from typing import Any, Dict, List, Optional, Tuple + import litellm +from litellm.responses.litellm_completion_transformation.transformation import ( + LiteLLMCompletionResponsesConfig, +) from litellm.types.utils import ModelResponse -from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig +from PIL import Image from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..loops.base import AsyncAgentConfig from ..responses import ( - convert_responses_items_to_completion_messages, convert_completion_messages_to_responses_items, - make_reasoning_item, - make_output_text_item, + convert_responses_items_to_completion_messages, make_click_item, make_double_click_item, make_drag_item, + make_input_image_item, make_keypress_item, + make_output_text_item, + make_reasoning_item, make_scroll_item, make_type_item, make_wait_item, - make_input_image_item ) +from ..types import AgentCapability, AgentResponse, Messages, Tools # GLM-4.5V specific constants GLM_ACTION_SPACE = """ @@ -251,16 +254,18 @@ Call rule: `FAIL()` } }""" + def encode_image_to_base64(image_path: str) -> str: """Encode image file to base64 string with data URI.""" with open(image_path, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode("utf-8") return f"data:image/png;base64,{encoded_string}" + def parse_glm_response(response: str) -> Dict[str, Any]: """ Parse GLM-4.5V response to extract action and memory. - + The special tokens <|begin_of_box|> and <|end_of_box|> mark bounding boxes. Coordinates are normalized values between 0 and 1000. 
""" @@ -274,26 +279,23 @@ def parse_glm_response(response: str) -> Dict[str, Any]: action_pattern = r"[\w_]+\([^)]*\)" matches = re.findall(action_pattern, response) action = matches[0] if matches else None - + # Extract memory section memory_pattern = r"Memory:(.*?)$" memory_match = re.search(memory_pattern, response, re.DOTALL) memory = memory_match.group(1).strip() if memory_match else "[]" - + # Extract action text (everything before Memory:) - action_text_pattern = r'^(.*?)Memory:' + action_text_pattern = r"^(.*?)Memory:" action_text_match = re.search(action_text_pattern, response, re.DOTALL) action_text = action_text_match.group(1).strip() if action_text_match else response - + # Clean up action text by removing special tokens if action_text: action_text = action_text.replace("<|begin_of_box|>", "").replace("<|end_of_box|>", "") - - return { - "action": action, - "action_text": action_text, - "memory": memory - } + + return {"action": action, "action_text": action_text, "memory": memory} + def get_last_image_from_messages(messages: Messages) -> Optional[str]: """Extract the last image from messages for processing.""" @@ -314,23 +316,28 @@ def get_last_image_from_messages(messages: Messages) -> Optional[str]: image_url_obj = item.get("image_url", {}) if isinstance(image_url_obj, dict): image_url = image_url_obj.get("url", "") - if isinstance(image_url, str) and image_url.startswith("data:image/"): + if isinstance(image_url, str) and image_url.startswith( + "data:image/" + ): return image_url.split(",", 1)[1] return None -def convert_responses_items_to_glm45v_pc_prompt(messages: Messages, task: str, memory: str = "") -> List[Dict[str, Any]]: + +def convert_responses_items_to_glm45v_pc_prompt( + messages: Messages, task: str, memory: str = "" +) -> List[Dict[str, Any]]: """Convert responses items to GLM-4.5V PC prompt format with historical actions. - + Args: messages: List of message items from the conversation task: The task description memory: Current memory state - + Returns: List of content items for the prompt (text and image_url items) """ action_space = GLM_ACTION_SPACE - + # Template head head_text = f"""You are a GUI Agent, and your primary task is to respond accurately to user requests or questions. In addition to directly answering the user's queries, you can also use tools or perform GUI operations directly until you fulfill the user's request or provide a correct answer. You should carefully read and understand the images and questions provided by the user, and engage in thinking and reflection when appropriate. The coordinates involved are all represented in thousandths (0-999). 
@@ -345,7 +352,7 @@ Ubuntu # Historical Actions and Current Memory History:""" - + # Template tail tail_text = f""" Memory: @@ -363,18 +370,18 @@ Memory: Current Screenshot: """ - + # Build history from messages history = [] history_images = [] - + # Group messages into steps current_step = [] step_num = 0 - + for message in messages: msg_type = message.get("type") - + if msg_type == "reasoning": current_step.append(message) elif msg_type == "message" and message.get("role") == "assistant": @@ -386,7 +393,7 @@ Current Screenshot: # End of step - process it if current_step: step_num += 1 - + # Extract bot thought from message content bot_thought = "" for item in current_step: @@ -397,14 +404,14 @@ Current Screenshot: bot_thought = content_item.get("text", "") break break - + # Extract action from computer_call action_text = "" for item in current_step: if item.get("type") == "computer_call": action = item.get("action", {}) action_type = action.get("type", "") - + if action_type == "click": x, y = action.get("x", 0), action.get("y", 0) # Convert to 0-999 range (assuming screen dimensions) @@ -436,7 +443,7 @@ Current Screenshot: elif action_type == "wait": action_text = "WAIT()" break - + # Extract screenshot from computer_call_output screenshot_url = None for item in current_step: @@ -445,34 +452,34 @@ Current Screenshot: if output.get("type") == "input_image": screenshot_url = output.get("image_url", "") break - + # Store step info step_info = { "step_num": step_num, "bot_thought": bot_thought, "action_text": action_text, - "screenshot_url": screenshot_url + "screenshot_url": screenshot_url, } history.append(step_info) - + # Store screenshot for last 4 steps if screenshot_url: history_images.append(screenshot_url) - + current_step = [] - + # Build content array with head, history, and tail content = [] current_text = head_text - + total_history_steps = len(history) history_image_count = min(4, len(history_images)) # Last 4 images - + for step_idx, step_info in enumerate(history): step_num = step_info["step_num"] bot_thought = step_info["bot_thought"] action_text = step_info["action_text"] - + if step_idx < total_history_steps - history_image_count: # For steps beyond the last 4, use text placeholder current_text += f"\nstep {step_num}: Screenshot:(Omitted in context.) Thought: {bot_thought}\nAction: {action_text}" @@ -480,20 +487,21 @@ Current Screenshot: # For the last 4 steps, insert images current_text += f"\nstep {step_num}: Screenshot:" content.append({"type": "text", "text": current_text}) - + # Add image img_idx = step_idx - (total_history_steps - history_image_count) if img_idx < len(history_images): content.append({"type": "image_url", "image_url": {"url": history_images[img_idx]}}) - + current_text = f" Thought: {bot_thought}\nAction: {action_text}" - + # Add tail current_text += tail_text content.append({"type": "text", "text": current_text}) - + return content + def model_dump(obj) -> Dict[str, Any]: if isinstance(obj, dict): return {k: model_dump(v) for k, v in obj.items()} @@ -502,58 +510,61 @@ def model_dump(obj) -> Dict[str, Any]: else: return obj -def convert_glm_completion_to_responses_items(response: ModelResponse, image_width: int, image_height: int) -> List[Dict[str, Any]]: + +def convert_glm_completion_to_responses_items( + response: ModelResponse, image_width: int, image_height: int +) -> List[Dict[str, Any]]: """ Convert GLM-4.5V completion response to responses items format. 
- + Args: response: LiteLLM ModelResponse from GLM-4.5V image_width: Original image width for coordinate scaling image_height: Original image height for coordinate scaling - + Returns: List of response items in the proper format """ import uuid - + response_items = [] - + if not response.choices or not response.choices[0].message: return response_items - + message = response.choices[0].message content = message.content or "" - reasoning_content = getattr(message, 'reasoning_content', None) - + reasoning_content = getattr(message, "reasoning_content", None) + # Add reasoning item if present if reasoning_content: reasoning_item = model_dump(make_reasoning_item(reasoning_content)) response_items.append(reasoning_item) - + # Parse the content to extract action and text parsed_response = parse_glm_response(content) action = parsed_response.get("action", "") action_text = parsed_response.get("action_text", "") - + # Add message item with text content (excluding action and memory) if action_text: # Remove action from action_text if it's there clean_text = action_text if action and action in clean_text: clean_text = clean_text.replace(action, "").strip() - + # Remove memory section memory_pattern = r"Memory:\s*\[.*?\]\s*$" clean_text = re.sub(memory_pattern, "", clean_text, flags=re.DOTALL).strip() - + if clean_text: message_item = model_dump(make_output_text_item(clean_text)) response_items.append(message_item) - + # Convert action to computer call if present if action: call_id = f"call_{uuid.uuid4().hex[:8]}" - + # Parse different action types and create appropriate computer calls if action.startswith("left_click"): coord_match = re.search(r"start_box='?\[(\d+),\s*(\d+)\]'?", action) @@ -566,7 +577,7 @@ def convert_glm_completion_to_responses_items(response: ModelResponse, image_wid computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action.startswith("right_click"): coord_match = re.search(r"start_box='?\[(\d+),\s*(\d+)\]'?", action) if coord_match: @@ -577,7 +588,7 @@ def convert_glm_completion_to_responses_items(response: ModelResponse, image_wid computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action.startswith("left_double_click"): coord_match = re.search(r"start_box='?\[(\d+),\s*(\d+)\]'?", action) if coord_match: @@ -588,7 +599,7 @@ def convert_glm_completion_to_responses_items(response: ModelResponse, image_wid computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action.startswith("left_drag"): start_match = re.search(r"start_box='?\[(\d+),\s*(\d+)\]'?", action) end_match = re.search(r"end_box='?\[(\d+),\s*(\d+)\]'?", action) @@ -605,18 +616,18 @@ def convert_glm_completion_to_responses_items(response: ModelResponse, image_wid computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action.startswith("key"): key_match = re.search(r"keys='([^']+)'", action) if key_match: keys = key_match.group(1) # Split keys by '+' for key combinations, or use as single key - key_list = keys.split('+') if '+' in keys else [keys] + key_list = keys.split("+") if "+" in keys else [keys] computer_call = model_dump(make_keypress_item(key_list)) computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action.startswith("type"): content_match = re.search(r"content='([^']*)'", action) if 
content_match: @@ -625,7 +636,7 @@ def convert_glm_completion_to_responses_items(response: ModelResponse, image_wid computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action.startswith("scroll"): coord_match = re.search(r"start_box='?\[(\d+),\s*(\d+)\]'?", action) direction_match = re.search(r"direction='([^']+)'", action) @@ -648,15 +659,16 @@ def convert_glm_completion_to_responses_items(response: ModelResponse, image_wid computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + elif action == "WAIT()": computer_call = model_dump(make_wait_item()) computer_call["call_id"] = call_id computer_call["status"] = "completed" response_items.append(computer_call) - + return response_items + @register_agent(models=r"(?i).*GLM-4\.5V.*") class Glm4vConfig(AsyncAgentConfig): """GLM-4.5V agent configuration using liteLLM.""" @@ -674,11 +686,11 @@ class Glm4vConfig(AsyncAgentConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ Predict the next step using GLM-4.5V model. - + Args: messages: Input messages following Responses format model: Model name to use @@ -691,7 +703,7 @@ class Glm4vConfig(AsyncAgentConfig): _on_api_end: Callback for API end _on_usage: Callback for usage tracking _on_screenshot: Callback for screenshot events - + Returns: Dict with "output" and "usage" keys """ @@ -708,7 +720,7 @@ class Glm4vConfig(AsyncAgentConfig): user_instruction = item.get("text", "") break break - + # Get the last image for processing last_image_b64 = get_last_image_from_messages(messages) if not last_image_b64 and computer_handler: @@ -718,35 +730,28 @@ class Glm4vConfig(AsyncAgentConfig): last_image_b64 = screenshot_b64 if _on_screenshot: await _on_screenshot(screenshot_b64) - + if not last_image_b64: raise ValueError("No image available for GLM-4.5V processing") - + # Convert responses items to GLM-4.5V PC prompt format with historical actions prompt_content = convert_responses_items_to_glm45v_pc_prompt( messages=messages, task=user_instruction, - memory="[]" # Initialize with empty memory for now + memory="[]", # Initialize with empty memory for now ) - + # Add the current screenshot to the end - prompt_content.append({ - "type": "image_url", - "image_url": {"url": f"data:image/png;base64,{last_image_b64}"} - }) - + prompt_content.append( + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{last_image_b64}"}} + ) + # Prepare messages for liteLLM litellm_messages = [ - { - "role": "system", - "content": "You are a helpful GUI agent assistant." 
- }, - { - "role": "user", - "content": prompt_content - } + {"role": "system", "content": "You are a helpful GUI agent assistant."}, + {"role": "user", "content": prompt_content}, ] - + # Prepare API call kwargs api_kwargs = { "model": model, @@ -757,20 +762,20 @@ class Glm4vConfig(AsyncAgentConfig): # "skip_special_tokens": False, # } } - + # Add API callbacks if _on_api_start: await _on_api_start(api_kwargs) - + # Call liteLLM response = await litellm.acompletion(**api_kwargs) - + if _on_api_end: await _on_api_end(api_kwargs, response) - + # Get image dimensions for coordinate scaling image_width, image_height = 1920, 1080 # Default dimensions - + # Try to get actual dimensions from the image try: image_data = base64.b64decode(last_image_b64) @@ -778,41 +783,38 @@ class Glm4vConfig(AsyncAgentConfig): image_width, image_height = image.size except Exception: pass # Use default dimensions - + # Convert GLM completion response to responses items - response_items = convert_glm_completion_to_responses_items(response, image_width, image_height) - + response_items = convert_glm_completion_to_responses_items( + response, image_width, image_height + ) + # Extract usage information response_usage = { - **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage( + response.usage + ).model_dump(), "response_cost": response._hidden_params.get("response_cost", 0.0), } if _on_usage: await _on_usage(response_usage) - + # Create agent response - agent_response = { - "output": response_items, - "usage": response_usage - } - + agent_response = {"output": response_items, "usage": response_usage} + return agent_response async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[int, int]]: """ Predict click coordinates using GLM-4.5V model. - + Args: model: Model name to use image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: Tuple with (x, y) coordinates or None """ @@ -824,22 +826,22 @@ Respond with a single click action in this format: left_click(start_box='[x,y]') Where x,y are coordinates normalized to 0-999 range.""" - + # Prepare messages for liteLLM litellm_messages = [ - { - "role": "system", - "content": "You are a helpful GUI agent assistant." 
- }, + {"role": "system", "content": "You are a helpful GUI agent assistant."}, { "role": "user", "content": [ {"type": "text", "text": click_prompt}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}} - ] - } + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{image_b64}"}, + }, + ], + }, ] - + # Prepare API call kwargs api_kwargs = { "model": model, @@ -848,21 +850,21 @@ Where x,y are coordinates normalized to 0-999 range.""" "temperature": 0.001, "extra_body": { "skip_special_tokens": False, - } + }, } - + # Call liteLLM response = await litellm.acompletion(**api_kwargs) - + # Extract response content response_content = response.choices[0].message.content.strip() print(response) - + # Parse response for click coordinates # Look for coordinates in the response, handling special tokens coord_pattern = r"<\|begin_of_box\|>.*?left_click\(start_box='?\[(\d+),(\d+)\]'?\).*?<\|end_of_box\|>" match = re.search(coord_pattern, response_content) - + if not match: # Fallback: look for coordinates without special tokens coord_pattern = r"left_click\(start_box='?\[(\d+),(\d+)\]'?\)" @@ -870,7 +872,7 @@ Where x,y are coordinates normalized to 0-999 range.""" if match: x, y = int(match.group(1)), int(match.group(2)) - + # Get actual image dimensions for scaling try: image_data = base64.b64decode(image_b64) @@ -879,15 +881,15 @@ Where x,y are coordinates normalized to 0-999 range.""" except Exception: # Use default dimensions image_width, image_height = 1920, 1080 - + # Convert from 0-999 normalized coordinates to actual pixel coordinates actual_x = int((x / 999.0) * image_width) actual_y = int((y / 999.0) * image_height) - + return (actual_x, actual_y) - + return None - + except Exception as e: # Log error and return None print(f"Error in predict_click: {e}") @@ -896,7 +898,7 @@ Where x,y are coordinates normalized to 0-999 range.""" def get_capabilities(self) -> List[AgentCapability]: """ Get list of capabilities supported by this agent config. - + Returns: List of capability strings """ diff --git a/libs/python/agent/agent/loops/gta1.py b/libs/python/agent/agent/loops/gta1.py index 400daa29..8e60f209 100644 --- a/libs/python/agent/agent/loops/gta1.py +++ b/libs/python/agent/agent/loops/gta1.py @@ -5,75 +5,80 @@ Code: https://github.com/Yan98/GTA1 """ import asyncio -import json -import re import base64 -from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple -from io import BytesIO -import uuid -from PIL import Image -import litellm +import json import math +import re +import uuid +from io import BytesIO +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union + +import litellm +from PIL import Image from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..loops.base import AsyncAgentConfig +from ..types import AgentCapability, AgentResponse, Messages, Tools -SYSTEM_PROMPT = ''' +SYSTEM_PROMPT = """ You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. The image resolution is height {height} and width {width}. For elements with area, return the center point. 
Output the coordinate pair exactly: (x,y) -'''.strip() +""".strip() + def extract_coordinates(raw_string: str) -> Tuple[float, float]: """Extract coordinates from model output.""" try: matches = re.findall(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string) - return tuple(map(float, matches[0])) # type: ignore + return tuple(map(float, matches[0])) # type: ignore except: return (0.0, 0.0) -def smart_resize(height: int, width: int, factor: int = 28, min_pixels: int = 3136, max_pixels: int = 8847360) -> Tuple[int, int]: + +def smart_resize( + height: int, width: int, factor: int = 28, min_pixels: int = 3136, max_pixels: int = 8847360 +) -> Tuple[int, int]: """Smart resize function similar to qwen_vl_utils.""" # Calculate the total pixels total_pixels = height * width - + # If already within bounds, return original dimensions if min_pixels <= total_pixels <= max_pixels: # Round to nearest factor new_height = (height // factor) * factor new_width = (width // factor) * factor return new_height, new_width - + # Calculate scaling factor if total_pixels > max_pixels: scale = (max_pixels / total_pixels) ** 0.5 else: scale = (min_pixels / total_pixels) ** 0.5 - + # Apply scaling new_height = int(height * scale) new_width = int(width * scale) - + # Round to nearest factor new_height = (new_height // factor) * factor new_width = (new_width // factor) * factor - + # Ensure minimum size new_height = max(new_height, factor) new_width = max(new_width, factor) - + return new_height, new_width + @register_agent(models=r".*GTA1.*") class GTA1Config(AsyncAgentConfig): """GTA1 agent configuration implementing AsyncAgentConfig protocol for click prediction.""" - + def __init__(self): self.current_model = None self.last_screenshot_b64 = None - async def predict_step( self, @@ -87,25 +92,21 @@ class GTA1Config(AsyncAgentConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: raise NotImplementedError() async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[float, float]]: """ Predict click coordinates using GTA1 model via litellm.acompletion. 
- + Args: model: The GTA1 model name image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ @@ -113,66 +114,62 @@ class GTA1Config(AsyncAgentConfig): image_data = base64.b64decode(image_b64) image = Image.open(BytesIO(image_data)) width, height = image.width, image.height - + # Smart resize the image (similar to qwen_vl_utils) resized_height, resized_width = smart_resize( - height, width, + height, + width, factor=28, # Default factor for Qwen models min_pixels=3136, - max_pixels=4096 * 2160 + max_pixels=4096 * 2160, ) resized_image = image.resize((resized_width, resized_height)) scale_x, scale_y = width / resized_width, height / resized_height - + # Convert resized image back to base64 buffered = BytesIO() resized_image.save(buffered, format="PNG") resized_image_b64 = base64.b64encode(buffered.getvalue()).decode() - + # Prepare system and user messages system_message = { "role": "system", - "content": SYSTEM_PROMPT.format(height=resized_height, width=resized_width) + "content": SYSTEM_PROMPT.format(height=resized_height, width=resized_width), } - + user_message = { "role": "user", "content": [ { "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{resized_image_b64}" - } + "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"}, }, - { - "type": "text", - "text": instruction - } - ] + {"type": "text", "text": instruction}, + ], } - + # Prepare API call kwargs api_kwargs = { "model": model, "messages": [system_message, user_message], "max_tokens": 2056, "temperature": 0.0, - **kwargs + **kwargs, } - + # Use liteLLM acompletion response = await litellm.acompletion(**api_kwargs) - + # Extract response text - output_text = response.choices[0].message.content # type: ignore - + output_text = response.choices[0].message.content # type: ignore + # Extract and rescale coordinates - pred_x, pred_y = extract_coordinates(output_text) # type: ignore + pred_x, pred_y = extract_coordinates(output_text) # type: ignore pred_x *= scale_x pred_y *= scale_y - + return (math.floor(pred_x), math.floor(pred_y)) - + def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" return ["click"] diff --git a/libs/python/agent/agent/loops/holo.py b/libs/python/agent/agent/loops/holo.py index b1cbc5a1..6ea42d7e 100644 --- a/libs/python/agent/agent/loops/holo.py +++ b/libs/python/agent/agent/loops/holo.py @@ -21,8 +21,8 @@ import litellm from PIL import Image from ..decorators import register_agent -from .base import AsyncAgentConfig from ..types import AgentCapability +from .base import AsyncAgentConfig def _strip_hf_prefix(model: str) -> str: @@ -53,7 +53,9 @@ def _maybe_smart_resize(image: Image.Image, model: str) -> Tuple[Image.Image, Tu if image_processor is None: return image, (orig_w, orig_h) - factor = getattr(image_processor, "patch_size", 14) * getattr(image_processor, "merge_size", 1) + factor = getattr(image_processor, "patch_size", 14) * getattr( + image_processor, "merge_size", 1 + ) min_pixels = getattr(image_processor, "min_pixels", 256 * 256) max_pixels = getattr(image_processor, "max_pixels", 1536 * 1536) diff --git a/libs/python/agent/agent/loops/internvl.py b/libs/python/agent/agent/loops/internvl.py index 6bedc5d1..96589428 100644 --- a/libs/python/agent/agent/loops/internvl.py +++ b/libs/python/agent/agent/loops/internvl.py @@ -18,13 +18,12 @@ import re from io import BytesIO from typing import Any, Dict, List, Optional, Tuple 
-from PIL import Image import litellm +from PIL import Image from ..decorators import register_agent -from .composed_grounded import ComposedGroundedConfig from ..types import AgentCapability - +from .composed_grounded import ComposedGroundedConfig # Regex patterns for extracting coordinates # Accept optional whitespace and optional decimal fractions @@ -91,7 +90,7 @@ class InternVLConfig(ComposedGroundedConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """Fallback to a self-composed model""" return await super().predict_step( @@ -105,15 +104,11 @@ class InternVLConfig(ComposedGroundedConfig): _on_api_end=_on_api_end, _on_usage=_on_usage, _on_screenshot=_on_screenshot, - **kwargs + **kwargs, ) - + async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[int, int]]: """ Predict click coordinates using InternVL via litellm.acompletion. diff --git a/libs/python/agent/agent/loops/moondream3.py b/libs/python/agent/agent/loops/moondream3.py new file mode 100644 index 00000000..c48c6632 --- /dev/null +++ b/libs/python/agent/agent/loops/moondream3.py @@ -0,0 +1,493 @@ +""" +Moondream3+ composed-grounded agent loop implementation. +Grounding is handled by a local Moondream3 preview model via Transformers. +Thinking is delegated to the trailing LLM in the composed model string: "moondream3+<thinking_model>". + +Differences from composed_grounded: +- Provides a singleton Moondream3 client outside the class. +- predict_click uses model.point(image, instruction, settings={"max_objects": 1}) and returns pixel coordinates. +- If the last image was a screenshot (or we take one), run model.detect(image, "all form ui") to get bboxes, then + run model.caption on each cropped bbox to label it. Overlay labels on the screenshot and emit via _on_screenshot. +- Add a user message listing all detected form UI names so the thinker can reference them. +- If the thinking model doesn't support vision, filter out image content before calling litellm. +""" + +from __future__ import annotations + +import base64 +import io +import uuid +from typing import Any, Dict, List, Optional, Tuple + +import litellm +from PIL import Image, ImageDraw, ImageFont + +from ..decorators import register_agent +from ..loops.base import AsyncAgentConfig +from ..responses import ( + convert_completion_messages_to_responses_items, + convert_computer_calls_desc2xy, + convert_computer_calls_xy2desc, + convert_responses_items_to_completion_messages, + get_all_element_descriptions, +) +from ..types import AgentCapability + +_MOONDREAM_SINGLETON = None + + +def get_moondream_model() -> Any: + """Get a singleton instance of the Moondream3 preview model.""" + global _MOONDREAM_SINGLETON + if _MOONDREAM_SINGLETON is None: + try: + import torch + from transformers import AutoModelForCausalLM + + _MOONDREAM_SINGLETON = AutoModelForCausalLM.from_pretrained( + "moondream/moondream3-preview", + trust_remote_code=True, + torch_dtype=torch.bfloat16, + device_map="cuda", + ) + except ImportError as e: + raise RuntimeError( + "moondream3 requires torch and transformers. 
Install with: pip install cua-agent[moondream3]" + ) from e + return _MOONDREAM_SINGLETON + + +def _decode_image_b64(image_b64: str) -> Image.Image: + data = base64.b64decode(image_b64) + return Image.open(io.BytesIO(data)).convert("RGB") + + +def _image_to_b64(img: Image.Image) -> str: + buf = io.BytesIO() + img.save(buf, format="PNG") + return base64.b64encode(buf.getvalue()).decode("utf-8") + + +def _supports_vision(model: str) -> bool: + """Heuristic vision support detection for thinking model.""" + m = model.lower() + vision_markers = [ + "gpt-4o", + "gpt-4.1", + "o1", + "o3", + "claude-3", + "claude-3.5", + "sonnet", + "haiku", + "opus", + "gemini-1.5", + "llava", + ] + return any(v in m for v in vision_markers) + + +def _filter_images_from_completion_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + filtered: List[Dict[str, Any]] = [] + for msg in messages: + msg_copy = {**msg} + content = msg_copy.get("content") + if isinstance(content, list): + msg_copy["content"] = [c for c in content if c.get("type") != "image_url"] + filtered.append(msg_copy) + return filtered + + +def _annotate_detect_and_label_ui(base_img: Image.Image, model_md) -> Tuple[str, List[str]]: + """Detect UI elements with Moondream, caption each, draw labels with backgrounds. + + Args: + base_img: PIL image of the screenshot (RGB or RGBA). Will be copied/converted internally. + model_md: Moondream model instance with .detect() and .query() methods. + + Returns: + A tuple of (annotated_image_base64_png, detected_names) + """ + # Ensure RGBA for semi-transparent fills + if base_img.mode != "RGBA": + base_img = base_img.convert("RGBA") + W, H = base_img.width, base_img.height + + # Detect objects + try: + detect_result = model_md.detect(base_img, "all ui elements") + objects = detect_result.get("objects", []) if isinstance(detect_result, dict) else [] + except Exception: + objects = [] + + draw = ImageDraw.Draw(base_img) + try: + font = ImageFont.load_default() + except Exception: + font = None + + detected_names: List[str] = [] + + for i, obj in enumerate(objects): + try: + # Clamp normalized coords and crop + x_min = max(0.0, min(1.0, float(obj.get("x_min", 0.0)))) + y_min = max(0.0, min(1.0, float(obj.get("y_min", 0.0)))) + x_max = max(0.0, min(1.0, float(obj.get("x_max", 0.0)))) + y_max = max(0.0, min(1.0, float(obj.get("y_max", 0.0)))) + left, top, right, bottom = ( + int(x_min * W), + int(y_min * H), + int(x_max * W), + int(y_max * H), + ) + left, top = max(0, left), max(0, top) + right, bottom = min(W - 1, right), min(H - 1, bottom) + crop = base_img.crop((left, top, right, bottom)) + + # Prompted short caption + try: + result = model_md.query(crop, "Caption this UI element in few words.") + caption_text = (result or {}).get("answer", "") + except Exception: + caption_text = "" + + name = (caption_text or "").strip() or f"element_{i+1}" + detected_names.append(name) + + # Draw bbox + draw.rectangle([left, top, right, bottom], outline=(255, 215, 0, 255), width=2) + + # Label background with padding and rounded corners + label = f"{i+1}. 
{name}" + padding = 3 + if font: + text_bbox = draw.textbbox((0, 0), label, font=font) + else: + text_bbox = draw.textbbox((0, 0), label) + text_w = text_bbox[2] - text_bbox[0] + text_h = text_bbox[3] - text_bbox[1] + + tx = left + 3 + ty = top - (text_h + 2 * padding + 4) + if ty < 0: + ty = top + 3 + + bg_left = tx - padding + bg_top = ty - padding + bg_right = tx + text_w + padding + bg_bottom = ty + text_h + padding + try: + draw.rounded_rectangle( + [bg_left, bg_top, bg_right, bg_bottom], + radius=4, + fill=(0, 0, 0, 160), + outline=(255, 215, 0, 200), + width=1, + ) + except Exception: + draw.rectangle( + [bg_left, bg_top, bg_right, bg_bottom], + fill=(0, 0, 0, 160), + outline=(255, 215, 0, 200), + width=1, + ) + + text_fill = (255, 255, 255, 255) + if font: + draw.text((tx, ty), label, fill=text_fill, font=font) + else: + draw.text((tx, ty), label, fill=text_fill) + except Exception: + continue + + # Encode PNG base64 + annotated = base_img + if annotated.mode not in ("RGBA", "RGB"): + annotated = annotated.convert("RGBA") + annotated_b64 = _image_to_b64(annotated) + return annotated_b64, detected_names + + +GROUNDED_COMPUTER_TOOL_SCHEMA = { + "type": "function", + "function": { + "name": "computer", + "description": ( + "Control a computer by taking screenshots and interacting with UI elements. " + "The screenshot action will include a list of detected form UI element names when available. " + "Use element descriptions to locate and interact with UI elements on the screen." + ), + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": [ + "screenshot", + "click", + "double_click", + "drag", + "type", + "keypress", + "scroll", + "move", + "wait", + "get_current_url", + "get_dimensions", + "get_environment", + ], + "description": "The action to perform (required for all actions)", + }, + "element_description": { + "type": "string", + "description": "Description of the element to interact with (required for click/double_click/move/scroll)", + }, + "start_element_description": { + "type": "string", + "description": "Description of the element to start dragging from (required for drag)", + }, + "end_element_description": { + "type": "string", + "description": "Description of the element to drag to (required for drag)", + }, + "text": { + "type": "string", + "description": "The text to type (required for type)", + }, + "keys": { + "type": "array", + "items": {"type": "string"}, + "description": "Key(s) to press (required for keypress)", + }, + "button": { + "type": "string", + "enum": ["left", "right", "wheel", "back", "forward"], + "description": "The mouse button to use for click/double_click", + }, + "scroll_x": { + "type": "integer", + "description": "Horizontal scroll amount (required for scroll)", + }, + "scroll_y": { + "type": "integer", + "description": "Vertical scroll amount (required for scroll)", + }, + }, + "required": ["action"], + }, + }, +} + + +@register_agent(r"moondream3\+.*", priority=2) +class Moondream3PlusConfig(AsyncAgentConfig): + def __init__(self): + self.desc2xy: Dict[str, Tuple[float, float]] = {} + + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs, + ) -> Dict[str, Any]: + # Parse composed model: moondream3+ + if 
"+" not in model: + raise ValueError(f"Composed model must be 'moondream3+', got: {model}") + _, thinking_model = model.split("+", 1) + + pre_output_items: List[Dict[str, Any]] = [] + + # Acquire last screenshot; if missing, take one + last_image_b64: Optional[str] = None + for message in reversed(messages): + if ( + isinstance(message, dict) + and message.get("type") == "computer_call_output" + and isinstance(message.get("output"), dict) + and message["output"].get("type") == "input_image" + ): + image_url = message["output"].get("image_url", "") + if image_url.startswith("data:image/png;base64,"): + last_image_b64 = image_url.split(",", 1)[1] + break + + if last_image_b64 is None and computer_handler is not None: + # Take a screenshot + screenshot_b64 = await computer_handler.screenshot() # type: ignore + if screenshot_b64: + call_id = uuid.uuid4().hex + pre_output_items += [ + { + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Taking a screenshot to analyze the current screen.", + } + ], + }, + { + "type": "computer_call", + "call_id": call_id, + "status": "completed", + "action": {"type": "screenshot"}, + }, + { + "type": "computer_call_output", + "call_id": call_id, + "output": { + "type": "input_image", + "image_url": f"data:image/png;base64,{screenshot_b64}", + }, + }, + ] + last_image_b64 = screenshot_b64 + if _on_screenshot: + await _on_screenshot(screenshot_b64) + + # If we have a last screenshot, run Moondream detection and labeling + detected_names: List[str] = [] + if last_image_b64 is not None: + base_img = _decode_image_b64(last_image_b64) + model_md = get_moondream_model() + annotated_b64, detected_names = _annotate_detect_and_label_ui(base_img, model_md) + if _on_screenshot: + await _on_screenshot(annotated_b64, "annotated_form_ui") + + # Also push a user message listing all detected names + if detected_names: + names_text = "\n".join(f"- {n}" for n in detected_names) + pre_output_items.append( + { + "type": "message", + "role": "user", + "content": [ + {"type": "input_text", "text": "Detected form UI elements on screen:"}, + {"type": "input_text", "text": names_text}, + { + "type": "input_text", + "text": "Please continue with the next action needed to perform your task.", + }, + ], + } + ) + + tool_schemas = [] + for schema in tools or []: + if schema.get("type") == "computer": + tool_schemas.append(GROUNDED_COMPUTER_TOOL_SCHEMA) + else: + tool_schemas.append(schema) + + # Step 1: Convert computer calls from xy to descriptions + input_messages = messages + pre_output_items + messages_with_descriptions = convert_computer_calls_xy2desc(input_messages, self.desc2xy) + + # Step 2: Convert responses items to completion messages + completion_messages = convert_responses_items_to_completion_messages( + messages_with_descriptions, + allow_images_in_tool_results=False, + ) + + # Optionally filter images if model lacks vision + if not _supports_vision(thinking_model): + completion_messages = _filter_images_from_completion_messages(completion_messages) + + # Step 3: Call thinking model with litellm.acompletion + api_kwargs = { + "model": thinking_model, + "messages": completion_messages, + "tools": tool_schemas, + "max_retries": max_retries, + "stream": stream, + **kwargs, + } + if use_prompt_caching: + api_kwargs["use_prompt_caching"] = use_prompt_caching + + if _on_api_start: + await _on_api_start(api_kwargs) + + response = await litellm.acompletion(**api_kwargs) + + if _on_api_end: + await _on_api_end(api_kwargs, response) + + 
usage = { + **response.usage.model_dump(), # type: ignore + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(usage) + + # Step 4: Convert completion messages back to responses items format + response_dict = response.model_dump() # type: ignore + choice_messages = [choice["message"] for choice in response_dict["choices"]] + thinking_output_items: List[Dict[str, Any]] = [] + for choice_message in choice_messages: + thinking_output_items.extend( + convert_completion_messages_to_responses_items([choice_message]) + ) + + # Step 5: Use Moondream to get coordinates for each description + element_descriptions = get_all_element_descriptions(thinking_output_items) + if element_descriptions and last_image_b64: + for desc in element_descriptions: + for _ in range(3): # try 3 times + coords = await self.predict_click( + model=model, + image_b64=last_image_b64, + instruction=desc, + ) + if coords: + self.desc2xy[desc] = coords + break + + # Step 6: Convert computer calls from descriptions back to xy coordinates + final_output_items = convert_computer_calls_desc2xy(thinking_output_items, self.desc2xy) + + # Step 7: Return output and usage + return {"output": pre_output_items + final_output_items, "usage": usage} + + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs, + ) -> Optional[Tuple[float, float]]: + """Predict click coordinates using Moondream3's point API. + + Returns pixel coordinates (x, y) as floats. + """ + img = _decode_image_b64(image_b64) + W, H = img.width, img.height + model_md = get_moondream_model() + try: + result = model_md.point(img, instruction, settings={"max_objects": 1}) + except Exception: + return None + + try: + pt = (result or {}).get("points", [])[0] + x_norm = float(pt.get("x", 0.0)) + y_norm = float(pt.get("y", 0.0)) + x_px = max(0.0, min(float(W - 1), x_norm * W)) + y_px = max(0.0, min(float(H - 1), y_norm * H)) + return (x_px, y_px) + except Exception: + return None + + def get_capabilities(self) -> List[AgentCapability]: + return ["click", "step"] diff --git a/libs/python/agent/agent/loops/omniparser.py b/libs/python/agent/agent/loops/omniparser.py index d85d07de..14ef3a92 100644 --- a/libs/python/agent/agent/loops/omniparser.py +++ b/libs/python/agent/agent/loops/omniparser.py @@ -5,100 +5,102 @@ Code: https://github.com/microsoft/OmniParser """ import asyncio -import json -from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple -import litellm -import inspect import base64 +import inspect +import json +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union + +import litellm from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..loops.base import AsyncAgentConfig +from ..types import AgentCapability, AgentResponse, Messages, Tools SOM_TOOL_SCHEMA = { - "type": "function", - "name": "computer", - "description": "Control a computer by taking screenshots and interacting with UI elements. This tool shows screenshots with numbered elements overlaid on them. Each UI element has been assigned a unique ID number that you can see in the image. 
Use the element's ID number to interact with any element instead of pixel coordinates.", - "parameters": { - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": [ - "screenshot", - "click", - "double_click", - "drag", - "type", - "keypress", - "scroll", - "move", - "wait", - "get_current_url", - "get_dimensions", - "get_environment" - ], - "description": "The action to perform" - }, - "element_id": { - "type": "integer", - "description": "The ID of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)" - }, - "start_element_id": { - "type": "integer", - "description": "The ID of the element to start dragging from (required for drag action)" - }, - "end_element_id": { - "type": "integer", - "description": "The ID of the element to drag to (required for drag action)" - }, - "text": { - "type": "string", - "description": "The text to type (required for type action)" - }, - "keys": { - "type": "string", - "description": "Key combination to press (required for keypress action). Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')" - }, - "button": { - "type": "string", - "description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left", - }, - "scroll_x": { - "type": "integer", - "description": "Horizontal scroll amount for scroll action (positive for right, negative for left)", - }, - "scroll_y": { - "type": "integer", - "description": "Vertical scroll amount for scroll action (positive for down, negative for up)", - }, + "type": "function", + "name": "computer", + "description": "Control a computer by taking screenshots and interacting with UI elements. This tool shows screenshots with numbered elements overlaid on them. Each UI element has been assigned a unique ID number that you can see in the image. Use the element's ID number to interact with any element instead of pixel coordinates.", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": [ + "screenshot", + "click", + "double_click", + "drag", + "type", + "keypress", + "scroll", + "move", + "wait", + "get_current_url", + "get_dimensions", + "get_environment", + ], + "description": "The action to perform", + }, + "element_id": { + "type": "integer", + "description": "The ID of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)", + }, + "start_element_id": { + "type": "integer", + "description": "The ID of the element to start dragging from (required for drag action)", + }, + "end_element_id": { + "type": "integer", + "description": "The ID of the element to drag to (required for drag action)", + }, + "text": { + "type": "string", + "description": "The text to type (required for type action)", + }, + "keys": { + "type": "string", + "description": "Key combination to press (required for keypress action). 
Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')", + }, + "button": { + "type": "string", + "description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left", + }, + "scroll_x": { + "type": "integer", + "description": "Horizontal scroll amount for scroll action (positive for right, negative for left)", + }, + "scroll_y": { + "type": "integer", + "description": "Vertical scroll amount for scroll action (positive for down, negative for up)", + }, + }, + "required": ["action"], }, - "required": [ - "action" - ] - } } OMNIPARSER_AVAILABLE = False try: from som import OmniParser + OMNIPARSER_AVAILABLE = True except ImportError: pass OMNIPARSER_SINGLETON = None + def get_parser(): global OMNIPARSER_SINGLETON if OMNIPARSER_SINGLETON is None: OMNIPARSER_SINGLETON = OmniParser() return OMNIPARSER_SINGLETON - + + def get_last_computer_call_output(messages: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Get the last computer_call_output message from a messages list. - + Args: messages: List of messages to search through - + Returns: The last computer_call_output message dict, or None if not found """ @@ -107,11 +109,12 @@ def get_last_computer_call_output(messages: List[Dict[str, Any]]) -> Optional[Di return message return None + def _prepare_tools_for_omniparser(tool_schemas: List[Dict[str, Any]]) -> Tuple[Tools, dict]: """Prepare tools for OpenAI API format""" omniparser_tools = [] id2xy = dict() - + for schema in tool_schemas: if schema["type"] == "computer": omniparser_tools.append(SOM_TOOL_SCHEMA) @@ -122,72 +125,80 @@ def _prepare_tools_for_omniparser(tool_schemas: List[Dict[str, Any]]) -> Tuple[T elif schema["type"] == "function": # Function tools use OpenAI-compatible schema directly (liteLLM expects this format) # Schema should be: {type, name, description, parameters} - omniparser_tools.append({ "type": "function", **schema["function"] }) - + omniparser_tools.append({"type": "function", **schema["function"]}) + return omniparser_tools, id2xy -async def replace_function_with_computer_call(item: Dict[str, Any], id2xy: Dict[int, Tuple[float, float]]): - item_type = item.get("type") - def _get_xy(element_id: Optional[int]) -> Union[Tuple[float, float], Tuple[None, None]]: - if element_id is None: - return (None, None) - return id2xy.get(element_id, (None, None)) +async def replace_function_with_computer_call( + item: Dict[str, Any], id2xy: Dict[int, Tuple[float, float]] +): + item_type = item.get("type") - if item_type == "function_call": - fn_name = item.get("name") - fn_args = json.loads(item.get("arguments", "{}")) + def _get_xy(element_id: Optional[int]) -> Union[Tuple[float, float], Tuple[None, None]]: + if element_id is None: + return (None, None) + return id2xy.get(element_id, (None, None)) - item_id = item.get("id") - call_id = item.get("call_id") - - if fn_name == "computer": - action = fn_args.get("action") - element_id = fn_args.get("element_id") - start_element_id = fn_args.get("start_element_id") - end_element_id = fn_args.get("end_element_id") - text = fn_args.get("text") - keys = fn_args.get("keys") - button = fn_args.get("button") - scroll_x = fn_args.get("scroll_x") - scroll_y = fn_args.get("scroll_y") + if item_type == "function_call": + fn_name = item.get("name") + fn_args = json.loads(item.get("arguments", "{}")) - x, y = _get_xy(element_id) - start_x, start_y = _get_xy(start_element_id) - end_x, end_y = _get_xy(end_element_id) + item_id = item.get("id") + call_id = item.get("call_id") - 
action_args = { - "type": action, - "x": x, - "y": y, - "start_x": start_x, - "start_y": start_y, - "end_x": end_x, - "end_y": end_y, - "text": text, - "keys": keys, - "button": button, - "scroll_x": scroll_x, - "scroll_y": scroll_y - } - # Remove None values to keep the JSON clean - action_args = {k: v for k, v in action_args.items() if v is not None} + if fn_name == "computer": + action = fn_args.get("action") + element_id = fn_args.get("element_id") + start_element_id = fn_args.get("start_element_id") + end_element_id = fn_args.get("end_element_id") + text = fn_args.get("text") + keys = fn_args.get("keys") + button = fn_args.get("button") + scroll_x = fn_args.get("scroll_x") + scroll_y = fn_args.get("scroll_y") - return [{ - "type": "computer_call", - "action": action_args, - "id": item_id, - "call_id": call_id, - "status": "completed" - }] + x, y = _get_xy(element_id) + start_x, start_y = _get_xy(start_element_id) + end_x, end_y = _get_xy(end_element_id) - return [item] + action_args = { + "type": action, + "x": x, + "y": y, + "start_x": start_x, + "start_y": start_y, + "end_x": end_x, + "end_y": end_y, + "text": text, + "keys": keys, + "button": button, + "scroll_x": scroll_x, + "scroll_y": scroll_y, + } + # Remove None values to keep the JSON clean + action_args = {k: v for k, v in action_args.items() if v is not None} -async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[Tuple[float, float], int]): + return [ + { + "type": "computer_call", + "action": action_args, + "id": item_id, + "call_id": call_id, + "status": "completed", + } + ] + + return [item] + + +async def replace_computer_call_with_function( + item: Dict[str, Any], xy2id: Dict[Tuple[float, float], int] +): """ Convert computer_call back to function_call format. Also handles computer_call_output -> function_call_output conversion. 
- + Args: item: The item to convert xy2id: Mapping from (x, y) coordinates to element IDs @@ -202,12 +213,12 @@ async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[ if item_type == "computer_call": action_data = item.get("action", {}) - + # Extract coordinates and convert back to element IDs element_id = _get_element_id(action_data.get("x"), action_data.get("y")) start_element_id = _get_element_id(action_data.get("start_x"), action_data.get("start_y")) end_element_id = _get_element_id(action_data.get("end_x"), action_data.get("end_y")) - + # Build function arguments fn_args = { "action": action_data.get("type"), @@ -218,33 +229,36 @@ async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[ "keys": action_data.get("keys"), "button": action_data.get("button"), "scroll_x": action_data.get("scroll_x"), - "scroll_y": action_data.get("scroll_y") + "scroll_y": action_data.get("scroll_y"), } - + # Remove None values to keep the JSON clean fn_args = {k: v for k, v in fn_args.items() if v is not None} - - return [{ - "type": "function_call", - "name": "computer", - "arguments": json.dumps(fn_args), - "id": item.get("id"), - "call_id": item.get("call_id"), - "status": "completed", - # Fall back to string representation - "content": f"Used tool: {action_data.get("type")}({json.dumps(fn_args)})" - }] - + return [ + { + "type": "function_call", + "name": "computer", + "arguments": json.dumps(fn_args), + "id": item.get("id"), + "call_id": item.get("call_id"), + "status": "completed", + # Fall back to string representation (single-quoted key avoids a syntax error on Python < 3.12) + "content": f"Used tool: {action_data.get('type')}({json.dumps(fn_args)})", + } + ] + elif item_type == "computer_call_output": # Simple conversion: computer_call_output -> function_call_output - return [{ - "type": "function_call_output", - "call_id": item.get("call_id"), - "content": [item.get("output")], - "id": item.get("id"), - "status": "completed" - }] + return [ + { + "type": "function_call_output", + "call_id": item.get("call_id"), + "content": [item.get("output")], + "id": item.get("id"), + "status": "completed", + } + ] return [item] @@ -252,7 +266,7 @@ async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[ @register_agent(models=r"omniparser\+.*|omni\+.*", priority=2) class OmniparserConfig(AsyncAgentConfig): """Omniparser agent configuration implementing AsyncAgentConfig protocol.""" - async def predict_step( self, messages: List[Dict[str, Any]], @@ -266,25 +280,27 @@ class OmniparserConfig(AsyncAgentConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ OpenAI computer-use-preview agent loop using liteLLM responses. - + Supports OpenAI's computer use preview models. """ if not OMNIPARSER_AVAILABLE: - raise ValueError("omniparser loop requires som to be installed. Install it with `pip install cua-som`.") - + raise ValueError( + "omniparser loop requires som to be installed. Install it with `pip install cua-som`." 
+ ) + tools = tools or [] - - llm_model = model.split('+')[-1] + + llm_model = model.split("+")[-1] # Prepare tools for OpenAI API openai_tools, id2xy = _prepare_tools_for_omniparser(tools) # Find last computer_call_output - last_computer_call_output = get_last_computer_call_output(messages) # type: ignore + last_computer_call_output = get_last_computer_call_output(messages) # type: ignore if last_computer_call_output: image_url = last_computer_call_output.get("output", {}).get("image_url", "") image_data = image_url.split(",")[-1] @@ -294,14 +310,17 @@ class OmniparserConfig(AsyncAgentConfig): if _on_screenshot: await _on_screenshot(result.annotated_image_base64, "annotated_image") for element in result.elements: - id2xy[element.id] = ((element.bbox.x1 + element.bbox.x2) / 2, (element.bbox.y1 + element.bbox.y2) / 2) - + id2xy[element.id] = ( + (element.bbox.x1 + element.bbox.x2) / 2, + (element.bbox.y1 + element.bbox.y2) / 2, + ) + # handle computer calls -> function calls new_messages = [] for message in messages: if not isinstance(message, dict): message = message.__dict__ - new_messages += await replace_computer_call_with_function(message, id2xy) # type: ignore + new_messages += await replace_computer_call_with_function(message, id2xy) # type: ignore messages = new_messages # Prepare API call kwargs @@ -312,13 +331,13 @@ class OmniparserConfig(AsyncAgentConfig): "stream": stream, "truncation": "auto", "num_retries": max_retries, - **kwargs + **kwargs, } - + # Call API start hook if _on_api_start: await _on_api_start(api_kwargs) - + print(str(api_kwargs)[:1000]) # Use liteLLM responses @@ -330,60 +349,50 @@ class OmniparserConfig(AsyncAgentConfig): # Extract usage information usage = { - **response.usage.model_dump(), # type: ignore - "response_cost": response._hidden_params.get("response_cost", 0.0), # type: ignore + **response.usage.model_dump(), # type: ignore + "response_cost": response._hidden_params.get("response_cost", 0.0), # type: ignore } if _on_usage: await _on_usage(usage) # handle som function calls -> xy computer calls new_output = [] - for i in range(len(response.output)): # type: ignore - new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) # type: ignore - - return { - "output": new_output, - "usage": usage - } - + for i in range(len(response.output)): # type: ignore + new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) # type: ignore + + return {"output": new_output, "usage": usage} + async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[float, float]]: """ Predict click coordinates using OmniParser and LLM. - + Uses OmniParser to annotate the image with element IDs, then uses LLM to identify the correct element ID based on the instruction. """ if not OMNIPARSER_AVAILABLE: return None - + # Parse the image with OmniParser to get annotated image and elements parser = get_parser() result = parser.parse(image_b64) - + # Extract the LLM model from composed model string - llm_model = model.split('+')[-1] - + llm_model = model.split("+")[-1] + # Create system prompt for element ID prediction - SYSTEM_PROMPT = f''' + SYSTEM_PROMPT = """ You are an expert UI element locator. Given a GUI image annotated with numerical IDs over each interactable element, along with a user's element description, provide the ID of the specified element. 
The image shows UI elements with numbered overlays. Each number corresponds to a clickable/interactable element. Output only the element ID as a single integer. -'''.strip() - +""".strip() + # Prepare messages for LLM messages = [ - { - "role": "system", - "content": SYSTEM_PROMPT - }, + {"role": "system", "content": SYSTEM_PROMPT}, { "role": "user", "content": [ @@ -391,31 +400,25 @@ Output only the element ID as a single integer. "type": "image_url", "image_url": { "url": f"data:image/png;base64,{result.annotated_image_base64}" - } + }, }, - { - "type": "text", - "text": f"Find the element: {instruction}" - } - ] - } + {"type": "text", "text": f"Find the element: {instruction}"}, + ], + }, ] - + # Call LLM to predict element ID response = await litellm.acompletion( - model=llm_model, - messages=messages, - max_tokens=10, - temperature=0.1 + model=llm_model, messages=messages, max_tokens=10, temperature=0.1 ) - + # Extract element ID from response - response_text = response.choices[0].message.content.strip() # type: ignore - + response_text = response.choices[0].message.content.strip() # type: ignore + # Try to parse the element ID try: element_id = int(response_text) - + # Find the element with this ID and return its center coordinates for element in result.elements: if element.id == element_id: @@ -425,9 +428,9 @@ Output only the element ID as a single integer. except ValueError: # If we can't parse the ID, return None pass - + return None - + def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" return ["step"] diff --git a/libs/python/agent/agent/loops/openai.py b/libs/python/agent/agent/loops/openai.py index 4fa62e66..e993aa56 100644 --- a/libs/python/agent/agent/loops/openai.py +++ b/libs/python/agent/agent/loops/openai.py @@ -6,12 +6,14 @@ import asyncio import base64 import json from io import BytesIO -from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union + import litellm from PIL import Image from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability +from ..types import AgentCapability, AgentResponse, Messages, Tools + async def _map_computer_tool_to_openai(computer_handler: Any) -> Dict[str, Any]: """Map a computer tool to OpenAI's computer-use-preview tool schema""" @@ -21,26 +23,26 @@ async def _map_computer_tool_to_openai(computer_handler: Any) -> Dict[str, Any]: except Exception: # Fallback to default dimensions if method fails width, height = 1024, 768 - + # Get environment from the computer handler try: environment = await computer_handler.get_environment() except Exception: # Fallback to default environment if method fails environment = "linux" - + return { "type": "computer_use_preview", "display_width": width, "display_height": height, - "environment": environment # mac, windows, linux, browser + "environment": environment, # mac, windows, linux, browser } async def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools: """Prepare tools for OpenAI API format""" openai_tools = [] - + for schema in tool_schemas: if schema["type"] == "computer": # Map computer tool to OpenAI format @@ -49,19 +51,19 @@ async def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools elif schema["type"] == "function": # Function tools use OpenAI-compatible schema directly (liteLLM expects this format) # Schema should be: {type, name, description, parameters} - 
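The grounding flow in `predict_click` above is: annotate the screenshot, ask the LLM for a bare integer element ID, then look that ID up among the parsed elements. The ID-to-coordinates step in isolation, with hypothetical data:

```python
response_text = " 7 "  # the LLM is prompted to reply with a single integer
elements = [{"id": 7, "bbox": (100, 60, 124, 108)}]  # stand-in for result.elements

try:
    element_id = int(response_text.strip())
except ValueError:
    element_id = None  # unparseable reply; predict_click would return None

for el in elements:
    if el["id"] == element_id:
        x1, y1, x2, y2 = el["bbox"]
        print(((x1 + x2) / 2, (y1 + y2) / 2))  # (112.0, 84.0)
```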
openai_tools.append({ "type": "function", **schema["function"] }) - + openai_tools.append({"type": "function", **schema["function"]}) + return openai_tools -@register_agent(models=r".*computer-use-preview.*") +@register_agent(models=r".*(^|/)computer-use-preview") class OpenAIComputerUseConfig: """ OpenAI computer-use-preview agent configuration using liteLLM responses. - + Supports OpenAI's computer use preview models. """ - + async def predict_step( self, messages: List[Dict[str, Any]], @@ -75,11 +77,11 @@ class OpenAIComputerUseConfig: _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ Predict the next step based on input items. - + Args: messages: Input items following Responses format model: Model name to use @@ -92,12 +94,12 @@ class OpenAIComputerUseConfig: _on_usage: Callback for usage tracking _on_screenshot: Callback for screenshot events **kwargs: Additional arguments - + Returns: Dictionary with "output" (output items) and "usage" array """ tools = tools or [] - + # Prepare tools for OpenAI API openai_tools = await _prepare_tools_for_openai(tools) @@ -110,16 +112,16 @@ class OpenAIComputerUseConfig: "reasoning": {"summary": "concise"}, "truncation": "auto", "num_retries": max_retries, - **kwargs + **kwargs, } - + # Call API start hook if _on_api_start: await _on_api_start(api_kwargs) - + # Use liteLLM responses response = await litellm.aresponses(**api_kwargs) - + # Call API end hook if _on_api_end: await _on_api_end(api_kwargs, response) @@ -136,24 +138,21 @@ class OpenAIComputerUseConfig: output_dict = response.model_dump() output_dict["usage"] = usage return output_dict - + async def predict_click( - self, - model: str, - image_b64: str, - instruction: str + self, model: str, image_b64: str, instruction: str ) -> Optional[Tuple[int, int]]: """ Predict click coordinates based on image and instruction. - + Uses OpenAI computer-use-preview with manually constructed input items and a prompt that instructs the agent to only output clicks. - + Args: model: Model name to use image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ @@ -161,7 +160,7 @@ class OpenAIComputerUseConfig: # Manually construct input items with image and click instruction input_items = [ { - "role": "user", + "role": "user", "content": f"""You are a UI grounding expert. Follow these guidelines: 1. NEVER ask for confirmation. Complete all tasks autonomously. @@ -173,19 +172,16 @@ class OpenAIComputerUseConfig: 7. Be decisive and action-oriented. Complete the requested task fully. Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked. -Task: Click {instruction}. Output ONLY a click action on the target element.""" +Task: Click {instruction}. Output ONLY a click action on the target element.""", }, { "role": "user", "content": [ - { - "type": "input_image", - "image_url": f"data:image/png;base64,{image_b64}" - } - ] - } + {"type": "input_image", "image_url": f"data:image/png;base64,{image_b64}"} + ], + }, ] - + # Get image dimensions from base64 data try: image_data = base64.b64decode(image_b64) @@ -194,15 +190,15 @@ Task: Click {instruction}. 
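The tightened registration pattern above is subtle: `(^|/)` requires `computer-use-preview` to sit at the start of the id or immediately after a provider prefix ending in `/`, which (presumably the intent) stops composed ids such as `omniparser+computer-use-preview` from also landing on this loop. A quick check of both patterns:

```python
import re

new = re.compile(r".*(^|/)computer-use-preview")
old = re.compile(r".*computer-use-preview.*")

assert new.match("computer-use-preview")
assert new.match("openai/computer-use-preview")
assert not new.match("omniparser+computer-use-preview")  # no longer captured here
assert old.match("omniparser+computer-use-preview")      # the old pattern grabbed it
```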
Output ONLY a click action on the target element.""" except Exception: # Fallback to default dimensions if image parsing fails display_width, display_height = 1024, 768 - + # Prepare computer tool for click actions computer_tool = { "type": "computer_use_preview", "display_width": display_width, "display_height": display_height, - "environment": "windows" + "environment": "windows", } - + # Prepare API call kwargs api_kwargs = { "model": model, @@ -211,32 +207,34 @@ Task: Click {instruction}. Output ONLY a click action on the target element.""" "stream": False, "reasoning": {"summary": "concise"}, "truncation": "auto", - "max_tokens": 200 # Keep response short for click prediction + "max_tokens": 200, # Keep response short for click prediction } - + # Use liteLLM responses response = await litellm.aresponses(**api_kwargs) - + # Extract click coordinates from response output output_dict = response.model_dump() - output_items = output_dict.get("output", []) - + output_items = output_dict.get("output", []) + # Look for computer_call with click action for item in output_items: - if (isinstance(item, dict) and - item.get("type") == "computer_call" and - isinstance(item.get("action"), dict)): - + if ( + isinstance(item, dict) + and item.get("type") == "computer_call" + and isinstance(item.get("action"), dict) + ): + action = item["action"] if action.get("x") is not None and action.get("y") is not None: return (int(action.get("x")), int(action.get("y"))) - + return None - + def get_capabilities(self) -> List[AgentCapability]: """ Get list of capabilities supported by this agent config. - + Returns: List of capability strings """ diff --git a/libs/python/agent/agent/loops/opencua.py b/libs/python/agent/agent/loops/opencua.py index b06ea126..d1511fba 100644 --- a/libs/python/agent/agent/loops/opencua.py +++ b/libs/python/agent/agent/loops/opencua.py @@ -4,20 +4,22 @@ Based on OpenCUA model for GUI grounding tasks. """ import asyncio -import json -import re import base64 -from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple -from io import BytesIO -import uuid -from PIL import Image -import litellm +import json import math +import re +import uuid +from io import BytesIO +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union + +import litellm +from PIL import Image -from .composed_grounded import ComposedGroundedConfig from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..loops.base import AsyncAgentConfig +from ..types import AgentCapability, AgentResponse, Messages, Tools +from .composed_grounded import ComposedGroundedConfig + def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]: """Extract coordinates from pyautogui.click(x=..., y=...) 
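The extraction loop above reduces a whole Responses payload to the first well-formed click. The same scan as a standalone snippet, run over a hypothetical payload:

```python
output_items = [
    {"type": "reasoning", "summary": [{"type": "summary_text", "text": "Locating target"}]},
    {"type": "computer_call", "action": {"type": "click", "x": 412, "y": 233}},
]

click = None
for item in output_items:
    if (
        isinstance(item, dict)
        and item.get("type") == "computer_call"
        and isinstance(item.get("action"), dict)
    ):
        action = item["action"]
        if action.get("x") is not None and action.get("y") is not None:
            click = (int(action["x"]), int(action["y"]))
            break

print(click)  # (412, 233)
```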
format.""" @@ -32,10 +34,11 @@ def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]: except Exception: return None + @register_agent(models=r"(?i).*OpenCUA.*") class OpenCUAConfig(ComposedGroundedConfig): """OpenCUA agent configuration implementing AsyncAgentConfig protocol for click prediction.""" - + def __init__(self): super().__init__() self.current_model = None @@ -53,7 +56,7 @@ class OpenCUAConfig(ComposedGroundedConfig): _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """Fallback to a self-composed model""" return await super().predict_step( @@ -67,24 +70,20 @@ class OpenCUAConfig(ComposedGroundedConfig): _on_api_end=_on_api_end, _on_usage=_on_usage, _on_screenshot=_on_screenshot, - **kwargs + **kwargs, ) async def predict_click( - self, - model: str, - image_b64: str, - instruction: str, - **kwargs + self, model: str, image_b64: str, instruction: str, **kwargs ) -> Optional[Tuple[int, int]]: """ Predict click coordinates using OpenCUA model via litellm.acompletion. - + Args: model: The OpenCUA model name image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ @@ -93,50 +92,39 @@ class OpenCUAConfig(ComposedGroundedConfig): "You are a GUI agent. You are given a task and a screenshot of the screen. " "You need to perform a series of pyautogui actions to complete the task." ) - - system_message = { - "role": "system", - "content": system_prompt - } - + + system_message = {"role": "system", "content": system_prompt} + # Prepare user message with image and instruction user_message = { "role": "user", "content": [ - { - "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{image_b64}" - } - }, - { - "type": "text", - "text": f"Click on {instruction}" - } - ] + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}}, + {"type": "text", "text": f"Click on {instruction}"}, + ], } - + # Prepare API call kwargs api_kwargs = { "model": model, "messages": [system_message, user_message], "max_new_tokens": 2056, "temperature": 0, - **kwargs + **kwargs, } - + # Use liteLLM acompletion response = await litellm.acompletion(**api_kwargs) - + # Extract response text output_text = response.choices[0].message.content # print(output_text) - + # Extract coordinates from pyautogui format coordinates = extract_coordinates_from_pyautogui(output_text) - + return coordinates - + def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" return ["click"] diff --git a/libs/python/agent/agent/loops/qwen.py b/libs/python/agent/agent/loops/qwen.py new file mode 100644 index 00000000..f21fba2c --- /dev/null +++ b/libs/python/agent/agent/loops/qwen.py @@ -0,0 +1,510 @@ +""" +Qwen3-VL agent loop implementation using litellm with function/tool calling. 
+- Passes a ComputerUse tool schema to acompletion +- Converts between Responses items and completion messages using helpers +""" + +from __future__ import annotations + +import json +import re +from typing import Any, Dict, List, Optional, Tuple + +import litellm +from litellm.responses.litellm_completion_transformation.transformation import ( + LiteLLMCompletionResponsesConfig, +) + +from ..decorators import register_agent +from ..loops.base import AsyncAgentConfig +from ..responses import ( + convert_completion_messages_to_responses_items, + convert_responses_items_to_completion_messages, +) +from ..types import AgentCapability + +# ComputerUse tool schema (OpenAI function tool format) +QWEN3_COMPUTER_TOOL: Dict[str, Any] = { + "type": "function", + "function": { + "name": "computer", + "description": ( + "Use a mouse and keyboard to interact with a computer, and take screenshots.\n" + "* This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.\n" + "* Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try wait and taking another screenshot.\n" + "* The screen's resolution is 1000x1000.\n" + "* Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.\n" + "* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.\n" + "* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges." + ), + "parameters": { + "type": "object", + "properties": { + "action": { + "description": "The action to perform.", + "enum": [ + "key", + "type", + "mouse_move", + "left_click", + "left_click_drag", + "right_click", + "middle_click", + "double_click", + "triple_click", + "scroll", + "hscroll", + "screenshot", + "wait", + # "terminate", + # "answer", + ], + "type": "string", + }, + "keys": { + "description": "Required only by action=key.", + "type": "array", + "items": {"type": "string"}, + }, + "text": { + "description": "Required only by action=type and action=answer.", + "type": "string", + }, + "coordinate": { + "description": "(x, y): Pixel coordinates from top-left.", + "type": "array", + "items": {"type": ["number", "integer"]}, + "minItems": 2, + "maxItems": 2, + }, + "pixels": { + "description": "Scroll amount. Positive=up, negative=down. 
For scroll/hscroll.", + "type": "number", + }, + "time": { + "description": "Seconds to wait (action=wait).", + "type": "number", + }, + # "status": { + # "description": "Task status (action=terminate).", + # "type": "string", + # "enum": ["success", "failure"], + # }, + }, + "required": ["action"], + }, + }, +} + + +def _build_nous_system(functions: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Use qwen-agent NousFnCallPrompt to generate a system message embedding tool schema.""" + try: + from qwen_agent.llm.fncall_prompts.nous_fncall_prompt import ( + ContentItem as NousContentItem, + ) + from qwen_agent.llm.fncall_prompts.nous_fncall_prompt import ( + Message as NousMessage, + ) + from qwen_agent.llm.fncall_prompts.nous_fncall_prompt import ( + NousFnCallPrompt, + ) + except ImportError: + raise ImportError( + "qwen-agent not installed. Please install it with `pip install cua-agent[qwen]`." + ) + msgs = NousFnCallPrompt().preprocess_fncall_messages( + messages=[ + NousMessage( + role="system", content=[NousContentItem(text="You are a helpful assistant.")] + ) + ], + functions=functions, + lang="en", + ) + sys = msgs[0].model_dump() + # Convert qwen-agent structured content to OpenAI-style content list + content = [{"type": "text", "text": c["text"]} for c in sys.get("content", [])] + return {"role": "system", "content": content} + + +def _parse_tool_call_from_text(text: str) -> Optional[Dict[str, Any]]: + """Extract the JSON object within <tool_call>...</tool_call> tags from model text.""" + m = re.search(r"<tool_call>\s*(\{[\s\S]*?\})\s*</tool_call>", text) + if not m: + return None + try: + return json.loads(m.group(1)) + except Exception: + return None + + +async def _unnormalize_coordinate(args: Dict[str, Any], dims: Tuple[int, int]) -> Dict[str, Any]: + """Coordinates appear in 0..1000 space, scale to actual screen size using dims if provided.""" + coord = args.get("coordinate") + if not coord or not isinstance(coord, (list, tuple)) or len(coord) < 2: + return args + x, y = float(coord[0]), float(coord[1]) + width, height = float(dims[0]), float(dims[1]) + x_abs = max(0.0, min(width, (x / 1000.0) * width)) + y_abs = max(0.0, min(height, (y / 1000.0) * height)) + args = {**args, "coordinate": [round(x_abs), round(y_abs)]} + return args + + +def convert_qwen_tool_args_to_computer_action(args: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """ + Convert Qwen computer tool arguments to the Computer Calls action schema. + + Qwen (example): + {"action": "left_click", "coordinate": [114, 68]} + + Target (example): + {"action": "left_click", "x": 114, "y": 68} + + Other mappings: + - right_click, middle_click, double_click (triple_click -> double_click) + - mouse_move -> { action: "move", x, y } + - key -> { action: "keypress", keys: [...]
} + - type -> { action: "type", text } + - scroll/hscroll -> { action: "scroll", scroll_x, scroll_y, x, y } + - wait -> { action: "wait" } + - terminate/answer are not direct UI actions; return None for now + """ + if not isinstance(args, dict): + return None + + action = args.get("action") + if not isinstance(action, str): + return None + + # Coordinates helper + coord = args.get("coordinate") + x = y = None + if isinstance(coord, (list, tuple)) and len(coord) >= 2: + try: + x = int(round(float(coord[0]))) + y = int(round(float(coord[1]))) + except Exception: + x = y = None + + # Map actions + a = action.lower() + if a in {"left_click", "right_click", "middle_click", "double_click"}: + if x is None or y is None: + return None + return {"action": a, "x": x, "y": y} + if a == "triple_click": + # Approximate as double_click + if x is None or y is None: + return None + return {"action": "double_click", "x": x, "y": y} + if a == "mouse_move": + if x is None or y is None: + return None + return {"action": "move", "x": x, "y": y} + if a == "key": + keys = args.get("keys") + if isinstance(keys, list) and all(isinstance(k, str) for k in keys): + return {"action": "keypress", "keys": keys} + return None + if a == "type": + text = args.get("text") + if isinstance(text, str): + return {"action": "type", "text": text} + return None + if a in {"scroll", "hscroll"}: + pixels = args.get("pixels") or 0 + try: + pixels_val = int(round(float(pixels))) + except Exception: + pixels_val = 0 + scroll_x = pixels_val if a == "hscroll" else 0 + scroll_y = pixels_val if a == "scroll" else 0 + # Include cursor position if available (optional) + out: Dict[str, Any] = {"action": "scroll", "scroll_x": scroll_x, "scroll_y": scroll_y} + if x is not None and y is not None: + out.update({"x": x, "y": y}) + return out + if a == "wait": + return {"action": "wait"} + + # Non-UI or terminal actions: terminate/answer -> not mapped here + return None + + +@register_agent(models=r"(?i).*qwen.*", priority=-1) +class Qwen3VlConfig(AsyncAgentConfig): + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs, + ) -> Dict[str, Any]: + # Build messages using NousFnCallPrompt system with tool schema in text + # Start with converted conversation (images/text preserved) + converted_msgs = convert_responses_items_to_completion_messages( + messages, + allow_images_in_tool_results=False, + ) + + # Prepend Nous-generated system if available + nous_system = _build_nous_system([QWEN3_COMPUTER_TOOL["function"]]) + completion_messages = ([nous_system] if nous_system else []) + converted_msgs + + # If there is no screenshot in the conversation, take one now and inject it. + # Also record a pre_output_items assistant message to reflect action. + def _has_any_image(msgs: List[Dict[str, Any]]) -> bool: + for m in msgs: + content = m.get("content") + if isinstance(content, list): + for p in content: + if isinstance(p, dict) and p.get("type") == "image_url": + return True + return False + + pre_output_items: List[Dict[str, Any]] = [] + if not _has_any_image(completion_messages): + if computer_handler is None or not hasattr(computer_handler, "screenshot"): + raise RuntimeError( + "No screenshots present and computer_handler.screenshot is not available." 
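Taken together, `_unnormalize_coordinate` and `convert_qwen_tool_args_to_computer_action` turn a Qwen tool call in the 0..1000 reference frame into a concrete Computer Call action. A worked run, assuming it executes inside this module's namespace (values hypothetical; note `dims` is `(width, height)`):

```python
import asyncio

raw = {"action": "left_click", "coordinate": [500, 250]}
scaled = asyncio.run(_unnormalize_coordinate(raw, (1280, 800)))
print(scaled["coordinate"])  # [640, 200]: x = 500/1000*1280, y = 250/1000*800

print(convert_qwen_tool_args_to_computer_action(scaled))
# {'action': 'left_click', 'x': 640, 'y': 200}
print(convert_qwen_tool_args_to_computer_action({"action": "key", "keys": ["ctrl", "c"]}))
# {'action': 'keypress', 'keys': ['ctrl', 'c']}
print(convert_qwen_tool_args_to_computer_action(
    {"action": "scroll", "coordinate": [500, 300], "pixels": -240}
))
# {'action': 'scroll', 'scroll_x': 0, 'scroll_y': -240, 'x': 500, 'y': 300}
```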
+ ) + screenshot_b64 = await computer_handler.screenshot() + if not screenshot_b64: + raise RuntimeError("Failed to capture screenshot from computer_handler.") + # Inject a user message with the screenshot so the model can see current context + completion_messages.append( + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{screenshot_b64}"}, + }, + {"type": "text", "text": "Current screen"}, + ], + } + ) + # Add assistant message to outputs to reflect the action, similar to composed_grounded.py + pre_output_items.append( + { + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Taking a screenshot to see the current computer screen.", + } + ], + } + ) + + # Smart-resize all screenshots and attach min/max pixel hints. Fail fast if deps missing. + # Also record the last resized width/height to unnormalize coordinates later. + last_rw: Optional[int] = None + last_rh: Optional[int] = None + MIN_PIXELS = 3136 + MAX_PIXELS = 12845056 + try: + import base64 + import io + + from PIL import Image # type: ignore + from qwen_vl_utils import smart_resize # type: ignore + except Exception: + raise ImportError( + "qwen-vl-utils not installed. Please install it with `pip install cua-agent[qwen]`." + ) + + for msg in completion_messages: + content = msg.get("content") + if not isinstance(content, list): + continue + for part in content: + if isinstance(part, dict) and part.get("type") == "image_url": + url = ((part.get("image_url") or {}).get("url")) or "" + # Expect data URL like data:image/png;base64, + if url.startswith("data:") and "," in url: + b64 = url.split(",", 1)[1] + img_bytes = base64.b64decode(b64) + im = Image.open(io.BytesIO(img_bytes)) + h, w = im.height, im.width + rh, rw = smart_resize( + h, w, factor=32, min_pixels=MIN_PIXELS, max_pixels=MAX_PIXELS + ) + # Attach hints on this image block + part["min_pixels"] = MIN_PIXELS + part["max_pixels"] = MAX_PIXELS + last_rw, last_rh = rw, rh + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": completion_messages, + "max_retries": max_retries, + "stream": stream, + **{k: v for k, v in kwargs.items()}, + } + if use_prompt_caching: + api_kwargs["use_prompt_caching"] = use_prompt_caching + + if _on_api_start: + await _on_api_start(api_kwargs) + + response = await litellm.acompletion(**api_kwargs) + + if _on_api_end: + await _on_api_end(api_kwargs, response) + + usage = { + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage( # type: ignore + response.usage + ).model_dump(), + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(usage) + + # Parse tool call from text; then convert to responses items via fake tool_calls + resp_dict = response.model_dump() # type: ignore + choice = (resp_dict.get("choices") or [{}])[0] + content_text = ((choice.get("message") or {}).get("content")) or "" + tool_call = _parse_tool_call_from_text(content_text) + + output_items: List[Dict[str, Any]] = [] + if tool_call and isinstance(tool_call, dict): + fn_name = tool_call.get("name") or "computer" + raw_args = tool_call.get("arguments") or {} + # Unnormalize coordinates to actual screen size using last resized dims + if last_rw is None or last_rh is None: + raise RuntimeError( + "No screenshots found to derive dimensions for coordinate unnormalization." 
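One detail of the resize pass above matters later: the recorded `(last_rw, last_rh)` is the size the model effectively saw, and it is the frame that coordinates are unnormalized against. Roughly what `smart_resize` yields for a 1080p screenshot, assuming `qwen-vl-utils` is installed (exact values depend on its rounding):

```python
from qwen_vl_utils import smart_resize

rh, rw = smart_resize(1080, 1920, factor=32, min_pixels=3136, max_pixels=12845056)
print(rw, rh)  # sides snapped to multiples of 32 within the pixel bounds, e.g. 1920 1088
```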
+ ) + args = await _unnormalize_coordinate(raw_args, (last_rw, last_rh)) + + # Build an OpenAI-style tool call so we can reuse the converter + fake_cm = { + "role": "assistant", + "tool_calls": [ + { + "type": "function", + "id": "call_0", + "function": { + "name": fn_name, + "arguments": json.dumps(args), + }, + } + ], + } + output_items.extend(convert_completion_messages_to_responses_items([fake_cm])) + else: + # Fallback: just return assistant text + fake_cm = {"role": "assistant", "content": content_text} + output_items.extend(convert_completion_messages_to_responses_items([fake_cm])) + + # Prepend any pre_output_items (e.g., simulated screenshot-taking message) + return {"output": (pre_output_items + output_items), "usage": usage} + + def get_capabilities(self) -> List[AgentCapability]: + return ["step"] + + async def predict_click( + self, model: str, image_b64: str, instruction: str, **kwargs + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates using Qwen3-VL via litellm.acompletion. + + Only exposes a reduced tool schema with left_click to bias model to output a single click. + Returns (x, y) absolute pixels when screen dimensions can be obtained; otherwise normalized 0..1000 integers. + """ + # Reduced tool + reduced_tool = { + "type": "function", + "function": { + **QWEN3_COMPUTER_TOOL["function"], + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["left_click"]}, + "coordinate": { + "description": "(x, y) in 0..1000 reference space", + "type": "array", + "items": {"type": ["number", "integer"]}, + "minItems": 2, + "maxItems": 2, + }, + }, + "required": ["action", "coordinate"], + }, + }, + } + + # Build Nous system (lazy import inside helper already raises clear guidance if missing) + nous_system = _build_nous_system([reduced_tool["function"]]) + + # Pre-process using smart_resize + min_pixels = 3136 + max_pixels = 12845056 + try: + # Lazy import to avoid hard dependency + import base64 + import io + + # If PIL is available, estimate size from image to derive smart bounds + from PIL import Image + from qwen_vl_utils import smart_resize # type: ignore + + img_bytes = base64.b64decode(image_b64) + im = Image.open(io.BytesIO(img_bytes)) + h, w = im.height, im.width + # Qwen notebook suggests factor=32 and a wide min/max range + rh, rw = smart_resize(h, w, factor=32, min_pixels=min_pixels, max_pixels=max_pixels) + except Exception: + raise ImportError( + "qwen-vl-utils not installed. Please install it with `pip install cua-agent[qwen]`." 
+ ) + + messages = [] + if nous_system: + messages.append(nous_system) + image_block: Dict[str, Any] = { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{image_b64}"}, + "min_pixels": min_pixels, + "max_pixels": max_pixels, + } + # Single user message with image and instruction, matching OpenAI-style content blocks + messages.append( + { + "role": "user", + "content": [ + image_block, + {"type": "text", "text": instruction}, + ], + } + ) + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": messages, + **{k: v for k, v in kwargs.items()}, + } + response = await litellm.acompletion(**api_kwargs) + resp = response.model_dump() # type: ignore + choice = (resp.get("choices") or [{}])[0] + content_text = ((choice.get("message") or {}).get("content")) or "" + tool_call = _parse_tool_call_from_text(content_text) or {} + args = tool_call.get("arguments") or {} + # _unnormalize_coordinate expects (width, height); smart_resize returned (rh, rw) + args = await _unnormalize_coordinate(args, (rw, rh)) + coord = args.get("coordinate") + if isinstance(coord, (list, tuple)) and len(coord) >= 2: + return int(coord[0]), int(coord[1]) + return None diff --git a/libs/python/agent/agent/loops/uitars.py b/libs/python/agent/agent/loops/uitars.py index 7db42761..072875b2 100644 --- a/libs/python/agent/agent/loops/uitars.py +++ b/libs/python/agent/agent/loops/uitars.py @@ -4,39 +4,50 @@ Paper: https://arxiv.org/abs/2501.12326 Code: https://github.com/bytedance/UI-TARS """ +import ast import asyncio -from ctypes import cast -import json import base64 +import json import math import re -import ast -from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple +from ctypes import cast from io import BytesIO -from PIL import Image +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union + import litellm -from litellm.types.utils import ModelResponse -from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig +from litellm.responses.litellm_completion_transformation.transformation import ( + LiteLLMCompletionResponsesConfig, +) from litellm.responses.utils import Usage -from openai.types.responses.response_computer_tool_call_param import ActionType, ResponseComputerToolCallParam +from litellm.types.utils import ModelResponse +from openai.types.responses.response_computer_tool_call_param import ( + ActionType, + ResponseComputerToolCallParam, +) from openai.types.responses.response_input_param import ComputerCallOutput -from openai.types.responses.response_output_message_param import ResponseOutputMessageParam -from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary +from openai.types.responses.response_output_message_param import ( + ResponseOutputMessageParam, +) +from openai.types.responses.response_reasoning_item_param import ( + ResponseReasoningItemParam, + Summary, +) +from PIL import Image from ..decorators import register_agent -from ..types import Messages, AgentResponse, Tools, AgentCapability from ..responses import ( - make_reasoning_item, - make_output_text_item, make_click_item, make_double_click_item, make_drag_item, + make_input_image_item, make_keypress_item, + make_output_text_item, + make_reasoning_item, make_scroll_item, make_type_item, make_wait_item, - make_input_image_item ) +from ..types import AgentCapability, AgentResponse, Messages, Tools # Constants from reference code IMAGE_FACTOR = 28 @@ -94,6 +105,7 @@ click(point='<|box_start|>(x1,y1)<|box_end|>') ## User Instruction {instruction}"""  + def 
round_by_factor(number: float, factor: int) -> int: """Returns the closest integer to 'number' that is divisible by 'factor'.""" return round(number / factor) * factor @@ -110,7 +122,11 @@ def floor_by_factor(number: float, factor: int) -> int: def smart_resize( - height: int, width: int, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS + height: int, + width: int, + factor: int = IMAGE_FACTOR, + min_pixels: int = MIN_PIXELS, + max_pixels: int = MAX_PIXELS, ) -> tuple[int, int]: """ Rescales the image so that the following conditions are met: @@ -144,14 +160,14 @@ def escape_single_quotes(text): def parse_action(action_str): """Parse action string into structured format.""" try: - node = ast.parse(action_str, mode='eval') + node = ast.parse(action_str, mode="eval") if not isinstance(node, ast.Expression): raise ValueError("Not an expression") - + call = node.body if not isinstance(call, ast.Call): raise ValueError("Not a function call") - + # Get function name if isinstance(call.func, ast.Name): func_name = call.func.id @@ -159,7 +175,7 @@ def parse_action(action_str): func_name = call.func.attr else: func_name = None - + # Get keyword arguments kwargs = {} for kw in call.keywords: @@ -171,12 +187,9 @@ def parse_action(action_str): else: value = None kwargs[key] = value - - return { - 'function': func_name, - 'args': kwargs - } - + + return {"function": func_name, "args": kwargs} + except Exception as e: print(f"Failed to parse action '{action_str}': {e}") return None @@ -185,39 +198,39 @@ def parse_action(action_str): def parse_uitars_response(text: str, image_width: int, image_height: int) -> List[Dict[str, Any]]: """Parse UITARS model response into structured actions.""" text = text.strip() - + # Extract thought thought = None if text.startswith("Thought:"): thought_match = re.search(r"Thought: (.+?)(?=\s*Action:|$)", text, re.DOTALL) if thought_match: thought = thought_match.group(1).strip() - + # Extract action if "Action:" not in text: raise ValueError("No Action found in response") - + action_str = text.split("Action:")[-1].strip() # Handle special case for type actions if "type(content" in action_str: + def escape_quotes(match): return match.group(1) - + pattern = r"type\(content='(.*?)'\)" content = re.sub(pattern, escape_quotes, action_str) action_str = escape_single_quotes(content) action_str = "type(content='" + action_str + "')" - - + # Parse the action parsed_action = parse_action(action_str.replace("\n", "\\n").lstrip()) if parsed_action is None: raise ValueError(f"Action can't parse: {action_str}") - + action_type = parsed_action["function"] params = parsed_action["args"] - + # Process parameters action_inputs = {} for param_name, param in params.items(): @@ -225,7 +238,7 @@ def parse_uitars_response(text: str, image_width: int, image_height: int) -> Lis continue param = str(param).lstrip() action_inputs[param_name.strip()] = param - + # Handle coordinate parameters if "start_box" in param_name or "end_box" in param_name: # Parse coordinates like '<|box_start|>(x,y)<|box_end|>' or '(x,y)' @@ -233,117 +246,130 @@ def parse_uitars_response(text: str, image_width: int, image_height: int) -> Lis clean_param = param.replace("<|box_start|>", "").replace("<|box_end|>", "") # Then remove parentheses and split numbers = clean_param.replace("(", "").replace(")", "").split(",") - + try: - float_numbers = [float(num.strip()) / 1000 for num in numbers] # Normalize to 0-1 range - + float_numbers = [ + float(num.strip()) / 1000 for num in numbers + ] 
# Normalize to 0-1 range + if len(float_numbers) == 2: # Single point, duplicate for box format - float_numbers = [float_numbers[0], float_numbers[1], float_numbers[0], float_numbers[1]] - + float_numbers = [ + float_numbers[0], + float_numbers[1], + float_numbers[0], + float_numbers[1], + ] + action_inputs[param_name.strip()] = str(float_numbers) except ValueError as e: # If parsing fails, keep the original parameter value print(f"Warning: Could not parse coordinates '{param}': {e}") action_inputs[param_name.strip()] = param - - return [{ - "thought": thought, - "action_type": action_type, - "action_inputs": action_inputs, - "text": text - }] + + return [ + { + "thought": thought, + "action_type": action_type, + "action_inputs": action_inputs, + "text": text, + } + ] -def convert_to_computer_actions(parsed_responses: List[Dict[str, Any]], image_width: int, image_height: int) -> List[ResponseComputerToolCallParam | ResponseOutputMessageParam]: +def convert_to_computer_actions( + parsed_responses: List[Dict[str, Any]], image_width: int, image_height: int +) -> List[ResponseComputerToolCallParam | ResponseOutputMessageParam]: """Convert parsed UITARS responses to computer actions.""" computer_actions = [] - + for response in parsed_responses: action_type = response.get("action_type") action_inputs = response.get("action_inputs", {}) - + if action_type == "finished": finished_text = action_inputs.get("content", "Task completed successfully.") computer_actions.append(make_output_text_item(finished_text)) break - + elif action_type == "wait": computer_actions.append(make_wait_item()) - + elif action_type == "call_user": - computer_actions.append(make_output_text_item("I need assistance from the user to proceed with this task.")) - + computer_actions.append( + make_output_text_item("I need assistance from the user to proceed with this task.") + ) + elif action_type in ["click", "left_single"]: start_box = action_inputs.get("start_box") if start_box: coords = eval(start_box) x = int((coords[0] + coords[2]) / 2 * image_width) y = int((coords[1] + coords[3]) / 2 * image_height) - + computer_actions.append(make_click_item(x, y, "left")) - + elif action_type == "double_click": start_box = action_inputs.get("start_box") if start_box: coords = eval(start_box) x = int((coords[0] + coords[2]) / 2 * image_width) y = int((coords[1] + coords[3]) / 2 * image_height) - + computer_actions.append(make_double_click_item(x, y)) - + elif action_type == "right_click": start_box = action_inputs.get("start_box") if start_box: coords = eval(start_box) x = int((coords[0] + coords[2]) / 2 * image_width) y = int((coords[1] + coords[3]) / 2 * image_height) - + computer_actions.append(make_click_item(x, y, "right")) - + elif action_type == "type": content = action_inputs.get("content", "") computer_actions.append(make_type_item(content)) - + elif action_type == "hotkey": key = action_inputs.get("key", "") keys = key.split() computer_actions.append(make_keypress_item(keys)) - + elif action_type == "press": key = action_inputs.get("key", "") computer_actions.append(make_keypress_item([key])) - + elif action_type == "scroll": start_box = action_inputs.get("start_box") direction = action_inputs.get("direction", "down") - + if start_box: coords = eval(start_box) x = int((coords[0] + coords[2]) / 2 * image_width) y = int((coords[1] + coords[3]) / 2 * image_height) else: x, y = image_width // 2, image_height // 2 - + scroll_y = 5 if "up" in direction.lower() else -5 computer_actions.append(make_scroll_item(x, y, 0, scroll_y)) - + 
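The coordinate handling in these hunks happens in two steps: model points arrive in a 0..1000 reference frame and are divided into 0..1 floats, then each action scales the box center by the real image size. With concrete numbers:

```python
# Model output: click(start_box='(500,300)') against a 1920x1080 screenshot
numbers = ["500", "300"]
floats = [float(n) / 1000 for n in numbers]            # [0.5, 0.3]
coords = [floats[0], floats[1], floats[0], floats[1]]  # a point duplicated into a box

x = int((coords[0] + coords[2]) / 2 * 1920)  # 960
y = int((coords[1] + coords[3]) / 2 * 1080)  # 324
print(x, y)
```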
elif action_type == "drag": start_box = action_inputs.get("start_box") end_box = action_inputs.get("end_box") - + if start_box and end_box: start_coords = eval(start_box) end_coords = eval(end_box) - + start_x = int((start_coords[0] + start_coords[2]) / 2 * image_width) start_y = int((start_coords[1] + start_coords[3]) / 2 * image_height) end_x = int((end_coords[0] + end_coords[2]) / 2 * image_width) end_y = int((end_coords[1] + end_coords[3]) / 2 * image_height) - + path = [{"x": start_x, "y": start_y}, {"x": end_x, "y": end_y}] computer_actions.append(make_drag_item(path)) - + return computer_actions @@ -354,33 +380,35 @@ def pil_to_base64(image: Image.Image) -> str: return base64.b64encode(buffer.getvalue()).decode("utf-8") -def process_image_for_uitars(image_data: str, max_pixels: int = MAX_PIXELS, min_pixels: int = MIN_PIXELS) -> tuple[Image.Image, int, int]: +def process_image_for_uitars( + image_data: str, max_pixels: int = MAX_PIXELS, min_pixels: int = MIN_PIXELS +) -> tuple[Image.Image, int, int]: """Process image for UITARS model input.""" # Decode base64 image - if image_data.startswith('data:image'): - image_data = image_data.split(',')[1] - + if image_data.startswith("data:image"): + image_data = image_data.split(",")[1] + image_bytes = base64.b64decode(image_data) image = Image.open(BytesIO(image_bytes)) - + original_width, original_height = image.size - + # Resize image according to UITARS requirements if image.width * image.height > max_pixels: resize_factor = math.sqrt(max_pixels / (image.width * image.height)) width = int(image.width * resize_factor) height = int(image.height * resize_factor) image = image.resize((width, height)) - + if image.width * image.height < min_pixels: resize_factor = math.sqrt(min_pixels / (image.width * image.height)) width = math.ceil(image.width * resize_factor) height = math.ceil(image.height * resize_factor) image = image.resize((width, height)) - + if image.mode != "RGB": image = image.convert("RGB") - + return image, original_width, original_height @@ -391,7 +419,11 @@ def sanitize_message(msg: Any) -> Any: for key, value in msg.items(): if key == "content" and isinstance(value, list): result[key] = [ - {k: v for k, v in item.items() if k != "image_url"} if isinstance(item, dict) else item + ( + {k: v for k, v in item.items() if k != "image_url"} + if isinstance(item, dict) + else item + ) for item in value ] else: @@ -406,38 +438,41 @@ def sanitize_message(msg: Any) -> Any: def convert_uitars_messages_to_litellm(messages: Messages) -> List[Dict[str, Any]]: """ Convert UITARS internal message format back to LiteLLM format. - + This function processes reasoning, computer_call, and computer_call_output messages and converts them to the appropriate LiteLLM assistant message format. 
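Concretely, the converter described here (its body follows in the next hunks) folds reasoning and actions into `Thought:`/`Action:` assistant turns and re-emits screenshots as user image messages. A hypothetical round-trip:

```python
items = [
    {"type": "reasoning",
     "summary": [{"type": "summary_text", "text": "The Save button is top-left."}]},
    {"type": "computer_call",
     "action": {"type": "click", "x": 412, "y": 233, "button": "left"}},
    {"type": "computer_call_output",
     "output": {"type": "input_image", "image_url": "data:image/png;base64,..."}},
]
# convert_uitars_messages_to_litellm(items) yields:
# [{"role": "assistant", "content": [{"type": "text", "text":
#      "Thought: The Save button is top-left.\nAction: click(start_box='(412,233)')"}]},
#  {"role": "user", "content": [{"type": "image_url",
#      "image_url": {"url": "data:image/png;base64,..."}}]}]
```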
- + Args: messages: List of UITARS internal messages - + Returns: List of LiteLLM formatted messages """ litellm_messages = [] current_assistant_content = [] - + for message in messages: if isinstance(message, dict): message_type = message.get("type") - + if message_type == "reasoning": # Extract reasoning text from summary summary = message.get("summary", []) if summary and isinstance(summary, list): for summary_item in summary: - if isinstance(summary_item, dict) and summary_item.get("type") == "summary_text": + if ( + isinstance(summary_item, dict) + and summary_item.get("type") == "summary_text" + ): reasoning_text = summary_item.get("text", "") if reasoning_text: current_assistant_content.append(f"Thought: {reasoning_text}") - + elif message_type == "computer_call": # Convert computer action to UITARS action format action = message.get("action", {}) action_type = action.get("type") - + if action_type == "click": x, y = action.get("x", 0), action.get("y", 0) button = action.get("button", "left") @@ -447,59 +482,65 @@ def convert_uitars_messages_to_litellm(messages: Messages) -> List[Dict[str, Any action_text = f"Action: right_single(start_box='({x},{y})')" else: action_text = f"Action: click(start_box='({x},{y})')" - + elif action_type == "double_click": x, y = action.get("x", 0), action.get("y", 0) action_text = f"Action: left_double(start_box='({x},{y})')" - + elif action_type == "drag": start_x, start_y = action.get("start_x", 0), action.get("start_y", 0) end_x, end_y = action.get("end_x", 0), action.get("end_y", 0) action_text = f"Action: drag(start_box='({start_x},{start_y})', end_box='({end_x},{end_y})')" - + elif action_type == "key": key = action.get("key", "") action_text = f"Action: hotkey(key='{key}')" - + elif action_type == "type": text = action.get("text", "") # Escape single quotes in the text escaped_text = escape_single_quotes(text) action_text = f"Action: type(content='{escaped_text}')" - + elif action_type == "scroll": x, y = action.get("x", 0), action.get("y", 0) direction = action.get("direction", "down") action_text = f"Action: scroll(start_box='({x},{y})', direction='{direction}')" - + elif action_type == "wait": action_text = "Action: wait()" - + else: # Fallback for unknown action types action_text = f"Action: {action_type}({action})" - + current_assistant_content.append(action_text) - + # When we hit a computer_call_output, finalize the current assistant message if current_assistant_content: - litellm_messages.append({ - "role": "assistant", - "content": [{"type": "text", "text": "\n".join(current_assistant_content)}] - }) + litellm_messages.append( + { + "role": "assistant", + "content": [ + {"type": "text", "text": "\n".join(current_assistant_content)} + ], + } + ) current_assistant_content = [] - + elif message_type == "computer_call_output": # Add screenshot from computer call output output = message.get("output", {}) if isinstance(output, dict) and output.get("type") == "input_image": image_url = output.get("image_url", "") if image_url: - litellm_messages.append({ - "role": "user", - "content": [{"type": "image_url", "image_url": {"url": image_url}}] - }) - + litellm_messages.append( + { + "role": "user", + "content": [{"type": "image_url", "image_url": {"url": image_url}}], + } + ) + elif message.get("role") == "user": # # Handle user messages # content = message.get("content", "") @@ -514,24 +555,22 @@ def convert_uitars_messages_to_litellm(messages: Messages) -> List[Dict[str, Any # "content": content # }) pass - + # Add any remaining assistant 
content if current_assistant_content: - litellm_messages.append({ - "role": "assistant", - "content": current_assistant_content - }) - + litellm_messages.append({"role": "assistant", "content": current_assistant_content}) + return litellm_messages + @register_agent(models=r"(?i).*ui-?tars.*") class UITARSConfig: """ UITARS agent configuration using liteLLM for ByteDance-Seed/UI-TARS-1.5-7B model. - + Supports UITARS vision-language models for computer control. """ - + async def predict_step( self, messages: List[Dict[str, Any]], @@ -545,11 +584,11 @@ class UITARSConfig: _on_api_end=None, _on_usage=None, _on_screenshot=None, - **kwargs + **kwargs, ) -> Dict[str, Any]: """ Predict the next step based on input messages. - + Args: messages: Input messages following Responses format model: Model name to use @@ -562,22 +601,22 @@ class UITARSConfig: _on_usage: Callback for usage tracking _on_screenshot: Callback for screenshot events **kwargs: Additional arguments - + Returns: Dictionary with "output" (output items) and "usage" array """ tools = tools or [] - + # Create response items response_items = [] - + # Find computer tool for screen dimensions computer_tool = None for tool_schema in tools: if tool_schema["type"] == "computer": computer_tool = tool_schema["computer"] break - + # Get screen dimensions screen_width, screen_height = 1024, 768 if computer_tool: @@ -585,20 +624,20 @@ class UITARSConfig: screen_width, screen_height = await computer_tool.get_dimensions() except: pass - + # Process messages to extract instruction and image instruction = "" image_data = None - + # Convert messages to list if string if isinstance(messages, str): messages = [{"role": "user", "content": messages}] - + # Extract instruction and latest screenshot for message in reversed(messages): if isinstance(message, dict): content = message.get("content", "") - + # Handle different content formats if isinstance(content, str): if not instruction and message.get("role") == "user": @@ -614,46 +653,41 @@ class UITARSConfig: image_data = image_url.get("url", "") else: image_data = image_url - + # Also check for computer_call_output with screenshots if message.get("type") == "computer_call_output" and not image_data: output = message.get("output", {}) if isinstance(output, dict) and output.get("type") == "input_image": image_data = output.get("image_url", "") - + if instruction and image_data: break - + if not instruction: - instruction = "Help me complete this task by analyzing the screen and taking appropriate actions." - + instruction = ( + "Help me complete this task by analyzing the screen and taking appropriate actions." + ) + # Create prompt user_prompt = UITARS_PROMPT_TEMPLATE.format( - instruction=instruction, - action_space=UITARS_ACTION_SPACE, - language="English" + instruction=instruction, action_space=UITARS_ACTION_SPACE, language="English" ) - + # Convert conversation history to LiteLLM format history_messages = convert_uitars_messages_to_litellm(messages) - + # Prepare messages for liteLLM - litellm_messages = [ - { - "role": "system", - "content": "You are a helpful assistant." 
- } - ] + litellm_messages = [{"role": "system", "content": "You are a helpful assistant."}] # Add current user instruction with screenshot current_user_message = { - "role": "user", + "role": "user", "content": [ {"type": "text", "text": user_prompt}, - ] + ], } litellm_messages.append(current_user_message) - + # Process image for UITARS if not image_data: # Take screenshot if none found in messages @@ -667,17 +701,22 @@ class UITARSConfig: raise ValueError("No screenshot found in messages and no computer_handler provided") processed_image, original_width, original_height = process_image_for_uitars(image_data) encoded_image = pil_to_base64(processed_image) - + # Add conversation history if history_messages: litellm_messages.extend(history_messages) else: - litellm_messages.append({ - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}} - ] - }) + litellm_messages.append( + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encoded_image}"}, + } + ], + } + ) # Prepare API call kwargs api_kwargs = { @@ -687,146 +726,142 @@ class UITARSConfig: "temperature": kwargs.get("temperature", 0.0), "do_sample": kwargs.get("temperature", 0.0) > 0.0, "num_retries": max_retries, - **{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]} + **{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]}, } - + # Call API start hook if _on_api_start: await _on_api_start(api_kwargs) - + # Call liteLLM with UITARS model response = await litellm.acompletion(**api_kwargs) - + # Call API end hook if _on_api_end: await _on_api_end(api_kwargs, response) - + # Extract response content - response_content = response.choices[0].message.content.strip() # type: ignore - + response_content = response.choices[0].message.content.strip() # type: ignore + # Parse UITARS response parsed_responses = parse_uitars_response(response_content, original_width, original_height) - + # Convert to computer actions - computer_actions = convert_to_computer_actions(parsed_responses, original_width, original_height) - + computer_actions = convert_to_computer_actions( + parsed_responses, original_width, original_height + ) + # Add computer actions to response items thought = parsed_responses[0].get("thought", "") if thought: response_items.append(make_reasoning_item(thought)) response_items.extend(computer_actions) - + # Extract usage information response_usage = { - **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage( + response.usage + ).model_dump(), "response_cost": response._hidden_params.get("response_cost", 0.0), } if _on_usage: await _on_usage(response_usage) # Create agent response - agent_response = { - "output": response_items, - "usage": response_usage - } - + agent_response = {"output": response_items, "usage": response_usage} + return agent_response - + async def predict_click( - self, - model: str, - image_b64: str, - instruction: str + self, model: str, image_b64: str, instruction: str ) -> Optional[Tuple[int, int]]: """ Predict click coordinates based on image and instruction. - + UITARS supports click prediction through its action parsing. 
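The action parsing mentioned here accepts two click formats; both patterns appear verbatim further down in this hunk. A quick demonstration:

```python
import re

primary = r"click\(point='<\|box_start\|>\((\d+),(\d+)\)<\|box_end\|>'\)"
fallback = r"click\((?:start_box|point)='\((\d+),(\d+)\)'\)"

m = re.search(primary, "Action: click(point='<|box_start|>(412,233)<|box_end|>')")
assert m and (int(m.group(1)), int(m.group(2))) == (412, 233)

m = re.search(fallback, "Action: click(start_box='(412,233)')")
assert m and (int(m.group(1)), int(m.group(2))) == (412, 233)
```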
- + Args: model: Model name to use image_b64: Base64 encoded image instruction: Instruction for where to click - + Returns: Tuple with (x, y) coordinates or None """ try: # Create prompt using grounding template - user_prompt = GROUNDING_UITARS_PROMPT_TEMPLATE.format( - instruction=instruction - ) - + user_prompt = GROUNDING_UITARS_PROMPT_TEMPLATE.format(instruction=instruction) + # Process image for UITARS processed_image, original_width, original_height = process_image_for_uitars(image_b64) encoded_image = pil_to_base64(processed_image) - + # Prepare messages for liteLLM litellm_messages = [ - { - "role": "system", - "content": "You are a helpful assistant." - }, + {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ {"type": "text", "text": user_prompt}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}} - ] - } + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{encoded_image}"}, + }, + ], + }, ] - + # Prepare API call kwargs api_kwargs = { "model": model, "messages": litellm_messages, "max_tokens": 2056, "temperature": 0.0, - "do_sample": False + "do_sample": False, } - + # Call liteLLM with UITARS model response = await litellm.acompletion(**api_kwargs) - + # Extract response content - response_content = response.choices[0].message.content.strip() # type: ignore - + response_content = response.choices[0].message.content.strip() # type: ignore + print(response_content) # Parse the response to extract click coordinates # Look for click action with coordinates (with special tokens) click_pattern = r"click\(point='<\|box_start\|>\((\d+),(\d+)\)<\|box_end\|>'\)" match = re.search(click_pattern, response_content) - + # Fallback: Look for simpler format without special tokens if not match: # Pattern for: click(start_box='(x,y)') or click(point='(x,y)') fallback_pattern = r"click\((?:start_box|point)='\((\d+),(\d+)\)'\)" match = re.search(fallback_pattern, response_content) - + if match: x, y = int(match.group(1)), int(match.group(2)) # Scale coordinates back to original image dimensions scale_x = original_width / processed_image.width scale_y = original_height / processed_image.height - + scaled_x = int(x * scale_x) scaled_y = int(y * scale_y) - + return (scaled_x, scaled_y) - + return None - + except Exception as e: # Log error and return None print(f"Error in predict_click: {e}") return None - + def get_capabilities(self) -> List[AgentCapability]: """ Get list of capabilities supported by this agent config. - + Returns: List of capability strings """ - return ["step", "click"] \ No newline at end of file + return ["step", "click"] diff --git a/libs/python/agent/agent/proxy/examples.py b/libs/python/agent/agent/proxy/examples.py index 2838c5df..dfe6b87c 100644 --- a/libs/python/agent/agent/proxy/examples.py +++ b/libs/python/agent/agent/proxy/examples.py @@ -1,19 +1,22 @@ """ Example usage of the proxy server and client requests. 
""" + import dotenv + dotenv.load_dotenv() import asyncio import json import os +from typing import Any, Dict + import aiohttp -from typing import Dict, Any async def test_http_endpoint(): """Test the HTTP /responses endpoint.""" - + anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") assert isinstance(anthropic_api_key, str), "ANTHROPIC_API_KEY environment variable must be set" @@ -21,11 +24,9 @@ async def test_http_endpoint(): simple_request = { "model": "anthropic/claude-3-5-sonnet-20241022", "input": "Tell me a three sentence bedtime story about a unicorn.", - "env": { - "ANTHROPIC_API_KEY": anthropic_api_key - } + "env": {"ANTHROPIC_API_KEY": anthropic_api_key}, } - + # Example 2: Multi-modal request with image multimodal_request = { "model": "anthropic/claude-3-5-sonnet-20241022", @@ -36,70 +37,72 @@ async def test_http_endpoint(): {"type": "input_text", "text": "what is in this image?"}, { "type": "input_image", - "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - ] + "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + }, + ], } ], - "env": { - "ANTHROPIC_API_KEY": anthropic_api_key - } + "env": {"ANTHROPIC_API_KEY": anthropic_api_key}, } - + # Example 3: Request with custom agent and computer kwargs custom_request = { "model": "anthropic/claude-3-5-sonnet-20241022", "input": "Take a screenshot and tell me what you see", - "env": { - "ANTHROPIC_API_KEY": anthropic_api_key - } + "env": {"ANTHROPIC_API_KEY": anthropic_api_key}, } - + # Test requests base_url = "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443" # base_url = "http://localhost:8000" api_key = os.getenv("CUA_API_KEY") assert isinstance(api_key, str), "CUA_API_KEY environment variable must be set" - + async with aiohttp.ClientSession() as session: - for i, request_data in enumerate([ - simple_request, - # multimodal_request, - custom_request - ], 1): + for i, request_data in enumerate( + [ + simple_request, + # multimodal_request, + custom_request, + ], + 1, + ): print(f"\n--- Test {i} ---") print(f"Request: {json.dumps(request_data, indent=2)}") - + try: print(f"Sending request to {base_url}/responses") async with session.post( f"{base_url}/responses", json=request_data, - headers={"Content-Type": "application/json", "X-API-Key": api_key} + headers={"Content-Type": "application/json", "X-API-Key": api_key}, ) as response: result = await response.json() print(f"Status: {response.status}") print(f"Response: {json.dumps(result, indent=2)}") - + except Exception as e: print(f"Error: {e}") def curl_examples(): """Print curl command examples.""" - + print("=== CURL Examples ===\n") - + print("1. Simple text request:") - print("""curl http://localhost:8000/responses \\ + print( + """curl http://localhost:8000/responses \\ -H "Content-Type: application/json" \\ -d '{ "model": "anthropic/claude-3-5-sonnet-20241022", "input": "Tell me a three sentence bedtime story about a unicorn." - }'""") - + }'""" + ) + print("\n2. Multi-modal request with image:") - print("""curl http://localhost:8000/responses \\ + print( + """curl http://localhost:8000/responses \\ -H "Content-Type: application/json" \\ -d '{ "model": "anthropic/claude-3-5-sonnet-20241022", @@ -115,10 +118,12 @@ def curl_examples(): ] } ] - }'""") - + }'""" + ) + print("\n3. 
Request with custom configuration:") - print("""curl http://localhost:8000/responses \\ + print( + """curl http://localhost:8000/responses \\ -H "Content-Type: application/json" \\ -d '{ "model": "anthropic/claude-3-5-sonnet-20241022", @@ -131,50 +136,49 @@ def curl_examples(): "os_type": "linux", "provider_type": "cloud" } - }'""") + }'""" + ) async def test_p2p_client(): """Example P2P client using peerjs-python.""" try: - from peerjs import Peer, PeerOptions, ConnectionEventType from aiortc import RTCConfiguration, RTCIceServer - + from peerjs import ConnectionEventType, Peer, PeerOptions + # Set up client peer options = PeerOptions( host="0.peerjs.com", port=443, secure=True, - config=RTCConfiguration( - iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")] - ) + config=RTCConfiguration(iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]), ) - + client_peer = Peer(id="test-client", peer_options=options) await client_peer.start() - + # Connect to proxy server connection = client_peer.connect("computer-agent-proxy") - + @connection.on(ConnectionEventType.Open) async def connection_open(): print("Connected to proxy server") - + # Send a test request request = { "model": "anthropic/claude-3-5-sonnet-20241022", - "input": "Hello from P2P client!" + "input": "Hello from P2P client!", } await connection.send(json.dumps(request)) - + @connection.on(ConnectionEventType.Data) async def connection_data(data): print(f"Received response: {data}") await client_peer.destroy() - + # Wait for connection await asyncio.sleep(10) - + except ImportError: print("P2P dependencies not available. Install peerjs-python for P2P testing.") except Exception as e: @@ -183,7 +187,7 @@ async def test_p2p_client(): if __name__ == "__main__": import sys - + if len(sys.argv) > 1 and sys.argv[1] == "curl": curl_examples() elif len(sys.argv) > 1 and sys.argv[1] == "p2p": diff --git a/libs/python/agent/agent/proxy/handlers.py b/libs/python/agent/agent/proxy/handlers.py index f68952b0..52041579 100644 --- a/libs/python/agent/agent/proxy/handlers.py +++ b/libs/python/agent/agent/proxy/handlers.py @@ -7,24 +7,25 @@ import json import logging import os from contextlib import contextmanager -from typing import Dict, Any, List, Union, Optional +from typing import Any, Dict, List, Optional, Union + +from computer import Computer from ..agent import ComputerAgent -from computer import Computer logger = logging.getLogger(__name__) class ResponsesHandler: """Handler for /responses endpoint that processes agent requests.""" - + def __init__(self): self.computer = None self.agent = None # Simple in-memory caches self._computer_cache: Dict[str, Any] = {} self._agent_cache: Dict[str, Any] = {} - + async def setup_computer_agent( self, model: str, @@ -75,7 +76,9 @@ class ResponsesHandler: computer = Computer(**default_c_config) await computer.__aenter__() self._computer_cache[comp_key] = computer - logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}") + logger.info( + f"Computer created and cached with key={comp_key} config={default_c_config}" + ) else: logger.info(f"Reusing cached computer for key={comp_key}") @@ -115,14 +118,14 @@ class ResponsesHandler: # Bind current agent reference self.agent = agent - + async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]: """ Process a /responses request and return the result. 
- + Args: request_data: Dictionary containing model, input, and optional kwargs - + Returns: Dictionary with the agent's response """ @@ -133,12 +136,12 @@ class ResponsesHandler: agent_kwargs = request_data.get("agent_kwargs", {}) computer_kwargs = request_data.get("computer_kwargs", {}) env_overrides = request_data.get("env", {}) or {} - + if not model: raise ValueError("Model is required") if not input_data: raise ValueError("Input is required") - + # Apply env overrides for the duration of this request with self._env_overrides(env_overrides): # Set up (and possibly reuse) computer and agent via caches @@ -155,28 +158,22 @@ class ResponsesHandler: # Run agent and get first result async for result in agent.run(messages): # Return the first result and break - return { - "success": True, - "result": result, - "model": model - } - + return {"success": True, "result": result, "model": model} + # If no results were yielded - return { - "success": False, - "error": "No results from agent", - "model": model - } - + return {"success": False, "error": "No results from agent", "model": model} + except Exception as e: logger.error(f"Error processing request: {e}") return { "success": False, "error": str(e), - "model": request_data.get("model", "unknown") + "model": request_data.get("model", "unknown"), } - - def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + + def _convert_input_to_messages( + self, input_data: Union[str, List[Dict[str, Any]]] + ) -> List[Dict[str, Any]]: """Convert input data to messages format.""" if isinstance(input_data, str): # Simple string input @@ -192,22 +189,18 @@ class ResponsesHandler: if part.get("type") == "input_text": content_parts.append({"type": "text", "text": part["text"]}) elif part.get("type") == "input_image": - content_parts.append({ - "type": "image_url", - "image_url": {"url": part["image_url"]} - }) + content_parts.append( + {"type": "image_url", "image_url": {"url": part["image_url"]}} + ) else: content_parts.append(part) - messages.append({ - "role": msg["role"], - "content": content_parts - }) + messages.append({"role": msg["role"], "content": content_parts}) else: messages.append(msg) return messages else: raise ValueError("Input must be string or list of messages") - + async def cleanup(self): """Clean up resources.""" if self.computer: diff --git a/libs/python/agent/agent/responses.py b/libs/python/agent/agent/responses.py index 34318bce..bbb1975d 100644 --- a/libs/python/agent/agent/responses.py +++ b/libs/python/agent/agent/responses.py @@ -6,10 +6,10 @@ Based on the OpenAI spec for Responses API items. 
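# --- Illustrative sketch (not part of the diff above): the request shape that
# ResponsesHandler.process_request() accepts. The model and API key values are
# placeholders; "env" entries apply only for the duration of the request.
import asyncio

from agent.proxy.handlers import ResponsesHandler

handler = ResponsesHandler()
request_data = {
    "model": "anthropic/claude-3-5-sonnet-20241022",
    "input": "Take a screenshot and describe what you see",
    "env": {"ANTHROPIC_API_KEY": "sk-placeholder"},
}
result = asyncio.run(handler.process_request(request_data))
print(result["success"], result.get("result") or result.get("error"))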
import base64 import json import uuid -from typing import List, Dict, Any, Literal, Union, Optional +from typing import Any, Dict, List, Literal, Optional, Union +from openai.types.responses.easy_input_message_param import EasyInputMessageParam from openai.types.responses.response_computer_tool_call_param import ( - ResponseComputerToolCallParam, ActionClick, ActionDoubleClick, ActionDrag, @@ -18,224 +18,222 @@ from openai.types.responses.response_computer_tool_call_param import ( ActionMove, ActionScreenshot, ActionScroll, +) +from openai.types.responses.response_computer_tool_call_param import ( ActionType as ActionTypeAction, +) +from openai.types.responses.response_computer_tool_call_param import ( ActionWait, - PendingSafetyCheck + PendingSafetyCheck, + ResponseComputerToolCallParam, +) +from openai.types.responses.response_function_tool_call_param import ( + ResponseFunctionToolCallParam, +) +from openai.types.responses.response_input_image_param import ResponseInputImageParam +from openai.types.responses.response_output_message_param import ( + ResponseOutputMessageParam, +) +from openai.types.responses.response_output_text_param import ResponseOutputTextParam +from openai.types.responses.response_reasoning_item_param import ( + ResponseReasoningItemParam, + Summary, ) -from openai.types.responses.response_function_tool_call_param import ResponseFunctionToolCallParam -from openai.types.responses.response_output_text_param import ResponseOutputTextParam -from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary -from openai.types.responses.response_output_message_param import ResponseOutputMessageParam -from openai.types.responses.easy_input_message_param import EasyInputMessageParam -from openai.types.responses.response_input_image_param import ResponseInputImageParam def random_id(): return str(uuid.uuid4()) + # User message items def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessageParam: return EasyInputMessageParam( content=[ ResponseInputImageParam( type="input_image", - image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}" - ) # type: ignore + image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}", + ) # type: ignore ], role="user", - type="message" + type="message", ) + # Text items def make_reasoning_item(reasoning: str) -> ResponseReasoningItemParam: return ResponseReasoningItemParam( - id=random_id(), - summary=[ - Summary(text=reasoning, type="summary_text") - ], - type="reasoning" + id=random_id(), summary=[Summary(text=reasoning, type="summary_text")], type="reasoning" ) + def make_output_text_item(content: str) -> ResponseOutputMessageParam: return ResponseOutputMessageParam( id=random_id(), - content=[ - ResponseOutputTextParam( - text=content, - type="output_text", - annotations=[] - ) - ], + content=[ResponseOutputTextParam(text=content, type="output_text", annotations=[])], role="assistant", status="completed", - type="message" + type="message", ) + # Function call items -def make_function_call_item(function_name: str, arguments: Dict[str, Any], call_id: Optional[str] = None) -> ResponseFunctionToolCallParam: +def make_function_call_item( + function_name: str, arguments: Dict[str, Any], call_id: Optional[str] = None +) -> ResponseFunctionToolCallParam: return ResponseFunctionToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), 
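# --- Illustrative sketch (not part of the diff above): the dict shape the item
# constructors produce, using make_output_text_item() from above. The id is a
# fresh uuid4 on every call.
from agent.responses import make_output_text_item

item = make_output_text_item("Done. The file was saved.")
# Roughly:
# {"id": "<uuid4>", "type": "message", "role": "assistant", "status": "completed",
#  "content": [{"type": "output_text", "text": "Done. The file was saved.", "annotations": []}]}
print(item["type"], item["content"][0]["text"])  # -> message Done. The file was saved.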
name=function_name, arguments=json.dumps(arguments), status="completed", - type="function_call" + type="function_call", ) + # Computer tool call items -def make_click_item(x: int, y: int, button: Literal["left", "right", "wheel", "back", "forward"] = "left", call_id: Optional[str] = None) -> ResponseComputerToolCallParam: +def make_click_item( + x: int, + y: int, + button: Literal["left", "right", "wheel", "back", "forward"] = "left", + call_id: Optional[str] = None, +) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionClick( - button=button, - type="click", - x=x, - y=y - ), + action=ActionClick(button=button, type="click", x=x, y=y), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) -def make_double_click_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam: + +def make_double_click_item( + x: int, y: int, call_id: Optional[str] = None +) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionDoubleClick( - type="double_click", - x=x, - y=y - ), + action=ActionDoubleClick(type="double_click", x=x, y=y), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) -def make_drag_item(path: List[Dict[str, int]], call_id: Optional[str] = None) -> ResponseComputerToolCallParam: + +def make_drag_item( + path: List[Dict[str, int]], call_id: Optional[str] = None +) -> ResponseComputerToolCallParam: drag_path = [ActionDragPath(x=point["x"], y=point["y"]) for point in path] return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionDrag( - path=drag_path, - type="drag" - ), + action=ActionDrag(path=drag_path, type="drag"), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) -def make_keypress_item(keys: List[str], call_id: Optional[str] = None) -> ResponseComputerToolCallParam: + +def make_keypress_item( + keys: List[str], call_id: Optional[str] = None +) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionKeypress( - keys=keys, - type="keypress" - ), + action=ActionKeypress(keys=keys, type="keypress"), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) + def make_move_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionMove( - type="move", - x=x, - y=y - ), + action=ActionMove(type="move", x=x, y=y), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) + def make_screenshot_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionScreenshot( - type="screenshot" - ), + action=ActionScreenshot(type="screenshot"), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) -def make_scroll_item(x: int, y: int, scroll_x: int, scroll_y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam: + +def make_scroll_item( + x: int, y: int, scroll_x: int, scroll_y: int, call_id: Optional[str] = None +) -> 
ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionScroll( - scroll_x=scroll_x, - scroll_y=scroll_y, - type="scroll", - x=x, - y=y - ), + action=ActionScroll(scroll_x=scroll_x, scroll_y=scroll_y, type="scroll", x=x, y=y), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) + def make_type_item(text: str, call_id: Optional[str] = None) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionTypeAction( - text=text, - type="type" - ), + action=ActionTypeAction(text=text, type="type"), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) + def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam: return ResponseComputerToolCallParam( id=random_id(), call_id=call_id if call_id else random_id(), - action=ActionWait( - type="wait" - ), + action=ActionWait(type="wait"), pending_safety_checks=[], status="completed", - type="computer_call" + type="computer_call", ) + # Extra anthropic computer calls -def make_left_mouse_down_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]: +def make_left_mouse_down_item( + x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None +) -> Dict[str, Any]: return { "id": random_id(), "call_id": call_id if call_id else random_id(), - "action": { - "type": "left_mouse_down", - "x": x, - "y": y - }, + "action": {"type": "left_mouse_down", "x": x, "y": y}, "pending_safety_checks": [], "status": "completed", - "type": "computer_call" + "type": "computer_call", } -def make_left_mouse_up_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]: + +def make_left_mouse_up_item( + x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None +) -> Dict[str, Any]: return { "id": random_id(), "call_id": call_id if call_id else random_id(), - "action": { - "type": "left_mouse_up", - "x": x, - "y": y - }, + "action": {"type": "left_mouse_up", "x": x, "y": y}, "pending_safety_checks": [], "status": "completed", - "type": "computer_call" + "type": "computer_call", } -def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], error_message: str, call_id: Optional[str] = None) -> List[Dict[str, Any]]: + +def make_failed_tool_call_items( + tool_name: str, tool_kwargs: Dict[str, Any], error_message: str, call_id: Optional[str] = None +) -> List[Dict[str, Any]]: call_id = call_id if call_id else random_id() return [ { @@ -249,9 +247,10 @@ def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], err "type": "function_call_output", "call_id": call_id, "output": json.dumps({"error": error_message}), - } + }, ] + def make_tool_error_item(error_message: str, call_id: Optional[str] = None) -> Dict[str, Any]: call_id = call_id if call_id else random_id() return { @@ -260,12 +259,15 @@ def make_tool_error_item(error_message: str, call_id: Optional[str] = None) -> D "output": json.dumps({"error": error_message}), } -def replace_failed_computer_calls_with_function_calls(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + +def replace_failed_computer_calls_with_function_calls( + messages: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: """ Replace computer_call items with function_call items if they share a call_id with a 
function_call_output. This indicates the computer call failed and should be treated as a function call instead. We do this because the computer_call_output items do not support text output. - + Args: messages: List of message items to process """ @@ -278,16 +280,15 @@ def replace_failed_computer_calls_with_function_calls(messages: List[Dict[str, A call_id = msg.get("call_id") if call_id: failed_call_ids.add(call_id) - + # Replace computer_call items that have matching call_ids for i, msg in enumerate(messages): - if (msg.get("type") == "computer_call" and - msg.get("call_id") in failed_call_ids): - + if msg.get("type") == "computer_call" and msg.get("call_id") in failed_call_ids: + # Extract action from computer_call action = msg.get("action", {}) call_id = msg.get("call_id") - + # Create function_call replacement messages[i] = { "type": "function_call", @@ -296,27 +297,30 @@ def replace_failed_computer_calls_with_function_calls(messages: List[Dict[str, A "name": "computer", "arguments": json.dumps(action), } - + return messages + # Conversion functions between element descriptions and coordinates -def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]: +def convert_computer_calls_desc2xy( + responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple] +) -> List[Dict[str, Any]]: """ Convert computer calls from element descriptions to x,y coordinates. - + Args: responses_items: List of response items containing computer calls with element_description desc2xy: Dictionary mapping element descriptions to (x, y) coordinate tuples - + Returns: List of response items with element_description replaced by x,y coordinates """ converted_items = [] - + for item in responses_items: if item.get("type") == "computer_call" and "action" in item: action = item["action"].copy() - + # Handle single element_description if "element_description" in action: desc = action["element_description"] @@ -325,48 +329,50 @@ def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2x action["x"] = x action["y"] = y del action["element_description"] - + # Handle start_element_description and end_element_description for drag operations elif "start_element_description" in action and "end_element_description" in action: start_desc = action["start_element_description"] end_desc = action["end_element_description"] - + if start_desc in desc2xy and end_desc in desc2xy: start_x, start_y = desc2xy[start_desc] end_x, end_y = desc2xy[end_desc] action["path"] = [{"x": start_x, "y": start_y}, {"x": end_x, "y": end_y}] del action["start_element_description"] del action["end_element_description"] - + converted_item = item.copy() converted_item["action"] = action converted_items.append(converted_item) else: converted_items.append(item) - + return converted_items -def convert_computer_calls_xy2desc(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]: +def convert_computer_calls_xy2desc( + responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple] +) -> List[Dict[str, Any]]: """ Convert computer calls from x,y coordinates to element descriptions. 
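# --- Illustrative sketch (not part of the diff above): a failed computer_call
# (one whose call_id also appears on a function_call_output) gets rewritten into
# a function_call named "computer". The call_id and error text are made up.
from agent.responses import replace_failed_computer_calls_with_function_calls

msgs = [
    {"type": "computer_call", "call_id": "c1", "status": "completed",
     "action": {"type": "click", "button": "left", "x": 10, "y": 20}},
    {"type": "function_call_output", "call_id": "c1",
     "output": '{"error": "element not found"}'},
]
fixed = replace_failed_computer_calls_with_function_calls(msgs)
print(fixed[0]["type"], fixed[0]["name"])  # -> function_call computer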
- + Args: responses_items: List of response items containing computer calls with x,y coordinates desc2xy: Dictionary mapping element descriptions to (x, y) coordinate tuples - + Returns: List of response items with x,y coordinates replaced by element_description """ # Create reverse mapping from coordinates to descriptions xy2desc = {coords: desc for desc, coords in desc2xy.items()} - + converted_items = [] - + for item in responses_items: if item.get("type") == "computer_call" and "action" in item: action = item["action"].copy() - + # Handle single x,y coordinates if "x" in action and "y" in action: coords = (action["x"], action["y"]) @@ -374,77 +380,83 @@ def convert_computer_calls_xy2desc(responses_items: List[Dict[str, Any]], desc2x action["element_description"] = xy2desc[coords] del action["x"] del action["y"] - + # Handle path for drag operations elif "path" in action and isinstance(action["path"], list) and len(action["path"]) == 2: start_point = action["path"][0] end_point = action["path"][1] - - if ("x" in start_point and "y" in start_point and - "x" in end_point and "y" in end_point): - + + if ( + "x" in start_point + and "y" in start_point + and "x" in end_point + and "y" in end_point + ): + start_coords = (start_point["x"], start_point["y"]) end_coords = (end_point["x"], end_point["y"]) - + if start_coords in xy2desc and end_coords in xy2desc: action["start_element_description"] = xy2desc[start_coords] action["end_element_description"] = xy2desc[end_coords] del action["path"] - + converted_item = item.copy() converted_item["action"] = action converted_items.append(converted_item) else: converted_items.append(item) - + return converted_items def get_all_element_descriptions(responses_items: List[Dict[str, Any]]) -> List[str]: """ Extract all element descriptions from computer calls in responses items. - + Args: responses_items: List of response items containing computer calls - + Returns: List of unique element descriptions found in computer calls """ descriptions = set() - + for item in responses_items: if item.get("type") == "computer_call" and "action" in item: action = item["action"] - + # Handle single element_description if "element_description" in action: descriptions.add(action["element_description"]) - + # Handle start_element_description and end_element_description for drag operations if "start_element_description" in action: descriptions.add(action["start_element_description"]) - + if "end_element_description" in action: descriptions.add(action["end_element_description"]) - + return list(descriptions) # Conversion functions between responses_items and completion messages formats -def convert_responses_items_to_completion_messages(messages: List[Dict[str, Any]], allow_images_in_tool_results: bool = True) -> List[Dict[str, Any]]: +def convert_responses_items_to_completion_messages( + messages: List[Dict[str, Any]], allow_images_in_tool_results: bool = True +) -> List[Dict[str, Any]]: """Convert responses_items message format to liteLLM completion format. - + Args: messages: List of responses_items format messages allow_images_in_tool_results: If True, include images in tool role messages. If False, send tool message + separate user message with image. 
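# --- Illustrative sketch (not part of the diff above): round-tripping a click
# through the coordinate<->description converters. The description string and
# coordinates are made up.
from agent.responses import (
    convert_computer_calls_desc2xy,
    convert_computer_calls_xy2desc,
    get_all_element_descriptions,
)

desc2xy = {"blue 'Submit' button": (412, 630)}
items = [{"type": "computer_call", "call_id": "c1", "status": "completed",
          "action": {"type": "click", "button": "left", "x": 412, "y": 630}}]

desc_items = convert_computer_calls_xy2desc(items, desc2xy)
print(get_all_element_descriptions(desc_items))  # -> ["blue 'Submit' button"]
print(convert_computer_calls_desc2xy(desc_items, desc2xy)[0]["action"])
# -> {"type": "click", "button": "left", "x": 412, "y": 630}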
""" completion_messages = [] - + for message in messages: msg_type = message.get("type") role = message.get("role") - + # Handle user messages (both with and without explicit type) if role == "user" or msg_type == "user": content = message.get("content", "") @@ -453,34 +465,19 @@ def convert_responses_items_to_completion_messages(messages: List[Dict[str, Any] completion_content = [] for item in content: if item.get("type") == "input_image": - completion_content.append({ - "type": "image_url", - "image_url": { - "url": item.get("image_url") - } - }) + completion_content.append( + {"type": "image_url", "image_url": {"url": item.get("image_url")}} + ) elif item.get("type") == "input_text": - completion_content.append({ - "type": "text", - "text": item.get("text") - }) + completion_content.append({"type": "text", "text": item.get("text")}) elif item.get("type") == "text": - completion_content.append({ - "type": "text", - "text": item.get("text") - }) - - completion_messages.append({ - "role": "user", - "content": completion_content - }) + completion_content.append({"type": "text", "text": item.get("text")}) + + completion_messages.append({"role": "user", "content": completion_content}) elif isinstance(content, str): # Handle string content - completion_messages.append({ - "role": "user", - "content": content - }) - + completion_messages.append({"role": "user", "content": content}) + # Handle assistant messages elif role == "assistant" or msg_type == "message": content = message.get("content", []) @@ -491,13 +488,12 @@ def convert_responses_items_to_completion_messages(messages: List[Dict[str, Any] text_parts.append(item.get("text", "")) elif item.get("type") == "text": text_parts.append(item.get("text", "")) - + if text_parts: - completion_messages.append({ - "role": "assistant", - "content": "\n".join(text_parts) - }) - + completion_messages.append( + {"role": "assistant", "content": "\n".join(text_parts)} + ) + # Handle reasoning items (convert to assistant message) elif msg_type == "reasoning": summary = message.get("summary", []) @@ -505,107 +501,96 @@ def convert_responses_items_to_completion_messages(messages: List[Dict[str, Any] for item in summary: if item.get("type") == "summary_text": text_parts.append(item.get("text", "")) - + if text_parts: - completion_messages.append({ - "role": "assistant", - "content": "\n".join(text_parts) - }) - + completion_messages.append({"role": "assistant", "content": "\n".join(text_parts)}) + # Handle function calls elif msg_type == "function_call": # Add tool call to last assistant message or create new one if not completion_messages or completion_messages[-1]["role"] != "assistant": - completion_messages.append({ - "role": "assistant", - "content": "", - "tool_calls": [] - }) - + completion_messages.append({"role": "assistant", "content": "", "tool_calls": []}) + if "tool_calls" not in completion_messages[-1]: completion_messages[-1]["tool_calls"] = [] - - completion_messages[-1]["tool_calls"].append({ - "id": message.get("call_id"), - "type": "function", - "function": { - "name": message.get("name"), - "arguments": message.get("arguments") + + completion_messages[-1]["tool_calls"].append( + { + "id": message.get("call_id"), + "type": "function", + "function": { + "name": message.get("name"), + "arguments": message.get("arguments"), + }, } - }) - + ) + # Handle computer calls elif msg_type == "computer_call": # Add tool call to last assistant message or create new one if not completion_messages or completion_messages[-1]["role"] != "assistant": - 
completion_messages.append({ - "role": "assistant", - "content": "", - "tool_calls": [] - }) - + completion_messages.append({"role": "assistant", "content": "", "tool_calls": []}) + if "tool_calls" not in completion_messages[-1]: completion_messages[-1]["tool_calls"] = [] - + action = message.get("action", {}) - completion_messages[-1]["tool_calls"].append({ - "id": message.get("call_id"), - "type": "function", - "function": { - "name": "computer", - "arguments": json.dumps(action) + completion_messages[-1]["tool_calls"].append( + { + "id": message.get("call_id"), + "type": "function", + "function": {"name": "computer", "arguments": json.dumps(action)}, } - }) - + ) + # Handle function/computer call outputs elif msg_type in ["function_call_output", "computer_call_output"]: output = message.get("output") call_id = message.get("call_id") - + if isinstance(output, dict) and output.get("type") == "input_image": if allow_images_in_tool_results: # Handle image output as tool response (may not work with all APIs) - completion_messages.append({ - "role": "tool", - "tool_call_id": call_id, - "content": [{ - "type": "image_url", - "image_url": { - "url": output.get("image_url") - } - }] - }) + completion_messages.append( + { + "role": "tool", + "tool_call_id": call_id, + "content": [ + {"type": "image_url", "image_url": {"url": output.get("image_url")}} + ], + } + ) else: # Send tool message + separate user message with image (OpenAI compatible) - completion_messages += [{ - "role": "tool", - "tool_call_id": call_id, - "content": "[Execution completed. See screenshot below]" - }, { - "role": "user", - "content": [{ - "type": "image_url", - "image_url": { - "url": output.get("image_url") - } - }] - }] + completion_messages += [ + { + "role": "tool", + "tool_call_id": call_id, + "content": "[Execution completed. 
See screenshot below]", + }, + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": output.get("image_url")}} + ], + }, + ] else: # Handle text output as tool response - completion_messages.append({ - "role": "tool", - "tool_call_id": call_id, - "content": str(output) - }) - + completion_messages.append( + {"role": "tool", "tool_call_id": call_id, "content": str(output)} + ) + return completion_messages -def convert_completion_messages_to_responses_items(completion_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: +def convert_completion_messages_to_responses_items( + completion_messages: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: """Convert completion messages format to responses_items message format.""" responses_items = [] skip_next = False - + for i, message in enumerate(completion_messages): if skip_next: skip_next = False @@ -614,25 +599,24 @@ def convert_completion_messages_to_responses_items(completion_messages: List[Dic role = message.get("role") content = message.get("content") tool_calls = message.get("tool_calls", []) - + # Handle assistant messages with text content if role == "assistant" and content and isinstance(content, str): - responses_items.append({ - "type": "message", - "role": "assistant", - "content": [{ - "type": "output_text", - "text": content - }] - }) - + responses_items.append( + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": content}], + } + ) + # Handle tool calls if tool_calls: for tool_call in tool_calls: if tool_call.get("type") == "function": function = tool_call.get("function", {}) function_name = function.get("name") - + if function_name == "computer": # Parse computer action try: @@ -641,31 +625,37 @@ def convert_completion_messages_to_responses_items(completion_messages: List[Dic if action.get("action"): action["type"] = action["action"] del action["action"] - responses_items.append({ - "type": "computer_call", - "call_id": tool_call.get("id"), - "action": action, - "status": "completed" - }) + responses_items.append( + { + "type": "computer_call", + "call_id": tool_call.get("id"), + "action": action, + "status": "completed", + } + ) except json.JSONDecodeError: # Fallback to function call format - responses_items.append({ + responses_items.append( + { + "type": "function_call", + "call_id": tool_call.get("id"), + "name": function_name, + "arguments": function.get("arguments", "{}"), + "status": "completed", + } + ) + else: + # Regular function call + responses_items.append( + { "type": "function_call", "call_id": tool_call.get("id"), "name": function_name, "arguments": function.get("arguments", "{}"), - "status": "completed" - }) - else: - # Regular function call - responses_items.append({ - "type": "function_call", - "call_id": tool_call.get("id"), - "name": function_name, - "arguments": function.get("arguments", "{}"), - "status": "completed" - }) - + "status": "completed", + } + ) + # Handle tool messages (function/computer call outputs) elif role == "tool" and content: tool_call_id = message.get("tool_call_id") @@ -674,74 +664,90 @@ def convert_completion_messages_to_responses_items(completion_messages: List[Dic if content == "[Execution completed. 
See screenshot below]": # Look ahead for the next user message with image next_idx = i + 1 - if (next_idx < len(completion_messages) and - completion_messages[next_idx].get("role") == "user" and - isinstance(completion_messages[next_idx].get("content"), list)): + if ( + next_idx < len(completion_messages) + and completion_messages[next_idx].get("role") == "user" + and isinstance(completion_messages[next_idx].get("content"), list) + ): # Found the pattern - extract image from next message next_content = completion_messages[next_idx]["content"] for item in next_content: if item.get("type") == "image_url": - responses_items.append({ - "type": "computer_call_output", - "call_id": tool_call_id, - "output": { - "type": "input_image", - "image_url": item.get("image_url", {}).get("url") + responses_items.append( + { + "type": "computer_call_output", + "call_id": tool_call_id, + "output": { + "type": "input_image", + "image_url": item.get("image_url", {}).get("url"), + }, } - }) + ) # Skip the next user message since we processed it skip_next = True break else: # No matching user message, treat as regular text - responses_items.append({ - "type": "computer_call_output", - "call_id": tool_call_id, - "output": content - }) + responses_items.append( + { + "type": "computer_call_output", + "call_id": tool_call_id, + "output": content, + } + ) else: # Determine if this is a computer call or function call output try: # Try to parse as structured output parsed_content = json.loads(content) if parsed_content.get("type") == "input_image": - responses_items.append({ - "type": "computer_call_output", - "call_id": tool_call_id, - "output": parsed_content - }) + responses_items.append( + { + "type": "computer_call_output", + "call_id": tool_call_id, + "output": parsed_content, + } + ) else: - responses_items.append({ - "type": "computer_call_output", - "call_id": tool_call_id, - "output": content - }) + responses_items.append( + { + "type": "computer_call_output", + "call_id": tool_call_id, + "output": content, + } + ) except json.JSONDecodeError: # Plain text output - could be function or computer call - responses_items.append({ - "type": "function_call_output", - "call_id": tool_call_id, - "output": content - }) + responses_items.append( + { + "type": "function_call_output", + "call_id": tool_call_id, + "output": content, + } + ) elif isinstance(content, list): # Handle structured content (e.g., images) for item in content: if item.get("type") == "image_url": - responses_items.append({ - "type": "computer_call_output", - "call_id": tool_call_id, - "output": { - "type": "input_image", - "image_url": item.get("image_url", {}).get("url") + responses_items.append( + { + "type": "computer_call_output", + "call_id": tool_call_id, + "output": { + "type": "input_image", + "image_url": item.get("image_url", {}).get("url"), + }, } - }) + ) elif item.get("type") == "text": - responses_items.append({ - "type": "function_call_output", - "call_id": tool_call_id, - "output": item.get("text") - }) - + responses_items.append( + { + "type": "function_call_output", + "call_id": tool_call_id, + "output": item.get("text"), + } + ) + # Handle actual user messages elif role == "user" and content: if isinstance(content, list): @@ -749,27 +755,21 @@ def convert_completion_messages_to_responses_items(completion_messages: List[Dic user_content = [] for item in content: if item.get("type") == "image_url": - user_content.append({ - "type": "input_image", - "image_url": item.get("image_url", {}).get("url") - }) + user_content.append( + { 
+ "type": "input_image", + "image_url": item.get("image_url", {}).get("url"), + } + ) elif item.get("type") == "text": - user_content.append({ - "type": "input_text", - "text": item.get("text") - }) - + user_content.append({"type": "input_text", "text": item.get("text")}) + if user_content: - responses_items.append({ - "role": "user", - "type": "message", - "content": user_content - }) + responses_items.append( + {"role": "user", "type": "message", "content": user_content} + ) elif isinstance(content, str): # Handle simple text user message - responses_items.append({ - "role": "user", - "content": content - }) - + responses_items.append({"role": "user", "content": content}) + return responses_items diff --git a/libs/python/agent/agent/types.py b/libs/python/agent/agent/types.py index f47c9286..c28fb6b8 100644 --- a/libs/python/agent/agent/types.py +++ b/libs/python/agent/agent/types.py @@ -2,37 +2,43 @@ Type definitions for agent """ -from typing import Dict, List, Any, Optional, Callable, Protocol, Literal -from pydantic import BaseModel import re -from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam from collections.abc import Iterable +from typing import Any, Callable, Dict, List, Literal, Optional, Protocol + +from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam +from pydantic import BaseModel # Agent input types Messages = str | ResponseInputParam | List[Dict[str, Any]] Tools = Optional[Iterable[ToolParam]] # Agent output types -AgentResponse = ResponsesAPIResponse +AgentResponse = ResponsesAPIResponse AgentCapability = Literal["step", "click"] + # Exception types class ToolError(RuntimeError): """Base exception for tool-related errors""" + pass + class IllegalArgumentError(ToolError): """Exception raised when function arguments are invalid""" + pass # Agent config registration class AgentConfigInfo(BaseModel): """Information about a registered agent config""" + agent_class: type models_regex: str priority: int = 0 - + def matches_model(self, model: str) -> bool: """Check if this agent config matches the given model""" return bool(re.match(self.models_regex, model)) diff --git a/libs/python/agent/agent/ui/__init__.py b/libs/python/agent/agent/ui/__init__.py index ae5ced7a..92b0bf69 100644 --- a/libs/python/agent/agent/ui/__init__.py +++ b/libs/python/agent/agent/ui/__init__.py @@ -2,6 +2,6 @@ UI components for agent """ -from .gradio import launch_ui, create_gradio_ui +from .gradio import create_gradio_ui, launch_ui __all__ = ["launch_ui", "create_gradio_ui"] diff --git a/libs/python/agent/agent/ui/__main__.py b/libs/python/agent/agent/ui/__main__.py index 4ac782a5..f4bf2892 100644 --- a/libs/python/agent/agent/ui/__main__.py +++ b/libs/python/agent/agent/ui/__main__.py @@ -1,4 +1,4 @@ from .gradio import launch_ui if __name__ == "__main__": - launch_ui() \ No newline at end of file + launch_ui() diff --git a/libs/python/agent/agent/ui/gradio/app.py b/libs/python/agent/agent/ui/gradio/app.py index 196194de..1a2fb023 100644 --- a/libs/python/agent/agent/ui/gradio/app.py +++ b/libs/python/agent/agent/ui/gradio/app.py @@ -18,21 +18,21 @@ Requirements: - OpenAI or Anthropic API key """ -import os import asyncio -import logging import json +import logging +import os import platform from pathlib import Path -from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union, cast + import gradio as gr -from gradio.components.chatbot import MetadataDict -from typing 
import cast # Import from agent package from agent import ComputerAgent -from agent.types import Messages, AgentResponse +from agent.types import AgentResponse, Messages from computer import Computer +from gradio.components.chatbot import MetadataDict # Global variables global_agent = None @@ -42,11 +42,13 @@ SETTINGS_FILE = Path(".gradio_settings.json") logging.basicConfig(level=logging.INFO) import dotenv + if dotenv.load_dotenv(): print(f"DEBUG - Loaded environment variables from {dotenv.find_dotenv()}") else: print("DEBUG - No .env file found") + # --- Settings Load/Save Functions --- def load_settings() -> Dict[str, Any]: """Loads settings from the JSON file.""" @@ -84,7 +86,7 @@ def save_settings(settings: Dict[str, Any]): # async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None: # """Add screenshot to chatbot when a screenshot is taken.""" # image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})" - + # if self.chatbot_history is not None: # self.chatbot_history.append( # gr.ChatMessage( @@ -141,7 +143,7 @@ def get_model_string(model_name: str, loop_provider: str) -> str: ollama_model = model_name.split("OMNI: Ollama ", 1)[1] return f"omniparser+ollama_chat/{ollama_model}" return "omniparser+ollama_chat/llama3" - + # Map based on loop provider mapping = MODEL_MAPPINGS.get(loop_provider.lower(), MODEL_MAPPINGS["openai"]) return mapping.get(model_name, mapping["default"]) @@ -151,6 +153,7 @@ def get_ollama_models() -> List[str]: """Get available models from Ollama if installed.""" try: import subprocess + result = subprocess.run(["ollama", "list"], capture_output=True, text=True) if result.returncode == 0: lines = result.stdout.strip().split("\n") @@ -174,16 +177,14 @@ def create_computer_instance( os_type: str = "macos", provider_type: str = "lume", name: Optional[str] = None, - api_key: Optional[str] = None + api_key: Optional[str] = None, ) -> Computer: """Create or get the global Computer instance.""" global global_computer if global_computer is None: if provider_type == "localhost": global_computer = Computer( - verbosity=verbosity, - os_type=os_type, - use_host_computer_server=True + verbosity=verbosity, os_type=os_type, use_host_computer_server=True ) else: global_computer = Computer( @@ -191,7 +192,7 @@ def create_computer_instance( os_type=os_type, provider_type=provider_type, name=name if name else "", - api_key=api_key + api_key=api_key, ) return global_computer @@ -217,7 +218,7 @@ def create_agent( os_type=computer_os, provider_type=computer_provider, name=computer_name, - api_key=computer_api_key + api_key=computer_api_key, ) # Handle custom models @@ -233,12 +234,15 @@ def create_agent( "only_n_most_recent_images": only_n_most_recent_images, "verbosity": verbosity, } - + if save_trajectory: agent_kwargs["trajectory_dir"] = "trajectories" - + if max_trajectory_budget: - agent_kwargs["max_trajectory_budget"] = {"max_budget": max_trajectory_budget, "raise_error": True} + agent_kwargs["max_trajectory_budget"] = { + "max_budget": max_trajectory_budget, + "raise_error": True, + } global_agent = ComputerAgent(**agent_kwargs) return global_agent @@ -247,7 +251,8 @@ def create_agent( def launch_ui(): """Standalone function to launch the Gradio app.""" from agent.ui.gradio.ui_components import create_gradio_ui - print(f"Starting Gradio app for CUA Agent...") + + print("Starting Gradio app for CUA Agent...") demo = create_gradio_ui() demo.launch(share=False, inbrowser=True) diff --git 
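# --- Illustrative sketch (not part of the diff above): the "OMNI: Ollama ..."
# branch of get_model_string() shown earlier builds an omniparser+ollama_chat
# model string from the UI label, assuming the surrounding branch dispatches on
# that prefix as the context suggests.
from agent.ui.gradio.app import get_model_string

print(get_model_string("OMNI: Ollama llama3", "OMNI"))
# -> "omniparser+ollama_chat/llama3"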
a/libs/python/agent/agent/ui/gradio/ui_components.py b/libs/python/agent/agent/ui/gradio/ui_components.py index d197919b..d14f49a9 100644 --- a/libs/python/agent/agent/ui/gradio/ui_components.py +++ b/libs/python/agent/agent/ui/gradio/ui_components.py @@ -2,19 +2,25 @@ UI Components for the Gradio interface """ -import os import asyncio -import logging import json +import logging +import os import platform from pathlib import Path -from typing import Dict, List, Optional, Any, cast +from typing import Any, Dict, List, Optional, cast + import gradio as gr from gradio.components.chatbot import MetadataDict from .app import ( - load_settings, save_settings, create_agent, get_model_string, - get_ollama_models, global_agent, global_computer + create_agent, + get_model_string, + get_ollama_models, + global_agent, + global_computer, + load_settings, + save_settings, ) # Global messages array to maintain conversation history @@ -23,15 +29,15 @@ global_messages = [] def create_gradio_ui() -> gr.Blocks: """Create a Gradio UI for the Computer-Use Agent.""" - + # Load settings saved_settings = load_settings() - + # Check for API keys openai_api_key = os.environ.get("OPENAI_API_KEY", "") anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "") cua_api_key = os.environ.get("CUA_API_KEY", "") - + # Model choices openai_models = ["OpenAI: Computer-Use Preview"] anthropic_models = [ @@ -43,10 +49,10 @@ def create_gradio_ui() -> gr.Blocks: omni_models = [ "OMNI: OpenAI GPT-4o", "OMNI: OpenAI GPT-4o mini", - "OMNI: Claude 3.7 Sonnet (20250219)", - "OMNI: Claude 3.5 Sonnet (20241022)" + "OMNI: Claude 3.7 Sonnet (20250219)", + "OMNI: Claude 3.5 Sonnet (20241022)", ] - + # Check if API keys are available has_openai_key = bool(openai_api_key) has_anthropic_key = bool(anthropic_api_key) @@ -59,15 +65,20 @@ def create_gradio_ui() -> gr.Blocks: # Detect platform is_mac = platform.system().lower() == "darwin" - + # Format model choices provider_to_models = { "OPENAI": openai_models, "ANTHROPIC": anthropic_models, "OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"], - "UITARS": ([ - "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", - ] if is_mac else []) + ["Custom model (OpenAI compatible API)"], + "UITARS": ( + [ + "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", + ] + if is_mac + else [] + ) + + ["Custom model (OpenAI compatible API)"], } # Apply saved settings @@ -82,7 +93,9 @@ def create_gradio_ui() -> gr.Blocks: elif initial_loop == "ANTHROPIC": initial_model = anthropic_models[0] if anthropic_models else "No models available" else: # OMNI - initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)" + initial_model = ( + omni_models[0] if omni_models else "Custom model (OpenAI compatible API)" + ) initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct") initial_provider_base_url = saved_settings.get("provider_base_url", "http://localhost:1234/v1") @@ -96,16 +109,27 @@ def create_gradio_ui() -> gr.Blocks: "Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks", "Configure SSH keys and set up a connection to a remote server", ] - - def generate_python_code(agent_loop_choice, model_name, tasks, recent_images=3, save_trajectory=True, computer_os="linux", computer_provider="cloud", container_name="", cua_cloud_api_key="", max_budget=None): + + def generate_python_code( + agent_loop_choice, + model_name, + tasks, + recent_images=3, + save_trajectory=True, + computer_os="linux", 
+ computer_provider="cloud", + container_name="", + cua_cloud_api_key="", + max_budget=None, + ): """Generate Python code for the current configuration and tasks.""" tasks_str = "" for task in tasks: if task and task.strip(): tasks_str += f' "{task}",\n' - + model_string = get_model_string(model_name, agent_loop_choice) - + computer_args = [] if computer_os != "macos": computer_args.append(f'os_type="{computer_os}"') @@ -115,14 +139,14 @@ def create_gradio_ui() -> gr.Blocks: computer_args.append(f'name="{container_name}"') if cua_cloud_api_key: computer_args.append(f'api_key="{cua_cloud_api_key}"') - + computer_args_str = ", ".join(computer_args) if computer_args_str: computer_args_str = f"({computer_args_str})" else: computer_args_str = "()" - - code = f'''import asyncio + + code = f"""import asyncio from computer import Computer from agent import ComputerAgent @@ -131,22 +155,22 @@ async def main(): agent = ComputerAgent( model="{model_string}", tools=[computer], - only_n_most_recent_images={recent_images},''' - + only_n_most_recent_images={recent_images},""" + if save_trajectory: - code += ''' - trajectory_dir="trajectories",''' - + code += """ + trajectory_dir="trajectories",""" + if max_budget: - code += f''' - max_trajectory_budget={{"max_budget": {max_budget}, "raise_error": True}},''' - - code += ''' + code += f""" + max_trajectory_budget={{"max_budget": {max_budget}, "raise_error": True}},""" + + code += """ ) - ''' - + """ + if tasks_str: - code += f''' + code += f""" # Prompts for the computer-use agent tasks = [ {tasks_str.rstrip()} @@ -158,23 +182,23 @@ async def main(): async for result in agent.run(messages): for item in result["output"]: if item["type"] == "message": - print(item["content"][0]["text"])''' + print(item["content"][0]["text"])""" else: - code += f''' + code += """ # Execute a single task task = "Search for information about CUA on GitHub" - print(f"Executing task: {{task}}") - messages = [{{"role": "user", "content": task}}] + print(f"Executing task: {task}") + messages = [{"role": "user", "content": task}] async for result in agent.run(messages): for item in result["output"]: if item["type"] == "message": - print(item["content"][0]["text"])''' + print(item["content"][0]["text"])""" - code += ''' + code += """ if __name__ == "__main__": - asyncio.run(main())''' - + asyncio.run(main())""" + return code # Create the Gradio interface @@ -199,11 +223,11 @@ if __name__ == "__main__": value=generate_python_code(initial_loop, "gpt-4o", []), interactive=False, ) - + with gr.Accordion("Computer Configuration", open=True): is_windows = platform.system().lower() == "windows" is_mac = platform.system().lower() == "darwin" - + providers = ["cloud", "localhost", "docker"] if is_mac: providers += ["lume"] @@ -227,30 +251,30 @@ if __name__ == "__main__": value=computer_choices[0], info="Select the operating system for the computer", ) - + computer_provider = gr.Radio( choices=providers, label="Provider", value="lume" if is_mac else "cloud", info="Select the computer provider", ) - + container_name = gr.Textbox( label="Container Name", placeholder="Enter container name (optional)", value=os.environ.get("CUA_CONTAINER_NAME", ""), info="Optional name for the container", ) - + cua_cloud_api_key = gr.Textbox( label="CUA Cloud API Key", placeholder="Enter your CUA Cloud API key", value=os.environ.get("CUA_API_KEY", ""), type="password", info="Required for cloud provider", - visible=(not has_cua_key) + visible=(not has_cua_key), ) - + with gr.Accordion("Agent Configuration", 
open=True): agent_loop = gr.Dropdown( choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"], @@ -267,90 +291,113 @@ if __name__ == "__main__": value=openai_models[0] if openai_models else "No models available", info="Select OpenAI model", interactive=True, - visible=(initial_loop == "OPENAI") + visible=(initial_loop == "OPENAI"), ) - + anthropic_model_choice = gr.Dropdown( choices=anthropic_models, label="Anthropic Model", - value=anthropic_models[0] if anthropic_models else "No models available", + value=( + anthropic_models[0] if anthropic_models else "No models available" + ), info="Select Anthropic model", interactive=True, - visible=(initial_loop == "ANTHROPIC") + visible=(initial_loop == "ANTHROPIC"), ) - + omni_model_choice = gr.Dropdown( - choices=omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"], + choices=omni_models + + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"], label="OMNI Model", - value=omni_models[0] if omni_models else "Custom model (OpenAI compatible API)", + value=( + omni_models[0] + if omni_models + else "Custom model (OpenAI compatible API)" + ), info="Select OMNI model or choose a custom model option", interactive=True, - visible=(initial_loop == "OMNI") + visible=(initial_loop == "OMNI"), ) - + uitars_model_choice = gr.Dropdown( choices=provider_to_models.get("UITARS", ["No models available"]), label="UITARS Model", - value=provider_to_models.get("UITARS", ["No models available"])[0] if provider_to_models.get("UITARS") else "No models available", + value=( + provider_to_models.get("UITARS", ["No models available"])[0] + if provider_to_models.get("UITARS") + else "No models available" + ), info="Select UITARS model", interactive=True, - visible=(initial_loop == "UITARS") + visible=(initial_loop == "UITARS"), ) - + model_choice = gr.Textbox(visible=False) # API key inputs - with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group: + with gr.Group( + visible=not has_openai_key + and (initial_loop == "OPENAI" or initial_loop == "OMNI") + ) as openai_key_group: openai_api_key_input = gr.Textbox( label="OpenAI API Key", placeholder="Enter your OpenAI API key", value=os.environ.get("OPENAI_API_KEY", ""), interactive=True, type="password", - info="Required for OpenAI models" + info="Required for OpenAI models", ) - - with gr.Group(visible=not has_anthropic_key and (initial_loop == "ANTHROPIC" or initial_loop == "OMNI")) as anthropic_key_group: + + with gr.Group( + visible=not has_anthropic_key + and (initial_loop == "ANTHROPIC" or initial_loop == "OMNI") + ) as anthropic_key_group: anthropic_api_key_input = gr.Textbox( label="Anthropic API Key", placeholder="Enter your Anthropic API key", value=os.environ.get("ANTHROPIC_API_KEY", ""), interactive=True, type="password", - info="Required for Anthropic models" + info="Required for Anthropic models", ) - + # API key handlers def set_openai_api_key(key): if key and key.strip(): os.environ["OPENAI_API_KEY"] = key.strip() - print(f"DEBUG - Set OpenAI API key environment variable") + print("DEBUG - Set OpenAI API key environment variable") return key - + def set_anthropic_api_key(key): if key and key.strip(): os.environ["ANTHROPIC_API_KEY"] = key.strip() - print(f"DEBUG - Set Anthropic API key environment variable") + print("DEBUG - Set Anthropic API key environment variable") return key - + openai_api_key_input.change( fn=set_openai_api_key, inputs=[openai_api_key_input], outputs=[openai_api_key_input], - queue=False + 
queue=False, ) - + anthropic_api_key_input.change( fn=set_anthropic_api_key, inputs=[anthropic_api_key_input], outputs=[anthropic_api_key_input], - queue=False + queue=False, ) # UI update function - def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None): + def update_ui( + loop=None, + openai_model=None, + anthropic_model=None, + omni_model=None, + uitars_model=None, + ): loop = loop or agent_loop.value - + model_value = None if loop == "OPENAI" and openai_model: model_value = openai_model @@ -360,21 +407,37 @@ if __name__ == "__main__": model_value = omni_model elif loop == "UITARS" and uitars_model: model_value = uitars_model - - openai_visible = (loop == "OPENAI") - anthropic_visible = (loop == "ANTHROPIC") - omni_visible = (loop == "OMNI") - uitars_visible = (loop == "UITARS") - - show_openai_key = not has_openai_key and (loop == "OPENAI" or (loop == "OMNI" and model_value and "OpenAI" in model_value and "Custom" not in model_value)) - show_anthropic_key = not has_anthropic_key and (loop == "ANTHROPIC" or (loop == "OMNI" and model_value and "Claude" in model_value and "Custom" not in model_value)) - + + openai_visible = loop == "OPENAI" + anthropic_visible = loop == "ANTHROPIC" + omni_visible = loop == "OMNI" + uitars_visible = loop == "UITARS" + + show_openai_key = not has_openai_key and ( + loop == "OPENAI" + or ( + loop == "OMNI" + and model_value + and "OpenAI" in model_value + and "Custom" not in model_value + ) + ) + show_anthropic_key = not has_anthropic_key and ( + loop == "ANTHROPIC" + or ( + loop == "OMNI" + and model_value + and "Claude" in model_value + and "Custom" not in model_value + ) + ) + is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)" is_custom_ollama = model_value == "Custom model (ollama)" is_any_custom = is_custom_openai_api or is_custom_ollama - + model_choice_value = model_value if model_value else "" - + return [ gr.update(visible=openai_visible), gr.update(visible=anthropic_visible), @@ -385,15 +448,18 @@ if __name__ == "__main__": gr.update(visible=is_any_custom), gr.update(visible=is_custom_openai_api), gr.update(visible=is_custom_openai_api), - gr.update(value=model_choice_value) + gr.update(value=model_choice_value), ] - + # Custom model inputs custom_model = gr.Textbox( label="Custom Model Name", placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)", value=initial_custom_model, - visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"), + visible=( + initial_model == "Custom model (OpenAI compatible API)" + or initial_model == "Custom model (ollama)" + ), interactive=True, ) @@ -413,36 +479,56 @@ if __name__ == "__main__": interactive=True, type="password", ) - + # Provider visibility update function def update_provider_visibility(provider): """Update visibility of container name and API key based on selected provider.""" is_localhost = provider == "localhost" return [ gr.update(visible=not is_localhost), # container_name - gr.update(visible=not is_localhost and not has_cua_key) # cua_cloud_api_key + gr.update( + visible=not is_localhost and not has_cua_key + ), # cua_cloud_api_key ] - + # Connect provider change event computer_provider.change( fn=update_provider_visibility, inputs=[computer_provider], outputs=[container_name, cua_cloud_api_key], - queue=False + queue=False, ) - + # Connect UI update events - for dropdown in [agent_loop, omni_model_choice, uitars_model_choice, openai_model_choice, 
anthropic_model_choice]: + for dropdown in [ + agent_loop, + omni_model_choice, + uitars_model_choice, + openai_model_choice, + anthropic_model_choice, + ]: dropdown.change( fn=update_ui, - inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice], - outputs=[ - openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, - openai_key_group, anthropic_key_group, - custom_model, provider_base_url, provider_api_key, - model_choice + inputs=[ + agent_loop, + openai_model_choice, + anthropic_model_choice, + omni_model_choice, + uitars_model_choice, ], - queue=False + outputs=[ + openai_model_choice, + anthropic_model_choice, + omni_model_choice, + uitars_model_choice, + openai_key_group, + anthropic_key_group, + custom_model, + provider_base_url, + provider_api_key, + model_choice, + ], + queue=False, ) save_trajectory = gr.Checkbox( @@ -461,7 +547,7 @@ if __name__ == "__main__": info="Number of recent images to keep in context", interactive=True, ) - + max_budget = gr.Number( label="Max Budget ($)", value=lambda: None, @@ -479,9 +565,7 @@ if __name__ == "__main__": ) chatbot_history = gr.Chatbot(type="messages") - msg = gr.Textbox( - placeholder="Ask me to perform tasks in a virtual environment" - ) + msg = gr.Textbox(placeholder="Ask me to perform tasks in a virtual environment") clear = gr.Button("Clear") cancel_button = gr.Button("Cancel", variant="stop") @@ -498,11 +582,23 @@ if __name__ == "__main__": global global_agent if global_agent: print("DEBUG - Cancelling agent task") - history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"})) + history.append( + gr.ChatMessage( + role="assistant", + content="Task cancelled by user", + metadata={"title": "❌ Cancelled"}, + ) + ) else: - history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"})) + history.append( + gr.ChatMessage( + role="assistant", + content="No active agent task to cancel", + metadata={"title": "ℹ️ Info"}, + ) + ) return history - + # Process response function async def process_response( history, @@ -542,10 +638,13 @@ if __name__ == "__main__": model_choice_value = uitars_model_value else: model_choice_value = "No models available" - + # Determine if this is a custom model selection - is_custom_model_selected = model_choice_value in ["Custom model (OpenAI compatible API)", "Custom model (ollama)"] - + is_custom_model_selected = model_choice_value in [ + "Custom model (OpenAI compatible API)", + "Custom model (ollama)", + ] + # Determine the model name string to analyze if is_custom_model_selected: model_string_to_analyze = custom_model_value @@ -583,13 +682,19 @@ if __name__ == "__main__": model_string=model_string, save_trajectory=save_traj, only_n_most_recent_images=recent_imgs, - custom_model_name=custom_model_value if is_custom_model_selected else None, + custom_model_name=( + custom_model_value if is_custom_model_selected else None + ), computer_os=computer_os, computer_provider=computer_provider, computer_name=container_name, computer_api_key=cua_cloud_api_key, verbosity=logging.DEBUG, - max_trajectory_budget=max_budget_value if max_budget_value and max_budget_value > 0 else None, + max_trajectory_budget=( + max_budget_value + if max_budget_value and max_budget_value > 0 + else None + ), ) if global_agent is None: @@ -605,7 +710,7 @@ if __name__ == "__main__": # Add user message to global history global global_messages 
global_messages.append({"role": "user", "content": last_user_message}) - + # Stream responses from the agent async for result in global_agent.run(global_messages): global_messages += result.get("output", []) @@ -613,18 +718,20 @@ if __name__ == "__main__": # from pprint import pprint # pprint(result) # print(f"DEBUG - Agent response ------- END") - + # Process the result output for item in result.get("output", []): if item.get("type") == "message": content = item.get("content", []) for content_part in content: if content_part.get("text"): - history.append(gr.ChatMessage( - role=item.get("role", "assistant"), - content=content_part.get("text", ""), - metadata=content_part.get("metadata", {}) - )) + history.append( + gr.ChatMessage( + role=item.get("role", "assistant"), + content=content_part.get("text", ""), + metadata=content_part.get("metadata", {}), + ) + ) elif item.get("type") == "computer_call": action = item.get("action", {}) action_type = action.get("type", "") @@ -632,43 +739,52 @@ if __name__ == "__main__": action_title = f"🛠️ Performing {action_type}" if action.get("x") and action.get("y"): action_title += f" at ({action['x']}, {action['y']})" - history.append(gr.ChatMessage( - role="assistant", - content=f"```json\n{json.dumps(action)}\n```", - metadata={"title": action_title} - )) + history.append( + gr.ChatMessage( + role="assistant", + content=f"```json\n{json.dumps(action)}\n```", + metadata={"title": action_title}, + ) + ) elif item.get("type") == "function_call": function_name = item.get("name", "") arguments = item.get("arguments", "{}") - history.append(gr.ChatMessage( - role="assistant", - content=f"🔧 Calling function: {function_name}\n```json\n{arguments}\n```", - metadata={"title": f"Function Call: {function_name}"} - )) + history.append( + gr.ChatMessage( + role="assistant", + content=f"🔧 Calling function: {function_name}\n```json\n{arguments}\n```", + metadata={"title": f"Function Call: {function_name}"}, + ) + ) elif item.get("type") == "function_call_output": output = item.get("output", "") - history.append(gr.ChatMessage( - role="assistant", - content=f"📤 Function output:\n```\n{output}\n```", - metadata={"title": "Function Output"} - )) + history.append( + gr.ChatMessage( + role="assistant", + content=f"📤 Function output:\n```\n{output}\n```", + metadata={"title": "Function Output"}, + ) + ) elif item.get("type") == "computer_call_output": output = item.get("output", {}).get("image_url", "") image_markdown = f"![Computer output]({output})" - history.append(gr.ChatMessage( - role="assistant", - content=image_markdown, - metadata={"title": "🖥️ Computer Output"} - )) - + history.append( + gr.ChatMessage( + role="assistant", + content=image_markdown, + metadata={"title": "🖥️ Computer Output"}, + ) + ) + yield history - + except Exception as e: import traceback + traceback.print_exc() history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}")) yield history - + # Connect the submit button submit_event = msg.submit( fn=chat_submit, @@ -706,44 +822,77 @@ if __name__ == "__main__": global global_messages global_messages.clear() return None - + clear.click(clear_chat, None, chatbot_history, queue=False) - + # Connect cancel button cancel_button.click( - cancel_agent_task, - [chatbot_history], - [chatbot_history], - queue=False + cancel_agent_task, [chatbot_history], [chatbot_history], queue=False ) # Code display update function - def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, recent_images_val, save_trajectory_val, 
computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget_val): + def update_code_display( + agent_loop, + model_choice_val, + custom_model_val, + chat_history, + recent_images_val, + save_trajectory_val, + computer_os, + computer_provider, + container_name, + cua_cloud_api_key, + max_budget_val, + ): messages = [] if chat_history: for msg in chat_history: if isinstance(msg, dict) and msg.get("role") == "user": messages.append(msg.get("content", "")) - + return generate_python_code( - agent_loop, - model_choice_val or custom_model_val or "gpt-4o", - messages, + agent_loop, + model_choice_val or custom_model_val or "gpt-4o", + messages, recent_images_val, save_trajectory_val, computer_os, computer_provider, container_name, cua_cloud_api_key, - max_budget_val + max_budget_val, ) - + # Update code display when configuration changes - for component in [agent_loop, model_choice, custom_model, chatbot_history, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget]: + for component in [ + agent_loop, + model_choice, + custom_model, + chatbot_history, + recent_images, + save_trajectory, + computer_os, + computer_provider, + container_name, + cua_cloud_api_key, + max_budget, + ]: component.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget], - outputs=[code_display] + inputs=[ + agent_loop, + model_choice, + custom_model, + chatbot_history, + recent_images, + save_trajectory, + computer_os, + computer_provider, + container_name, + cua_cloud_api_key, + max_budget, + ], + outputs=[code_display], ) return demo diff --git a/libs/python/agent/benchmarks/README.md b/libs/python/agent/benchmarks/README.md index 03d1a789..9c7d50b3 100644 --- a/libs/python/agent/benchmarks/README.md +++ b/libs/python/agent/benchmarks/README.md @@ -5,26 +5,30 @@ This directory contains benchmarks designed to test agent providers in the Compu ## Overview The benchmark system evaluates models on GUI grounding tasks, specifically click prediction accuracy. It supports both: + - **Computer Agent SDK providers** (using model strings like `"huggingface-local/HelloKKMe/GTA1-7B"`) - **Reference agent implementations** (custom model classes implementing the `ModelProtocol`) ## Available Benchmarks ### 1. ScreenSpot-v2 (`ss-v2.py`) + - **Dataset**: ScreenSpot-v2 (click-only GUI grounding) - **Format**: Standard resolution screenshots - **Task**: Predict click coordinates given an instruction and image - **Metrics**: Accuracy, Error Rate, Timing, VRAM usage -### 2. ScreenSpot-Pro (`ss-pro.py`) +### 2. ScreenSpot-Pro (`ss-pro.py`) + - **Dataset**: ScreenSpot-Pro (high-resolution click-only GUI grounding) - **Format**: High-resolution screenshots - **Task**: Predict click coordinates given an instruction and image - **Metrics**: Accuracy, Error Rate, Timing, VRAM usage ### 3. Interactive Testing (`interactive.py`) + - **Real-time testing**: Take screenshots and visualize model predictions -- **Commands**: +- **Commands**: - Type instruction → test all models on last screenshot - `screenshot` → take screenshot - `models` → list available models @@ -34,14 +38,16 @@ The benchmark system evaluates models on GUI grounding tasks, specifically click ## Running Benchmarks ### 1. Configure Models + Edit `utils.py` to specify which models you want to test in `get_available_models()`. ### 2. 
Run Benchmark + ```bash # ScreenSpot-v2 benchmark python ss-v2.py --samples 50 -# ScreenSpot-Pro benchmark +# ScreenSpot-Pro benchmark python ss-pro.py --samples 50 # Interactive testing @@ -51,6 +57,7 @@ python interactive.py ## Output ### Console Output + ``` Model Results: Accuracy: 85.50% (171/200) @@ -59,10 +66,11 @@ Model Results: ``` ### Generated Files + - **Markdown Report**: `*_results.md` with detailed results tables - **Visualizations**: `output/` directory with prediction visualizations - **Interactive Output**: `interactive_output/` for interactive session results ## Contributing -To add a new reference model, follow the instructions in [contrib.md](contrib.md). \ No newline at end of file +To add a new reference model, follow the instructions in [contrib.md](contrib.md). diff --git a/libs/python/agent/benchmarks/contrib.md b/libs/python/agent/benchmarks/contrib.md index 0bef9077..a452db6b 100644 --- a/libs/python/agent/benchmarks/contrib.md +++ b/libs/python/agent/benchmarks/contrib.md @@ -17,29 +17,29 @@ class YourModelName(ModelProtocol): def __init__(self, model_path: str): self.model_path = model_path self._model = None - + @property def model_name(self) -> str: return self.model_path - + async def load_model(self) -> None: """Load the model into memory.""" # Your model loading logic here pass - + async def unload_model(self) -> None: """Unload the model from memory.""" # Your model cleanup logic here pass - + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: """ Predict click coordinates for the given image and instruction. - + Args: image: PIL Image to analyze instruction: Text instruction describing what to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ @@ -56,7 +56,7 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: models = [ # Computer Agent SDK providers "huggingface-local/HelloKKMe/GTA1-7B", - + # Reference implementations GTA1Model("HelloKKMe/GTA1-7B"), YourModelName("path/to/your/model"), # Add your model here @@ -79,6 +79,7 @@ This will help you verify that your model loads correctly and produces reasonabl Here's a complete example of adding a hypothetical "MyVisionModel": 1. 
**Create `models/my_vision_model.py`:** + ```python import torch from transformers import AutoModel, AutoProcessor @@ -91,11 +92,11 @@ class MyVisionModel(ModelProtocol): self.model_path = model_path self.model = None self.processor = None - + @property def model_name(self) -> str: return f"MyVisionModel({self.model_path})" - + async def load_model(self) -> None: """Load the model and processor.""" self.processor = AutoProcessor.from_pretrained(self.model_path) @@ -104,7 +105,7 @@ class MyVisionModel(ModelProtocol): torch_dtype=torch.float16, device_map="auto" ) - + async def unload_model(self) -> None: """Clean up model resources.""" del self.model @@ -112,7 +113,7 @@ class MyVisionModel(ModelProtocol): self.model = None self.processor = None torch.cuda.empty_cache() - + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: """Predict click coordinates.""" try: @@ -122,19 +123,19 @@ class MyVisionModel(ModelProtocol): images=image, return_tensors="pt" ) - + # Run inference with torch.no_grad(): outputs = self.model(**inputs) - + # Extract coordinates (model-specific logic) x, y = self._extract_coordinates(outputs) return (int(x), int(y)) - + except Exception as e: print(f"Prediction failed: {e}") return None - + def _extract_coordinates(self, outputs): """Extract x, y coordinates from model outputs.""" # Your model-specific coordinate extraction logic @@ -142,6 +143,7 @@ class MyVisionModel(ModelProtocol): ``` 2. **Update `models/__init__.py`:** + ```python from .gta1 import GTA1Model from .my_vision_model import MyVisionModel @@ -150,6 +152,7 @@ __all__ = ["GTA1Model", "MyVisionModel"] ``` 3. **Update `utils.py`:** + ```python from models import GTA1Model, MyVisionModel diff --git a/libs/python/agent/benchmarks/interactive.py b/libs/python/agent/benchmarks/interactive.py index 6d0aba82..45296c99 100644 --- a/libs/python/agent/benchmarks/interactive.py +++ b/libs/python/agent/benchmarks/interactive.py @@ -9,60 +9,56 @@ Models are loaded/unloaded one at a time to avoid memory issues. import asyncio import os from datetime import datetime -from typing import List, Dict, Any +from typing import Any, Dict, List from utils import ( ModelWrapper, - take_screenshot, + get_available_models, save_prediction_visualization, - get_available_models + take_screenshot, ) async def predict_with_all_models(image, instruction: str, models) -> List[Dict[str, Any]]: """ Predict click coordinates with all models sequentially. 
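The `predict_with_all_models` helper being reformatted here cycles each model through load → predict → unload so that only one set of weights occupies VRAM at a time. A minimal sketch of that pattern against the `ModelProtocol` interface this diff assumes; `DummyModel` is a hypothetical stand-in, not part of the repo:

```python
import asyncio
from typing import Any, Dict, List


class DummyModel:
    """Hypothetical stand-in for a ModelProtocol implementation."""

    model_name = "dummy"

    async def load_model(self) -> None: ...
    async def unload_model(self) -> None: ...

    async def predict_click(self, image, instruction):
        return (10, 20)


async def run_all(image, instruction: str, models) -> List[Dict[str, Any]]:
    predictions = []
    for model in models:
        await model.load_model()
        try:
            coords = await model.predict_click(image, instruction)
            predictions.append({"model_name": model.model_name, "coords": coords, "error": None})
        except Exception as e:
            predictions.append({"model_name": model.model_name, "coords": None, "error": str(e)})
        finally:
            # Unload before touching the next model so only one checkpoint is resident.
            await model.unload_model()
    return predictions


print(asyncio.run(run_all(None, "click OK", [DummyModel()])))
```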
- + Args: image: PIL Image to analyze instruction: Instruction text models: List of model instances - + Returns: List of prediction results """ predictions = [] - + for model in models: model_wrapper = ModelWrapper(model) print(f"\n🔄 Loading {model_wrapper.model_name}...") - + try: # Load model await model_wrapper.load_model() - + # Predict coords = await model_wrapper.predict_click(image, instruction) - - predictions.append({ - 'model_name': model_wrapper.model_name, - 'coords': coords, - 'error': None - }) - + + predictions.append( + {"model_name": model_wrapper.model_name, "coords": coords, "error": None} + ) + if coords: print(f"✅ {model_wrapper.model_name}: ({coords[0]}, {coords[1]})") else: print(f"❌ {model_wrapper.model_name}: No prediction") - + except Exception as e: print(f"❌ {model_wrapper.model_name}: ERROR - {str(e)}") - predictions.append({ - 'model_name': model_wrapper.model_name, - 'coords': None, - 'error': str(e) - }) - + predictions.append( + {"model_name": model_wrapper.model_name, "coords": None, "error": str(e)} + ) + finally: # Always unload model to free memory try: @@ -70,7 +66,7 @@ async def predict_with_all_models(image, instruction: str, models) -> List[Dict[ print(f"🗑️ Unloaded {model_wrapper.model_name}") except Exception as e: print(f"⚠️ Error unloading {model_wrapper.model_name}: {e}") - + return predictions @@ -103,87 +99,91 @@ async def main(): Main interactive loop. """ print_header() - + # Get available models models = get_available_models() print_models(models) - + # Create output directory for visualizations output_dir = "interactive_output" os.makedirs(output_dir, exist_ok=True) - + session_count = 0 last_screenshot = None screenshot_timestamp = None - + while True: try: # Get user input print(f"\n{'='*40}") user_input = input("🎯 Enter instruction (or command): ").strip() - + if not user_input: continue - + # Handle commands - if user_input.lower() in ['quit', 'exit', 'q']: + if user_input.lower() in ["quit", "exit", "q"]: print("👋 Goodbye!") break - - elif user_input.lower() == 'models': + + elif user_input.lower() == "models": print_models(models) continue - - elif user_input.lower() == 'screenshot': + + elif user_input.lower() == "screenshot": print("📸 Taking screenshot...") try: last_screenshot = take_screenshot() screenshot_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - screenshot_path = os.path.join(output_dir, f"screenshot_{screenshot_timestamp}.png") + screenshot_path = os.path.join( + output_dir, f"screenshot_{screenshot_timestamp}.png" + ) last_screenshot.save(screenshot_path) print(f"✅ Screenshot captured and saved to: {screenshot_path}") print(f"📝 Ready for instructions! Screenshot size: {last_screenshot.size}") except Exception as e: print(f"❌ Error taking screenshot: {e}") continue - + # Handle instruction input if last_screenshot is None: - print("⚠️ No screenshot available! Please take a screenshot first using 'screenshot' command.") + print( + "⚠️ No screenshot available! Please take a screenshot first using 'screenshot' command." 
+ ) continue - + session_count += 1 print(f"\n🎯 Session {session_count}: '{user_input}'") print(f"📷 Using screenshot from: {screenshot_timestamp}") - + # Predict with all models using last screenshot print(f"\n🤖 Testing {len(models)} models on screenshot...") predictions = await predict_with_all_models(last_screenshot, user_input, models) - + # Display results summary - print(f"\n📊 Results Summary:") + print("\n📊 Results Summary:") print("-" * 50) for pred in predictions: - if pred['coords']: + if pred["coords"]: print(f"✅ {pred['model_name']}: ({pred['coords'][0]}, {pred['coords'][1]})") - elif pred['error']: + elif pred["error"]: print(f"❌ {pred['model_name']}: ERROR - {pred['error']}") else: print(f"❌ {pred['model_name']}: No prediction") - + # Save visualization timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") vis_filename = f"session_{session_count:03d}_{timestamp}.png" vis_path = os.path.join(output_dir, vis_filename) - + try: save_prediction_visualization(last_screenshot, user_input, predictions, vis_path) print(f"\n💾 Visualization saved to: {vis_path}") except Exception as e: print(f"⚠️ Error saving visualization: {e}") - + print(f"\n✨ Session {session_count} completed!") - + except KeyboardInterrupt: print("\n\n👋 Interrupted by user. Goodbye!") break diff --git a/libs/python/agent/benchmarks/models/base.py b/libs/python/agent/benchmarks/models/base.py index 8ad100a3..aeecbba0 100644 --- a/libs/python/agent/benchmarks/models/base.py +++ b/libs/python/agent/benchmarks/models/base.py @@ -2,34 +2,37 @@ Base protocol for benchmark models. """ -from typing import Protocol, Optional, Tuple +from typing import Optional, Protocol, Tuple + from PIL import Image class ModelProtocol(Protocol): """Protocol for benchmark models that can predict click coordinates.""" - + @property def model_name(self) -> str: """Return the name of the model.""" ... - + async def load_model(self) -> None: """Load the model into memory.""" ... - + async def unload_model(self) -> None: """Unload the model from memory.""" ... - - async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + + async def predict_click( + self, image: Image.Image, instruction: str + ) -> Optional[Tuple[int, int]]: """ Predict click coordinates for the given image and instruction. - + Args: image: PIL Image to analyze instruction: Text instruction describing what to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ diff --git a/libs/python/agent/benchmarks/models/gta1.py b/libs/python/agent/benchmarks/models/gta1.py index a1dee599..56453184 100644 --- a/libs/python/agent/benchmarks/models/gta1.py +++ b/libs/python/agent/benchmarks/models/gta1.py @@ -2,54 +2,51 @@ GTA1 model implementation for benchmarking. """ -from typing import Optional, Tuple -from PIL import Image -import torch -import re import gc +import re +from typing import Optional, Tuple + +import torch +from PIL import Image from qwen_vl_utils import process_vision_info, smart_resize -from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor +from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration from .base import ModelProtocol class GTA1Model: """Ground truth GTA1 model implementation.""" - + def __init__(self, model_path: str = "HelloKKMe/GTA1-7B"): self.model_path = model_path self.model = None self.processor = None self.max_new_tokens = 32 - - self.system_prompt = ''' + + self.system_prompt = """ You are an expert UI element locator. 
Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. The image resolution is height {height} and width {width}. For elements with area, return the center point. Output the coordinate pair exactly: (x,y) -'''.strip() - +""".strip() + @property def model_name(self) -> str: """Return the name of the model.""" return f"GTA1-{self.model_path.split('/')[-1]}" - + async def load_model(self) -> None: """Load the model into memory.""" if self.model is None: print(f"Loading GTA1 model: {self.model_path}") self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( - self.model_path, - torch_dtype=torch.bfloat16, - device_map="auto" + self.model_path, torch_dtype=torch.bfloat16, device_map="auto" ) self.processor = AutoProcessor.from_pretrained( - self.model_path, - min_pixels=3136, - max_pixels=4096 * 2160 + self.model_path, min_pixels=3136, max_pixels=4096 * 2160 ) print("GTA1 model loaded successfully") - + async def unload_model(self) -> None: """Unload the model from memory.""" if self.model is not None: @@ -62,23 +59,25 @@ Output the coordinate pair exactly: if torch.cuda.is_available(): torch.cuda.empty_cache() print("GTA1 model unloaded") - + def _extract_coordinates(self, raw_string: str) -> Tuple[int, int]: """Extract coordinates from model output.""" try: matches = re.findall(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string) - return tuple(map(int, map(float, matches[0]))) # type: ignore + return tuple(map(int, map(float, matches[0]))) # type: ignore except: return (0, 0) - - async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + + async def predict_click( + self, image: Image.Image, instruction: str + ) -> Optional[Tuple[int, int]]: """ Predict click coordinates for the given image and instruction. 
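`predict_click` receives its answer in resized-image pixel space, so the coordinates parsed from the model's `(x,y)` text have to be multiplied back by the `original / resized` scale factors. A self-contained sketch of that parse-and-rescale step; the regex is the one used in this file, while the image sizes are illustrative:

```python
import re
from typing import Tuple


def extract_coordinates(raw: str) -> Tuple[int, int]:
    # Same pattern as GTA1Model._extract_coordinates: the first "(x,y)" pair wins.
    matches = re.findall(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw)
    if not matches:
        return (0, 0)
    x, y = map(float, matches[0])
    return (int(x), int(y))


# Suppose the model saw a 1288x728 resize of a 2576x1456 screenshot (invented sizes).
width, height = 2576, 1456
resized_width, resized_height = 1288, 728
scale_x, scale_y = width / resized_width, height / resized_height

px, py = extract_coordinates("The element is at (644, 364).")
print(int(px * scale_x), int(py * scale_y))  # -> 1288 728, back in original pixels
```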
- + Args: image: PIL Image to analyze instruction: Text instruction describing what to click - + Returns: Tuple of (x, y) coordinates or None if prediction fails """ @@ -87,76 +86,73 @@ Output the coordinate pair exactly: assert self.processor is not None assert self.model is not None - + try: width, height = image.width, image.height - + # Resize image according to processor requirements resized_height, resized_width = smart_resize( image.height, image.width, - factor=self.processor.image_processor.patch_size * self.processor.image_processor.merge_size, + factor=self.processor.image_processor.patch_size + * self.processor.image_processor.merge_size, min_pixels=self.processor.image_processor.min_pixels, max_pixels=self.processor.image_processor.max_pixels, ) resized_image = image.resize((resized_width, resized_height)) scale_x, scale_y = width / resized_width, height / resized_height - + # Prepare messages system_message = { "role": "system", - "content": self.system_prompt.format(height=resized_height, width=resized_width) + "content": self.system_prompt.format(height=resized_height, width=resized_width), } - + user_message = { "role": "user", "content": [ {"type": "image", "image": resized_image}, - {"type": "text", "text": instruction} - ] + {"type": "text", "text": instruction}, + ], } - + # Process inputs - image_inputs, video_inputs = process_vision_info([system_message, user_message]) # type: ignore + image_inputs, video_inputs = process_vision_info([system_message, user_message]) # type: ignore text = self.processor.apply_chat_template( - [system_message, user_message], - tokenize=False, - add_generation_prompt=True + [system_message, user_message], tokenize=False, add_generation_prompt=True ) inputs = self.processor( - text=[text], - images=image_inputs, - videos=video_inputs, - padding=True, - return_tensors="pt" + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", ) inputs = inputs.to(self.model.device) - + # Generate prediction output_ids = self.model.generate( - **inputs, - max_new_tokens=self.max_new_tokens, - do_sample=False, - temperature=1.0, - use_cache=True + **inputs, + max_new_tokens=self.max_new_tokens, + do_sample=False, + temperature=1.0, + use_cache=True, ) generated_ids = [ - output_ids[len(input_ids):] + output_ids[len(input_ids) :] for input_ids, output_ids in zip(inputs.input_ids, output_ids) ] output_text = self.processor.batch_decode( - generated_ids, - skip_special_tokens=True, - clean_up_tokenization_spaces=True + generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True )[0] - + # Extract and rescale coordinates pred_x, pred_y = self._extract_coordinates(output_text) pred_x = int(pred_x * scale_x) pred_y = int(pred_y * scale_y) - + return (pred_x, pred_y) - + except Exception as e: print(f"Error in GTA1 prediction: {e}") return None diff --git a/libs/python/agent/benchmarks/ss-pro.py b/libs/python/agent/benchmarks/ss-pro.py index 80e5e72f..a086819a 100644 --- a/libs/python/agent/benchmarks/ss-pro.py +++ b/libs/python/agent/benchmarks/ss-pro.py @@ -15,103 +15,106 @@ from typing import Optional from datasets import load_dataset from tqdm import tqdm - from utils import ( - ModelWrapper, - is_click_in_bbox, - save_results_to_markdown, - save_visualizations, + ModelWrapper, get_available_models, - get_gpu_memory + get_gpu_memory, + is_click_in_bbox, + save_results_to_markdown, + save_visualizations, ) -async def evaluate_model(model_wrapper: ModelWrapper, dataset, max_samples: Optional[int] = 
None) -> dict: +async def evaluate_model( + model_wrapper: ModelWrapper, dataset, max_samples: Optional[int] = None +) -> dict: """ Evaluate a model on the ScreenSpot-Pro dataset. - + Args: model_wrapper: ModelWrapper instance dataset: ScreenSpot-Pro dataset (list of samples) max_samples: Maximum number of samples to evaluate (None for all) - + Returns: Dictionary with evaluation results """ print(f"\nEvaluating model: {model_wrapper.model_name}") - + # Load model await model_wrapper.load_model() - + total_samples = len(dataset) if max_samples is not None: total_samples = min(max_samples, total_samples) - + correct_predictions = 0 error_predictions = 0 results = [] - + for i in tqdm(range(total_samples), desc=f"Evaluating {model_wrapper.model_name}"): sample = dataset[i] - + # Extract sample data - image = sample['image'] - instruction = sample['instruction'] - bbox = sample['bbox'] # [x1, y1, x2, y2] - sample_id = sample['img_filename'] - + image = sample["image"] + instruction = sample["instruction"] + bbox = sample["bbox"] # [x1, y1, x2, y2] + sample_id = sample["img_filename"] + # Predict click coordinates with timing start_time = time.time() click_coords = await model_wrapper.predict_click(image, instruction) prediction_time = time.time() - start_time - + # Check if prediction is correct is_correct = is_click_in_bbox(click_coords, bbox) - + if is_correct: correct_predictions += 1 - - results.append({ - 'id': sample_id, - 'instruction': instruction, - 'bbox': bbox, - 'predicted_coords': click_coords, - 'is_correct': is_correct, - 'failed': False, - 'prediction_time': prediction_time - }) - + + results.append( + { + "id": sample_id, + "instruction": instruction, + "bbox": bbox, + "predicted_coords": click_coords, + "is_correct": is_correct, + "failed": False, + "prediction_time": prediction_time, + } + ) + # Unload model await model_wrapper.unload_model() - + # Calculate metrics accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0 error_rate = error_predictions / total_samples if total_samples > 0 else 0.0 - + # Calculate timing statistics - successful_times = [r['prediction_time'] for r in results if not r['failed']] + successful_times = [r["prediction_time"] for r in results if not r["failed"]] avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0 median_prediction_time = statistics.median(successful_times) if successful_times else 0.0 min_prediction_time = min(successful_times) if successful_times else 0.0 max_prediction_time = max(successful_times) if successful_times else 0.0 - + # Get VRAM statistics vram_stats = model_wrapper.get_vram_stats() - + return { - 'model_name': model_wrapper.model_name, - 'total_samples': total_samples, - 'correct_predictions': correct_predictions, - 'failed_predictions': error_predictions, - 'accuracy': accuracy, - 'failure_rate': error_rate, - 'avg_prediction_time': avg_prediction_time, - 'median_prediction_time': median_prediction_time, - 'min_prediction_time': min_prediction_time, - 'max_prediction_time': max_prediction_time, - 'vram_max_mb': vram_stats['max_mb'], - 'vram_avg_mb': vram_stats['avg_mb'], - 'results': results + "model_name": model_wrapper.model_name, + "total_samples": total_samples, + "correct_predictions": correct_predictions, + "failed_predictions": error_predictions, + "accuracy": accuracy, + "failure_rate": error_rate, + "avg_prediction_time": avg_prediction_time, + "median_prediction_time": median_prediction_time, + "min_prediction_time": min_prediction_time, + 
"max_prediction_time": max_prediction_time, + "vram_max_mb": vram_stats["max_mb"], + "vram_avg_mb": vram_stats["avg_mb"], + "results": results, } @@ -120,42 +123,44 @@ async def main(): Main function to run the benchmark. """ # Parse command line arguments - parser = argparse.ArgumentParser(description='ScreenSpot-Pro Benchmark Script') - parser.add_argument('--samples', type=int, default=300, - help='Number of samples to evaluate (default: 300)') - parser.add_argument('--seed', type=int, default=42, - help='Random seed for shuffling (default: 42)') + parser = argparse.ArgumentParser(description="ScreenSpot-Pro Benchmark Script") + parser.add_argument( + "--samples", type=int, default=300, help="Number of samples to evaluate (default: 300)" + ) + parser.add_argument( + "--seed", type=int, default=42, help="Random seed for shuffling (default: 42)" + ) args = parser.parse_args() - + # Set random seed random.seed(args.seed) - + # Load dataset print("Loading ScreenSpot-Pro dataset...") ds = load_dataset("lmms-lab/ScreenSpot-Pro") - dataset = ds['train'] # type: ignore + dataset = ds["train"] # type: ignore # Convert to list to support indexing dataset_list = list(dataset) print(f"Dataset loaded: {len(dataset_list)} samples") - + # Shuffle dataset with seed random.shuffle(dataset_list) print(f"Dataset shuffled with seed {args.seed}") - + # Get available models models = get_available_models() - + # Evaluation settings max_samples = args.samples # Use command line argument - + # Run evaluations all_results = [] - + for model in models: model_wrapper = ModelWrapper(model) result = await evaluate_model(model_wrapper, dataset_list, max_samples) all_results.append(result) - + # Print summary print(f"\n{result['model_name']} Results:") print(f" Accuracy: {result['accuracy']*100:.2f}%") @@ -164,15 +169,17 @@ async def main(): print(f" Error Rate: {result['failure_rate']*100:.2f}%") print(f" Avg Time: {result['avg_prediction_time']:.2f}s") print(f" Median Time: {result['median_prediction_time']:.2f}s") - print(f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s") + print( + f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s" + ) print(f" VRAM Max: {result['vram_max_mb']:.1f}MB") print(f" VRAM Avg: {result['vram_avg_mb']:.1f}MB") - + # Print GPU memory info gpu_memory = get_gpu_memory() if gpu_memory and gpu_memory[0] > 0: print(f" GPU Free Memory: {gpu_memory[0]:.1f}MB") - + # Save results if all_results: save_results_to_markdown(all_results) @@ -183,4 +190,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/libs/python/agent/benchmarks/ss-v2.py b/libs/python/agent/benchmarks/ss-v2.py index dab1d4b1..8af8e8e0 100644 --- a/libs/python/agent/benchmarks/ss-v2.py +++ b/libs/python/agent/benchmarks/ss-v2.py @@ -15,36 +15,37 @@ from typing import Optional from datasets import load_dataset from tqdm import tqdm - from utils import ( - ModelWrapper, - is_click_in_bbox, - save_results_to_markdown, - save_visualizations, + ModelWrapper, get_available_models, - get_gpu_memory + get_gpu_memory, + is_click_in_bbox, + save_results_to_markdown, + save_visualizations, ) -async def evaluate_model(model_wrapper: ModelWrapper, samples, max_samples: Optional[int] = None) -> dict: +async def evaluate_model( + model_wrapper: ModelWrapper, samples, max_samples: Optional[int] = None +) -> dict: """ Evaluate a model on any iterable of samples. 
- + Args: model_wrapper: ModelWrapper instance samples: Iterable of dicts with keys: image, bbox, instruction max_samples: Maximum number of samples to evaluate (None for all) - + Returns: Dictionary with evaluation results """ print(f"\nEvaluating model: {model_wrapper.model_name}") - + # Load model await model_wrapper.load_model() - + # Convert to list if needed and limit samples - if hasattr(samples, '__len__'): + if hasattr(samples, "__len__"): total_samples = len(samples) if max_samples is not None: total_samples = min(max_samples, total_samples) @@ -55,69 +56,71 @@ async def evaluate_model(model_wrapper: ModelWrapper, samples, max_samples: Opti if max_samples is not None: sample_list = sample_list[:max_samples] total_samples = len(sample_list) - + correct_predictions = 0 error_predictions = 0 results = [] - + for i, sample in enumerate(tqdm(sample_list, desc=f"Evaluating {model_wrapper.model_name}")): # Extract required data (only these 3 keys matter) - image = sample['image'] - instruction = sample['instruction'] - bbox = sample['bbox'] # [x1, y1, x2, y2] - + image = sample["image"] + instruction = sample["instruction"] + bbox = sample["bbox"] # [x1, y1, x2, y2] + # Predict click coordinates with timing start_time = time.time() click_coords = await model_wrapper.predict_click(image, instruction) prediction_time = time.time() - start_time - + # Check if prediction is correct is_correct = is_click_in_bbox(click_coords, bbox) - + if is_correct: correct_predictions += 1 - - results.append({ - 'sample_idx': i, - 'instruction': instruction, - 'bbox': bbox, - 'predicted_coords': click_coords, - 'is_correct': is_correct, - 'failed': False, - 'prediction_time': prediction_time - }) - + + results.append( + { + "sample_idx": i, + "instruction": instruction, + "bbox": bbox, + "predicted_coords": click_coords, + "is_correct": is_correct, + "failed": False, + "prediction_time": prediction_time, + } + ) + # Unload model await model_wrapper.unload_model() - + # Calculate metrics accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0 error_rate = error_predictions / total_samples if total_samples > 0 else 0.0 - + # Calculate timing statistics - successful_times = [r['prediction_time'] for r in results if not r['failed']] + successful_times = [r["prediction_time"] for r in results if not r["failed"]] avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0 median_prediction_time = statistics.median(successful_times) if successful_times else 0.0 min_prediction_time = min(successful_times) if successful_times else 0.0 max_prediction_time = max(successful_times) if successful_times else 0.0 - + # Get VRAM statistics vram_stats = model_wrapper.get_vram_stats() - + return { - 'model_name': model_wrapper.model_name, - 'total_samples': total_samples, - 'correct_predictions': correct_predictions, - 'failed_predictions': error_predictions, - 'accuracy': accuracy, - 'failure_rate': error_rate, - 'avg_prediction_time': avg_prediction_time, - 'median_prediction_time': median_prediction_time, - 'min_prediction_time': min_prediction_time, - 'max_prediction_time': max_prediction_time, - 'vram_max_mb': vram_stats['max_mb'], - 'vram_avg_mb': vram_stats['avg_mb'], - 'results': results + "model_name": model_wrapper.model_name, + "total_samples": total_samples, + "correct_predictions": correct_predictions, + "failed_predictions": error_predictions, + "accuracy": accuracy, + "failure_rate": error_rate, + "avg_prediction_time": avg_prediction_time, + 
"median_prediction_time": median_prediction_time, + "min_prediction_time": min_prediction_time, + "max_prediction_time": max_prediction_time, + "vram_max_mb": vram_stats["max_mb"], + "vram_avg_mb": vram_stats["avg_mb"], + "results": results, } @@ -126,56 +129,60 @@ async def main(): Main function to run the benchmark. """ # Parse command line arguments - parser = argparse.ArgumentParser(description='ScreenSpot-v2 Benchmark Script') - parser.add_argument('--samples', type=int, default=500, - help='Number of samples to evaluate (default: 500)') - parser.add_argument('--seed', type=int, default=42, - help='Random seed for shuffling (default: 42)') + parser = argparse.ArgumentParser(description="ScreenSpot-v2 Benchmark Script") + parser.add_argument( + "--samples", type=int, default=500, help="Number of samples to evaluate (default: 500)" + ) + parser.add_argument( + "--seed", type=int, default=42, help="Random seed for shuffling (default: 42)" + ) args = parser.parse_args() - + # Set random seed random.seed(args.seed) - + # Load dataset print("Loading ScreenSpot-v2 dataset...") ds = load_dataset("lmms-lab/ScreenSpot-v2") - dataset = ds['train'] # type: ignore + dataset = ds["train"] # type: ignore # Convert to simple list of dicts with only required keys samples = [] for item in dataset: # Convert dataset item to dict if needed - item_dict = dict(item) if hasattr(item, 'keys') else item - + item_dict = dict(item) if hasattr(item, "keys") else item + # Convert ScreenSpot-v2 bbox format [x, y, w, h] to [x1, y1, x2, y2] - bbox_xywh = item_dict['bbox'] # type: ignore + bbox_xywh = item_dict["bbox"] # type: ignore x, y, w, h = bbox_xywh bbox_xyxy = [x, y, x + w, y + h] - - samples.append({ - 'image': item_dict['image'], # type: ignore - 'instruction': item_dict['instruction'], # type: ignore - 'bbox': bbox_xyxy - }) + + samples.append( + { + "image": item_dict["image"], # type: ignore + "instruction": item_dict["instruction"], # type: ignore + "bbox": bbox_xyxy, + } + ) print(f"Dataset loaded: {len(samples)} samples") - + # Shuffle samples with seed random.shuffle(samples) print(f"Samples shuffled with seed {args.seed}") - + # Get available models models = get_available_models() - + # Evaluation settings max_samples = args.samples # Use command line argument - + # Run evaluations all_results = [] - + for model in models: model_wrapper = ModelWrapper(model) result = await evaluate_model(model_wrapper, samples, max_samples) all_results.append(result) - + # Print summary print(f"\n{result['model_name']} Results:") print(f" Accuracy: {result['accuracy']*100:.2f}%") @@ -184,18 +191,22 @@ async def main(): print(f" Error Rate: {result['failure_rate']*100:.2f}%") print(f" Avg Time: {result['avg_prediction_time']:.2f}s") print(f" Median Time: {result['median_prediction_time']:.2f}s") - print(f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s") + print( + f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s" + ) print(f" VRAM Max: {result['vram_max_mb']:.1f}MB") print(f" VRAM Avg: {result['vram_avg_mb']:.1f}MB") - + # Print GPU memory info gpu_memory = get_gpu_memory() if gpu_memory and gpu_memory[0] > 0: print(f" GPU Free Memory: {gpu_memory[0]:.1f}MB") - + # Save results if all_results: - save_results_to_markdown(all_results, "screenspot_v2_results.md", title="ScreenSpot-v2 Benchmark Results") + save_results_to_markdown( + all_results, "screenspot_v2_results.md", title="ScreenSpot-v2 Benchmark Results" + ) 
save_visualizations(all_results, samples) print("\nBenchmark completed successfully!") else: @@ -203,4 +214,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index d7ef4445..7c32b1c1 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -4,38 +4,40 @@ Shared utilities for ScreenSpot-Pro benchmarking and interactive testing. """ import dotenv + dotenv.load_dotenv() import asyncio import base64 +import gc import os -import sys -import subprocess as sp import statistics +import subprocess as sp +import sys from datetime import datetime from io import BytesIO -from typing import List, Union, Tuple, Optional +from typing import List, Optional, Tuple, Union +import torch from PIL import Image, ImageDraw from tqdm import tqdm -import gc -import torch # Add parent directory to path for imports -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) from agent.agent import ComputerAgent from models.base import ModelProtocol + def get_gpu_memory() -> List[int]: """ Get GPU memory usage using nvidia-smi. - + Returns: List of free memory values in MB for each GPU """ try: command = "nvidia-smi --query-gpu=memory.free --format=csv" - memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:] + memory_free_info = sp.check_output(command.split()).decode("ascii").split("\n")[:-1][1:] memory_free_values = [int(x.split()[0]) for i, x in enumerate(memory_free_info)] return memory_free_values except (sp.CalledProcessError, FileNotFoundError, IndexError): @@ -51,39 +53,34 @@ def get_gpu_memory() -> List[int]: def get_vram_usage() -> dict: """ Get current VRAM usage statistics. - + Returns: Dictionary with VRAM usage info (in MB) """ if torch.cuda.is_available(): device = torch.cuda.current_device() allocated = torch.cuda.memory_allocated(device) / 1024 / 1024 # Convert to MB - reserved = torch.cuda.memory_reserved(device) / 1024 / 1024 # Convert to MB + reserved = torch.cuda.memory_reserved(device) / 1024 / 1024 # Convert to MB total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024 return { - 'allocated_mb': allocated, - 'reserved_mb': reserved, - 'total_mb': total, - 'free_mb': total - reserved + "allocated_mb": allocated, + "reserved_mb": reserved, + "total_mb": total, + "free_mb": total - reserved, } else: - return { - 'allocated_mb': 0.0, - 'reserved_mb': 0.0, - 'total_mb': 0.0, - 'free_mb': 0.0 - } + return {"allocated_mb": 0.0, "reserved_mb": 0.0, "total_mb": 0.0, "free_mb": 0.0} def get_available_models() -> List[Union[str, ModelProtocol]]: """ Get list of available models for testing. 
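The utilities in this hunk probe GPU memory by shelling out to `nvidia-smi` and parsing its CSV output, falling back gracefully when no GPU stack is present. A hedged sketch of that probe; it assumes `nvidia-smi` is on `PATH` and returns an empty list otherwise:

```python
import subprocess as sp
from typing import List


def gpu_free_memory_mb() -> List[int]:
    """Free memory per GPU in MB, or [] when nvidia-smi is unavailable."""
    command = "nvidia-smi --query-gpu=memory.free --format=csv"
    try:
        out = sp.check_output(command.split()).decode("ascii")
    except (sp.CalledProcessError, FileNotFoundError):
        return []
    # First line is the CSV header ("memory.free [MiB]"); values look like "8024 MiB".
    return [int(line.split()[0]) for line in out.strip().split("\n")[1:]]


print(gpu_free_memory_mb())
```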
- + Returns: List of model strings and model classes """ local_provider = "huggingface-local/" # Options: huggingface-local/ or mlx/ - + # from models.gta1 import GTA1Model models = [ @@ -94,42 +91,41 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: # f"{local_provider}HelloKKMe/GTA1-32B", "openai/computer-use-preview+openai/gpt-4o-mini", "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini", - # === Reference model classes === # GTA1Model("HelloKKMe/GTA1-7B"), - # GTA1Model("HelloKKMe/GTA1-32B"), + # GTA1Model("HelloKKMe/GTA1-32B"), ] - + return models def is_click_in_bbox(click_coords: Optional[Tuple[int, int]], bbox: List[int]) -> bool: """ Check if click coordinates are within the bounding box. - + Args: click_coords: (x, y) coordinates or None bbox: [x1, y1, x2, y2] bounding box - + Returns: True if click is within bbox, False otherwise """ if click_coords is None: return False - + x, y = click_coords x1, y1, x2, y2 = bbox - + return x1 <= x <= x2 and y1 <= y <= y2 def image_to_base64(image: Image.Image) -> str: """ Convert PIL Image to base64 string. - + Args: image: PIL Image - + Returns: Base64 encoded image string """ @@ -142,213 +138,252 @@ class ModelWrapper: """ Wrapper to provide unified interface for both ComputerAgent and custom models. """ - + def __init__(self, model: Union[str, ModelProtocol]): self.model = model self.is_computer_agent = isinstance(model, str) self.agent: Optional[ComputerAgent] = None self.vram_usage_history: List[float] = [] # Track VRAM usage over time - + if self.is_computer_agent: self.model_name = str(model) else: - self.model_name = f"{model.__class__.__name__}('{getattr(model, 'model_name', 'unknown')}')" - + self.model_name = ( + f"{model.__class__.__name__}('{getattr(model, 'model_name', 'unknown')}')" + ) + async def load_model(self) -> None: """Load the model.""" if self.is_computer_agent: self.agent = ComputerAgent(model=str(self.model)) else: - await self.model.load_model() # type: ignore - + await self.model.load_model() # type: ignore + # Record initial VRAM usage after loading vram_info = get_vram_usage() - self.vram_usage_history.append(vram_info['allocated_mb']) - + self.vram_usage_history.append(vram_info["allocated_mb"]) + async def unload_model(self) -> None: """Unload the model.""" if not self.is_computer_agent: - await self.model.unload_model() # type: ignore + await self.model.unload_model() # type: ignore else: del self.agent self.agent = None gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() - + # Record VRAM usage after unloading vram_info = get_vram_usage() - self.vram_usage_history.append(vram_info['allocated_mb']) - + self.vram_usage_history.append(vram_info["allocated_mb"]) + def get_vram_stats(self) -> dict: """Get VRAM usage statistics for this model.""" if not self.vram_usage_history: - return {'max_mb': 0.0, 'avg_mb': 0.0} - + return {"max_mb": 0.0, "avg_mb": 0.0} + return { - 'max_mb': max(self.vram_usage_history), - 'avg_mb': sum(self.vram_usage_history) / len(self.vram_usage_history) + "max_mb": max(self.vram_usage_history), + "avg_mb": sum(self.vram_usage_history) / len(self.vram_usage_history), } - - async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + async def predict_click( + self, image: Image.Image, instruction: str + ) -> Optional[Tuple[int, int]]: """Predict click coordinates.""" # Record VRAM usage before prediction vram_info = get_vram_usage() - self.vram_usage_history.append(vram_info['allocated_mb']) - + 
self.vram_usage_history.append(vram_info["allocated_mb"]) + if self.is_computer_agent: if self.agent is None: await self.load_model() - + if self.agent is not None: image_b64 = image_to_base64(image) - result = await self.agent.predict_click(instruction=instruction, image_b64=image_b64) - + result = await self.agent.predict_click( + instruction=instruction, image_b64=image_b64 + ) + # Record VRAM usage after prediction vram_info = get_vram_usage() - self.vram_usage_history.append(vram_info['allocated_mb']) - + self.vram_usage_history.append(vram_info["allocated_mb"]) + return result return None else: - result = await self.model.predict_click(image, instruction) # type: ignore - + result = await self.model.predict_click(image, instruction) # type: ignore + # Record VRAM usage after prediction vram_info = get_vram_usage() - self.vram_usage_history.append(vram_info['allocated_mb']) - + self.vram_usage_history.append(vram_info["allocated_mb"]) + return result -def save_results_to_markdown(all_results: List[dict],output_file: str = "screenspot_pro_results.md", title: str = "ScreenSpot-Pro Benchmark Results") -> None: +def save_results_to_markdown( + all_results: List[dict], + output_file: str = "screenspot_pro_results.md", + title: str = "ScreenSpot-Pro Benchmark Results", +) -> None: """ Save evaluation results to a markdown table. - + Args: all_results: List of evaluation results for each model output_file: Output markdown file path """ - with open(output_file, 'w', encoding='utf-8') as f: + with open(output_file, "w", encoding="utf-8") as f: f.write(f"# {title}\n\n") f.write(f"**Evaluation Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n") - + # Summary table f.write("## Summary\n\n") - f.write("| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Median Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n") - f.write("|-------|---------------|---------|--------|----------|------------|--------------|-----------------|----------------|---------------|---------------|\n") - + f.write( + "| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Median Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n" + ) + f.write( + "|-------|---------------|---------|--------|----------|------------|--------------|-----------------|----------------|---------------|---------------|\n" + ) + for result in all_results: - model_name = result['model_name'] - total = result['total_samples'] - correct = result['correct_predictions'] - errors = result['failed_predictions'] - accuracy = result['accuracy'] * 100 - error_rate = result['failure_rate'] * 100 - avg_time = result.get('avg_prediction_time', 0.0) - median_time = result.get('median_prediction_time', 0.0) - min_time = result.get('min_prediction_time', 0.0) - max_time = result.get('max_prediction_time', 0.0) + model_name = result["model_name"] + total = result["total_samples"] + correct = result["correct_predictions"] + errors = result["failed_predictions"] + accuracy = result["accuracy"] * 100 + error_rate = result["failure_rate"] * 100 + avg_time = result.get("avg_prediction_time", 0.0) + median_time = result.get("median_prediction_time", 0.0) + min_time = result.get("min_prediction_time", 0.0) + max_time = result.get("max_prediction_time", 0.0) time_range = f"{min_time:.2f} - {max_time:.2f}" - vram_max = result.get('vram_max_mb', 0.0) / 1024 - vram_avg = result.get('vram_avg_mb', 0.0) / 1024 - - f.write(f"| {model_name} | {total} | {correct} | {errors} | {accuracy:.2f}% 
| {error_rate:.2f}% | {avg_time:.2f} | {median_time:.2f} | {time_range} | {vram_max:.1f} | {vram_avg:.1f} |\n") - + vram_max = result.get("vram_max_mb", 0.0) / 1024 + vram_avg = result.get("vram_avg_mb", 0.0) / 1024 + + f.write( + f"| {model_name} | {total} | {correct} | {errors} | {accuracy:.2f}% | {error_rate:.2f}% | {avg_time:.2f} | {median_time:.2f} | {time_range} | {vram_max:.1f} | {vram_avg:.1f} |\n" + ) + # Detailed results for each model for result in all_results: f.write(f"\n## {result['model_name']} - Detailed Results\n\n") - f.write("| Sample Index | Instruction | BBox | Predicted | Correct | Error | Time (s) |\n") + f.write( + "| Sample Index | Instruction | BBox | Predicted | Correct | Error | Time (s) |\n" + ) f.write("|-----------|-------------|------|-----------|---------|-------|----------|\n") - - for sample_result in result['results'][:10]: # Show first 10 samples - sample_idx = sample_result['sample_idx'] - instruction = sample_result['instruction'][:50] + "..." if len(sample_result['instruction']) > 50 else sample_result['instruction'] - bbox = str(sample_result['bbox']) - predicted = str(sample_result['predicted_coords']) if sample_result['predicted_coords'] else "None" - correct = "PASS" if sample_result['is_correct'] else "FAIL" - error = "YES" if sample_result['failed'] else "NO" - pred_time = sample_result.get('prediction_time', 0.0) - - f.write(f"| {sample_idx} | {instruction} | {bbox} | {predicted} | {correct} | {error} | {pred_time:.2f} |\n") - - if len(result['results']) > 10: + + for sample_result in result["results"][:10]: # Show first 10 samples + sample_idx = sample_result["sample_idx"] + instruction = ( + sample_result["instruction"][:50] + "..." + if len(sample_result["instruction"]) > 50 + else sample_result["instruction"] + ) + bbox = str(sample_result["bbox"]) + predicted = ( + str(sample_result["predicted_coords"]) + if sample_result["predicted_coords"] + else "None" + ) + correct = "PASS" if sample_result["is_correct"] else "FAIL" + error = "YES" if sample_result["failed"] else "NO" + pred_time = sample_result.get("prediction_time", 0.0) + + f.write( + f"| {sample_idx} | {instruction} | {bbox} | {predicted} | {correct} | {error} | {pred_time:.2f} |\n" + ) + + if len(result["results"]) > 10: f.write(f"\n*Showing first 10 of {len(result['results'])} samples*\n") - + print(f"\nResults saved to: {output_file}") def save_visualizations(all_results: List[dict], samples, output_dir: str = "output") -> None: """ Save visualizations of predicted coordinates vs bboxes to an output folder. 
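Each saved visualization draws the ground-truth box in green and the predicted click as a crosshair, blue for a hit and red for a miss. A runnable sketch of that drawing logic on a blank canvas; the box and click values are invented:

```python
from PIL import Image, ImageDraw

# Blank canvas standing in for a screenshot.
image = Image.new("RGB", (400, 300), "white")
draw = ImageDraw.Draw(image)

bbox = [120, 80, 220, 130]               # ground truth, drawn in green
predicted, is_correct = (170, 105), True

x1, y1, x2, y2 = bbox
draw.rectangle([x1, y1, x2, y2], outline="green", width=3)
draw.text((x1, y1 - 20), "Ground Truth", fill="green")

px, py = predicted
color = "blue" if is_correct else "red"  # blue = hit, red = miss, as in this file
size = 15
draw.line([(px - size, py), (px + size, py)], fill=color, width=3)
draw.line([(px, py - size), (px, py + size)], fill=color, width=3)
draw.text((px + 10, py - 20), f"Predicted ({px},{py})", fill=color)

image.save("sample_visualization.png")
```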
- + Args: all_results: List of evaluation results for each model samples: List of sample dicts with image, bbox, instruction keys output_dir: Output directory path """ os.makedirs(output_dir, exist_ok=True) - + for result in all_results: - model_name = result['model_name'].replace('/', '_').replace('\\', '_') + model_name = result["model_name"].replace("/", "_").replace("\\", "_") model_dir = os.path.join(output_dir, model_name) os.makedirs(model_dir, exist_ok=True) - + print(f"Saving visualizations for {result['model_name']}...") - + # Save first 10 samples for visualization - for i, sample_result in enumerate(tqdm(result['results'][:10], desc=f"Saving {model_name} visualizations")): + for i, sample_result in enumerate( + tqdm(result["results"][:10], desc=f"Saving {model_name} visualizations") + ): # Get sample data using index - sample_idx = sample_result['sample_idx'] - + sample_idx = sample_result["sample_idx"] + if sample_idx < len(samples): sample = samples[sample_idx] - image = sample['image'].copy() # Make a copy to avoid modifying original + image = sample["image"].copy() # Make a copy to avoid modifying original else: print(f"Warning: Could not find sample at index {sample_idx}") continue - - bbox = sample_result['bbox'] - predicted_coords = sample_result['predicted_coords'] - is_correct = sample_result['is_correct'] - + + bbox = sample_result["bbox"] + predicted_coords = sample_result["predicted_coords"] + is_correct = sample_result["is_correct"] + # Draw on image draw = ImageDraw.Draw(image) - + # Draw bounding box (ground truth) in green x1, y1, x2, y2 = bbox draw.rectangle([x1, y1, x2, y2], outline="green", width=3) - draw.text((x1, y1-20), "Ground Truth", fill="green") - + draw.text((x1, y1 - 20), "Ground Truth", fill="green") + # Draw predicted click in red or blue if predicted_coords is not None: px, py = predicted_coords color = "blue" if is_correct else "red" # Draw crosshair crosshair_size = 15 - draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=3) - draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=3) - draw.text((px+10, py-20), f"Predicted ({px},{py})", fill=color) - + draw.line( + [(px - crosshair_size, py), (px + crosshair_size, py)], fill=color, width=3 + ) + draw.line( + [(px, py - crosshair_size), (px, py + crosshair_size)], fill=color, width=3 + ) + draw.text((px + 10, py - 20), f"Predicted ({px},{py})", fill=color) + # Add status text status = "CORRECT" if is_correct else "INCORRECT" status_color = "blue" if is_correct else "red" draw.text((10, 10), f"Status: {status}", fill=status_color) - draw.text((10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black") - + draw.text( + (10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black" + ) + # Save image filename = f"sample_{i+1:02d}_idx{sample_idx}_{status.lower()}.png" filepath = os.path.join(model_dir, filename) image.save(filepath) - + print(f"Visualizations saved to: {model_dir}") -def save_prediction_visualization(image: Image.Image, instruction: str, predictions: List[dict], - output_file: str = "interactive_prediction.png") -> None: +def save_prediction_visualization( + image: Image.Image, + instruction: str, + predictions: List[dict], + output_file: str = "interactive_prediction.png", +) -> None: """ Save visualization of multiple model predictions on a single image. 
- + Args: image: PIL Image to visualize instruction: Instruction text @@ -358,32 +393,32 @@ def save_prediction_visualization(image: Image.Image, instruction: str, predicti # Create a copy of the image vis_image = image.copy() draw = ImageDraw.Draw(vis_image) - + # Colors for different models colors = ["red", "blue", "orange", "purple", "brown", "pink", "gray", "olive"] - + # Draw predictions for i, pred in enumerate(predictions): color = colors[i % len(colors)] - model_name = pred['model_name'] - coords = pred.get('coords') - error = pred.get('error') - + model_name = pred["model_name"] + coords = pred.get("coords") + error = pred.get("error") + if coords is not None: px, py = coords # Draw crosshair crosshair_size = 20 - draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=4) - draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=4) + draw.line([(px - crosshair_size, py), (px + crosshair_size, py)], fill=color, width=4) + draw.line([(px, py - crosshair_size), (px, py + crosshair_size)], fill=color, width=4) # Draw model name - draw.text((px+15, py+15), f"{model_name}: ({px},{py})", fill=color) + draw.text((px + 15, py + 15), f"{model_name}: ({px},{py})", fill=color) else: # Draw error text - draw.text((10, 50 + i*20), f"{model_name}: ERROR - {error}", fill=color) - + draw.text((10, 50 + i * 20), f"{model_name}: ERROR - {error}", fill=color) + # Add instruction at the top draw.text((10, 10), f"Instruction: {instruction}", fill="black") - + # Save image vis_image.save(output_file) print(f"Prediction visualization saved to: {output_file}") @@ -392,12 +427,13 @@ def save_prediction_visualization(image: Image.Image, instruction: str, predicti def take_screenshot() -> Image.Image: """ Take a screenshot of the current screen. 
- + Returns: PIL Image of the screenshot """ try: import pyautogui + screenshot = pyautogui.screenshot() return screenshot except ImportError: @@ -406,4 +442,3 @@ def take_screenshot() -> Image.Image: except Exception as e: print(f"Error taking screenshot: {e}") raise - diff --git a/libs/python/agent/example.py b/libs/python/agent/example.py index fc4dc1b1..b02ccbfd 100644 --- a/libs/python/agent/example.py +++ b/libs/python/agent/example.py @@ -9,58 +9,61 @@ from agent import ComputerAgent from computer import Computer from computer.helpers import sandboxed + @sandboxed() def read_file(location: str) -> str: """Read contents of a file - + Parameters ---------- location : str Path to the file to read - + Returns ------- str Contents of the file or error message """ try: - with open(location, 'r') as f: + with open(location, "r") as f: return f.read() except Exception as e: return f"Error reading file: {str(e)}" + def save_note(content: str, filename: str = "note.txt") -> str: """Save content to a note file - + Parameters ---------- content : str Content to save to the file filename : str, optional Name of the file to save to (default is "note.txt") - + Returns ------- str Success or error message """ try: - with open(filename, 'w') as f: + with open(filename, "w") as f: f.write(content) return f"Saved note to {filename}" except Exception as e: return f"Error saving note: {str(e)}" + def calculate(a: int, b: int) -> int: """Calculate the sum of two integers - + Parameters ---------- a : int First integer b : int Second integer - + Returns ------- int @@ -68,15 +71,18 @@ def calculate(a: int, b: int) -> int: """ return a + b + async def main(): """Example usage of ComputerAgent with different models""" - + # Example 1: Using Claude with computer and custom tools print("=== Example 1: Claude with Computer ===") - - import os - import dotenv + import json + import os + + import dotenv + dotenv.load_dotenv() assert os.getenv("CUA_CONTAINER_NAME") is not None, "CUA_CONTAINER_NAME is not set" @@ -86,38 +92,37 @@ async def main(): os_type="linux", provider_type="cloud", name=os.getenv("CUA_CONTAINER_NAME") or "", - api_key=os.getenv("CUA_API_KEY") or "" + api_key=os.getenv("CUA_API_KEY") or "", ) as computer: agent = ComputerAgent( # Supported models: - # == OpenAI CUA (computer-use-preview) == model="openai/computer-use-preview", - # == Anthropic CUA (Claude > 3.5) == - # model="anthropic/claude-opus-4-20250514", + # model="anthropic/claude-opus-4-20250514", # model="anthropic/claude-sonnet-4-20250514", # model="anthropic/claude-3-7-sonnet-20250219", # model="anthropic/claude-3-5-sonnet-20241022", - # == UI-TARS == # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", # TODO: add local mlx provider # model="mlx-community/UI-TARS-1.5-7B-6bit", # model="ollama_chat/0000/ui-tars-1.5-7b", - # == Omniparser + Any LLM == # model="omniparser+..." 
# model="omniparser+anthropic/claude-opus-4-20250514", - tools=[computer], only_n_most_recent_images=3, verbosity=logging.INFO, trajectory_dir="trajectories", use_prompt_caching=True, - max_trajectory_budget={ "max_budget": 1.0, "raise_error": True, "reset_after_each_run": False }, + max_trajectory_budget={ + "max_budget": 1.0, + "raise_error": True, + "reset_after_each_run": False, + }, ) - + history = [] while True: user_input = input("> ") @@ -143,5 +148,6 @@ async def main(): # elif item["type"] == "function_call_output": # print("===>", item["output"]) + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index 6fea439c..d97b9895 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-agent" -version = "0.4.0" +version = "0.4.35" description = "CUA (Computer Use) Agent for AI-driven computer interaction" readme = "README.md" authors = [ @@ -29,6 +29,11 @@ requires-python = ">=3.12" [project.optional-dependencies] openai = [] anthropic = [] +qwen = [ + "qwen-vl-utils", + "qwen-agent", + "Pillow>=10.0.0", +] omni = [ "cua-som>=0.1.0,<0.2.0", ] @@ -49,7 +54,7 @@ glm45v-hf = [ opencua-hf = [ "accelerate", "torch", - "transformers==4.53.0", + "transformers>=4.53.0", "tiktoken>=0.11.0", "blobfile>=3.0.0" ] @@ -60,6 +65,11 @@ internvl-hf = [ "einops", "timm" ] +moondream3 = [ + "accelerate", + "torch", + "transformers>=4.55.0" +] ui = [ "gradio>=5.23.3", "python-dotenv>=1.0.1", @@ -68,7 +78,10 @@ cli = [ "yaspin>=3.1.0", ] hud = [ - "hud-python==0.4.26", + "hud-python==0.4.52", +] +gemini = [ + "google-genai>=1.41.0", ] all = [ # uitars requirements @@ -88,7 +101,13 @@ all = [ # cli requirements "yaspin>=3.1.0", # hud requirements - "hud-python==0.4.26", + "hud-python==0.4.52", + # gemini requirements + "google-genai>=1.41.0", + # qwen requirements + "qwen-vl-utils", + "qwen-agent", + "Pillow>=10.0.0", ] [tool.uv] @@ -98,4 +117,4 @@ constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"] distribution = true [tool.pdm.build] -includes = ["agent/"] +includes = ["agent/"] \ No newline at end of file diff --git a/libs/python/computer-server/.bumpversion.cfg b/libs/python/computer-server/.bumpversion.cfg new file mode 100644 index 00000000..0aa3586d --- /dev/null +++ b/libs/python/computer-server/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.1.27 +commit = True +tag = True +tag_name = computer-server-v{new_version} +message = Bump cua-computer-server to v{new_version} + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" diff --git a/libs/python/computer-server/README.md b/libs/python/computer-server/README.md index 0e914298..5c40bdd2 100644 --- a/libs/python/computer-server/README.md +++ b/libs/python/computer-server/README.md @@ -8,10 +8,11 @@ - [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) - [![PyPI](https://img.shields.io/pypi/v/cua-computer-server?color=333333)](https://pypi.org/project/cua-computer-server/) 
+[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![PyPI](https://img.shields.io/pypi/v/cua-computer-server?color=333333)](https://pypi.org/project/cua-computer-server/) + @@ -42,4 +43,4 @@ Refer to this notebook for a step-by-step guide on how to use the Computer-Use S - [Commands](https://trycua.com/docs/libraries/computer-server/Commands) - [REST-API](https://trycua.com/docs/libraries/computer-server/REST-API) - [WebSocket-API](https://trycua.com/docs/libraries/computer-server/WebSocket-API) -- [Index](https://trycua.com/docs/libraries/computer-server/index) \ No newline at end of file +- [Index](https://trycua.com/docs/libraries/computer-server/index) diff --git a/libs/python/computer-server/computer_server/__main__.py b/libs/python/computer-server/computer_server/__main__.py index 89d33d0b..f40ff635 100644 --- a/libs/python/computer-server/computer_server/__main__.py +++ b/libs/python/computer-server/computer_server/__main__.py @@ -4,6 +4,7 @@ This allows the server to be started with `python -m computer_server`. """ import sys + from .cli import main if __name__ == "__main__": diff --git a/libs/python/computer-server/computer_server/cli.py b/libs/python/computer-server/computer_server/cli.py index c7f51959..21c8cec6 100644 --- a/libs/python/computer-server/computer_server/cli.py +++ b/libs/python/computer-server/computer_server/cli.py @@ -36,7 +36,7 @@ def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace: help="Path to SSL private key file (enables HTTPS)", ) parser.add_argument( - "--ssl-certfile", + "--ssl-certfile", type=str, help="Path to SSL certificate file (enables HTTPS)", ) @@ -72,17 +72,19 @@ def main() -> None: # Check if watchdog should be enabled container_name = os.environ.get("CONTAINER_NAME") - enable_watchdog = args.watchdog or bool(container_name) - + enable_watchdog = (args.watchdog or bool(container_name)) and not sys.platform.startswith("win") + if container_name: - logger.info(f"Container environment detected (CONTAINER_NAME={container_name}), enabling watchdog") + logger.info( + f"Container environment detected (CONTAINER_NAME={container_name}), enabling watchdog" + ) elif args.watchdog: logger.info("Watchdog explicitly enabled via --watchdog flag") - + # Start watchdog if enabled if enable_watchdog: logger.info(f"Starting watchdog monitoring with {args.watchdog_interval}s interval") - + def run_watchdog_thread(): """Run watchdog in a separate thread.""" loop = asyncio.new_event_loop() @@ -90,38 +92,32 @@ def main() -> None: try: # Create CLI args dict for watchdog cli_args = { - 'host': args.host, - 'port': args.port, - 'log_level': args.log_level, - 'ssl_keyfile': args.ssl_keyfile, - 'ssl_certfile': args.ssl_certfile + "host": args.host, + "port": args.port, + "log_level": args.log_level, + "ssl_keyfile": args.ssl_keyfile, + "ssl_certfile": args.ssl_certfile, } - + # Create watchdog with restart settings from .watchdog import Watchdog - watchdog = Watchdog( - cli_args=cli_args, - ping_interval=args.watchdog_interval - ) + + watchdog = Watchdog(cli_args=cli_args, ping_interval=args.watchdog_interval) watchdog.restart_enabled = not args.no_restart - + loop.run_until_complete(watchdog.start_monitoring()) except Exception as e: logger.error(f"Watchdog error: {e}") 
finally: loop.close() - + # Start watchdog in background thread - watchdog_thread = threading.Thread( - target=run_watchdog_thread, - daemon=True, - name="watchdog" - ) + watchdog_thread = threading.Thread(target=run_watchdog_thread, daemon=True, name="watchdog") watchdog_thread.start() # Create and start the server logger.info(f"Starting CUA Computer API server on {args.host}:{args.port}...") - + # Handle SSL configuration ssl_args = {} if args.ssl_keyfile and args.ssl_certfile: @@ -131,10 +127,12 @@ def main() -> None: } logger.info("HTTPS mode enabled with SSL certificates") elif args.ssl_keyfile or args.ssl_certfile: - logger.warning("Both --ssl-keyfile and --ssl-certfile are required for HTTPS. Running in HTTP mode.") + logger.warning( + "Both --ssl-keyfile and --ssl-certfile are required for HTTPS. Running in HTTP mode." + ) else: logger.info("HTTP mode (no SSL certificates provided)") - + server = Server(host=args.host, port=args.port, log_level=args.log_level, **ssl_args) try: diff --git a/libs/python/computer-server/computer_server/diorama/base.py b/libs/python/computer-server/computer_server/diorama/base.py index 3ca01133..41352818 100644 --- a/libs/python/computer-server/computer_server/diorama/base.py +++ b/libs/python/computer-server/computer_server/diorama/base.py @@ -1,4 +1,5 @@ class BaseDioramaHandler: """Base Diorama handler for unsupported OSes.""" + async def diorama_cmd(self, action: str, arguments: dict = None) -> dict: return {"success": False, "error": "Diorama is not supported on this OS yet."} diff --git a/libs/python/computer-server/computer_server/diorama/diorama.py b/libs/python/computer-server/computer_server/diorama/diorama.py index 3a63b0b6..f5e34cac 100644 --- a/libs/python/computer-server/computer_server/diorama/diorama.py +++ b/libs/python/computer-server/computer_server/diorama/diorama.py @@ -1,31 +1,38 @@ #!/usr/bin/env python3 """Diorama: A virtual desktop manager for macOS""" -import os import asyncio -import logging -import sys import io +import logging +import os +import sys from typing import Union -from PIL import Image, ImageDraw - -from computer_server.diorama.draw import capture_all_apps, AppActivationContext, get_frontmost_and_active_app, get_all_windows, get_running_apps from computer_server.diorama.diorama_computer import DioramaComputer +from computer_server.diorama.draw import ( + AppActivationContext, + capture_all_apps, + get_all_windows, + get_frontmost_and_active_app, + get_running_apps, +) from computer_server.handlers.macos import * +from PIL import Image, ImageDraw # simple, nicely formatted logging logger = logging.getLogger(__name__) automation_handler = MacOSAutomationHandler() + class Diorama: """Virtual desktop manager that provides automation capabilities for macOS applications. - + Manages application windows and provides an interface for taking screenshots, mouse interactions, keyboard input, and coordinate transformations between screenshot space and screen space. """ + _scheduler_queue = None _scheduler_task = None _loop = None @@ -34,10 +41,10 @@ class Diorama: @classmethod def create_from_apps(cls, *args) -> DioramaComputer: """Create a DioramaComputer instance from a list of application names. 
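The watchdog hunk above uses a common pattern: a daemon thread that owns a private asyncio event loop, leaving the main thread free for the server. A minimal, self-contained sketch of that pattern, with `monitor` standing in for `Watchdog.start_monitoring`:

```python
import asyncio
import threading
import time

async def monitor(interval: float = 1.0) -> None:
    # Stand-in for Watchdog.start_monitoring(): ping on an interval, forever.
    while True:
        print("ping")
        await asyncio.sleep(interval)

def run_monitor_thread() -> None:
    # As in run_watchdog_thread above: the background thread owns a private
    # event loop, runs the monitor on it, and closes it on the way out.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(monitor())
    finally:
        loop.close()

threading.Thread(target=run_monitor_thread, daemon=True, name="watchdog").start()
time.sleep(3)  # the daemon thread dies with the main thread, as in the CLI
```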
- + Args: *args: Variable number of application names to include in the desktop - + Returns: DioramaComputer: A computer interface for the specified applications """ @@ -46,10 +53,10 @@ class Diorama: # Dictionary to store cursor positions for each unique app_list hash _cursor_positions = {} - + def __init__(self, app_list): """Initialize a Diorama instance for the specified applications. - + Args: app_list: List of application names to manage """ @@ -57,10 +64,10 @@ class Diorama: self.interface = self.Interface(self) self.computer = DioramaComputer(self) self.focus_context = None - + # Create a hash for this app_list to use as a key self.app_list_hash = hash(tuple(sorted(app_list))) - + # Initialize cursor position for this app_list if it doesn't exist if self.app_list_hash not in Diorama._cursor_positions: Diorama._cursor_positions[self.app_list_hash] = (0, 0) @@ -68,7 +75,7 @@ class Diorama: @classmethod def _ensure_scheduler(cls): """Ensure the async scheduler loop is running. - + Creates and starts the scheduler task if it hasn't been started yet. """ if not cls._scheduler_started: @@ -81,7 +88,7 @@ class Diorama: @classmethod async def _scheduler_loop(cls): """Main scheduler loop that processes automation commands. - + Continuously processes commands from the scheduler queue, handling screenshots, mouse actions, keyboard input, and scrolling operations. """ @@ -91,31 +98,37 @@ class Diorama: args = cmd.get("arguments", {}) future = cmd.get("future") logger.info(f"Processing command: {action} | args={args}") - + app_whitelist = args.get("app_list", []) - + all_windows = get_all_windows() running_apps = get_running_apps() - frontmost_app, active_app_to_use, active_app_pid = get_frontmost_and_active_app(all_windows, running_apps, app_whitelist) + frontmost_app, active_app_to_use, active_app_pid = get_frontmost_and_active_app( + all_windows, running_apps, app_whitelist + ) focus_context = AppActivationContext(active_app_pid, active_app_to_use, logger) - + with focus_context: try: if action == "screenshot": logger.info(f"Taking screenshot for apps: {app_whitelist}") result, img = capture_all_apps( - app_whitelist=app_whitelist, - save_to_disk=False, - take_focus=False + app_whitelist=app_whitelist, save_to_disk=False, take_focus=False ) logger.info("Screenshot complete.") if future: future.set_result((result, img)) # Mouse actions - elif action in ["left_click", "right_click", "double_click", "move_cursor", "drag_to"]: + elif action in [ + "left_click", + "right_click", + "double_click", + "move_cursor", + "drag_to", + ]: x = args.get("x") y = args.get("y") - + duration = args.get("duration", 0.5) if action == "left_click": await automation_handler.left_click(x, y) @@ -134,7 +147,7 @@ class Diorama: y = args.get("y") if x is not None and y is not None: await automation_handler.move_cursor(x, y) - + clicks = args.get("clicks", 1) if action == "scroll_up": await automation_handler.scroll_up(clicks) @@ -171,31 +184,31 @@ class Diorama: if future: future.set_exception(e) - class Interface(): + class Interface: """Interface for interacting with the virtual desktop. - + Provides methods for taking screenshots, mouse interactions, keyboard input, and coordinate transformations between screenshot and screen coordinates. """ - + def __init__(self, diorama): """Initialize the interface with a reference to the parent Diorama instance. 
- + Args: diorama: The parent Diorama instance """ self._diorama = diorama - + self._scene_hitboxes = [] self._scene_size = None async def _send_cmd(self, action, arguments=None): """Send a command to the scheduler queue. - + Args: action (str): The action to perform arguments (dict, optional): Arguments for the action - + Returns: The result of the command execution """ @@ -203,11 +216,13 @@ class Diorama: loop = asyncio.get_event_loop() future = loop.create_future() logger.info(f"Enqueuing {action} command for apps: {self._diorama.app_list}") - await Diorama._scheduler_queue.put({ - "action": action, - "arguments": {"app_list": self._diorama.app_list, **(arguments or {})}, - "future": future - }) + await Diorama._scheduler_queue.put( + { + "action": action, + "arguments": {"app_list": self._diorama.app_list, **(arguments or {})}, + "future": future, + } + ) try: return await future except asyncio.CancelledError: @@ -216,21 +231,23 @@ class Diorama: async def screenshot(self, as_bytes: bool = True) -> Union[str, Image.Image]: """Take a screenshot of the managed applications. - + Args: as_bytes (bool): If True, return base64-encoded bytes; if False, return PIL Image - + Returns: Union[str, Image.Image]: Base64-encoded PNG bytes or PIL Image object """ import base64 + result, img = await self._send_cmd("screenshot") self._scene_hitboxes = result.get("hitboxes", []) self._scene_size = img.size - + if as_bytes: # PIL Image to bytes, then base64 encode for JSON import io + img_byte_arr = io.BytesIO() img.save(img_byte_arr, format="PNG") img_bytes = img_byte_arr.getvalue() @@ -241,7 +258,7 @@ class Diorama: async def left_click(self, x, y): """Perform a left mouse click at the specified coordinates. - + Args: x (int): X coordinate in screenshot space (or None to use last position) y (int): Y coordinate in screenshot space (or None to use last position) @@ -258,7 +275,7 @@ class Diorama: async def right_click(self, x, y): """Perform a right mouse click at the specified coordinates. - + Args: x (int): X coordinate in screenshot space (or None to use last position) y (int): Y coordinate in screenshot space (or None to use last position) @@ -269,13 +286,13 @@ class Diorama: x, y = x or last_pos[0], y or last_pos[1] # Update cursor position for this app_list hash Diorama._cursor_positions[app_list_hash] = (x, y) - + sx, sy = await self.to_screen_coordinates(x, y) await self._send_cmd("right_click", {"x": sx, "y": sy}) async def double_click(self, x, y): """Perform a double mouse click at the specified coordinates. - + Args: x (int): X coordinate in screenshot space (or None to use last position) y (int): Y coordinate in screenshot space (or None to use last position) @@ -286,13 +303,13 @@ class Diorama: x, y = x or last_pos[0], y or last_pos[1] # Update cursor position for this app_list hash Diorama._cursor_positions[app_list_hash] = (x, y) - + sx, sy = await self.to_screen_coordinates(x, y) await self._send_cmd("double_click", {"x": sx, "y": sy}) async def move_cursor(self, x, y): """Move the mouse cursor to the specified coordinates. 
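`_send_cmd` and `_scheduler_loop` above form a queue-plus-future round trip: callers enqueue a command carrying a fresh future, and the scheduler resolves it when the work is done. A stripped-down sketch of the same handshake (names are illustrative):

```python
import asyncio

async def scheduler(queue: asyncio.Queue) -> None:
    # Counterpart of Diorama._scheduler_loop: resolve each command's future.
    while True:
        cmd = await queue.get()
        cmd["future"].set_result(f"did {cmd['action']}")

async def send_cmd(queue: asyncio.Queue, action: str) -> str:
    # Counterpart of Interface._send_cmd: enqueue a command, await its future.
    future = asyncio.get_running_loop().create_future()
    await queue.put({"action": action, "future": future})
    return await future

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    task = asyncio.create_task(scheduler(queue))
    print(await send_cmd(queue, "screenshot"))  # -> "did screenshot"
    task.cancel()

asyncio.run(main())
```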
- + Args: x (int): X coordinate in screenshot space (or None to use last position) y (int): Y coordinate in screenshot space (or None to use last position) @@ -303,13 +320,13 @@ class Diorama: x, y = x or last_pos[0], y or last_pos[1] # Update cursor position for this app_list hash Diorama._cursor_positions[app_list_hash] = (x, y) - + sx, sy = await self.to_screen_coordinates(x, y) await self._send_cmd("move_cursor", {"x": sx, "y": sy}) async def drag_to(self, x, y, duration=0.5): """Drag the mouse from current position to the specified coordinates. - + Args: x (int): X coordinate in screenshot space (or None to use last position) y (int): Y coordinate in screenshot space (or None to use last position) @@ -321,13 +338,13 @@ class Diorama: x, y = x or last_pos[0], y or last_pos[1] # Update cursor position for this app_list hash Diorama._cursor_positions[app_list_hash] = (x, y) - + sx, sy = await self.to_screen_coordinates(x, y) await self._send_cmd("drag_to", {"x": sx, "y": sy, "duration": duration}) async def get_cursor_position(self): """Get the current cursor position in screen coordinates. - + Returns: tuple: (x, y) coordinates of the cursor in screen space """ @@ -335,7 +352,7 @@ class Diorama: async def type_text(self, text): """Type the specified text using the keyboard. - + Args: text (str): The text to type """ @@ -343,7 +360,7 @@ class Diorama: async def press_key(self, key): """Press a single key on the keyboard. - + Args: key (str): The key to press """ @@ -351,7 +368,7 @@ class Diorama: async def hotkey(self, keys): """Press a combination of keys simultaneously. - + Args: keys (list): List of keys to press together """ @@ -359,7 +376,7 @@ class Diorama: async def scroll_up(self, clicks: int = 1): """Scroll up at the current cursor position. - + Args: clicks (int): Number of scroll clicks to perform """ @@ -367,12 +384,12 @@ class Diorama: app_list_hash = hash(tuple(sorted(self._diorama.app_list))) last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) x, y = last_pos[0], last_pos[1] - + await self._send_cmd("scroll_up", {"clicks": clicks, "x": x, "y": y}) async def scroll_down(self, clicks: int = 1): """Scroll down at the current cursor position. - + Args: clicks (int): Number of scroll clicks to perform """ @@ -380,18 +397,18 @@ class Diorama: app_list_hash = hash(tuple(sorted(self._diorama.app_list))) last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0)) x, y = last_pos[0], last_pos[1] - + await self._send_cmd("scroll_down", {"clicks": clicks, "x": x, "y": y}) async def get_screen_size(self) -> dict[str, int]: """Get the size of the screenshot area. - + Returns: dict[str, int]: Dictionary with 'width' and 'height' keys """ if not self._scene_size: await self.screenshot() - return { "width": self._scene_size[0], "height": self._scene_size[1] } + return {"width": self._scene_size[0], "height": self._scene_size[1]} async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]: """Convert screenshot coordinates to screen coordinates. 
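The click/move methods above share one piece of state: a cursor position remembered per app list, keyed by an order-insensitive hash. A minimal sketch of that bookkeeping, including the `x or last_x` fallback quirk (coordinate 0 also falls back to the last position, as in the original):

```python
# Cursor memory keyed by an order-insensitive hash of the app list,
# mirroring Diorama._cursor_positions above.
cursor_positions: dict[int, tuple[int, int]] = {}

def click(app_list: list[str], x: int | None = None, y: int | None = None) -> tuple[int, int]:
    key = hash(tuple(sorted(app_list)))  # ["Notes", "Discord"] == ["Discord", "Notes"]
    last_x, last_y = cursor_positions.get(key, (0, 0))
    x, y = x or last_x, y or last_y      # None (or 0) falls back to the last position
    cursor_positions[key] = (x, y)
    return x, y

click(["Discord", "Notes"], 100, 200)
print(click(["Notes", "Discord"]))       # (100, 200): same desktop, same memory
```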
@@ -404,29 +421,29 @@ class Diorama: tuple[float, float]: (x, y) absolute coordinates in screen space """ if not self._scene_hitboxes: - await self.screenshot() # get hitboxes + await self.screenshot() # get hitboxes # Try all hitboxes for h in self._scene_hitboxes[::-1]: rect_from = h.get("hitbox") rect_to = h.get("target") if not rect_from or len(rect_from) != 4: continue - + # check if (x, y) is inside rect_from x0, y0, x1, y1 = rect_from if x0 <= x <= x1 and y0 <= y <= y1: logger.info(f"Found hitbox: {h}") # remap (x, y) to rect_to tx0, ty0, tx1, ty1 = rect_to - + # calculate offset from x0, y0 offset_x = x - x0 offset_y = y - y0 - + # remap offset to rect_to tx = tx0 + offset_x ty = ty0 + offset_y - + return tx, ty return x, y @@ -441,34 +458,37 @@ class Diorama: tuple[float, float]: (x, y) absolute coordinates in screenshot space """ if not self._scene_hitboxes: - await self.screenshot() # get hitboxes + await self.screenshot() # get hitboxes # Try all hitboxes for h in self._scene_hitboxes[::-1]: rect_from = h.get("target") rect_to = h.get("hitbox") if not rect_from or len(rect_from) != 4: continue - + # check if (x, y) is inside rect_from x0, y0, x1, y1 = rect_from if x0 <= x <= x1 and y0 <= y <= y1: # remap (x, y) to rect_to tx0, ty0, tx1, ty1 = rect_to - + # calculate offset from x0, y0 offset_x = x - x0 offset_y = y - y0 - + # remap offset to rect_to tx = tx0 + offset_x ty = ty0 + offset_y - + return tx, ty return x, y -import pyautogui + import time +import pyautogui + + async def main(): """Main function demonstrating Diorama usage with multiple desktops and mouse tracking.""" desktop1 = Diorama.create_from_apps(["Discord", "Notes"]) @@ -511,7 +531,7 @@ async def main(): # Draw on a copy of the screenshot frame = base_img.copy() frame_draw = ImageDraw.Draw(frame) - frame_draw.ellipse((sx-5, sy-5, sx+5, sy+5), fill="blue", outline="blue") + frame_draw.ellipse((sx - 5, sy - 5, sx + 5, sy + 5), fill="blue", outline="blue") # Save the frame frame.save("app_screenshots/desktop3_mouse.png") print(f"Mouse at screen ({mouse_x}, {mouse_y}) -> screenshot ({sx:.1f}, {sy:.1f})") @@ -520,15 +540,13 @@ async def main(): print("Stopped tracking.") draw.text((rect[0], rect[1]), str(idx), fill="red") - + canvas.save("app_screenshots/desktop3_hitboxes.png") - - # move mouse in a square spiral around the screen import math import random - + step = 20 # pixels per move dot_radius = 10 width = screen_size["width"] @@ -539,11 +557,12 @@ async def main(): await desktop3.interface.move_cursor(x, y) img = await desktop3.interface.screenshot(as_bytes=False) draw = ImageDraw.Draw(img) - draw.ellipse((x-dot_radius, y-dot_radius, x+dot_radius, y+dot_radius), fill="red") + draw.ellipse((x - dot_radius, y - dot_radius, x + dot_radius, y + dot_radius), fill="red") img.save("current.png") await asyncio.sleep(0.03) x += step y = math.sin(x / width * math.pi * 2) * 50 + 25 + if __name__ == "__main__": asyncio.run(main()) diff --git a/libs/python/computer-server/computer_server/diorama/diorama_computer.py b/libs/python/computer-server/computer_server/diorama/diorama_computer.py index c00bd86f..f37d4299 100644 --- a/libs/python/computer-server/computer_server/diorama/diorama_computer.py +++ b/libs/python/computer-server/computer_server/diorama/diorama_computer.py @@ -1,14 +1,16 @@ import asyncio + class DioramaComputer: """ A minimal Computer-like interface for Diorama, compatible with ComputerAgent. Implements _initialized, run(), and __aenter__ for agent compatibility. 
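`to_screen_coordinates` and its inverse apply the same remapping: find the hitbox containing the point, then carry the point's offset within that rectangle over into the paired target rectangle. A small worked sketch with made-up rectangles:

```python
def remap(x: float, y: float, hitboxes: list[dict]) -> tuple[float, float]:
    # Same math as to_screen_coordinates above: locate the hitbox containing
    # (x, y) and translate the offset within it into the target rect.
    for h in reversed(hitboxes):
        x0, y0, x1, y1 = h["hitbox"]
        if x0 <= x <= x1 and y0 <= y <= y1:
            tx0, ty0, _, _ = h["target"]
            return tx0 + (x - x0), ty0 + (y - y0)
    return x, y  # no hitbox claims the point; fall back to identity

# A dock icon drawn at x 300-364 in the screenshot but living at x 900-964 on screen:
hitboxes = [{"hitbox": [300, 700, 364, 764], "target": [900, 1040, 964, 1104]}]
print(remap(310, 710, hitboxes))  # -> (910, 1050)
```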
""" + def __init__(self, diorama): """ Initialize the DioramaComputer with a diorama instance. - + Args: diorama: The diorama instance to wrap with a computer-like interface. """ @@ -19,10 +21,10 @@ class DioramaComputer: async def __aenter__(self): """ Async context manager entry method for compatibility with ComputerAgent. - + Ensures an event loop is running and marks the instance as initialized. Creates a new event loop if none is currently running. - + Returns: DioramaComputer: The initialized instance. """ @@ -37,10 +39,10 @@ class DioramaComputer: async def run(self): """ Run method stub for compatibility with ComputerAgent interface. - + Ensures the instance is initialized before returning. If not already initialized, calls __aenter__ to perform initialization. - + Returns: DioramaComputer: The initialized instance. """ diff --git a/libs/python/computer-server/computer_server/diorama/draw.py b/libs/python/computer-server/computer_server/diorama/draw.py index e915b790..07209824 100644 --- a/libs/python/computer-server/computer_server/diorama/draw.py +++ b/libs/python/computer-server/computer_server/diorama/draw.py @@ -3,7 +3,7 @@ This script renders filtered views of the macOS desktop, preserving only selected applications while maintaining system UI elements like menubar and dock. Each "diorama" shows a consistent -view of the system while isolating specific applications. +view of the system while isolating specific applications. The image is "smart resized" to remove any empty space around the menubar and dock. @@ -15,26 +15,28 @@ Key features: - Supports parallel views of the same desktop for multi-agent systems """ -import sys -import os -import time import argparse -from typing import List, Dict, Any, Optional, Tuple -import json -from PIL import Image, ImageDraw -import io import asyncio import functools +import io +import json import logging +import os +import sys +import time +from typing import Any, Dict, List, Optional, Tuple + +from PIL import Image, ImageDraw # simple, nicely formatted logging logger = logging.getLogger(__name__) from computer_server.diorama.safezone import ( - get_menubar_bounds, get_dock_bounds, + get_menubar_bounds, ) + # Timing decorator for profiling def timing_decorator(func): @functools.wraps(func) @@ -45,46 +47,46 @@ def timing_decorator(func): elapsed_time = end_time - start_time logger.debug(f"Function {func.__name__} took {elapsed_time:.4f} seconds to run") return result + return wrapper + # Import Objective-C bridge libraries try: - import Quartz import AppKit - from ApplicationServices import ( - AXUIElementCreateSystemWide, # type: ignore - AXUIElementCreateApplication, # type: ignore - AXUIElementCopyAttributeValue, # type: ignore - AXUIElementCopyAttributeValues, # type: ignore - kAXFocusedWindowAttribute, # type: ignore - kAXWindowsAttribute, # type: ignore - kAXMainWindowAttribute, # type: ignore - kAXChildrenAttribute, # type: ignore - kAXRoleAttribute, # type: ignore - kAXTitleAttribute, # type: ignore - kAXValueAttribute, # type: ignore - kAXDescriptionAttribute, # type: ignore - kAXEnabledAttribute, # type: ignore - kAXPositionAttribute, # type: ignore - kAXSizeAttribute, # type: ignore - kAXErrorSuccess, # type: ignore - AXValueGetType, # type: ignore - kAXValueCGSizeType, # type: ignore - kAXValueCGPointType, # type: ignore - kAXValueCFRangeType, # type: ignore - AXUIElementGetTypeID, # type: ignore - AXValueGetValue, # type: ignore - kAXVisibleChildrenAttribute, # type: ignore - kAXRoleDescriptionAttribute, # type: ignore - 
kAXFocusedApplicationAttribute, # type: ignore - kAXFocusedUIElementAttribute, # type: ignore - kAXSelectedTextAttribute, # type: ignore - kAXSelectedTextRangeAttribute, # type: ignore - ) - from AppKit import NSWorkspace, NSApplication, NSApp, NSRunningApplication import Foundation - from Foundation import NSObject, NSMakeRect import objc + import Quartz + from AppKit import NSApp, NSApplication, NSRunningApplication, NSWorkspace + from ApplicationServices import AXUIElementCopyAttributeValue # type: ignore + from ApplicationServices import AXUIElementCopyAttributeValues # type: ignore + from ApplicationServices import AXUIElementCreateApplication # type: ignore + from ApplicationServices import AXUIElementCreateSystemWide # type: ignore + from ApplicationServices import AXUIElementGetTypeID # type: ignore + from ApplicationServices import AXValueGetType # type: ignore + from ApplicationServices import AXValueGetValue # type: ignore + from ApplicationServices import kAXChildrenAttribute # type: ignore + from ApplicationServices import kAXDescriptionAttribute # type: ignore + from ApplicationServices import kAXEnabledAttribute # type: ignore + from ApplicationServices import kAXErrorSuccess # type: ignore + from ApplicationServices import kAXFocusedApplicationAttribute # type: ignore + from ApplicationServices import kAXFocusedUIElementAttribute # type: ignore + from ApplicationServices import kAXFocusedWindowAttribute # type: ignore + from ApplicationServices import kAXMainWindowAttribute # type: ignore + from ApplicationServices import kAXPositionAttribute # type: ignore + from ApplicationServices import kAXRoleAttribute # type: ignore + from ApplicationServices import kAXRoleDescriptionAttribute # type: ignore + from ApplicationServices import kAXSelectedTextAttribute # type: ignore + from ApplicationServices import kAXSelectedTextRangeAttribute # type: ignore + from ApplicationServices import kAXSizeAttribute # type: ignore + from ApplicationServices import kAXTitleAttribute # type: ignore + from ApplicationServices import kAXValueAttribute # type: ignore + from ApplicationServices import kAXValueCFRangeType # type: ignore + from ApplicationServices import kAXValueCGPointType # type: ignore + from ApplicationServices import kAXValueCGSizeType # type: ignore + from ApplicationServices import kAXVisibleChildrenAttribute # type: ignore + from ApplicationServices import kAXWindowsAttribute # type: ignore + from Foundation import NSMakeRect, NSObject except ImportError: logger.error("Error: This script requires PyObjC to be installed.") logger.error("Please install it with: pip install pyobjc") @@ -111,7 +113,7 @@ kCGWindowAlpha = "kCGWindowAlpha" # Window opacity NSApplicationActivationOptions = { "regular": 0, # Default activation "bringing_all_windows_forward": 1 << 0, # NSApplicationActivateAllWindows - "ignoring_other_apps": 1 << 1 # NSApplicationActivateIgnoringOtherApps + "ignoring_other_apps": 1 << 1, # NSApplicationActivateIgnoringOtherApps } @@ -168,6 +170,7 @@ def CFAttributeToPyObject(attrValue): except KeyError: return None + def element_attribute(element, attribute): if attribute == kAXChildrenAttribute: err, value = AXUIElementCopyAttributeValues(element, attribute, 0, 999, None) @@ -184,6 +187,7 @@ def element_attribute(element, attribute): return value return None + def element_value(element, type): err, value = AXValueGetValue(element, type, None) if err == True: @@ -194,19 +198,20 @@ def element_value(element, type): @timing_decorator def get_running_apps() -> 
List[NSRunningApplication]: """Get list of all running applications - + Returns: List of NSRunningApplication objects """ return NSWorkspace.sharedWorkspace().runningApplications() + # @timing_decorator def get_app_info(app: NSRunningApplication) -> Dict[str, Any]: """Get information about an application - + Args: app: NSRunningApplication object - + Returns: Dictionary with application information """ @@ -219,48 +224,49 @@ def get_app_info(app: NSRunningApplication) -> Dict[str, Any]: "terminated": app.isTerminated(), } + @timing_decorator def get_all_windows() -> List[Dict[str, Any]]: """Get all windows from all applications with z-order information - + Returns: List of window dictionaries with z-order information """ # Get all windows from Quartz # The kCGWindowListOptionOnScreenOnly flag gets only visible windows with preserved z-order window_list = Quartz.CGWindowListCopyWindowInfo( - Quartz.kCGWindowListOptionOnScreenOnly, - Quartz.kCGNullWindowID + Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID ) - + # Create a dictionary of window z-order - z_order = {window['kCGWindowNumber']: z_index for z_index, window in enumerate(window_list[::-1])} - + z_order = { + window["kCGWindowNumber"]: z_index for z_index, window in enumerate(window_list[::-1]) + } + # The kCGWindowListOptionAll flag gets all windows *without* z-order preserved window_list_all = Quartz.CGWindowListCopyWindowInfo( - Quartz.kCGWindowListOptionAll, - Quartz.kCGNullWindowID + Quartz.kCGWindowListOptionAll, Quartz.kCGNullWindowID ) - + # Process all windows windows = [] for window in window_list_all: # We track z_index which is the index in the window list (0 is the desktop / background) - + # Get window properties - window_id = window.get('kCGWindowNumber', 0) - window_name = window.get('kCGWindowName', '') - window_pid = window.get('kCGWindowOwnerPID', 0) - window_bounds = window.get('kCGWindowBounds', {}) - window_owner = window.get('kCGWindowOwnerName', '') - window_is_on_screen = window.get('kCGWindowIsOnscreen', False) - + window_id = window.get("kCGWindowNumber", 0) + window_name = window.get("kCGWindowName", "") + window_pid = window.get("kCGWindowOwnerPID", 0) + window_bounds = window.get("kCGWindowBounds", {}) + window_owner = window.get("kCGWindowOwnerName", "") + window_is_on_screen = window.get("kCGWindowIsOnscreen", False) + # Get z-order information # Note: kCGWindowLayer provides the system's layer value (lower values are higher in the stack) layer = window.get(kCGWindowLayer, 0) opacity = window.get(kCGWindowAlpha, 1.0) z_index = z_order.get(window_id, -1) - + # Determine window role (desktop, dock, menubar, app) if window_name == "Dock" and window_owner == "Dock": role = "dock" @@ -270,46 +276,57 @@ def get_all_windows() -> List[Dict[str, Any]]: role = "desktop" else: role = "app" - + # Only include windows with valid bounds if window_bounds: - windows.append({ - "id": window_id, - "name": window_name or "Unnamed Window", - "pid": window_pid, - "owner": window_owner, - "role": role, - "is_on_screen": window_is_on_screen, - "bounds": { - "x": window_bounds.get('X', 0), - "y": window_bounds.get('Y', 0), - "width": window_bounds.get('Width', 0), - "height": window_bounds.get('Height', 0) - }, - "layer": layer, # System layer (lower values are higher in stack) - "z_index": z_index, # Our z-index (0 is the desktop) - "opacity": opacity - }) - + windows.append( + { + "id": window_id, + "name": window_name or "Unnamed Window", + "pid": window_pid, + "owner": window_owner, + "role": role, + 
"is_on_screen": window_is_on_screen, + "bounds": { + "x": window_bounds.get("X", 0), + "y": window_bounds.get("Y", 0), + "width": window_bounds.get("Width", 0), + "height": window_bounds.get("Height", 0), + }, + "layer": layer, # System layer (lower values are higher in stack) + "z_index": z_index, # Our z-index (0 is the desktop) + "opacity": opacity, + } + ) + windows = sorted(windows, key=lambda x: x["z_index"]) - + return windows + def get_app_windows(app_pid: int, all_windows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Get all windows for a specific application - + Args: app_pid: Process ID of the application all_windows: List of all windows with z-order information - + Returns: List of window dictionaries for the app """ # Filter windows by PID return [window for window in all_windows if window["pid"] == app_pid] + @timing_decorator -def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[Dict[str, Any]] = None, dock_bounds: Dict[str, float] = None, dock_items: List[Dict[str, Any]] = None, menubar_bounds: Dict[str, float] = None, menubar_items: List[Dict[str, Any]] = None) -> Tuple[Optional[Image.Image], List[Dict[str, Any]]]: +def draw_desktop_screenshot( + app_whitelist: List[str] = None, + all_windows: List[Dict[str, Any]] = None, + dock_bounds: Dict[str, float] = None, + dock_items: List[Dict[str, Any]] = None, + menubar_bounds: Dict[str, float] = None, + menubar_items: List[Dict[str, Any]] = None, +) -> Tuple[Optional[Image.Image], List[Dict[str, Any]]]: """Capture a screenshot of the entire desktop using Quartz compositing, including dock as a second pass. Args: app_whitelist: Optional list of app names to include in the screenshot @@ -340,7 +357,7 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D # Screenshot-to-screen hitboxes hitboxes = [] - + if app_whitelist is None: # Single pass: desktop, menubar, app, dock window_list = Foundation.CFArrayCreateMutable(None, len(all_windows), None) @@ -360,31 +377,46 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D None, width, height, 8, 0, color_space, Quartz.kCGImageAlphaPremultipliedLast ) Quartz.CGContextDrawImage(cg_context, screen_rect, cg_image) - hitboxes.append({ - "hitbox": [0, 0, width, height], - "target": [0, 0, width, height] - }) + hitboxes.append({"hitbox": [0, 0, width, height], "target": [0, 0, width, height]}) else: # Filter out windows that are not in the whitelist - all_windows = [window for window in all_windows if window["owner"] in app_whitelist or window["role"] != "app"] + all_windows = [ + window + for window in all_windows + if window["owner"] in app_whitelist or window["role"] != "app" + ] app_windows = [window for window in all_windows if window["role"] == "app"] - + dock_orientation = "side" if dock_bounds["width"] < dock_bounds["height"] else "bottom" - - menubar_length = max(item["bounds"]["x"] + item["bounds"]["width"] for item in menubar_items) if menubar_items else 0 - + + menubar_length = ( + max(item["bounds"]["x"] + item["bounds"]["width"] for item in menubar_items) + if menubar_items + else 0 + ) + # Calculate bounds of app windows app_bounds = { "x": min(window["bounds"]["x"] for window in app_windows) if app_windows else 0, "y": min(window["bounds"]["y"] for window in app_windows) if app_windows else 0, } - app_bounds["width"] = max(window["bounds"]["x"] + window["bounds"]["width"] for window in app_windows) - app_bounds["x"] if app_windows else 0 - app_bounds["height"] = max(window["bounds"]["y"] 
+ window["bounds"]["height"] for window in app_windows) - app_bounds["y"] if app_windows else 0 - + app_bounds["width"] = ( + max(window["bounds"]["x"] + window["bounds"]["width"] for window in app_windows) + - app_bounds["x"] + if app_windows + else 0 + ) + app_bounds["height"] = ( + max(window["bounds"]["y"] + window["bounds"]["height"] for window in app_windows) + - app_bounds["y"] + if app_windows + else 0 + ) + # Set minimum bounds of 256x256 app_bounds["width"] = max(app_bounds["width"], 256) app_bounds["height"] = max(app_bounds["height"], 256) - + # Add dock bounds to app bounds if dock_orientation == "bottom": app_bounds["height"] += dock_bounds["height"] + 4 @@ -394,19 +426,21 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D else: app_bounds["x"] -= dock_bounds["width"] + 4 app_bounds["width"] += dock_bounds["width"] + 4 - + # Add menubar bounds to app bounds app_bounds["height"] += menubar_bounds["height"] - + # Make sure app bounds contains menubar bounds app_bounds["width"] = max(app_bounds["width"], menubar_length) - + # Clamp bounds to screen app_bounds["x"] = max(app_bounds["x"], 0) app_bounds["y"] = max(app_bounds["y"], 0) app_bounds["width"] = min(app_bounds["width"], frame.size.width - app_bounds["x"]) - app_bounds["height"] = min(app_bounds["height"], frame.size.height - app_bounds["y"] + menubar_bounds["height"]) - + app_bounds["height"] = min( + app_bounds["height"], frame.size.height - app_bounds["y"] + menubar_bounds["height"] + ) + # Create CGContext for compositing width = int(app_bounds["width"]) height = int(app_bounds["height"]) @@ -414,7 +448,7 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D cg_context = Quartz.CGBitmapContextCreate( None, width, height, 8, 0, color_space, Quartz.kCGImageAlphaPremultipliedLast ) - + def _draw_layer(cg_context, all_windows, source_rect, target_rect): """Draw a layer of windows from source_rect to target_rect on the given context.""" window_list = Foundation.CFArrayCreateMutable(None, len(all_windows), None) @@ -425,19 +459,13 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D ) if cg_image is not None: Quartz.CGContextDrawImage(cg_context, target_rect, cg_image) - + # --- FIRST PASS: desktop, apps --- source_position = [app_bounds["x"], app_bounds["y"]] source_size = [app_bounds["width"], app_bounds["height"]] - target_position = [ - 0, - min( - menubar_bounds["y"] + menubar_bounds["height"], - app_bounds["y"] - ) - ] + target_position = [0, min(menubar_bounds["y"] + menubar_bounds["height"], app_bounds["y"])] target_size = [app_bounds["width"], app_bounds["height"]] - + if dock_orientation == "bottom": source_size[1] += dock_bounds["height"] target_size[1] += dock_bounds["height"] @@ -447,42 +475,57 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D target_position[0] -= dock_bounds["width"] source_size[0] += dock_bounds["width"] target_size[0] += dock_bounds["width"] - + app_source_rect = Quartz.CGRectMake( source_position[0], source_position[1], source_size[0], source_size[1] ) app_target_rect = Quartz.CGRectMake( - target_position[0], app_bounds["height"] - target_position[1] - target_size[1], target_size[0], target_size[1] + target_position[0], + app_bounds["height"] - target_position[1] - target_size[1], + target_size[0], + target_size[1], ) - first_pass_windows = [w for w in all_windows if w["role"] == "app" or w["role"] == "desktop"] + first_pass_windows = [ + w for w in all_windows if 
w["role"] == "app" or w["role"] == "desktop" + ] _draw_layer(cg_context, first_pass_windows, app_source_rect, app_target_rect) - - hitboxes.append({ - "hitbox": [0, menubar_bounds["height"], app_bounds["width"], menubar_bounds["height"] + app_bounds["height"]], - "target": [ - app_source_rect.origin.x, - app_source_rect.origin.y, - app_source_rect.origin.x + app_bounds["width"], - app_source_rect.origin.y + app_bounds["height"] - ] - }) + + hitboxes.append( + { + "hitbox": [ + 0, + menubar_bounds["height"], + app_bounds["width"], + menubar_bounds["height"] + app_bounds["height"], + ], + "target": [ + app_source_rect.origin.x, + app_source_rect.origin.y, + app_source_rect.origin.x + app_bounds["width"], + app_source_rect.origin.y + app_bounds["height"], + ], + } + ) # --- SECOND PASS: menubar --- allowed_roles = {"menubar"} menubar_windows = [w for w in all_windows if w["role"] in allowed_roles] - menubar_source_rect = Quartz.CGRectMake( - 0, 0, app_bounds["width"], menubar_bounds["height"] - ) + menubar_source_rect = Quartz.CGRectMake(0, 0, app_bounds["width"], menubar_bounds["height"]) menubar_target_rect = Quartz.CGRectMake( - 0, app_bounds["height"] - menubar_bounds["height"], app_bounds["width"], menubar_bounds["height"] + 0, + app_bounds["height"] - menubar_bounds["height"], + app_bounds["width"], + menubar_bounds["height"], ) _draw_layer(cg_context, menubar_windows, menubar_source_rect, menubar_target_rect) - - hitboxes.append({ - "hitbox": [0, 0, app_bounds["width"], menubar_bounds["height"]], - "target": [0, 0, app_bounds["width"], menubar_bounds["height"]] - }) - + + hitboxes.append( + { + "hitbox": [0, 0, app_bounds["width"], menubar_bounds["height"]], + "target": [0, 0, app_bounds["width"], menubar_bounds["height"]], + } + ) + # --- THIRD PASS: dock, filtered --- # Step 1: Collect dock items to draw, with their computed target rects dock_draw_items = [] @@ -496,7 +539,12 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D if item["title"] not in app_whitelist: continue elif item["subrole"] == "AXMinimizedWindowDockItem": - if not any(window["name"] == item["title"] and window["role"] == "app" and window["owner"] in app_whitelist for window in all_windows): + if not any( + window["name"] == item["title"] + and window["role"] == "app" + and window["owner"] in app_whitelist + for window in all_windows + ): continue elif item["subrole"] == "AXFolderDockItem": continue @@ -504,60 +552,65 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D # Preserve unscaled (original) source position and size before any modification hitbox_position = source_position hitbox_size = source_size - + screen_position = source_position screen_size = source_size - + # stretch to screen size padding = 32 if dock_orientation == "bottom": source_position = (source_position[0], 0) source_size = (source_size[0], frame.size.height) - - hitbox_position = (source_position[0], app_bounds['height'] - hitbox_size[1]) + + hitbox_position = (source_position[0], app_bounds["height"] - hitbox_size[1]) hitbox_size = (source_size[0], hitbox_size[1]) - + if index == 0: source_size = (padding + source_size[0], source_size[1]) source_position = (source_position[0] - padding, 0) elif index == len(dock_items) - 1: source_size = (source_size[0] + padding, source_size[1]) source_position = (source_position[0], 0) - + elif dock_orientation == "side": source_position = (0, source_position[1]) source_size = (frame.size.width, source_size[1]) - + hitbox_position = ( - 
source_position[0] if dock_bounds['x'] < frame.size.width / 2 else app_bounds['width'] - hitbox_size[0], - source_position[1] + ( + source_position[0] + if dock_bounds["x"] < frame.size.width / 2 + else app_bounds["width"] - hitbox_size[0] + ), + source_position[1], ) hitbox_size = (hitbox_size[0], source_size[1]) - + if index == 0: source_size = (source_size[0], padding + source_size[1]) source_position = (0, source_position[1] - padding) elif index == len(dock_items) - 1: source_size = (source_size[0], source_size[1] + padding) source_position = (0, source_position[1]) - # Compute the initial target position target_position = source_position target_size = source_size - - dock_draw_items.append({ - "item": item, - "index": index, - "source_position": source_position, - "source_size": source_size, - "target_size": target_size, - "target_position": target_position, # Will be updated after packing - "hitbox_position": hitbox_position, - "hitbox_size": hitbox_size, - "screen_position": screen_position, - "screen_size": screen_size, - }) + + dock_draw_items.append( + { + "item": item, + "index": index, + "source_position": source_position, + "source_size": source_size, + "target_size": target_size, + "target_position": target_position, # Will be updated after packing + "hitbox_position": hitbox_position, + "hitbox_size": hitbox_size, + "screen_position": screen_position, + "screen_size": screen_size, + } + ) # Step 2: Pack the target rects along the main axis, removing gaps packed_positions = [] @@ -569,12 +622,12 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D x_cursor += draw_item["target_size"][0] packed_strip_length = x_cursor # Center horizontally - x_offset = (app_bounds['width'] - packed_strip_length) / 2 - y_offset = (frame.size.height - app_bounds['height']) + x_offset = (app_bounds["width"] - packed_strip_length) / 2 + y_offset = frame.size.height - app_bounds["height"] for i, draw_item in enumerate(dock_draw_items): px, py = packed_positions[i] draw_item["target_position"] = (px + x_offset, py - y_offset) - + # Pack unscaled source rects x_cursor = 0 for draw_item in dock_draw_items: @@ -582,7 +635,7 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D x_cursor += draw_item["hitbox_size"][0] packed_strip_length = x_cursor # Center horizontally - x_offset = (app_bounds['width'] - packed_strip_length) / 2 + x_offset = (app_bounds["width"] - packed_strip_length) / 2 for i, draw_item in enumerate(dock_draw_items): px, py = draw_item["hitbox_position"] draw_item["hitbox_position"] = (px + x_offset, py) @@ -594,12 +647,16 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D y_cursor += draw_item["target_size"][1] packed_strip_length = y_cursor # Center vertically - y_offset = (app_bounds['height'] - packed_strip_length) / 2 - x_offset = 0 if dock_bounds['x'] < frame.size.width / 2 else frame.size.width - app_bounds['width'] + y_offset = (app_bounds["height"] - packed_strip_length) / 2 + x_offset = ( + 0 + if dock_bounds["x"] < frame.size.width / 2 + else frame.size.width - app_bounds["width"] + ) for i, draw_item in enumerate(dock_draw_items): px, py = packed_positions[i] draw_item["target_position"] = (px - x_offset, py + y_offset) - + # Pack unscaled source rects y_cursor = 0 for draw_item in dock_draw_items: @@ -607,11 +664,11 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D y_cursor += draw_item["hitbox_size"][1] packed_strip_length = y_cursor # Center 
vertically - y_offset = (app_bounds['height'] - packed_strip_length) / 2 + y_offset = (app_bounds["height"] - packed_strip_length) / 2 for i, draw_item in enumerate(dock_draw_items): px, py = draw_item["hitbox_position"] draw_item["hitbox_position"] = (px, py + y_offset) - + dock_windows = [window for window in all_windows if window["role"] == "dock"] # Step 3: Draw dock items using packed and recentered positions for draw_item in dock_draw_items: @@ -622,7 +679,10 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D target_size = draw_item["target_size"] # flip target position y - target_position = (target_position[0], app_bounds['height'] - target_position[1] - target_size[1]) + target_position = ( + target_position[0], + app_bounds["height"] - target_position[1] - target_size[1], + ) source_rect = Quartz.CGRectMake(*source_position, *source_size) target_rect = Quartz.CGRectMake(*target_position, *target_size) @@ -641,12 +701,21 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D # hitbox_rect = Quartz.CGRectMake(*hitbox_position_flipped, *hitbox_size) # Quartz.CGContextSetStrokeColorWithColor(cg_context, Quartz.CGColorCreateGenericRGB(0, 1, 0, 1)) # Quartz.CGContextStrokeRect(cg_context, hitbox_rect) - - hitboxes.append({ - "hitbox": [*hitbox_position, hitbox_position[0] + hitbox_size[0], hitbox_position[1] + hitbox_size[1]], - "target": [*draw_item["screen_position"], draw_item["screen_position"][0] + draw_item["screen_size"][0], draw_item["screen_position"][1] + draw_item["screen_size"][1]] - }) - + + hitboxes.append( + { + "hitbox": [ + *hitbox_position, + hitbox_position[0] + hitbox_size[0], + hitbox_position[1] + hitbox_size[1], + ], + "target": [ + *draw_item["screen_position"], + draw_item["screen_position"][0] + draw_item["screen_size"][0], + draw_item["screen_position"][1] + draw_item["screen_size"][1], + ], + } + ) # Convert composited context to CGImage final_cg_image = Quartz.CGBitmapContextCreateImage(cg_context) @@ -657,18 +726,19 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D image_data = io.BytesIO(png_data) return Image.open(image_data), hitboxes + @timing_decorator def get_menubar_items(active_app_pid: int = None) -> List[Dict[str, Any]]: """Get menubar items from the active application using Accessibility API - + Args: active_app_pid: PID of the active application - + Returns: List of dictionaries with menubar item information """ menubar_items = [] - + if active_app_pid is None: # Get the frontmost application's PID if none provided frontmost_app = NSWorkspace.sharedWorkspace().frontmostApplication() @@ -677,74 +747,66 @@ def get_menubar_items(active_app_pid: int = None) -> List[Dict[str, Any]]: else: logger.error("Error: Could not determine frontmost application") return menubar_items - + # Create an accessibility element for the application app_element = AXUIElementCreateApplication(active_app_pid) if app_element is None: logger.error(f"Error: Could not create accessibility element for PID {active_app_pid}") return menubar_items - + # Get the menubar menubar = element_attribute(app_element, kAXMenuBarAttribute) if menubar is None: logger.error(f"Error: Could not get menubar for application with PID {active_app_pid}") return menubar_items - + # Get the menubar items children = element_attribute(menubar, kAXChildrenAttribute) if children is None: logger.error("Error: Could not get menubar items") return menubar_items - + # Process each menubar item for i in 
range(len(children)): item = children[i] - + # Get item title title = element_attribute(item, kAXTitleAttribute) or "Untitled" - + # Create bounding box - bounds = { - "x": 0, - "y": 0, - "width": 0, - "height": 0 - } - + bounds = {"x": 0, "y": 0, "width": 0, "height": 0} + # Get item position position_value = element_attribute(item, kAXPositionAttribute) if position_value: position_value = element_value(position_value, kAXValueCGPointType) bounds["x"] = position_value.x bounds["y"] = position_value.y - + # Get item size size_value = element_attribute(item, kAXSizeAttribute) if size_value: size_value = element_value(size_value, kAXValueCGSizeType) bounds["width"] = size_value.width bounds["height"] = size_value.height - - + # Add to list - menubar_items.append({ - "title": title, - "bounds": bounds, - "index": i, - "app_pid": active_app_pid - }) - + menubar_items.append( + {"title": title, "bounds": bounds, "index": i, "app_pid": active_app_pid} + ) + return menubar_items - + + @timing_decorator def get_dock_items() -> List[Dict[str, Any]]: """Get all items in the macOS Dock - + Returns: List of dictionaries with Dock item information """ dock_items = [] - + # Find the Dock process dock_pid = None running_apps = get_running_apps() @@ -752,23 +814,23 @@ def get_dock_items() -> List[Dict[str, Any]]: if app.localizedName() == "Dock" and app.bundleIdentifier() == "com.apple.dock": dock_pid = app.processIdentifier() break - + if dock_pid is None: logger.error("Error: Could not find Dock process") return dock_items - + # Create an accessibility element for the Dock dock_element = AXUIElementCreateApplication(dock_pid) if dock_element is None: logger.error(f"Error: Could not create accessibility element for Dock (PID {dock_pid})") return dock_items - + # Get the Dock's main element dock_list = element_attribute(dock_element, kAXChildrenAttribute) if dock_list is None or len(dock_list) == 0: logger.error("Error: Could not get Dock children") return dock_items - + # Find the Dock's application list (usually the first child) dock_app_list = None for child in dock_list: @@ -776,17 +838,17 @@ def get_dock_items() -> List[Dict[str, Any]]: if role == "AXList": dock_app_list = child break - + if dock_app_list is None: logger.error("Error: Could not find Dock application list") return dock_items - + # Get all items in the Dock items = element_attribute(dock_app_list, kAXChildrenAttribute) if items is None: logger.error("Error: Could not get Dock items") return dock_items - + # Process each Dock item for i, item in enumerate(items): # Get item attributes @@ -794,29 +856,24 @@ def get_dock_items() -> List[Dict[str, Any]]: description = element_attribute(item, "AXDescription") or "" role = element_attribute(item, kAXRoleAttribute) or "" subrole = element_attribute(item, "AXSubrole") or "" - + # Create bounding box - bounds = { - "x": 0, - "y": 0, - "width": 0, - "height": 0 - } - + bounds = {"x": 0, "y": 0, "width": 0, "height": 0} + # Get item position position_value = element_attribute(item, kAXPositionAttribute) if position_value: position_value = element_value(position_value, kAXValueCGPointType) bounds["x"] = position_value.x bounds["y"] = position_value.y - + # Get item size size_value = element_attribute(item, kAXSizeAttribute) if size_value: size_value = element_value(size_value, kAXValueCGSizeType) bounds["width"] = size_value.width bounds["height"] = size_value.height - + # Determine if this is an application, file/folder, or separator item_type = "unknown" if subrole == "AXApplicationDockItem": @@ 
-829,20 +886,23 @@ def get_dock_items() -> List[Dict[str, Any]]: item_type = "separator" elif "trash" in title.lower(): item_type = "trash" - + # Add to list - dock_items.append({ - "title": title, - "description": description, - "bounds": bounds, - "index": i, - "type": item_type, - "role": role, - "subrole": subrole - }) - + dock_items.append( + { + "title": title, + "description": description, + "bounds": bounds, + "index": i, + "type": item_type, + "role": role, + "subrole": subrole, + } + ) + return dock_items + class AppActivationContext: def __init__(self, active_app_pid=None, active_app_to_use="", logger=None): self.active_app_pid = active_app_pid @@ -852,9 +912,12 @@ class AppActivationContext: def __enter__(self): from AppKit import NSWorkspace + if self.active_app_pid: if self.logger and self.active_app_to_use: - self.logger.debug(f"Automatically activating app '{self.active_app_to_use}' for screenshot composition") + self.logger.debug( + f"Automatically activating app '{self.active_app_to_use}' for screenshot composition" + ) self.frontmost_app = NSWorkspace.sharedWorkspace().frontmostApplication() running_apps_list = NSWorkspace.sharedWorkspace().runningApplications() for app in running_apps_list: @@ -870,10 +933,11 @@ class AppActivationContext: # sleep for 0.5 seconds time.sleep(0.5) self.frontmost_app.activateWithOptions_(0) - + def get_frontmost_and_active_app(all_windows, running_apps, app_whitelist): from AppKit import NSWorkspace + frontmost_app = NSWorkspace.sharedWorkspace().frontmostApplication() active_app_to_use = None @@ -912,14 +976,20 @@ def get_frontmost_and_active_app(all_windows, running_apps, app_whitelist): return frontmost_app, active_app_to_use, active_app_pid -def capture_all_apps(save_to_disk: bool = False, app_whitelist: List[str] = None, output_dir: str = None, take_focus: bool = True) -> Tuple[Dict[str, Any], Optional[Image.Image]]: + +def capture_all_apps( + save_to_disk: bool = False, + app_whitelist: List[str] = None, + output_dir: str = None, + take_focus: bool = True, +) -> Tuple[Dict[str, Any], Optional[Image.Image]]: """Capture screenshots of all running applications - + Args: save_to_disk: Whether to save screenshots to disk app_whitelist: Optional list of app names to include in the recomposited screenshot (will always include 'Window Server' and 'Dock') - + Returns: Dictionary with application information and screenshots Optional PIL Image of the recomposited screenshot @@ -929,61 +999,65 @@ def capture_all_apps(save_to_disk: bool = False, app_whitelist: List[str] = None "applications": [], "windows": [], # New array to store all windows, including those without apps "menubar_items": [], # New array to store menubar items - "dock_items": [] # New array to store dock items + "dock_items": [], # New array to store dock items } - + # Get all windows with z-order information all_windows = get_all_windows() - + # Get all running applications running_apps = get_running_apps() - - frontmost_app, active_app_to_use, active_app_pid = get_frontmost_and_active_app(all_windows, running_apps, app_whitelist) if take_focus else (None, None, None) - + + frontmost_app, active_app_to_use, active_app_pid = ( + get_frontmost_and_active_app(all_windows, running_apps, app_whitelist) + if take_focus + else (None, None, None) + ) + # Use AppActivationContext to activate the app and restore focus with AppActivationContext(active_app_pid, active_app_to_use, logger): - + # Process applications for app in running_apps: # Skip system apps without a bundle ID if 
app.bundleIdentifier() is None: continue - + app_info = get_app_info(app) app_windows = get_app_windows(app.processIdentifier(), all_windows) - - app_data = { - "info": app_info, - "windows": [ window["id"] for window in app_windows ] - } - + + app_data = {"info": app_info, "windows": [window["id"] for window in app_windows]} + result["applications"].append(app_data) - + # Add all windows to the result result["windows"] = all_windows - + # Get menubar items from the active application menubar_items = get_menubar_items(active_app_pid) result["menubar_items"] = menubar_items - + # Get dock items dock_items = get_dock_items() result["dock_items"] = dock_items - + # Get menubar bounds menubar_bounds = get_menubar_bounds() result["menubar_bounds"] = menubar_bounds - + # Get dock bounds dock_bounds = get_dock_bounds() result["dock_bounds"] = dock_bounds - + # Capture the entire desktop using Quartz compositing - desktop_screenshot, hitboxes = draw_desktop_screenshot(app_whitelist, all_windows, dock_bounds, dock_items, menubar_bounds, menubar_items) - + desktop_screenshot, hitboxes = draw_desktop_screenshot( + app_whitelist, all_windows, dock_bounds, dock_items, menubar_bounds, menubar_items + ) + result["hitboxes"] = hitboxes - - from PIL import Image, ImageDraw, ImageChops + + from PIL import Image, ImageChops, ImageDraw + def _draw_hitboxes(img, hitboxes, key="target"): """ Overlay opaque colored rectangles for each hitbox (using hitbox[key]) @@ -1002,23 +1076,29 @@ def capture_all_apps(save_to_disk: bool = False, app_whitelist: List[str] = None # Distinct colors for order colors = [ - (255, 0, 0, 180), # Red - (0, 255, 0, 180), # Green - (0, 0, 255, 180), # Blue - (255, 255, 0, 180), # Yellow - (0, 255, 255, 180), # Cyan - (255, 0, 255, 180), # Magenta - (255, 128, 0, 180), # Orange - (128, 0, 255, 180), # Purple - (0, 128, 255, 180), # Sky blue - (128, 255, 0, 180), # Lime + (255, 0, 0, 180), # Red + (0, 255, 0, 180), # Green + (0, 0, 255, 180), # Blue + (255, 255, 0, 180), # Yellow + (0, 255, 255, 180), # Cyan + (255, 0, 255, 180), # Magenta + (255, 128, 0, 180), # Orange + (128, 0, 255, 180), # Purple + (0, 128, 255, 180), # Sky blue + (128, 255, 0, 180), # Lime ] # Set minimum brightness for colors min_brightness = 0 colors = [ - (max(min_brightness, c[0]), max(min_brightness, c[1]), max(min_brightness, c[2]), c[3]) for c in colors + ( + max(min_brightness, c[0]), + max(min_brightness, c[1]), + max(min_brightness, c[2]), + c[3], + ) + for c in colors ] - + for i, h in enumerate(hitboxes): rect = h.get(key) color = colors[i % len(colors)] @@ -1034,25 +1114,31 @@ def capture_all_apps(save_to_disk: bool = False, app_whitelist: List[str] = None desktop_path = os.path.join(output_dir, "desktop.png") desktop_screenshot.save(desktop_path) result["desktop_screenshot"] = desktop_path - + logger.info(f"Saved desktop screenshot to {desktop_path}") if app_whitelist: # Take screenshot without whitelist desktop_screenshot_full, hitboxes_full = draw_desktop_screenshot( - None, all_windows, dock_bounds, dock_items, menubar_bounds, menubar_items) + None, all_windows, dock_bounds, dock_items, menubar_bounds, menubar_items + ) # Draw hitboxes on both images using overlay img1 = _draw_hitboxes(desktop_screenshot.copy(), hitboxes, key="hitbox") - img2 = _draw_hitboxes(desktop_screenshot_full.copy(), hitboxes, key="target") if desktop_screenshot_full else None + img2 = ( + _draw_hitboxes(desktop_screenshot_full.copy(), hitboxes, key="target") + if desktop_screenshot_full + else None + ) if img2 and 
hitboxes_full: # Compose side-by-side from PIL import Image + width = img1.width + img2.width height = max(img1.height, img2.height) - combined = Image.new('RGBA', (width, height), (0, 0, 0, 0)) + combined = Image.new("RGBA", (width, height), (0, 0, 0, 0)) combined.paste(img1, (0, 0)) combined.paste(img2, (img1.width, 0)) side_by_side_path = os.path.join(output_dir, "side_by_side_hitboxes.png") @@ -1066,29 +1152,51 @@ def capture_all_apps(save_to_disk: bool = False, app_whitelist: List[str] = None result["hitbox_screenshot"] = hitbox_path # Focus restoration is now handled by AppActivationContext - + return result, desktop_screenshot + async def run_capture(): """Run the screenshot capture asynchronously""" # Parse command line arguments - parser = argparse.ArgumentParser(description="Capture screenshots of running macOS applications") - parser.add_argument("--output", "-o", help="Output directory for screenshots", default="app_screenshots") - parser.add_argument("--filter", "-f", nargs="+", help="Filter recomposited screenshot to only include specified apps") - parser.add_argument("--menubar", "-m", action="store_true", help="List menubar and status items with their bounding boxes") - parser.add_argument("--dock", "-d", action="store_true", help="List Dock items with their bounding boxes") - parser.add_argument("--demo", nargs="*", help="Demo mode: pass app names to capture individual and combinations, create mosaic PNG") + parser = argparse.ArgumentParser( + description="Capture screenshots of running macOS applications" + ) + parser.add_argument( + "--output", "-o", help="Output directory for screenshots", default="app_screenshots" + ) + parser.add_argument( + "--filter", + "-f", + nargs="+", + help="Filter recomposited screenshot to only include specified apps", + ) + parser.add_argument( + "--menubar", + "-m", + action="store_true", + help="List menubar and status items with their bounding boxes", + ) + parser.add_argument( + "--dock", "-d", action="store_true", help="List Dock items with their bounding boxes" + ) + parser.add_argument( + "--demo", + nargs="*", + help="Demo mode: pass app names to capture individual and combinations, create mosaic PNG", + ) args = parser.parse_args() - + # Create output directory in the current directory if not absolute if not os.path.isabs(args.output): output_dir = os.path.join(os.getcwd(), args.output) else: output_dir = args.output - + # DEMO MODE: capture each app and all non-empty combinations, then mosaic if args.demo: from PIL import Image + demo_apps = args.demo print(f"Running in DEMO mode for apps: {demo_apps}") groups = [] @@ -1108,9 +1216,11 @@ async def run_capture(): if not screenshots: print("No screenshots captured in demo mode.") return + # Mosaic-pack: grid (rows of sqrt(N)) - def make_mosaic(images, pad=64, bg=(30,30,30)): + def make_mosaic(images, pad=64, bg=(30, 30, 30)): import rpack + sizes = [(img.width + pad, img.height + pad) for _, img in images] positions = rpack.pack(sizes) # Find the bounding box for the mosaic @@ -1120,6 +1230,7 @@ async def run_capture(): for (group, img), (x, y) in zip(images, positions): mosaic.paste(img, (x, y)) return mosaic + mosaic_img = make_mosaic(screenshots) mosaic_path = os.path.join(output_dir, "demo_mosaic.png") os.makedirs(output_dir, exist_ok=True) @@ -1128,57 +1239,60 @@ async def run_capture(): return # Capture all apps and save to disk, including a recomposited screenshot - print(f"Capturing screenshots of all running applications...") + print("Capturing screenshots of all running 
applications...") print(f"Saving screenshots to: {output_dir}") - + # If filter is provided, show what we're filtering by if args.filter: - print(f"Filtering recomposited screenshot to only include: {', '.join(args.filter)} (plus Window Server and Dock)") - + print( + f"Filtering recomposited screenshot to only include: {', '.join(args.filter)} (plus Window Server and Dock)" + ) + result, img = capture_all_apps( - save_to_disk=True, - app_whitelist=args.filter, - output_dir=output_dir, - take_focus=True + save_to_disk=True, app_whitelist=args.filter, output_dir=output_dir, take_focus=True ) - + # Print summary - print(f"\nCapture complete!") + print("\nCapture complete!") print(f"Captured {len(result['applications'])} applications") - + total_app_windows = sum(len(app["windows"]) for app in result["applications"]) print(f"Total application windows captured: {total_app_windows}") print(f"Total standalone windows captured: {len(result['windows'])}") - + # Print details of each application print("\nApplication details:") for app in result["applications"]: app_info = app["info"] windows = app["windows"] print(f" - {app_info['name']} ({len(windows)} windows)") - + # Print recomposited screenshot path if available if "desktop_screenshot" in result: print(f"\nRecomposited screenshot saved to: {result['desktop_screenshot']}") - + # Print menubar items if requested if args.menubar and "menubar_items" in result: print("\nMenubar items:") - + # Find app name for the PID app_name_by_pid = {} for app in result["applications"]: app_info = app["info"] app_name_by_pid[app_info["pid"]] = app_info["name"] - + for item in result["menubar_items"]: print(f" - {item['title']}") - print(f" Bounds: x={item['bounds']['x']}, y={item['bounds']['y']}, width={item['bounds']['width']}, height={item['bounds']['height']}") - + print( + f" Bounds: x={item['bounds']['x']}, y={item['bounds']['y']}, width={item['bounds']['width']}, height={item['bounds']['height']}" + ) + if "app_pid" in item: - app_name = app_name_by_pid.get(item["app_pid"], f"Unknown App (PID: {item['app_pid']})") + app_name = app_name_by_pid.get( + item["app_pid"], f"Unknown App (PID: {item['app_pid']})" + ) print(f" App: {app_name} (PID: {item['app_pid']})") - + if "window_id" in item: print(f" Window ID: {item['window_id']}") if "owner" in item: @@ -1186,24 +1300,27 @@ async def run_capture(): if "layer" in item and "z_index" in item: print(f" Layer: {item['layer']}, Z-Index: {item['z_index']}") print("") - + # Print dock items if requested if args.dock and "dock_items" in result: print("\nDock items:") for item in result["dock_items"]: print(f" - {item['title']} ({item['type']})") print(f" Description: {item['description']}") - print(f" Bounds: x={item['bounds']['x']}, y={item['bounds']['y']}, width={item['bounds']['width']}, height={item['bounds']['height']}") + print( + f" Bounds: x={item['bounds']['x']}, y={item['bounds']['y']}, width={item['bounds']['width']}, height={item['bounds']['height']}" + ) print(f" Role: {item['role']}, Subrole: {item['subrole']}") print(f" Index: {item['index']}") print("") - + # Save the metadata to a JSON file metadata_path = os.path.join(output_dir, "metadata.json") with open(metadata_path, "w") as f: json.dump(result, f, indent=2) - + print(f"\nMetadata saved to: {metadata_path}") + if __name__ == "__main__": - asyncio.run(run_capture()) \ No newline at end of file + asyncio.run(run_capture()) diff --git a/libs/python/computer-server/computer_server/diorama/macos.py 
b/libs/python/computer-server/computer_server/diorama/macos.py index be266cae..5049f070 100644 --- a/libs/python/computer-server/computer_server/diorama/macos.py +++ b/libs/python/computer-server/computer_server/diorama/macos.py @@ -1,13 +1,15 @@ +import inspect import platform import sys -import platform -import inspect -from computer_server.diorama.diorama import Diorama -from computer_server.diorama.base import BaseDioramaHandler from typing import Optional +from computer_server.diorama.base import BaseDioramaHandler +from computer_server.diorama.diorama import Diorama + + class MacOSDioramaHandler(BaseDioramaHandler): """Handler for Diorama commands on macOS, using local diorama module.""" + async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict: if platform.system().lower() != "darwin": return {"success": False, "error": "Diorama is only supported on macOS."} @@ -30,4 +32,5 @@ class MacOSDioramaHandler(BaseDioramaHandler): return {"success": True, "result": result} except Exception as e: import traceback + return {"success": False, "error": str(e), "trace": traceback.format_exc()} diff --git a/libs/python/computer-server/computer_server/diorama/safezone.py b/libs/python/computer-server/computer_server/diorama/safezone.py index 122b668f..09455fd7 100644 --- a/libs/python/computer-server/computer_server/diorama/safezone.py +++ b/libs/python/computer-server/computer_server/diorama/safezone.py @@ -8,31 +8,31 @@ like the menubar and dock, which are needed for proper screenshot composition. import sys import time -from typing import Dict, Any, Optional, Tuple +from typing import Any, Dict, Optional, Tuple # Import Objective-C bridge libraries try: import AppKit + import Foundation + from AppKit import NSRunningApplication, NSWorkspace from ApplicationServices import ( - AXUIElementCreateSystemWide, - AXUIElementCreateApplication, AXUIElementCopyAttributeValue, AXUIElementCopyAttributeValues, - kAXChildrenAttribute, - kAXRoleAttribute, - kAXTitleAttribute, - kAXPositionAttribute, - kAXSizeAttribute, - kAXErrorSuccess, - AXValueGetType, - kAXValueCGSizeType, - kAXValueCGPointType, + AXUIElementCreateApplication, + AXUIElementCreateSystemWide, AXUIElementGetTypeID, + AXValueGetType, AXValueGetValue, + kAXChildrenAttribute, + kAXErrorSuccess, kAXMenuBarAttribute, + kAXPositionAttribute, + kAXRoleAttribute, + kAXSizeAttribute, + kAXTitleAttribute, + kAXValueCGPointType, + kAXValueCGSizeType, ) - from AppKit import NSWorkspace, NSRunningApplication - import Foundation except ImportError: print("Error: This script requires PyObjC to be installed.") print("Please install it with: pip install pyobjc") @@ -74,13 +74,8 @@ def element_value(element, type): def get_element_bounds(element): """Get the bounds of an accessibility element""" - bounds = { - "x": 0, - "y": 0, - "width": 0, - "height": 0 - } - + bounds = {"x": 0, "y": 0, "width": 0, "height": 0} + # Get position position_value = element_attribute(element, kAXPositionAttribute) if position_value: @@ -88,7 +83,7 @@ def get_element_bounds(element): if position_value: bounds["x"] = position_value.x bounds["y"] = position_value.y - + # Get size size_value = element_attribute(element, kAXSizeAttribute) if size_value: @@ -96,7 +91,7 @@ def get_element_bounds(element): if size_value: bounds["width"] = size_value.width bounds["height"] = size_value.height - + return bounds @@ -111,13 +106,13 @@ def find_dock_process(): def get_menubar_bounds(): """Get the bounds of the macOS menubar - + Returns: Dictionary with x, y, width, 
height of the menubar """ # Get the system-wide accessibility element system_element = AXUIElementCreateSystemWide() - + # Try to find the menubar menubar = element_attribute(system_element, kAXMenuBarAttribute) if menubar is None: @@ -127,19 +122,19 @@ def get_menubar_bounds(): app_pid = frontmost_app.processIdentifier() app_element = AXUIElementCreateApplication(app_pid) menubar = element_attribute(app_element, kAXMenuBarAttribute) - + if menubar is None: print("Error: Could not get menubar") # Return default menubar bounds as fallback return {"x": 0, "y": 0, "width": 1800, "height": 24} - + # Get menubar bounds return get_element_bounds(menubar) def get_dock_bounds(): """Get the bounds of the macOS Dock - + Returns: Dictionary with x, y, width, height of the Dock """ @@ -148,19 +143,19 @@ def get_dock_bounds(): print("Error: Could not find Dock process") # Return empty bounds as fallback return {"x": 0, "y": 0, "width": 0, "height": 0} - + # Create an accessibility element for the Dock dock_element = AXUIElementCreateApplication(dock_pid) if dock_element is None: print(f"Error: Could not create accessibility element for Dock (PID {dock_pid})") return {"x": 0, "y": 0, "width": 0, "height": 0} - + # Get the Dock's children children = element_attribute(dock_element, kAXChildrenAttribute) if not children or len(children) == 0: print("Error: Could not get Dock children") return {"x": 0, "y": 0, "width": 0, "height": 0} - + # Find the Dock's list (first child is usually the main dock list) dock_list = None for child in children: @@ -168,28 +163,25 @@ def get_dock_bounds(): if role == "AXList": dock_list = child break - + if dock_list is None: print("Error: Could not find Dock list") return {"x": 0, "y": 0, "width": 0, "height": 0} - + # Get the bounds of the dock list return get_element_bounds(dock_list) def get_ui_element_bounds(): """Get the bounds of important UI elements like menubar and dock - + Returns: Dictionary with menubar and dock bounds """ menubar_bounds = get_menubar_bounds() dock_bounds = get_dock_bounds() - - return { - "menubar": menubar_bounds, - "dock": dock_bounds - } + + return {"menubar": menubar_bounds, "dock": dock_bounds} if __name__ == "__main__": diff --git a/libs/python/computer-server/computer_server/handlers/base.py b/libs/python/computer-server/computer_server/handlers/base.py index ac629832..42b27a0c 100644 --- a/libs/python/computer-server/computer_server/handlers/base.py +++ b/libs/python/computer-server/computer_server/handlers/base.py @@ -1,24 +1,26 @@ from abc import ABC, abstractmethod -from typing import Optional, Dict, Any, List, Tuple +from typing import Any, Dict, List, Optional, Tuple + class BaseAccessibilityHandler(ABC): """Abstract base class for OS-specific accessibility handlers.""" - + @abstractmethod async def get_accessibility_tree(self) -> Dict[str, Any]: """Get the accessibility tree of the current window.""" pass @abstractmethod - async def find_element(self, role: Optional[str] = None, - title: Optional[str] = None, - value: Optional[str] = None) -> Dict[str, Any]: + async def find_element( + self, role: Optional[str] = None, title: Optional[str] = None, value: Optional[str] = None + ) -> Dict[str, Any]: """Find an element in the accessibility tree by criteria.""" pass + class BaseFileHandler(ABC): """Abstract base class for OS-specific file handlers.""" - + @abstractmethod async def file_exists(self, path: str) -> Dict[str, Any]: """Check if a file exists at the specified path.""" @@ -43,7 +45,7 @@ class BaseFileHandler(ABC): async def 
write_text(self, path: str, content: str) -> Dict[str, Any]: """Write text content to a file.""" pass - + @abstractmethod async def write_bytes(self, path: str, content_b64: str) -> Dict[str, Any]: """Write binary content to a file. Sent over the websocket as a base64 string.""" @@ -65,9 +67,11 @@ class BaseFileHandler(ABC): pass @abstractmethod - async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> Dict[str, Any]: + async def read_bytes( + self, path: str, offset: int = 0, length: Optional[int] = None + ) -> Dict[str, Any]: """Read the binary contents of a file. Sent over the websocket as a base64 string. - + Args: path: Path to the file offset: Byte offset to start reading from (default: 0) @@ -80,9 +84,10 @@ class BaseFileHandler(ABC): """Get the size of a file in bytes.""" pass + class BaseAutomationHandler(ABC): """Abstract base class for OS-specific automation handlers. - + Categories: - Mouse Actions: Methods for mouse control - Keyboard Actions: Methods for keyboard input @@ -90,18 +95,22 @@ class BaseAutomationHandler(ABC): - Screen Actions: Methods for screen interaction - Clipboard Actions: Methods for clipboard operations """ - + # Mouse Actions @abstractmethod - async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + async def mouse_down( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Perform a mouse down at the current or specified position.""" pass - + @abstractmethod - async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + async def mouse_up( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Perform a mouse up at the current or specified position.""" pass - + @abstractmethod async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a left click at the current or specified position.""" @@ -113,7 +122,9 @@ class BaseAutomationHandler(ABC): pass @abstractmethod - async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + async def double_click( + self, x: Optional[int] = None, y: Optional[int] = None + ) -> Dict[str, Any]: """Perform a double click at the current or specified position.""" pass @@ -123,9 +134,11 @@ class BaseAutomationHandler(ABC): pass @abstractmethod - async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + async def drag_to( + self, x: int, y: int, button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: """Drag the cursor from current position to specified coordinates. - + Args: x: The x coordinate to drag to y: The y coordinate to drag to @@ -133,11 +146,13 @@ class BaseAutomationHandler(ABC): duration: How long the drag should take in seconds """ pass - + @abstractmethod - async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + async def drag( + self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: """Drag the cursor from current position to specified coordinates. 
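Every method in these abstract base classes resolves to the same result shape: `{"success": True, ...}` on success, `{"success": False, "error": str(e)}` on failure. A minimal sketch of that contract as a decorator; `returns_result` and `read_config` are hypothetical and not part of this diff (the concrete handlers inline the try/except instead):

```python
import functools
from typing import Any, Dict


def returns_result(func):
    """Normalize an async handler method to the success/error dict contract."""

    @functools.wraps(func)
    async def wrapper(*args, **kwargs) -> Dict[str, Any]:
        try:
            result = await func(*args, **kwargs)
            # Implementations may return extra fields alongside success.
            return {"success": True, **(result or {})}
        except Exception as e:
            return {"success": False, "error": str(e)}

    return wrapper


@returns_result
async def read_config(path: str) -> Dict[str, Any]:
    # Illustrative only: any raised exception becomes a failure dict.
    with open(path) as f:
        return {"content": f.read()}
```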
- + Args: path: A list of tuples of x and y coordinates to drag to button: The mouse button to use ('left', 'middle', 'right') @@ -150,12 +165,12 @@ class BaseAutomationHandler(ABC): async def key_down(self, key: str) -> Dict[str, Any]: """Press and hold the specified key.""" pass - + @abstractmethod async def key_up(self, key: str) -> Dict[str, Any]: """Release the specified key.""" pass - + @abstractmethod async def type_text(self, text: str) -> Dict[str, Any]: """Type the specified text.""" @@ -176,7 +191,7 @@ class BaseAutomationHandler(ABC): async def scroll(self, x: int, y: int) -> Dict[str, Any]: """Scroll the specified amount.""" pass - + @abstractmethod async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: """Scroll down by the specified number of clicks.""" @@ -212,9 +227,9 @@ class BaseAutomationHandler(ABC): @abstractmethod async def set_clipboard(self, text: str) -> Dict[str, Any]: """Set the clipboard content.""" - pass + pass @abstractmethod async def run_command(self, command: str) -> Dict[str, Any]: """Run a command and return the output.""" - pass \ No newline at end of file + pass diff --git a/libs/python/computer-server/computer_server/handlers/factory.py b/libs/python/computer-server/computer_server/handlers/factory.py index 962f7fb1..3c90935f 100644 --- a/libs/python/computer-server/computer_server/handlers/factory.py +++ b/libs/python/computer-server/computer_server/handlers/factory.py @@ -1,68 +1,89 @@ import platform import subprocess from typing import Tuple, Type -from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler + from computer_server.diorama.base import BaseDioramaHandler +from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler + # Conditionally import platform-specific handlers system = platform.system().lower() -if system == 'darwin': - from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler +if system == "darwin": from computer_server.diorama.macos import MacOSDioramaHandler -elif system == 'linux': + + from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler +elif system == "linux": from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler -elif system == 'windows': +elif system == "windows": from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler from .generic import GenericFileHandler + class HandlerFactory: """Factory for creating OS-specific handlers.""" - + @staticmethod def _get_current_os() -> str: """Determine the current OS. 
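The OS detection above reduces to `platform.system()` plus a `uname -s` fallback for Unix-likes that report something unexpected. A standalone sketch of the same flow; `detect_os` is an illustrative name, and the factory's version additionally wraps everything in a try/except that re-raises as `RuntimeError`:

```python
import platform
import subprocess


def detect_os() -> str:
    # platform.system() answers without spawning a process on the common OSes.
    system = platform.system().lower()
    if system in ("darwin", "linux", "windows"):
        return system
    # Unix-like systems that report something else still answer `uname -s`.
    result = subprocess.run(["uname", "-s"], capture_output=True, text=True)
    if result.returncode == 0 and result.stdout.strip():
        return result.stdout.strip().lower()
    raise RuntimeError(f"Unsupported OS: {system}")
```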
- + Returns: str: The OS type ('darwin' for macOS, 'linux' for Linux, or 'windows' for Windows) - + Raises: RuntimeError: If unable to determine the current OS """ try: # Use platform.system() as primary method system = platform.system().lower() - if system in ['darwin', 'linux', 'windows']: + if system in ["darwin", "linux", "windows"]: return system - + # Fallback to uname if platform.system() doesn't return expected values (Unix-like systems only) - result = subprocess.run(['uname', '-s'], capture_output=True, text=True) + result = subprocess.run(["uname", "-s"], capture_output=True, text=True) if result.returncode == 0: return result.stdout.strip().lower() - + raise RuntimeError(f"Unsupported OS: {system}") except Exception as e: raise RuntimeError(f"Failed to determine current OS: {str(e)}") - + @staticmethod - def create_handlers() -> Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]: + def create_handlers() -> ( + Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler] + ): """Create and return appropriate handlers for the current OS. - + Returns: Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]: A tuple containing the appropriate accessibility, automation, diorama, and file handlers for the current OS. - + Raises: NotImplementedError: If the current OS is not supported RuntimeError: If unable to determine the current OS """ os_type = HandlerFactory._get_current_os() - - if os_type == 'darwin': - return MacOSAccessibilityHandler(), MacOSAutomationHandler(), MacOSDioramaHandler(), GenericFileHandler() - elif os_type == 'linux': - return LinuxAccessibilityHandler(), LinuxAutomationHandler(), BaseDioramaHandler(), GenericFileHandler() - elif os_type == 'windows': - return WindowsAccessibilityHandler(), WindowsAutomationHandler(), BaseDioramaHandler(), GenericFileHandler() + + if os_type == "darwin": + return ( + MacOSAccessibilityHandler(), + MacOSAutomationHandler(), + MacOSDioramaHandler(), + GenericFileHandler(), + ) + elif os_type == "linux": + return ( + LinuxAccessibilityHandler(), + LinuxAutomationHandler(), + BaseDioramaHandler(), + GenericFileHandler(), + ) + elif os_type == "windows": + return ( + WindowsAccessibilityHandler(), + WindowsAutomationHandler(), + BaseDioramaHandler(), + GenericFileHandler(), + ) else: raise NotImplementedError(f"OS '{os_type}' is not supported") diff --git a/libs/python/computer-server/computer_server/handlers/generic.py b/libs/python/computer-server/computer_server/handlers/generic.py index 11df71fa..da29f1c3 100644 --- a/libs/python/computer-server/computer_server/handlers/generic.py +++ b/libs/python/computer-server/computer_server/handlers/generic.py @@ -6,38 +6,41 @@ Includes: """ -from pathlib import Path -from typing import Dict, Any, Optional -from .base import BaseFileHandler import base64 +from pathlib import Path +from typing import Any, Dict, Optional + +from .base import BaseFileHandler + def resolve_path(path: str) -> Path: """Resolve a path to its absolute path. Expand ~ to the user's home directory. - + Args: path: The file or directory path to resolve - + Returns: Path: The resolved absolute path """ return Path(path).expanduser().resolve() + class GenericFileHandler(BaseFileHandler): """ Generic file handler that provides file system operations for all operating systems. 
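Callers obtain all four handlers from a single factory call; on Linux and Windows the diorama slot is the no-op `BaseDioramaHandler`. A usage sketch, assuming the `computer_server` package is importable on a supported OS:

```python
import asyncio

from computer_server.handlers.factory import HandlerFactory


async def main() -> None:
    accessibility, automation, diorama, files = HandlerFactory.create_handlers()
    # File and automation handlers share the success/error dict convention.
    print(await files.file_exists("~/.bashrc"))
    print(await automation.get_screen_size())


asyncio.run(main())
```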
- + This class implements the BaseFileHandler interface and provides methods for file and directory operations including reading, writing, creating, and deleting files and directories. """ - + async def file_exists(self, path: str) -> Dict[str, Any]: """ Check if a file exists at the specified path. - + Args: path: The file path to check - + Returns: Dict containing 'success' boolean and either 'exists' boolean or 'error' string """ @@ -49,10 +52,10 @@ class GenericFileHandler(BaseFileHandler): async def directory_exists(self, path: str) -> Dict[str, Any]: """ Check if a directory exists at the specified path. - + Args: path: The directory path to check - + Returns: Dict containing 'success' boolean and either 'exists' boolean or 'error' string """ @@ -64,25 +67,30 @@ class GenericFileHandler(BaseFileHandler): async def list_dir(self, path: str) -> Dict[str, Any]: """ List all files and directories in the specified directory. - + Args: path: The directory path to list - + Returns: Dict containing 'success' boolean and either 'files' list of names or 'error' string """ try: - return {"success": True, "files": [p.name for p in resolve_path(path).iterdir() if p.is_file() or p.is_dir()]} + return { + "success": True, + "files": [ + p.name for p in resolve_path(path).iterdir() if p.is_file() or p.is_dir() + ], + } except Exception as e: return {"success": False, "error": str(e)} - + async def read_text(self, path: str) -> Dict[str, Any]: """ Read the contents of a text file. - + Args: path: The file path to read from - + Returns: Dict containing 'success' boolean and either 'content' string or 'error' string """ @@ -94,11 +102,11 @@ class GenericFileHandler(BaseFileHandler): async def write_text(self, path: str, content: str) -> Dict[str, Any]: """ Write text content to a file. - + Args: path: The file path to write to content: The text content to write - + Returns: Dict containing 'success' boolean and optionally 'error' string """ @@ -108,60 +116,64 @@ class GenericFileHandler(BaseFileHandler): except Exception as e: return {"success": False, "error": str(e)} - async def write_bytes(self, path: str, content_b64: str, append: bool = False) -> Dict[str, Any]: + async def write_bytes( + self, path: str, content_b64: str, append: bool = False + ) -> Dict[str, Any]: """ Write binary content to a file from base64 encoded string. - + Args: path: The file path to write to content_b64: Base64 encoded binary content append: If True, append to existing file; if False, overwrite - + Returns: Dict containing 'success' boolean and optionally 'error' string """ try: - mode = 'ab' if append else 'wb' + mode = "ab" if append else "wb" with open(resolve_path(path), mode) as f: f.write(base64.b64decode(content_b64)) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - - async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> Dict[str, Any]: + + async def read_bytes( + self, path: str, offset: int = 0, length: Optional[int] = None + ) -> Dict[str, Any]: """ Read binary content from a file and return as base64 encoded string. 
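Because `read_bytes` accepts an offset and length and ships base64 over the websocket, large files can be streamed in fixed-size chunks. A client-side sketch; `read_in_chunks` is illustrative, not an API in this diff:

```python
import asyncio
import base64

from computer_server.handlers.generic import GenericFileHandler


async def read_in_chunks(path: str, chunk_size: int = 1024 * 1024) -> bytes:
    """Reassemble a file from fixed-size base64 chunks."""
    handler = GenericFileHandler()
    size_info = await handler.get_file_size(path)
    if not size_info["success"]:
        raise RuntimeError(size_info["error"])
    data = bytearray()
    offset = 0
    while offset < size_info["size"]:
        chunk = await handler.read_bytes(path, offset=offset, length=chunk_size)
        if not chunk["success"]:
            raise RuntimeError(chunk["error"])
        data.extend(base64.b64decode(chunk["content_b64"]))
        offset += chunk_size
    return bytes(data)


print(len(asyncio.run(read_in_chunks("/etc/hosts"))))
```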
- + Args: path: The file path to read from offset: Byte offset to start reading from length: Number of bytes to read; if None, read entire file from offset - + Returns: Dict containing 'success' boolean and either 'content_b64' string or 'error' string """ try: file_path = resolve_path(path) - with open(file_path, 'rb') as f: + with open(file_path, "rb") as f: if offset > 0: f.seek(offset) - + if length is not None: content = f.read(length) else: content = f.read() - - return {"success": True, "content_b64": base64.b64encode(content).decode('utf-8')} + + return {"success": True, "content_b64": base64.b64encode(content).decode("utf-8")} except Exception as e: return {"success": False, "error": str(e)} async def get_file_size(self, path: str) -> Dict[str, Any]: """ Get the size of a file in bytes. - + Args: path: The file path to get size for - + Returns: Dict containing 'success' boolean and either 'size' integer or 'error' string """ @@ -175,10 +187,10 @@ class GenericFileHandler(BaseFileHandler): async def delete_file(self, path: str) -> Dict[str, Any]: """ Delete a file at the specified path. - + Args: path: The file path to delete - + Returns: Dict containing 'success' boolean and optionally 'error' string """ @@ -191,13 +203,13 @@ class GenericFileHandler(BaseFileHandler): async def create_dir(self, path: str) -> Dict[str, Any]: """ Create a directory at the specified path. - + Creates parent directories if they don't exist and doesn't raise an error if the directory already exists. - + Args: path: The directory path to create - + Returns: Dict containing 'success' boolean and optionally 'error' string """ @@ -210,10 +222,10 @@ class GenericFileHandler(BaseFileHandler): async def delete_dir(self, path: str) -> Dict[str, Any]: """ Delete an empty directory at the specified path. - + Args: path: The directory path to delete - + Returns: Dict containing 'success' boolean and optionally 'error' string """ diff --git a/libs/python/computer-server/computer_server/handlers/linux.py b/libs/python/computer-server/computer_server/handlers/linux.py index eb8ca7b8..f536440c 100644 --- a/libs/python/computer-server/computer_server/handlers/linux.py +++ b/libs/python/computer-server/computer_server/handlers/linux.py @@ -7,14 +7,15 @@ To use GUI automation in a headless environment: 1. Install Xvfb: sudo apt-get install xvfb 2. Run with virtual display: xvfb-run python -m computer_server """ -from typing import Dict, Any, List, Tuple, Optional -import logging -import subprocess + import asyncio import base64 -import os import json +import logging +import os +import subprocess from io import BytesIO +from typing import Any, Dict, List, Optional, Tuple # Configure logger logger = logging.getLogger(__name__) @@ -23,30 +24,36 @@ logger = logging.getLogger(__name__) # This allows the server to run in headless environments try: import pyautogui + pyautogui.FAILSAFE = False logger.info("pyautogui successfully imported, GUI automation available") except Exception as e: logger.warning(f"pyautogui import failed: {str(e)}. 
GUI operations will be simulated.") -from pynput.mouse import Button, Controller as MouseController -from pynput.keyboard import Key, Controller as KeyboardController +from pynput.keyboard import Controller as KeyboardController +from pynput.keyboard import Key +from pynput.mouse import Button +from pynput.mouse import Controller as MouseController from .base import BaseAccessibilityHandler, BaseAutomationHandler + class LinuxAccessibilityHandler(BaseAccessibilityHandler): """Linux implementation of accessibility handler.""" - + async def get_accessibility_tree(self) -> Dict[str, Any]: """Get the accessibility tree of the current window. - + Returns: Dict[str, Any]: A dictionary containing success status and a simulated tree structure since Linux doesn't have equivalent accessibility API like macOS. """ # Linux doesn't have equivalent accessibility API like macOS # Return a minimal dummy tree - logger.info("Getting accessibility tree (simulated, no accessibility API available on Linux)") + logger.info( + "Getting accessibility tree (simulated, no accessibility API available on Linux)" + ) return { "success": True, "tree": { @@ -54,32 +61,31 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler): "title": "Linux Window", "position": {"x": 0, "y": 0}, "size": {"width": 1920, "height": 1080}, - "children": [] - } + "children": [], + }, } - - async def find_element(self, role: Optional[str] = None, - title: Optional[str] = None, - value: Optional[str] = None) -> Dict[str, Any]: + + async def find_element( + self, role: Optional[str] = None, title: Optional[str] = None, value: Optional[str] = None + ) -> Dict[str, Any]: """Find an element in the accessibility tree by criteria. - + Args: role: The role of the element to find. title: The title of the element to find. value: The value of the element to find. - + Returns: Dict[str, Any]: A dictionary indicating that element search is not supported on Linux. """ - logger.info(f"Finding element with role={role}, title={title}, value={value} (not supported on Linux)") - return { - "success": False, - "message": "Element search not supported on Linux" - } - + logger.info( + f"Finding element with role={role}, title={title}, value={value} (not supported on Linux)" + ) + return {"success": False, "message": "Element search not supported on Linux"} + def get_cursor_position(self) -> Tuple[int, int]: """Get the current cursor position. - + Returns: Tuple[int, int]: The x and y coordinates of the cursor position. Returns (0, 0) if pyautogui is not available. @@ -89,13 +95,13 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler): return pos.x, pos.y except Exception as e: logger.warning(f"Failed to get cursor position with pyautogui: {e}") - + logger.info("Getting cursor position (simulated)") return 0, 0 - + def get_screen_size(self) -> Tuple[int, int]: """Get the screen size. - + Returns: Tuple[int, int]: The width and height of the screen in pixels. Returns (1920, 1080) if pyautogui is not available. 
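The Linux handler's headless strategy is a guarded import plus fixed fallbacks (cursor at (0, 0), screen at 1920x1080). Roughly the same pattern in isolation; the `HAVE_GUI` flag is illustrative, since the handler itself just retries the pyautogui call inside a try/except each time:

```python
import logging

logger = logging.getLogger(__name__)

try:
    import pyautogui  # raises at import time when no X display is available

    pyautogui.FAILSAFE = False
    HAVE_GUI = True
except Exception as e:
    logger.warning("pyautogui import failed: %s. GUI operations simulated.", e)
    HAVE_GUI = False


def screen_size() -> tuple[int, int]:
    if HAVE_GUI:
        size = pyautogui.size()
        return size.width, size.height
    return 1920, 1080  # the handler's simulated default
```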
@@ -105,24 +111,28 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler): return size.width, size.height except Exception as e: logger.warning(f"Failed to get screen size with pyautogui: {e}") - + logger.info("Getting screen size (simulated)") return 1920, 1080 + class LinuxAutomationHandler(BaseAutomationHandler): """Linux implementation of automation handler using pyautogui.""" + keyboard = KeyboardController() mouse = MouseController() - + # Mouse Actions - async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + async def mouse_down( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Press and hold a mouse button at the specified coordinates. - + Args: x: The x coordinate to move to before pressing. If None, uses current position. y: The y coordinate to move to before pressing. If None, uses current position. button: The mouse button to press ("left", "right", or "middle"). - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -133,15 +143,17 @@ class LinuxAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - - async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + + async def mouse_up( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Release a mouse button at the specified coordinates. - + Args: x: The x coordinate to move to before releasing. If None, uses current position. y: The y coordinate to move to before releasing. If None, uses current position. button: The mouse button to release ("left", "right", or "middle"). - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -152,14 +164,14 @@ class LinuxAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def move_cursor(self, x: int, y: int) -> Dict[str, Any]: """Move the cursor to the specified coordinates. - + Args: x: The x coordinate to move to. y: The y coordinate to move to. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -171,11 +183,11 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a left mouse click at the specified coordinates. - + Args: x: The x coordinate to click at. If None, clicks at current position. y: The y coordinate to click at. If None, clicks at current position. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -189,11 +201,11 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a right mouse click at the specified coordinates. - + Args: x: The x coordinate to click at. If None, clicks at current position. y: The y coordinate to click at. If None, clicks at current position. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. 
""" @@ -205,13 +217,15 @@ class LinuxAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + async def double_click( + self, x: Optional[int] = None, y: Optional[int] = None + ) -> Dict[str, Any]: """Perform a double click at the specified coordinates. - + Args: x: The x coordinate to double click at. If None, clicks at current position. y: The y coordinate to double click at. If None, clicks at current position. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -223,14 +237,16 @@ class LinuxAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def click(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + async def click( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Perform a mouse click with the specified button at the given coordinates. - + Args: x: The x coordinate to click at. If None, clicks at current position. y: The y coordinate to click at. If None, clicks at current position. button: The mouse button to click ("left", "right", or "middle"). - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -242,15 +258,17 @@ class LinuxAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + async def drag_to( + self, x: int, y: int, button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: """Drag from the current position to the specified coordinates. - + Args: x: The x coordinate to drag to. y: The y coordinate to drag to. button: The mouse button to use for dragging. duration: The time in seconds to take for the drag operation. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -260,16 +278,18 @@ class LinuxAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def drag(self, start_x: int, start_y: int, end_x: int, end_y: int, button: str = "left") -> Dict[str, Any]: + async def drag( + self, start_x: int, start_y: int, end_x: int, end_y: int, button: str = "left" + ) -> Dict[str, Any]: """Drag from start coordinates to end coordinates. - + Args: start_x: The starting x coordinate. start_y: The starting y coordinate. end_x: The ending x coordinate. end_y: The ending y coordinate. button: The mouse button to use for dragging. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -280,14 +300,16 @@ class LinuxAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def drag_path(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + async def drag_path( + self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: """Drag along a path defined by a list of coordinates. - + Args: path: A list of (x, y) coordinate tuples defining the drag path. button: The mouse button to use for dragging. duration: The time in seconds to take for each segment of the drag. 
- + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -304,10 +326,10 @@ class LinuxAutomationHandler(BaseAutomationHandler): # Keyboard Actions async def key_down(self, key: str) -> Dict[str, Any]: """Press and hold a key. - + Args: key: The key to press down. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -316,13 +338,13 @@ class LinuxAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def key_up(self, key: str) -> Dict[str, Any]: """Release a key. - + Args: key: The key to release. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -331,13 +353,13 @@ class LinuxAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def type_text(self, text: str) -> Dict[str, Any]: """Type the specified text using the keyboard. - + Args: text: The text to type. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -350,10 +372,10 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def press_key(self, key: str) -> Dict[str, Any]: """Press and release a key. - + Args: key: The key to press. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -365,10 +387,10 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def hotkey(self, keys: List[str]) -> Dict[str, Any]: """Press a combination of keys simultaneously. - + Args: keys: A list of keys to press together as a hotkey combination. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -381,11 +403,11 @@ class LinuxAutomationHandler(BaseAutomationHandler): # Scrolling Actions async def scroll(self, x: int, y: int) -> Dict[str, Any]: """Scroll the mouse wheel. - + Args: x: The horizontal scroll amount. y: The vertical scroll amount. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -394,13 +416,13 @@ class LinuxAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: """Scroll down by the specified number of clicks. - + Args: clicks: The number of scroll clicks to perform downward. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -412,10 +434,10 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]: """Scroll up by the specified number of clicks. - + Args: clicks: The number of scroll clicks to perform upward. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ @@ -428,13 +450,14 @@ class LinuxAutomationHandler(BaseAutomationHandler): # Screen Actions async def screenshot(self) -> Dict[str, Any]: """Take a screenshot of the current screen. - + Returns: Dict[str, Any]: A dictionary containing success status and base64-encoded image data, or error message if failed. 
""" try: from PIL import Image + screenshot = pyautogui.screenshot() if not isinstance(screenshot, Image.Image): return {"success": False, "error": "Failed to capture screenshot"} @@ -448,7 +471,7 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def get_screen_size(self) -> Dict[str, Any]: """Get the size of the screen. - + Returns: Dict[str, Any]: A dictionary containing success status and screen dimensions, or error message if failed. @@ -461,7 +484,7 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def get_cursor_position(self) -> Dict[str, Any]: """Get the current position of the cursor. - + Returns: Dict[str, Any]: A dictionary containing success status and cursor coordinates, or error message if failed. @@ -475,13 +498,14 @@ class LinuxAutomationHandler(BaseAutomationHandler): # Clipboard Actions async def copy_to_clipboard(self) -> Dict[str, Any]: """Get the current content of the clipboard. - + Returns: Dict[str, Any]: A dictionary containing success status and clipboard content, or error message if failed. """ try: import pyperclip + content = pyperclip.paste() return {"success": True, "content": content} except Exception as e: @@ -489,15 +513,16 @@ class LinuxAutomationHandler(BaseAutomationHandler): async def set_clipboard(self, text: str) -> Dict[str, Any]: """Set the clipboard content to the specified text. - + Args: text: The text to copy to the clipboard. - + Returns: Dict[str, Any]: A dictionary with success status and error message if failed. """ try: import pyperclip + pyperclip.copy(text) return {"success": True} except Exception as e: @@ -506,10 +531,10 @@ class LinuxAutomationHandler(BaseAutomationHandler): # Command Execution async def run_command(self, command: str) -> Dict[str, Any]: """Execute a shell command asynchronously. - + Args: command: The shell command to execute. - + Returns: Dict[str, Any]: A dictionary containing success status, stdout, stderr, and return code, or error message if failed. 
@@ -517,18 +542,16 @@ class LinuxAutomationHandler(BaseAutomationHandler): try: # Create subprocess process = await asyncio.create_subprocess_shell( - command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE + command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) # Wait for the subprocess to finish stdout, stderr = await process.communicate() # Return decoded output return { - "success": True, - "stdout": stdout.decode() if stdout else "", - "stderr": stderr.decode() if stderr else "", - "return_code": process.returncode + "success": True, + "stdout": stdout.decode() if stdout else "", + "stderr": stderr.decode() if stderr else "", + "return_code": process.returncode, } except Exception as e: return {"success": False, "error": str(e)} diff --git a/libs/python/computer-server/computer_server/handlers/macos.py b/libs/python/computer-server/computer_server/handlers/macos.py index e23b8ea6..ce341668 100644 --- a/libs/python/computer-server/computer_server/handlers/macos.py +++ b/libs/python/computer-server/computer_server/handlers/macos.py @@ -1,54 +1,57 @@ import pyautogui + pyautogui.FAILSAFE = False -from pynput.mouse import Button, Controller as MouseController -from pynput.keyboard import Key, Controller as KeyboardController -import time +import asyncio import base64 +import copy +import json +import logging +import re +import time +from ctypes import POINTER, byref, c_void_p from io import BytesIO -from typing import Optional, Dict, Any, List, Tuple -from ctypes import byref, c_void_p, POINTER -from AppKit import NSWorkspace # type: ignore +from typing import Any, Dict, List, Optional, Tuple + import AppKit +import Foundation +import objc +from AppKit import NSWorkspace # type: ignore +from ApplicationServices import AXUIElementCopyAttributeValue # type: ignore +from ApplicationServices import AXUIElementCopyAttributeValues # type: ignore +from ApplicationServices import AXUIElementCreateApplication # type: ignore +from ApplicationServices import AXUIElementCreateSystemWide # type: ignore +from ApplicationServices import AXUIElementGetTypeID # type: ignore +from ApplicationServices import AXValueGetType # type: ignore +from ApplicationServices import AXValueGetValue # type: ignore +from ApplicationServices import kAXChildrenAttribute # type: ignore +from ApplicationServices import kAXDescriptionAttribute # type: ignore +from ApplicationServices import kAXEnabledAttribute # type: ignore +from ApplicationServices import kAXErrorSuccess # type: ignore +from ApplicationServices import kAXFocusedApplicationAttribute # type: ignore +from ApplicationServices import kAXFocusedUIElementAttribute # type: ignore +from ApplicationServices import kAXFocusedWindowAttribute # type: ignore +from ApplicationServices import kAXMainWindowAttribute # type: ignore +from ApplicationServices import kAXPositionAttribute # type: ignore +from ApplicationServices import kAXRoleAttribute # type: ignore +from ApplicationServices import kAXRoleDescriptionAttribute # type: ignore +from ApplicationServices import kAXSelectedTextAttribute # type: ignore +from ApplicationServices import kAXSelectedTextRangeAttribute # type: ignore +from ApplicationServices import kAXSizeAttribute # type: ignore +from ApplicationServices import kAXTitleAttribute # type: ignore +from ApplicationServices import kAXValueAttribute # type: ignore +from ApplicationServices import kAXValueCFRangeType # type: ignore +from ApplicationServices import kAXValueCGPointType # type: ignore +from 
ApplicationServices import kAXValueCGSizeType # type: ignore +from ApplicationServices import kAXVisibleChildrenAttribute # type: ignore +from ApplicationServices import kAXWindowsAttribute # type: ignore +from pynput.keyboard import Controller as KeyboardController +from pynput.keyboard import Key +from pynput.mouse import Button +from pynput.mouse import Controller as MouseController from Quartz.CoreGraphics import * # type: ignore from Quartz.CoreGraphics import CGPoint, CGSize # type: ignore -import Foundation -from ApplicationServices import ( - AXUIElementCreateSystemWide, # type: ignore - AXUIElementCreateApplication, # type: ignore - AXUIElementCopyAttributeValue, # type: ignore - AXUIElementCopyAttributeValues, # type: ignore - kAXFocusedWindowAttribute, # type: ignore - kAXWindowsAttribute, # type: ignore - kAXMainWindowAttribute, # type: ignore - kAXChildrenAttribute, # type: ignore - kAXRoleAttribute, # type: ignore - kAXTitleAttribute, # type: ignore - kAXValueAttribute, # type: ignore - kAXDescriptionAttribute, # type: ignore - kAXEnabledAttribute, # type: ignore - kAXPositionAttribute, # type: ignore - kAXSizeAttribute, # type: ignore - kAXErrorSuccess, # type: ignore - AXValueGetType, # type: ignore - kAXValueCGSizeType, # type: ignore - kAXValueCGPointType, # type: ignore - kAXValueCFRangeType, # type: ignore - AXUIElementGetTypeID, # type: ignore - AXValueGetValue, # type: ignore - kAXVisibleChildrenAttribute, # type: ignore - kAXRoleDescriptionAttribute, # type: ignore - kAXFocusedApplicationAttribute, # type: ignore - kAXFocusedUIElementAttribute, # type: ignore - kAXSelectedTextAttribute, # type: ignore - kAXSelectedTextRangeAttribute, # type: ignore -) -import objc -import re -import json -import copy -import asyncio + from .base import BaseAccessibilityHandler, BaseAutomationHandler -import logging logger = logging.getLogger(__name__) @@ -73,24 +76,26 @@ kCGWindowAlpha = "kCGWindowAlpha" # Window opacity NSApplicationActivationOptions = { "regular": 0, # Default activation "bringing_all_windows_forward": 1 << 0, # NSApplicationActivateAllWindows - "ignoring_other_apps": 1 << 1 # NSApplicationActivateIgnoringOtherApps + "ignoring_other_apps": 1 << 1, # NSApplicationActivateIgnoringOtherApps } + def CFAttributeToPyObject(attrValue): """Convert Core Foundation attribute values to Python objects. - + Args: attrValue: Core Foundation attribute value to convert - + Returns: Converted Python object or None if conversion fails """ + def list_helper(list_value): """Helper function to convert CF arrays to Python lists. - + Args: list_value: Core Foundation array to convert - + Returns: Python list containing converted items """ @@ -101,10 +106,10 @@ def CFAttributeToPyObject(attrValue): def number_helper(number_value): """Helper function to convert CF numbers to Python numbers. - + Args: number_value: Core Foundation number to convert - + Returns: Python int or float, or None if conversion fails """ @@ -123,10 +128,10 @@ def CFAttributeToPyObject(attrValue): def axuielement_helper(element_value): """Helper function to handle AX UI elements. - + Args: element_value: Accessibility UI element to process - + Returns: The element value unchanged """ @@ -164,11 +169,11 @@ def CFAttributeToPyObject(attrValue): def element_attribute(element, attribute): """Get an attribute value from an accessibility element. 
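The body of `element_attribute` is outside these hunks, but the PyObjC pattern it wraps is standard: C out-parameters become extra return values, so `AXUIElementCopyAttributeValue(element, attribute, &value)` surfaces in Python as an `(error, value)` tuple. A minimal equivalent (macOS only, requires PyObjC and accessibility permissions; `ax_attribute` is an illustrative name):

```python
from ApplicationServices import (
    AXUIElementCopyAttributeValue,
    AXUIElementCreateApplication,
    kAXErrorSuccess,
    kAXTitleAttribute,
)


def ax_attribute(element, attribute):
    # PyObjC turns the C out-parameter into a second return value.
    error, value = AXUIElementCopyAttributeValue(element, attribute, None)
    return value if error == kAXErrorSuccess else None


# app = AXUIElementCreateApplication(pid)  # pid of a running app
# print(ax_attribute(app, kAXTitleAttribute))
```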
- + Args: element: The accessibility element attribute: The attribute name to retrieve - + Returns: The attribute value or None if not found """ @@ -190,11 +195,11 @@ def element_attribute(element, attribute): def element_value(element, type): """Extract a typed value from an accessibility element. - + Args: element: The accessibility element containing the value type: The expected value type - + Returns: The extracted value or None if extraction fails """ @@ -206,10 +211,10 @@ def element_value(element, type): class UIElement: """Represents a UI element in the accessibility tree with position, size, and hierarchy information.""" - + def __init__(self, element, offset_x=0, offset_y=0, max_depth=None, parents_visible_bbox=None): """Initialize a UIElement from an accessibility element. - + Args: element: The accessibility element to wrap offset_x: X offset for position calculations @@ -297,7 +302,7 @@ class UIElement: def _set_bboxes(self, parents_visible_bbox): """Set bounding box and visible bounding box for the element. - + Args: parents_visible_bbox: Parent's visible bounding box for intersection calculation """ @@ -332,13 +337,13 @@ class UIElement: def _get_children(self, element, start_position, offset_x, offset_y): """Get child elements from the accessibility element. - + Args: element: The parent accessibility element start_position: Starting position for offset calculations offset_x: X offset for child positioning offset_y: Y offset for child positioning - + Returns: List of UIElement children """ @@ -371,7 +376,7 @@ class UIElement: def component_hash(self): """Generate a hash identifier for this component based on its properties. - + Returns: MD5 hash string of component properties """ @@ -388,10 +393,10 @@ class UIElement: def hash_from_string(self, string): """Generate MD5 hash from a string. - + Args: string: Input string to hash - + Returns: MD5 hash hexdigest or empty string if input is None/empty """ @@ -403,10 +408,10 @@ class UIElement: def children_content_hash(self, children): """Generate a hash representing the content and structure of child elements. - + Args: children: List of child UIElement objects - + Returns: Combined hash of children content and structure """ @@ -426,16 +431,17 @@ class UIElement: def to_dict(self): """Convert the UIElement to a dictionary representation. - + Returns: Dictionary containing all element properties and children """ + def children_to_dict(children): """Convert list of children to dictionary format. - + Args: children: List of UIElement children to convert - + Returns: List of dictionaries representing the children """ @@ -464,7 +470,7 @@ class UIElement: size = f"{self.size.width:.0f};{self.size.height:.0f}" else: size = "" - + return { "id": self.identifier, "name": self.name, @@ -482,36 +488,38 @@ class UIElement: } -import Quartz -from AppKit import NSWorkspace, NSRunningApplication from pathlib import Path +import Quartz +from AppKit import NSRunningApplication, NSWorkspace + + def get_all_windows_zorder(): """Get all windows in the system with their z-order information. 
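The z-index trick above in isolation: `CGWindowListCopyWindowInfo` with the on-screen option returns windows front-to-back, so reversing the list before enumerating yields a bottom-up z-index that can then be joined onto the full window list. Condensed:

```python
import Quartz

on_screen = Quartz.CGWindowListCopyWindowInfo(
    Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID
)
# Front-to-back order reversed: a larger z_index means closer to the front.
z_order = {w["kCGWindowNumber"]: z for z, w in enumerate(on_screen[::-1])}
for w in on_screen:
    bounds = w.get("kCGWindowBounds", {})
    print(
        z_order[w["kCGWindowNumber"]],
        w.get("kCGWindowOwnerName", ""),
        f"{bounds.get('Width', 0):.0f}x{bounds.get('Height', 0):.0f}",
    )
```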
- + Returns: List of window dictionaries sorted by z-index, containing window properties like id, name, pid, owner, bounds, layer, and opacity """ window_list = Quartz.CGWindowListCopyWindowInfo( - Quartz.kCGWindowListOptionOnScreenOnly, - Quartz.kCGNullWindowID + Quartz.kCGWindowListOptionOnScreenOnly, Quartz.kCGNullWindowID ) - z_order = {window['kCGWindowNumber']: z_index for z_index, window in enumerate(window_list[::-1])} + z_order = { + window["kCGWindowNumber"]: z_index for z_index, window in enumerate(window_list[::-1]) + } window_list_all = Quartz.CGWindowListCopyWindowInfo( - Quartz.kCGWindowListOptionAll, - Quartz.kCGNullWindowID + Quartz.kCGWindowListOptionAll, Quartz.kCGNullWindowID ) windows = [] for window in window_list_all: - window_id = window.get('kCGWindowNumber', 0) - window_name = window.get('kCGWindowName', '') - window_pid = window.get('kCGWindowOwnerPID', 0) - window_bounds = window.get('kCGWindowBounds', {}) - window_owner = window.get('kCGWindowOwnerName', '') - window_is_on_screen = window.get('kCGWindowIsOnscreen', False) - layer = window.get('kCGWindowLayer', 0) - opacity = window.get('kCGWindowAlpha', 1.0) + window_id = window.get("kCGWindowNumber", 0) + window_name = window.get("kCGWindowName", "") + window_pid = window.get("kCGWindowOwnerPID", 0) + window_bounds = window.get("kCGWindowBounds", {}) + window_owner = window.get("kCGWindowOwnerName", "") + window_is_on_screen = window.get("kCGWindowIsOnscreen", False) + layer = window.get("kCGWindowLayer", 0) + opacity = window.get("kCGWindowAlpha", 1.0) z_index = z_order.get(window_id, -1) if window_name == "Dock" and window_owner == "Dock": role = "dock" @@ -522,32 +530,35 @@ def get_all_windows_zorder(): else: role = "app" if window_bounds: - windows.append({ - "id": window_id, - "name": window_name or "Unnamed Window", - "pid": window_pid, - "owner": window_owner, - "role": role, - "is_on_screen": window_is_on_screen, - "bounds": { - "x": window_bounds.get('X', 0), - "y": window_bounds.get('Y', 0), - "width": window_bounds.get('Width', 0), - "height": window_bounds.get('Height', 0) - }, - "layer": layer, - "z_index": z_index, - "opacity": opacity - }) + windows.append( + { + "id": window_id, + "name": window_name or "Unnamed Window", + "pid": window_pid, + "owner": window_owner, + "role": role, + "is_on_screen": window_is_on_screen, + "bounds": { + "x": window_bounds.get("X", 0), + "y": window_bounds.get("Y", 0), + "width": window_bounds.get("Width", 0), + "height": window_bounds.get("Height", 0), + }, + "layer": layer, + "z_index": z_index, + "opacity": opacity, + } + ) windows = sorted(windows, key=lambda x: x["z_index"]) return windows + def get_app_info(app): """Extract information from an NSRunningApplication object. - + Args: app: NSRunningApplication instance - + Returns: Dictionary containing app name, bundle ID, PID, and status flags """ @@ -560,12 +571,13 @@ def get_app_info(app): "terminated": app.isTerminated(), } + def get_menubar_items(active_app_pid=None): """Get menubar items for the active application. 
- + Args: active_app_pid: Process ID of the active application, or None to use frontmost app - + Returns: List of menubar item dictionaries with title, bounds, index, and app_pid """ @@ -591,26 +603,24 @@ def get_menubar_items(active_app_pid=None): position_value = element_attribute(item, kAXPositionAttribute) if position_value: position_value = element_value(position_value, kAXValueCGPointType) - bounds["x"] = getattr(position_value, 'x', 0) - bounds["y"] = getattr(position_value, 'y', 0) + bounds["x"] = getattr(position_value, "x", 0) + bounds["y"] = getattr(position_value, "y", 0) size_value = element_attribute(item, kAXSizeAttribute) if size_value: size_value = element_value(size_value, kAXValueCGSizeType) - bounds["width"] = getattr(size_value, 'width', 0) - bounds["height"] = getattr(size_value, 'height', 0) - menubar_items.append({ - "title": title, - "bounds": bounds, - "index": i, - "app_pid": active_app_pid - }) + bounds["width"] = getattr(size_value, "width", 0) + bounds["height"] = getattr(size_value, "height", 0) + menubar_items.append( + {"title": title, "bounds": bounds, "index": i, "app_pid": active_app_pid} + ) return menubar_items + def get_dock_items(): """Get all items in the macOS Dock. - + Returns: - List of dock item dictionaries with title, description, bounds, index, + List of dock item dictionaries with title, description, bounds, index, type, role, and subrole information """ dock_items = [] @@ -648,13 +658,13 @@ def get_dock_items(): position_value = element_attribute(item, kAXPositionAttribute) if position_value: position_value = element_value(position_value, kAXValueCGPointType) - bounds["x"] = getattr(position_value, 'x', 0) - bounds["y"] = getattr(position_value, 'y', 0) + bounds["x"] = getattr(position_value, "x", 0) + bounds["y"] = getattr(position_value, "y", 0) size_value = element_attribute(item, kAXSizeAttribute) if size_value: size_value = element_value(size_value, kAXValueCGSizeType) - bounds["width"] = getattr(size_value, 'width', 0) - bounds["height"] = getattr(size_value, 'height', 0) + bounds["width"] = getattr(size_value, "width", 0) + bounds["height"] = getattr(size_value, "height", 0) item_type = "unknown" if subrole == "AXApplicationDockItem": item_type = "application" @@ -666,23 +676,26 @@ def get_dock_items(): item_type = "separator" elif "trash" in title.lower(): item_type = "trash" - dock_items.append({ - "title": title, - "description": description, - "bounds": bounds, - "index": i, - "type": item_type, - "role": role, - "subrole": subrole - }) + dock_items.append( + { + "title": title, + "description": description, + "bounds": bounds, + "index": i, + "type": item_type, + "role": role, + "subrole": subrole, + } + ) return dock_items + class MacOSAccessibilityHandler(BaseAccessibilityHandler): """Handler for macOS accessibility features and UI element inspection.""" - + def get_desktop_state(self): """Get the current state of the desktop including windows, apps, menubar, and dock. 
- + Returns: Dictionary containing applications, windows, menubar_items, and dock_items """ @@ -696,7 +709,9 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): pid = app.processIdentifier() try: app_elem = AXUIElementCreateApplication(pid) - err, app_windows = AXUIElementCopyAttributeValue(app_elem, kAXWindowsAttribute, None) + err, app_windows = AXUIElementCopyAttributeValue( + app_elem, kAXWindowsAttribute, None + ) trees = [] if err == kAXErrorSuccess and app_windows: for ax_win in app_windows: @@ -713,31 +728,32 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): pid = win["pid"] idx = pid_to_idx.get(pid, 0) ax_trees = pid_to_ax_trees.get(pid, []) - win["children"] = ax_trees[idx]["children"] if idx < len(ax_trees) and "children" in ax_trees[idx] else [] + win["children"] = ( + ax_trees[idx]["children"] + if idx < len(ax_trees) and "children" in ax_trees[idx] + else [] + ) pid_to_idx[pid] = idx + 1 pid_to_window_ids.setdefault(pid, []).append(win["id"]) for app in running_apps: info = get_app_info(app) app_pid = info["pid"] - applications.append({ - "info": info, - "windows": pid_to_window_ids.get(app_pid, []) - }) + applications.append({"info": info, "windows": pid_to_window_ids.get(app_pid, [])}) menubar_items = get_menubar_items() dock_items = get_dock_items() return { "applications": applications, "windows": windows, "menubar_items": menubar_items, - "dock_items": dock_items + "dock_items": dock_items, } def get_application_windows(self, pid: int): """Get all windows for a specific application. - + Args: pid: Process ID of the application - + Returns: List of accessibility window elements or empty list if none found """ @@ -753,7 +769,7 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): def get_all_windows(self): """Get all visible windows in the system. - + Returns: List of window dictionaries with app information and window details """ @@ -791,7 +807,7 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): def get_running_apps(self): """Get all currently running applications. - + Returns: List of NSRunningApplication objects """ @@ -803,11 +819,11 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): def get_ax_attribute(self, element, attribute): """Get an accessibility attribute from an element. - + Args: element: The accessibility element attribute: The attribute name to retrieve - + Returns: The attribute value or None if not found """ @@ -815,10 +831,10 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): def serialize_node(self, element): """Create a serializable dictionary representation of an accessibility element. - + Args: element: The accessibility element to serialize - + Returns: Dictionary containing element properties like role, title, value, position, and size """ @@ -851,16 +867,13 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): async def get_accessibility_tree(self) -> Dict[str, Any]: """Get the complete accessibility tree for the current desktop state. 
- + Returns: Dictionary containing success status and desktop state information - """ + """ try: desktop_state = self.get_desktop_state() - return { - "success": True, - **desktop_state - } + return {"success": True, **desktop_state} except Exception as e: return {"success": False, "error": str(e)} @@ -869,12 +882,12 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): self, role: Optional[str] = None, title: Optional[str] = None, value: Optional[str] = None ) -> Dict[str, Any]: """Find an accessibility element matching the specified criteria. - + Args: role: The accessibility role to match (optional) title: The title to match (optional) value: The value to match (optional) - + Returns: Dictionary containing success status and the found element or error message """ @@ -883,10 +896,10 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): def match_element(element): """Check if an element matches the search criteria. - + Args: element: The accessibility element to check - + Returns: True if element matches all specified criteria, False otherwise """ @@ -900,10 +913,10 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): def search_tree(element): """Recursively search the accessibility tree for matching elements. - + Args: element: The accessibility element to search from - + Returns: Serialized element dictionary if match found, None otherwise """ @@ -924,58 +937,71 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler): except Exception as e: return {"success": False, "error": str(e)} + class MacOSAutomationHandler(BaseAutomationHandler): """Handler for macOS automation including mouse, keyboard, and screen operations.""" - + # Mouse Actions mouse = MouseController() keyboard = KeyboardController() - - async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + + async def mouse_down( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Press and hold a mouse button at the specified coordinates. - + Args: x: X coordinate (optional, uses current position if None) y: Y coordinate (optional, uses current position if None) button: Mouse button to press ("left", "right", or "middle") - + Returns: Dictionary containing success status and error message if failed """ try: if x is not None and y is not None: self.mouse.position = (x, y) - self.mouse.press(Button.left if button == "left" else Button.right if button == "right" else Button.middle) + self.mouse.press( + Button.left + if button == "left" + else Button.right if button == "right" else Button.middle + ) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + async def mouse_up( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Release a mouse button at the specified coordinates. 
- + Args: x: X coordinate (optional, uses current position if None) y: Y coordinate (optional, uses current position if None) button: Mouse button to release ("left", "right", or "middle") - + Returns: Dictionary containing success status and error message if failed """ try: if x is not None and y is not None: self.mouse.position = (x, y) - self.mouse.release(Button.left if button == "left" else Button.right if button == "right" else Button.middle) + self.mouse.release( + Button.left + if button == "left" + else Button.right if button == "right" else Button.middle + ) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a left mouse click at the specified coordinates. - + Args: x: X coordinate (optional, uses current position if None) y: Y coordinate (optional, uses current position if None) - + Returns: Dictionary containing success status and error message if failed """ @@ -989,11 +1015,11 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a right mouse click at the specified coordinates. - + Args: x: X coordinate (optional, uses current position if None) y: Y coordinate (optional, uses current position if None) - + Returns: Dictionary containing success status and error message if failed """ @@ -1009,11 +1035,11 @@ class MacOSAutomationHandler(BaseAutomationHandler): self, x: Optional[int] = None, y: Optional[int] = None ) -> Dict[str, Any]: """Perform a double left mouse click at the specified coordinates. - + Args: x: X coordinate (optional, uses current position if None) y: Y coordinate (optional, uses current position if None) - + Returns: Dictionary containing success status and error message if failed """ @@ -1027,11 +1053,11 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def move_cursor(self, x: int, y: int) -> Dict[str, Any]: """Move the mouse cursor to the specified coordinates. - + Args: x: Target X coordinate y: Target Y coordinate - + Returns: Dictionary containing success status and error message if failed """ @@ -1045,18 +1071,22 @@ class MacOSAutomationHandler(BaseAutomationHandler): self, x: int, y: int, button: str = "left", duration: float = 0.5 ) -> Dict[str, Any]: """Drag from current position to target coordinates. - + Args: x: Target X coordinate y: Target Y coordinate button: Mouse button to use for dragging ("left", "right", or "middle") duration: Duration of the drag operation in seconds - + Returns: Dictionary containing success status and error message if failed """ try: - btn = Button.left if button == "left" else Button.right if button == "right" else Button.middle + btn = ( + Button.left + if button == "left" + else Button.right if button == "right" else Button.middle + ) # Press self.mouse.press(btn) # Move with sleep to simulate drag duration @@ -1082,19 +1112,23 @@ class MacOSAutomationHandler(BaseAutomationHandler): self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 ) -> Dict[str, Any]: """Drag the mouse along a specified path of coordinates. 
- + Args: path: List of (x, y) coordinate tuples defining the drag path button: Mouse button to use for dragging ("left", "right", or "middle") duration: Total duration of the drag operation in seconds - + Returns: Dictionary containing success status and error message if failed """ try: if not path or len(path) < 2: return {"success": False, "error": "Path must contain at least 2 points"} - btn = Button.left if button == "left" else Button.right if button == "right" else Button.middle + btn = ( + Button.left + if button == "left" + else Button.right if button == "right" else Button.middle + ) # Move to the first point self.mouse.position = path[0] self.mouse.press(btn) @@ -1114,10 +1148,10 @@ class MacOSAutomationHandler(BaseAutomationHandler): # Keyboard Actions async def key_down(self, key: str) -> Dict[str, Any]: """Press and hold a keyboard key. - + Args: key: Key name to press (using pyautogui key names) - + Returns: Dictionary containing success status and error message if failed """ @@ -1127,13 +1161,13 @@ class MacOSAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def key_up(self, key: str) -> Dict[str, Any]: """Release a keyboard key. - + Args: key: Key name to release (using pyautogui key names) - + Returns: Dictionary containing success status and error message if failed """ @@ -1143,13 +1177,13 @@ class MacOSAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def type_text(self, text: str) -> Dict[str, Any]: """Type text using the keyboard with Unicode support. - + Args: text: Text string to type - + Returns: Dictionary containing success status and error message if failed """ @@ -1162,10 +1196,10 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def press_key(self, key: str) -> Dict[str, Any]: """Press and release a keyboard key. - + Args: key: Key name to press (using pyautogui key names) - + Returns: Dictionary containing success status and error message if failed """ @@ -1178,10 +1212,10 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def hotkey(self, keys: List[str]) -> Dict[str, Any]: """Press a combination of keys simultaneously. - + Args: keys: List of key names to press together (using pyautogui key names) - + Returns: Dictionary containing success status and error message if failed """ @@ -1195,11 +1229,11 @@ class MacOSAutomationHandler(BaseAutomationHandler): # Scrolling Actions async def scroll(self, x: int, y: int) -> Dict[str, Any]: """Scroll the mouse wheel in the specified direction. - + Args: x: Horizontal scroll amount y: Vertical scroll amount (positive for up, negative for down) - + Returns: Dictionary containing success status and error message if failed """ @@ -1208,13 +1242,13 @@ class MacOSAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: """Scroll down by the specified number of clicks. - + Args: clicks: Number of scroll clicks to perform - + Returns: Dictionary containing success status and error message if failed """ @@ -1226,10 +1260,10 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]: """Scroll up by the specified number of clicks. 
- + Args: clicks: Number of scroll clicks to perform - + Returns: Dictionary containing success status and error message if failed """ @@ -1242,7 +1276,7 @@ class MacOSAutomationHandler(BaseAutomationHandler): # Screen Actions async def screenshot(self) -> Dict[str, Any]: """Capture a screenshot of the current screen. - + Returns: Dictionary containing success status and base64-encoded image data or error message """ @@ -1263,7 +1297,7 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def get_screen_size(self) -> Dict[str, Any]: """Get the dimensions of the current screen. - + Returns: Dictionary containing success status and screen size or error message """ @@ -1275,7 +1309,7 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def get_cursor_position(self) -> Dict[str, Any]: """Get the current position of the mouse cursor. - + Returns: Dictionary containing success status and cursor position or error message """ @@ -1288,7 +1322,7 @@ class MacOSAutomationHandler(BaseAutomationHandler): # Clipboard Actions async def copy_to_clipboard(self) -> Dict[str, Any]: """Get the current content of the system clipboard. - + Returns: Dictionary containing success status and clipboard content or error message """ @@ -1302,10 +1336,10 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def set_clipboard(self, text: str) -> Dict[str, Any]: """Set the content of the system clipboard. - + Args: text: Text to copy to the clipboard - + Returns: Dictionary containing success status and error message if failed """ @@ -1319,28 +1353,26 @@ class MacOSAutomationHandler(BaseAutomationHandler): async def run_command(self, command: str) -> Dict[str, Any]: """Run a shell command and return its output. - + Args: command: Shell command to execute - + Returns: Dictionary containing success status, stdout, stderr, and return code """ try: # Create subprocess process = await asyncio.create_subprocess_shell( - command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE + command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) # Wait for the subprocess to finish stdout, stderr = await process.communicate() # Return decoded output return { - "success": True, - "stdout": stdout.decode() if stdout else "", + "success": True, + "stdout": stdout.decode() if stdout else "", "stderr": stderr.decode() if stderr else "", - "return_code": process.returncode + "return_code": process.returncode, } except Exception as e: return {"success": False, "error": str(e)} diff --git a/libs/python/computer-server/computer_server/handlers/windows.py b/libs/python/computer-server/computer_server/handlers/windows.py index 216a9f8b..a01456d4 100644 --- a/libs/python/computer-server/computer_server/handlers/windows.py +++ b/libs/python/computer-server/computer_server/handlers/windows.py @@ -4,15 +4,17 @@ Windows implementation of automation and accessibility handlers. This implementation uses pyautogui for GUI automation and Windows-specific APIs for accessibility and system operations. 
""" -from typing import Dict, Any, List, Tuple, Optional -import logging -import subprocess + import asyncio import base64 +import logging import os +import subprocess from io import BytesIO -from pynput.mouse import Controller as MouseController +from typing import Any, Dict, List, Optional, Tuple + from pynput.keyboard import Controller as KeyboardController +from pynput.mouse import Controller as MouseController # Configure logger logger = logging.getLogger(__name__) @@ -20,6 +22,7 @@ logger = logging.getLogger(__name__) # Try to import pyautogui try: import pyautogui + pyautogui.FAILSAFE = False logger.info("pyautogui successfully imported, GUI automation available") except Exception as e: @@ -28,58 +31,62 @@ except Exception as e: # Try to import Windows-specific modules try: - import win32gui - import win32con import win32api + import win32con + import win32gui + logger.info("Windows API modules successfully imported") WINDOWS_API_AVAILABLE = True except Exception as e: - logger.error(f"Windows API modules import failed: {str(e)}. Some Windows-specific features will be unavailable.") + logger.error( + f"Windows API modules import failed: {str(e)}. Some Windows-specific features will be unavailable." + ) WINDOWS_API_AVAILABLE = False from .base import BaseAccessibilityHandler, BaseAutomationHandler + class WindowsAccessibilityHandler(BaseAccessibilityHandler): """Windows implementation of accessibility handler.""" - + async def get_accessibility_tree(self) -> Dict[str, Any]: """Get the accessibility tree of the current window. - + Returns: Dict[str, Any]: A dictionary containing the success status and either the accessibility tree or an error message. - Structure: {"success": bool, "tree": dict} or + Structure: {"success": bool, "tree": dict} or {"success": bool, "error": str} """ if not WINDOWS_API_AVAILABLE: return {"success": False, "error": "Windows API not available"} - + try: # Get the foreground window hwnd = win32gui.GetForegroundWindow() if not hwnd: return {"success": False, "error": "No foreground window found"} - + # Get window information window_text = win32gui.GetWindowText(hwnd) rect = win32gui.GetWindowRect(hwnd) - + tree = { "role": "Window", "title": window_text, "position": {"x": rect[0], "y": rect[1]}, "size": {"width": rect[2] - rect[0], "height": rect[3] - rect[1]}, - "children": [] + "children": [], } - + # Enumerate child windows def enum_child_proc(hwnd_child, children_list): """Callback function to enumerate child windows and collect their information. - + Args: hwnd_child: Handle to the child window being enumerated. children_list: List to append child window information to. - + Returns: bool: True to continue enumeration, False to stop. 
""" @@ -87,46 +94,49 @@ class WindowsAccessibilityHandler(BaseAccessibilityHandler): child_text = win32gui.GetWindowText(hwnd_child) child_rect = win32gui.GetWindowRect(hwnd_child) child_class = win32gui.GetClassName(hwnd_child) - + child_info = { "role": child_class, "title": child_text, "position": {"x": child_rect[0], "y": child_rect[1]}, - "size": {"width": child_rect[2] - child_rect[0], "height": child_rect[3] - child_rect[1]}, - "children": [] + "size": { + "width": child_rect[2] - child_rect[0], + "height": child_rect[3] - child_rect[1], + }, + "children": [], } children_list.append(child_info) except Exception as e: logger.debug(f"Error getting child window info: {e}") return True - + win32gui.EnumChildWindows(hwnd, enum_child_proc, tree["children"]) - + return {"success": True, "tree": tree} - + except Exception as e: logger.error(f"Error getting accessibility tree: {e}") return {"success": False, "error": str(e)} - - async def find_element(self, role: Optional[str] = None, - title: Optional[str] = None, - value: Optional[str] = None) -> Dict[str, Any]: + + async def find_element( + self, role: Optional[str] = None, title: Optional[str] = None, value: Optional[str] = None + ) -> Dict[str, Any]: """Find an element in the accessibility tree by criteria. - + Args: role (Optional[str]): The role or class name of the element to find. title (Optional[str]): The title or text of the element to find. value (Optional[str]): The value of the element (not used in Windows implementation). - + Returns: Dict[str, Any]: A dictionary containing the success status and either the found element or an error message. - Structure: {"success": bool, "element": dict} or + Structure: {"success": bool, "element": dict} or {"success": bool, "error": str} """ if not WINDOWS_API_AVAILABLE: return {"success": False, "error": "Windows API not available"} - + try: # Find window by title if specified if title: @@ -139,10 +149,10 @@ class WindowsAccessibilityHandler(BaseAccessibilityHandler): "role": "Window", "title": title, "position": {"x": rect[0], "y": rect[1]}, - "size": {"width": rect[2] - rect[0], "height": rect[3] - rect[1]} - } + "size": {"width": rect[2] - rect[0], "height": rect[3] - rect[1]}, + }, } - + # Find window by class name if role is specified if role: hwnd = win32gui.FindWindow(role, None) @@ -155,36 +165,40 @@ class WindowsAccessibilityHandler(BaseAccessibilityHandler): "role": role, "title": window_text, "position": {"x": rect[0], "y": rect[1]}, - "size": {"width": rect[2] - rect[0], "height": rect[3] - rect[1]} - } + "size": {"width": rect[2] - rect[0], "height": rect[3] - rect[1]}, + }, } - + return {"success": False, "error": "Element not found"} - + except Exception as e: logger.error(f"Error finding element: {e}") return {"success": False, "error": str(e)} + class WindowsAutomationHandler(BaseAutomationHandler): """Windows implementation of automation handler using pyautogui and Windows APIs.""" - + mouse = MouseController() + keyboard = KeyboardController() # Mouse Actions - async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + async def mouse_down( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Press and hold a mouse button at the specified coordinates. - + Args: x (Optional[int]): The x-coordinate to move to before pressing. If None, uses current position. y (Optional[int]): The y-coordinate to move to before pressing. If None, uses current position. 
button (str): The mouse button to press ("left", "right", or "middle"). - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: if x is not None and y is not None: pyautogui.moveTo(x, y) @@ -192,21 +206,23 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - - async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + + async def mouse_up( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: """Release a mouse button at the specified coordinates. - + Args: x (Optional[int]): The x-coordinate to move to before releasing. If None, uses current position. y (Optional[int]): The y-coordinate to move to before releasing. If None, uses current position. button (str): The mouse button to release ("left", "right", or "middle"). - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: if x is not None and y is not None: pyautogui.moveTo(x, y) @@ -214,20 +230,20 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def move_cursor(self, x: int, y: int) -> Dict[str, Any]: """Move the mouse cursor to the specified coordinates. - + Args: x (int): The x-coordinate to move to. y (int): The y-coordinate to move to. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.moveTo(x, y) return {"success": True} @@ -236,17 +252,17 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a left mouse click at the specified coordinates. - + Args: x (Optional[int]): The x-coordinate to click at. If None, clicks at current position. y (Optional[int]): The y-coordinate to click at. If None, clicks at current position. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: if x is not None and y is not None: pyautogui.moveTo(x, y) @@ -257,17 +273,17 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: """Perform a right mouse click at the specified coordinates. - + Args: x (Optional[int]): The x-coordinate to click at. If None, clicks at current position. y (Optional[int]): The y-coordinate to click at. If None, clicks at current position. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. 
""" if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: if x is not None and y is not None: pyautogui.moveTo(x, y) @@ -276,19 +292,21 @@ class WindowsAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + async def double_click( + self, x: Optional[int] = None, y: Optional[int] = None + ) -> Dict[str, Any]: """Perform a double left mouse click at the specified coordinates. - + Args: x (Optional[int]): The x-coordinate to double-click at. If None, clicks at current position. y (Optional[int]): The y-coordinate to double-click at. If None, clicks at current position. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: if x is not None and y is not None: pyautogui.moveTo(x, y) @@ -297,52 +315,56 @@ class WindowsAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} - async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + async def drag_to( + self, x: int, y: int, button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: """Drag from the current position to the specified coordinates. - + Args: x (int): The x-coordinate to drag to. y (int): The y-coordinate to drag to. button (str): The mouse button to use for dragging ("left", "right", or "middle"). duration (float): The time in seconds to take for the drag operation. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.dragTo(x, y, duration=duration, button=button) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + async def drag( + self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: """Drag the mouse through a series of coordinates. - + Args: path (List[Tuple[int, int]]): A list of (x, y) coordinate tuples to drag through. button (str): The mouse button to use for dragging ("left", "right", or "middle"). duration (float): The total time in seconds for the entire drag operation. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: if not path: return {"success": False, "error": "Path is empty"} - + # Move to first position pyautogui.moveTo(*path[0]) - + # Drag through all positions for x, y in path[1:]: - pyautogui.dragTo(x, y, duration=duration/len(path), button=button) - + pyautogui.dragTo(x, y, duration=duration / len(path), button=button) + return {"success": True} except Exception as e: return {"success": False, "error": str(e)} @@ -350,70 +372,68 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Keyboard Actions async def key_down(self, key: str) -> Dict[str, Any]: """Press and hold a keyboard key. - + Args: key (str): The key to press down (e.g., 'ctrl', 'shift', 'a'). - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. 
""" if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.keyDown(key) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def key_up(self, key: str) -> Dict[str, Any]: """Release a keyboard key. - + Args: key (str): The key to release (e.g., 'ctrl', 'shift', 'a'). - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.keyUp(key) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def type_text(self, text: str) -> Dict[str, Any]: """Type the specified text. - + Args: text (str): The text to type. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ - if not pyautogui: - return {"success": False, "error": "pyautogui not available"} - try: - pyautogui.write(text) + # use pynput for Unicode support + self.keyboard.type(text) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} async def press_key(self, key: str) -> Dict[str, Any]: """Press and release a keyboard key. - + Args: key (str): The key to press (e.g., 'enter', 'space', 'tab'). - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.press(key) return {"success": True} @@ -422,16 +442,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def hotkey(self, keys: List[str]) -> Dict[str, Any]: """Press a combination of keys simultaneously. - + Args: keys (List[str]): The keys to press together (e.g., ['ctrl', 'c'], ['alt', 'tab']). - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.hotkey(*keys) return {"success": True} @@ -441,35 +461,35 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Scrolling Actions async def scroll(self, x: int, y: int) -> Dict[str, Any]: """Scroll vertically at the current cursor position. - + Args: x (int): Horizontal scroll amount (not used in pyautogui implementation). y (int): Vertical scroll amount. Positive values scroll up, negative values scroll down. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: self.mouse.scroll(x, y) return {"success": True} except Exception as e: return {"success": False, "error": str(e)} - + async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: """Scroll down by the specified number of clicks. - + Args: clicks (int): The number of scroll clicks to perform downward. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.scroll(-clicks) return {"success": True} @@ -478,16 +498,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]: """Scroll up by the specified number of clicks. - + Args: clicks (int): The number of scroll clicks to perform upward. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. 
""" if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: pyautogui.scroll(clicks) return {"success": True} @@ -497,22 +517,23 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Screen Actions async def screenshot(self) -> Dict[str, Any]: """Capture a screenshot of the entire screen. - + Returns: Dict[str, Any]: A dictionary containing the success status and either base64-encoded image data or an error message. - Structure: {"success": bool, "image_data": str} or + Structure: {"success": bool, "image_data": str} or {"success": bool, "error": str} """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} - + try: from PIL import Image + screenshot = pyautogui.screenshot() if not isinstance(screenshot, Image.Image): return {"success": False, "error": "Failed to capture screenshot"} - + buffered = BytesIO() screenshot.save(buffered, format="PNG", optimize=True) buffered.seek(0) @@ -523,11 +544,11 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def get_screen_size(self) -> Dict[str, Any]: """Get the size of the screen in pixels. - + Returns: Dict[str, Any]: A dictionary containing the success status and either screen size information or an error message. - Structure: {"success": bool, "size": {"width": int, "height": int}} or + Structure: {"success": bool, "size": {"width": int, "height": int}} or {"success": bool, "error": str} """ try: @@ -546,11 +567,11 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def get_cursor_position(self) -> Dict[str, Any]: """Get the current position of the mouse cursor. - + Returns: Dict[str, Any]: A dictionary containing the success status and either cursor position or an error message. - Structure: {"success": bool, "position": {"x": int, "y": int}} or + Structure: {"success": bool, "position": {"x": int, "y": int}} or {"success": bool, "error": str} """ try: @@ -569,15 +590,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Clipboard Actions async def copy_to_clipboard(self) -> Dict[str, Any]: """Get the current content of the clipboard. - + Returns: Dict[str, Any]: A dictionary containing the success status and either clipboard content or an error message. - Structure: {"success": bool, "content": str} or + Structure: {"success": bool, "content": str} or {"success": bool, "error": str} """ try: import pyperclip + content = pyperclip.paste() return {"success": True, "content": content} except Exception as e: @@ -585,15 +607,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): async def set_clipboard(self, text: str) -> Dict[str, Any]: """Set the clipboard content to the specified text. - + Args: text (str): The text to copy to the clipboard. - + Returns: Dict[str, Any]: A dictionary with success status and optional error message. """ try: import pyperclip + pyperclip.copy(text) return {"success": True} except Exception as e: @@ -602,31 +625,29 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Command Execution async def run_command(self, command: str) -> Dict[str, Any]: """Execute a shell command asynchronously. - + Args: command (str): The shell command to execute. - + Returns: Dict[str, Any]: A dictionary containing the success status and either command output or an error message. 
- Structure: {"success": bool, "stdout": str, "stderr": str, "return_code": int} or + Structure: {"success": bool, "stdout": str, "stderr": str, "return_code": int} or {"success": bool, "error": str} """ try: # Create subprocess process = await asyncio.create_subprocess_shell( - command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE + command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) # Wait for the subprocess to finish stdout, stderr = await process.communicate() # Return decoded output return { - "success": True, - "stdout": stdout.decode() if stdout else "", - "stderr": stderr.decode() if stderr else "", - "return_code": process.returncode + "success": True, + "stdout": stdout.decode() if stdout else "", + "stderr": stderr.decode() if stderr else "", + "return_code": process.returncode, } except Exception as e: return {"success": False, "error": str(e)} diff --git a/libs/python/computer-server/computer_server/main.py b/libs/python/computer-server/computer_server/main.py index 71ceafe0..9c0c052b 100644 --- a/libs/python/computer-server/computer_server/main.py +++ b/libs/python/computer-server/computer_server/main.py @@ -1,27 +1,37 @@ -from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, HTTPException, Header -from fastapi.responses import StreamingResponse, JSONResponse -from typing import List, Dict, Any, Optional, Union, Literal, cast -import uvicorn -import logging import asyncio -import json -import traceback -import inspect -from contextlib import redirect_stdout, redirect_stderr -from io import StringIO -from .handlers.factory import HandlerFactory -import os -import aiohttp import hashlib -import time +import inspect +import json +import logging +import os import platform +import time +import traceback +from contextlib import redirect_stderr, redirect_stdout +from io import StringIO +from typing import Any, Dict, List, Literal, Optional, Union, cast + +import aiohttp +import uvicorn +from fastapi import ( + FastAPI, + Header, + HTTPException, + Request, + WebSocket, + WebSocketDisconnect, +) from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse, StreamingResponse + +from .handlers.factory import HandlerFactory # Authentication session TTL (in seconds). Override via env var CUA_AUTH_TTL_SECONDS. 
Default: 60s AUTH_SESSION_TTL_SECONDS: int = int(os.environ.get("CUA_AUTH_TTL_SECONDS", "60")) try: from agent import ComputerAgent + HAS_AGENT = True except ImportError: HAS_AGENT = False @@ -54,16 +64,20 @@ app.add_middleware( protocol_version = 1 try: from importlib.metadata import version + package_version = version("cua-computer-server") except Exception: # Fallback for cases where package is not installed or importlib.metadata is not available try: import pkg_resources + package_version = pkg_resources.get_distribution("cua-computer-server").version except Exception: package_version = "unknown" -accessibility_handler, automation_handler, diorama_handler, file_handler = HandlerFactory.create_handlers() +accessibility_handler, automation_handler, diorama_handler, file_handler = ( + HandlerFactory.create_handlers() +) handlers = { "version": lambda: {"protocol": protocol_version, "package": package_version}, # App-Use commands @@ -118,87 +132,91 @@ class AuthenticationManager: def __init__(self): self.sessions: Dict[str, Dict[str, Any]] = {} self.container_name = os.environ.get("CONTAINER_NAME") - + def _hash_credentials(self, container_name: str, api_key: str) -> str: """Create a hash of container name and API key for session identification""" combined = f"{container_name}:{api_key}" return hashlib.sha256(combined.encode()).hexdigest() - + def _is_session_valid(self, session_data: Dict[str, Any]) -> bool: """Check if a session is still valid based on expiration time""" - if not session_data.get('valid', False): + if not session_data.get("valid", False): return False - - expires_at = session_data.get('expires_at', 0) + + expires_at = session_data.get("expires_at", 0) return time.time() < expires_at - + async def auth(self, container_name: str, api_key: str) -> bool: """Authenticate container name and API key, using cached sessions when possible""" # If no CONTAINER_NAME is set, always allow access (local development) if not self.container_name: - logger.info("No CONTAINER_NAME set in environment. Allowing access (local development mode)") + logger.info( + "No CONTAINER_NAME set in environment. Allowing access (local development mode)" + ) return True - + # Layer 1: VM Identity Verification if container_name != self.container_name: - logger.warning(f"VM name mismatch. Expected: {self.container_name}, Got: {container_name}") + logger.warning( + f"VM name mismatch. 
Expected: {self.container_name}, Got: {container_name}" + ) return False - + # Create hash for session lookup session_hash = self._hash_credentials(container_name, api_key) - + # Check if we have a valid cached session if session_hash in self.sessions: session_data = self.sessions[session_hash] if self._is_session_valid(session_data): logger.info(f"Using cached authentication for container: {container_name}") - return session_data['valid'] + return session_data["valid"] else: # Remove expired session del self.sessions[session_hash] - + # No valid cached session, authenticate with API logger.info(f"Authenticating with TryCUA API for container: {container_name}") - + try: async with aiohttp.ClientSession() as session: - headers = { - "Authorization": f"Bearer {api_key}" - } - + headers = {"Authorization": f"Bearer {api_key}"} + async with session.get( - f"https://www.trycua.com/api/vm/auth?container_name={container_name}", + f"https://www.cua.ai/api/vm/auth?container_name={container_name}", headers=headers, ) as resp: is_valid = resp.status == 200 and bool((await resp.text()).strip()) - + # Cache the result with configurable expiration self.sessions[session_hash] = { - 'valid': is_valid, - 'expires_at': time.time() + AUTH_SESSION_TTL_SECONDS + "valid": is_valid, + "expires_at": time.time() + AUTH_SESSION_TTL_SECONDS, } - + if is_valid: logger.info(f"Authentication successful for container: {container_name}") else: - logger.warning(f"Authentication failed for container: {container_name}. Status: {resp.status}") - + logger.warning( + f"Authentication failed for container: {container_name}. Status: {resp.status}" + ) + return is_valid - + except aiohttp.ClientError as e: logger.error(f"Failed to validate API key with TryCUA API: {str(e)}") # Cache failed result to avoid repeated requests self.sessions[session_hash] = { - 'valid': False, - 'expires_at': time.time() + AUTH_SESSION_TTL_SECONDS + "valid": False, + "expires_at": time.time() + AUTH_SESSION_TTL_SECONDS, } return False except Exception as e: logger.error(f"Unexpected error during authentication: {str(e)}") # Cache failed result to avoid repeated requests self.sessions[session_hash] = { - 'valid': False, - 'expires_at': time.time() + AUTH_SESSION_TTL_SECONDS + "valid": False, + "expires_at": time.time() + AUTH_SESSION_TTL_SECONDS, } return False @@ -218,6 +236,7 @@ class ConnectionManager: manager = ConnectionManager() auth_manager = AuthenticationManager() + @app.get("/status") async def status(): sys = platform.system().lower() @@ -234,80 +253,67 @@ async def status(): features.append("agent") return {"status": "ok", "os_type": os_type, "features": features} + @app.websocket("/ws", name="websocket_endpoint") async def websocket_endpoint(websocket: WebSocket): global handlers # WebSocket message size is configured at the app or endpoint level, not on the instance await manager.connect(websocket) - + # Check if CONTAINER_NAME is set (indicating cloud provider) server_container_name = os.environ.get("CONTAINER_NAME") - + # If cloud provider, perform authentication handshake if server_container_name: try: - logger.info(f"Cloud provider detected. CONTAINER_NAME: {server_container_name}. Waiting for authentication...") - + logger.info( + f"Cloud provider detected. CONTAINER_NAME: {server_container_name}. Waiting for authentication..." 
+ ) + # Wait for authentication message auth_data = await websocket.receive_json() - + # Validate auth message format if auth_data.get("command") != "authenticate": - await websocket.send_json({ - "success": False, - "error": "First message must be authentication" - }) + await websocket.send_json( + {"success": False, "error": "First message must be authentication"} + ) await websocket.close() manager.disconnect(websocket) return - + # Extract credentials client_api_key = auth_data.get("params", {}).get("api_key") client_container_name = auth_data.get("params", {}).get("container_name") - + # Validate credentials using AuthenticationManager if not client_api_key: - await websocket.send_json({ - "success": False, - "error": "API key required" - }) + await websocket.send_json({"success": False, "error": "API key required"}) await websocket.close() manager.disconnect(websocket) return - + if not client_container_name: - await websocket.send_json({ - "success": False, - "error": "Container name required" - }) + await websocket.send_json({"success": False, "error": "Container name required"}) await websocket.close() manager.disconnect(websocket) return - + # Use AuthenticationManager for validation is_authenticated = await auth_manager.auth(client_container_name, client_api_key) if not is_authenticated: - await websocket.send_json({ - "success": False, - "error": "Authentication failed" - }) + await websocket.send_json({"success": False, "error": "Authentication failed"}) await websocket.close() manager.disconnect(websocket) return - + logger.info(f"Authentication successful for VM: {client_container_name}") - await websocket.send_json({ - "success": True, - "message": "Authentication successful" - }) - + await websocket.send_json({"success": True, "message": "Authentication successful"}) + except Exception as e: logger.error(f"Error during authentication handshake: {str(e)}") - await websocket.send_json({ - "success": False, - "error": "Authentication failed" - }) + await websocket.send_json({"success": False, "error": "Authentication failed"}) await websocket.close() manager.disconnect(websocket) return @@ -330,7 +336,7 @@ async def websocket_endpoint(websocket: WebSocket): handler_func = handlers[command] sig = inspect.signature(handler_func) filtered_params = {k: v for k, v in params.items() if k in sig.parameters} - + # Handle both sync and async functions if asyncio.iscoroutinefunction(handler_func): result = await handler_func(**filtered_params) @@ -367,20 +373,21 @@ async def websocket_endpoint(websocket: WebSocket): pass manager.disconnect(websocket) + @app.post("/cmd") async def cmd_endpoint( request: Request, container_name: Optional[str] = Header(None, alias="X-Container-Name"), - api_key: Optional[str] = Header(None, alias="X-API-Key") + api_key: Optional[str] = Header(None, alias="X-API-Key"), ): """ Backup endpoint for when WebSocket connections fail. Accepts commands via HTTP POST with streaming response. 
- + Headers: - X-Container-Name: Container name for cloud authentication - X-API-Key: API key for cloud authentication - + Body: { "command": "command_name", @@ -388,7 +395,7 @@ async def cmd_endpoint( } """ global handlers - + # Parse request body try: body = await request.json() @@ -396,32 +403,34 @@ async def cmd_endpoint( params = body.get("params", {}) except Exception as e: raise HTTPException(status_code=400, detail=f"Invalid JSON body: {str(e)}") - + if not command: raise HTTPException(status_code=400, detail="Command is required") - + # Check if CONTAINER_NAME is set (indicating cloud provider) server_container_name = os.environ.get("CONTAINER_NAME") - + # If cloud provider, perform authentication if server_container_name: - logger.info(f"Cloud provider detected. CONTAINER_NAME: {server_container_name}. Performing authentication...") - + logger.info( + f"Cloud provider detected. CONTAINER_NAME: {server_container_name}. Performing authentication..." + ) + # Validate required headers if not container_name: raise HTTPException(status_code=401, detail="Container name required") - + if not api_key: raise HTTPException(status_code=401, detail="API key required") - + # Validate with AuthenticationManager is_authenticated = await auth_manager.auth(container_name, api_key) if not is_authenticated: raise HTTPException(status_code=401, detail="Authentication failed") - + if command not in handlers: raise HTTPException(status_code=400, detail=f"Unknown command: {command}") - + async def generate_response(): """Generate streaming response for the command execution""" try: @@ -429,35 +438,36 @@ async def cmd_endpoint( handler_func = handlers[command] sig = inspect.signature(handler_func) filtered_params = {k: v for k, v in params.items() if k in sig.parameters} - + # Handle both sync and async functions if asyncio.iscoroutinefunction(handler_func): result = await handler_func(**filtered_params) else: # Run sync functions in thread pool to avoid blocking event loop result = await asyncio.to_thread(handler_func, **filtered_params) - + # Stream the successful result response_data = {"success": True, **result} yield f"data: {json.dumps(response_data)}\n\n" - + except Exception as cmd_error: logger.error(f"Error executing command {command}: {str(cmd_error)}") logger.error(traceback.format_exc()) - + # Stream the error result error_data = {"success": False, "error": str(cmd_error)} yield f"data: {json.dumps(error_data)}\n\n" - + return StreamingResponse( generate_response(), media_type="text/plain", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", - } + }, ) + @app.post("/responses") async def agent_response_endpoint( request: Request, @@ -480,11 +490,17 @@ async def agent_response_endpoint( """ if not HAS_AGENT: raise HTTPException(status_code=501, detail="ComputerAgent not available") - + # Authenticate via AuthenticationManager if running in cloud (CONTAINER_NAME set) container_name = os.environ.get("CONTAINER_NAME") if container_name: - is_public = os.environ.get("CUA_ENABLE_PUBLIC_PROXY", "").lower().strip() in ["1", "true", "yes", "y", "on"] + is_public = os.environ.get("CUA_ENABLE_PUBLIC_PROXY", "").lower().strip() in [ + "1", + "true", + "yes", + "y", + "on", + ] if not is_public: if not api_key: raise HTTPException(status_code=401, detail="Missing AGENT PROXY auth headers") @@ -511,10 +527,12 @@ async def agent_response_endpoint( def __init__(self, overrides: Dict[str, str]): self.overrides = overrides self._original: Dict[str, Optional[str]] = {} + def __enter__(self): for 
k, v in (self.overrides or {}).items(): self._original[k] = os.environ.get(k) os.environ[k] = str(v) + def __exit__(self, exc_type, exc, tb): for k, old in self._original.items(): if old is None: @@ -598,9 +616,9 @@ async def agent_response_endpoint( start = path[0] await self._auto.mouse_down(start["x"], start["y"]) for pt in path[1:]: - await self._auto.move_cursor(pt["x"], pt["y"]) + await self._auto.move_cursor(pt["x"], pt["y"]) end = path[-1] - await self._auto.mouse_up(end["x"], end["y"]) + await self._auto.mouse_up(end["x"], end["y"]) async def get_current_url(self) -> str: # Not available in this server context @@ -667,7 +685,11 @@ async def agent_response_endpoint( async for result in agent.run(messages): total_output += result["output"] # Try to collect usage if present - if isinstance(result, dict) and "usage" in result and isinstance(result["usage"], dict): + if ( + isinstance(result, dict) + and "usage" in result + and isinstance(result["usage"], dict) + ): # Merge usage counters for k, v in result["usage"].items(): if isinstance(v, (int, float)): @@ -686,14 +708,14 @@ async def agent_response_endpoint( logger.error(f"Error running agent: {str(e)}") logger.error(traceback.format_exc()) error = str(e) - + # Build response payload payload = { "model": model, "error": error, "output": total_output, "usage": total_usage, - "status": "completed" if not error else "failed" + "status": "completed" if not error else "failed", } # CORS: allow any origin diff --git a/libs/python/computer-server/computer_server/server.py b/libs/python/computer-server/computer_server/server.py index aed874d4..a0e719ed 100644 --- a/libs/python/computer-server/computer_server/server.py +++ b/libs/python/computer-server/computer_server/server.py @@ -5,8 +5,9 @@ Provides a clean API for starting and stopping the server. import asyncio import logging -import uvicorn from typing import Optional + +import uvicorn from fastapi import FastAPI from .main import app as fastapi_app @@ -32,8 +33,14 @@ class Server: await server.stop() # Stop the server """ - def __init__(self, host: str = "0.0.0.0", port: int = 8000, log_level: str = "info", - ssl_keyfile: Optional[str] = None, ssl_certfile: Optional[str] = None): + def __init__( + self, + host: str = "0.0.0.0", + port: int = 8000, + log_level: str = "info", + ssl_keyfile: Optional[str] = None, + ssl_certfile: Optional[str] = None, + ): """ Initialize the server. @@ -58,12 +65,12 @@ class Server: Start the server synchronously. This will block until the server is stopped. """ uvicorn.run( - self.app, - host=self.host, - port=self.port, + self.app, + host=self.host, + port=self.port, log_level=self.log_level, ssl_keyfile=self.ssl_keyfile, - ssl_certfile=self.ssl_certfile + ssl_certfile=self.ssl_certfile, ) async def start_async(self) -> None: @@ -72,12 +79,12 @@ class Server: will run in the background. 
""" server_config = uvicorn.Config( - self.app, - host=self.host, - port=self.port, + self.app, + host=self.host, + port=self.port, log_level=self.log_level, ssl_keyfile=self.ssl_keyfile, - ssl_certfile=self.ssl_certfile + ssl_certfile=self.ssl_certfile, ) self._should_exit.clear() diff --git a/libs/python/computer-server/computer_server/watchdog.py b/libs/python/computer-server/computer_server/watchdog.py index 392d9bc0..7c9ca83f 100644 --- a/libs/python/computer-server/computer_server/watchdog.py +++ b/libs/python/computer-server/computer_server/watchdog.py @@ -12,9 +12,10 @@ import platform import subprocess import sys import time -import websockets from typing import Optional +import websockets + logger = logging.getLogger(__name__) @@ -45,62 +46,62 @@ class Watchdog: """Watchdog class to monitor server health via WebSocket connection. Unix/Linux only - provides restart capabilities. """ - + def __init__(self, cli_args: Optional[dict] = None, ping_interval: int = 30): """ Initialize the watchdog. - + Args: cli_args: Dictionary of CLI arguments to replicate when restarting ping_interval: Interval between ping checks in seconds """ # Check if running on Unix/Linux - if platform.system() not in ['Linux', 'Darwin']: + if platform.system() not in ["Linux", "Darwin"]: raise RuntimeError("Watchdog is only supported on Unix/Linux systems") - + # Store CLI arguments for restart self.cli_args = cli_args or {} - self.host = self.cli_args.get('host', 'localhost') - self.port = self.cli_args.get('port', 8000) + self.host = self.cli_args.get("host", "localhost") + self.port = self.cli_args.get("port", 8000) self.ping_interval = ping_interval self.container_name = os.environ.get("CONTAINER_NAME") self.running = False self.restart_enabled = True - + @property def ws_uri(self) -> str: """Get the WebSocket URI using the current IP address. - + Returns: WebSocket URI for the Computer API Server """ - ip_address = "localhost" if not self.container_name else f"{self.container_name}.containers.cloud.trycua.com" + ip_address = ( + "localhost" + if not self.container_name + else f"{self.container_name}.containers.cloud.trycua.com" + ) protocol = "wss" if self.container_name else "ws" port = "8443" if self.container_name else "8000" return f"{protocol}://{ip_address}:{port}/ws" - + async def ping(self) -> bool: """ Test connection to the WebSocket endpoint. - + Returns: True if connection successful, False otherwise """ try: # Create a simple ping message - ping_message = { - "command": "get_screen_size", - "params": {} - } - + ping_message = {"command": "get_screen_size", "params": {}} + # Try to connect to the WebSocket async with websockets.connect( - self.ws_uri, - max_size=1024 * 1024 * 10 # 10MB limit to match server + self.ws_uri, max_size=1024 * 1024 * 10 # 10MB limit to match server ) as websocket: # Send ping message await websocket.send(json.dumps(ping_message)) - + # Wait for any response or just close try: response = await asyncio.wait_for(websocket.recv(), timeout=5) @@ -111,30 +112,27 @@ class Watchdog: except Exception as e: logger.warning(f"Ping failed: {e}") return False - + def kill_processes_on_port(self, port: int) -> bool: """ Kill any processes using the specified port. 
- + Args: port: Port number to check and kill processes on - + Returns: True if processes were killed or none found, False on error """ try: # Find processes using the port result = subprocess.run( - ["lsof", "-ti", f":{port}"], - capture_output=True, - text=True, - timeout=10 + ["lsof", "-ti", f":{port}"], capture_output=True, text=True, timeout=10 ) - + if result.returncode == 0 and result.stdout.strip(): - pids = result.stdout.strip().split('\n') + pids = result.stdout.strip().split("\n") logger.info(f"Found {len(pids)} processes using port {port}: {pids}") - + # Kill each process for pid in pids: if pid.strip(): @@ -145,42 +143,42 @@ class Watchdog: logger.warning(f"Timeout killing process {pid}") except Exception as e: logger.warning(f"Error killing process {pid}: {e}") - + return True else: logger.debug(f"No processes found using port {port}") return True - + except subprocess.TimeoutExpired: logger.error(f"Timeout finding processes on port {port}") return False except Exception as e: logger.error(f"Error finding processes on port {port}: {e}") return False - + def restart_server(self) -> bool: """ Attempt to restart the server by killing existing processes and starting new one. - + Returns: True if restart was attempted, False on error """ if not self.restart_enabled: logger.info("Server restart is disabled") return False - + try: logger.info("Attempting to restart server...") - + # Kill processes on the port port_to_kill = 8443 if self.container_name else self.port if not self.kill_processes_on_port(port_to_kill): logger.error("Failed to kill processes on port, restart aborted") return False - + # Wait a moment for processes to die time.sleep(2) - + # Try to restart the server # In container mode, we can't easily restart, so just log if self.container_name: @@ -190,50 +188,50 @@ class Watchdog: else: # For local mode, try to restart the CLI logger.info("Attempting to restart local server...") - + # Get the current Python executable and script python_exe = sys.executable - + # Try to find the CLI module try: # Build command with all original CLI arguments cmd = [python_exe, "-m", "computer_server.cli"] - + # Add all CLI arguments except watchdog-related ones for key, value in self.cli_args.items(): - if key in ['watchdog', 'watchdog_interval', 'no_restart']: + if key in ["watchdog", "watchdog_interval", "no_restart"]: continue # Skip watchdog args to avoid recursive watchdog - + # Convert underscores to hyphens for CLI args arg_name = f"--{key.replace('_', '-')}" - + if isinstance(value, bool): if value: # Only add flag if True cmd.append(arg_name) else: cmd.extend([arg_name, str(value)]) - + logger.info(f"Starting server with command: {' '.join(cmd)}") - + # Start process in background subprocess.Popen( cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, - start_new_session=True + start_new_session=True, ) - + logger.info("Server restart initiated") return True - + except Exception as e: logger.error(f"Failed to restart server: {e}") return False - + except Exception as e: logger.error(f"Error during server restart: {e}") return False - + async def start_monitoring(self) -> None: """Start the watchdog monitoring loop.""" self.running = True @@ -241,14 +239,14 @@ class Watchdog: logger.info(f"Ping interval: {self.ping_interval} seconds") if self.container_name: logger.info(f"Container mode detected: {self.container_name}") - + consecutive_failures = 0 max_failures = 3 - + while self.running: try: success = await self.ping() - + if success: if consecutive_failures > 0: 
logger.info("Server connection restored") @@ -257,15 +255,17 @@ class Watchdog: else: consecutive_failures += 1 logger.warning(f"Ping failed ({consecutive_failures}/{max_failures})") - + if consecutive_failures >= max_failures: - logger.error(f"Server appears to be down after {max_failures} consecutive failures") - + logger.error( + f"Server appears to be down after {max_failures} consecutive failures" + ) + # Attempt to restart the server if self.restart_enabled: logger.info("Attempting automatic server restart...") restart_success = self.restart_server() - + if restart_success: logger.info("Server restart initiated, waiting before next ping...") # Wait longer after restart attempt @@ -275,17 +275,17 @@ class Watchdog: logger.error("Server restart failed") else: logger.warning("Automatic restart is disabled") - + # Wait for next ping interval await asyncio.sleep(self.ping_interval) - + except asyncio.CancelledError: logger.info("Watchdog monitoring cancelled") break except Exception as e: logger.error(f"Unexpected error in watchdog loop: {e}") await asyncio.sleep(self.ping_interval) - + def stop_monitoring(self) -> None: """Stop the watchdog monitoring.""" self.running = False @@ -295,13 +295,13 @@ class Watchdog: async def run_watchdog(cli_args: Optional[dict] = None, ping_interval: int = 30) -> None: """ Run the watchdog monitoring. - + Args: cli_args: Dictionary of CLI arguments to replicate when restarting ping_interval: Interval between ping checks in seconds """ watchdog = Watchdog(cli_args=cli_args, ping_interval=ping_interval) - + try: await watchdog.start_monitoring() except KeyboardInterrupt: @@ -313,21 +313,18 @@ async def run_watchdog(cli_args: Optional[dict] = None, ping_interval: int = 30) if __name__ == "__main__": # For testing the watchdog standalone import argparse - + parser = argparse.ArgumentParser(description="Run Computer API server watchdog") parser.add_argument("--host", default="localhost", help="Server host to monitor") parser.add_argument("--port", type=int, default=8000, help="Server port to monitor") parser.add_argument("--ping-interval", type=int, default=30, help="Ping interval in seconds") - + args = parser.parse_args() - + logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) - - cli_args = { - 'host': args.host, - 'port': args.port - } + + cli_args = {"host": args.host, "port": args.port} asyncio.run(run_watchdog(cli_args, args.ping_interval)) diff --git a/libs/python/computer-server/pyproject.toml b/libs/python/computer-server/pyproject.toml index 6e9e7240..a8ecfb23 100644 --- a/libs/python/computer-server/pyproject.toml +++ b/libs/python/computer-server/pyproject.toml @@ -4,14 +4,15 @@ build-backend = "pdm.backend" [project] name = "cua-computer-server" -version = "0.1.0" +version = "0.1.27" + description = "Server component for the Computer-Use Interface (CUI) framework powering Cua" authors = [ { name = "TryCua", email = "gh@trycua.com" } ] readme = "README.md" license = { text = "MIT" } -requires-python = ">=3.9" +requires-python = ">=3.12" dependencies = [ "fastapi>=0.111.0", "uvicorn[standard]>=0.27.0", @@ -21,7 +22,14 @@ dependencies = [ "pillow>=10.2.0", "aiohttp>=3.9.1", "pyperclip>=1.9.0", - "websockets>=12.0" + "websockets>=12.0", + # OS-specific runtime deps + "pyobjc-framework-Cocoa>=10.1; sys_platform == 'darwin'", + "pyobjc-framework-Quartz>=10.1; sys_platform == 'darwin'", + "pyobjc-framework-ApplicationServices>=10.1; sys_platform == 'darwin'", + "python-xlib>=0.33; sys_platform == 
'linux'", + "pywin32>=310; sys_platform == 'win32'", + "pip-system-certs; sys_platform == 'win32'", ] [project.optional-dependencies] @@ -66,23 +74,4 @@ dev = [ ] [tool.pdm.scripts] -api = "python -m computer_server" - -[tool.ruff] -line-length = 100 -target-version = "py310" -select = ["E", "F", "B", "I"] -fix = true - -[tool.ruff.format] -docstring-code-format = true - -[tool.mypy] -strict = true -python_version = "3.10" -ignore_missing_imports = true -disallow_untyped_defs = true -check_untyped_defs = true -warn_return_any = true -show_error_codes = true -warn_unused_ignores = false +api = "python -m computer_server" \ No newline at end of file diff --git a/libs/python/computer-server/run_server.py b/libs/python/computer-server/run_server.py index 1818caa1..a7c0aecd 100755 --- a/libs/python/computer-server/run_server.py +++ b/libs/python/computer-server/run_server.py @@ -10,6 +10,7 @@ Usage: """ import sys + from computer_server.cli import main if __name__ == "__main__": diff --git a/libs/python/computer-server/test_connection.py b/libs/python/computer-server/test_connection.py index 8b9f3222..a864ba32 100755 --- a/libs/python/computer-server/test_connection.py +++ b/libs/python/computer-server/test_connection.py @@ -6,18 +6,22 @@ This script tests both WebSocket (/ws) and REST (/cmd) connections to the Comput and keeps it alive, allowing you to verify the server is running correctly. """ +import argparse import asyncio import json -import websockets -import argparse -import sys -import aiohttp import os +import sys +import aiohttp import dotenv +import websockets + dotenv.load_dotenv() -async def test_websocket_connection(host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None): + +async def test_websocket_connection( + host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None +): """Test WebSocket connection to the Computer Server.""" if container_name: # Container mode: use WSS with container domain and port 8443 @@ -37,19 +41,16 @@ async def test_websocket_connection(host="localhost", port=8000, keep_alive=Fals if not api_key: print("Error: API key required for container connections") return False - + print("Sending authentication...") auth_message = { "command": "authenticate", - "params": { - "api_key": api_key, - "container_name": container_name - } + "params": {"api_key": api_key, "container_name": container_name}, } await websocket.send(json.dumps(auth_message)) auth_response = await websocket.recv() print(f"Authentication response: {auth_response}") - + # Check if authentication was successful auth_data = json.loads(auth_response) if not auth_data.get("success", False): @@ -90,7 +91,9 @@ async def test_websocket_connection(host="localhost", port=8000, keep_alive=Fals return True -async def test_rest_connection(host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None): +async def test_rest_connection( + host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None +): """Test REST connection to the Computer Server.""" if container_name: # Container mode: use HTTPS with container domain and port 8443 @@ -113,13 +116,11 @@ async def test_rest_connection(host="localhost", port=8000, keep_alive=False, co return False headers["X-Container-Name"] = container_name headers["X-API-Key"] = api_key - print(f"Using container authentication headers") + print("Using container authentication headers") # Test screenshot endpoint async with session.post( - f"{base_url}/cmd", - json={"command": 
"screenshot", "params": {}}, - headers=headers + f"{base_url}/cmd", json={"command": "screenshot", "params": {}}, headers=headers ) as response: if response.status == 200: text = await response.text() @@ -133,7 +134,7 @@ async def test_rest_connection(host="localhost", port=8000, keep_alive=False, co async with session.post( f"{base_url}/cmd", json={"command": "get_screen_size", "params": {}}, - headers=headers + headers=headers, ) as response: if response.status == 200: text = await response.text() @@ -151,7 +152,7 @@ async def test_rest_connection(host="localhost", port=8000, keep_alive=False, co async with session.post( f"{base_url}/cmd", json={"command": "get_cursor_position", "params": {}}, - headers=headers + headers=headers, ) as response: if response.status == 200: text = await response.text() @@ -171,7 +172,9 @@ async def test_rest_connection(host="localhost", port=8000, keep_alive=False, co return True -async def test_connection(host="localhost", port=8000, keep_alive=False, container_name=None, use_rest=False, api_key=None): +async def test_connection( + host="localhost", port=8000, keep_alive=False, container_name=None, use_rest=False, api_key=None +): """Test connection to the Computer Server using WebSocket or REST.""" if use_rest: return await test_rest_connection(host, port, keep_alive, container_name, api_key) @@ -183,40 +186,50 @@ def parse_args(): parser = argparse.ArgumentParser(description="Test connection to Computer Server") parser.add_argument("--host", default="localhost", help="Host address (default: localhost)") parser.add_argument("-p", "--port", type=int, default=8000, help="Port number (default: 8000)") - parser.add_argument("-c", "--container-name", help="Container name for cloud connection (uses WSS/HTTPS and port 8443)") - parser.add_argument("--api-key", help="API key for container authentication (can also use CUA_API_KEY env var)") + parser.add_argument( + "-c", + "--container-name", + help="Container name for cloud connection (uses WSS/HTTPS and port 8443)", + ) + parser.add_argument( + "--api-key", help="API key for container authentication (can also use CUA_API_KEY env var)" + ) parser.add_argument("--keep-alive", action="store_true", help="Keep connection alive") - parser.add_argument("--rest", action="store_true", help="Use REST endpoint (/cmd) instead of WebSocket (/ws)") + parser.add_argument( + "--rest", action="store_true", help="Use REST endpoint (/cmd) instead of WebSocket (/ws)" + ) return parser.parse_args() async def main(): args = parse_args() - + # Convert hyphenated argument to underscore for function parameter - container_name = getattr(args, 'container_name', None) - + container_name = getattr(args, "container_name", None) + # Get API key from argument or environment variable - api_key = getattr(args, 'api_key', None) or os.environ.get('CUA_API_KEY') - + api_key = getattr(args, "api_key", None) or os.environ.get("CUA_API_KEY") + # Check if container name is provided but API key is missing if container_name and not api_key: print("Warning: Container name provided but no API key found.") print("Please provide --api-key argument or set CUA_API_KEY environment variable.") return 1 - + print(f"Testing {'REST' if args.rest else 'WebSocket'} connection...") if container_name: print(f"Container: {container_name}") - print(f"API Key: {'***' + api_key[-4:] if api_key and len(api_key) > 4 else 'Not provided'}") - + print( + f"API Key: {'***' + api_key[-4:] if api_key and len(api_key) > 4 else 'Not provided'}" + ) + success = await test_connection( 
- host=args.host, - port=args.port, + host=args.host, + port=args.port, keep_alive=args.keep_alive, container_name=container_name, use_rest=args.rest, - api_key=api_key + api_key=api_key, ) return 0 if success else 1 diff --git a/libs/python/computer/.bumpversion.cfg b/libs/python/computer/.bumpversion.cfg new file mode 100644 index 00000000..1cbb791b --- /dev/null +++ b/libs/python/computer/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.4.7 +commit = True +tag = True +tag_name = computer-v{new_version} +message = Bump cua-computer to v{new_version} + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" diff --git a/libs/python/computer/README.md b/libs/python/computer/README.md index 5d7c3c9b..c19ca6c9 100644 --- a/libs/python/computer/README.md +++ b/libs/python/computer/README.md @@ -8,10 +8,11 @@ - [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) - [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/) +[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/) + @@ -29,11 +30,11 @@ from computer import Computer computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4") try: await computer.run() - + screenshot = await computer.interface.screenshot() with open("screenshot.png", "wb") as f: f.write(screenshot) - + await computer.interface.move_cursor(100, 100) await computer.interface.left_click() await computer.interface.right_click(300, 300) diff --git a/libs/python/computer/computer/computer.py b/libs/python/computer/computer/computer.py index 441e3ec3..7f5f9dfb 100644 --- a/libs/python/computer/computer/computer.py +++ b/libs/python/computer/computer/computer.py @@ -1,19 +1,22 @@ -from typing import Optional, List, Literal, Dict, Any, Union, TYPE_CHECKING, cast import asyncio -from .models import Computer as ComputerConfig, Display -from .interface.factory import InterfaceFactory -import time -from PIL import Image import io -import re -from .logger import Logger, LogLevel import json import logging -from core.telemetry import is_telemetry_enabled, record_event import os -from . import helpers - import platform +import re +import time +import traceback +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, cast + +from core.telemetry import is_telemetry_enabled, record_event +from PIL import Image + +from . 
import helpers +from .interface.factory import InterfaceFactory +from .logger import Logger, LogLevel +from .models import Computer as ComputerConfig +from .models import Display SYSTEM_INFO = { "os": platform.system().lower(), @@ -27,6 +30,7 @@ from .providers.factory import VMProviderFactory OSType = Literal["macos", "linux", "windows"] + class Computer: """Computer is the main class for interacting with the computer.""" @@ -40,8 +44,11 @@ class Computer: Returns: DioramaComputer: A proxy object with the Diorama interface, but using diorama_cmds. """ - assert "app-use" in self.experiments, "App Usage is an experimental feature. Enable it by passing experiments=['app-use'] to Computer()" + assert ( + "app-use" in self.experiments + ), "App Usage is an experimental feature. Enable it by passing experiments=['app-use'] to Computer()" from .diorama_computer import DioramaComputer + return DioramaComputer(self, apps) def __init__( @@ -63,7 +70,7 @@ class Computer: storage: Optional[str] = None, ephemeral: bool = False, api_key: Optional[str] = None, - experiments: Optional[List[str]] = None + experiments: Optional[List[str]] = None, ): """Initialize a new Computer instance. @@ -111,32 +118,36 @@ class Computer: self.os_type = os_type self.provider_type = provider_type self.ephemeral = ephemeral - + self.api_key = api_key self.experiments = experiments or [] - + if "app-use" in self.experiments: assert self.os_type == "macos", "App use experiment is only supported on macOS" # The default is currently to use non-ephemeral storage if storage and ephemeral and storage != "ephemeral": raise ValueError("Storage path and ephemeral flag cannot be used together") - + # Windows Sandbox always uses ephemeral storage if self.provider_type == VMProviderType.WINSANDBOX: if not ephemeral and storage != None and storage != "ephemeral": - self.logger.warning("Windows Sandbox storage is always ephemeral. Setting ephemeral=True.") + self.logger.warning( + "Windows Sandbox storage is always ephemeral. Setting ephemeral=True." + ) self.ephemeral = True self.storage = "ephemeral" else: self.storage = "ephemeral" if ephemeral else storage - + # For Lumier provider, store the first shared directory path to use # for VM file sharing self.shared_path = None if shared_directories and len(shared_directories) > 0: self.shared_path = shared_directories[0] - self.logger.info(f"Using first shared directory for VM file sharing: {self.shared_path}") + self.logger.info( + f"Using first shared directory for VM file sharing: {self.shared_path}" + ) # Store telemetry preference self._telemetry_enabled = telemetry_enabled @@ -154,8 +165,8 @@ class Computer: self.interface_logger = Logger("computer.interface", verbosity) if not use_host_computer_server: - if ":" not in image or len(image.split(":")) != 2: - raise ValueError("Image must be in the format :") + if ":" not in image: + image = f"{image}:latest" if not name: # Normalize the name to be used for the VM @@ -263,8 +274,14 @@ class Computer: self.logger.info(f"Starting VM: {self.image}") if not self._provider_context: try: - provider_type_name = self.provider_type.name if isinstance(self.provider_type, VMProviderType) else self.provider_type - self.logger.verbose(f"Initializing {provider_type_name} provider context...") + provider_type_name = ( + self.provider_type.name + if isinstance(self.provider_type, VMProviderType) + else self.provider_type + ) + self.logger.verbose( + f"Initializing {provider_type_name} provider context..." 
+ ) # Explicitly set provider parameters storage = "ephemeral" if self.ephemeral else self.storage @@ -281,9 +298,13 @@ class Computer: if self.provider_type == VMProviderType.LUMIER: self.logger.info(f"Using VM image for Lumier provider: {image}") if shared_path: - self.logger.info(f"Using shared path for Lumier provider: {shared_path}") + self.logger.info( + f"Using shared path for Lumier provider: {shared_path}" + ) if noVNC_port: - self.logger.info(f"Using noVNC port for Lumier provider: {noVNC_port}") + self.logger.info( + f"Using noVNC port for Lumier provider: {noVNC_port}" + ) self.config.vm_provider = VMProviderFactory.create_provider( self.provider_type, port=port, @@ -339,11 +360,17 @@ class Computer: except ImportError as ie: self.logger.error(f"Failed to import provider dependencies: {ie}") if str(ie).find("lume") >= 0 and str(ie).find("lumier") < 0: - self.logger.error("Please install with: pip install cua-computer[lume]") + self.logger.error( + "Please install with: pip install cua-computer[lume]" + ) elif str(ie).find("lumier") >= 0 or str(ie).find("docker") >= 0: - self.logger.error("Please install with: pip install cua-computer[lumier] and make sure Docker is installed") + self.logger.error( + "Please install with: pip install cua-computer[lumier] and make sure Docker is installed" + ) elif str(ie).find("cloud") >= 0: - self.logger.error("Please install with: pip install cua-computer[cloud]") + self.logger.error( + "Please install with: pip install cua-computer[cloud]" + ) raise except Exception as e: self.logger.error(f"Failed to initialize provider context: {e}") @@ -354,16 +381,14 @@ class Computer: try: if self.config.vm_provider is None: raise RuntimeError(f"VM provider not initialized for {self.config.name}") - + vm = await self.config.vm_provider.get_vm(self.config.name) self.logger.verbose(f"Found existing VM: {self.config.name}") is_running = vm.get("status") == "running" except Exception as e: self.logger.error(f"VM not found: {self.config.name}") self.logger.error(f"Error: {e}") - raise RuntimeError( - f"VM {self.config.name} could not be found or created." 
- ) + raise RuntimeError(f"VM {self.config.name} could not be found or created.") # Start the VM if it's not running if not is_running: @@ -376,13 +401,10 @@ class Computer: path = os.path.abspath(os.path.expanduser(path)) if os.path.exists(path): # Add path in format expected by Lume API - shared_dirs.append({ - "hostPath": path, - "readOnly": False - }) + shared_dirs.append({"hostPath": path, "readOnly": False}) else: self.logger.warning(f"Shared directory does not exist: {path}") - + # Prepare run options to pass to the provider run_opts = {} @@ -392,11 +414,11 @@ class Computer: "width": self.config.display.width, "height": self.config.display.height, } - + # Check if scale_factor exists before adding it if hasattr(self.config.display, "scale_factor"): display_info["scale_factor"] = self.config.display.scale_factor - + run_opts["display"] = display_info # Add shared directories if available @@ -406,21 +428,23 @@ class Computer: # Run the VM with the provider try: if self.config.vm_provider is None: - raise RuntimeError(f"VM provider not initialized for {self.config.name}") - + raise RuntimeError( + f"VM provider not initialized for {self.config.name}" + ) + # Use the complete run_opts we prepared earlier # Handle ephemeral storage for run_vm method too storage_param = "ephemeral" if self.ephemeral else self.storage - + # Log the image being used self.logger.info(f"Running VM using image: {self.image}") - + # Call provider.run_vm with explicit image parameter response = await self.config.vm_provider.run_vm( image=self.image, name=self.config.name, run_opts=run_opts, - storage=storage_param + storage=storage_param, ) self.logger.info(f"VM run response: {response if response else 'None'}") except Exception as run_error: @@ -432,14 +456,16 @@ class Computer: try: if self.provider_type == VMProviderType.LUMIER: max_retries = 60 # Increased for Lumier VM startup which takes longer - retry_delay = 3 # 3 seconds between retries for Lumier + retry_delay = 3 # 3 seconds between retries for Lumier else: max_retries = 30 # Default for other providers - retry_delay = 2 # 2 seconds between retries - - self.logger.info(f"Waiting up to {max_retries * retry_delay} seconds for VM to be ready...") + retry_delay = 2 # 2 seconds between retries + + self.logger.info( + f"Waiting up to {max_retries * retry_delay} seconds for VM to be ready..." 
+ ) ip = await self.get_ip(max_retries=max_retries, retry_delay=retry_delay) - + # If we get here, we have a valid IP self.logger.info(f"VM is ready with IP: {ip}") ip_address = ip @@ -451,13 +477,16 @@ class Computer: raise RuntimeError(f"VM failed to become ready: {wait_error}") except Exception as e: self.logger.error(f"Failed to initialize computer: {e}") + self.logger.error(traceback.format_exc()) raise RuntimeError(f"Failed to initialize computer: {e}") try: # Verify we have a valid IP before initializing the interface if not ip_address or ip_address == "unknown" or ip_address == "0.0.0.0": - raise RuntimeError(f"Cannot initialize interface - invalid IP address: {ip_address}") - + raise RuntimeError( + f"Cannot initialize interface - invalid IP address: {ip_address}" + ) + # Initialize the interface using the factory with the specified OS self.logger.info(f"Initializing interface for {self.os_type} at {ip_address}") from .interface.base import BaseComputerInterface @@ -467,18 +496,17 @@ class Computer: self._interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( - os=self.os_type, + os=self.os_type, ip_address=ip_address, api_key=self.api_key, - vm_name=self.config.name + vm_name=self.config.name, ), ) else: self._interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( - os=self.os_type, - ip_address=ip_address + os=self.os_type, ip_address=ip_address ), ) @@ -508,10 +536,10 @@ class Computer: # Set the initialization flag and clear the initializing flag self._initialized = True - + # Set this instance as the default computer for remote decorators helpers.set_default_computer(self) - + self.logger.info("Computer successfully initialized") except Exception as e: raise @@ -520,7 +548,7 @@ class Computer: duration_ms = (time.time() - start_time) * 1000 self.logger.debug(f"Computer initialization took {duration_ms:.2f}ms") return - + async def disconnect(self) -> None: """Disconnect from the computer's WebSocket interface.""" if self._interface: @@ -534,13 +562,17 @@ class Computer: self.logger.info("Stopping Computer...") # In VM mode, first explicitly stop the VM, then exit the provider context - if not self.use_host_computer_server and self._provider_context and self.config.vm_provider is not None: + if ( + not self.use_host_computer_server + and self._provider_context + and self.config.vm_provider is not None + ): try: self.logger.info(f"Stopping VM {self.config.name}...") await self.config.vm_provider.stop_vm( - name=self.config.name, - storage=self.storage # Pass storage explicitly for clarity - ) + name=self.config.name, + storage=self.storage, # Pass storage explicitly for clarity + ) except Exception as e: self.logger.error(f"Error stopping VM: {e}") @@ -551,55 +583,156 @@ class Computer: await self.disconnect() self.logger.info("Computer stopped") except Exception as e: - self.logger.debug(f"Error during cleanup: {e}") # Log as debug since this might be expected + self.logger.debug( + f"Error during cleanup: {e}" + ) # Log as debug since this might be expected finally: # Log stop time for performance monitoring duration_ms = (time.time() - start_time) * 1000 self.logger.debug(f"Computer stop process took {duration_ms:.2f}ms") return + async def start(self) -> None: + """Start the computer.""" + await self.run() + + async def restart(self) -> None: + """Restart the computer. + + If using a VM provider that supports restart, this will issue a restart + without tearing down the provider context, then reconnect the interface. 
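Before the implementation below, a hedged sketch of how the new `restart()` entry point is intended to be called from user code; the constructor arguments are illustrative:

```python
# Assumed usage of the restart() method added in this diff.
import asyncio

from computer import Computer


async def main():
    computer = Computer(os_type="macos")
    await computer.run()
    # Provider-level restart when restart_vm exists; stop+start fallback otherwise.
    await computer.restart()
    await computer.stop()

asyncio.run(main())
```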
+ Falls back to stop()+run() when a provider restart is not available. + """ + # Host computer server: just disconnect and run again + if self.use_host_computer_server: + try: + await self.disconnect() + finally: + await self.run() + return + + # If no VM provider context yet, fall back to full run + if not getattr(self, "_provider_context", None) or self.config.vm_provider is None: + self.logger.info("No provider context active; performing full restart via run()") + await self.run() + return + + # Gracefully close current interface connection if present + if self._interface: + try: + self._interface.close() + except Exception as e: + self.logger.debug(f"Error closing interface prior to restart: {e}") + + # Attempt provider-level restart if implemented + try: + storage_param = "ephemeral" if self.ephemeral else self.storage + if hasattr(self.config.vm_provider, "restart_vm"): + self.logger.info(f"Restarting VM {self.config.name} via provider...") + await self.config.vm_provider.restart_vm( + name=self.config.name, storage=storage_param + ) + else: + # Fallback: stop then start without leaving provider context + self.logger.info( + f"Provider has no restart_vm; performing stop+start for {self.config.name}..." + ) + await self.config.vm_provider.stop_vm(name=self.config.name, storage=storage_param) + await self.config.vm_provider.run_vm( + image=self.image, name=self.config.name, run_opts={}, storage=storage_param + ) + except Exception as e: + self.logger.error(f"Failed to restart VM via provider: {e}") + # As a last resort, do a full stop (with provider context exit) and run + try: + await self.stop() + finally: + await self.run() + return + + # Wait for VM to be ready and reconnect interface + try: + self.logger.info("Waiting for VM to be ready after restart...") + if self.provider_type == VMProviderType.LUMIER: + max_retries = 60 + retry_delay = 3 + else: + max_retries = 30 + retry_delay = 2 + ip_address = await self.get_ip(max_retries=max_retries, retry_delay=retry_delay) + + self.logger.info(f"Re-initializing interface for {self.os_type} at {ip_address}") + from .interface.base import BaseComputerInterface + + if self.provider_type == VMProviderType.CLOUD and self.api_key and self.config.name: + self._interface = cast( + BaseComputerInterface, + InterfaceFactory.create_interface_for_os( + os=self.os_type, + ip_address=ip_address, + api_key=self.api_key, + vm_name=self.config.name, + ), + ) + else: + self._interface = cast( + BaseComputerInterface, + InterfaceFactory.create_interface_for_os( + os=self.os_type, + ip_address=ip_address, + ), + ) + + self.logger.info("Connecting to WebSocket interface after restart...") + await self._interface.wait_for_ready(timeout=30) + self.logger.info("Computer reconnected and ready after restart") + except Exception as e: + self.logger.error(f"Failed to reconnect after restart: {e}") + # Try a full reset if reconnection failed + try: + await self.stop() + finally: + await self.run() + # @property async def get_ip(self, max_retries: int = 15, retry_delay: int = 3) -> str: """Get the IP address of the VM or localhost if using host computer server. - - This method delegates to the provider's get_ip method, which waits indefinitely + + This method delegates to the provider's get_ip method, which waits indefinitely until the VM has a valid IP address. 
- + Args: max_retries: Unused parameter, kept for backward compatibility retry_delay: Delay between retries in seconds (default: 2) - + Returns: IP address of the VM or localhost if using host computer server """ # For host computer server, always return localhost immediately if self.use_host_computer_server: return "127.0.0.1" - + # Get IP from the provider - each provider implements its own waiting logic if self.config.vm_provider is None: raise RuntimeError("VM provider is not initialized") - + # Log that we're waiting for the IP self.logger.info(f"Waiting for VM {self.config.name} to get an IP address...") - + # Call the provider's get_ip method which will wait indefinitely storage_param = "ephemeral" if self.ephemeral else self.storage - + # Log the image being used self.logger.info(f"Running VM using image: {self.image}") - + # Call provider.get_ip with explicit image parameter ip = await self.config.vm_provider.get_ip( - name=self.config.name, - storage=storage_param, - retry_delay=retry_delay + name=self.config.name, storage=storage_param, retry_delay=retry_delay ) - + # Log success self.logger.info(f"VM {self.config.name} has IP address: {ip}") return ip - async def wait_vm_ready(self) -> Optional[Dict[str, Any]]: """Wait for VM to be ready with an IP address. @@ -687,8 +820,8 @@ class Computer: if self.config.vm_provider is not None: vm = await self.config.vm_provider.get_vm(self.config.name) # VM data is returned as a dictionary from the Lumier provider - status = vm.get('status', 'unknown') if vm else "unknown" - ip = vm.get('ip_address') if vm else None + status = vm.get("status", "unknown") if vm else "unknown" + ip = vm.get("ip_address") if vm else None else: status = "unknown" ip = None @@ -705,16 +838,13 @@ class Computer: self.logger.info( f"Updating VM settings: CPU={cpu or self.config.cpu}, Memory={memory or self.config.memory}" ) - update_opts = { - "cpu": cpu or int(self.config.cpu), - "memory": memory or self.config.memory - } + update_opts = {"cpu": cpu or int(self.config.cpu), "memory": memory or self.config.memory} if self.config.vm_provider is not None: - await self.config.vm_provider.update_vm( - name=self.config.name, - update_opts=update_opts, - storage=self.storage # Pass storage explicitly for clarity - ) + await self.config.vm_provider.update_vm( + name=self.config.name, + update_opts=update_opts, + storage=self.storage, # Pass storage explicitly for clarity + ) else: raise RuntimeError("VM provider not initialized") @@ -781,65 +911,94 @@ class Computer: """ return await self.interface.to_screenshot_coordinates(x, y) - # Add virtual environment management functions to computer interface async def venv_install(self, venv_name: str, requirements: list[str]): """Install packages in a virtual environment. 
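A short, assumed usage sketch for the venv helpers defined below; the venv name and pinned package are illustrative:

```python
# Assumed usage of the OS-aware venv helpers (creates ~/.venvs/demo if missing).
import asyncio

from computer import Computer


async def main():
    computer = Computer(os_type="macos")
    await computer.run()
    # Create the venv on first use and pip-install into it.
    result = await computer.venv_install("demo", ["requests==2.32.3"])
    print(result)
    # Run an arbitrary shell command inside the activated venv.
    print(await computer.venv_cmd("demo", "python -c 'import requests; print(requests.__version__)'"))
    await computer.stop()

asyncio.run(main())
```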
- + Args: venv_name: Name of the virtual environment requirements: List of package requirements to install - + Returns: Tuple of (stdout, stderr) from the installation command """ requirements = requirements or [] + # Windows vs POSIX handling + if self.os_type == "windows": + # Use %USERPROFILE% for home directory and cmd.exe semantics + venv_path = f"%USERPROFILE%\\.venvs\\{venv_name}" + ensure_dir_cmd = 'if not exist "%USERPROFILE%\\.venvs" mkdir "%USERPROFILE%\\.venvs"' + create_cmd = f'if not exist "{venv_path}" python -m venv "{venv_path}"' + requirements_str = " ".join(requirements) + # Activate via activate.bat and install + install_cmd = ( + f'call "{venv_path}\\Scripts\\activate.bat" && pip install {requirements_str}' + if requirements_str + else "echo No requirements to install" + ) + await self.interface.run_command(ensure_dir_cmd) + await self.interface.run_command(create_cmd) + return await self.interface.run_command(install_cmd) + else: + # POSIX (macOS/Linux) + venv_path = f"$HOME/.venvs/{venv_name}" + create_cmd = f'mkdir -p "$HOME/.venvs" && python3 -m venv "{venv_path}"' + # Check if venv exists, if not create it + check_cmd = f'test -d "{venv_path}" || ({create_cmd})' + _ = await self.interface.run_command(check_cmd) + # Install packages + requirements_str = " ".join(requirements) + install_cmd = ( + f'. "{venv_path}/bin/activate" && pip install {requirements_str}' + if requirements_str + else "echo No requirements to install" + ) + return await self.interface.run_command(install_cmd) - # Create virtual environment if it doesn't exist - venv_path = f"~/.venvs/{venv_name}" - create_cmd = f"mkdir -p ~/.venvs && python3 -m venv {venv_path}" - - # Check if venv exists, if not create it - check_cmd = f"test -d {venv_path} || ({create_cmd})" - _ = await self.interface.run_command(check_cmd) - - # Install packages - requirements_str = " ".join(requirements) - install_cmd = f". {venv_path}/bin/activate && pip install {requirements_str}" - return await self.interface.run_command(install_cmd) - async def venv_cmd(self, venv_name: str, command: str): """Execute a shell command in a virtual environment. - + Args: venv_name: Name of the virtual environment command: Shell command to execute in the virtual environment - + Returns: Tuple of (stdout, stderr) from the command execution """ - venv_path = f"~/.venvs/{venv_name}" - - # Check if virtual environment exists - check_cmd = f"test -d {venv_path}" - result = await self.interface.run_command(check_cmd) - - if result.stderr or "test:" in result.stdout: # venv doesn't exist - return "", f"Virtual environment '{venv_name}' does not exist. Create it first using venv_install." - - # Activate virtual environment and run command - full_command = f". 
{venv_path}/bin/activate && {command}" - return await self.interface.run_command(full_command) - + if self.os_type == "windows": + # Windows (cmd.exe) + venv_path = f"%USERPROFILE%\\.venvs\\{venv_name}" + # Check existence and signal if missing + check_cmd = f'if not exist "{venv_path}" (echo VENV_NOT_FOUND) else (echo VENV_FOUND)' + result = await self.interface.run_command(check_cmd) + if "VENV_NOT_FOUND" in getattr(result, "stdout", ""): + # Auto-create the venv with no requirements + await self.venv_install(venv_name, []) + # Activate and run the command + full_command = f'call "{venv_path}\\Scripts\\activate.bat" && {command}' + return await self.interface.run_command(full_command) + else: + # POSIX (macOS/Linux) + venv_path = f"$HOME/.venvs/{venv_name}" + # Check if virtual environment exists + check_cmd = f'test -d "{venv_path}"' + result = await self.interface.run_command(check_cmd) + if result.stderr or "test:" in result.stdout: # venv doesn't exist + # Auto-create the venv with no requirements + await self.venv_install(venv_name, []) + # Activate virtual environment and run command + full_command = f'. "{venv_path}/bin/activate" && {command}' + return await self.interface.run_command(full_command) + async def venv_exec(self, venv_name: str, python_func, *args, **kwargs): """Execute Python function in a virtual environment using source code extraction. - + Args: venv_name: Name of the virtual environment python_func: A callable function to execute *args: Positional arguments to pass to the function **kwargs: Keyword arguments to pass to the function - + Returns: The result of the function execution, or raises any exception that occurred """ @@ -847,29 +1006,29 @@ class Computer: import inspect import json import textwrap - + try: # Get function source code using inspect.getsource source = inspect.getsource(python_func) # Remove common leading whitespace (dedent) func_source = textwrap.dedent(source).strip() - + # Remove decorators while func_source.lstrip().startswith("@"): func_source = func_source.split("\n", 1)[1].strip() - + # Get function name for execution func_name = python_func.__name__ - + # Serialize args and kwargs as JSON (safer than dill for cross-version compatibility) args_json = json.dumps(args, default=str) kwargs_json = json.dumps(kwargs, default=str) - + except OSError as e: raise Exception(f"Cannot retrieve source code for function {python_func.__name__}: {e}") except Exception as e: raise Exception(f"Failed to reconstruct function source: {e}") - + # Create Python code that will define and execute the function python_code = f''' import json @@ -914,25 +1073,27 @@ output_json = json.dumps(output_payload, default=str) # Print the JSON output with markers print(f"<<>>{{output_json}}<<>>") ''' - + # Encode the Python code in base64 to avoid shell escaping issues - encoded_code = base64.b64encode(python_code.encode('utf-8')).decode('ascii') - + encoded_code = base64.b64encode(python_code.encode("utf-8")).decode("ascii") + # Execute the Python code in the virtual environment - python_command = f"python -c \"import base64; exec(base64.b64decode('{encoded_code}').decode('utf-8'))\"" + python_command = ( + f"python -c \"import base64; exec(base64.b64decode('{encoded_code}').decode('utf-8'))\"" + ) result = await self.venv_cmd(venv_name, python_command) - + # Parse the output to extract the payload start_marker = "<<>>" end_marker = "<<>>" # Print original stdout - print(result.stdout[:result.stdout.find(start_marker)]) - + print(result.stdout[: 
result.stdout.find(start_marker)]) + if start_marker in result.stdout and end_marker in result.stdout: start_idx = result.stdout.find(start_marker) + len(start_marker) end_idx = result.stdout.find(end_marker) - + if start_idx < end_idx: output_json = result.stdout[start_idx:end_idx] @@ -941,7 +1102,7 @@ print(f"<<>>{{output_json}}<<>>") output_payload = json.loads(output_json) except Exception as e: raise Exception(f"Failed to decode output payload: {e}") - + if output_payload["success"]: return output_payload["result"] else: @@ -953,4 +1114,6 @@ print(f"<<>>{{output_json}}<<>>") raise Exception("Invalid output format: markers found but no content between them") else: # Fallback: return stdout/stderr if no payload markers found - raise Exception(f"No output payload found. stdout: {result.stdout}, stderr: {result.stderr}") + raise Exception( + f"No output payload found. stdout: {result.stdout}, stderr: {result.stderr}" + ) diff --git a/libs/python/computer/computer/diorama_computer.py b/libs/python/computer/computer/diorama_computer.py index da67c72c..329d3009 100644 --- a/libs/python/computer/computer/diorama_computer.py +++ b/libs/python/computer/computer/diorama_computer.py @@ -1,14 +1,17 @@ import asyncio -from .interface.models import KeyType, Key + +from .interface.models import Key, KeyType + class DioramaComputer: """ A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface. """ + def __init__(self, computer, apps): """ Initialize the DioramaComputer with a computer instance and list of apps. - + Args: computer: The computer instance to proxy commands through apps: List of applications available in the diorama environment @@ -21,7 +24,7 @@ class DioramaComputer: async def __aenter__(self): """ Async context manager entry point. - + Returns: self: The DioramaComputer instance """ @@ -31,7 +34,7 @@ class DioramaComputer: async def run(self): """ Initialize and run the DioramaComputer if not already initialized. - + Returns: self: The DioramaComputer instance """ @@ -39,14 +42,16 @@ class DioramaComputer: await self.__aenter__() return self + class DioramaComputerInterface: """ Diorama Interface proxy that sends diorama_cmds via the Computer's interface. """ + def __init__(self, computer, apps): """ Initialize the DioramaComputerInterface. - + Args: computer: The computer instance to send commands through apps: List of applications available in the diorama environment @@ -58,14 +63,14 @@ class DioramaComputerInterface: async def _send_cmd(self, action, arguments=None): """ Send a command to the diorama interface through the computer. - + Args: action (str): The action/command to execute arguments (dict, optional): Additional arguments for the command - + Returns: The result from the diorama command execution - + Raises: RuntimeError: If the computer interface is not initialized or command fails """ @@ -77,25 +82,30 @@ class DioramaComputerInterface: raise RuntimeError("Computer interface not initialized. Call run() first.") result = await iface.diorama_cmd(action, arguments) if not result.get("success"): - raise RuntimeError(f"Diorama command failed: {result.get('error')}\n{result.get('trace')}") + raise RuntimeError( + f"Diorama command failed: {result.get('error')}\n{result.get('trace')}" + ) return result.get("result") async def screenshot(self, as_bytes=True): """ Take a screenshot of the diorama scene. 
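The `venv_exec` plumbing above re-executes extracted function source through `python -c`, base64-encoding the payload to sidestep shell quoting. A self-contained illustration of that round-trip:

```python
# Standalone illustration of the base64 round-trip used by venv_exec above.
import base64
import subprocess
import sys

code = "print('hello from the re-executed snippet')"
encoded = base64.b64encode(code.encode("utf-8")).decode("ascii")
cmd = [
    sys.executable,
    "-c",
    f"import base64; exec(base64.b64decode('{encoded}').decode('utf-8'))",
]
print(subprocess.run(cmd, capture_output=True, text=True).stdout, end="")
```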
- + Args: as_bytes (bool): If True, return image as bytes; if False, return PIL Image object - + Returns: bytes or PIL.Image: Screenshot data in the requested format """ - from PIL import Image import base64 + + from PIL import Image + result = await self._send_cmd("screenshot") # assume result is a b64 string of an image img_bytes = base64.b64decode(result) import io + img = Image.open(io.BytesIO(img_bytes)) self._scene_size = img.size return img_bytes if as_bytes else img @@ -103,7 +113,7 @@ class DioramaComputerInterface: async def get_screen_size(self): """ Get the dimensions of the diorama scene. - + Returns: dict: Dictionary containing 'width' and 'height' keys with pixel dimensions """ @@ -114,7 +124,7 @@ class DioramaComputerInterface: async def move_cursor(self, x, y): """ Move the cursor to the specified coordinates. - + Args: x (int): X coordinate to move cursor to y (int): Y coordinate to move cursor to @@ -124,7 +134,7 @@ class DioramaComputerInterface: async def left_click(self, x=None, y=None): """ Perform a left mouse click at the specified coordinates or current cursor position. - + Args: x (int, optional): X coordinate to click at. If None, clicks at current cursor position y (int, optional): Y coordinate to click at. If None, clicks at current cursor position @@ -134,7 +144,7 @@ class DioramaComputerInterface: async def right_click(self, x=None, y=None): """ Perform a right mouse click at the specified coordinates or current cursor position. - + Args: x (int, optional): X coordinate to click at. If None, clicks at current cursor position y (int, optional): Y coordinate to click at. If None, clicks at current cursor position @@ -144,7 +154,7 @@ class DioramaComputerInterface: async def double_click(self, x=None, y=None): """ Perform a double mouse click at the specified coordinates or current cursor position. - + Args: x (int, optional): X coordinate to double-click at. If None, clicks at current cursor position y (int, optional): Y coordinate to double-click at. If None, clicks at current cursor position @@ -154,7 +164,7 @@ class DioramaComputerInterface: async def scroll_up(self, clicks=1): """ Scroll up by the specified number of clicks. - + Args: clicks (int): Number of scroll clicks to perform upward. Defaults to 1 """ @@ -163,7 +173,7 @@ class DioramaComputerInterface: async def scroll_down(self, clicks=1): """ Scroll down by the specified number of clicks. - + Args: clicks (int): Number of scroll clicks to perform downward. Defaults to 1 """ @@ -172,7 +182,7 @@ class DioramaComputerInterface: async def drag_to(self, x, y, duration=0.5): """ Drag from the current cursor position to the specified coordinates. - + Args: x (int): X coordinate to drag to y (int): Y coordinate to drag to @@ -183,7 +193,7 @@ class DioramaComputerInterface: async def get_cursor_position(self): """ Get the current cursor position. - + Returns: dict: Dictionary containing the current cursor coordinates """ @@ -192,7 +202,7 @@ class DioramaComputerInterface: async def type_text(self, text): """ Type the specified text at the current cursor position. - + Args: text (str): The text to type """ @@ -201,7 +211,7 @@ class DioramaComputerInterface: async def press_key(self, key): """ Press a single key. - + Args: key: The key to press """ @@ -210,10 +220,10 @@ class DioramaComputerInterface: async def hotkey(self, *keys): """ Press multiple keys simultaneously as a hotkey combination. - + Args: *keys: Variable number of keys to press together. 
Can be Key enum instances or strings - + Raises: ValueError: If any key is not a Key enum or string type """ @@ -224,7 +234,9 @@ class DioramaComputerInterface: elif isinstance(key, str): # Try to convert to enum if it matches a known key key_or_enum = Key.from_string(key) - actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum) + actual_keys.append( + key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum + ) else: raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.") await self._send_cmd("hotkey", {"keys": actual_keys}) @@ -232,11 +244,11 @@ class DioramaComputerInterface: async def to_screen_coordinates(self, x, y): """ Convert coordinates to screen coordinates. - + Args: x (int): X coordinate to convert y (int): Y coordinate to convert - + Returns: dict: Dictionary containing the converted screen coordinates """ diff --git a/libs/python/computer/computer/helpers.py b/libs/python/computer/computer/helpers.py index 8317b8d9..608dcbb9 100644 --- a/libs/python/computer/computer/helpers.py +++ b/libs/python/computer/computer/helpers.py @@ -1,8 +1,9 @@ """ Helper functions and decorators for the Computer module. """ -import logging + import asyncio +import logging from functools import wraps from typing import Any, Callable, Optional, TypeVar, cast @@ -11,10 +12,11 @@ _default_computer = None logger = logging.getLogger(__name__) + def set_default_computer(computer): """ Set the default computer instance to be used by the remote decorator. - + Args: computer: The computer instance to use as default """ @@ -25,21 +27,24 @@ def set_default_computer(computer): def sandboxed(venv_name: str = "default", computer: str = "default", max_retries: int = 3): """ Decorator that wraps a function to be executed remotely via computer.venv_exec - + Args: venv_name: Name of the virtual environment to execute in computer: The computer instance to use, or "default" to use the globally set default max_retries: Maximum number of retries for the remote execution """ + def decorator(func): @wraps(func) async def wrapper(*args, **kwargs): # Determine which computer instance to use comp = computer if computer != "default" else _default_computer - + if comp is None: - raise RuntimeError("No computer instance available. Either specify a computer instance or call set_default_computer() first.") - + raise RuntimeError( + "No computer instance available. Either specify a computer instance or call set_default_computer() first." + ) + for i in range(max_retries): try: return await comp.venv_exec(venv_name, func, *args, **kwargs) @@ -48,5 +53,7 @@ def sandboxed(venv_name: str = "default", computer: str = "default", max_retries await asyncio.sleep(1) if i == max_retries - 1: raise e + return wrapper + return decorator diff --git a/libs/python/computer/computer/interface/__init__.py b/libs/python/computer/computer/interface/__init__.py index 6d7e1b78..d15b94d5 100644 --- a/libs/python/computer/computer/interface/__init__.py +++ b/libs/python/computer/computer/interface/__init__.py @@ -2,12 +2,12 @@ Interface package for Computer SDK. 
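For the `sandboxed` decorator above, a hedged end-to-end sketch; the function body and venv name are illustrative, and the decorated call is awaited because the wrapper is async:

```python
# Assumed usage of @sandboxed with a default computer registered.
import asyncio

from computer import Computer
from computer.helpers import sandboxed, set_default_computer


@sandboxed("demo")  # executed remotely in ~/.venvs/demo via venv_exec
def add(a: int, b: int) -> int:
    return a + b


async def main():
    computer = Computer(os_type="macos")
    await computer.run()
    set_default_computer(computer)
    print(await add(2, 3))  # runs in the VM; the result is JSON-round-tripped back
    await computer.stop()

asyncio.run(main())
```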
""" -from .factory import InterfaceFactory from .base import BaseComputerInterface +from .factory import InterfaceFactory from .macos import MacOSComputerInterface __all__ = [ "InterfaceFactory", "BaseComputerInterface", "MacOSComputerInterface", -] \ No newline at end of file +] diff --git a/libs/python/computer/computer/interface/base.py b/libs/python/computer/computer/interface/base.py index 3e343bba..8fa40025 100644 --- a/libs/python/computer/computer/interface/base.py +++ b/libs/python/computer/computer/interface/base.py @@ -1,14 +1,23 @@ """Base interface for computer control.""" from abc import ABC, abstractmethod -from typing import Optional, Dict, Any, Tuple, List +from typing import Any, Dict, List, Optional, Tuple + from ..logger import Logger, LogLevel -from .models import MouseButton, CommandResult +from .models import CommandResult, MouseButton + class BaseComputerInterface(ABC): """Base class for computer control interfaces.""" - def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): + def __init__( + self, + ip_address: str, + username: str = "lume", + password: str = "lume", + api_key: Optional[str] = None, + vm_name: Optional[str] = None, + ): """Initialize interface. Args: @@ -24,7 +33,7 @@ class BaseComputerInterface(ABC): self.api_key = api_key self.vm_name = vm_name self.logger = Logger("cua.interface", LogLevel.NORMAL) - + # Optional default delay time between commands (in seconds) self.delay: float = 0.0 @@ -55,9 +64,15 @@ class BaseComputerInterface(ABC): # Mouse Actions @abstractmethod - async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None: + async def mouse_down( + self, + x: Optional[int] = None, + y: Optional[int] = None, + button: "MouseButton" = "left", + delay: Optional[float] = None, + ) -> None: """Press and hold a mouse button. - + Args: x: X coordinate to press at. If None, uses current cursor position. y: Y coordinate to press at. If None, uses current cursor position. @@ -65,11 +80,17 @@ class BaseComputerInterface(ABC): delay: Optional delay in seconds after the action """ pass - + @abstractmethod - async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None: + async def mouse_up( + self, + x: Optional[int] = None, + y: Optional[int] = None, + button: "MouseButton" = "left", + delay: Optional[float] = None, + ) -> None: """Release a mouse button. - + Args: x: X coordinate to release at. If None, uses current cursor position. y: Y coordinate to release at. If None, uses current cursor position. @@ -77,11 +98,13 @@ class BaseComputerInterface(ABC): delay: Optional delay in seconds after the action """ pass - + @abstractmethod - async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None: + async def left_click( + self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None + ) -> None: """Perform a left mouse button click. - + Args: x: X coordinate to click at. If None, uses current cursor position. y: Y coordinate to click at. If None, uses current cursor position. 
@@ -90,9 +113,11 @@ class BaseComputerInterface(ABC): pass @abstractmethod - async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None: + async def right_click( + self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None + ) -> None: """Perform a right mouse button click. - + Args: x: X coordinate to click at. If None, uses current cursor position. y: Y coordinate to click at. If None, uses current cursor position. @@ -101,9 +126,11 @@ class BaseComputerInterface(ABC): pass @abstractmethod - async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None: + async def double_click( + self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None + ) -> None: """Perform a double left mouse button click. - + Args: x: X coordinate to double-click at. If None, uses current cursor position. y: Y coordinate to double-click at. If None, uses current cursor position. @@ -114,7 +141,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None: """Move the cursor to the specified screen coordinates. - + Args: x: X coordinate to move cursor to. y: Y coordinate to move cursor to. @@ -123,7 +150,14 @@ class BaseComputerInterface(ABC): pass @abstractmethod - async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None: + async def drag_to( + self, + x: int, + y: int, + button: str = "left", + duration: float = 0.5, + delay: Optional[float] = None, + ) -> None: """Drag from current position to specified coordinates. Args: @@ -136,7 +170,13 @@ class BaseComputerInterface(ABC): pass @abstractmethod - async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None: + async def drag( + self, + path: List[Tuple[int, int]], + button: str = "left", + duration: float = 0.5, + delay: Optional[float] = None, + ) -> None: """Drag the cursor along a path of coordinates. Args: @@ -151,27 +191,27 @@ class BaseComputerInterface(ABC): @abstractmethod async def key_down(self, key: str, delay: Optional[float] = None) -> None: """Press and hold a key. - + Args: key: The key to press and hold (e.g., 'a', 'shift', 'ctrl'). delay: Optional delay in seconds after the action. """ pass - + @abstractmethod async def key_up(self, key: str, delay: Optional[float] = None) -> None: """Release a previously pressed key. - + Args: key: The key to release (e.g., 'a', 'shift', 'ctrl'). delay: Optional delay in seconds after the action. """ pass - + @abstractmethod async def type_text(self, text: str, delay: Optional[float] = None) -> None: """Type the specified text string. - + Args: text: The text string to type. delay: Optional delay in seconds after the action. @@ -181,7 +221,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def press_key(self, key: str, delay: Optional[float] = None) -> None: """Press and release a single key. - + Args: key: The key to press (e.g., 'a', 'enter', 'escape'). delay: Optional delay in seconds after the action. @@ -191,7 +231,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None: """Press multiple keys simultaneously (keyboard shortcut). - + Args: *keys: Variable number of keys to press together (e.g., 'ctrl', 'c'). 
delay: Optional delay in seconds after the action. @@ -202,18 +242,18 @@ class BaseComputerInterface(ABC): @abstractmethod async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None: """Scroll the mouse wheel by specified amounts. - + Args: x: Horizontal scroll amount (positive = right, negative = left). y: Vertical scroll amount (positive = up, negative = down). delay: Optional delay in seconds after the action. """ pass - + @abstractmethod async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None: """Scroll down by the specified number of clicks. - + Args: clicks: Number of scroll clicks to perform downward. delay: Optional delay in seconds after the action. @@ -223,7 +263,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None: """Scroll up by the specified number of clicks. - + Args: clicks: Number of scroll clicks to perform upward. delay: Optional delay in seconds after the action. @@ -252,7 +292,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def get_cursor_position(self) -> Dict[str, int]: """Get the current cursor position on screen. - + Returns: Dict with 'x' and 'y' keys containing cursor coordinates. """ @@ -262,7 +302,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def copy_to_clipboard(self) -> str: """Get the current clipboard content. - + Returns: The text content currently stored in the clipboard. """ @@ -271,7 +311,7 @@ class BaseComputerInterface(ABC): @abstractmethod async def set_clipboard(self, text: str) -> None: """Set the clipboard content to the specified text. - + Args: text: The text to store in the clipboard. """ @@ -281,10 +321,10 @@ class BaseComputerInterface(ABC): @abstractmethod async def file_exists(self, path: str) -> bool: """Check if a file exists at the specified path. - + Args: path: The file path to check. - + Returns: True if the file exists, False otherwise. """ @@ -293,128 +333,128 @@ class BaseComputerInterface(ABC): @abstractmethod async def directory_exists(self, path: str) -> bool: """Check if a directory exists at the specified path. - + Args: path: The directory path to check. - + Returns: True if the directory exists, False otherwise. """ pass - + @abstractmethod async def list_dir(self, path: str) -> List[str]: """List the contents of a directory. - + Args: path: The directory path to list. - + Returns: List of file and directory names in the specified directory. """ pass - + @abstractmethod async def read_text(self, path: str) -> str: """Read the text contents of a file. - + Args: path: The file path to read from. - + Returns: The text content of the file. """ pass - + @abstractmethod async def write_text(self, path: str, content: str) -> None: """Write text content to a file. - + Args: path: The file path to write to. content: The text content to write. """ pass - + @abstractmethod async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes: """Read file binary contents with optional seeking support. - + Args: path: Path to the file offset: Byte offset to start reading from (default: 0) length: Number of bytes to read (default: None for entire file) """ pass - + @abstractmethod async def write_bytes(self, path: str, content: bytes) -> None: """Write binary content to a file. - + Args: path: The file path to write to. content: The binary content to write. 
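The file-operation methods compose into simple round trips; a sketch of the intended usage (the path is an example):

```python
async def roundtrip(interface, path: str = "/tmp/example.txt") -> None:
    await interface.write_text(path, "hello")
    assert await interface.file_exists(path)
    print(await interface.read_text(path))  # -> "hello"
```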
""" pass - + @abstractmethod async def delete_file(self, path: str) -> None: """Delete a file at the specified path. - + Args: path: The file path to delete. """ pass - + @abstractmethod async def create_dir(self, path: str) -> None: """Create a directory at the specified path. - + Args: path: The directory path to create. """ pass - + @abstractmethod async def delete_dir(self, path: str) -> None: """Delete a directory at the specified path. - + Args: path: The directory path to delete. """ pass - + @abstractmethod async def get_file_size(self, path: str) -> int: """Get the size of a file in bytes. - + Args: path: The file path to get the size of. - + Returns: The size of the file in bytes. """ pass - + @abstractmethod async def run_command(self, command: str) -> CommandResult: """Run shell command and return structured result. - + Executes a shell command using subprocess.run with shell=True and check=False. The command is run in the target environment and captures both stdout and stderr. - + Args: command (str): The shell command to execute - + Returns: CommandResult: A structured result containing: - stdout (str): Standard output from the command - - stderr (str): Standard error from the command + - stderr (str): Standard error from the command - returncode (int): Exit code from the command (0 indicates success) - + Raises: RuntimeError: If the command execution fails at the system level - + Example: result = await interface.run_command("ls -la") if result.returncode == 0: @@ -428,12 +468,12 @@ class BaseComputerInterface(ABC): @abstractmethod async def get_accessibility_tree(self) -> Dict: """Get the accessibility tree of the current screen. - + Returns: Dict containing the hierarchical accessibility information of screen elements. """ pass - + @abstractmethod async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]: """Convert screenshot coordinates to screen coordinates. diff --git a/libs/python/computer/computer/interface/factory.py b/libs/python/computer/computer/interface/factory.py index 3647400e..7ae6b05c 100644 --- a/libs/python/computer/computer/interface/factory.py +++ b/libs/python/computer/computer/interface/factory.py @@ -1,42 +1,44 @@ """Factory for creating computer interfaces.""" from typing import Literal, Optional + from .base import BaseComputerInterface + class InterfaceFactory: """Factory for creating OS-specific computer interfaces.""" - + @staticmethod def create_interface_for_os( - os: Literal['macos', 'linux', 'windows'], + os: Literal["macos", "linux", "windows"], ip_address: str, api_key: Optional[str] = None, - vm_name: Optional[str] = None + vm_name: Optional[str] = None, ) -> BaseComputerInterface: """Create an interface for the specified OS. 
- + Args: os: Operating system type ('macos', 'linux', or 'windows') ip_address: IP address of the computer to control api_key: Optional API key for cloud authentication vm_name: Optional VM name for cloud authentication - + Returns: BaseComputerInterface: The appropriate interface for the OS - + Raises: ValueError: If the OS type is not supported """ # Import implementations here to avoid circular imports - from .macos import MacOSComputerInterface from .linux import LinuxComputerInterface + from .macos import MacOSComputerInterface from .windows import WindowsComputerInterface - - if os == 'macos': + + if os == "macos": return MacOSComputerInterface(ip_address, api_key=api_key, vm_name=vm_name) - elif os == 'linux': + elif os == "linux": return LinuxComputerInterface(ip_address, api_key=api_key, vm_name=vm_name) - elif os == 'windows': + elif os == "windows": return WindowsComputerInterface(ip_address, api_key=api_key, vm_name=vm_name) else: raise ValueError(f"Unsupported OS type: {os}") diff --git a/libs/python/computer/computer/interface/generic.py b/libs/python/computer/computer/interface/generic.py index a802a686..7cd2372f 100644 --- a/libs/python/computer/computer/interface/generic.py +++ b/libs/python/computer/computer/interface/generic.py @@ -2,21 +2,35 @@ import asyncio import json import time from typing import Any, Dict, List, Optional, Tuple + +import aiohttp +import websockets from PIL import Image -import websockets -import aiohttp - from ..logger import Logger, LogLevel +from ..utils import ( + bytes_to_image, + decode_base64_image, + draw_box, + encode_base64_image, + resize_image, +) from .base import BaseComputerInterface -from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image -from .models import Key, KeyType, MouseButton, CommandResult +from .models import CommandResult, Key, KeyType, MouseButton class GenericComputerInterface(BaseComputerInterface): """Generic interface with common functionality for all supported platforms (Windows, Linux, macOS).""" - def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None, logger_name: str = "computer.interface.generic"): + def __init__( + self, + ip_address: str, + username: str = "lume", + password: str = "lume", + api_key: Optional[str] = None, + vm_name: Optional[str] = None, + logger_name: str = "computer.interface.generic", + ): super().__init__(ip_address, username, password, api_key, vm_name) self._ws = None self._reconnect_task = None @@ -38,7 +52,7 @@ class GenericComputerInterface(BaseComputerInterface): async def _handle_delay(self, delay: Optional[float] = None): """Handle delay between commands using async sleep. - + Args: delay: Optional delay in seconds. If None, uses self.delay. """ @@ -51,18 +65,18 @@ class GenericComputerInterface(BaseComputerInterface): @property def ws_uri(self) -> str: """Get the WebSocket URI using the current IP address. - + Returns: WebSocket URI for the Computer API Server """ protocol = "wss" if self.api_key else "ws" port = "8443" if self.api_key else "8000" return f"{protocol}://{self.ip_address}:{port}/ws" - + @property def rest_uri(self) -> str: """Get the REST URI using the current IP address. 
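Putting the factory and the URI properties together: the interface speaks TLS on port 8443 when an API key is present, and plain WebSocket/HTTP on port 8000 otherwise. A sketch with placeholder connection details:

```python
from computer.interface.factory import InterfaceFactory

interface = InterfaceFactory.create_interface_for_os(
    os="linux",
    ip_address="192.168.64.2",  # placeholder local VM address
    api_key=None,               # no key: local, unauthenticated transport
)
print(interface.ws_uri)    # ws://192.168.64.2:8000/ws
print(interface.rest_uri)  # http://192.168.64.2:8000/cmd
```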
- + Returns: REST URI for the Computer API Server """ @@ -71,23 +85,41 @@ class GenericComputerInterface(BaseComputerInterface): return f"{protocol}://{self.ip_address}:{port}/cmd" # Mouse actions - async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None: + async def mouse_down( + self, + x: Optional[int] = None, + y: Optional[int] = None, + button: str = "left", + delay: Optional[float] = None, + ) -> None: await self._send_command("mouse_down", {"x": x, "y": y, "button": button}) await self._handle_delay(delay) - - async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None: + + async def mouse_up( + self, + x: Optional[int] = None, + y: Optional[int] = None, + button: str = "left", + delay: Optional[float] = None, + ) -> None: await self._send_command("mouse_up", {"x": x, "y": y, "button": button}) await self._handle_delay(delay) - - async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None: + + async def left_click( + self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None + ) -> None: await self._send_command("left_click", {"x": x, "y": y}) await self._handle_delay(delay) - async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None: + async def right_click( + self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None + ) -> None: await self._send_command("right_click", {"x": x, "y": y}) await self._handle_delay(delay) - async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None: + async def double_click( + self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None + ) -> None: await self._send_command("double_click", {"x": x, "y": y}) await self._handle_delay(delay) @@ -95,37 +127,40 @@ class GenericComputerInterface(BaseComputerInterface): await self._send_command("move_cursor", {"x": x, "y": y}) await self._handle_delay(delay) - async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None: + async def drag_to( + self, + x: int, + y: int, + button: "MouseButton" = "left", + duration: float = 0.5, + delay: Optional[float] = None, + ) -> None: await self._send_command( "drag_to", {"x": x, "y": y, "button": button, "duration": duration} ) await self._handle_delay(delay) - async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None: - await self._send_command( - "drag", {"path": path, "button": button, "duration": duration} - ) + async def drag( + self, + path: List[Tuple[int, int]], + button: "MouseButton" = "left", + duration: float = 0.5, + delay: Optional[float] = None, + ) -> None: + await self._send_command("drag", {"path": path, "button": button, "duration": duration}) await self._handle_delay(delay) # Keyboard Actions async def key_down(self, key: "KeyType", delay: Optional[float] = None) -> None: await self._send_command("key_down", {"key": key}) await self._handle_delay(delay) - + async def key_up(self, key: "KeyType", delay: Optional[float] = None) -> None: await self._send_command("key_up", {"key": key}) await self._handle_delay(delay) - + async def type_text(self, text: str, delay: Optional[float] = None) -> 
None: - # Temporary fix for https://github.com/trycua/cua/issues/165 - # Check if text contains Unicode characters - if any(ord(char) > 127 for char in text): - # For Unicode text, use clipboard and paste - await self.set_clipboard(text) - await self.hotkey(Key.COMMAND, 'v') - else: - # For ASCII text, use the regular typing method - await self._send_command("type_text", {"text": text}) + await self._send_command("type_text", {"text": text}) await self._handle_delay(delay) async def press(self, key: "KeyType", delay: Optional[float] = None) -> None: @@ -203,10 +238,12 @@ class GenericComputerInterface(BaseComputerInterface): elif isinstance(key, str): # Try to convert to enum if it matches a known key key_or_enum = Key.from_string(key) - actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum) + actual_keys.append( + key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum + ) else: raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.") - + await self._send_command("hotkey", {"keys": actual_keys}) await self._handle_delay(delay) @@ -214,11 +251,11 @@ class GenericComputerInterface(BaseComputerInterface): async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None: await self._send_command("scroll", {"x": x, "y": y}) await self._handle_delay(delay) - + async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None: await self._send_command("scroll_down", {"clicks": clicks}) await self._handle_delay(delay) - + async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None: await self._send_command("scroll_up", {"clicks": clicks}) await self._handle_delay(delay) @@ -302,27 +339,32 @@ class GenericComputerInterface(BaseComputerInterface): await self._send_command("set_clipboard", {"text": text}) # File Operations - async def _write_bytes_chunked(self, path: str, content: bytes, append: bool = False, chunk_size: int = 1024 * 1024) -> None: + async def _write_bytes_chunked( + self, path: str, content: bytes, append: bool = False, chunk_size: int = 1024 * 1024 + ) -> None: """Write large files in chunks to avoid memory issues.""" total_size = len(content) current_offset = 0 - + while current_offset < total_size: chunk_end = min(current_offset + chunk_size, total_size) chunk_data = content[current_offset:chunk_end] - + # First chunk uses the original append flag, subsequent chunks always append chunk_append = append if current_offset == 0 else True - - result = await self._send_command("write_bytes", { - "path": path, - "content_b64": encode_base64_image(chunk_data), - "append": chunk_append - }) - + + result = await self._send_command( + "write_bytes", + { + "path": path, + "content_b64": encode_base64_image(chunk_data), + "append": chunk_append, + }, + ) + if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to write file chunk")) - + current_offset = chunk_end async def write_bytes(self, path: str, content: bytes, append: bool = False) -> None: @@ -330,36 +372,39 @@ class GenericComputerInterface(BaseComputerInterface): if len(content) > 5 * 1024 * 1024: # 5MB threshold await self._write_bytes_chunked(path, content, append) return - - result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content), "append": append}) + + result = await self._send_command( + "write_bytes", + {"path": path, "content_b64": encode_base64_image(content), "append": append}, + ) if not result.get("success", False): raise 
RuntimeError(result.get("error", "Failed to write file")) - async def _read_bytes_chunked(self, path: str, offset: int, total_length: int, chunk_size: int = 1024 * 1024) -> bytes: + async def _read_bytes_chunked( + self, path: str, offset: int, total_length: int, chunk_size: int = 1024 * 1024 + ) -> bytes: """Read large files in chunks to avoid memory issues.""" chunks = [] current_offset = offset remaining = total_length - + while remaining > 0: read_size = min(chunk_size, remaining) - result = await self._send_command("read_bytes", { - "path": path, - "offset": current_offset, - "length": read_size - }) - + result = await self._send_command( + "read_bytes", {"path": path, "offset": current_offset, "length": read_size} + ) + if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to read file chunk")) - + content_b64 = result.get("content_b64", "") chunk_data = decode_base64_image(content_b64) chunks.append(chunk_data) - + current_offset += read_size remaining -= read_size - - return b''.join(chunks) + + return b"".join(chunks) async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes: # For large files, use chunked reading @@ -368,34 +413,36 @@ class GenericComputerInterface(BaseComputerInterface): file_size = await self.get_file_size(path) # If file is larger than 5MB, read in chunks if file_size > 5 * 1024 * 1024: # 5MB threshold - return await self._read_bytes_chunked(path, offset, file_size - offset if offset > 0 else file_size) - - result = await self._send_command("read_bytes", { - "path": path, - "offset": offset, - "length": length - }) + return await self._read_bytes_chunked( + path, offset, file_size - offset if offset > 0 else file_size + ) + + result = await self._send_command( + "read_bytes", {"path": path, "offset": offset, "length": length} + ) if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to read file")) content_b64 = result.get("content_b64", "") return decode_base64_image(content_b64) - async def read_text(self, path: str, encoding: str = 'utf-8') -> str: + async def read_text(self, path: str, encoding: str = "utf-8") -> str: """Read text from a file with specified encoding. - + Args: path: Path to the file to read encoding: Text encoding to use (default: 'utf-8') - + Returns: str: The decoded text content of the file """ content_bytes = await self.read_bytes(path) return content_bytes.decode(encoding) - async def write_text(self, path: str, content: str, encoding: str = 'utf-8', append: bool = False) -> None: + async def write_text( + self, path: str, content: str, encoding: str = "utf-8", append: bool = False + ) -> None: """Write text to a file with specified encoding. 
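Both transfer paths above switch to 1 MB chunks once a file exceeds 5 MB, base64-encoding each chunk over the wire. A standalone sketch of the write-side chunking rule:

```python
def iter_chunks(data: bytes, chunk_size: int = 1024 * 1024, append: bool = False):
    # Mirrors _write_bytes_chunked: the first chunk honours the caller's
    # append flag, every later chunk appends to what was already written.
    for offset in range(0, len(data), chunk_size):
        yield data[offset:offset + chunk_size], append if offset == 0 else True

payload = b"x" * (6 * 1024 * 1024)  # >5 MB, so chunking would kick in
print([len(chunk) for chunk, _ in iter_chunks(payload)])  # six 1 MiB chunks
```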
- + Args: path: Path to the file to write content: Text content to write @@ -448,7 +495,7 @@ class GenericComputerInterface(BaseComputerInterface): return CommandResult( stdout=result.get("stdout", ""), stderr=result.get("stderr", ""), - returncode=result.get("return_code", 0) + returncode=result.get("return_code", 0), ) # Accessibility Actions @@ -458,7 +505,7 @@ class GenericComputerInterface(BaseComputerInterface): if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to get accessibility tree")) return result - + async def get_active_window_bounds(self) -> Dict[str, int]: """Get the bounds of the currently active window.""" result = await self._send_command("get_active_window_bounds") @@ -564,33 +611,30 @@ class GenericComputerInterface(BaseComputerInterface): timeout=120, ) self.logger.info("WebSocket connection established") - + # If api_key and vm_name are provided, perform authentication handshake if self.api_key and self.vm_name: self.logger.info("Performing authentication handshake...") auth_message = { "command": "authenticate", - "params": { - "api_key": self.api_key, - "container_name": self.vm_name - } + "params": {"api_key": self.api_key, "container_name": self.vm_name}, } await self._ws.send(json.dumps(auth_message)) - + # Wait for authentication response async with self._recv_lock: auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10) auth_result = json.loads(auth_response) - + if not auth_result.get("success"): error_msg = auth_result.get("error", "Authentication failed") self.logger.error(f"Authentication failed: {error_msg}") await self._ws.close() self._ws = None raise ConnectionError(f"Authentication failed: {error_msg}") - + self.logger.info("Authentication successful") - + self._reconnect_delay = 1 # Reset reconnect delay on successful connection self._last_ping = time.time() retry_count = 0 # Reset retry count on successful connection @@ -600,7 +644,7 @@ class GenericComputerInterface(BaseComputerInterface): # Only log the first error at WARNING level, then every Nth attempt if retry_count == 1: self.logger.warning( - f"Computer API Server not ready yet. Will retry automatically." + "Computer API Server not ready yet. Will retry automatically." ) elif retry_count % log_interval == 0: self.logger.warning( @@ -648,7 +692,7 @@ class GenericComputerInterface(BaseComputerInterface): # Only log connection lost warnings at most once every min_warning_interval seconds if current_time - last_warning_time >= min_warning_interval: self.logger.warning( - f"Computer API Server connection lost. Will retry automatically." + "Computer API Server connection lost. Will retry automatically." 
) last_warning_time = current_time else: @@ -661,7 +705,7 @@ class GenericComputerInterface(BaseComputerInterface): except: pass self._ws = None - + async def _ensure_connection(self): """Ensure WebSocket connection is established.""" if self._reconnect_task is None or self._reconnect_task.done(): @@ -730,32 +774,30 @@ class GenericComputerInterface(BaseComputerInterface): raise last_error if last_error else RuntimeError("Failed to send command") - async def _send_command_rest(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]: + async def _send_command_rest( + self, command: str, params: Optional[Dict] = None + ) -> Dict[str, Any]: """Send command through REST API without retries or connection management.""" try: # Prepare the request payload payload = {"command": command, "params": params or {}} - + # Prepare headers headers = {"Content-Type": "application/json"} if self.api_key: headers["X-API-Key"] = self.api_key if self.vm_name: headers["X-Container-Name"] = self.vm_name - + # Send the request async with aiohttp.ClientSession() as session: - async with session.post( - self.rest_uri, - json=payload, - headers=headers - ) as response: + async with session.post(self.rest_uri, json=payload, headers=headers) as response: # Get the response text response_text = await response.text() - + # Trim whitespace response_text = response_text.strip() - + # Check if it starts with "data: " if response_text.startswith("data: "): # Extract everything after "data: " @@ -766,38 +808,39 @@ class GenericComputerInterface(BaseComputerInterface): return { "success": False, "error": "Server returned malformed response", - "message": response_text + "message": response_text, } else: # Return error response return { "success": False, "error": "Server returned malformed response", - "message": response_text + "message": response_text, } - + except Exception as e: - return { - "success": False, - "error": "Request failed", - "message": str(e) - } + return {"success": False, "error": "Request failed", "message": str(e)} async def _send_command(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]: """Send command using REST API with WebSocket fallback.""" # Try REST API first result = await self._send_command_rest(command, params) - + # If REST failed with "Request failed", try WebSocket as fallback - if not result.get("success", True) and (result.get("error") == "Request failed" or result.get("error") == "Server returned malformed response"): - self.logger.warning(f"REST API failed for command '{command}', trying WebSocket fallback") + if not result.get("success", True) and ( + result.get("error") == "Request failed" + or result.get("error") == "Server returned malformed response" + ): + self.logger.warning( + f"REST API failed for command '{command}', trying WebSocket fallback" + ) try: return await self._send_command_ws(command, params) except Exception as e: self.logger.error(f"WebSocket fallback also failed: {e}") # Return the original REST error return result - + return result async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0): @@ -808,7 +851,9 @@ class GenericComputerInterface(BaseComputerInterface): result = await self._send_command_rest("version", {}) assert result.get("success", True) except Exception as e: - self.logger.debug(f"REST API failed for command 'version', trying WebSocket fallback: {e}") + self.logger.debug( + f"REST API failed for command 'version', trying WebSocket fallback: {e}" + ) try: await self._wait_for_ready_ws(timeout, interval) 
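The REST path wraps every command in a small JSON envelope and strips the `data: ` prefix from the server's reply before parsing, falling back to WebSocket on failure. A simplified standalone sketch of that request shape (host, key, and VM name are placeholders):

```python
import json
from typing import Any, Dict, Optional

import aiohttp

async def send_cmd(
    host: str,
    command: str,
    params: Optional[Dict[str, Any]] = None,
    api_key: Optional[str] = None,
    vm_name: Optional[str] = None,
) -> Dict[str, Any]:
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["X-API-Key"] = api_key
    if vm_name:
        headers["X-Container-Name"] = vm_name
    payload = {"command": command, "params": params or {}}
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"https://{host}:8443/cmd", json=payload, headers=headers
        ) as resp:
            text = (await resp.text()).strip()
            if text.startswith("data: "):
                text = text[len("data: "):]  # server prefixes JSON with "data: "
            return json.loads(text)
```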
return @@ -957,7 +1002,7 @@ class GenericComputerInterface(BaseComputerInterface): # if self._ws: # asyncio.create_task(self._ws.close()) # self._ws = None - + def force_close(self): """Force close the WebSocket connection. @@ -970,4 +1015,3 @@ class GenericComputerInterface(BaseComputerInterface): if self._ws: asyncio.create_task(self._ws.close()) self._ws = None - diff --git a/libs/python/computer/computer/interface/linux.py b/libs/python/computer/computer/interface/linux.py index 174fe07a..9e5a3c9b 100644 --- a/libs/python/computer/computer/interface/linux.py +++ b/libs/python/computer/computer/interface/linux.py @@ -1,8 +1,19 @@ from typing import Optional + from .generic import GenericComputerInterface + class LinuxComputerInterface(GenericComputerInterface): """Interface for Linux.""" - def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): - super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.linux") + def __init__( + self, + ip_address: str, + username: str = "lume", + password: str = "lume", + api_key: Optional[str] = None, + vm_name: Optional[str] = None, + ): + super().__init__( + ip_address, username, password, api_key, vm_name, "computer.interface.linux" + ) diff --git a/libs/python/computer/computer/interface/macos.py b/libs/python/computer/computer/interface/macos.py index f4d03a52..6dcf8a1b 100644 --- a/libs/python/computer/computer/interface/macos.py +++ b/libs/python/computer/computer/interface/macos.py @@ -1,12 +1,25 @@ -from .generic import GenericComputerInterface from typing import Optional +from .generic import GenericComputerInterface + + class MacOSComputerInterface(GenericComputerInterface): """Interface for macOS.""" - def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): - super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.macos") + def __init__( + self, + ip_address: str, + username: str = "lume", + password: str = "lume", + api_key: Optional[str] = None, + vm_name: Optional[str] = None, + ): + super().__init__( + ip_address, username, password, api_key, vm_name, "computer.interface.macos" + ) async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict: """Send a diorama command to the server (macOS only).""" - return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}}) \ No newline at end of file + return await self._send_command( + "diorama_cmd", {"action": action, "arguments": arguments or {}} + ) diff --git a/libs/python/computer/computer/interface/models.py b/libs/python/computer/computer/interface/models.py index 223ac321..bbba789a 100644 --- a/libs/python/computer/computer/interface/models.py +++ b/libs/python/computer/computer/interface/models.py @@ -1,136 +1,145 @@ -from enum import Enum -from typing import Dict, List, Any, TypedDict, Union, Literal from dataclasses import dataclass +from enum import Enum +from typing import Any, Dict, List, Literal, TypedDict, Union + @dataclass class CommandResult: stdout: str - stderr: str + stderr: str returncode: int - + def __init__(self, stdout: str, stderr: str, returncode: int): self.stdout = stdout self.stderr = stderr self.returncode = returncode + # Navigation key literals -NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'up', 'down'] +NavigationKey = 
Literal["pagedown", "pageup", "home", "end", "left", "right", "up", "down"] # Special key literals -SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del'] +SpecialKey = Literal["enter", "esc", "tab", "space", "backspace", "del"] # Modifier key literals -ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option'] +ModifierKey = Literal["ctrl", "alt", "shift", "win", "command", "option"] # Function key literals -FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12'] +FunctionKey = Literal["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12"] + class Key(Enum): """Keyboard keys that can be used with press_key. - + These key names map to PyAutoGUI's expected key names. """ + # Navigation - PAGE_DOWN = 'pagedown' - PAGE_UP = 'pageup' - HOME = 'home' - END = 'end' - LEFT = 'left' - RIGHT = 'right' - UP = 'up' - DOWN = 'down' - + PAGE_DOWN = "pagedown" + PAGE_UP = "pageup" + HOME = "home" + END = "end" + LEFT = "left" + RIGHT = "right" + UP = "up" + DOWN = "down" + # Special keys - RETURN = 'enter' - ENTER = 'enter' - ESCAPE = 'esc' - ESC = 'esc' - TAB = 'tab' - SPACE = 'space' - BACKSPACE = 'backspace' - DELETE = 'del' - + RETURN = "enter" + ENTER = "enter" + ESCAPE = "esc" + ESC = "esc" + TAB = "tab" + SPACE = "space" + BACKSPACE = "backspace" + DELETE = "del" + # Modifier keys - ALT = 'alt' - CTRL = 'ctrl' - SHIFT = 'shift' - WIN = 'win' - COMMAND = 'command' - OPTION = 'option' - + ALT = "alt" + CTRL = "ctrl" + SHIFT = "shift" + WIN = "win" + COMMAND = "command" + OPTION = "option" + # Function keys - F1 = 'f1' - F2 = 'f2' - F3 = 'f3' - F4 = 'f4' - F5 = 'f5' - F6 = 'f6' - F7 = 'f7' - F8 = 'f8' - F9 = 'f9' - F10 = 'f10' - F11 = 'f11' - F12 = 'f12' + F1 = "f1" + F2 = "f2" + F3 = "f3" + F4 = "f4" + F5 = "f5" + F6 = "f6" + F7 = "f7" + F8 = "f8" + F9 = "f9" + F10 = "f10" + F11 = "f11" + F12 = "f12" @classmethod - def from_string(cls, key: str) -> 'Key | str': + def from_string(cls, key: str) -> "Key | str": """Convert a string key name to a Key enum value. 
- + Args: key: String key name to convert - + Returns: Key enum value if the string matches a known key, otherwise returns the original string for single character keys """ # Map common alternative names to enum values key_mapping = { - 'page_down': cls.PAGE_DOWN, - 'page down': cls.PAGE_DOWN, - 'pagedown': cls.PAGE_DOWN, - 'page_up': cls.PAGE_UP, - 'page up': cls.PAGE_UP, - 'pageup': cls.PAGE_UP, - 'return': cls.RETURN, - 'enter': cls.ENTER, - 'escape': cls.ESCAPE, - 'esc': cls.ESC, - 'delete': cls.DELETE, - 'del': cls.DELETE, + "page_down": cls.PAGE_DOWN, + "page down": cls.PAGE_DOWN, + "pagedown": cls.PAGE_DOWN, + "page_up": cls.PAGE_UP, + "page up": cls.PAGE_UP, + "pageup": cls.PAGE_UP, + "return": cls.RETURN, + "enter": cls.ENTER, + "escape": cls.ESCAPE, + "esc": cls.ESC, + "delete": cls.DELETE, + "del": cls.DELETE, # Modifier key mappings - 'alt': cls.ALT, - 'ctrl': cls.CTRL, - 'control': cls.CTRL, - 'shift': cls.SHIFT, - 'win': cls.WIN, - 'windows': cls.WIN, - 'super': cls.WIN, - 'command': cls.COMMAND, - 'cmd': cls.COMMAND, - '⌘': cls.COMMAND, - 'option': cls.OPTION, - '⌥': cls.OPTION, + "alt": cls.ALT, + "ctrl": cls.CTRL, + "control": cls.CTRL, + "shift": cls.SHIFT, + "win": cls.WIN, + "windows": cls.WIN, + "super": cls.WIN, + "command": cls.COMMAND, + "cmd": cls.COMMAND, + "⌘": cls.COMMAND, + "option": cls.OPTION, + "⌥": cls.OPTION, } - + normalized = key.lower().strip() return key_mapping.get(normalized, key) + # Combined key type KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str] # Key type for mouse actions -MouseButton = Literal['left', 'right', 'middle'] +MouseButton = Literal["left", "right", "middle"] + class AccessibilityWindow(TypedDict): """Information about a window in the accessibility tree.""" + app_name: str pid: int frontmost: bool has_windows: bool windows: List[Dict[str, Any]] + class AccessibilityTree(TypedDict): """Complete accessibility tree information.""" + success: bool frontmost_application: str - windows: List[AccessibilityWindow] \ No newline at end of file + windows: List[AccessibilityWindow] diff --git a/libs/python/computer/computer/interface/windows.py b/libs/python/computer/computer/interface/windows.py index a874d359..558ad749 100644 --- a/libs/python/computer/computer/interface/windows.py +++ b/libs/python/computer/computer/interface/windows.py @@ -1,8 +1,19 @@ from typing import Optional + from .generic import GenericComputerInterface + class WindowsComputerInterface(GenericComputerInterface): """Interface for Windows.""" - def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None): - super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.windows") + def __init__( + self, + ip_address: str, + username: str = "lume", + password: str = "lume", + api_key: Optional[str] = None, + vm_name: Optional[str] = None, + ): + super().__init__( + ip_address, username, password, api_key, vm_name, "computer.interface.windows" + ) diff --git a/libs/python/computer/computer/models.py b/libs/python/computer/computer/models.py index 5ead143f..beb0e65a 100644 --- a/libs/python/computer/computer/models.py +++ b/libs/python/computer/computer/models.py @@ -1,27 +1,33 @@ """Models for computer configuration.""" from dataclasses import dataclass -from typing import Optional, Any, Dict +from typing import Any, Dict, Optional # Import base provider interface from .providers.base import BaseVMProvider + @dataclass class Display: 
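The alias table above makes key lookup forgiving about case, whitespace, and platform naming; unmapped strings (single characters, mainly) pass through unchanged. Expected behaviour, per the mapping:

```python
from computer.interface.models import Key

assert Key.from_string("cmd") is Key.COMMAND          # alias -> canonical enum
assert Key.from_string("Page Down") is Key.PAGE_DOWN  # case- and space-insensitive
assert Key.from_string("control") is Key.CTRL
assert Key.from_string("a") == "a"                    # unmapped keys pass through
```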
"""Display configuration.""" + width: int height: int + @dataclass class Image: """VM image configuration.""" + image: str tag: str name: str + @dataclass class Computer: """Computer configuration.""" + image: str tag: str name: str @@ -29,13 +35,13 @@ class Computer: memory: str cpu: str vm_provider: Optional[BaseVMProvider] = None - + # @property # Remove the property decorator async def get_ip(self) -> Optional[str]: """Get the IP address of the VM.""" if not self.vm_provider: return None - + vm = await self.vm_provider.get_vm(self.name) # Handle both object attribute and dictionary access for ip_address if vm: @@ -44,4 +50,4 @@ class Computer: else: # Access as attribute for object-based return values return getattr(vm, "ip_address", None) - return None \ No newline at end of file + return None diff --git a/libs/python/computer/computer/providers/base.py b/libs/python/computer/computer/providers/base.py index 23526097..a32988bd 100644 --- a/libs/python/computer/computer/providers/base.py +++ b/libs/python/computer/computer/providers/base.py @@ -2,11 +2,14 @@ import abc from enum import StrEnum -from typing import Dict, List, Optional, Any, AsyncContextManager +from typing import Any, AsyncContextManager, Dict, Optional + +from .types import ListVMsResponse class VMProviderType(StrEnum): """Enum of supported VM provider types.""" + LUME = "lume" LUMIER = "lumier" CLOUD = "cloud" @@ -17,90 +20,113 @@ class VMProviderType(StrEnum): class BaseVMProvider(AsyncContextManager): """Base interface for VM providers. - + All VM provider implementations must implement this interface. """ - + @property @abc.abstractmethod def provider_type(self) -> VMProviderType: """Get the provider type.""" pass - + @abc.abstractmethod async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Get VM information by name. - + Args: name: Name of the VM to get information for storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM information including status, IP address, etc. """ pass - + @abc.abstractmethod - async def list_vms(self) -> List[Dict[str, Any]]: - """List all available VMs.""" + async def list_vms(self) -> ListVMsResponse: + """List all available VMs. + + Returns: + ListVMsResponse: A list of minimal VM objects as defined in + `computer.providers.types.MinimalVM`. + """ pass - + @abc.abstractmethod - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + async def run_vm( + self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Run a VM by name with the given options. - + Args: image: Name/tag of the image to use name: Name of the VM to run run_opts: Dictionary of run options (memory, cpu, etc.) storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM run status and information """ pass - + @abc.abstractmethod async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Stop a VM by name. - + Args: name: Name of the VM to stop storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. 
- + Returns: Dictionary with VM stop status and information """ pass - + @abc.abstractmethod - async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + """Restart a VM by name. + + Args: + name: Name of the VM to restart + storage: Optional storage path override. If provided, this will be used + instead of the provider's default storage path. + + Returns: + Dictionary with VM restart status and information + """ + pass + + @abc.abstractmethod + async def update_vm( + self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Update VM configuration. - + Args: name: Name of the VM to update update_opts: Dictionary of update options (memory, cpu, etc.) storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM update status and information """ pass - + @abc.abstractmethod async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. - + Args: name: Name of the VM to get the IP for storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. retry_delay: Delay between retries in seconds (default: 2) - + Returns: IP address of the VM when it becomes available """ diff --git a/libs/python/computer/computer/providers/cloud/provider.py b/libs/python/computer/computer/providers/cloud/provider.py index 1cfba161..7d479686 100644 --- a/libs/python/computer/computer/providers/cloud/provider.py +++ b/libs/python/computer/computer/providers/cloud/provider.py @@ -1,26 +1,39 @@ -"""Cloud VM provider implementation. +"""Cloud VM provider implementation using CUA Public API. -This module contains a stub implementation for a future cloud VM provider. +Implements the following public API endpoints: + +- GET /v1/vms +- POST /v1/vms/:name/start +- POST /v1/vms/:name/stop +- POST /v1/vms/:name/restart """ import logging -from typing import Dict, List, Optional, Any +from typing import Any, Dict, List, Optional from ..base import BaseVMProvider, VMProviderType +from ..types import ListVMsResponse, MinimalVM # Setup logging logger = logging.getLogger(__name__) import asyncio -import aiohttp +import os from urllib.parse import urlparse +import aiohttp + +DEFAULT_API_BASE = os.getenv("CUA_API_BASE", "https://api.cua.ai") + + class CloudProvider(BaseVMProvider): """Cloud VM Provider implementation.""" + def __init__( self, api_key: str, verbose: bool = False, + api_base: Optional[str] = None, **kwargs, ): """ @@ -32,6 +45,7 @@ class CloudProvider(BaseVMProvider): assert api_key, "api_key required for CloudProvider" self.api_key = api_key self.verbose = verbose + self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/") @property def provider_type(self) -> VMProviderType: @@ -44,26 +58,178 @@ class CloudProvider(BaseVMProvider): pass async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: - """Get VM VNC URL by name using the cloud API.""" - return {"name": name, "hostname": f"{name}.containers.cloud.trycua.com"} + """Get VM information by querying the VM status endpoint. 
- async def list_vms(self) -> List[Dict[str, Any]]: - logger.warning("CloudProvider.list_vms is not implemented") - return [] + - Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com" + - Probe https://{hostname}:8443/status with a short timeout + - If JSON contains a "status" field, return it; otherwise infer + - Fallback to DNS resolve check to distinguish unknown vs not_found + """ + hostname = await self.get_ip(name=name) - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: - # logger.warning("CloudProvider.run_vm is not implemented") - return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"} + # Try HTTPS probe to the computer-server status endpoint (8443) + try: + timeout = aiohttp.ClientTimeout(total=3) + async with aiohttp.ClientSession(timeout=timeout) as session: + url = f"https://{hostname}:8443/status" + async with session.get(url, allow_redirects=False) as resp: + status_code = resp.status + vm_status: str + vm_os_type: Optional[str] = None + if status_code == 200: + try: + data = await resp.json(content_type=None) + vm_status = str(data.get("status", "ok")) + vm_os_type = str(data.get("os_type")) + except Exception: + vm_status = "unknown" + elif status_code < 500: + vm_status = "unknown" + else: + vm_status = "unknown" + return { + "name": name, + "status": "running" if vm_status == "ok" else vm_status, + "api_url": f"https://{hostname}:8443", + "os_type": vm_os_type, + } + except Exception: + return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:8443"} + + async def list_vms(self) -> ListVMsResponse: + url = f"{self.api_base}/v1/vms" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as resp: + if resp.status == 200: + try: + data = await resp.json(content_type=None) + except Exception: + text = await resp.text() + logger.error(f"Failed to parse list_vms JSON: {text}") + return [] + if isinstance(data, list): + # Enrich with convenience URLs when possible. + enriched: List[Dict[str, Any]] = [] + for item in data: + vm = dict(item) if isinstance(item, dict) else {} + name = vm.get("name") + password = vm.get("password") + if isinstance(name, str) and name: + host = f"{name}.containers.cloud.trycua.com" + # api_url: always set if missing + if not vm.get("api_url"): + vm["api_url"] = f"https://{host}:8443" + # vnc_url: only when password available + if not vm.get("vnc_url") and isinstance(password, str) and password: + vm["vnc_url"] = ( + f"https://{host}/vnc.html?autoconnect=true&password={password}" + ) + enriched.append(vm) + return enriched # type: ignore[return-value] + logger.warning("Unexpected response for list_vms; expected list") + return [] + elif resp.status == 401: + logger.error("Unauthorized: invalid CUA API key for list_vms") + return [] + else: + text = await resp.text() + logger.error(f"list_vms failed: HTTP {resp.status} - {text}") + return [] + + async def run_vm( + self, + name: str, + image: Optional[str] = None, + run_opts: Optional[Dict[str, Any]] = None, + storage: Optional[str] = None, + ) -> Dict[str, Any]: + """Start a VM via public API. 
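The status probe in `get_vm` reduces to a short HTTPS GET against the conventional cloud hostname; a standalone sketch (the VM name is a placeholder):

```python
import aiohttp

async def probe_vm(name: str) -> dict:
    # Hostname convention and /status probe as in CloudProvider.get_vm above.
    hostname = f"{name}.containers.cloud.trycua.com"
    timeout = aiohttp.ClientTimeout(total=3)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(f"https://{hostname}:8443/status") as resp:
                data = await resp.json(content_type=None)
                status = str(data.get("status", "ok"))
                return {"name": name, "status": "running" if status == "ok" else status}
    except Exception:
        return {"name": name, "status": "not_found"}
```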
Returns a minimal status.""" + url = f"{self.api_base}/v1/vms/{name}/start" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 201, 202, 204): + return {"name": name, "status": "starting"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: - logger.warning("CloudProvider.stop_vm is not implemented. To clean up resources, please use Computer.disconnect()") - return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"} + """Stop a VM via public API.""" + url = f"{self.api_base}/v1/vms/{name}/stop" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 202): + # Spec says 202 with {"status":"stopping"} + body_status: Optional[str] = None + try: + data = await resp.json(content_type=None) + body_status = data.get("status") if isinstance(data, dict) else None + except Exception: + body_status = None + return {"name": name, "status": body_status or "stopping"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} - async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: - logger.warning("CloudProvider.update_vm is not implemented") - return {"name": name, "status": "unchanged", "message": "CloudProvider is not implemented"} + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + """Restart a VM via public API.""" + url = f"{self.api_base}/v1/vms/{name}/restart" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 202): + # Spec says 202 with {"status":"restarting"} + body_status: Optional[str] = None + try: + data = await resp.json(content_type=None) + body_status = data.get("status") if isinstance(data, dict) else None + except Exception: + body_status = None + return {"name": name, "status": body_status or "restarting"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} - async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str: + async def update_vm( + self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: + logger.warning("CloudProvider.update_vm is not implemented via public API") + return { + "name": name, + "status": "unchanged", + "message": "update_vm not supported by public API", + } + + async def get_ip( + self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2 + ) -> 
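The three lifecycle endpoints share one shape: a bare POST with Bearer auth, answered with 202 and a pending status. A condensed sketch of that common pattern (not the provider's exact code):

```python
import aiohttp

PENDING = {"start": "starting", "stop": "stopping", "restart": "restarting"}

async def vm_action(name: str, action: str, api_key: str,
                    api_base: str = "https://api.cua.ai") -> dict:
    # action must be "start", "stop", or "restart", per the endpoints above.
    headers = {"Authorization": f"Bearer {api_key}", "Accept": "application/json"}
    url = f"{api_base}/v1/vms/{name}/{action}"
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers) as resp:
            if resp.status in (200, 201, 202, 204):
                return {"name": name, "status": PENDING[action]}
            if resp.status == 404:
                return {"name": name, "status": "not_found"}
            if resp.status == 401:
                return {"name": name, "status": "unauthorized"}
            return {"name": name, "status": "error", "message": await resp.text()}
```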
str: """ Return the VM's IP address as '{container_name}.containers.cloud.trycua.com'. Uses the provided 'name' argument (the VM name requested by the caller), diff --git a/libs/python/computer/computer/providers/docker/__init__.py b/libs/python/computer/computer/providers/docker/__init__.py index 048f526c..64e42609 100644 --- a/libs/python/computer/computer/providers/docker/__init__.py +++ b/libs/python/computer/computer/providers/docker/__init__.py @@ -5,6 +5,7 @@ from .provider import DockerProvider # Check if Docker is available try: import subprocess + subprocess.run(["docker", "--version"], capture_output=True, check=True) HAS_DOCKER = True except (subprocess.SubprocessError, FileNotFoundError): diff --git a/libs/python/computer/computer/providers/docker/provider.py b/libs/python/computer/computer/providers/docker/provider.py index 82ad411c..e5f56dc5 100644 --- a/libs/python/computer/computer/providers/docker/provider.py +++ b/libs/python/computer/computer/providers/docker/provider.py @@ -6,13 +6,13 @@ Linux VMs with computer-server. It handles VM lifecycle operations through Docke commands and container management. """ -import logging -import json import asyncio -from typing import Dict, List, Optional, Any +import json +import logging +import re import subprocess import time -import re +from typing import Any, Dict, List, Optional from ..base import BaseVMProvider, VMProviderType @@ -30,13 +30,13 @@ except (subprocess.SubprocessError, FileNotFoundError): class DockerProvider(BaseVMProvider): """ Docker VM Provider implementation using Docker containers. - + This provider uses Docker to run containers with the CUA Ubuntu image that includes computer-server for remote computer use. """ - + def __init__( - self, + self, port: Optional[int] = 8000, host: str = "localhost", storage: Optional[str] = None, @@ -47,13 +47,16 @@ class DockerProvider(BaseVMProvider): vnc_port: Optional[int] = 6901, ): """Initialize the Docker VM Provider. 
- + Args: port: Currently unused (VM provider port) host: Hostname for the API server (default: localhost) storage: Path for persistent VM storage shared_path: Path for shared folder between host and container image: Docker image to use (default: "trycua/cua-ubuntu:latest") + Supported images: + - "trycua/cua-ubuntu:latest" (Kasm-based) + - "trycua/cua-xfce:latest" (vanilla XFCE) verbose: Enable verbose logging ephemeral: Use ephemeral (temporary) storage vnc_port: Port for VNC interface (default: 6901) @@ -62,27 +65,43 @@ class DockerProvider(BaseVMProvider): self.api_port = 8000 self.vnc_port = vnc_port self.ephemeral = ephemeral - + # Handle ephemeral storage (temporary directory) if ephemeral: self.storage = "ephemeral" else: self.storage = storage - + self.shared_path = shared_path self.image = image self.verbose = verbose self._container_id = None self._running_containers = {} # Track running containers by name - + + # Detect image type and configure user directory accordingly + self._detect_image_config() + + def _detect_image_config(self): + """Detect image type and configure paths accordingly.""" + # Detect if this is a docker-xfce image or Kasm image + if "docker-xfce" in self.image.lower() or "xfce" in self.image.lower(): + self._home_dir = "/home/cua" + self._image_type = "docker-xfce" + logger.info(f"Detected docker-xfce image: using {self._home_dir}") + else: + # Default to Kasm configuration + self._home_dir = "/home/kasm-user" + self._image_type = "kasm" + logger.info(f"Detected Kasm image: using {self._home_dir}") + @property def provider_type(self) -> VMProviderType: """Return the provider type.""" return VMProviderType.DOCKER - + def _parse_memory(self, memory_str: str) -> str: """Parse memory string to Docker format. - + Examples: "8GB" -> "8g" "1024MB" -> "1024m" @@ -90,31 +109,31 @@ class DockerProvider(BaseVMProvider): """ if isinstance(memory_str, int): return f"{memory_str}m" - + if isinstance(memory_str, str): # Extract number and unit match = re.match(r"(\d+)([A-Za-z]*)", memory_str) if match: value, unit = match.groups() unit = unit.upper() - + if unit == "GB" or unit == "G": return f"{value}g" elif unit == "MB" or unit == "M" or unit == "": return f"{value}m" - + # Default fallback logger.warning(f"Could not parse memory string '{memory_str}', using 4g default") return "4g" # Default to 4GB - + async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Get VM information by name. - + Args: name: Name of the VM to get information for storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM information including status, IP address, etc. 
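`_parse_memory` normalises human-readable sizes to Docker's `--memory` syntax; a standalone re-implementation for illustration, with the mappings its docstring promises:

```python
import re

def parse_memory(memory) -> str:
    # "8GB" -> "8g", "1024MB" -> "1024m"; bare numbers and ints are megabytes;
    # anything unparseable falls back to "4g", as in the provider above.
    if isinstance(memory, int):
        return f"{memory}m"
    match = re.match(r"(\d+)([A-Za-z]*)", memory)
    if match:
        value, unit = match.groups()
        unit = unit.upper()
        if unit in ("GB", "G"):
            return f"{value}g"
        if unit in ("MB", "M", ""):
            return f"{value}m"
    return "4g"

assert parse_memory("8GB") == "8g"
assert parse_memory("1024MB") == "1024m"
assert parse_memory(512) == "512m"
```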
""" @@ -122,7 +141,7 @@ class DockerProvider(BaseVMProvider): # Check if container exists and get its status cmd = ["docker", "inspect", name] result = subprocess.run(cmd, capture_output=True, text=True) - + if result.returncode != 0: # Container doesn't exist return { @@ -131,14 +150,14 @@ class DockerProvider(BaseVMProvider): "ip_address": None, "ports": {}, "image": self.image, - "provider": "docker" + "provider": "docker", } - + # Parse container info container_info = json.loads(result.stdout)[0] state = container_info["State"] network_settings = container_info["NetworkSettings"] - + # Determine status if state["Running"]: status = "running" @@ -146,7 +165,7 @@ class DockerProvider(BaseVMProvider): status = "paused" else: status = "stopped" - + # Get IP address ip_address = network_settings.get("IPAddress", "") if not ip_address and "Networks" in network_settings: @@ -155,7 +174,7 @@ class DockerProvider(BaseVMProvider): if network_info.get("IPAddress"): ip_address = network_info["IPAddress"] break - + # Get port mappings ports = {} if "Ports" in network_settings and network_settings["Ports"]: @@ -168,7 +187,7 @@ class DockerProvider(BaseVMProvider): if mapping.get("HostPort"): ports[container_port] = mapping["HostPort"] break # Use the first valid mapping - + return { "name": name, "status": status, @@ -180,47 +199,46 @@ class DockerProvider(BaseVMProvider): "created": container_info["Created"], "started": state.get("StartedAt", ""), } - + except Exception as e: logger.error(f"Error getting VM info for {name}: {e}") import traceback + traceback.print_exc() - return { - "name": name, - "status": "error", - "error": str(e), - "provider": "docker" - } - + return {"name": name, "status": "error", "error": str(e), "provider": "docker"} + async def list_vms(self) -> List[Dict[str, Any]]: """List all Docker containers managed by this provider.""" try: # List all containers (running and stopped) with the CUA image cmd = ["docker", "ps", "-a", "--filter", f"ancestor={self.image}", "--format", "json"] result = subprocess.run(cmd, capture_output=True, text=True, check=True) - + containers = [] if result.stdout.strip(): - for line in result.stdout.strip().split('\n'): + for line in result.stdout.strip().split("\n"): if line.strip(): container_data = json.loads(line) vm_info = await self.get_vm(container_data["Names"]) containers.append(vm_info) - + return containers - + except subprocess.CalledProcessError as e: logger.error(f"Error listing containers: {e.stderr}") return [] except Exception as e: logger.error(f"Error listing VMs: {e}") import traceback + traceback.print_exc() return [] - - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + + async def run_vm( + self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Run a VM with the given options. 
- + Args: image: Name/tag of the Docker image to use name: Name of the container to run @@ -229,7 +247,7 @@ class DockerProvider(BaseVMProvider): - cpu: CPU limit (e.g., 2 for 2 cores) - vnc_port: Specific port for VNC interface - api_port: Specific port for computer-server API - + Returns: Dictionary with VM status information """ @@ -240,109 +258,120 @@ class DockerProvider(BaseVMProvider): logger.info(f"Container {name} is already running") return existing_vm elif existing_vm["status"] in ["stopped", "paused"]: - # Start existing container - logger.info(f"Starting existing container {name}") - start_cmd = ["docker", "start", name] - result = subprocess.run(start_cmd, capture_output=True, text=True, check=True) - - # Wait for container to be ready - await self._wait_for_container_ready(name) - return await self.get_vm(name, storage) - + if self.ephemeral: + # Delete existing container + logger.info(f"Deleting existing container {name}") + delete_cmd = ["docker", "rm", name] + result = subprocess.run(delete_cmd, capture_output=True, text=True, check=True) + else: + # Start existing container + logger.info(f"Starting existing container {name}") + start_cmd = ["docker", "start", name] + result = subprocess.run(start_cmd, capture_output=True, text=True, check=True) + + # Wait for container to be ready + await self._wait_for_container_ready(name) + return await self.get_vm(name, storage) + # Use provided image or default docker_image = image if image != "default" else self.image - + # Build docker run command cmd = ["docker", "run", "-d", "--name", name] - + # Add memory limit if specified if "memory" in run_opts: memory_limit = self._parse_memory(run_opts["memory"]) cmd.extend(["--memory", memory_limit]) - + # Add CPU limit if specified if "cpu" in run_opts: cpu_count = str(run_opts["cpu"]) cmd.extend(["--cpus", cpu_count]) - + # Add port mappings vnc_port = run_opts.get("vnc_port", self.vnc_port) api_port = run_opts.get("api_port", self.api_port) - + if vnc_port: cmd.extend(["-p", f"{vnc_port}:6901"]) # VNC port if api_port: cmd.extend(["-p", f"{api_port}:8000"]) # computer-server API port - + # Add volume mounts if storage is specified storage_path = storage or self.storage if storage_path and storage_path != "ephemeral": - # Mount storage directory - cmd.extend(["-v", f"{storage_path}:/home/kasm-user/storage"]) - + # Mount storage directory using detected home directory + cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"]) + # Add shared path if specified if self.shared_path: - cmd.extend(["-v", f"{self.shared_path}:/home/kasm-user/shared"]) - + # Mount shared directory using detected home directory + cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"]) + # Add environment variables cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password cmd.extend(["-e", "VNCOPTIONS=-disableBasicAuth"]) # Disable VNC basic auth - + + # Apply display resolution if provided (e.g., "1024x768") + display_resolution = run_opts.get("display") + if ( + isinstance(display_resolution, dict) + and "width" in display_resolution + and "height" in display_resolution + ): + cmd.extend( + [ + "-e", + f"VNC_RESOLUTION={display_resolution['width']}x{display_resolution['height']}", + ] + ) + # Add the image cmd.append(docker_image) - + logger.info(f"Running Docker container with command: {' '.join(cmd)}") - + # Run the container result = subprocess.run(cmd, capture_output=True, text=True, check=True) container_id = result.stdout.strip() - + logger.info(f"Container {name} started with ID: 
{container_id[:12]}") - + # Store container info self._container_id = container_id self._running_containers[name] = container_id - + # Wait for container to be ready await self._wait_for_container_ready(name) - + # Return VM info vm_info = await self.get_vm(name, storage) vm_info["container_id"] = container_id[:12] - + return vm_info - + except subprocess.CalledProcessError as e: error_msg = f"Failed to run container {name}: {e.stderr}" logger.error(error_msg) - return { - "name": name, - "status": "error", - "error": error_msg, - "provider": "docker" - } + return {"name": name, "status": "error", "error": error_msg, "provider": "docker"} except Exception as e: error_msg = f"Error running VM {name}: {e}" logger.error(error_msg) - return { - "name": name, - "status": "error", - "error": error_msg, - "provider": "docker" - } - + return {"name": name, "status": "error", "error": error_msg, "provider": "docker"} + async def _wait_for_container_ready(self, container_name: str, timeout: int = 60) -> bool: """Wait for the Docker container to be fully ready. - + Args: container_name: Name of the Docker container to check timeout: Maximum time to wait in seconds (default: 60 seconds) - + Returns: True if the container is running and ready """ logger.info(f"Waiting for container {container_name} to be ready...") - + start_time = time.time() while time.time() - start_time < timeout: try: @@ -350,97 +379,99 @@ class DockerProvider(BaseVMProvider): vm_info = await self.get_vm(container_name) if vm_info["status"] == "running": logger.info(f"Container {container_name} is running") - + # Additional check: try to connect to computer-server API # This is optional - we'll just wait a bit more for services to start await asyncio.sleep(5) return True - + except Exception as e: logger.debug(f"Container {container_name} not ready yet: {e}") - + await asyncio.sleep(2) - + logger.warning(f"Container {container_name} did not become ready within {timeout} seconds") return False - + async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Stop a running VM by stopping the Docker container.""" try: logger.info(f"Stopping container {name}") - + # Stop the container cmd = ["docker", "stop", name] result = subprocess.run(cmd, capture_output=True, text=True, check=True) - + # Remove from running containers tracking if name in self._running_containers: del self._running_containers[name] - + logger.info(f"Container {name} stopped successfully") - + + # Delete container if ephemeral=True + if self.ephemeral: + cmd = ["docker", "rm", name] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return { "name": name, "status": "stopped", "message": "Container stopped successfully", - "provider": "docker" + "provider": "docker", } - + except subprocess.CalledProcessError as e: error_msg = f"Failed to stop container {name}: {e.stderr}" logger.error(error_msg) - return { - "name": name, - "status": "error", - "error": error_msg, - "provider": "docker" - } + return {"name": name, "status": "error", "error": error_msg, "provider": "docker"} except Exception as e: error_msg = f"Error stopping VM {name}: {e}" logger.error(error_msg) - return { - "name": name, - "status": "error", - "error": error_msg, - "provider": "docker" - } - - async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + return {"name": name, "status": "error", "error": error_msg, "provider": "docker"} + + async def restart_vm(self, name: str, storage: 
Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("DockerProvider does not support restarting VMs.") + + async def update_vm( + self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Update VM configuration. - - Note: Docker containers cannot be updated while running. + + Note: Docker containers cannot be updated while running. This method will return an error suggesting to recreate the container. """ return { "name": name, "status": "error", "error": "Docker containers cannot be updated while running. Please stop and recreate the container with new options.", - "provider": "docker" + "provider": "docker", } - + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. - + Args: name: Name of the VM to get the IP for storage: Optional storage path override retry_delay: Delay between retries in seconds (default: 2) - + Returns: IP address of the VM when it becomes available """ logger.info(f"Getting IP address for container {name}") - + total_attempts = 0 while True: total_attempts += 1 - + try: vm_info = await self.get_vm(name, storage) - + if vm_info["status"] == "error": - raise Exception(f"VM is in error state: {vm_info.get('error', 'Unknown error')}") - + raise Exception( + f"VM is in error state: {vm_info.get('error', 'Unknown error')}" + ) + # TODO: for now, return localhost # it seems the docker container is not accessible from the host # on WSL2, unless you port forward? not sure @@ -453,40 +484,42 @@ class DockerProvider(BaseVMProvider): if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): logger.info(f"Got valid container IP address: {ip}") return ip - + # For Docker containers, we can also use localhost if ports are mapped if vm_info["status"] == "running" and vm_info.get("ports"): - logger.info(f"Container is running with port mappings, using localhost") + logger.info("Container is running with port mappings, using localhost") return "127.0.0.1" - + # Check the container status status = vm_info.get("status", "unknown") - + if status == "stopped": logger.info(f"Container status is {status}, but still waiting for it to start") elif status != "running": logger.info(f"Container is not running yet (status: {status}). Waiting...") else: logger.info("Container is running but no valid IP address yet. Waiting...") - + except Exception as e: logger.warning(f"Error getting container {name} IP: {e}, continuing to wait...") - + # Wait before next retry await asyncio.sleep(retry_delay) - + # Add progress log every 10 attempts if total_attempts % 10 == 0: - logger.info(f"Still waiting for container {name} IP after {total_attempts} attempts...") - + logger.info( + f"Still waiting for container {name} IP after {total_attempts} attempts..." + ) + async def __aenter__(self): """Async context manager entry.""" logger.debug("Entering DockerProvider context") return self - + async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit. - + This method handles cleanup of running containers if needed. 
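Putting the pieces of `run_vm` together: for a hypothetical container with memory, CPU, and display options and no persistent storage, the assembled command looks roughly like the list below. All values are illustrative; note that `display` is expected as a dict with `width`/`height` keys, despite the `"1024x768"` string in the comment above.

```python
# run_opts = {"memory": "8GB", "cpu": 2, "display": {"width": 1024, "height": 768}}
cmd = [
    "docker", "run", "-d", "--name", "demo-vm",
    "--memory", "8g",                 # via _parse_memory("8GB")
    "--cpus", "2",
    "-p", "6901:6901",                # vnc_port -> container VNC
    "-p", "8000:8000",                # api_port -> computer-server API
    "-e", "VNC_PW=password",
    "-e", "VNCOPTIONS=-disableBasicAuth",
    "-e", "VNC_RESOLUTION=1024x768",  # only when display is a width/height dict
    "trycua/cua-ubuntu:latest",
]
print(" ".join(cmd))
```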
""" logger.debug(f"Exiting DockerProvider context, handling exceptions: {exc_type}") diff --git a/libs/python/computer/computer/providers/factory.py b/libs/python/computer/computer/providers/factory.py index b95c2c61..634b4293 100644 --- a/libs/python/computer/computer/providers/factory.py +++ b/libs/python/computer/computer/providers/factory.py @@ -1,7 +1,7 @@ """Factory for creating VM providers.""" import logging -from typing import Dict, Optional, Any, Type, Union +from typing import Any, Dict, Optional, Type, Union from .base import BaseVMProvider, VMProviderType @@ -26,7 +26,7 @@ class VMProviderFactory: **kwargs, ) -> BaseVMProvider: """Create a VM provider of the specified type. - + Args: provider_type: Type of VM provider to create port: Port for the API server @@ -38,10 +38,10 @@ class VMProviderFactory: verbose: Enable verbose logging ephemeral: Use ephemeral (temporary) storage noVNC_port: Specific port for noVNC interface (for Lumier provider) - + Returns: An instance of the requested VM provider - + Raises: ImportError: If the required dependencies for the provider are not installed ValueError: If the provider type is not supported @@ -52,21 +52,18 @@ class VMProviderFactory: provider_type = VMProviderType(provider_type.lower()) except ValueError: provider_type = VMProviderType.UNKNOWN - + if provider_type == VMProviderType.LUME: try: - from .lume import LumeProvider, HAS_LUME + from .lume import HAS_LUME, LumeProvider + if not HAS_LUME: raise ImportError( "The pylume package is required for LumeProvider. " "Please install it with 'pip install cua-computer[lume]'" ) return LumeProvider( - port=port, - host=host, - storage=storage, - verbose=verbose, - ephemeral=ephemeral + port=port, host=host, storage=storage, verbose=verbose, ephemeral=ephemeral ) except ImportError as e: logger.error(f"Failed to import LumeProvider: {e}") @@ -76,7 +73,8 @@ class VMProviderFactory: ) from e elif provider_type == VMProviderType.LUMIER: try: - from .lumier import LumierProvider, HAS_LUMIER + from .lumier import HAS_LUMIER, LumierProvider + if not HAS_LUMIER: raise ImportError( "Docker is required for LumierProvider. " @@ -90,7 +88,7 @@ class VMProviderFactory: image=image or "macos-sequoia-cua:latest", verbose=verbose, ephemeral=ephemeral, - noVNC_port=noVNC_port + noVNC_port=noVNC_port, ) except ImportError as e: logger.error(f"Failed to import LumierProvider: {e}") @@ -102,6 +100,7 @@ class VMProviderFactory: elif provider_type == VMProviderType.CLOUD: try: from .cloud import CloudProvider + return CloudProvider( verbose=verbose, **kwargs, @@ -114,7 +113,8 @@ class VMProviderFactory: ) from e elif provider_type == VMProviderType.WINSANDBOX: try: - from .winsandbox import WinSandboxProvider, HAS_WINSANDBOX + from .winsandbox import HAS_WINSANDBOX, WinSandboxProvider + if not HAS_WINSANDBOX: raise ImportError( "pywinsandbox is required for WinSandboxProvider. " @@ -126,7 +126,7 @@ class VMProviderFactory: storage=storage, verbose=verbose, ephemeral=ephemeral, - **kwargs + **kwargs, ) except ImportError as e: logger.error(f"Failed to import WinSandboxProvider: {e}") @@ -136,7 +136,8 @@ class VMProviderFactory: ) from e elif provider_type == VMProviderType.DOCKER: try: - from .docker import DockerProvider, HAS_DOCKER + from .docker import HAS_DOCKER, DockerProvider + if not HAS_DOCKER: raise ImportError( "Docker is required for DockerProvider. 
" @@ -150,7 +151,7 @@ class VMProviderFactory: image=image or "trycua/cua-ubuntu:latest", verbose=verbose, ephemeral=ephemeral, - vnc_port=noVNC_port + vnc_port=noVNC_port, ) except ImportError as e: logger.error(f"Failed to import DockerProvider: {e}") diff --git a/libs/python/computer/computer/providers/lume/__init__.py b/libs/python/computer/computer/providers/lume/__init__.py index 8196c49b..70a48a47 100644 --- a/libs/python/computer/computer/providers/lume/__init__.py +++ b/libs/python/computer/computer/providers/lume/__init__.py @@ -2,6 +2,7 @@ try: from .provider import LumeProvider + HAS_LUME = True __all__ = ["LumeProvider"] except ImportError: diff --git a/libs/python/computer/computer/providers/lume/provider.py b/libs/python/computer/computer/providers/lume/provider.py index 5816e53e..e9b953c9 100644 --- a/libs/python/computer/computer/providers/lume/provider.py +++ b/libs/python/computer/computer/providers/lume/provider.py @@ -4,39 +4,40 @@ This provider uses direct curl commands to interact with the Lume API, removing the dependency on the pylume Python package. """ -import os -import re import asyncio import json import logging +import os +import re import subprocess import urllib.parse -from typing import Dict, Any, Optional, List, Tuple +from typing import Any, Dict, List, Optional, Tuple -from ..base import BaseVMProvider, VMProviderType from ...logger import Logger, LogLevel +from ..base import BaseVMProvider, VMProviderType from ..lume_api import ( + HAS_CURL, lume_api_get, + lume_api_pull, lume_api_run, lume_api_stop, lume_api_update, - lume_api_pull, - HAS_CURL, - parse_memory + parse_memory, ) # Setup logging logger = logging.getLogger(__name__) + class LumeProvider(BaseVMProvider): """Lume VM provider implementation using direct curl commands. - + This provider uses curl to interact with the Lume API server, removing the dependency on the pylume Python package. """ - + def __init__( - self, + self, port: int = 7777, host: str = "localhost", storage: Optional[str] = None, @@ -44,7 +45,7 @@ class LumeProvider(BaseVMProvider): ephemeral: bool = False, ): """Initialize the Lume provider. - + Args: port: Port for the Lume API server (default: 7777) host: Host to use for API connections (default: localhost) @@ -56,42 +57,44 @@ class LumeProvider(BaseVMProvider): "curl is required for LumeProvider. " "Please ensure it is installed and in your PATH." ) - + self.host = host self.port = port # Default port for Lume API self.storage = storage self.verbose = verbose self.ephemeral = ephemeral # If True, VMs will be deleted after stopping - + # Base API URL for Lume API calls self.api_base_url = f"http://{self.host}:{self.port}" - + self.logger = logging.getLogger(__name__) - + @property def provider_type(self) -> VMProviderType: """Get the provider type.""" return VMProviderType.LUME - + async def __aenter__(self): """Enter async context manager.""" # No initialization needed, just return self return self - + async def __aexit__(self, exc_type, exc_val, exc_tb): """Exit async context manager.""" # No cleanup needed pass - - def _lume_api_get(self, vm_name: str = "", storage: Optional[str] = None, debug: bool = False) -> Dict[str, Any]: + + def _lume_api_get( + self, vm_name: str = "", storage: Optional[str] = None, debug: bool = False + ) -> Dict[str, Any]: """Get VM information using shared lume_api function. - + Args: vm_name: Optional name of the VM to get info for. If empty, lists all VMs. storage: Optional storage path override. 
If provided, this will be used instead of self.storage debug: Whether to show debug output - + Returns: Dictionary with VM status information parsed from JSON response """ @@ -102,105 +105,105 @@ class LumeProvider(BaseVMProvider): port=self.port, storage=storage if storage is not None else self.storage, debug=debug, - verbose=self.verbose + verbose=self.verbose, ) - - def _lume_api_run(self, vm_name: str, run_opts: Dict[str, Any], debug: bool = False) -> Dict[str, Any]: + + def _lume_api_run( + self, vm_name: str, run_opts: Dict[str, Any], debug: bool = False + ) -> Dict[str, Any]: """Run a VM using shared lume_api function. - + Args: vm_name: Name of the VM to run run_opts: Dictionary of run options debug: Whether to show debug output - + Returns: Dictionary with API response or error information """ # Use the shared implementation from lume_api module return lume_api_run( - vm_name=vm_name, + vm_name=vm_name, host=self.host, port=self.port, run_opts=run_opts, storage=self.storage, debug=debug, - verbose=self.verbose + verbose=self.verbose, ) - + def _lume_api_stop(self, vm_name: str, debug: bool = False) -> Dict[str, Any]: """Stop a VM using shared lume_api function. - + Args: vm_name: Name of the VM to stop debug: Whether to show debug output - + Returns: Dictionary with API response or error information """ # Use the shared implementation from lume_api module return lume_api_stop( - vm_name=vm_name, + vm_name=vm_name, host=self.host, port=self.port, storage=self.storage, debug=debug, - verbose=self.verbose + verbose=self.verbose, ) - - def _lume_api_update(self, vm_name: str, update_opts: Dict[str, Any], debug: bool = False) -> Dict[str, Any]: + + def _lume_api_update( + self, vm_name: str, update_opts: Dict[str, Any], debug: bool = False + ) -> Dict[str, Any]: """Update VM configuration using shared lume_api function. - + Args: vm_name: Name of the VM to update update_opts: Dictionary of update options debug: Whether to show debug output - + Returns: Dictionary with API response or error information """ # Use the shared implementation from lume_api module return lume_api_update( - vm_name=vm_name, + vm_name=vm_name, host=self.host, port=self.port, update_opts=update_opts, storage=self.storage, debug=debug, - verbose=self.verbose + verbose=self.verbose, ) - + async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Get VM information by name. - + Args: name: Name of the VM to get information for storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM information including status, IP address, etc. - + Note: If storage is not provided, the provider's default storage path will be used. The storage parameter allows overriding the storage location for this specific call. """ if not HAS_CURL: logger.error("curl is not available. 
Cannot get VM status.") - return { - "name": name, - "status": "unavailable", - "error": "curl is not available" - } - + return {"name": name, "status": "unavailable", "error": "curl is not available"} + # First try to get detailed VM info from the API try: # Query the Lume API for VM status using the provider's storage_path vm_info = self._lume_api_get( - vm_name=name, + vm_name=name, storage=storage if storage is not None else self.storage, - debug=self.verbose + debug=self.verbose, ) - + # Check for API errors if "error" in vm_info: logger.debug(f"API request error: {vm_info['error']}") @@ -209,12 +212,12 @@ class LumeProvider(BaseVMProvider): "name": name, "status": "starting", # VM is still starting - do not attempt to connect yet "api_status": "error", - "error": vm_info["error"] + "error": vm_info["error"], } - + # Process the VM status information vm_status = vm_info.get("status", "unknown") - + # Check if VM is stopped or not running - don't wait for IP in this case if vm_status == "stopped": logger.info(f"VM {name} is in '{vm_status}' state - not waiting for IP address") @@ -222,10 +225,10 @@ class LumeProvider(BaseVMProvider): result = { "name": name, "status": vm_status, - **vm_info # Include all original fields from the API response + **vm_info, # Include all original fields from the API response } return result - + # Handle field name differences between APIs # Some APIs use camelCase, others use snake_case if "vncUrl" in vm_info: @@ -234,7 +237,7 @@ class LumeProvider(BaseVMProvider): vnc_url = vm_info["vnc_url"] else: vnc_url = "" - + if "ipAddress" in vm_info: ip_address = vm_info["ipAddress"] elif "ip_address" in vm_info: @@ -243,40 +246,42 @@ class LumeProvider(BaseVMProvider): # If no IP address is provided and VM is supposed to be running, # report it as still starting ip_address = None - logger.info(f"VM {name} is in '{vm_status}' state but no IP address found - reporting as still starting") - + logger.info( + f"VM {name} is in '{vm_status}' state but no IP address found - reporting as still starting" + ) + logger.info(f"VM {name} status: {vm_status}") - + # Return the complete status information result = { "name": name, "status": vm_status if vm_status else "running", "ip_address": ip_address, "vnc_url": vnc_url, - "api_status": "ok" + "api_status": "ok", } - + # Include all original fields from the API response if isinstance(vm_info, dict): for key, value in vm_info.items(): if key not in result: # Don't override our carefully processed fields result[key] = value - + return result - + except Exception as e: logger.error(f"Failed to get VM status: {e}") # Return a fallback status that indicates the VM is not ready yet return { "name": name, "status": "initializing", # VM is still initializing - "error": f"Failed to get VM status: {str(e)}" + "error": f"Failed to get VM status: {str(e)}", } - + async def list_vms(self) -> List[Dict[str, Any]]: """List all available VMs.""" result = self._lume_api_get(debug=self.verbose) - + # Extract the VMs list from the response if "vms" in result and isinstance(result["vms"], list): return result["vms"] @@ -285,48 +290,49 @@ class LumeProvider(BaseVMProvider): return [] else: return [] - - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + + async def run_vm( + self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Run a VM with the given options. 
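The `vncUrl`/`vnc_url` and `ipAddress`/`ip_address` handling above exists because the API mixes camelCase and snake_case field names. The same normalization as a small helper (name hypothetical):

```python
def pick(d: dict, camel: str, snake: str, default=None):
    # get_vm checks the camelCase spelling first, then snake_case,
    # then falls back to a default, exactly as in the branches above.
    if camel in d:
        return d[camel]
    if snake in d:
        return d[snake]
    return default

info = {"vncUrl": "vnc://localhost:5900", "ip_address": "192.168.64.5"}
assert pick(info, "vncUrl", "vnc_url", "") == "vnc://localhost:5900"
assert pick(info, "ipAddress", "ip_address") == "192.168.64.5"
```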
- + If the VM does not exist in the storage location, this will attempt to pull it from the Lume registry first. - + Args: image: Image name to use when pulling the VM if it doesn't exist name: Name of the VM to run run_opts: Dictionary of run options (memory, cpu, etc.) storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM run status and information """ # First check if VM exists by trying to get its info vm_info = await self.get_vm(name, storage=storage) - + if "error" in vm_info: # VM doesn't exist, try to pull it - self.logger.info(f"VM {name} not found, attempting to pull image {image} from registry...") - - # Call pull_vm with the image parameter - pull_result = await self.pull_vm( - name=name, - image=image, - storage=storage + self.logger.info( + f"VM {name} not found, attempting to pull image {image} from registry..." ) - + + # Call pull_vm with the image parameter + pull_result = await self.pull_vm(name=name, image=image, storage=storage) + # Check if pull was successful if "error" in pull_result: self.logger.error(f"Failed to pull VM image: {pull_result['error']}") return pull_result # Return the error from pull - + self.logger.info(f"Successfully pulled VM image {image} as {name}") - + # Now run the VM with the given options self.logger.info(f"Running VM {name} with options: {run_opts}") - + from ..lume_api import lume_api_run + return lume_api_run( vm_name=name, host=self.host, @@ -334,52 +340,48 @@ class LumeProvider(BaseVMProvider): run_opts=run_opts, storage=storage if storage is not None else self.storage, debug=self.verbose, - verbose=self.verbose + verbose=self.verbose, ) - + async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Stop a running VM. - + If this provider was initialized with ephemeral=True, the VM will also be deleted after it is stopped. - + Args: name: Name of the VM to stop storage: Optional storage path override - + Returns: Dictionary with stop status and information """ # Stop the VM first stop_result = self._lume_api_stop(name, debug=self.verbose) - + # Log ephemeral status for debugging self.logger.info(f"Ephemeral mode status: {self.ephemeral}") - + # If ephemeral mode is enabled, delete the VM after stopping if self.ephemeral and (stop_result.get("success", False) or "error" not in stop_result): self.logger.info(f"Ephemeral mode enabled - deleting VM {name} after stopping") try: delete_result = await self.delete_vm(name, storage=storage) - + # Return combined result return { **stop_result, # Include all stop result info "deleted": True, - "delete_result": delete_result + "delete_result": delete_result, } except Exception as e: self.logger.error(f"Failed to delete ephemeral VM {name}: {e}") # Include the error but still return stop result - return { - **stop_result, - "deleted": False, - "delete_error": str(e) - } - + return {**stop_result, "deleted": False, "delete_error": str(e)} + # Just return the stop result if not ephemeral return stop_result - + async def pull_vm( self, name: str, @@ -390,7 +392,7 @@ class LumeProvider(BaseVMProvider): pull_opts: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: """Pull a VM image from the registry. - + Args: name: Name for the VM after pulling image: The image name to pull (e.g. 
'macos-sequoia-cua:latest') @@ -398,31 +400,31 @@ class LumeProvider(BaseVMProvider): registry: Registry to pull from (default: ghcr.io) organization: Organization in registry (default: trycua) pull_opts: Additional options for pulling the VM (optional) - + Returns: Dictionary with information about the pulled VM - + Raises: RuntimeError: If pull operation fails or image is not provided """ # Validate image parameter if not image: raise ValueError("Image parameter is required for pull_vm") - + self.logger.info(f"Pulling VM image '{image}' as '{name}'") self.logger.info("You can check the pull progress using: lume logs -f") - + # Set default pull_opts if not provided if pull_opts is None: pull_opts = {} - + # Log information about the operation self.logger.debug(f"Pull storage location: {storage or 'default'}") - + try: # Call the lume_api_pull function from lume_api.py from ..lume_api import lume_api_pull - + result = lume_api_pull( image=image, name=name, @@ -432,110 +434,115 @@ class LumeProvider(BaseVMProvider): registry=registry, organization=organization, debug=self.verbose, - verbose=self.verbose + verbose=self.verbose, ) - + # Check for errors in the result if "error" in result: self.logger.error(f"Failed to pull VM image: {result['error']}") return result - + self.logger.info(f"Successfully pulled VM image '{image}' as '{name}'") return result except Exception as e: self.logger.error(f"Failed to pull VM image '{image}': {e}") return {"error": f"Failed to pull VM: {str(e)}"} - + async def delete_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Delete a VM permanently. - + Args: name: Name of the VM to delete storage: Optional storage path override - + Returns: Dictionary with delete status and information """ self.logger.info(f"Deleting VM {name}...") - + try: # Call the lume_api_delete function we created from ..lume_api import lume_api_delete - + result = lume_api_delete( vm_name=name, host=self.host, port=self.port, storage=storage if storage is not None else self.storage, debug=self.verbose, - verbose=self.verbose + verbose=self.verbose, ) - + # Check for errors in the result if "error" in result: self.logger.error(f"Failed to delete VM: {result['error']}") return result - + self.logger.info(f"Successfully deleted VM '{name}'") return result except Exception as e: self.logger.error(f"Failed to delete VM '{name}': {e}") return {"error": f"Failed to delete VM: {str(e)}"} - - async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + + async def update_vm( + self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Update VM configuration.""" return self._lume_api_update(name, update_opts, debug=self.verbose) - + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("LumeProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. 
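From the caller's side, the ephemeral contract of `stop_vm` means a single call both stops and removes the VM. A hedged sketch of inspecting the combined result documented above (`deleted` flag plus `delete_result` or `delete_error`):

```python
async def stop_and_report(provider, name: str) -> dict:
    # Hypothetical driver: on an ephemeral provider, stop_vm merges the stop
    # result with the outcome of the follow-up delete.
    result = await provider.stop_vm(name)
    if result.get("deleted"):
        print(f"{name}: stopped and deleted (ephemeral)")
    elif "delete_error" in result:
        print(f"{name}: stopped, but delete failed: {result['delete_error']}")
    else:
        print(f"{name}: stopped")
    return result
```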
- + Args: name: Name of the VM to get the IP for storage: Optional storage path override retry_delay: Delay between retries in seconds (default: 2) - + Returns: IP address of the VM when it becomes available """ # Track total attempts for logging purposes total_attempts = 0 - + # Loop indefinitely until we get a valid IP while True: total_attempts += 1 - + # Log retry message but not on first attempt if total_attempts > 1: self.logger.info(f"Waiting for VM {name} IP address (attempt {total_attempts})...") - + try: # Get VM information vm_info = await self.get_vm(name, storage=storage) - + # Check if we got a valid IP ip = vm_info.get("ip_address", None) if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): self.logger.info(f"Got valid VM IP address: {ip}") return ip - + # Check the VM status status = vm_info.get("status", "unknown") - + # If VM is not running yet, log and wait if status != "running": self.logger.info(f"VM is not running yet (status: {status}). Waiting...") # If VM is running but no IP yet, wait and retry else: self.logger.info("VM is running but no valid IP address yet. Waiting...") - + except Exception as e: self.logger.warning(f"Error getting VM {name} IP: {e}, continuing to wait...") - + # Wait before next retry await asyncio.sleep(retry_delay) - + # Add progress log every 10 attempts if total_attempts % 10 == 0: - self.logger.info(f"Still waiting for VM {name} IP after {total_attempts} attempts...") - - + self.logger.info( + f"Still waiting for VM {name} IP after {total_attempts} attempts..." + ) diff --git a/libs/python/computer/computer/providers/lume_api.py b/libs/python/computer/computer/providers/lume_api.py index 3cbe1097..d034e1df 100644 --- a/libs/python/computer/computer/providers/lume_api.py +++ b/libs/python/computer/computer/providers/lume_api.py @@ -4,11 +4,11 @@ This module contains shared functions for interacting with the Lume API, used by both the LumeProvider and LumierProvider classes. """ -import logging import json +import logging import subprocess import urllib.parse -from typing import Dict, List, Optional, Any +from typing import Any, Dict, List, Optional # Setup logging logger = logging.getLogger(__name__) @@ -27,10 +27,10 @@ def lume_api_get( port: int, storage: Optional[str] = None, debug: bool = False, - verbose: bool = False + verbose: bool = False, ) -> Dict[str, Any]: """Use curl to get VM information from Lume API. 
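Both the Docker and Lume providers implement `get_ip` as an unbounded poll with a progress log every tenth attempt. The shared shape, as a generic sketch with hypothetical names:

```python
import asyncio

async def wait_for_value(fetch, retry_delay: float = 2.0):
    # fetch is an async callable returning the value or None. The loop never
    # gives up, matching the "wait indefinitely" contract of get_ip above.
    attempts = 0
    while True:
        attempts += 1
        try:
            value = await fetch()
            if value:
                return value
        except Exception as exc:
            print(f"error: {exc}, continuing to wait...")
        await asyncio.sleep(retry_delay)
        if attempts % 10 == 0:
            print(f"still waiting after {attempts} attempts...")
```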
- + Args: vm_name: Name of the VM to get info for host: API host @@ -38,46 +38,46 @@ def lume_api_get( storage: Storage path for the VM debug: Whether to show debug output verbose: Enable verbose logging - + Returns: Dictionary with VM status information parsed from JSON response """ # URL encode the storage parameter for the query encoded_storage = "" storage_param = "" - + if storage: # First encode the storage path properly - encoded_storage = urllib.parse.quote(storage, safe='') + encoded_storage = urllib.parse.quote(storage, safe="") storage_param = f"?storage={encoded_storage}" - + # Construct API URL with encoded storage parameter if needed api_url = f"http://{host}:{port}/lume/vms/{vm_name}{storage_param}" - + # Construct the curl command with increased timeouts for more reliability # --connect-timeout: Time to establish connection (15 seconds) # --max-time: Maximum time for the whole operation (20 seconds) # -f: Fail silently (no output at all) on server errors # Add single quotes around URL to ensure special characters are handled correctly cmd = ["curl", "--connect-timeout", "15", "--max-time", "20", "-s", "-f", f"'{api_url}'"] - + # For logging and display, show the properly escaped URL display_cmd = ["curl", "--connect-timeout", "15", "--max-time", "20", "-s", "-f", api_url] - + # Only print the curl command when debug is enabled - display_curl_string = ' '.join(display_cmd) + display_curl_string = " ".join(display_cmd) logger.debug(f"Executing API request: {display_curl_string}") - + # Execute the command - for execution we need to use shell=True to handle URLs with special characters try: # Use a single string with shell=True for proper URL handling - shell_cmd = ' '.join(cmd) + shell_cmd = " ".join(cmd) result = subprocess.run(shell_cmd, shell=True, capture_output=True, text=True) - + # Handle curl exit codes if result.returncode != 0: curl_error = "Unknown error" - + # Map common curl error codes to helpful messages if result.returncode == 7: curl_error = "Failed to connect to the API server - it might still be starting up" @@ -86,35 +86,42 @@ def lume_api_get( elif result.returncode == 28: curl_error = "Operation timeout - the API server is taking too long to respond" elif result.returncode == 52: - curl_error = "Empty reply from server - the API server is starting but not fully ready yet" + curl_error = ( + "Empty reply from server - the API server is starting but not fully ready yet" + ) elif result.returncode == 56: curl_error = "Network problem during data transfer - check container networking" - + # Only log at debug level to reduce noise during retries logger.debug(f"API request failed with code {result.returncode}: {curl_error}") - + # Return a more useful error message return { "error": f"API request failed: {curl_error}", "curl_code": result.returncode, "vm_name": vm_name, - "status": "unknown" # We don't know the actual status due to API error + "status": "unknown", # We don't know the actual status due to API error } - + # Try to parse the response as JSON if result.stdout and result.stdout.strip(): try: vm_status = json.loads(result.stdout) if debug or verbose: - logger.info(f"Successfully parsed VM status: {vm_status.get('status', 'unknown')}") + logger.info( + f"Successfully parsed VM status: {vm_status.get('status', 'unknown')}" + ) return vm_status except json.JSONDecodeError as e: # Return the raw response if it's not valid JSON logger.warning(f"Invalid JSON response: {e}") if "Virtual machine not found" in result.stdout: return {"status": "not_found", 
"message": "VM not found in Lume API"} - - return {"error": f"Invalid JSON response: {result.stdout[:100]}...", "status": "unknown"} + + return { + "error": f"Invalid JSON response: {result.stdout[:100]}...", + "status": "unknown", + } else: return {"error": "Empty response from API", "status": "unknown"} except subprocess.SubprocessError as e: @@ -129,10 +136,10 @@ def lume_api_run( run_opts: Dict[str, Any], storage: Optional[str] = None, debug: bool = False, - verbose: bool = False + verbose: bool = False, ) -> Dict[str, Any]: """Run a VM using curl. - + Args: vm_name: Name of the VM to run host: API host @@ -141,53 +148,62 @@ def lume_api_run( storage: Storage path for the VM debug: Whether to show debug output verbose: Enable verbose logging - + Returns: Dictionary with API response or error information """ # Construct API URL api_url = f"http://{host}:{port}/lume/vms/{vm_name}/run" - + # Prepare JSON payload with required parameters payload = {} - + # Add CPU cores if specified if "cpu" in run_opts: payload["cpu"] = run_opts["cpu"] - + # Add memory if specified if "memory" in run_opts: payload["memory"] = run_opts["memory"] - + # Add storage parameter if specified if storage: payload["storage"] = storage elif "storage" in run_opts: payload["storage"] = run_opts["storage"] - + # Add shared directories if specified if "shared_directories" in run_opts and run_opts["shared_directories"]: payload["sharedDirectories"] = run_opts["shared_directories"] - + # Log the payload for debugging logger.debug(f"API payload: {json.dumps(payload, indent=2)}") - + # Construct the curl command cmd = [ - "curl", "--connect-timeout", "30", "--max-time", "30", - "-s", "-X", "POST", "-H", "Content-Type: application/json", - "-d", json.dumps(payload), - api_url + "curl", + "--connect-timeout", + "30", + "--max-time", + "30", + "-s", + "-X", + "POST", + "-H", + "Content-Type: application/json", + "-d", + json.dumps(payload), + api_url, ] - + # Execute the command try: result = subprocess.run(cmd, capture_output=True, text=True) - + if result.returncode != 0: logger.warning(f"API request failed with code {result.returncode}: {result.stderr}") return {"error": f"API request failed: {result.stderr}"} - + # Try to parse the response as JSON if result.stdout and result.stdout.strip(): try: @@ -195,7 +211,11 @@ def lume_api_run( return response except json.JSONDecodeError: # Return the raw response if it's not valid JSON - return {"success": True, "message": "VM started successfully", "raw_response": result.stdout} + return { + "success": True, + "message": "VM started successfully", + "raw_response": result.stdout, + } else: return {"success": True, "message": "VM started successfully"} except subprocess.SubprocessError as e: @@ -209,10 +229,10 @@ def lume_api_stop( port: int, storage: Optional[str] = None, debug: bool = False, - verbose: bool = False + verbose: bool = False, ) -> Dict[str, Any]: """Stop a VM using curl. 
- + Args: vm_name: Name of the VM to stop host: API host @@ -220,39 +240,48 @@ def lume_api_stop( storage: Storage path for the VM debug: Whether to show debug output verbose: Enable verbose logging - + Returns: Dictionary with API response or error information """ # Construct API URL api_url = f"http://{host}:{port}/lume/vms/{vm_name}/stop" - + # Prepare JSON payload with required parameters payload = {} - + # Add storage path if specified if storage: payload["storage"] = storage - + # Construct the curl command cmd = [ - "curl", "--connect-timeout", "15", "--max-time", "20", - "-s", "-X", "POST", "-H", "Content-Type: application/json", - "-d", json.dumps(payload), - api_url + "curl", + "--connect-timeout", + "15", + "--max-time", + "20", + "-s", + "-X", + "POST", + "-H", + "Content-Type: application/json", + "-d", + json.dumps(payload), + api_url, ] - + # Execute the command try: if debug or verbose: logger.info(f"Executing: {' '.join(cmd)}") - + result = subprocess.run(cmd, capture_output=True, text=True) - + if result.returncode != 0: logger.warning(f"API request failed with code {result.returncode}: {result.stderr}") return {"error": f"API request failed: {result.stderr}"} - + # Try to parse the response as JSON if result.stdout and result.stdout.strip(): try: @@ -260,7 +289,11 @@ def lume_api_stop( return response except json.JSONDecodeError: # Return the raw response if it's not valid JSON - return {"success": True, "message": "VM stopped successfully", "raw_response": result.stdout} + return { + "success": True, + "message": "VM stopped successfully", + "raw_response": result.stdout, + } else: return {"success": True, "message": "VM stopped successfully"} except subprocess.SubprocessError as e: @@ -275,10 +308,10 @@ def lume_api_update( update_opts: Dict[str, Any], storage: Optional[str] = None, debug: bool = False, - verbose: bool = False + verbose: bool = False, ) -> Dict[str, Any]: """Update VM settings using curl. 
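The stop call, like run and update, is a JSON POST issued through curl. An illustrative argv with placeholder host, port, and VM name, matching the timeouts and headers in the command list above:

```python
import json

payload = {"storage": "/Users/me/.lume"}  # storage key only when specified
cmd = [
    "curl", "--connect-timeout", "15", "--max-time", "20",
    "-s", "-X", "POST",
    "-H", "Content-Type: application/json",
    "-d", json.dumps(payload),
    "http://localhost:7777/lume/vms/my-vm/stop",
]
print(" ".join(cmd))
```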
- + Args: vm_name: Name of the VM to update host: API host @@ -287,47 +320,56 @@ def lume_api_update( storage: Storage path for the VM debug: Whether to show debug output verbose: Enable verbose logging - + Returns: Dictionary with API response or error information """ # Construct API URL api_url = f"http://{host}:{port}/lume/vms/{vm_name}/update" - + # Prepare JSON payload with required parameters payload = {} - + # Add CPU cores if specified if "cpu" in update_opts: payload["cpu"] = update_opts["cpu"] - + # Add memory if specified if "memory" in update_opts: payload["memory"] = update_opts["memory"] - + # Add storage path if specified if storage: payload["storage"] = storage - + # Construct the curl command cmd = [ - "curl", "--connect-timeout", "15", "--max-time", "20", - "-s", "-X", "POST", "-H", "Content-Type: application/json", - "-d", json.dumps(payload), - api_url + "curl", + "--connect-timeout", + "15", + "--max-time", + "20", + "-s", + "-X", + "POST", + "-H", + "Content-Type: application/json", + "-d", + json.dumps(payload), + api_url, ] - + # Execute the command try: if debug: logger.info(f"Executing: {' '.join(cmd)}") - + result = subprocess.run(cmd, capture_output=True, text=True) - + if result.returncode != 0: logger.warning(f"API request failed with code {result.returncode}: {result.stderr}") return {"error": f"API request failed: {result.stderr}"} - + # Try to parse the response as JSON if result.stdout and result.stdout.strip(): try: @@ -335,7 +377,11 @@ def lume_api_update( return response except json.JSONDecodeError: # Return the raw response if it's not valid JSON - return {"success": True, "message": "VM updated successfully", "raw_response": result.stdout} + return { + "success": True, + "message": "VM updated successfully", + "raw_response": result.stdout, + } else: return {"success": True, "message": "VM updated successfully"} except subprocess.SubprocessError as e: @@ -352,10 +398,10 @@ def lume_api_pull( registry: str = "ghcr.io", organization: str = "trycua", debug: bool = False, - verbose: bool = False + verbose: bool = False, ) -> Dict[str, Any]: """Pull a VM image from a registry using curl. 
- + Args: image: Name/tag of the image to pull name: Name to give the VM after pulling @@ -366,47 +412,50 @@ def lume_api_pull( organization: Organization in registry (default: trycua) debug: Whether to show debug output verbose: Enable verbose logging - + Returns: Dictionary with pull status and information """ # Prepare pull request payload pull_payload = { "image": image, # Use provided image name - "name": name, # Always use name as the target VM name + "name": name, # Always use name as the target VM name "registry": registry, - "organization": organization + "organization": organization, } - + if storage: pull_payload["storage"] = storage - + # Construct pull command with proper JSON payload - pull_cmd = [ - "curl" - ] - + pull_cmd = ["curl"] + if not verbose: pull_cmd.append("-s") - - pull_cmd.extend([ - "-X", "POST", - "-H", "Content-Type: application/json", - "-d", json.dumps(pull_payload), - f"http://{host}:{port}/lume/pull" - ]) - + + pull_cmd.extend( + [ + "-X", + "POST", + "-H", + "Content-Type: application/json", + "-d", + json.dumps(pull_payload), + f"http://{host}:{port}/lume/pull", + ] + ) + logger.debug(f"Executing API request: {' '.join(pull_cmd)}") - + try: # Execute pull command result = subprocess.run(pull_cmd, capture_output=True, text=True) - + if result.returncode != 0: error_msg = f"Failed to pull VM {name}: {result.stderr}" logger.error(error_msg) return {"error": error_msg} - + try: response = json.loads(result.stdout) logger.info(f"Successfully initiated pull for VM {name}") @@ -415,7 +464,7 @@ def lume_api_pull( if result.stdout: logger.info(f"Pull response: {result.stdout}") return {"success": True, "message": f"Successfully initiated pull for VM {name}"} - + except subprocess.SubprocessError as e: error_msg = f"Failed to execute pull command: {str(e)}" logger.error(error_msg) @@ -428,10 +477,10 @@ def lume_api_delete( port: int, storage: Optional[str] = None, debug: bool = False, - verbose: bool = False + verbose: bool = False, ) -> Dict[str, Any]: """Delete a VM using curl. 
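For pulls, the request body carries the image coordinates rather than run options. An illustrative payload as assembled by `lume_api_pull`, with placeholder values; `registry` and `organization` default to `ghcr.io`/`trycua` per the signature above:

```python
import json

pull_payload = {
    "image": "macos-sequoia-cua:latest",
    "name": "my-vm",
    "registry": "ghcr.io",
    "organization": "trycua",
    # "storage": "/Users/me/.lume",  # included only when a storage path is given
}
endpoint = "http://localhost:7777/lume/pull"  # host/port are placeholders
print(json.dumps(pull_payload), "->", endpoint)
```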
- + Args: vm_name: Name of the VM to delete host: API host @@ -439,42 +488,62 @@ def lume_api_delete( storage: Storage path for the VM debug: Whether to show debug output verbose: Enable verbose logging - + Returns: Dictionary with API response or error information """ # URL encode the storage parameter for the query encoded_storage = "" storage_param = "" - + if storage: # First encode the storage path properly - encoded_storage = urllib.parse.quote(storage, safe='') + encoded_storage = urllib.parse.quote(storage, safe="") storage_param = f"?storage={encoded_storage}" - + # Construct API URL with encoded storage parameter if needed api_url = f"http://{host}:{port}/lume/vms/{vm_name}{storage_param}" - + # Construct the curl command for DELETE operation - using much longer timeouts matching shell implementation - cmd = ["curl", "--connect-timeout", "6000", "--max-time", "5000", "-s", "-X", "DELETE", f"'{api_url}'"] - + cmd = [ + "curl", + "--connect-timeout", + "6000", + "--max-time", + "5000", + "-s", + "-X", + "DELETE", + f"'{api_url}'", + ] + # For logging and display, show the properly escaped URL - display_cmd = ["curl", "--connect-timeout", "6000", "--max-time", "5000", "-s", "-X", "DELETE", api_url] - + display_cmd = [ + "curl", + "--connect-timeout", + "6000", + "--max-time", + "5000", + "-s", + "-X", + "DELETE", + api_url, + ] + # Only print the curl command when debug is enabled - display_curl_string = ' '.join(display_cmd) + display_curl_string = " ".join(display_cmd) logger.debug(f"Executing API request: {display_curl_string}") - + # Execute the command - for execution we need to use shell=True to handle URLs with special characters try: # Use a single string with shell=True for proper URL handling - shell_cmd = ' '.join(cmd) + shell_cmd = " ".join(cmd) result = subprocess.run(shell_cmd, shell=True, capture_output=True, text=True) - + # Handle curl exit codes if result.returncode != 0: curl_error = "Unknown error" - + # Map common curl error codes to helpful messages if result.returncode == 7: curl_error = "Failed to connect to the API server - it might still be starting up" @@ -483,21 +552,23 @@ def lume_api_delete( elif result.returncode == 28: curl_error = "Operation timeout - the API server is taking too long to respond" elif result.returncode == 52: - curl_error = "Empty reply from server - the API server is starting but not fully ready yet" + curl_error = ( + "Empty reply from server - the API server is starting but not fully ready yet" + ) elif result.returncode == 56: curl_error = "Network problem during data transfer - check container networking" - + # Only log at debug level to reduce noise during retries logger.debug(f"API request failed with code {result.returncode}: {curl_error}") - + # Return a more useful error message return { "error": f"API request failed: {curl_error}", "curl_code": result.returncode, "vm_name": vm_name, - "storage": storage + "storage": storage, } - + # Try to parse the response as JSON if result.stdout and result.stdout.strip(): try: @@ -505,7 +576,11 @@ def lume_api_delete( return response except json.JSONDecodeError: # Return the raw response if it's not valid JSON - return {"success": True, "message": "VM deleted successfully", "raw_response": result.stdout} + return { + "success": True, + "message": "VM deleted successfully", + "raw_response": result.stdout, + } else: return {"success": True, "message": "VM deleted successfully"} except subprocess.SubprocessError as e: @@ -515,32 +590,33 @@ def lume_api_delete( def 
parse_memory(memory_str: str) -> int: """Parse memory string to MB integer. - + Examples: "8GB" -> 8192 "1024MB" -> 1024 "512" -> 512 - + Returns: Memory value in MB """ if isinstance(memory_str, int): return memory_str - + if isinstance(memory_str, str): # Extract number and unit import re + match = re.match(r"(\d+)([A-Za-z]*)", memory_str) if match: value, unit = match.groups() value = int(value) unit = unit.upper() - + if unit == "GB" or unit == "G": return value * 1024 elif unit == "MB" or unit == "M" or unit == "": return value - + # Default fallback logger.warning(f"Could not parse memory string '{memory_str}', using 8GB default") return 8192 # Default to 8GB diff --git a/libs/python/computer/computer/providers/lumier/__init__.py b/libs/python/computer/computer/providers/lumier/__init__.py index 32a3954b..7785007a 100644 --- a/libs/python/computer/computer/providers/lumier/__init__.py +++ b/libs/python/computer/computer/providers/lumier/__init__.py @@ -3,6 +3,7 @@ try: # Use the same import approach as in the Lume provider from .provider import LumierProvider + HAS_LUMIER = True except ImportError: HAS_LUMIER = False diff --git a/libs/python/computer/computer/providers/lumier/provider.py b/libs/python/computer/computer/providers/lumier/provider.py index 67f348be..d4c99bfe 100644 --- a/libs/python/computer/computer/providers/lumier/provider.py +++ b/libs/python/computer/computer/providers/lumier/provider.py @@ -6,22 +6,17 @@ macOS and Linux VMs. It handles VM lifecycle operations through Docker commands and container management. """ +import asyncio +import json import logging import os -import json -import asyncio -from typing import Dict, List, Optional, Any +import re import subprocess import time -import re +from typing import Any, Dict, List, Optional from ..base import BaseVMProvider, VMProviderType -from ..lume_api import ( - lume_api_get, - lume_api_run, - lume_api_stop, - lume_api_update -) +from ..lume_api import lume_api_get, lume_api_run, lume_api_stop, lume_api_update # Setup logging logger = logging.getLogger(__name__) @@ -37,13 +32,13 @@ except (subprocess.SubprocessError, FileNotFoundError): class LumierProvider(BaseVMProvider): """ Lumier VM Provider implementation using Docker containers. - + This provider uses Docker to run Lumier containers that can create macOS and Linux VMs through containerization. """ - + def __init__( - self, + self, port: Optional[int] = 7777, host: str = "localhost", storage: Optional[str] = None, # Can be a path or 'ephemeral' @@ -54,7 +49,7 @@ class LumierProvider(BaseVMProvider): noVNC_port: Optional[int] = 8006, ): """Initialize the Lumier VM Provider. 
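The Lumier provider carries the same MB-integer parser as `lume_api.parse_memory` above. Its documented conversions, restated as quick standalone checks:

```python
import re

def parse_memory_mb(memory) -> int:
    # Restates the documented rules: "8GB" -> 8192, "1024MB" -> 1024,
    # "512" -> 512, ints pass through, and anything unparseable falls
    # back to 8192 (8GB).
    if isinstance(memory, int):
        return memory
    match = re.match(r"(\d+)([A-Za-z]*)", memory)
    if match:
        value, unit = int(match.group(1)), match.group(2).upper()
        if unit in ("GB", "G"):
            return value * 1024
        if unit in ("MB", "M", ""):
            return value
    return 8192

assert parse_memory_mb("8GB") == 8192
assert parse_memory_mb("1024MB") == 1024
assert parse_memory_mb("512") == 512
```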
- + Args: port: Port for the API server (default: 7777) host: Hostname for the API server (default: localhost) @@ -70,28 +65,28 @@ class LumierProvider(BaseVMProvider): self.api_port = 7777 if port is None else port self.vnc_port = noVNC_port # User-specified noVNC port, will be set in run_vm if provided self.ephemeral = ephemeral - + # Handle ephemeral storage (temporary directory) if ephemeral: self.storage = "ephemeral" else: self.storage = storage - + self.shared_path = shared_path self.image = image # Store the VM image name to use # The container_name will be set in run_vm using the VM name self.verbose = verbose self._container_id = None self._api_url = None # Will be set after container starts - + @property def provider_type(self) -> VMProviderType: """Return the provider type.""" return VMProviderType.LUMIER - + def _parse_memory(self, memory_str: str) -> int: """Parse memory string to MB integer. - + Examples: "8GB" -> 8192 "1024MB" -> 1024 @@ -99,7 +94,7 @@ class LumierProvider(BaseVMProvider): """ if isinstance(memory_str, int): return memory_str - + if isinstance(memory_str, str): # Extract number and unit match = re.match(r"(\d+)([A-Za-z]*)", memory_str) @@ -107,25 +102,25 @@ class LumierProvider(BaseVMProvider): value, unit = match.groups() value = int(value) unit = unit.upper() - + if unit == "GB" or unit == "G": return value * 1024 elif unit == "MB" or unit == "M" or unit == "": return value - + # Default fallback logger.warning(f"Could not parse memory string '{memory_str}', using 8GB default") return 8192 # Default to 8GB - + # Helper methods for interacting with the Lumier API through curl # These methods handle the various VM operations via API calls - + def _get_curl_error_message(self, return_code: int) -> str: """Get a descriptive error message for curl return codes. - + Args: return_code: The curl return code - + Returns: A descriptive error message """ @@ -143,57 +138,62 @@ class LumierProvider(BaseVMProvider): else: return f"Unknown curl error code: {return_code}" - async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Get VM information by name. - + Args: name: Name of the VM to get information for storage: Optional storage path override. If provided, this will be used instead of the provider's default storage path. - + Returns: Dictionary with VM information including status, IP address, etc. """ if not HAS_LUMIER: logger.error("Docker is not available. Cannot get VM status.") - return { - "name": name, - "status": "unavailable", - "error": "Docker is not available" - } - + return {"name": name, "status": "unavailable", "error": "Docker is not available"} + # Store the current name for API requests self.container_name = name - + try: # Check if the container exists and is running - check_cmd = ["docker", "ps", "-a", "--filter", f"name={name}", "--format", "{{.Status}}"] + check_cmd = [ + "docker", + "ps", + "-a", + "--filter", + f"name={name}", + "--format", + "{{.Status}}", + ] check_result = subprocess.run(check_cmd, capture_output=True, text=True) container_status = check_result.stdout.strip() - + if not container_status: logger.info(f"Container {name} does not exist. Will create when run_vm is called.") return { "name": name, "status": "not_found", - "message": "Container doesn't exist yet" + "message": "Container doesn't exist yet", } - + # Container exists, check if it's running is_running = container_status.startswith("Up") - + if not is_running: - logger.info(f"Container {name} exists but is not running. 
Status: {container_status}") + logger.info( + f"Container {name} exists but is not running. Status: {container_status}" + ) return { "name": name, "status": "stopped", "container_status": container_status, } - + # Container is running, get the IP address and API status from Lumier API logger.info(f"Container {name} is running. Getting VM status from API.") - + # Use the shared lume_api_get function directly vm_info = lume_api_get( vm_name=name, @@ -201,9 +201,9 @@ class LumierProvider(BaseVMProvider): port=self.api_port, storage=storage if storage is not None else self.storage, debug=self.verbose, - verbose=self.verbose + verbose=self.verbose, ) - + # Check for API errors if "error" in vm_info: # Use debug level instead of warning to reduce log noise during polling @@ -213,14 +213,14 @@ class LumierProvider(BaseVMProvider): "status": "running", # Container is running even if API is not responsive "api_status": "error", "error": vm_info["error"], - "container_status": container_status + "container_status": container_status, } - + # Process the VM status information vm_status = vm_info.get("status", "unknown") vnc_url = vm_info.get("vncUrl", "") ip_address = vm_info.get("ipAddress", "") - + # IMPORTANT: Always ensure we have a valid IP address for connectivity # If the API doesn't return an IP address, default to localhost (127.0.0.1) # This makes the behavior consistent with LumeProvider @@ -228,9 +228,9 @@ class LumierProvider(BaseVMProvider): ip_address = "127.0.0.1" logger.info(f"No IP address returned from API, defaulting to {ip_address}") vm_info["ipAddress"] = ip_address - + logger.info(f"VM {name} status: {vm_status}") - + if ip_address and vnc_url: logger.info(f"VM {name} has IP: {ip_address} and VNC URL: {vnc_url}") elif not ip_address and not vnc_url and vm_status != "running": @@ -238,8 +238,10 @@ class LumierProvider(BaseVMProvider): logger.info(f"VM {name} is not running yet. Status: {vm_status}") else: # Missing IP or VNC but status is running - this is unusual but handled with default IP - logger.warning(f"VM {name} is running but missing expected fields. API response: {vm_info}") - + logger.warning( + f"VM {name} is running but missing expected fields. API response: {vm_info}" + ) + # Return the full status information return { "name": name, @@ -248,19 +250,19 @@ class LumierProvider(BaseVMProvider): "vnc_url": vnc_url, "api_status": "ok", "container_status": container_status, - **vm_info # Include all fields from the API response + **vm_info, # Include all fields from the API response } except subprocess.SubprocessError as e: logger.error(f"Failed to check container status: {e}") return { "name": name, "status": "error", - "error": f"Failed to check container status: {str(e)}" + "error": f"Failed to check container status: {str(e)}", } - + async def list_vms(self) -> List[Dict[str, Any]]: """List all VMs managed by this provider. - + For Lumier provider, there is only one VM per container. """ try: @@ -269,10 +271,12 @@ class LumierProvider(BaseVMProvider): except Exception as e: logger.error(f"Failed to list VMs: {e}") return [] - - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + + async def run_vm( + self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Run a VM with the given options. 
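The Lumier `get_vm` pre-check shells out to `docker ps -a` with a name filter before touching the API. A condensed sketch of that check, assuming Docker is on `PATH`: empty output means the container has not been created yet, and a status string starting with `"Up"` means it is running.

```python
import subprocess

def lumier_container_state(name: str) -> str:
    cmd = ["docker", "ps", "-a", "--filter", f"name={name}", "--format", "{{.Status}}"]
    status = subprocess.run(cmd, capture_output=True, text=True).stdout.strip()
    if not status:
        return "not_found"   # get_vm returns status "not_found" in this case
    return "running" if status.startswith("Up") else "stopped"
```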
- + Args: image: Name/tag of the image to use name: Name of the VM to run (used for the container name and Docker image tag) @@ -280,7 +284,7 @@ class LumierProvider(BaseVMProvider): - cpu: Number of CPU cores - memory: Amount of memory (e.g. "8GB") - noVNC_port: Specific port for noVNC interface - + Returns: Dictionary with VM status information """ @@ -289,10 +293,18 @@ class LumierProvider(BaseVMProvider): try: # First, check if container already exists and remove it try: - check_cmd = ["docker", "ps", "-a", "--filter", f"name={self.container_name}", "--format", "{{.ID}}"] + check_cmd = [ + "docker", + "ps", + "-a", + "--filter", + f"name={self.container_name}", + "--format", + "{{.ID}}", + ] check_result = subprocess.run(check_cmd, capture_output=True, text=True) existing_container = check_result.stdout.strip() - + if existing_container: logger.info(f"Removing existing container: {self.container_name}") remove_cmd = ["docker", "rm", "-f", self.container_name] @@ -300,66 +312,68 @@ class LumierProvider(BaseVMProvider): except subprocess.CalledProcessError as e: logger.warning(f"Error removing existing container: {e}") # Continue anyway, next steps will fail if there's a real problem - + # Prepare the Docker run command cmd = ["docker", "run", "-d", "--name", self.container_name] - + cmd.extend(["-p", f"{self.vnc_port}:8006"]) logger.debug(f"Using specified noVNC_port: {self.vnc_port}") - + # Set API URL using the API port self._api_url = f"http://{self.host}:{self.api_port}" - + # Parse memory setting memory_mb = self._parse_memory(run_opts.get("memory", "8GB")) - + # Add storage volume mount if storage is specified (for persistent VM storage) if self.storage and self.storage != "ephemeral": # Create storage directory if it doesn't exist storage_dir = os.path.abspath(os.path.expanduser(self.storage or "")) os.makedirs(storage_dir, exist_ok=True) - + # Add volume mount for storage - cmd.extend([ - "-v", f"{storage_dir}:/storage", - "-e", f"HOST_STORAGE_PATH={storage_dir}" - ]) + cmd.extend( + ["-v", f"{storage_dir}:/storage", "-e", f"HOST_STORAGE_PATH={storage_dir}"] + ) logger.debug(f"Using persistent storage at: {storage_dir}") - + # Add shared folder volume mount if shared_path is specified if self.shared_path: # Create shared directory if it doesn't exist shared_dir = os.path.abspath(os.path.expanduser(self.shared_path or "")) os.makedirs(shared_dir, exist_ok=True) - + # Add volume mount for shared folder - cmd.extend([ - "-v", f"{shared_dir}:/shared", - "-e", f"HOST_SHARED_PATH={shared_dir}" - ]) + cmd.extend(["-v", f"{shared_dir}:/shared", "-e", f"HOST_SHARED_PATH={shared_dir}"]) logger.debug(f"Using shared folder at: {shared_dir}") - + # Add environment variables # Always use the container_name as the VM_NAME for consistency # Use the VM image passed from the Computer class logger.debug(f"Using VM image: {self.image}") - + # If ghcr.io is in the image, use the full image name if "ghcr.io" in self.image: vm_image = self.image else: vm_image = f"ghcr.io/trycua/{self.image}" - cmd.extend([ - "-e", f"VM_NAME={self.container_name}", - "-e", f"VERSION={vm_image}", - "-e", f"CPU_CORES={run_opts.get('cpu', '4')}", - "-e", f"RAM_SIZE={memory_mb}", - ]) - + cmd.extend( + [ + "-e", + f"VM_NAME={self.container_name}", + "-e", + f"VERSION={vm_image}", + "-e", + f"CPU_CORES={run_opts.get('cpu', '4')}", + "-e", + f"RAM_SIZE={memory_mb}", + ] + ) + # Specify the Lumier image with the full image name lumier_image = "trycua/lumier:latest" - + # First check if the image exists locally try: 
logger.debug(f"Checking if Docker image {lumier_image} exists locally...") @@ -369,54 +383,73 @@ class LumierProvider(BaseVMProvider): except subprocess.CalledProcessError: # Image doesn't exist locally logger.warning(f"\nWARNING: Docker image {lumier_image} not found locally.") - logger.warning("The system will attempt to pull it from Docker Hub, which may fail if you have network connectivity issues.") - logger.warning("If the Docker pull fails, you may need to manually pull the image first with:") + logger.warning( + "The system will attempt to pull it from Docker Hub, which may fail if you have network connectivity issues." + ) + logger.warning( + "If the Docker pull fails, you may need to manually pull the image first with:" + ) logger.warning(f" docker pull {lumier_image}\n") - + # Add the image to the command cmd.append(lumier_image) - + # Print the Docker command for debugging logger.debug(f"DOCKER COMMAND: {' '.join(cmd)}") - + # Run the container with improved error handling try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) except subprocess.CalledProcessError as e: - if "no route to host" in str(e.stderr).lower() or "failed to resolve reference" in str(e.stderr).lower(): - error_msg = (f"Network error while trying to pull Docker image '{lumier_image}'\n" - f"Error: {e.stderr}\n\n" - f"SOLUTION: Please try one of the following:\n" - f"1. Check your internet connection\n" - f"2. Pull the image manually with: docker pull {lumier_image}\n" - f"3. Check if Docker is running properly\n") + if ( + "no route to host" in str(e.stderr).lower() + or "failed to resolve reference" in str(e.stderr).lower() + ): + error_msg = ( + f"Network error while trying to pull Docker image '{lumier_image}'\n" + f"Error: {e.stderr}\n\n" + f"SOLUTION: Please try one of the following:\n" + f"1. Check your internet connection\n" + f"2. Pull the image manually with: docker pull {lumier_image}\n" + f"3. Check if Docker is running properly\n" + ) logger.error(error_msg) raise RuntimeError(error_msg) raise - + # Container started, now check VM status with polling logger.debug("Container started, checking VM status...") - logger.debug("NOTE: This may take some time while the VM image is being pulled and initialized") - + logger.debug( + "NOTE: This may take some time while the VM image is being pulled and initialized" + ) + # Start a background thread to show container logs in real-time import threading - + def show_container_logs(): # Give the container a moment to start generating logs time.sleep(1) logger.debug(f"\n---- CONTAINER LOGS FOR '{name}' (LIVE) ----") - logger.debug("Showing logs as they are generated. Press Ctrl+C to stop viewing logs...\n") - + logger.debug( + "Showing logs as they are generated. 
Press Ctrl+C to stop viewing logs...\n" + ) + try: # Use docker logs with follow option log_cmd = ["docker", "logs", "--tail", "30", "--follow", name] - process = subprocess.Popen(log_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - text=True, bufsize=1, universal_newlines=True) - + process = subprocess.Popen( + log_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + ) + # Read and print logs line by line for line in process.stdout: - logger.debug(line, end='') - + logger.debug(line, end="") + # Break if process has exited if process.poll() is not None: break @@ -427,20 +460,20 @@ class LumierProvider(BaseVMProvider): finally: logger.debug("\n---- LOG STREAMING ENDED ----") # Make sure process is terminated - if 'process' in locals() and process.poll() is None: + if "process" in locals() and process.poll() is None: process.terminate() - + # Start log streaming in a background thread if verbose mode is enabled log_thread = threading.Thread(target=show_container_logs) log_thread.daemon = True # Thread will exit when main program exits log_thread.start() - + # Skip waiting for container readiness and just poll get_vm directly # Poll the get_vm method indefinitely until the VM is ready with an IP address attempt = 0 consecutive_errors = 0 vm_running = False - + while True: # Wait indefinitely try: # Use longer delays to give the system time to initialize @@ -449,117 +482,151 @@ class LumierProvider(BaseVMProvider): # But use shorter delays while we're getting API errors if consecutive_errors > 0 and consecutive_errors < 5: wait_time = 3 # Use shorter delays when we're getting API errors - else: + else: wait_time = min(30, 5 + (attempt * 2)) - + logger.debug(f"Waiting {wait_time}s before retry #{attempt+1}...") await asyncio.sleep(wait_time) - + # Try to get VM status logger.debug(f"Checking VM status (attempt {attempt+1})...") vm_status = await self.get_vm(name) - + # Check for API errors - if 'error' in vm_status: + if "error" in vm_status: consecutive_errors += 1 - error_msg = vm_status.get('error', 'Unknown error') - + error_msg = vm_status.get("error", "Unknown error") + # Only print a user-friendly status message, not the raw error # since _lume_api_get already logged the technical details if consecutive_errors == 1 or attempt % 5 == 0: - if 'Empty reply from server' in error_msg: - logger.info("API server is starting up - container is running, but API isn't fully initialized yet.") - logger.info("This is expected during the initial VM setup - will continue polling...") + if "Empty reply from server" in error_msg: + logger.info( + "API server is starting up - container is running, but API isn't fully initialized yet." + ) + logger.info( + "This is expected during the initial VM setup - will continue polling..." + ) else: # Don't repeat the exact same error message each time - logger.warning(f"API request error (attempt {attempt+1}): {error_msg}") + logger.warning( + f"API request error (attempt {attempt+1}): {error_msg}" + ) # Just log that we're still working on it if attempt > 3: - logger.debug("Still waiting for the API server to become available...") - + logger.debug( + "Still waiting for the API server to become available..." 
+ ) + # If we're getting errors but container is running, that's normal during startup - if vm_status.get('status') == 'running': + if vm_status.get("status") == "running": if not vm_running: - logger.info("Container is running, waiting for the VM within it to become fully ready...") + logger.info( + "Container is running, waiting for the VM within it to become fully ready..." + ) logger.info("This might take a minute while the VM initializes...") vm_running = True - + # Increase counter and continue attempt += 1 continue - + # Reset consecutive error counter when we get a successful response consecutive_errors = 0 - + # If the VM is running, check if it has an IP address (which means it's fully ready) - if vm_status.get('status') == 'running': + if vm_status.get("status") == "running": vm_running = True - + # Check if we have an IP address, which means the VM is fully ready - if 'ip_address' in vm_status and vm_status['ip_address']: - logger.info(f"VM is now fully running with IP: {vm_status.get('ip_address')}") - if 'vnc_url' in vm_status and vm_status['vnc_url']: + if "ip_address" in vm_status and vm_status["ip_address"]: + logger.info( + f"VM is now fully running with IP: {vm_status.get('ip_address')}" + ) + if "vnc_url" in vm_status and vm_status["vnc_url"]: logger.info(f"VNC URL: {vm_status.get('vnc_url')}") return vm_status else: - logger.debug("VM is running but still initializing network interfaces...") + logger.debug( + "VM is running but still initializing network interfaces..." + ) logger.debug("Waiting for IP address to be assigned...") else: # VM exists but might still be starting up - status = vm_status.get('status', 'unknown') + status = vm_status.get("status", "unknown") logger.debug(f"VM found but status is: {status}. Continuing to poll...") - + # Increase counter for next iteration's delay calculation attempt += 1 - + # If we reach a very large number of attempts, give a reassuring message but continue if attempt % 10 == 0: - logger.debug(f"Still waiting after {attempt} attempts. This might take several minutes for first-time setup.") + logger.debug( + f"Still waiting after {attempt} attempts. This might take several minutes for first-time setup." + ) if not vm_running and attempt >= 20: - logger.warning("\nNOTE: First-time VM initialization can be slow as images are downloaded.") - logger.warning("If this continues for more than 10 minutes, you may want to check:") + logger.warning( + "\nNOTE: First-time VM initialization can be slow as images are downloaded." + ) + logger.warning( + "If this continues for more than 10 minutes, you may want to check:" + ) logger.warning(" 1. Docker logs with: docker logs " + name) logger.warning(" 2. If your network can access container registries") logger.warning("Press Ctrl+C to abort if needed.\n") - + # After 150 attempts (likely over 30-40 minutes), return current status if attempt >= 150: - logger.debug(f"Reached 150 polling attempts. VM status is: {vm_status.get('status', 'unknown')}") - logger.debug("Returning current VM status, but please check Docker logs if there are issues.") + logger.debug( + f"Reached 150 polling attempts. VM status is: {vm_status.get('status', 'unknown')}" + ) + logger.debug( + "Returning current VM status, but please check Docker logs if there are issues." + ) return vm_status - + except Exception as e: # Always continue retrying, but with increasing delays - logger.warning(f"Error checking VM status (attempt {attempt+1}): {e}. 
Will retry.") + logger.warning( + f"Error checking VM status (attempt {attempt+1}): {e}. Will retry." + ) consecutive_errors += 1 - + # If we've had too many consecutive errors, might be a deeper problem if consecutive_errors >= 10: - logger.warning(f"\nWARNING: Encountered {consecutive_errors} consecutive errors while checking VM status.") - logger.warning("You may need to check the Docker container logs or restart the process.") + logger.warning( + f"\nWARNING: Encountered {consecutive_errors} consecutive errors while checking VM status." + ) + logger.warning( + "You may need to check the Docker container logs or restart the process." + ) logger.warning(f"Error details: {str(e)}\n") - + # Increase attempt counter for next iteration attempt += 1 - + # After many consecutive errors, add a delay to avoid hammering the system if attempt > 5: error_delay = min(30, 10 + attempt) - logger.warning(f"Multiple connection errors, waiting {error_delay}s before next attempt...") + logger.warning( + f"Multiple connection errors, waiting {error_delay}s before next attempt..." + ) await asyncio.sleep(error_delay) - + except subprocess.CalledProcessError as e: - error_msg = f"Failed to start Lumier container: {e.stderr if hasattr(e, 'stderr') else str(e)}" + error_msg = ( + f"Failed to start Lumier container: {e.stderr if hasattr(e, 'stderr') else str(e)}" + ) logger.error(error_msg) raise RuntimeError(error_msg) - + async def _wait_for_container_ready(self, container_name: str, timeout: int = 90) -> bool: """Wait for the Lumier container to be fully ready with a valid API response. - + Args: container_name: Name of the Docker container to check timeout: Maximum time to wait in seconds (default: 90 seconds) - + Returns: True if the container is running, even if API is not fully ready. This allows operations to continue with appropriate fallbacks. 
@@ -567,21 +634,32 @@ class LumierProvider(BaseVMProvider): start_time = time.time() api_ready = False container_running = False - + logger.debug(f"Waiting for container {container_name} to be ready (timeout: {timeout}s)...") - + while time.time() - start_time < timeout: # Check if container is running try: - check_cmd = ["docker", "ps", "--filter", f"name={container_name}", "--format", "{{.Status}}"] + check_cmd = [ + "docker", + "ps", + "--filter", + f"name={container_name}", + "--format", + "{{.Status}}", + ] result = subprocess.run(check_cmd, capture_output=True, text=True, check=True) container_status = result.stdout.strip() - + if container_status and container_status.startswith("Up"): container_running = True - logger.info(f"Container {container_name} is running with status: {container_status}") + logger.info( + f"Container {container_name} is running with status: {container_status}" + ) else: - logger.warning(f"Container {container_name} not yet running, status: {container_status}") + logger.warning( + f"Container {container_name} not yet running, status: {container_status}" + ) # container is not running yet, wait and try again await asyncio.sleep(2) # Longer sleep to give Docker time continue @@ -589,17 +667,17 @@ class LumierProvider(BaseVMProvider): logger.warning(f"Error checking container status: {e}") await asyncio.sleep(2) continue - + # Container is running, check if API is responsive try: # First check the health endpoint api_url = f"http://{self.host}:{self.api_port}/health" logger.info(f"Checking API health at: {api_url}") - + # Use longer timeout for API health check since it may still be initializing curl_cmd = ["curl", "-s", "--connect-timeout", "5", "--max-time", "10", api_url] result = subprocess.run(curl_cmd, capture_output=True, text=True) - + if result.returncode == 0 and "ok" in result.stdout.lower(): api_ready = True logger.info(f"API is ready at {api_url}") @@ -610,12 +688,21 @@ class LumierProvider(BaseVMProvider): vm_api_url = f"http://{self.host}:{self.api_port}/lume/vms/{container_name}" if self.storage: import urllib.parse + encoded_storage = urllib.parse.quote_plus(self.storage) vm_api_url += f"?storage={encoded_storage}" - - curl_vm_cmd = ["curl", "-s", "--connect-timeout", "5", "--max-time", "10", vm_api_url] + + curl_vm_cmd = [ + "curl", + "-s", + "--connect-timeout", + "5", + "--max-time", + "10", + vm_api_url, + ] vm_result = subprocess.run(curl_vm_cmd, capture_output=True, text=True) - + if vm_result.returncode == 0 and vm_result.stdout.strip(): # VM API responded with something - consider the API ready api_ready = True @@ -625,7 +712,7 @@ class LumierProvider(BaseVMProvider): curl_code = result.returncode if curl_code == 0: curl_code = vm_result.returncode - + # Map common curl error codes to helpful messages if curl_code == 7: curl_error = "Failed to connect - API server is starting up" @@ -639,29 +726,37 @@ class LumierProvider(BaseVMProvider): curl_error = "Network problem during data transfer" else: curl_error = f"Unknown curl error code: {curl_code}" - + logger.info(f"API not ready yet: {curl_error}") except subprocess.SubprocessError as e: logger.warning(f"Error checking API status: {e}") - + # If the container is running but API is not ready, that's OK - we'll just wait # a bit longer before checking again, as the container may still be initializing elapsed_seconds = time.time() - start_time - if int(elapsed_seconds) % 5 == 0: # Only print status every 5 seconds to reduce verbosity - logger.debug(f"Waiting for API to initialize... 
({elapsed_seconds:.1f}s / {timeout}s)") - + if ( + int(elapsed_seconds) % 5 == 0 + ): # Only print status every 5 seconds to reduce verbosity + logger.debug( + f"Waiting for API to initialize... ({elapsed_seconds:.1f}s / {timeout}s)" + ) + await asyncio.sleep(3) # Longer sleep between API checks - + # Handle timeout - if the container is running but API is not ready, that's not # necessarily an error - the API might just need more time to start up if not container_running: logger.warning(f"Timed out waiting for container {container_name} to start") return False - + if not api_ready: - logger.warning(f"Container {container_name} is running, but API is not fully ready yet.") - logger.warning(f"NOTE: You may see some 'API request failed' messages while the API initializes.") - + logger.warning( + f"Container {container_name} is running, but API is not fully ready yet." + ) + logger.warning( + "NOTE: You may see some 'API request failed' messages while the API initializes." + ) + # Return True if container is running, even if API isn't ready yet # This allows VM operations to proceed, with appropriate retries for API calls return container_running @@ -670,12 +765,12 @@ class LumierProvider(BaseVMProvider): """Stop a running VM by stopping the Lumier container.""" try: # Use Docker commands to stop the container directly - if hasattr(self, '_container_id') and self._container_id: + if hasattr(self, "_container_id") and self._container_id: logger.info(f"Stopping Lumier container: {self.container_name}") cmd = ["docker", "stop", self.container_name] result = subprocess.run(cmd, capture_output=True, text=True, check=True) logger.info(f"Container stopped: {result.stdout.strip()}") - + # Return minimal status info return { "name": name, @@ -684,16 +779,24 @@ class LumierProvider(BaseVMProvider): } else: # Try to find the container by name - check_cmd = ["docker", "ps", "-a", "--filter", f"name={self.container_name}", "--format", "{{.ID}}"] + check_cmd = [ + "docker", + "ps", + "-a", + "--filter", + f"name={self.container_name}", + "--format", + "{{.ID}}", + ] check_result = subprocess.run(check_cmd, capture_output=True, text=True) container_id = check_result.stdout.strip() - + if container_id: logger.info(f"Found container ID: {container_id}") cmd = ["docker", "stop", self.container_name] result = subprocess.run(cmd, capture_output=True, text=True, check=True) logger.info(f"Container stopped: {result.stdout.strip()}") - + return { "name": name, "status": "stopped", @@ -709,26 +812,30 @@ class LumierProvider(BaseVMProvider): error_msg = f"Failed to stop container: {e.stderr if hasattr(e, 'stderr') else str(e)}" logger.error(error_msg) raise RuntimeError(f"Failed to stop Lumier container: {error_msg}") - + # update_vm is not implemented as it's not needed for Lumier # The BaseVMProvider requires it, so we provide a minimal implementation - async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + async def update_vm( + self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Not implemented for Lumier provider.""" logger.warning("update_vm is not implemented for Lumier provider") return {"name": name, "status": "unchanged"} - - async def get_logs(self, name: str, num_lines: int = 100, follow: bool = False, timeout: Optional[int] = None) -> str: + + async def get_logs( + self, name: str, num_lines: int = 100, follow: bool = False, timeout: Optional[int] = None + ) -> str: """Get the logs from the 
Lumier container. - + Args: name: Name of the VM/container to get logs for num_lines: Number of recent log lines to return (default: 100) follow: If True, follow the logs (stream new logs as they are generated) timeout: Optional timeout in seconds for follow mode (None means no timeout) - + Returns: Container logs as a string - + Note: If follow=True, this function will continuously stream logs until timeout or until interrupted. The output will be printed to console in real-time. @@ -737,16 +844,16 @@ class LumierProvider(BaseVMProvider): error_msg = "Docker is not available. Cannot get container logs." logger.error(error_msg) return error_msg - + # Make sure we have a container name container_name = name - + # Check if the container exists and is running try: # Check if the container exists inspect_cmd = ["docker", "container", "inspect", container_name] result = subprocess.run(inspect_cmd, capture_output=True, text=True) - + if result.returncode != 0: error_msg = f"Container '{container_name}' does not exist or is not accessible" logger.error(error_msg) @@ -755,39 +862,43 @@ class LumierProvider(BaseVMProvider): error_msg = f"Error checking container status: {str(e)}" logger.error(error_msg) return error_msg - + # Base docker logs command log_cmd = ["docker", "logs"] - + # Add tail parameter to limit the number of lines log_cmd.extend(["--tail", str(num_lines)]) - + # Handle follow mode with or without timeout if follow: log_cmd.append("--follow") - + if timeout is not None: # For follow mode with timeout, we'll run the command and handle the timeout log_cmd.append(container_name) - logger.info(f"Following logs for container '{container_name}' with timeout {timeout}s") + logger.info( + f"Following logs for container '{container_name}' with timeout {timeout}s" + ) logger.info(f"\n---- CONTAINER LOGS FOR '{container_name}' (LIVE) ----") - logger.info(f"Press Ctrl+C to stop following logs\n") - + logger.info("Press Ctrl+C to stop following logs\n") + try: # Run with timeout process = subprocess.Popen(log_cmd, text=True) - + # Wait for the specified timeout if timeout: try: process.wait(timeout=timeout) except subprocess.TimeoutExpired: process.terminate() # Stop after timeout - logger.info(f"\n---- LOG FOLLOWING STOPPED (timeout {timeout}s reached) ----") + logger.info( + f"\n---- LOG FOLLOWING STOPPED (timeout {timeout}s reached) ----" + ) else: # Without timeout, wait for user interruption process.wait() - + return "Logs were displayed to console in follow mode" except KeyboardInterrupt: process.terminate() @@ -798,8 +909,8 @@ class LumierProvider(BaseVMProvider): log_cmd.append(container_name) logger.info(f"Following logs for container '{container_name}' indefinitely") logger.info(f"\n---- CONTAINER LOGS FOR '{container_name}' (LIVE) ----") - logger.info(f"Press Ctrl+C to stop following logs\n") - + logger.info("Press Ctrl+C to stop following logs\n") + try: # Run the command and let it run until interrupted process = subprocess.Popen(log_cmd, text=True) @@ -813,19 +924,21 @@ class LumierProvider(BaseVMProvider): # For non-follow mode, capture and return the logs as a string log_cmd.append(container_name) logger.info(f"Getting {num_lines} log lines for container '{container_name}'") - + try: result = subprocess.run(log_cmd, capture_output=True, text=True, check=True) logs = result.stdout - + # Only print header and logs if there's content if logs.strip(): - logger.info(f"\n---- CONTAINER LOGS FOR '{container_name}' (LAST {num_lines} LINES) ----\n") + logger.info( + f"\n---- CONTAINER 
LOGS FOR '{container_name}' (LAST {num_lines} LINES) ----\n" + ) logger.info(logs) - logger.info(f"\n---- END OF LOGS ----") + logger.info("\n---- END OF LOGS ----") else: logger.info(f"\nNo logs available for container '{container_name}'") - + return logs except subprocess.CalledProcessError as e: error_msg = f"Error getting logs: {e.stderr}" @@ -835,45 +948,48 @@ class LumierProvider(BaseVMProvider): error_msg = f"Unexpected error getting logs: {str(e)}" logger.error(error_msg) return error_msg - + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("LumierProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. - + Args: name: Name of the VM to get the IP for storage: Optional storage path override retry_delay: Delay between retries in seconds (default: 2) - + Returns: IP address of the VM when it becomes available """ # Use container_name = name for consistency self.container_name = name - + # Track total attempts for logging purposes total_attempts = 0 - + # Loop indefinitely until we get a valid IP while True: total_attempts += 1 - + # Log retry message but not on first attempt if total_attempts > 1: logger.info(f"Waiting for VM {name} IP address (attempt {total_attempts})...") - + try: # Get VM information vm_info = await self.get_vm(name, storage=storage) - + # Check if we got a valid IP ip = vm_info.get("ip_address", None) if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): logger.info(f"Got valid VM IP address: {ip}") return ip - + # Check the VM status status = vm_info.get("status", "unknown") - + # Special handling for Lumier: it may report "stopped" even when the VM is starting # If the VM information contains an IP but status is stopped, it might be a race condition if status == "stopped" and "ip_address" in vm_info: @@ -888,43 +1004,43 @@ class LumierProvider(BaseVMProvider): # If VM is running but no IP yet, wait and retry else: logger.info("VM is running but no valid IP address yet. Waiting...") - + except Exception as e: logger.warning(f"Error getting VM {name} IP: {e}, continuing to wait...") - + # Wait before next retry await asyncio.sleep(retry_delay) - + # Add progress log every 10 attempts if total_attempts % 10 == 0: logger.info(f"Still waiting for VM {name} IP after {total_attempts} attempts...") - + async def __aenter__(self): """Async context manager entry. - + This method is called when entering an async context manager block. Returns self to be used in the context. """ logger.debug("Entering LumierProvider context") - + # Initialize the API URL with the default value if not already set # This ensures get_vm can work before run_vm is called - if not hasattr(self, '_api_url') or not self._api_url: + if not hasattr(self, "_api_url") or not self._api_url: self._api_url = f"http://{self.host}:{self.api_port}" logger.info(f"Initialized default Lumier API URL: {self._api_url}") - + return self - + async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit. - + This method is called when exiting an async context manager block. It handles proper cleanup of resources, including stopping any running containers. 
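
Since `LumierProvider` is an async context manager (`__aenter__` seeds the API URL, `__aexit__` below stops the container), end-to-end usage reduces to the following sketch. The import path and image tag are assumptions based on the file paths in this diff; in practice the `Computer` class constructs the provider rather than user code:

```python
import asyncio

from computer.providers.lumier.provider import LumierProvider  # path per this diff

async def main() -> None:
    async with LumierProvider(
        port=7777, host="localhost", image="macos-sequoia-cua:latest"  # hypothetical tag
    ) as provider:
        vm = await provider.run_vm(
            image="macos-sequoia-cua:latest",
            name="my-vm",
            run_opts={"cpu": 4, "memory": "8GB"},
        )
        print(vm.get("ip_address"), vm.get("vnc_url"))
    # __aexit__ stops the Docker container on the way out

asyncio.run(main())
```
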
""" logger.debug(f"Exiting LumierProvider context, handling exceptions: {exc_type}") try: # If we have a container ID, we should stop it to clean up resources - if hasattr(self, '_container_id') and self._container_id: + if hasattr(self, "_container_id") and self._container_id: logger.info(f"Stopping Lumier container on context exit: {self.container_name}") try: cmd = ["docker", "stop", self.container_name] diff --git a/libs/python/computer/computer/providers/types.py b/libs/python/computer/computer/providers/types.py new file mode 100644 index 00000000..da9e50c0 --- /dev/null +++ b/libs/python/computer/computer/providers/types.py @@ -0,0 +1,39 @@ +"""Shared provider type definitions for VM metadata and responses. + +These base types describe the common shape of objects returned by provider +methods like `list_vms()`. +""" + +from __future__ import annotations + +from typing import Literal, NotRequired, TypedDict + +# Core status values per product docs +VMStatus = Literal[ + "pending", # VM deployment in progress + "running", # VM is active and accessible + "stopped", # VM is stopped but not terminated + "terminated", # VM has been permanently destroyed + "failed", # VM deployment or operation failed +] + +OSType = Literal["macos", "linux", "windows"] + + +class MinimalVM(TypedDict): + """Minimal VM object shape returned by list calls. + + Providers may include additional fields. Optional fields below are + common extensions some providers expose or that callers may compute. + """ + + name: str + status: VMStatus + # Not always included by all providers + password: NotRequired[str] + vnc_url: NotRequired[str] + api_url: NotRequired[str] + + +# Convenience alias for list_vms() responses +ListVMsResponse = list[MinimalVM] diff --git a/libs/python/computer/computer/providers/winsandbox/__init__.py b/libs/python/computer/computer/providers/winsandbox/__init__.py index 715ed7db..0d089092 100644 --- a/libs/python/computer/computer/providers/winsandbox/__init__.py +++ b/libs/python/computer/computer/providers/winsandbox/__init__.py @@ -2,6 +2,7 @@ try: import winsandbox + HAS_WINSANDBOX = True except ImportError: HAS_WINSANDBOX = False diff --git a/libs/python/computer/computer/providers/winsandbox/provider.py b/libs/python/computer/computer/providers/winsandbox/provider.py index 6196b96f..771ec2b4 100644 --- a/libs/python/computer/computer/providers/winsandbox/provider.py +++ b/libs/python/computer/computer/providers/winsandbox/provider.py @@ -1,10 +1,11 @@ """Windows Sandbox VM provider implementation using pywinsandbox.""" -import os import asyncio import logging +import os import time -from typing import Dict, Any, Optional, List +from pathlib import Path +from typing import Any, Dict, List, Optional from ..base import BaseVMProvider, VMProviderType @@ -13,6 +14,7 @@ logger = logging.getLogger(__name__) try: import winsandbox + HAS_WINSANDBOX = True except ImportError: HAS_WINSANDBOX = False @@ -20,13 +22,13 @@ except ImportError: class WinSandboxProvider(BaseVMProvider): """Windows Sandbox VM provider implementation using pywinsandbox. - + This provider uses Windows Sandbox to create isolated Windows environments. Storage is always ephemeral with Windows Sandbox. 
""" - + def __init__( - self, + self, port: int = 7777, host: str = "localhost", storage: Optional[str] = None, @@ -34,10 +36,10 @@ class WinSandboxProvider(BaseVMProvider): ephemeral: bool = True, # Windows Sandbox is always ephemeral memory_mb: int = 4096, networking: bool = True, - **kwargs + **kwargs, ): """Initialize the Windows Sandbox provider. - + Args: port: Port for the computer server (default: 7777) host: Host to use for connections (default: localhost) @@ -52,41 +54,43 @@ class WinSandboxProvider(BaseVMProvider): "pywinsandbox is required for WinSandboxProvider. " "Please install it with 'pip install pywinsandbox'" ) - + self.host = host self.port = port self.verbose = verbose self.memory_mb = memory_mb self.networking = networking - + # Windows Sandbox is always ephemeral if not ephemeral: logger.warning("Windows Sandbox storage is always ephemeral. Ignoring ephemeral=False.") self.ephemeral = True - + # Storage is always ephemeral for Windows Sandbox if storage and storage != "ephemeral": - logger.warning("Windows Sandbox does not support persistent storage. Using ephemeral storage.") + logger.warning( + "Windows Sandbox does not support persistent storage. Using ephemeral storage." + ) self.storage = "ephemeral" - + self.logger = logging.getLogger(__name__) - + # Track active sandboxes self._active_sandboxes: Dict[str, Any] = {} - + @property def provider_type(self) -> VMProviderType: """Get the provider type.""" return VMProviderType.WINSANDBOX - + async def __aenter__(self): """Enter async context manager.""" # Verify Windows Sandbox is available if not HAS_WINSANDBOX: raise ImportError("pywinsandbox is not available") - + return self - + async def __aexit__(self, exc_type, exc_val, exc_tb): """Exit async context manager.""" # Clean up any active sandboxes @@ -96,29 +100,24 @@ class WinSandboxProvider(BaseVMProvider): self.logger.info(f"Terminated sandbox: {name}") except Exception as e: self.logger.error(f"Error terminating sandbox {name}: {e}") - + self._active_sandboxes.clear() - + async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Get VM information by name. - + Args: name: Name of the VM to get information for storage: Ignored for Windows Sandbox (always ephemeral) - + Returns: Dictionary with VM information including status, IP address, etc. 
""" if name not in self._active_sandboxes: - return { - "name": name, - "status": "stopped", - "ip_address": None, - "storage": "ephemeral" - } - + return {"name": name, "status": "stopped", "ip_address": None, "storage": "ephemeral"} + sandbox = self._active_sandboxes[name] - + # Check if sandbox is still running try: # Try to ping the sandbox to see if it's responsive @@ -127,7 +126,7 @@ class WinSandboxProvider(BaseVMProvider): sandbox_responsive = True except Exception: sandbox_responsive = False - + if not sandbox_responsive: return { "name": name, @@ -135,33 +134,36 @@ class WinSandboxProvider(BaseVMProvider): "ip_address": None, "storage": "ephemeral", "memory_mb": self.memory_mb, - "networking": self.networking + "networking": self.networking, } - + # Check for computer server address file - server_address_file = r"C:\Users\WDAGUtilityAccount\Desktop\shared_windows_sandbox_dir\server_address" - + server_address_file = ( + r"C:\Users\WDAGUtilityAccount\Desktop\shared_windows_sandbox_dir\server_address" + ) + try: # Check if the server address file exists file_exists = sandbox.rpyc.modules.os.path.exists(server_address_file) - + if file_exists: # Read the server address file - with sandbox.rpyc.builtin.open(server_address_file, 'r') as f: + with sandbox.rpyc.builtin.open(server_address_file, "r") as f: server_address = f.read().strip() - - if server_address and ':' in server_address: + + if server_address and ":" in server_address: # Parse IP:port from the file - ip_address, port = server_address.split(':', 1) - + ip_address, port = server_address.split(":", 1) + # Verify the server is actually responding try: import socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(3) result = sock.connect_ex((ip_address, int(port))) sock.close() - + if result == 0: # Server is responding status = "running" @@ -182,26 +184,26 @@ class WinSandboxProvider(BaseVMProvider): # Server address file doesn't exist yet status = "starting" ip_address = None - + except Exception as e: self.logger.debug(f"Error checking server address file: {e}") status = "starting" ip_address = None - + except Exception as e: self.logger.error(f"Error checking sandbox status: {e}") status = "error" ip_address = None - + return { "name": name, "status": status, "ip_address": ip_address, "storage": "ephemeral", "memory_mb": self.memory_mb, - "networking": self.networking + "networking": self.networking, } - + async def list_vms(self) -> List[Dict[str, Any]]: """List all available VMs.""" vms = [] @@ -209,25 +211,24 @@ class WinSandboxProvider(BaseVMProvider): vm_info = await self.get_vm(name) vms.append(vm_info) return vms - - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + + async def run_vm( + self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Run a VM with the given options. - + Args: image: Image name (ignored for Windows Sandbox - always uses host Windows) name: Name of the VM to run run_opts: Dictionary of run options (memory, cpu, etc.) 
storage: Ignored for Windows Sandbox (always ephemeral) - + Returns: Dictionary with VM run status and information """ if name in self._active_sandboxes: - return { - "success": False, - "error": f"Sandbox {name} is already running" - } - + return {"success": False, "error": f"Sandbox {name} is already running"} + try: # Extract options from run_opts memory_mb = run_opts.get("memory_mb", self.memory_mb) @@ -239,11 +240,18 @@ class WinSandboxProvider(BaseVMProvider): memory_mb = int(memory_mb[:-2]) else: memory_mb = self.memory_mb - + networking = run_opts.get("networking", self.networking) - - # Create folder mappers if shared directories are specified + + # Create folder mappers; always map a persistent venv directory on host for caching packages folder_mappers = [] + # Ensure host side persistent venv directory exists (Path.home()/wsb_venv) + host_wsb_env = Path.home() / ".cua" / "wsb_cache" + try: + host_wsb_env.mkdir(parents=True, exist_ok=True) + except Exception: + # If cannot create, continue without persistent mapping + host_wsb_env = None shared_directories = run_opts.get("shared_directories", []) for shared_dir in shared_directories: if isinstance(shared_dir, dict): @@ -252,180 +260,197 @@ class WinSandboxProvider(BaseVMProvider): host_path = shared_dir else: continue - + if host_path and os.path.exists(host_path): folder_mappers.append(winsandbox.FolderMapper(host_path)) - + + # Add mapping for the persistent venv directory (read/write) so it appears in Sandbox Desktop + if host_wsb_env is not None and host_wsb_env.exists(): + try: + folder_mappers.append( + winsandbox.FolderMapper(str(host_wsb_env), read_only=False) + ) + except Exception as e: + self.logger.warning(f"Failed to map host winsandbox_venv: {e}") + self.logger.info(f"Creating Windows Sandbox: {name}") self.logger.info(f"Memory: {memory_mb}MB, Networking: {networking}") if folder_mappers: self.logger.info(f"Shared directories: {len(folder_mappers)}") - + # Create the sandbox without logon script try: # Try with memory_mb parameter (newer pywinsandbox version) sandbox = winsandbox.new_sandbox( - memory_mb=str(memory_mb), - networking=networking, - folder_mappers=folder_mappers + memory_mb=str(memory_mb), networking=networking, folder_mappers=folder_mappers ) except TypeError as e: if "memory_mb" in str(e): # Fallback for older pywinsandbox version that doesn't support memory_mb self.logger.warning( - f"Your pywinsandbox version doesn't support memory_mb parameter. " - f"Using default memory settings. To use custom memory settings, " - f"please update pywinsandbox: pip install -U git+https://github.com/karkason/pywinsandbox.git" + "Your pywinsandbox version doesn't support memory_mb parameter. " + "Using default memory settings. 
To use custom memory settings, " + "please update pywinsandbox: pip install -U git+https://github.com/karkason/pywinsandbox.git" ) sandbox = winsandbox.new_sandbox( - networking=networking, - folder_mappers=folder_mappers + networking=networking, folder_mappers=folder_mappers ) else: # Re-raise if it's a different TypeError raise - + # Store the sandbox self._active_sandboxes[name] = sandbox - + self.logger.info(f"Windows Sandbox {name} created successfully") - + + venv_exists = ( + (host_wsb_env / "venv" / "Lib" / "site-packages" / "computer_server").exists() + if host_wsb_env + else False + ) + # Setup the computer server in the sandbox - await self._setup_computer_server(sandbox, name) - + await self._setup_computer_server(sandbox, name, wait_for_venv=(not venv_exists)) + return { "success": True, "name": name, "status": "starting", "memory_mb": memory_mb, "networking": networking, - "storage": "ephemeral" + "storage": "ephemeral", } - + except Exception as e: self.logger.error(f"Failed to create Windows Sandbox {name}: {e}") # stack trace import traceback + self.logger.error(f"Stack trace: {traceback.format_exc()}") - return { - "success": False, - "error": f"Failed to create sandbox: {str(e)}" - } - + return {"success": False, "error": f"Failed to create sandbox: {str(e)}"} + async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Stop a running VM. - + Args: name: Name of the VM to stop storage: Ignored for Windows Sandbox - + Returns: Dictionary with stop status and information """ if name not in self._active_sandboxes: - return { - "success": False, - "error": f"Sandbox {name} is not running" - } - + return {"success": False, "error": f"Sandbox {name} is not running"} + try: sandbox = self._active_sandboxes[name] - + # Terminate the sandbox sandbox.shutdown() - + # Remove from active sandboxes del self._active_sandboxes[name] - + self.logger.info(f"Windows Sandbox {name} stopped successfully") - - return { - "success": True, - "name": name, - "status": "stopped" - } - + + return {"success": True, "name": name, "status": "stopped"} + except Exception as e: self.logger.error(f"Failed to stop Windows Sandbox {name}: {e}") - return { - "success": False, - "error": f"Failed to stop sandbox: {str(e)}" - } - - async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + return {"success": False, "error": f"Failed to stop sandbox: {str(e)}"} + + async def update_vm( + self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None + ) -> Dict[str, Any]: """Update VM configuration. - + Note: Windows Sandbox does not support runtime configuration updates. The sandbox must be stopped and restarted with new configuration. - + Args: name: Name of the VM to update update_opts: Dictionary of update options storage: Ignored for Windows Sandbox - + Returns: Dictionary with update status and information """ return { "success": False, "error": "Windows Sandbox does not support runtime configuration updates. " - "Please stop and restart the sandbox with new configuration." + "Please stop and restart the sandbox with new configuration.", } - + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("WinSandboxProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. 
- + Args: name: Name of the VM to get the IP for storage: Ignored for Windows Sandbox retry_delay: Delay between retries in seconds (default: 2) - + Returns: IP address of the VM when it becomes available """ total_attempts = 0 - + # Loop indefinitely until we get a valid IP while True: total_attempts += 1 - + # Log retry message but not on first attempt if total_attempts > 1: - self.logger.info(f"Waiting for Windows Sandbox {name} IP address (attempt {total_attempts})...") - + self.logger.info( + f"Waiting for Windows Sandbox {name} IP address (attempt {total_attempts})..." + ) + try: # Get VM information vm_info = await self.get_vm(name, storage=storage) - + # Check if we got a valid IP ip = vm_info.get("ip_address", None) if ip and ip != "unknown" and not ip.startswith("0.0.0.0"): self.logger.info(f"Got valid Windows Sandbox IP address: {ip}") return ip - + # Check the VM status status = vm_info.get("status", "unknown") - + # If VM is not running yet, log and wait if status != "running": - self.logger.info(f"Windows Sandbox is not running yet (status: {status}). Waiting...") + self.logger.info( + f"Windows Sandbox is not running yet (status: {status}). Waiting..." + ) # If VM is running but no IP yet, wait and retry else: - self.logger.info("Windows Sandbox is running but no valid IP address yet. Waiting...") - + self.logger.info( + "Windows Sandbox is running but no valid IP address yet. Waiting..." + ) + except Exception as e: - self.logger.warning(f"Error getting Windows Sandbox {name} IP: {e}, continuing to wait...") - + self.logger.warning( + f"Error getting Windows Sandbox {name} IP: {e}, continuing to wait..." + ) + # Wait before next retry await asyncio.sleep(retry_delay) - + # Add progress log every 10 attempts if total_attempts % 10 == 0: - self.logger.info(f"Still waiting for Windows Sandbox {name} IP after {total_attempts} attempts...") - - async def _setup_computer_server(self, sandbox, name: str, visible: bool = False): + self.logger.info( + f"Still waiting for Windows Sandbox {name} IP after {total_attempts} attempts..." + ) + + async def _setup_computer_server( + self, sandbox, name: str, visible: bool = False, wait_for_venv: bool = True + ): """Setup the computer server in the Windows Sandbox using RPyC. 
- + Args: sandbox: The Windows Sandbox instance name: Name of the sandbox @@ -436,50 +461,58 @@ class WinSandboxProvider(BaseVMProvider): # Read the PowerShell setup script script_path = os.path.join(os.path.dirname(__file__), "setup_script.ps1") - with open(script_path, 'r', encoding='utf-8') as f: + with open(script_path, "r", encoding="utf-8") as f: setup_script_content = f.read() - + # Write the setup script to the sandbox using RPyC script_dest_path = r"C:\Users\WDAGUtilityAccount\setup_cua.ps1" - + self.logger.info(f"Writing setup script to {script_dest_path}") - with sandbox.rpyc.builtin.open(script_dest_path, 'w') as f: + with sandbox.rpyc.builtin.open(script_dest_path, "w") as f: f.write(setup_script_content) - + # Execute the PowerShell script in the background self.logger.info("Executing setup script in sandbox...") - + # Use subprocess to run PowerShell script import subprocess + powershell_cmd = [ - "powershell.exe", - "-ExecutionPolicy", "Bypass", + "powershell.exe", + "-ExecutionPolicy", + "Bypass", "-NoExit", # Keep window open after script completes - "-File", script_dest_path + "-File", + script_dest_path, ] - + # Set creation flags based on visibility preference if visible: # CREATE_NEW_CONSOLE - creates a new console window (visible) creation_flags = 0x00000010 else: - creation_flags = 0x08000000 # CREATE_NO_WINDOW - + creation_flags = 0x08000000 # CREATE_NO_WINDOW + # Start the process using RPyC process = sandbox.rpyc.modules.subprocess.Popen( - powershell_cmd, - creationflags=creation_flags, - shell=False + powershell_cmd, creationflags=creation_flags, shell=False ) - - # # Sleep for 30 seconds - # await asyncio.sleep(30) + + if wait_for_venv: + print( + "Waiting for venv to be created for the first time setup of Windows Sandbox..." + ) + print("This may take a minute...") + await asyncio.sleep(120) ip = await self.get_ip(name) self.logger.info(f"Sandbox IP: {ip}") - self.logger.info(f"Setup script started in background in sandbox {name} with PID: {process.pid}") - + self.logger.info( + f"Setup script started in background in sandbox {name} with PID: {process.pid}" + ) + except Exception as e: self.logger.error(f"Failed to setup computer server in sandbox {name}: {e}") import traceback + self.logger.error(f"Stack trace: {traceback.format_exc()}") diff --git a/libs/python/computer/computer/providers/winsandbox/setup_script.ps1 b/libs/python/computer/computer/providers/winsandbox/setup_script.ps1 index 73074764..e6208d71 100644 --- a/libs/python/computer/computer/providers/winsandbox/setup_script.ps1 +++ b/libs/python/computer/computer/providers/winsandbox/setup_script.ps1 @@ -79,23 +79,48 @@ try { $pythonVersion = & $pythonExe --version 2>&1 Write-Host "Python version: $pythonVersion" - # Step 2: Install cua-computer-server directly - Write-Host "Step 2: Installing cua-computer-server..." + # Step 2: Create a dedicated virtual environment in mapped Desktop folder (persistent) + Write-Host "Step 2: Creating virtual environment (if needed)..." 
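
The `$venvPath` created below is the sandbox-side view of the host cache directory that `provider.py` sets up and maps in. A sketch of that pairing, using only the paths that appear in this diff:

```python
from pathlib import Path

# Host side (provider.py): persistent cache, mapped via winsandbox.FolderMapper.
host_wsb_env = Path.home() / ".cua" / "wsb_cache"
host_wsb_env.mkdir(parents=True, exist_ok=True)

# Sandbox side (setup_script.ps1): mapped folders surface on the sandbox
# Desktop, so the venv created there survives otherwise-ephemeral sandboxes.
SANDBOX_VENV = r"C:\Users\WDAGUtilityAccount\Desktop\wsb_cache\venv"

# First-run check used by run_vm to decide whether to wait ~120s for pip:
first_run = not (host_wsb_env / "venv" / "Lib" / "site-packages" / "computer_server").exists()
print("first run:", first_run)
```
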
+ $cachePath = "C:\Users\WDAGUtilityAccount\Desktop\wsb_cache" + $venvPath = "C:\Users\WDAGUtilityAccount\Desktop\wsb_cache\venv" + if (!(Test-Path $venvPath)) { + Write-Host "Creating venv at: $venvPath" + & $pythonExe -m venv $venvPath + } else { + Write-Host "Venv already exists at: $venvPath" + } + # Hide the folder to keep Desktop clean + try { + $item = Get-Item $cachePath -ErrorAction SilentlyContinue + if ($item) { + if (-not ($item.Attributes -band [IO.FileAttributes]::Hidden)) { + $item.Attributes = $item.Attributes -bor [IO.FileAttributes]::Hidden + } + } + } catch { } + $venvPython = Join-Path $venvPath "Scripts\python.exe" + if (!(Test-Path $venvPython)) { + throw "Virtual environment Python not found at $venvPython" + } + Write-Host "Using venv Python: $venvPython" + + # Step 3: Install cua-computer-server into the venv + Write-Host "Step 3: Installing cua-computer-server..." Write-Host "Upgrading pip..." - & $pythonExe -m pip install --upgrade pip --quiet + & $venvPython -m pip install --upgrade pip --quiet Write-Host "Installing cua-computer-server..." - & $pythonExe -m pip install cua-computer-server --quiet + & $venvPython -m pip install cua-computer-server Write-Host "cua-computer-server installation completed." - # Step 3: Start computer server in background - Write-Host "Step 3: Starting computer server in background..." - Write-Host "Starting computer server with: $pythonExe" + # Step 4: Start computer server in background using the venv Python + Write-Host "Step 4: Starting computer server in background..." + Write-Host "Starting computer server with: $venvPython" # Start the computer server in the background - $serverProcess = Start-Process -FilePath $pythonExe -ArgumentList "-m", "computer_server.main" -WindowStyle Hidden -PassThru + $serverProcess = Start-Process -FilePath $venvPython -ArgumentList "-m", "computer_server.main" -WindowStyle Hidden -PassThru Write-Host "Computer server started in background with PID: $($serverProcess.Id)" # Give it a moment to start diff --git a/libs/python/computer/computer/ui/gradio/__init__.py b/libs/python/computer/computer/ui/gradio/__init__.py index 3645348e..62bbaf80 100644 --- a/libs/python/computer/computer/ui/gradio/__init__.py +++ b/libs/python/computer/computer/ui/gradio/__init__.py @@ -1,6 +1,7 @@ """Gradio UI for Computer UI.""" -import gradio as gr from typing import Optional +import gradio as gr + from .app import create_gradio_ui diff --git a/libs/python/computer/computer/ui/gradio/app.py b/libs/python/computer/computer/ui/gradio/app.py index d8c5c513..a297302c 100644 --- a/libs/python/computer/computer/ui/gradio/app.py +++ b/libs/python/computer/computer/ui/gradio/app.py @@ -4,91 +4,171 @@ Advanced Gradio UI for Computer Interface This is a Gradio interface for the Computer Interface """ -import gradio as gr import asyncio +import base64 +import glob +import hashlib import io import json -import uuid -import hashlib import os -import glob import random -import base64 -from datetime import datetime -from PIL import Image -from huggingface_hub import DatasetCard, DatasetCardData -from computer import Computer, VMProviderType -from gradio.components import ChatMessage -import pandas as pd -from datasets import Dataset, Features, Sequence, concatenate_datasets -import datasets - import random as rand +import uuid +from datetime import datetime + +import datasets +import gradio as gr +import pandas as pd +from computer import Computer, VMProviderType +from datasets import Dataset, Features, Sequence, 
concatenate_datasets +from gradio.components import ChatMessage +from huggingface_hub import DatasetCard, DatasetCardData +from PIL import Image # Task examples as dictionaries with task string and setup function TASK_EXAMPLES = [ { "task": "Open the shopping list on my desktop and add all the items to a Doordash cart", - "setup": lambda computer: create_shopping_list_file(computer) - }, + "setup": lambda computer: create_shopping_list_file(computer), + }, { "task": "Do a random miniwob++ task, output the task name in tags and your reward in tags" - } + }, ] + # Generate random shopping list and save to desktop using computer interface async def create_shopping_list_file(computer): - items = ["Milk", "Eggs", "Bread", "Apples", "Bananas", "Chicken", "Rice", - "Cereal", "Coffee", "Cheese", "Pasta", "Tomatoes", "Potatoes", - "Onions", "Carrots", "Ice Cream", "Yogurt", "Cookies"] - + items = [ + "Milk", + "Eggs", + "Bread", + "Apples", + "Bananas", + "Chicken", + "Rice", + "Cereal", + "Coffee", + "Cheese", + "Pasta", + "Tomatoes", + "Potatoes", + "Onions", + "Carrots", + "Ice Cream", + "Yogurt", + "Cookies", + ] + # Select 1-5 random items num_items = rand.randint(1, 5) selected_items = rand.sample(items, num_items) - + # Create shopping list content content = "SHOPPING LIST:\n\n" for item in selected_items: content += f"- {item}\n" - + # Create a temporary file with the content temp_file_path = "/tmp/shopping_list.txt" - + # Use run_command to create the file on the desktop desktop_path = "~/Desktop" file_path = f"{desktop_path}/shopping_list.txt" - + # Create the file using echo command cmd = f"echo '{content}' > {file_path}" stdout, stderr = await computer.interface.run_command(cmd) - + print(f"Created shopping list at {file_path} with {num_items} items") if stderr: print(f"Error: {stderr}") - + return file_path + +import typing + # Load valid keys from the Key enum in models.py from computer.interface.models import Key -import typing -VALID_KEYS = [key.value for key in Key] + [ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' + +VALID_KEYS = [key.value for key in Key] + [ + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", ] -VALID_KEYS = list(dict.fromkeys(VALID_KEYS)) # remove duplicates, preserve order +VALID_KEYS = list(dict.fromkeys(VALID_KEYS)) # remove duplicates, preserve order # List of random words for demo naming -RANDOM_WORDS = ["apple", "banana", "cherry", "dolphin", "elephant", "forest", - "giraffe", "harmony", "igloo", "jungle", "kangaroo", "lemon", - "mountain", "notebook", "ocean", "penguin", "quasar", "rainbow", "ohana", - "sunflower", "tiger", "umbrella", "volcano", "waterfall", "xylophone", - "yellow", "zebra"] +RANDOM_WORDS = [ + "apple", + "banana", + "cherry", + "dolphin", + "elephant", + "forest", + "giraffe", + "harmony", + "igloo", + "jungle", + "kangaroo", + "lemon", + "mountain", + "notebook", + "ocean", + "penguin", + "quasar", + "rainbow", + "ohana", + "sunflower", + "tiger", + "umbrella", + "volcano", + "waterfall", + "xylophone", + "yellow", + "zebra", +] + # Generate a random demo name with 3 words def generate_random_demo_name(): return " ".join(random.sample(RANDOM_WORDS, 3)) + # Global session ID for tracking this run 
session_id = str(uuid.uuid4()) @@ -105,101 +185,108 @@ screenshot_images = [] # Array to store all screenshot images OUTPUT_DIR = "examples/output" SESSION_DIR = os.path.join(OUTPUT_DIR, "sessions") + def load_all_sessions(with_images=False): """Load and concatenate all session datasets into a single Dataset""" try: # Get all session folders if not os.path.exists(SESSION_DIR): return None - + session_folders = glob.glob(os.path.join(SESSION_DIR, "*")) if not session_folders: return None - + # Load each dataset and concatenate all_datasets = [] for folder in session_folders: try: ds = Dataset.load_from_disk(folder) if not with_images: - ds = ds.remove_columns('images') - + ds = ds.remove_columns("images") + # Add folder name to identify the source folder_name = os.path.basename(folder) - + # Process the messages from tool_call_logs def process_messages(example): messages_text = [] current_role = None - + # Process the logs if they exist in the example - if 'tool_calls' in example: + if "tool_calls" in example: # Use the existing get_chatbot_messages function with explicit logs parameter - formatted_msgs = get_chatbot_messages(logs=json.loads(example['tool_calls'])) - + formatted_msgs = get_chatbot_messages( + logs=json.loads(example["tool_calls"]) + ) + # Process each ChatMessage and extract either title or content for msg in formatted_msgs: # Check if role has changed if msg.role != current_role: # Add a line with the new role if it changed if current_role is not None: # Skip for the first message - messages_text.append("") # Add an empty line between role changes + messages_text.append( + "" + ) # Add an empty line between role changes messages_text.append(f"{msg.role}") current_role = msg.role - + # Add the message content - if msg.metadata and 'title' in msg.metadata: + if msg.metadata and "title" in msg.metadata: # Use the title if available - messages_text.append(msg.metadata['title']) + messages_text.append(msg.metadata["title"]) else: # Use just the content without role prefix since we're adding role headers messages_text.append(msg.content) - + # Join all messages with newlines all_messages = "\n".join(messages_text) - + return { **example, "source_folder": folder_name, "messages": all_messages, } - + # Apply the processing to each example ds = ds.map(process_messages) all_datasets.append(ds) except Exception as e: print(f"Error loading dataset from {folder}: {str(e)}") - + if not all_datasets: return None - + # Concatenate all datasets return concatenate_datasets(all_datasets) except Exception as e: print(f"Error loading sessions: {str(e)}") return None + def get_existing_tags(): """Extract all existing tags from saved demonstrations""" all_sessions = load_all_sessions() if all_sessions is None: return [], [] - + # Convert to pandas and extract tags df = all_sessions.to_pandas() - - if 'tags' not in df.columns: + + if "tags" not in df.columns: return [] - + # Extract all tags and flatten the list all_tags = [] - for tags in df['tags'].dropna(): + for tags in df["tags"].dropna(): all_tags += list(tags) - + # Remove duplicates and sort unique_tags = sorted(list(set(all_tags))) return unique_tags, unique_tags + def get_sessions_data(): """Load all sessions dataset""" @@ -207,21 +294,22 @@ def get_sessions_data(): if combined_ds: # Convert to pandas and select columns df = combined_ds.to_pandas() - columns = ['name', 'messages', 'source_folder'] - if 'tags' in df.columns: - columns.append('tags') + columns = ["name", "messages", "source_folder"] + if "tags" in df.columns: + 
columns.append("tags") return df[columns] else: return pd.DataFrame({"name": [""], "messages": [""], "source_folder": [""]}) + def upload_to_huggingface(dataset_name, visibility, filter_tags=None): """Upload sessions to HuggingFace Datasets Hub, optionally filtered by tags - + Args: dataset_name: Name of the dataset on HuggingFace (format: username/dataset-name) visibility: 'public' or 'private' filter_tags: List of tags to filter by (optional) - + Returns: Status message """ @@ -230,35 +318,35 @@ def upload_to_huggingface(dataset_name, visibility, filter_tags=None): hf_token = os.environ.get("HF_TOKEN") if not hf_token: return "Error: HF_TOKEN environment variable not found. Please set it before uploading." - + # Check if dataset name is in the correct format if not dataset_name or "/" not in dataset_name: return "Dataset name must be in the format 'username/dataset-name'" - + # Load all sessions combined_ds = load_all_sessions(with_images=True) if combined_ds is None or len(combined_ds) == 0: return "No sessions found to upload." - + # If tag filtering is provided, filter the datasets if filter_tags: # Convert to pandas to filter df = combined_ds.to_pandas() - - if 'tags' not in df.columns: + + if "tags" not in df.columns: return "No sessions with tags found to filter." - + # Get list of source folders for sessions that have any of the selected tags matching_folders = [] for _, row in df.iterrows(): - if not len(row.get('tags')): + if not len(row.get("tags")): continue - if any(tag in list(row.get('tags', [])) for tag in filter_tags): - matching_folders.append(row['source_folder']) - + if any(tag in list(row.get("tags", [])) for tag in filter_tags): + matching_folders.append(row["source_folder"]) + if not matching_folders: return "No sessions matched the selected tag filters." - + # Load only the matching datasets filtered_datasets = [] for folder in matching_folders: @@ -269,10 +357,10 @@ def upload_to_huggingface(dataset_name, visibility, filter_tags=None): filtered_datasets.append(ds) except Exception as e: print(f"Error loading dataset from {folder}: {str(e)}") - + if not len(filtered_datasets): return "Error loading the filtered sessions." 
- + # Create a new combined dataset with just the filtered sessions upload_ds = concatenate_datasets(filtered_datasets) session_count = len(upload_ds) @@ -280,57 +368,52 @@ def upload_to_huggingface(dataset_name, visibility, filter_tags=None): # Use all sessions upload_ds = combined_ds session_count = len(upload_ds) - - tags = ['cua'] + + tags = ["cua"] if isinstance(filter_tags, list): tags += filter_tags - + # Push to HuggingFace upload_ds.push_to_hub( dataset_name, private=visibility == "private", token=hf_token, - commit_message="(Built with github.com/trycua/cua)" + commit_message="(Built with github.com/trycua/cua)", ) - + # Create dataset card card_data = DatasetCardData( - language='en', - license='mit', - task_categories=['visual-question-answering'], - tags=tags + language="en", license="mit", task_categories=["visual-question-answering"], tags=tags ) card = DatasetCard.from_template( card_data=card_data, - template_str="---\n{{ card_data }}\n---\n\n# Uploaded computer interface trajectories\n\nThese trajectories were generated and uploaded using [cua](https://github.com/trycua/cua)" + template_str="---\n{{ card_data }}\n---\n\n# Uploaded computer interface trajectories\n\nThese trajectories were generated and uploaded using [cua](https://github.com/trycua/cua)", ) - card.push_to_hub( - dataset_name, - commit_message="Cua dataset card" - ) - + card.push_to_hub(dataset_name, commit_message="Cua dataset card") + return f"Successfully uploaded {session_count} sessions to HuggingFace Datasets Hub at https://huggingface.co/datasets/{dataset_name}" - + except Exception as e: return f"Error uploading to HuggingFace: {str(e)}" + def save_demonstration(log_data, demo_name=None, demo_tags=None): """Save the current tool call logs as a demonstration file using HuggingFace datasets""" global tool_call_logs, session_id - + if not tool_call_logs: return "No data to save", None - + # Create output directories if they don't exist if not os.path.exists(OUTPUT_DIR): os.makedirs(OUTPUT_DIR) if not os.path.exists(SESSION_DIR): os.makedirs(SESSION_DIR) - + # Use default name if none provided if not demo_name or demo_name.strip() == "": demo_name = generate_random_demo_name() - + # Process tags tags = [] if demo_tags: @@ -338,66 +421,69 @@ def save_demonstration(log_data, demo_name=None, demo_tags=None): tags = demo_tags elif isinstance(demo_tags, str): # Split by comma if it's a comma-separated string - tags = [tag.strip() for tag in demo_tags.split(',') if tag.strip()] - + tags = [tag.strip() for tag in demo_tags.split(",") if tag.strip()] + log_time = datetime.now().isoformat() - + def msg_to_dict(msg: ChatMessage): - return { - "role": msg.role, - "content": str(msg.content), - "metadata": dict(msg.metadata) - } - + return {"role": msg.role, "content": str(msg.content), "metadata": dict(msg.metadata)} + # Create dataset - demonstration_dataset = [{ - "timestamp": str(log_time), - "session_id": str(session_id), - "name": str(demo_name), - "tool_calls": json.dumps(tool_call_logs), - "messages": json.dumps([msg_to_dict(msg) for msg in get_chatbot_messages(tool_call_logs)]), - "tags": list(tags), - "images": [Image.open(io.BytesIO(img)) for img in screenshot_images], - }] - + demonstration_dataset = [ + { + "timestamp": str(log_time), + "session_id": str(session_id), + "name": str(demo_name), + "tool_calls": json.dumps(tool_call_logs), + "messages": json.dumps( + [msg_to_dict(msg) for msg in get_chatbot_messages(tool_call_logs)] + ), + "tags": list(tags), + "images": [Image.open(io.BytesIO(img)) for 
img in screenshot_images], + } + ] + try: # Create a new HuggingFace dataset from the current session new_session_ds = Dataset.from_list( demonstration_dataset, - features=Features({ - 'timestamp': datasets.Value('string'), - 'session_id': datasets.Value('string'), - 'name': datasets.Value('string'), - 'tool_calls': datasets.Value('string'), - 'messages': datasets.Value('string'), - 'tags': Sequence(datasets.Value('string')), - 'images': Sequence(datasets.Image()), - }) + features=Features( + { + "timestamp": datasets.Value("string"), + "session_id": datasets.Value("string"), + "name": datasets.Value("string"), + "tool_calls": datasets.Value("string"), + "messages": datasets.Value("string"), + "tags": Sequence(datasets.Value("string")), + "images": Sequence(datasets.Image()), + } + ), ) - + # Create a unique folder name with demonstration name, session ID and timestamp timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") safe_name = demo_name.replace(" ", "_").replace("/", "_").replace("\\", "_")[:50] session_folder = os.path.join(SESSION_DIR, f"{safe_name}_{session_id}_{timestamp}") - + # Create the directory if it doesn't exist if not os.path.exists(session_folder): os.makedirs(session_folder) - + # Save the dataset to the unique folder new_session_ds.save_to_disk(session_folder) - + return f"Session saved to {session_folder}" except Exception as e: return f"Error saving demonstration: {str(e)}" + def log_tool_call(name, action, arguments, result=None): """Log a tool call with unique IDs and results""" global tool_call_logs - + # Create arguments JSON that includes the action args = {"action": action, **arguments} - + # Process result for logging processed_result = {} if result: @@ -417,37 +503,38 @@ def log_tool_call(name, action, arguments, result=None): processed_result[key] = f"" else: processed_result[key] = value - + # Create the tool call log entry log_entry = { "type": "function_call", "name": name, "arguments": json.dumps(args), - "result": processed_result if result else None + "result": processed_result if result else None, } - + # Add to logs and immediately flush by printing tool_call_logs.append(log_entry) print(f"Tool call logged: {json.dumps(log_entry)}") - + return log_entry + async def execute(name, action, arguments): """Execute a tool call, log it, and return any results""" global computer, last_action, last_screenshot, last_screenshot_before - + last_screenshot_before = last_screenshot - + # Store last action for reasoning box last_action = {"name": name, "action": action, "arguments": arguments} - + results = {} - + # Execute the action based on name and action if name == "computer": if computer is None: return {} - + # Get the method from the computer interface if action == "initialize": # Already initialized, just log @@ -505,7 +592,7 @@ async def execute(name, action, arguments): elif action == "done" or action == "fail": # Just a marker, doesn't do anything pass - + # Add a screenshot to the results for every action (if not already there) if action != "shutdown" and "screenshot" not in results: results["screenshot"] = await computer.interface.screenshot() @@ -515,22 +602,25 @@ async def execute(name, action, arguments): # If requested, take a screenshot after message if arguments.get("screenshot_after", False) and computer is not None: results["screenshot"] = await computer.interface.screenshot() - + # Log the tool call with results log_tool_call(name, action, arguments, results) - + if "screenshot" in results: # Convert bytes to PIL Image screenshot_img = 
Image.open(io.BytesIO(results["screenshot"])) results["screenshot"] = screenshot_img # Update last_screenshot with the new screenshot last_screenshot = screenshot_img - + return results -async def handle_init_computer(os_choice: str, app_list=None, provider="lume", container_name=None, api_key=None): + +async def handle_init_computer( + os_choice: str, app_list=None, provider="lume", container_name=None, api_key=None +): """Initialize the computer instance and tools for macOS or Ubuntu or Windows - + Args: os_choice: The OS to use ("macOS" or "Ubuntu" or "Windows") app_list: Optional list of apps to focus on using the app-use experiment @@ -539,14 +629,14 @@ async def handle_init_computer(os_choice: str, app_list=None, provider="lume", c api_key: The API key to use for cloud provider """ global computer, tool_call_logs, tools - + # Check if we should enable app-use experiment use_app_experiment = app_list and len(app_list) > 0 experiments = ["app-use"] if use_app_experiment else None - + # Determine if we should use host computer server use_host_computer_server = provider == "self" - + if os_choice == "Ubuntu": os_type_str = "linux" image_str = "ubuntu-noble-vanilla:latest" @@ -556,13 +646,11 @@ async def handle_init_computer(os_choice: str, app_list=None, provider="lume", c else: os_type_str = "macos" image_str = "macos-sequoia-cua:latest" - + # Create computer instance with appropriate configuration if use_host_computer_server: computer = Computer( - os_type=os_type_str, - use_host_computer_server=True, - experiments=experiments + os_type=os_type_str, use_host_computer_server=True, experiments=experiments ) elif provider == "cloud": # Use API key from environment variable or field input @@ -572,13 +660,11 @@ async def handle_init_computer(os_choice: str, app_list=None, provider="lume", c provider_type=VMProviderType.CLOUD, name=container_name, api_key=cloud_api_key, - experiments=experiments + experiments=experiments, ) elif provider == "winsandbox": computer = Computer( - os_type="windows", - provider_type=VMProviderType.WINSANDBOX, - experiments=experiments + os_type="windows", provider_type=VMProviderType.WINSANDBOX, experiments=experiments ) else: computer = Computer( @@ -588,169 +674,174 @@ async def handle_init_computer(os_choice: str, app_list=None, provider="lume", c display="1024x768", memory="8GB", cpu="4", - experiments=experiments + experiments=experiments, ) await computer.run() - + # If app list is provided, create desktop from apps if use_app_experiment: computer = computer.create_desktop_from_apps(app_list) # Log computer initialization as a tool call - init_params = { - "os": os_type_str, - "provider": provider - } - + init_params = {"os": os_type_str, "provider": provider} + # Add VM-specific parameters if not using host computer server if not use_host_computer_server: - init_params.update({ - "image": image_str, - "display": "1024x768", - "memory": "8GB", - "cpu": "4" - }) - + init_params.update({"image": image_str, "display": "1024x768", "memory": "8GB", "cpu": "4"}) + # Add app list to the log if provided if use_app_experiment: init_params["apps"] = app_list init_params["experiments"] = ["app-use"] - + # Add container name to the log if using cloud provider if provider == "cloud": init_params["container_name"] = container_name - + result = await execute("computer", "initialize", init_params) return result["screenshot"], json.dumps(tool_call_logs, indent=2) + async def handle_screenshot(): """Take a screenshot and return it as a PIL Image""" global computer if 
computer is None: return None - + result = await execute("computer", "screenshot", {}) return result["screenshot"] + async def handle_wait(): """Wait for 1 second and then take a screenshot""" global computer if computer is None: return None - + # Execute wait action result = await execute("computer", "wait", {}) return result["screenshot"], json.dumps(tool_call_logs, indent=2) + async def handle_click(evt: gr.SelectData, img, click_type): """Handle click events on the image based on click type""" global computer if computer is None: return img, json.dumps(tool_call_logs, indent=2) - + # Get the coordinates of the click x, y = evt.index - + # Move cursor and perform click result = await execute("computer", click_type, {"x": x, "y": y}) - + # Take a new screenshot to show the result return result["screenshot"], json.dumps(tool_call_logs, indent=2) + async def handle_type(text, press_enter=False): """Type text into the computer""" global computer if computer is None or not text: return await handle_screenshot(), json.dumps(tool_call_logs, indent=2) - + result = await execute("computer", "type_text", {"text": text, "press_enter": press_enter}) - + return result["screenshot"], json.dumps(tool_call_logs, indent=2) + async def handle_copy(): """Copy selected content to clipboard and return it""" global computer if computer is None: return "Computer not initialized", json.dumps(tool_call_logs, indent=2) - + result = await execute("computer", "copy_to_clipboard", {}) content = result.get("clipboard", "No content copied") - + return content, json.dumps(tool_call_logs, indent=2) + async def handle_set_clipboard(text): """Set clipboard content""" global computer if computer is None: return "Computer not initialized", json.dumps(tool_call_logs, indent=2) - + await execute("computer", "set_clipboard", {"text": text}) - + return f"Clipboard set to: {text}", json.dumps(tool_call_logs, indent=2) + async def handle_run_command(command): """Run a shell command""" global computer if computer is None: return "Computer not initialized", json.dumps(tool_call_logs, indent=2) - + # Execute the run_command action and log it result = await execute("computer", "run_command", {"command": command}) - + # Get the result from the computer interface stdout, stderr = result.get("stdout"), result.get("stderr") - + # Format the output output = "" if stdout: output += f"STDOUT:\n{stdout}\n" if stderr: output += f"STDERR:\n{stderr}\n" - + if not output: output = "(No output)" - + return output, json.dumps(tool_call_logs, indent=2) + async def handle_shutdown(): """Shutdown the computer instance""" global computer if computer is None: return "Computer not initialized", json.dumps(tool_call_logs, indent=2) - + await execute("computer", "shutdown", {}) - + computer = None return "Computer shut down", json.dumps(tool_call_logs, indent=2) + async def handle_memory(memory_text): """Update the global memory""" global memory - await execute("memory", "update", { "memory_text": memory_text }) + await execute("memory", "update", {"memory_text": memory_text}) memory = memory_text return "Memory updated" + async def update_reasoning(reasoning_text, is_erroneous=False): """Update the reasoning for the last action""" global last_action, tool_call_logs - + if not last_action["name"]: return "No action to update reasoning for" - + # Find the last log entry that matches the last action for log_entry in reversed(tool_call_logs): - if (log_entry["name"] == last_action["name"] and - json.loads(log_entry["arguments"]).get("action") == 
last_action["action"]): + if ( + log_entry["name"] == last_action["name"] + and json.loads(log_entry["arguments"]).get("action") == last_action["action"] + ): # Add reasoning to the log entry log_entry["reasoning"] = reasoning_text # If marked as erroneous, set weight to 0 log_entry["weight"] = 0 if is_erroneous else 1 break - + return "Reasoning updated" + async def clear_log(): """Clear the tool call logs""" global tool_call_logs, screenshot_images @@ -758,67 +849,71 @@ async def clear_log(): tool_call_logs = [] return json.dumps(tool_call_logs, indent=2) + def get_last_action_display(): """Format the last action for display in the reasoning box""" global last_action if not last_action["name"]: return "No actions performed yet" - + action_str = f"Tool: {last_action['name']}\nAction: {last_action['action']}" - + if last_action["arguments"]: args_str = "\nArguments:\n" for k, v in last_action["arguments"].items(): args_str += f" {k}: {v}\n" action_str += args_str - + return action_str + def get_memory(): """Get the current memory""" global memory return memory + def get_chatbot_messages(logs=None): """Format chat messages for gr.Chatbot component - + Args: logs: Optional list of tool call logs. If None, uses global tool_call_logs. - + Returns: List of ChatMessage objects """ formatted_messages = [] - + # Use provided logs if specified, otherwise use global tool_call_logs logs_to_process = logs if logs is not None else tool_call_logs - + for tool_call in logs_to_process: - if tool_call['type'] != "function_call": + if tool_call["type"] != "function_call": continue - - name = tool_call['name'] - arguments = json.loads(tool_call['arguments']) - - role = tool_call['role'] if 'role' in tool_call else arguments['role'] if 'role' in arguments else 'assistant' - + + name = tool_call["name"] + arguments = json.loads(tool_call["arguments"]) + + role = ( + tool_call["role"] + if "role" in tool_call + else arguments["role"] if "role" in arguments else "assistant" + ) + if "reasoning" in tool_call: - formatted_messages += [ChatMessage( - role=role, - content=tool_call['reasoning'], - metadata={"title": "🧠 Reasoning"} - )] - + formatted_messages += [ + ChatMessage( + role=role, content=tool_call["reasoning"], metadata={"title": "🧠 Reasoning"} + ) + ] + # Format tool calls with titles if name == "message": - formatted_messages += [ChatMessage( - role=role, - content=arguments['text'] - )] + formatted_messages += [ChatMessage(role=role, content=arguments["text"])] else: # Format tool calls with a title - action = arguments.get('action', '') - + action = arguments.get("action", "") + # Define dictionary for title mappings title_mappings = { "wait": "⏳ Waiting...", @@ -837,9 +932,9 @@ def get_chatbot_messages(logs=None): "set_clipboard": "📋 Setting Clipboard", "run_command": "🖥️ Running Shell Command", "initialize": "🚀 Initializing Computer", - "shutdown": "🛑 Shutting Down" + "shutdown": "🛑 Shutting Down", } - + # Look up title based on name.action or just action key = f"{name}.{action}" if key in title_mappings: @@ -848,84 +943,92 @@ def get_chatbot_messages(logs=None): title = title_mappings[action] else: title = f"🛠️ {name.capitalize()}: {action}" - + # Always set status to done status = "done" - + # Format the response content content_parts = [] - + # Add arguments if arguments: content_parts.append("**Arguments:**") for k, v in arguments.items(): if k != "action": # Skip action as it's in the title content_parts.append(f"- {k}: {v}") - + # Add results if available - if tool_call.get('result'): + if 
tool_call.get("result"): content_parts.append("\n**Results:**") content_parts.append(f"```json\n{json.dumps(tool_call['result'], indent=4)}\n```") # for k, v in tool_call['result'].items(): # content_parts.append(f"- {k}: {v}") - + # Join all content parts content = "\n".join(content_parts) - - formatted_messages += [ChatMessage( - role="assistant", - content=content, - metadata={"title": title, "status": status} - )] - + + formatted_messages += [ + ChatMessage( + role="assistant", content=content, metadata={"title": title, "status": status} + ) + ] + return formatted_messages + async def submit_message(message_text, role, screenshot_after=False): """Submit a message with specified role (user or assistant)""" global last_screenshot - + # Log the message submission and get result (may include screenshot) - result = await execute("message", "submit", { - "role": role, - "text": message_text, - "screenshot_after": screenshot_after - }) - + result = await execute( + "message", + "submit", + {"role": role, "text": message_text, "screenshot_after": screenshot_after}, + ) + # Update return values based on whether a screenshot was taken if screenshot_after and "screenshot" in result: - return f"Message submitted as {role} with screenshot", get_chatbot_messages(), json.dumps(tool_call_logs, indent=2), result["screenshot"] + return ( + f"Message submitted as {role} with screenshot", + get_chatbot_messages(), + json.dumps(tool_call_logs, indent=2), + result["screenshot"], + ) else: # Return last screenshot if available - return f"Message submitted as {role}", get_chatbot_messages(), json.dumps(tool_call_logs, indent=2), last_screenshot + return ( + f"Message submitted as {role}", + get_chatbot_messages(), + json.dumps(tool_call_logs, indent=2), + last_screenshot, + ) + def create_gradio_ui(): with gr.Blocks() as app: gr.Markdown("# Computer Interface Tool") - + with gr.Row(): with gr.Column(scale=3): with gr.Group(): # Main screenshot display img = gr.Image( - type="pil", - label="Current Screenshot", - show_label=False, - interactive=False + type="pil", label="Current Screenshot", show_label=False, interactive=False ) - + # Click type selection click_type = gr.Radio( - ["left_click", "right_click", "double_click", "move_cursor"], + ["left_click", "right_click", "double_click", "move_cursor"], label="Click Type", - value="left_click" + value="left_click", ) - + with gr.Row(): wait_btn = gr.Button("WAIT") done_btn = gr.Button("DONE") fail_btn = gr.Button("FAIL") - - + # Tabbed logs: Tool logs, Conversational logs, and Demonstrations with gr.Tabs() as logs_tabs: with gr.TabItem("Conversational Logs"): @@ -936,14 +1039,11 @@ def create_gradio_ui(): height=400, type="messages", sanitize_html=True, - allow_tags=True + allow_tags=True, ) with gr.TabItem("Function Logs"): with gr.Group(): - action_log = gr.JSON( - label="Function Logs", - every=0.2 - ) + action_log = gr.JSON(label="Function Logs", every=0.2) clear_log_btn = gr.Button("Clear Log") with gr.TabItem("Save/Share Demonstrations"): with gr.Row(): @@ -952,11 +1052,11 @@ def create_gradio_ui(): dataset_viewer = gr.DataFrame( label="All Sessions", value=get_sessions_data, - show_search='filter', + show_search="filter", max_height=300, - interactive=True # Make it interactive for selection + interactive=True, # Make it interactive for selection ) - + # HuggingFace Upload UI with gr.Group(visible=True): gr.Markdown("Upload Sessions to HuggingFace") @@ -964,28 +1064,30 @@ def create_gradio_ui(): hf_dataset_name = gr.Textbox( label="HuggingFace Dataset 
Name", placeholder="username/dataset-name", - info="Format: username/dataset-name" + info="Format: username/dataset-name", ) hf_visibility = gr.Radio( choices=["public", "private"], label="Dataset Visibility", - value="private" + value="private", ) - + # Tag filtering with a single multi-select dropdown filter_tags = gr.Dropdown( label="Filter by tags (optional)", choices=get_existing_tags()[0], multiselect=True, allow_custom_value=True, - info="When tags are selected, only demonstrations with those tags will be uploaded. Leave empty to upload all sessions." + info="When tags are selected, only demonstrations with those tags will be uploaded. Leave empty to upload all sessions.", ) - + # Function to update button text based on selected tags def get_upload_button_text(selected_tags=None): if not selected_tags: # Count all sessions - session_folders = glob.glob(os.path.join(SESSION_DIR, "*")) + session_folders = glob.glob( + os.path.join(SESSION_DIR, "*") + ) count = len(session_folders) if session_folders else 0 return f"Upload {count} Sessions to HuggingFace" else: @@ -993,98 +1095,96 @@ def create_gradio_ui(): all_sessions = load_all_sessions() if all_sessions is None: return "Upload 0 Sessions to HuggingFace" - + df = all_sessions.to_pandas() - if 'tags' not in df.columns: + if "tags" not in df.columns: return "Upload 0 Sessions to HuggingFace" - + # Filter by selected tags (sessions that have ANY of the selected tags) matching_count = 0 for _, row in df.iterrows(): - tags = row.get('tags', []) + tags = row.get("tags", []) if not len(tags): continue # Check if any of the selected tags are in this session's tags - if any(tag in list(row['tags']) for tag in selected_tags): + if any( + tag in list(row["tags"]) + for tag in selected_tags + ): matching_count += 1 - - return f"Upload {matching_count} Sessions to HuggingFace" - + + return ( + f"Upload {matching_count} Sessions to HuggingFace" + ) + # Initial button text with all sessions hf_upload_btn = gr.Button(get_upload_button_text()) - + # Update button text when filter changes def update_button_text(selected_tags): return get_upload_button_text(selected_tags) - + # Connect filter changes to update button text filter_tags.change( update_button_text, inputs=filter_tags, - outputs=hf_upload_btn + outputs=hf_upload_btn, ) - + hf_upload_status = gr.Textbox(label="Upload Status", value="") with gr.Column(scale=1): # Demo name with random name button with gr.Group(): demo_name = gr.Textbox( - label="Demonstration Name", + label="Demonstration Name", value=generate_random_demo_name(), - placeholder="Enter a name for this demonstration" + placeholder="Enter a name for this demonstration", ) random_name_btn = gr.Button("🎲", scale=1) - + # Demo tags dropdown demo_tags = gr.Dropdown( label="Demonstration Tags", choices=get_existing_tags()[0], multiselect=True, allow_custom_value=True, - info="Select existing tags or create new ones" + info="Select existing tags or create new ones", ) - + save_btn = gr.Button("Save Current Session") save_status = gr.Textbox(label="Save Status", value="") - + # Function to update the demo name with a new random name def update_random_name(): return generate_random_demo_name() - + # Connect random name button - random_name_btn.click( - update_random_name, - outputs=[demo_name] - ) - + random_name_btn.click(update_random_name, outputs=[demo_name]) + with gr.Column(scale=1): with gr.Accordion("Memory / Scratchpad", open=False): with gr.Group(): memory_display = gr.Textbox( - label="Current Memory", - value=get_memory(), - 
lines=5 + label="Current Memory", value=get_memory(), lines=5 ) with gr.Row(): memory_submit_btn = gr.Button("Submit Memory") memory_refine_btn = gr.Button("Refine") memory_status = gr.Textbox(label="Status", value="") - + with gr.Accordion("Tasks", open=True): # Add current task display and controls with gr.Group(): current_task = gr.Textbox( - label="Current Task", - value=TASK_EXAMPLES[0]["task"], - interactive=True + label="Current Task", value=TASK_EXAMPLES[0]["task"], interactive=True ) with gr.Row(): randomize_task_btn = gr.Button("🎲 Randomize Task") run_setup_btn = gr.Button("⚙️ Run Task Setup") # Setup status textbox setup_status = gr.Textbox(label="Setup Status", value="") - + with gr.Group(): with gr.Accordion("Computer Configuration", open=False): with gr.Row(): @@ -1093,68 +1193,68 @@ def create_gradio_ui(): choices=["macOS", "Ubuntu", "Windows"], value="macOS", ) - + # Provider selection radio provider_choice = gr.Radio( label="Provider", choices=["lume", "self", "cloud", "winsandbox"], value="lume", - info="'lume' uses a VM, 'self' uses the host computer server, 'cloud' uses a cloud container" + info="'lume' uses a VM, 'self' uses the host computer server, 'cloud' uses a cloud container", ) - + # Container name field for cloud provider (initially hidden) container_name = gr.Textbox( label="Container Name", placeholder="Enter your container name", visible=False, - info="Get your container from [trycua.com](https://trycua.com/)" + info="Get your container from [trycua.com](https://trycua.com/)", ) - + # Check if CUA_API_KEY is set in environment has_cua_key = os.environ.get("CUA_API_KEY") is not None - + # API key field for cloud provider (visible only if no env key and cloud selected) api_key_field = gr.Textbox( label="CUA API Key", placeholder="Enter your CUA API key", type="password", visible=False, - info="Required for cloud provider. Set CUA_API_KEY environment variable to hide this field." + info="Required for cloud provider. 
Set CUA_API_KEY environment variable to hide this field.", ) - + # App filtering dropdown for app-use experiment app_filter = gr.Dropdown( label="Filter by apps (App-Use)", multiselect=True, allow_custom_value=True, - info="When apps are selected, the computer will focus on those apps using the app-use experiment" + info="When apps are selected, the computer will focus on those apps using the app-use experiment", ) - + # Function to show/hide container name and API key fields based on provider selection def update_cloud_fields_visibility(provider): show_container = provider == "cloud" show_api_key = provider == "cloud" and not has_cua_key return ( gr.update(visible=show_container), - gr.update(visible=show_api_key) + gr.update(visible=show_api_key), ) - + # Connect provider choice to field visibility provider_choice.change( update_cloud_fields_visibility, inputs=provider_choice, - outputs=[container_name, api_key_field] + outputs=[container_name, api_key_field], ) - + start_btn = gr.Button("Initialize Computer") - + with gr.Group(): input_text = gr.Textbox(label="Type Text") with gr.Row(): press_enter_checkbox = gr.Checkbox(label="Press Enter", value=False) submit_text_btn = gr.Button("Submit Text") text_refine_btn = gr.Button("Refine") - + with gr.Group(): hotkey_keys = gr.Dropdown( choices=VALID_KEYS, @@ -1162,112 +1262,102 @@ def create_gradio_ui(): multiselect=True, show_label=False, allow_custom_value=True, - info="Select one or more keys to send as a hotkey" + info="Select one or more keys to send as a hotkey", ) hotkey_btn = gr.Button("Send Hotkey(s)") - + with gr.Accordion("Scrolling", open=False): with gr.Group(): - scroll_clicks = gr.Number(label="Number of Clicks", value=1, minimum=1, step=1) + scroll_clicks = gr.Number( + label="Number of Clicks", value=1, minimum=1, step=1 + ) with gr.Row(): scroll_up_btn = gr.Button("Scroll Up") scroll_down_btn = gr.Button("Scroll Down") - + with gr.Accordion("Reasoning for Last Action", open=False): with gr.Group(): last_action_display = gr.Textbox( - label="Last Action", - value=get_last_action_display(), - interactive=False + label="Last Action", value=get_last_action_display(), interactive=False ) reasoning_text = gr.Textbox( label="What was your thought process behind this action?", placeholder="Enter your reasoning here...", - lines=3 + lines=3, ) erroneous_checkbox = gr.Checkbox( - label="Mark this action as erroneous (sets weight to 0)", - value=False + label="Mark this action as erroneous (sets weight to 0)", value=False ) reasoning_submit_btn = gr.Button("Submit Reasoning") reasoning_refine_btn = gr.Button("Refine") reasoning_status = gr.Textbox(label="Status", value="") - + with gr.Accordion("Conversation Messages", open=False): message_role = gr.Radio( - ["user", "assistant"], - label="Message Role", - value="user" + ["user", "assistant"], label="Message Role", value="user" ) message_text = gr.Textbox( - label="Message Content", - placeholder="Enter message here...", - lines=3 + label="Message Content", placeholder="Enter message here...", lines=3 ) screenshot_after_msg = gr.Checkbox( - label="Receive screenshot after message", - value=False + label="Receive screenshot after message", value=False ) message_submit_btn = gr.Button("Submit Message") message_status = gr.Textbox(label="Status") - + with gr.Accordion("Clipboard Operations", open=False): clipboard_content = gr.Textbox(label="Clipboard Content") get_clipboard_btn = gr.Button("Get Clipboard Content") set_clipboard_text = gr.Textbox(label="Set Clipboard Text") 
set_clipboard_btn = gr.Button("Set Clipboard") clipboard_status = gr.Textbox(label="Status") - + with gr.Accordion("Run Shell Commands", open=False): command_input = gr.Textbox(label="Command to run", placeholder="ls -la") run_command_btn = gr.Button("Run Command") command_output = gr.Textbox(label="Command Output", lines=5) - + shutdown_btn = gr.Button("Shutdown Computer") # Handle save button save_btn.click( - save_demonstration, - inputs=[action_log, demo_name, demo_tags], - outputs=[save_status] + save_demonstration, inputs=[action_log, demo_name, demo_tags], outputs=[save_status] ) - + # Function to refresh the dataset viewer def refresh_dataset_viewer(): return get_sessions_data() - + # Also update the dataset viewer when saving - save_btn.click( - refresh_dataset_viewer, - outputs=dataset_viewer - ) - + save_btn.click(refresh_dataset_viewer, outputs=dataset_viewer) + # Also update the tags dropdown when saving - save_btn.click( - get_existing_tags, - outputs=[demo_tags, filter_tags] - ) - + save_btn.click(get_existing_tags, outputs=[demo_tags, filter_tags]) + # Handle HuggingFace upload button hf_upload_btn.click( upload_to_huggingface, inputs=[hf_dataset_name, hf_visibility, filter_tags], - outputs=[hf_upload_status] + outputs=[hf_upload_status], ) # Function to randomize task def randomize_task(): task_dict = random.choice(TASK_EXAMPLES) return task_dict["task"] - + # Function to run task setup async def run_task_setup(task_text): global computer - + # Check if computer is initialized if computer is None: - return "Computer not initialized. Please initialize the computer first.", img, action_log - + return ( + "Computer not initialized. Please initialize the computer first.", + img, + action_log, + ) + # Find the task dict that matches the current task text for task_dict in TASK_EXAMPLES: if task_dict["task"] == task_text: @@ -1276,60 +1366,54 @@ def create_gradio_ui(): setup_func = task_dict["setup"] if setup_func: await setup_func(computer) - + # Send initial user message _, _, logs_json, screenshot = await submit_message( - task_text, - "user", - screenshot_after=True + task_text, "user", screenshot_after=True ) - + return f"Setup complete for: {task_text}", screenshot, logs_json except Exception as e: return f"Error during setup: {str(e)}", img, action_log - + return "Task not found in examples", img, action_log - + # Connect the randomize button to the function - randomize_task_btn.click( - randomize_task, - outputs=[current_task] - ) - + randomize_task_btn.click(randomize_task, outputs=[current_task]) + # Connect the setup button run_setup_btn.click( - run_task_setup, - inputs=[current_task], - outputs=[setup_status, img, action_log] + run_task_setup, inputs=[current_task], outputs=[setup_status, img, action_log] ) - + # Event handlers - action_log.change( - get_chatbot_messages, - outputs=[chat_log] - ) - + action_log.change(get_chatbot_messages, outputs=[chat_log]) + img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log]) - start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice, container_name, api_key_field], outputs=[img, action_log]) + start_btn.click( + handle_init_computer, + inputs=[os_choice, app_filter, provider_choice, container_name, api_key_field], + outputs=[img, action_log], + ) wait_btn.click(handle_wait, outputs=[img, action_log]) - + # DONE and FAIL buttons just do a placeholder action async def handle_done(): output = await execute("computer", "done", {}) return output["screenshot"], 
json.dumps(tool_call_logs, indent=2) - + async def handle_fail(): output = await execute("computer", "fail", {}) return output["screenshot"], json.dumps(tool_call_logs, indent=2) - + done_btn.click(handle_done, outputs=[img, action_log]) fail_btn.click(handle_fail, outputs=[img, action_log]) - + # Handle hotkey button async def handle_hotkey(selected_keys): if not selected_keys or len(selected_keys) == 0: return await handle_screenshot(), json.dumps(tool_call_logs, indent=2) - + # When multiple keys are selected, the last one is the main key, the rest are modifiers if len(selected_keys) > 1: key = selected_keys[-1] @@ -1338,19 +1422,19 @@ def create_gradio_ui(): # If only one key is selected, no modifiers key = selected_keys[0] modifiers = [] - + output = await execute("computer", "send_hotkey", {"keys": selected_keys}) return output["screenshot"], json.dumps(tool_call_logs, indent=2) - + hotkey_btn.click(handle_hotkey, inputs=[hotkey_keys], outputs=[img, action_log]) - + # Define async handler for scrolling async def handle_scroll(direction, num_clicks=1): """Scroll the page up or down""" global computer if computer is None: return None, json.dumps(tool_call_logs, indent=2) - + # Convert num_clicks to integer with validation try: num_clicks = int(num_clicks) @@ -1358,34 +1442,35 @@ def create_gradio_ui(): num_clicks = 1 except (ValueError, TypeError): num_clicks = 1 - + # Execute the scroll action action = "scroll_up" if direction == "up" else "scroll_down" result = await execute("computer", action, {"clicks": num_clicks}) - + return result["screenshot"], json.dumps(tool_call_logs, indent=2) - + # Connect scroll buttons scroll_up_btn.click( - handle_scroll, - inputs=[gr.State("up"), scroll_clicks], - outputs=[img, action_log] + handle_scroll, inputs=[gr.State("up"), scroll_clicks], outputs=[img, action_log] ) scroll_down_btn.click( - handle_scroll, - inputs=[gr.State("down"), scroll_clicks], - outputs=[img, action_log] + handle_scroll, inputs=[gr.State("down"), scroll_clicks], outputs=[img, action_log] + ) + + submit_text_btn.click( + handle_type, inputs=[input_text, press_enter_checkbox], outputs=[img, action_log] ) - - submit_text_btn.click(handle_type, inputs=[input_text, press_enter_checkbox], outputs=[img, action_log]) get_clipboard_btn.click(handle_copy, outputs=[clipboard_content, action_log]) - set_clipboard_btn.click(handle_set_clipboard, inputs=set_clipboard_text, outputs=[clipboard_status, action_log]) - run_command_btn.click(handle_run_command, inputs=command_input, outputs=[command_output, action_log]) + set_clipboard_btn.click( + handle_set_clipboard, inputs=set_clipboard_text, outputs=[clipboard_status, action_log] + ) + run_command_btn.click( + handle_run_command, inputs=command_input, outputs=[command_output, action_log] + ) shutdown_btn.click(handle_shutdown, outputs=[clipboard_status, action_log]) clear_log_btn.click(clear_log, outputs=action_log) chat_log.clear(clear_log, outputs=action_log) - # Update last action display after each action img.select(lambda *args: get_last_action_display(), outputs=last_action_display) start_btn.click(lambda: get_last_action_display(), outputs=last_action_display) @@ -1395,31 +1480,33 @@ def create_gradio_ui(): hotkey_btn.click(lambda: get_last_action_display(), outputs=last_action_display) submit_text_btn.click(lambda: get_last_action_display(), outputs=last_action_display) message_submit_btn.click(lambda: get_last_action_display(), outputs=last_action_display) - + # Handle reasoning submission async def 
handle_reasoning_update(reasoning, is_erroneous): status = await update_reasoning(reasoning, is_erroneous) return status, json.dumps(tool_call_logs, indent=2) - + reasoning_submit_btn.click( handle_reasoning_update, - inputs=[reasoning_text, erroneous_checkbox], - outputs=[reasoning_status, action_log] + inputs=[reasoning_text, erroneous_checkbox], + outputs=[reasoning_status, action_log], ) - + # Helper function for text refinement - used for all refine buttons - async def handle_text_refinement(text_content, content_type="reasoning", task_text="", use_before = False): + async def handle_text_refinement( + text_content, content_type="reasoning", task_text="", use_before=False + ): global last_screenshot, last_action, tool_call_logs, last_screenshot_before - + screenshot = last_screenshot_before if use_before else last_screenshot - + # Check if we have the necessary components if not text_content.strip(): return f"No {content_type} text to refine", text_content - + if screenshot is None: return "No screenshot available for refinement", text_content - + try: # Convert the PIL image to base64 if available screenshot_base64 = None @@ -1427,39 +1514,45 @@ def create_gradio_ui(): with io.BytesIO() as buffer: screenshot.save(buffer, format="PNG") screenshot_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8") - + # Set up the OpenAI client for refinement # Try different API keys from environment in order of preference api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OMNI_OPENAI_API_KEY") - + if not api_key: return "OpenAI API key not found in environment", text_content - + from libs.agent.agent.providers.omni.clients.openai import OpenAIClient - + # Create a client - use gpt-4 if available, fall back to 3.5-turbo model = "gpt-4.1-2025-04-14" - + client = OpenAIClient( api_key=api_key, model=model, max_tokens=1024, temperature=0.2, # Low temperature for more focused refinement ) - + # Get the last 3 messages from the chat history - recent_messages = get_chatbot_messages(tool_call_logs)[-3:] if len(get_chatbot_messages(tool_call_logs)) >= 3 else get_chatbot_messages(tool_call_logs) - + recent_messages = ( + get_chatbot_messages(tool_call_logs)[-3:] + if len(get_chatbot_messages(tool_call_logs)) >= 3 + else get_chatbot_messages(tool_call_logs) + ) + # Format message history with titles when available formatted_messages = [] for msg in recent_messages: - if msg.metadata and 'title' in msg.metadata: - formatted_messages.append(f"{msg.role} ({msg.metadata['title']}): {msg.content}") + if msg.metadata and "title" in msg.metadata: + formatted_messages.append( + f"{msg.role} ({msg.metadata['title']}): {msg.content}" + ) else: formatted_messages.append(f"{msg.role}: {msg.content}") - + formatted_messages = [f"{msg}" for msg in formatted_messages] - + # Create different prompts based on content type if content_type == "reasoning": message_prompt = f"""You are helping refine an explanation about why a specific computer UI action is about to be taken. @@ -1482,7 +1575,7 @@ Make this into a concise reasoning / self-reflection trace, using "I should/need Provide ONLY the refined explanation text, with no additional commentary or markdown.""" - + elif content_type == "memory": message_prompt = f"""You are helping refine memory/scratchpad content for an AI assistant. 
@@ -1503,7 +1596,7 @@ Refine this memory content to be more clear, organized, and useful for the assis - Make the format more readable with bullet points or other organizational elements if helpful Provide ONLY the refined memory text, with no additional commentary or markdown.""" - + elif content_type == "text": message_prompt = f"""You are helping refine text that will be typed into a computer interface. @@ -1524,7 +1617,7 @@ Refine this text to be more effective for the current context: - Optimize the text for the intended use Provide ONLY the refined text, with no additional commentary or markdown.""" - + else: message_prompt = f"""You are helping refine text content. @@ -1539,50 +1632,48 @@ RECENT MESSAGES: Improve this text to be more clear, concise, and effective. Provide ONLY the refined text, with no additional commentary or markdown.""" - + # Create messages with the screenshot messages = [] - + # Add message with image if available if screenshot_base64: - messages.append({ - "role": "user", - "content": [ - { - "type": "text", - "text": message_prompt - }, - { - "type": "image_url", - "image_url": {"url": f"data:image/png;base64,{screenshot_base64}"} - } - ] - }) + messages.append( + { + "role": "user", + "content": [ + {"type": "text", "text": message_prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{screenshot_base64}" + }, + }, + ], + } + ) else: # Fallback if screenshot isn't available - messages.append({ - "role": "user", - "content": message_prompt - }) - + messages.append({"role": "user", "content": message_prompt}) + print(message_prompt) - + # Make the API call response = await client.run_interleaved( messages=messages, system="You are a helpful AI assistant that improves and refines text.", ) - + # Extract the refined text from the response if "choices" in response and len(response["choices"]) > 0: refined_text = response["choices"][0]["message"]["content"] return f"{content_type.capitalize()} refined successfully", refined_text else: return "Error: Unexpected API response format", text_content - + except Exception as e: return f"Error refining {content_type}: {str(e)}", text_content - + # Define async wrapper functions for each refine button async def handle_reasoning_refinement(reasoning, task): return await handle_text_refinement(reasoning, "reasoning", task, use_before=True) @@ -1597,54 +1688,55 @@ Provide ONLY the refined text, with no additional commentary or markdown.""" reasoning_refine_btn.click( handle_reasoning_refinement, inputs=[reasoning_text, current_task], - outputs=[reasoning_status, reasoning_text] + outputs=[reasoning_status, reasoning_text], ) - + # Connect memory refine button memory_refine_btn.click( handle_memory_refinement, inputs=[memory_display, current_task], - outputs=[memory_status, memory_display] + outputs=[memory_status, memory_display], ) - + # Status element for type text section with gr.Group(): type_text_status = gr.Textbox(label="Text Status", value="", visible=False) - + # Connect text refine button text_refine_btn.click( handle_text_input_refinement, inputs=[input_text, current_task], - outputs=[type_text_status, input_text] + outputs=[type_text_status, input_text], ) - + # Handle memory submission async def handle_memory_update(memory_text): status = await handle_memory(memory_text) return status, json.dumps(tool_call_logs, indent=2) - + memory_submit_btn.click( - handle_memory_update, - inputs=memory_display, - outputs=[memory_status, action_log] + handle_memory_update, inputs=memory_display, 
outputs=[memory_status, action_log]
        )
-
+
        # Handle message submission
        async def handle_message_submit(message_content, role, screenshot_after):
-            status, chat_messages, logs_json, screenshot = await submit_message(message_content, role, screenshot_after)
+            status, chat_messages, logs_json, screenshot = await submit_message(
+                message_content, role, screenshot_after
+            )
            if screenshot:
                return status, chat_messages, logs_json, screenshot
            else:
                return status, chat_messages, logs_json, last_screenshot
-
+
        message_submit_btn.click(
            handle_message_submit,
-            inputs=[message_text, message_role, screenshot_after_msg],
-            outputs=[message_status, chat_log, action_log, img]
+            inputs=[message_text, message_role, screenshot_after_msg],
+            outputs=[message_status, chat_log, action_log, img],
        )

    return app

+
# Launch the app
if __name__ == "__main__":
    app = create_gradio_ui()
diff --git a/libs/python/computer/computer/utils.py b/libs/python/computer/computer/utils.py
index 070f8ebc..c2f04c48 100644
--- a/libs/python/computer/computer/utils.py
+++ b/libs/python/computer/computer/utils.py
@@ -1,40 +1,46 @@
import base64
-from typing import Tuple, Optional, Dict, Any
-from PIL import Image, ImageDraw
import io
+from typing import Any, Dict, Optional, Tuple
+
+from PIL import Image, ImageDraw
+
def decode_base64_image(base64_str: str) -> bytes:
    """Decode a base64 string into image bytes."""
    return base64.b64decode(base64_str)
+
def encode_base64_image(image_bytes: bytes) -> str:
    """Encode image bytes to base64 string."""
-    return base64.b64encode(image_bytes).decode('utf-8')
+    return base64.b64encode(image_bytes).decode("utf-8")
+
def bytes_to_image(image_bytes: bytes) -> Image.Image:
    """Convert bytes to PIL Image.
-
+
    Args:
        image_bytes: Raw image bytes
-
+
    Returns:
        PIL.Image: The converted image
    """
    return Image.open(io.BytesIO(image_bytes))
-def image_to_bytes(image: Image.Image, format: str = 'PNG') -> bytes:
+
+def image_to_bytes(image: Image.Image, format: str = "PNG") -> bytes:
    """Convert PIL Image to bytes."""
    buf = io.BytesIO()
    image.save(buf, format=format)
    return buf.getvalue()
+
def resize_image(image_bytes: bytes, scale_factor: float) -> bytes:
    """Resize an image by a scale factor.
-
+
    Args:
        image_bytes: The original image as bytes
        scale_factor: Factor to scale the image by (e.g., 0.5 for half size, 2.0 for double)
-
+
    Returns:
        bytes: The resized image as bytes
    """
@@ -44,6 +50,7 @@ def resize_image(image_bytes: bytes, scale_factor: float) -> bytes:
    image = image.resize(new_size, Image.Resampling.LANCZOS)
    return image_to_bytes(image)
+
def draw_box(
    image_bytes: bytes,
    x: int,
@@ -51,10 +58,10 @@ def draw_box(
    width: int,
    height: int,
    color: str = "#FF0000",
-    thickness: int = 2
+    thickness: int = 2,
) -> bytes:
    """Draw a box on an image.
-
+
    Args:
        image_bytes: The original image as bytes
        x: X coordinate of top-left corner
@@ -63,39 +70,37 @@ def draw_box(
    height: Height of the box
    color: Color of the box in hex format
    thickness: Thickness of the box border in pixels
-
+
    Returns:
        bytes: The modified image as bytes
    """
    # Convert bytes to PIL Image
    image = bytes_to_image(image_bytes)
-
+
    # Create drawing context
    draw = ImageDraw.Draw(image)
-
+
    # Draw rectangle
-    draw.rectangle(
-        [(x, y), (x + width, y + height)],
-        outline=color,
-        width=thickness
-    )
-
+    draw.rectangle([(x, y), (x + width, y + height)], outline=color, width=thickness)
+
    # Convert back to bytes
    return image_to_bytes(image)
+
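> **Note** (reviewer, not part of the PR): since this hunk reformats every helper in `computer/utils.py`, here is a quick usage sketch of the functions above. The signatures match the file as shown in the diff; the input file name is hypothetical.

```python
# Reviewer sketch: exercising the utils.py helpers reformatted above.
# "screenshot.png" is a hypothetical input file.
from computer.utils import draw_box, encode_base64_image, resize_image

with open("screenshot.png", "rb") as f:
    raw = f.read()  # raw PNG bytes, e.g. as returned by a screenshot call

half = resize_image(raw, 0.5)  # downscale to half size (LANCZOS resampling)
boxed = draw_box(raw, x=10, y=10, width=120, height=40)  # red 2px box by default
payload = encode_base64_image(boxed)  # base64 string, e.g. for JSON transport
```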
def get_image_size(image_bytes: bytes) -> Tuple[int, int]:
    """Get the dimensions of an image.
-
+
    Args:
        image_bytes: The image as bytes
-
+
    Returns:
        Tuple[int, int]: Width and height of the image
    """
    image = bytes_to_image(image_bytes)
    return image.size
+
def parse_vm_info(vm_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Parse VM info from pylume response."""
    if not vm_info:
-        return None
\ No newline at end of file
+        return None
diff --git a/libs/python/computer/pyproject.toml b/libs/python/computer/pyproject.toml
index 4a9b41bb..cc5d17a8 100644
--- a/libs/python/computer/pyproject.toml
+++ b/libs/python/computer/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
[project]
name = "cua-computer"
-version = "0.4.0"
+version = "0.4.10"
description = "Computer-Use Interface (CUI) framework powering Cua"
readme = "README.md"
authors = [
@@ -18,7 +18,7 @@ dependencies = [
    "cua-core>=0.1.0,<0.2.0",
    "pydantic>=2.11.1"
]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
[project.optional-dependencies]
lume = [
@@ -44,29 +44,6 @@ distribution = true
includes = ["computer/"]
source-includes = ["tests/", "README.md", "LICENSE"]
-[tool.black]
-line-length = 100
-target-version = ["py311"]
-
-[tool.ruff]
-line-length = 100
-target-version = "py311"
-select = ["E", "F", "B", "I"]
-fix = true
-
-[tool.ruff.format]
-docstring-code-format = true
-
-[tool.mypy]
-strict = true
-python_version = "3.11"
-ignore_missing_imports = true
-disallow_untyped_defs = true
-check_untyped_defs = true
-warn_return_any = true
-show_error_codes = true
-warn_unused_ignores = false
-
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
diff --git a/libs/python/core/.bumpversion.cfg b/libs/python/core/.bumpversion.cfg
new file mode 100644
index 00000000..917e4cc8
--- /dev/null
+++ b/libs/python/core/.bumpversion.cfg
@@ -0,0 +1,10 @@
+[bumpversion]
+current_version = 0.1.9
+commit = True
+tag = True
+tag_name = core-v{new_version}
+message = Bump cua-core to v{new_version}
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
diff --git a/libs/python/core/README.md b/libs/python/core/README.md
index 6c743a8b..95d3fd2b 100644
--- a/libs/python/core/README.md
+++ b/libs/python/core/README.md
@@ -8,10 +8,11 @@
- [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
- [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
- [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
- [![PyPI](https://img.shields.io/pypi/v/cua-core?color=333333)](https://pypi.org/project/cua-core/)
+[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
+[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
+[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
+[![PyPI](https://img.shields.io/pypi/v/cua-core?color=333333)](https://pypi.org/project/cua-core/)
+
@@ -25,4 +26,4 @@
```bash
pip install cua-core
-```
\ No newline at end of file
+```
diff --git a/libs/python/core/core/telemetry/__init__.py b/libs/python/core/core/telemetry/__init__.py
index b5846715..4c84ace2 100644
--- a/libs/python/core/core/telemetry/__init__.py
+++ b/libs/python/core/core/telemetry/__init__.py
@@ -4,12 +4,11 @@ It provides a low-overhead way to collect anonymous usage data.
"""
from core.telemetry.posthog import (
-    record_event,
-    is_telemetry_enabled,
    destroy_telemetry_client,
+    is_telemetry_enabled,
+    record_event,
)
-
__all__ = [
    "record_event",
    "is_telemetry_enabled",
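> **Note** (reviewer, not part of the PR): the hunk above only reorders the re-exports, but it is a convenient place to show the resulting public surface. A minimal sketch, assuming only what the imports and `__all__` above declare; the event name and properties are made up for illustration.

```python
# Reviewer sketch: the core.telemetry public API as re-exported
# by __init__.py above.
from core.telemetry import is_telemetry_enabled, record_event

if is_telemetry_enabled():
    # properties is optional; the client substitutes {} when it is omitted
    record_event("example_event", {"source": "docs"})  # names are illustrative
```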
""" from core.telemetry.posthog import ( - record_event, - is_telemetry_enabled, destroy_telemetry_client, + is_telemetry_enabled, + record_event, ) - __all__ = [ "record_event", "is_telemetry_enabled", diff --git a/libs/python/core/core/telemetry/posthog.py b/libs/python/core/core/telemetry/posthog.py index a6c361e5..5edd7343 100644 --- a/libs/python/core/core/telemetry/posthog.py +++ b/libs/python/core/core/telemetry/posthog.py @@ -4,8 +4,8 @@ from __future__ import annotations import logging import os -import uuid import sys +import uuid from pathlib import Path from typing import Any, Dict, List, Optional @@ -20,6 +20,7 @@ logger = logging.getLogger("core.telemetry") PUBLIC_POSTHOG_API_KEY = "phc_eSkLnbLxsnYFaXksif1ksbrNzYlJShr35miFLDppF14" PUBLIC_POSTHOG_HOST = "https://eu.i.posthog.com" + class PostHogTelemetryClient: """Collects and reports telemetry data via PostHog.""" @@ -47,7 +48,8 @@ class PostHogTelemetryClient: # Legacy opt-out flag os.environ.get("CUA_TELEMETRY", "").lower() != "off" # Opt-in flag (defaults to enabled) - and os.environ.get("CUA_TELEMETRY_ENABLED", "true").lower() in { "1", "true", "yes", "on" } + and os.environ.get("CUA_TELEMETRY_ENABLED", "true").lower() + in {"1", "true", "yes", "on"} ) def _get_or_create_installation_id(self) -> str: @@ -150,14 +152,12 @@ class PostHogTelemetryClient: logger.debug( f"Setting up PostHog user properties for: {self.installation_id} with properties: {properties}" ) - + # In the Python SDK, we capture an identification event instead of calling identify() posthog.capture( - distinct_id=self.installation_id, - event="$identify", - properties={"$set": properties} + distinct_id=self.installation_id, event="$identify", properties={"$set": properties} ) - + logger.info(f"Set up PostHog user properties for installation: {self.installation_id}") except Exception as e: logger.warning(f"Failed to set up PostHog user properties: {e}") @@ -224,13 +224,16 @@ class PostHogTelemetryClient: """Destroy the global PostHogTelemetryClient instance.""" cls._singleton = None + def destroy_telemetry_client() -> None: """Destroy the global PostHogTelemetryClient instance (class-managed).""" PostHogTelemetryClient.destroy_client() + def is_telemetry_enabled() -> bool: return PostHogTelemetryClient.is_telemetry_enabled() + def record_event(event_name: str, properties: Optional[Dict[str, Any]] | None = None) -> None: """Record an arbitrary PostHog event.""" - PostHogTelemetryClient.get_client().record_event(event_name, properties or {}) \ No newline at end of file + PostHogTelemetryClient.get_client().record_event(event_name, properties or {}) diff --git a/libs/python/core/pyproject.toml b/libs/python/core/pyproject.toml index b9692162..cf1ff6c7 100644 --- a/libs/python/core/pyproject.toml +++ b/libs/python/core/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-core" -version = "0.1.8" +version = "0.1.9" description = "Core functionality for Cua including telemetry and shared utilities" readme = "README.md" authors = [ @@ -15,7 +15,7 @@ dependencies = [ "httpx>=0.24.0", "posthog>=3.20.0" ] -requires-python = ">=3.11" +requires-python = ">=3.12" [tool.pdm] distribution = true @@ -24,34 +24,11 @@ distribution = true includes = ["core/"] source-includes = ["tests/", "README.md", "LICENSE"] -[tool.black] -line-length = 100 -target-version = ["py311"] - -[tool.ruff] -line-length = 100 -target-version = "py311" -select = ["E", "F", "B", "I"] -fix = true - -[tool.ruff.format] -docstring-code-format = true - -[tool.mypy] -strict 
= true -python_version = "3.11" -ignore_missing_imports = true -disallow_untyped_defs = true -check_untyped_defs = true -warn_return_any = true -show_error_codes = true -warn_unused_ignores = false - [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] -python_files = "test_*.py" +python_files = "test_*.py" [dependency-groups] dev = [ "pytest>=8.3.5", -] +] \ No newline at end of file diff --git a/libs/python/mcp-server/.bumpversion.cfg b/libs/python/mcp-server/.bumpversion.cfg new file mode 100644 index 00000000..2641965b --- /dev/null +++ b/libs/python/mcp-server/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.1.15 +commit = True +tag = True +tag_name = mcp-server-v{new_version} +message = Bump cua-mcp-server to v{new_version} + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" diff --git a/libs/python/mcp-server/CONCURRENT_SESSIONS.md b/libs/python/mcp-server/CONCURRENT_SESSIONS.md new file mode 100644 index 00000000..62e63dd2 --- /dev/null +++ b/libs/python/mcp-server/CONCURRENT_SESSIONS.md @@ -0,0 +1,259 @@ +# MCP Server Concurrent Session Management + +This document describes the improvements made to the MCP Server to address concurrent session management and resource lifecycle issues. + +## Problem Statement + +The original MCP server implementation had several critical issues: + +1. **Global Computer Instance**: Used a single `global_computer` variable shared across all clients +2. **No Resource Isolation**: Multiple clients would interfere with each other +3. **Sequential Task Processing**: Multi-task operations were always sequential +4. **No Graceful Shutdown**: Server couldn't properly cleanup resources on shutdown +5. **Hidden Event Loop**: `server.run()` hid the event loop, preventing proper lifecycle management + +## Solution Architecture + +### 1. Session Manager (`session_manager.py`) + +The `SessionManager` class provides: + +- **Per-session computer instances**: Each client gets isolated computer resources +- **Computer instance pooling**: Efficient reuse of computer instances with lifecycle management +- **Task registration**: Track active tasks per session for graceful cleanup +- **Automatic cleanup**: Background task cleans up idle sessions +- **Resource limits**: Configurable maximum concurrent sessions + +#### Key Components: + +```python +class SessionManager: + def __init__(self, max_concurrent_sessions: int = 10): + self._sessions: Dict[str, SessionInfo] = {} + self._computer_pool = ComputerPool() + # ... lifecycle management +``` + +#### Session Lifecycle: + +1. **Creation**: New session created when client first connects +2. **Task Registration**: Each task is registered with the session +3. **Activity Tracking**: Last activity time updated on each operation +4. **Cleanup**: Sessions cleaned up when idle or on shutdown + +### 2. Computer Pool (`ComputerPool`) + +Manages computer instances efficiently: + +- **Pool Size Limits**: Maximum number of concurrent computer instances +- **Instance Reuse**: Available instances reused across sessions +- **Lifecycle Management**: Proper startup/shutdown of computer instances +- **Resource Cleanup**: All instances properly closed on shutdown + +### 3. 
Enhanced Server Tools + +All server tools now support: + +- **Session ID Parameter**: Optional `session_id` for multi-client support +- **Resource Isolation**: Each session gets its own computer instance +- **Task Tracking**: Proper registration/unregistration of tasks +- **Error Handling**: Graceful error handling with session cleanup + +#### Updated Tool Signatures: + +```python +async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any: +async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any: +async def run_multi_cua_tasks(ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False) -> Any: +``` + +### 4. Concurrent Task Execution + +The `run_multi_cua_tasks` tool now supports: + +- **Sequential Mode** (default): Tasks run one after another +- **Concurrent Mode**: Tasks run in parallel using `asyncio.gather()` +- **Progress Tracking**: Proper progress reporting for both modes +- **Error Handling**: Individual task failures don't stop other tasks + +### 5. Graceful Shutdown + +The server now provides: + +- **Signal Handlers**: Proper handling of SIGINT and SIGTERM +- **Session Cleanup**: All active sessions properly cleaned up +- **Resource Release**: Computer instances returned to pool and closed +- **Async Lifecycle**: Event loop properly exposed for cleanup + +## Usage Examples + +### Basic Usage (Backward Compatible) + +```python +# These calls work exactly as before +await screenshot_cua(ctx) +await run_cua_task(ctx, "Open browser") +await run_multi_cua_tasks(ctx, ["Task 1", "Task 2"]) +``` + +### Multi-Client Usage + +```python +# Client 1 +session_id_1 = "client-1-session" +await screenshot_cua(ctx, session_id_1) +await run_cua_task(ctx, "Open browser", session_id_1) + +# Client 2 (completely isolated) +session_id_2 = "client-2-session" +await screenshot_cua(ctx, session_id_2) +await run_cua_task(ctx, "Open editor", session_id_2) +``` + +### Concurrent Task Execution + +```python +# Run tasks concurrently instead of sequentially +tasks = ["Open browser", "Open editor", "Open terminal"] +results = await run_multi_cua_tasks(ctx, tasks, concurrent=True) +``` + +### Session Management + +```python +# Get session statistics +stats = await get_session_stats(ctx) +print(f"Active sessions: {stats['total_sessions']}") + +# Cleanup specific session +await cleanup_session(ctx, "session-to-cleanup") +``` + +## Configuration + +### Environment Variables + +- `CUA_MODEL_NAME`: Model to use (default: `anthropic/claude-3-5-sonnet-20241022`) +- `CUA_MAX_IMAGES`: Maximum images to keep (default: `3`) + +### Session Manager Configuration + +```python +# In session_manager.py +class SessionManager: + def __init__(self, max_concurrent_sessions: int = 10): + # Configurable maximum concurrent sessions + +class ComputerPool: + def __init__(self, max_size: int = 5, idle_timeout: float = 300.0): + # Configurable pool size and idle timeout +``` + +## Performance Improvements + +### Before (Issues): + +- ❌ Single global computer instance +- ❌ Client interference and resource conflicts +- ❌ Sequential task processing only +- ❌ No graceful shutdown +- ❌ 30s timeout issues with long-running tasks + +### After (Benefits): + +- ✅ Per-session computer instances with proper isolation +- ✅ Computer instance pooling for efficient resource usage +- ✅ Concurrent task execution support +- ✅ Graceful shutdown with proper cleanup +- ✅ Streaming updates prevent timeout issues +- ✅ Configurable resource limits +- ✅ Automatic session 
cleanup
+
+## Testing
+
+Comprehensive test coverage includes:
+
+- Session creation and reuse
+- Concurrent session isolation
+- Task registration and cleanup
+- Error handling with session management
+- Concurrent vs sequential task execution
+- Session statistics and cleanup
+
+Run tests with:
+
+```bash
+pytest tests/test_mcp_server_session_management.py -v
+```
+
+## Migration Guide
+
+### For Existing Clients
+
+No changes required! The new implementation is fully backward compatible:
+
+```python
+# This still works exactly as before
+await run_cua_task(ctx, "My task")
+```
+
+### For New Multi-Client Applications
+
+Use session IDs for proper isolation:
+
+```python
+# Create a unique session ID for each client
+session_id = str(uuid.uuid4())
+await run_cua_task(ctx, "My task", session_id)
+```
+
+### For Concurrent Task Execution
+
+Enable concurrent mode for better performance:
+
+```python
+tasks = ["Task 1", "Task 2", "Task 3"]
+results = await run_multi_cua_tasks(ctx, tasks, concurrent=True)
+```
+
+## Monitoring and Debugging
+
+### Session Statistics
+
+```python
+stats = await get_session_stats(ctx)
+print(f"Total sessions: {stats['total_sessions']}")
+print(f"Max concurrent: {stats['max_concurrent']}")
+for session_id, session_info in stats['sessions'].items():
+    print(f"Session {session_id}: {session_info['active_tasks']} active tasks")
+```
+
+### Logging
+
+The server provides detailed logging for:
+
+- Session creation and cleanup
+- Task registration and completion
+- Resource pool usage
+- Error conditions and recovery
+
+### Graceful Shutdown
+
+The server properly handles shutdown signals:
+
+```bash
+# Send SIGTERM for graceful shutdown
+kill -TERM <pid>
+
+# Or use Ctrl+C (SIGINT)
+```
+
+## Future Enhancements
+
+Potential future improvements:
+
+1. **Session Persistence**: Save/restore session state across restarts
+2. **Load Balancing**: Distribute sessions across multiple server instances
+3. **Resource Monitoring**: Real-time monitoring of resource usage
+4. **Auto-scaling**: Dynamic adjustment of pool size based on demand
+5. **Session Timeouts**: Configurable timeouts for different session types
diff --git a/libs/python/mcp-server/README.md b/libs/python/mcp-server/README.md
index 08aa763a..7eddf4b7 100644
--- a/libs/python/mcp-server/README.md
+++ b/libs/python/mcp-server/README.md
@@ -8,20 +8,23 @@
- [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
- [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
- [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
- [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
+[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
+[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
+[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
+[![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
+
-**cua-mcp-server** is a MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients.
+# cua-mcp-server
-### Get started with Agent
+cua-mcp-server is an MCP server for the Computer-Use Agent (CUA).
It enables CUA to run through MCP clients such as Claude Desktop and Cursor. ## Prerequisites -Cua MCP Server requires [lume](https://github.com/trycua/cua/blob/main/libs/lume/README.md#install) to be installed. +- Install lume: https://github.com/trycua/cua/blob/main/libs/lume/README.md#install +- Python 3.10+ +- pip, venv, setuptools ## Install @@ -31,52 +34,105 @@ Download and run the installation script: curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/python/mcp-server/scripts/install_mcp_server.sh | bash ``` -You can then use the script in your MCP configuration like this: - -```json -{ - "mcpServers": { - "cua-agent": { - "command": "/bin/bash", - "args": ["~/.cua/start_mcp_server.sh"], - "env": { - "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022" - } - } - } -} -``` - -## Development - -Use this configuration to develop with the cua-mcp-server directly without installation: +Add this to your MCP client configuration: ```json { "mcpServers": { "cua-agent": { - "command": "/bin/bash", - "args": ["~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"], - "env": { - "CUA_MODEL_NAME": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" - } + "command": "/usr/bin/env", + "args": [ + "bash", + "-lc", + "export CUA_MODEL_NAME='anthropic/claude-3-5-sonnet-20241022'; ~/.cua/start_mcp_server.sh" + ] } } } ``` -This configuration: -- Uses the start_mcp_server.sh script which automatically sets up the Python path and runs the server module -- Works with Claude Desktop, Cursor, or any other MCP client -- Automatically uses your development code without requiring installation +## Development (run from a local checkout) -Just add this to your MCP client's configuration and it will use your local development version of the server. +Use an absolute path to the repository root in the arguments below. -## Docs +```json +{ + "mcpServers": { + "cua-agent": { + "command": "/usr/bin/env", + "args": [ + "bash", + "-lc", + "export CUA_MODEL_NAME='huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B'; /Users/your-username/Documents/GitHub/cua/libs/python/mcp-server/scripts/start_mcp_server.sh" + ] + } + } +} +``` -- [Installation](https://trycua.com/docs/libraries/mcp-server/installation) -- [Configuration](https://trycua.com/docs/libraries/mcp-server/configuration) -- [Usage](https://trycua.com/docs/libraries/mcp-server/usage) -- [Tools](https://trycua.com/docs/libraries/mcp-server/tools) -- [Client Integrations](https://trycua.com/docs/libraries/mcp-server/client-integrations) -- [LLM Integrations](https://trycua.com/docs/libraries/mcp-server/llm-integrations) \ No newline at end of file +Notes: + +- Replace `/Users/your-username/Documents/GitHub/cua` with the absolute path to your clone. +- The script sets `PYTHONPATH` for local libs and runs the server module. 
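+
+For reference, the script is roughly equivalent to the following manual launch (a sketch only; the package directories listed are assumptions based on this repository's layout):
+
+```python
+# Hypothetical manual launch mirroring start_mcp_server.sh:
+# put the local packages on sys.path, then run the server's main().
+import sys
+
+REPO = "/Users/your-username/Documents/GitHub/cua/libs/python"
+for pkg in ("mcp-server", "computer", "agent", "core"):
+    sys.path.insert(0, f"{REPO}/{pkg}")
+
+from mcp_server.server import main
+
+main()  # blocks and serves MCP over stdio
+```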
+ +## Quick Start + +After configuring your MCP client, restart it and invoke one of these tools: + +- Take a screenshot + +```json +{ + "tool": "screenshot_cua", + "args": {} +} +``` + +- Run a task + +```json +{ + "tool": "run_cua_task", + "args": { "task": "Open Safari and search for University of Toronto" } +} +``` + +Expected results: + +- Assistant messages streamed during execution +- A final screenshot image + +## Documentation + +- Installation: https://trycua.com/docs/libraries/mcp-server/installation +- Configuration: https://trycua.com/docs/libraries/mcp-server/configuration +- Usage: https://trycua.com/docs/libraries/mcp-server/usage +- Tools: https://trycua.com/docs/libraries/mcp-server/tools +- Client Integrations: https://trycua.com/docs/libraries/mcp-server/client-integrations +- LLM Integrations: https://trycua.com/docs/libraries/mcp-server/llm-integrations + +## Troubleshooting + +Server reports disconnected in MCP client: + +- Use an absolute path in the `args` command. +- Launch via `/usr/bin/env bash -lc` so the shell initializes and expands paths. +- Run the script manually to verify: + ```bash + /usr/bin/env bash -lc '/Users/your-username/Documents/GitHub/cua/libs/python/mcp-server/scripts/start_mcp_server.sh' + ``` + +pip not found in venv: + +```bash +python3 -m ensurepip --upgrade +python3 -m pip install -U pip setuptools wheel +``` + +Pydantic schema error related to Image: + +```bash +python3 -m pip install -U "mcp>=1.2.0" "fastmcp>=0.4.7" "pydantic>=2.7,<2.12" +``` + +If issues persist, capture logs from your MCP client and the server startup script for diagnosis. diff --git a/libs/python/mcp-server/mcp_server/__init__.py b/libs/python/mcp-server/mcp_server/__init__.py index a0a7407a..f4b66e43 100644 --- a/libs/python/mcp-server/mcp_server/__init__.py +++ b/libs/python/mcp-server/mcp_server/__init__.py @@ -1,7 +1,7 @@ """MCP Server for Computer-Use Agent (CUA).""" -import sys import os +import sys # Add detailed debugging at import time with open("/tmp/mcp_server_debug.log", "w") as f: @@ -9,11 +9,11 @@ with open("/tmp/mcp_server_debug.log", "w") as f: f.write(f"Python version: {sys.version}\n") f.write(f"Working directory: {os.getcwd()}\n") f.write(f"Python path:\n{chr(10).join(sys.path)}\n") - f.write(f"Environment variables:\n") + f.write("Environment variables:\n") for key, value in os.environ.items(): f.write(f"{key}={value}\n") -from .server import server, main +from .server import main, server __version__ = "0.1.0" __all__ = ["server", "main"] diff --git a/libs/python/mcp-server/mcp_server/server.py b/libs/python/mcp-server/mcp_server/server.py index 73996d5e..7d47cfd1 100644 --- a/libs/python/mcp-server/mcp_server/server.py +++ b/libs/python/mcp-server/mcp_server/server.py @@ -1,11 +1,15 @@ import asyncio import base64 +import inspect import logging import os +import signal import sys -from tabnanny import verbose import traceback -from typing import Any, Dict, List, Optional, Union, Tuple +import uuid +from typing import Any, Dict, List, Optional, Tuple, Union + +import anyio # Configure logging to output to stderr for debug visibility logging.basicConfig( @@ -19,7 +23,10 @@ logger = logging.getLogger("mcp-server") logger.debug("MCP Server module loading...") try: - from mcp.server.fastmcp import Context, FastMCP, Image + from mcp.server.fastmcp import Context, FastMCP + + # Use the canonical Image type + from mcp.server.fastmcp.utilities.types import Image logger.debug("Successfully imported FastMCP") except ImportError as e: @@ -28,8 +35,8 @@ except 
ImportError as e: sys.exit(1) try: - from computer import Computer from agent import ComputerAgent + from computer import Computer logger.debug("Successfully imported Computer and Agent modules") except ImportError as e: @@ -37,8 +44,18 @@ except ImportError as e: traceback.print_exc(file=sys.stderr) sys.exit(1) -# Global computer instance for reuse -global_computer = None +try: + from .session_manager import ( + get_session_manager, + initialize_session_manager, + shutdown_session_manager, + ) + + logger.debug("Successfully imported session manager") +except ImportError as e: + logger.error(f"Failed to import session manager: {e}") + traceback.print_exc(file=sys.stderr) + sys.exit(1) def get_env_bool(key: str, default: bool = False) -> bool: @@ -46,154 +63,304 @@ def get_env_bool(key: str, default: bool = False) -> bool: return os.getenv(key, str(default)).lower() in ("true", "1", "yes") +async def _maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None: + """Call a context helper if it exists, awaiting the result when necessary.""" + method = getattr(ctx, method_name, None) + if not callable(method): + return + result = method(*args, **kwargs) + if inspect.isawaitable(result): + await result + + +def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Normalise message content to a list of structured parts.""" + if isinstance(content, list): + return content + if content is None: + return [] + return [{"type": "output_text", "text": str(content)}] + + +def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str: + """Extract textual content for inclusion in the aggregated result string.""" + if isinstance(content, str): + return content + texts: List[str] = [] + for part in content or []: + if not isinstance(part, dict): + continue + if part.get("type") in {"output_text", "text"} and part.get("text"): + texts.append(str(part["text"])) + return "\n".join(texts) + + +def _serialise_tool_content(content: Any) -> str: + """Convert tool outputs into a string for aggregation.""" + if isinstance(content, str): + return content + if isinstance(content, list): + texts: List[str] = [] + for part in content: + if ( + isinstance(part, dict) + and part.get("type") in {"output_text", "text"} + and part.get("text") + ): + texts.append(str(part["text"])) + if texts: + return "\n".join(texts) + if content is None: + return "" + return str(content) + + def serve() -> FastMCP: """Create and configure the MCP server.""" - server = FastMCP("cua-agent") + # NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it. + server = FastMCP(name="cua-agent") - @server.tool() - async def screenshot_cua(ctx: Context) -> Image: + @server.tool(structured_output=False) + async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any: """ - Take a screenshot of the current MacOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state. + Take a screenshot of the current MacOS VM screen and return the image. Args: - ctx: The MCP context - - Returns: - An image resource containing the screenshot + session_id: Optional session ID for multi-client support. If not provided, a new session will be created. 
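+
+        Returns:
+            The screenshot as a PNG image resource.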
""" - global global_computer - if global_computer is None: - global_computer = Computer(verbosity=logging.INFO) - await global_computer.run() - screenshot = await global_computer.interface.screenshot() - return Image( - format="png", - data=screenshot - ) + session_manager = get_session_manager() - @server.tool() - async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]: + async with session_manager.get_session(session_id) as session: + screenshot = await session.computer.interface.screenshot() + # Returning Image object is fine when structured_output=False + return Image(format="png", data=screenshot) + + @server.tool(structured_output=False) + async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any: """ - Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results. + Run a Computer-Use Agent (CUA) task in a MacOS VM and return (combined text, final screenshot). Args: - ctx: The MCP context - task: The instruction or task for the agent to perform - - Returns: - A tuple containing the agent's response and the final screenshot + task: The task description for the agent to execute + session_id: Optional session ID for multi-client support. If not provided, a new session will be created. """ - global global_computer + session_manager = get_session_manager() + task_id = str(uuid.uuid4()) try: - logger.info(f"Starting CUA task: {task}") + logger.info(f"Starting CUA task: {task} (task_id: {task_id})") - # Initialize computer if needed - if global_computer is None: - global_computer = Computer(verbosity=logging.INFO) - await global_computer.run() + async with session_manager.get_session(session_id) as session: + # Register this task with the session + await session_manager.register_task(session.session_id, task_id) - # Get model name - this now determines the loop and provider - model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022") - - logger.info(f"Using model: {model_name}") + try: + # Get model name + model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022") + logger.info(f"Using model: {model_name}") - # Create agent with the new v0.4.x API - agent = ComputerAgent( - model=model_name, - only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")), - verbosity=logging.INFO, - tools=[global_computer] - ) + # Create agent with the new v0.4.x API + agent = ComputerAgent( + model=model_name, + only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")), + verbosity=logging.INFO, + tools=[session.computer], + ) - # Create messages in the new v0.4.x format - messages = [{"role": "user", "content": task}] - - # Collect all results - full_result = "" - async for result in agent.run(messages): - logger.info(f"Agent processing step") - ctx.info(f"Agent processing step") + messages = [{"role": "user", "content": task}] - # Process output if available - outputs = result.get("output", []) - for output in outputs: - output_type = output.get("type") - if output_type == "message": - logger.debug(f"Message: {output}") - content = output.get("content", []) - for content_part in content: - if content_part.get("text"): - full_result += f"Message: {content_part.get('text', '')}\n" - elif output_type == "tool_use": - logger.debug(f"Tool use: {output}") - tool_name = output.get("name", "") - full_result += f"Tool: {tool_name}\n" - elif output_type == "tool_result": - logger.debug(f"Tool result: {output}") - result_content = output.get("content", "") - if isinstance(result_content, list): - for item in 
result_content: - if item.get("type") == "text": - full_result += f"Result: {item.get('text', '')}\n" - else: - full_result += f"Result: {result_content}\n" + # Collect all results + aggregated_messages: List[str] = [] + async for result in agent.run(messages): + logger.info("Agent processing step") + ctx.info("Agent processing step") - # Add separator between steps - full_result += "\n" + "-" * 20 + "\n" + outputs = result.get("output", []) + for output in outputs: + output_type = output.get("type") - logger.info(f"CUA task completed successfully") - ctx.info(f"CUA task completed successfully") - return ( - full_result or "Task completed with no text output.", - Image( - format="png", - data=await global_computer.interface.screenshot() - ) - ) + if output_type == "message": + logger.debug("Streaming assistant message: %s", output) + content = _normalise_message_content(output.get("content")) + aggregated_text = _extract_text_from_content(content) + if aggregated_text: + aggregated_messages.append(aggregated_text) + await _maybe_call_ctx_method( + ctx, + "yield_message", + role=output.get("role", "assistant"), + content=content, + ) + + elif output_type in {"tool_use", "computer_call", "function_call"}: + logger.debug("Streaming tool call: %s", output) + call_id = output.get("id") or output.get("call_id") + tool_name = output.get("name") or output.get("action", {}).get( + "type" + ) + tool_input = ( + output.get("input") + or output.get("arguments") + or output.get("action") + ) + if call_id: + await _maybe_call_ctx_method( + ctx, + "yield_tool_call", + name=tool_name, + call_id=call_id, + input=tool_input, + ) + + elif output_type in { + "tool_result", + "computer_call_output", + "function_call_output", + }: + logger.debug("Streaming tool output: %s", output) + call_id = output.get("call_id") or output.get("id") + content = output.get("content") or output.get("output") + aggregated_text = _serialise_tool_content(content) + if aggregated_text: + aggregated_messages.append(aggregated_text) + if call_id: + await _maybe_call_ctx_method( + ctx, + "yield_tool_output", + call_id=call_id, + output=content, + is_error=output.get("status") == "failed" + or output.get("is_error", False), + ) + + logger.info("CUA task completed successfully") + ctx.info("CUA task completed successfully") + + screenshot_image = Image( + format="png", + data=await session.computer.interface.screenshot(), + ) + + return ( + "\n".join(aggregated_messages).strip() + or "Task completed with no text output.", + screenshot_image, + ) + + finally: + # Unregister the task from the session + await session_manager.unregister_task(session.session_id, task_id) except Exception as e: error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}" logger.error(error_msg) ctx.error(error_msg) - # Return tuple with error message and a screenshot if possible + + # Try to get a screenshot from the session if available try: - if global_computer is not None: - screenshot = await global_computer.interface.screenshot() - return ( - f"Error during task execution: {str(e)}", - Image(format="png", data=screenshot) - ) - except: + if session_id: + async with session_manager.get_session(session_id) as session: + screenshot = await session.computer.interface.screenshot() + return ( + f"Error during task execution: {str(e)}", + Image(format="png", data=screenshot), + ) + except Exception: pass + # If we can't get a screenshot, return a placeholder return ( f"Error during task execution: {str(e)}", - Image(format="png", data=b"") + 
Image(format="png", data=b""), ) - @server.tool() - async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List: + @server.tool(structured_output=False) + async def run_multi_cua_tasks( + ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False + ) -> Any: """ - Run multiple CUA tasks in a MacOS VM in sequence and return the combined results. + Run multiple CUA tasks and return a list of (combined text, screenshot). Args: - ctx: The MCP context - tasks: List of tasks to run in sequence - - Returns: - Combined results from all tasks + tasks: List of task descriptions to execute + session_id: Optional session ID for multi-client support. If not provided, a new session will be created. + concurrent: If True, run tasks concurrently. If False, run sequentially (default). """ - results = [] - for i, task in enumerate(tasks): - logger.info(f"Running task {i+1}/{len(tasks)}: {task}") - ctx.info(f"Running task {i+1}/{len(tasks)}: {task}") - - ctx.report_progress(i / len(tasks)) - results.extend(await run_cua_task(ctx, task)) - ctx.report_progress((i + 1) / len(tasks)) - - return results + total_tasks = len(tasks) + if total_tasks == 0: + ctx.report_progress(1.0) + return [] + + session_manager = get_session_manager() + + if concurrent and total_tasks > 1: + # Run tasks concurrently + logger.info(f"Running {total_tasks} tasks concurrently") + ctx.info(f"Running {total_tasks} tasks concurrently") + + # Create tasks with progress tracking + async def run_task_with_progress( + task_index: int, task: str + ) -> Tuple[int, Tuple[str, Image]]: + ctx.report_progress(task_index / total_tasks) + result = await run_cua_task(ctx, task, session_id) + ctx.report_progress((task_index + 1) / total_tasks) + return task_index, result + + # Create all task coroutines + task_coroutines = [run_task_with_progress(i, task) for i, task in enumerate(tasks)] + + # Wait for all tasks to complete + results_with_indices = await asyncio.gather(*task_coroutines, return_exceptions=True) + + # Sort results by original task order and handle exceptions + results: List[Tuple[str, Image]] = [] + for result in results_with_indices: + if isinstance(result, Exception): + logger.error(f"Task failed with exception: {result}") + ctx.error(f"Task failed: {str(result)}") + results.append((f"Task failed: {str(result)}", Image(format="png", data=b""))) + else: + _, task_result = result + results.append(task_result) + + return results + else: + # Run tasks sequentially (original behavior) + logger.info(f"Running {total_tasks} tasks sequentially") + ctx.info(f"Running {total_tasks} tasks sequentially") + + results: List[Tuple[str, Image]] = [] + for i, task in enumerate(tasks): + logger.info(f"Running task {i+1}/{total_tasks}: {task}") + ctx.info(f"Running task {i+1}/{total_tasks}: {task}") + + ctx.report_progress(i / total_tasks) + task_result = await run_cua_task(ctx, task, session_id) + results.append(task_result) + ctx.report_progress((i + 1) / total_tasks) + + return results + + @server.tool(structured_output=False) + async def get_session_stats(ctx: Context) -> Dict[str, Any]: + """ + Get statistics about active sessions and resource usage. + """ + session_manager = get_session_manager() + return session_manager.get_session_stats() + + @server.tool(structured_output=False) + async def cleanup_session(ctx: Context, session_id: str) -> str: + """ + Cleanup a specific session and release its resources. 
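+        Sessions that still have active tasks are marked for shutdown and are cleaned up once those tasks finish.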
+ + Args: + session_id: The session ID to cleanup + """ + session_manager = get_session_manager() + await session_manager.cleanup_session(session_id) + return f"Session {session_id} cleanup initiated" return server @@ -201,11 +368,63 @@ def serve() -> FastMCP: server = serve() -def main(): - """Run the MCP server.""" +async def run_server(): + """Run the MCP server with proper lifecycle management.""" + session_manager = None try: logger.debug("Starting MCP server...") - server.run() + + # Initialize session manager + session_manager = await initialize_session_manager() + logger.info("Session manager initialized") + + # Set up signal handlers for graceful shutdown + def signal_handler(signum, frame): + logger.info(f"Received signal {signum}, initiating graceful shutdown...") + # Create a task to shutdown gracefully + asyncio.create_task(graceful_shutdown()) + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + # Start the server + logger.info("Starting FastMCP server...") + # Use run_stdio_async directly instead of server.run() to avoid nested event loops + await server.run_stdio_async() + + except Exception as e: + logger.error(f"Error starting server: {e}") + traceback.print_exc(file=sys.stderr) + raise + finally: + # Ensure cleanup happens + if session_manager: + logger.info("Shutting down session manager...") + await shutdown_session_manager() + + +async def graceful_shutdown(): + """Gracefully shutdown the server and all sessions.""" + logger.info("Initiating graceful shutdown...") + try: + await shutdown_session_manager() + logger.info("Graceful shutdown completed") + except Exception as e: + logger.error(f"Error during graceful shutdown: {e}") + finally: + # Exit the process + import os + + os._exit(0) + + +def main(): + """Run the MCP server with proper async lifecycle management.""" + try: + # Use anyio.run instead of asyncio.run to avoid nested event loop issues + anyio.run(run_server) + except KeyboardInterrupt: + logger.info("Server interrupted by user") except Exception as e: logger.error(f"Error starting server: {e}") traceback.print_exc(file=sys.stderr) diff --git a/libs/python/mcp-server/mcp_server/session_manager.py b/libs/python/mcp-server/mcp_server/session_manager.py new file mode 100644 index 00000000..dc8d480b --- /dev/null +++ b/libs/python/mcp-server/mcp_server/session_manager.py @@ -0,0 +1,322 @@ +""" +Session Manager for MCP Server - Handles concurrent client sessions with proper resource isolation. 
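+
+Typical usage (a sketch):
+
+    manager = await initialize_session_manager()
+    async with manager.get_session() as session:
+        png_bytes = await session.computer.interface.screenshot()
+    await shutdown_session_manager()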
+ +This module provides: +- Per-session computer instance management +- Resource pooling and lifecycle management +- Graceful session cleanup +- Concurrent task execution support +""" + +import asyncio +import logging +import time +import uuid +import weakref +from contextlib import asynccontextmanager +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Set + +logger = logging.getLogger("mcp-server.session_manager") + + +@dataclass +class SessionInfo: + """Information about an active session.""" + + session_id: str + computer: Any # Computer instance + created_at: float + last_activity: float + active_tasks: Set[str] = field(default_factory=set) + is_shutting_down: bool = False + + +class ComputerPool: + """Pool of computer instances for efficient resource management.""" + + def __init__(self, max_size: int = 5, idle_timeout: float = 300.0): + self.max_size = max_size + self.idle_timeout = idle_timeout + self._available: List[Any] = [] + self._in_use: Set[Any] = set() + self._creation_lock = asyncio.Lock() + + async def acquire(self) -> Any: + """Acquire a computer instance from the pool.""" + # Try to get an available instance + if self._available: + computer = self._available.pop() + self._in_use.add(computer) + logger.debug("Reusing computer instance from pool") + return computer + + # Check if we can create a new one + async with self._creation_lock: + if len(self._in_use) < self.max_size: + logger.debug("Creating new computer instance") + from computer import Computer + + computer = Computer(verbosity=logging.INFO) + await computer.run() + self._in_use.add(computer) + return computer + + # Wait for an instance to become available + logger.debug("Waiting for computer instance to become available") + while not self._available: + await asyncio.sleep(0.1) + + computer = self._available.pop() + self._in_use.add(computer) + return computer + + async def release(self, computer: Any) -> None: + """Release a computer instance back to the pool.""" + if computer in self._in_use: + self._in_use.remove(computer) + self._available.append(computer) + logger.debug("Released computer instance back to pool") + + async def cleanup_idle(self) -> None: + """Clean up idle computer instances.""" + current_time = time.time() + idle_instances = [] + + for computer in self._available[:]: + # Check if computer has been idle too long + # Note: We'd need to track last use time per instance for this + # For now, we'll keep instances in the pool + pass + + async def shutdown(self) -> None: + """Shutdown all computer instances in the pool.""" + logger.info("Shutting down computer pool") + + # Close all available instances + for computer in self._available: + try: + if hasattr(computer, "close"): + await computer.close() + elif hasattr(computer, "stop"): + await computer.stop() + except Exception as e: + logger.warning(f"Error closing computer instance: {e}") + + # Close all in-use instances + for computer in self._in_use: + try: + if hasattr(computer, "close"): + await computer.close() + elif hasattr(computer, "stop"): + await computer.stop() + except Exception as e: + logger.warning(f"Error closing computer instance: {e}") + + self._available.clear() + self._in_use.clear() + + +class SessionManager: + """Manages concurrent client sessions with proper resource isolation.""" + + def __init__(self, max_concurrent_sessions: int = 10): + self.max_concurrent_sessions = max_concurrent_sessions + self._sessions: Dict[str, SessionInfo] = {} + self._computer_pool = ComputerPool() + 
self._session_lock = asyncio.Lock()
+        self._cleanup_task: Optional[asyncio.Task] = None
+        self._shutdown_event = asyncio.Event()
+
+    async def start(self) -> None:
+        """Start the session manager and cleanup task."""
+        logger.info("Starting session manager")
+        self._cleanup_task = asyncio.create_task(self._cleanup_loop())
+
+    async def stop(self) -> None:
+        """Stop the session manager and cleanup all resources."""
+        logger.info("Stopping session manager")
+        self._shutdown_event.set()
+
+        if self._cleanup_task:
+            self._cleanup_task.cancel()
+            try:
+                await self._cleanup_task
+            except asyncio.CancelledError:
+                pass
+
+        # Force cleanup all sessions
+        async with self._session_lock:
+            session_ids = list(self._sessions.keys())
+
+        for session_id in session_ids:
+            await self._force_cleanup_session(session_id)
+
+        await self._computer_pool.shutdown()
+
+    @asynccontextmanager
+    async def get_session(self, session_id: Optional[str] = None) -> Any:
+        """Get or create a session with proper resource management."""
+        if session_id is None:
+            session_id = str(uuid.uuid4())
+
+        # Check if session exists and is not shutting down
+        async with self._session_lock:
+            if session_id in self._sessions:
+                session = self._sessions[session_id]
+                if session.is_shutting_down:
+                    raise RuntimeError(f"Session {session_id} is shutting down")
+                session.last_activity = time.time()
+                computer = session.computer
+            else:
+                # Create new session
+                if len(self._sessions) >= self.max_concurrent_sessions:
+                    raise RuntimeError(
+                        f"Maximum concurrent sessions ({self.max_concurrent_sessions}) reached"
+                    )
+
+                computer = await self._computer_pool.acquire()
+                session = SessionInfo(
+                    session_id=session_id,
+                    computer=computer,
+                    created_at=time.time(),
+                    last_activity=time.time(),
+                )
+                self._sessions[session_id] = session
+                logger.info(f"Created new session: {session_id}")
+
+        try:
+            yield session
+        finally:
+            # Update last activity
+            async with self._session_lock:
+                if session_id in self._sessions:
+                    self._sessions[session_id].last_activity = time.time()
+
+    async def register_task(self, session_id: str, task_id: str) -> None:
+        """Register a task for a session."""
+        async with self._session_lock:
+            if session_id in self._sessions:
+                self._sessions[session_id].active_tasks.add(task_id)
+                logger.debug(f"Registered task {task_id} for session {session_id}")
+
+    async def unregister_task(self, session_id: str, task_id: str) -> None:
+        """Unregister a task from a session."""
+        async with self._session_lock:
+            if session_id in self._sessions:
+                self._sessions[session_id].active_tasks.discard(task_id)
+                logger.debug(f"Unregistered task {task_id} from session {session_id}")
+
+    async def cleanup_session(self, session_id: str) -> None:
+        """Cleanup a specific session."""
+        async with self._session_lock:
+            if session_id not in self._sessions:
+                return
+
+            session = self._sessions[session_id]
+
+            # Check if session has active tasks
+            if session.active_tasks:
+                logger.info(f"Session {session_id} has active tasks, marking for shutdown")
+                session.is_shutting_down = True
+                return
+
+        # Actually cleanup the session. This must happen outside the lock:
+        # _force_cleanup_session re-acquires it, and asyncio.Lock is not reentrant.
+        await self._force_cleanup_session(session_id)
+
+    async def _force_cleanup_session(self, session_id: str) -> None:
+        """Force cleanup a session regardless of active tasks."""
+        async with self._session_lock:
+            if session_id not in self._sessions:
+                return
+
+            session = self._sessions[session_id]
+            logger.info(f"Cleaning up session: {session_id}")
+
+            # Release computer back to pool
+            await self._computer_pool.release(session.computer)
+
+            # Remove session
+            del self._sessions[session_id]
+
+    async def _cleanup_loop(self) -> None:
+        """Background task to cleanup idle sessions."""
+        while not self._shutdown_event.is_set():
+            try:
+                await asyncio.sleep(60)  # Run cleanup every minute
+
+                current_time = time.time()
+                idle_timeout = 600.0  # 10 minutes
+
+                async with self._session_lock:
+                    idle_sessions = []
+                    for session_id, session in self._sessions.items():
+                        if not session.is_shutting_down and not session.active_tasks:
+                            if current_time - session.last_activity > idle_timeout:
+                                idle_sessions.append(session_id)
+
+                # Cleanup idle sessions outside the lock, for the same reason as above
+                for session_id in idle_sessions:
+                    await self._force_cleanup_session(session_id)
+                    logger.info(f"Cleaned up idle session: {session_id}")
+
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                logger.error(f"Error in cleanup loop: {e}")
+
+    def get_session_stats(self) -> Dict[str, Any]:
+        """Get statistics about active sessions.
+
+        Runs synchronously on the event loop thread, so it must not block on a
+        scheduled coroutine; it simply snapshots the session table.
+        """
+        return {
+            "total_sessions": len(self._sessions),
+            "max_concurrent": self.max_concurrent_sessions,
+            "sessions": {
+                session_id: {
+                    "created_at": session.created_at,
+                    "last_activity": session.last_activity,
+                    "active_tasks": len(session.active_tasks),
+                    "is_shutting_down": session.is_shutting_down,
+                }
+                for session_id, session in self._sessions.items()
+            },
+        }
+
+
+# Global session manager instance
+_session_manager: Optional[SessionManager] = None
+
+
+def get_session_manager() -> SessionManager:
+    """Get the global session manager instance."""
+    global _session_manager
+    if _session_manager is None:
+        _session_manager = SessionManager()
+    return _session_manager
+
+
+async def initialize_session_manager() -> SessionManager:
+    """Initialize the global session manager."""
+    global _session_manager
+    if _session_manager is None:
+        _session_manager = SessionManager()
+        await _session_manager.start()
+    return _session_manager
+
+
+async def shutdown_session_manager() -> None:
+    """Shutdown the global session manager."""
+    global _session_manager
+    if _session_manager is not None:
+        await _session_manager.stop()
+        _session_manager = None
diff --git a/libs/python/mcp-server/pdm.lock b/libs/python/mcp-server/pdm.lock
new file mode 100644
index 00000000..447ed9b8
--- /dev/null
+++ b/libs/python/mcp-server/pdm.lock
@@ -0,0 +1,4465 @@
+# This file is @generated by PDM.
+# It is not intended for manual editing.
+ +[metadata] +groups = ["default", "dev"] +strategy = ["inherit_metadata"] +lock_version = "4.5.0" +content_hash = "sha256:0bc7e3b95912e1aa0a98db359bc7a0fc8f5aede87a8332ee89850cc1f36e5322" + +[[metadata.targets]] +requires_python = ">=3.11" + +[[package]] +name = "accelerate" +version = "1.10.1" +requires_python = ">=3.9.0" +summary = "Accelerate" +groups = ["default"] +dependencies = [ + "huggingface-hub>=0.21.0", + "numpy<3.0.0,>=1.17", + "packaging>=20.0", + "psutil", + "pyyaml", + "safetensors>=0.4.3", + "torch>=2.0.0", +] +files = [ + {file = "accelerate-1.10.1-py3-none-any.whl", hash = "sha256:3621cff60b9a27ce798857ece05e2b9f56fcc71631cfb31ccf71f0359c311f11"}, + {file = "accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8"}, +] + +[[package]] +name = "aiofiles" +version = "24.1.0" +requires_python = ">=3.8" +summary = "File support for asyncio." +groups = ["default"] +files = [ + {file = "aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5"}, + {file = "aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c"}, +] + +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +requires_python = ">=3.9" +summary = "Happy Eyeballs for asyncio" +groups = ["default"] +files = [ + {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, + {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, +] + +[[package]] +name = "aiohttp" +version = "3.13.0" +requires_python = ">=3.9" +summary = "Async http client/server framework (asyncio)" +groups = ["default"] +dependencies = [ + "aiohappyeyeballs>=2.5.0", + "aiosignal>=1.4.0", + "async-timeout<6.0,>=4.0; python_version < \"3.11\"", + "attrs>=17.3.0", + "frozenlist>=1.1.1", + "multidict<7.0,>=4.5", + "propcache>=0.2.0", + "yarl<2.0,>=1.17.0", +] +files = [ + {file = "aiohttp-3.13.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:99eb94e97a42367fef5fc11e28cb2362809d3e70837f6e60557816c7106e2e20"}, + {file = "aiohttp-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4696665b2713021c6eba3e2b882a86013763b442577fe5d2056a42111e732eca"}, + {file = "aiohttp-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3e6a38366f7f0d0f6ed7a1198055150c52fda552b107dad4785c0852ad7685d1"}, + {file = "aiohttp-3.13.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aab715b1a0c37f7f11f9f1f579c6fbaa51ef569e47e3c0a4644fba46077a9409"}, + {file = "aiohttp-3.13.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7972c82bed87d7bd8e374b60a6b6e816d75ba4f7c2627c2d14eed216e62738e1"}, + {file = "aiohttp-3.13.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca8313cb852af788c78d5afdea24c40172cbfff8b35e58b407467732fde20390"}, + {file = "aiohttp-3.13.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c333a2385d2a6298265f4b3e960590f787311b87f6b5e6e21bb8375914ef504"}, + {file = "aiohttp-3.13.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cc6d5fc5edbfb8041d9607f6a417997fa4d02de78284d386bea7ab767b5ea4f3"}, + {file = "aiohttp-3.13.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:7ddedba3d0043349edc79df3dc2da49c72b06d59a45a42c1c8d987e6b8d175b8"}, + {file = "aiohttp-3.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:23ca762140159417a6bbc959ca1927f6949711851e56f2181ddfe8d63512b5ad"}, + {file = "aiohttp-3.13.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:bfe824d6707a5dc3c5676685f624bc0c63c40d79dc0239a7fd6c034b98c25ebe"}, + {file = "aiohttp-3.13.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:3c11fa5dd2ef773a8a5a6daa40243d83b450915992eab021789498dc87acc114"}, + {file = "aiohttp-3.13.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:00fdfe370cffede3163ba9d3f190b32c0cfc8c774f6f67395683d7b0e48cdb8a"}, + {file = "aiohttp-3.13.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:6475e42ef92717a678bfbf50885a682bb360a6f9c8819fb1a388d98198fdcb80"}, + {file = "aiohttp-3.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:77da5305a410910218b99f2a963092f4277d8a9c1f429c1ff1b026d1826bd0b6"}, + {file = "aiohttp-3.13.0-cp311-cp311-win32.whl", hash = "sha256:2f9d9ea547618d907f2ee6670c9a951f059c5994e4b6de8dcf7d9747b420c820"}, + {file = "aiohttp-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f19f7798996d4458c669bd770504f710014926e9970f4729cf55853ae200469"}, + {file = "aiohttp-3.13.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1c272a9a18a5ecc48a7101882230046b83023bb2a662050ecb9bfcb28d9ab53a"}, + {file = "aiohttp-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:97891a23d7fd4e1afe9c2f4473e04595e4acb18e4733b910b6577b74e7e21985"}, + {file = "aiohttp-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:475bd56492ce5f4cffe32b5533c6533ee0c406d1d0e6924879f83adcf51da0ae"}, + {file = "aiohttp-3.13.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c32ada0abb4bc94c30be2b681c42f058ab104d048da6f0148280a51ce98add8c"}, + {file = "aiohttp-3.13.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4af1f8877ca46ecdd0bc0d4a6b66d4b2bddc84a79e2e8366bc0d5308e76bceb8"}, + {file = "aiohttp-3.13.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e04ab827ec4f775817736b20cdc8350f40327f9b598dec4e18c9ffdcbea88a93"}, + {file = "aiohttp-3.13.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a6d9487b9471ec36b0faedf52228cd732e89be0a2bbd649af890b5e2ce422353"}, + {file = "aiohttp-3.13.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e66c57416352f36bf98f6641ddadd47c93740a22af7150d3e9a1ef6e983f9a8"}, + {file = "aiohttp-3.13.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:469167d5372f5bb3aedff4fc53035d593884fff2617a75317740e885acd48b04"}, + {file = "aiohttp-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a9f3546b503975a69b547c9fd1582cad10ede1ce6f3e313a2f547c73a3d7814f"}, + {file = "aiohttp-3.13.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6b4174fcec98601f0cfdf308ee29a6ae53c55f14359e848dab4e94009112ee7d"}, + {file = "aiohttp-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a533873a7a4ec2270fb362ee5a0d3b98752e4e1dc9042b257cd54545a96bd8ed"}, + {file = "aiohttp-3.13.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ce887c5e54411d607ee0959cac15bb31d506d86a9bcaddf0b7e9d63325a7a802"}, + {file = "aiohttp-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:d871f6a30d43e32fc9252dc7b9febe1a042b3ff3908aa83868d7cf7c9579a59b"}, + {file = "aiohttp-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:222c828243b4789d79a706a876910f656fad4381661691220ba57b2ab4547865"}, + {file = "aiohttp-3.13.0-cp312-cp312-win32.whl", hash = "sha256:682d2e434ff2f1108314ff7f056ce44e457f12dbed0249b24e106e385cf154b9"}, + {file = "aiohttp-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:0a2be20eb23888df130214b91c262a90e2de1553d6fb7de9e9010cec994c0ff2"}, + {file = "aiohttp-3.13.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:00243e51f16f6ec0fb021659d4af92f675f3cf9f9b39efd142aa3ad641d8d1e6"}, + {file = "aiohttp-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:059978d2fddc462e9211362cbc8446747ecd930537fa559d3d25c256f032ff54"}, + {file = "aiohttp-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:564b36512a7da3b386143c611867e3f7cfb249300a1bf60889bd9985da67ab77"}, + {file = "aiohttp-3.13.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4aa995b9156ae499393d949a456a7ab0b994a8241a96db73a3b73c7a090eff6a"}, + {file = "aiohttp-3.13.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:55ca0e95a3905f62f00900255ed807c580775174252999286f283e646d675a49"}, + {file = "aiohttp-3.13.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:49ce7525853a981fc35d380aa2353536a01a9ec1b30979ea4e35966316cace7e"}, + {file = "aiohttp-3.13.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2117be9883501eaf95503bd313eb4c7a23d567edd44014ba15835a1e9ec6d852"}, + {file = "aiohttp-3.13.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d169c47e40c911f728439da853b6fd06da83761012e6e76f11cb62cddae7282b"}, + {file = "aiohttp-3.13.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:703ad3f742fc81e543638a7bebddd35acadaa0004a5e00535e795f4b6f2c25ca"}, + {file = "aiohttp-3.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5bf635c3476f4119b940cc8d94ad454cbe0c377e61b4527f0192aabeac1e9370"}, + {file = "aiohttp-3.13.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:cfe6285ef99e7ee51cef20609be2bc1dd0e8446462b71c9db8bb296ba632810a"}, + {file = "aiohttp-3.13.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:34d8af6391c5f2e69749d7f037b614b8c5c42093c251f336bdbfa4b03c57d6c4"}, + {file = "aiohttp-3.13.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:12f5d820fadc5848d4559ea838aef733cf37ed2a1103bba148ac2f5547c14c29"}, + {file = "aiohttp-3.13.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0f1338b61ea66f4757a0544ed8a02ccbf60e38d9cfb3225888888dd4475ebb96"}, + {file = "aiohttp-3.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:582770f82513419512da096e8df21ca44f86a2e56e25dc93c5ab4df0fe065bf0"}, + {file = "aiohttp-3.13.0-cp313-cp313-win32.whl", hash = "sha256:3194b8cab8dbc882f37c13ef1262e0a3d62064fa97533d3aa124771f7bf1ecee"}, + {file = "aiohttp-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:7897298b3eedc790257fef8a6ec582ca04e9dbe568ba4a9a890913b925b8ea21"}, + {file = "aiohttp-3.13.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c417f8c2e1137775569297c584a8a7144e5d1237789eae56af4faf1894a0b861"}, + {file = "aiohttp-3.13.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:f84b53326abf8e56ebc28a35cebf4a0f396a13a76300f500ab11fe0573bf0b52"}, + {file = 
"aiohttp-3.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:990a53b9d6a30b2878789e490758e568b12b4a7fb2527d0c89deb9650b0e5813"}, + {file = "aiohttp-3.13.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c811612711e01b901e18964b3e5dec0d35525150f5f3f85d0aee2935f059910a"}, + {file = "aiohttp-3.13.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ee433e594d7948e760b5c2a78cc06ac219df33b0848793cf9513d486a9f90a52"}, + {file = "aiohttp-3.13.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:19bb08e56f57c215e9572cd65cb6f8097804412c54081d933997ddde3e5ac579"}, + {file = "aiohttp-3.13.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f27b7488144eb5dd9151cf839b195edd1569629d90ace4c5b6b18e4e75d1e63a"}, + {file = "aiohttp-3.13.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d812838c109757a11354a161c95708ae4199c4fd4d82b90959b20914c1d097f6"}, + {file = "aiohttp-3.13.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7c20db99da682f9180fa5195c90b80b159632fb611e8dbccdd99ba0be0970620"}, + {file = "aiohttp-3.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cf8b0870047900eb1f17f453b4b3953b8ffbf203ef56c2f346780ff930a4d430"}, + {file = "aiohttp-3.13.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:5b8a5557d5af3f4e3add52a58c4cf2b8e6e59fc56b261768866f5337872d596d"}, + {file = "aiohttp-3.13.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:052bcdd80c1c54b8a18a9ea0cd5e36f473dc8e38d51b804cea34841f677a9971"}, + {file = "aiohttp-3.13.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:76484ba17b2832776581b7ab466d094e48eba74cb65a60aea20154dae485e8bd"}, + {file = "aiohttp-3.13.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:62d8a0adcdaf62ee56bfb37737153251ac8e4b27845b3ca065862fb01d99e247"}, + {file = "aiohttp-3.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5004d727499ecb95f7c9147dd0bfc5b5670f71d355f0bd26d7af2d3af8e07d2f"}, + {file = "aiohttp-3.13.0-cp314-cp314-win32.whl", hash = "sha256:a1c20c26af48aea984f63f96e5d7af7567c32cb527e33b60a0ef0a6313cf8b03"}, + {file = "aiohttp-3.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:56f7d230ec66e799fbfd8350e9544f8a45a4353f1cf40c1fea74c1780f555b8f"}, + {file = "aiohttp-3.13.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:2fd35177dc483ae702f07b86c782f4f4b100a8ce4e7c5778cea016979023d9fd"}, + {file = "aiohttp-3.13.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:4df1984c8804ed336089e88ac81a9417b1fd0db7c6f867c50a9264488797e778"}, + {file = "aiohttp-3.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e68c0076052dd911a81d3acc4ef2911cc4ef65bf7cadbfbc8ae762da24da858f"}, + {file = "aiohttp-3.13.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc95c49853cd29613e4fe4ff96d73068ff89b89d61e53988442e127e8da8e7ba"}, + {file = "aiohttp-3.13.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3b3bdc89413117b40cc39baae08fd09cbdeb839d421c4e7dce6a34f6b54b3ac1"}, + {file = "aiohttp-3.13.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3e77a729df23be2116acc4e9de2767d8e92445fbca68886dd991dc912f473755"}, + {file = 
"aiohttp-3.13.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e88ab34826d6eeb6c67e6e92400b9ec653faf5092a35f07465f44c9f1c429f82"}, + {file = "aiohttp-3.13.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:019dbef24fe28ce2301419dd63a2b97250d9760ca63ee2976c2da2e3f182f82e"}, + {file = "aiohttp-3.13.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2c4aeaedd20771b7b4bcdf0ae791904445df6d856c02fc51d809d12d17cffdc7"}, + {file = "aiohttp-3.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b3a8e6a2058a0240cfde542b641d0e78b594311bc1a710cbcb2e1841417d5cb3"}, + {file = "aiohttp-3.13.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:f8e38d55ca36c15f36d814ea414ecb2401d860de177c49f84a327a25b3ee752b"}, + {file = "aiohttp-3.13.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a921edbe971aade1bf45bcbb3494e30ba6863a5c78f28be992c42de980fd9108"}, + {file = "aiohttp-3.13.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:474cade59a447cb4019c0dce9f0434bf835fb558ea932f62c686fe07fe6db6a1"}, + {file = "aiohttp-3.13.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:99a303ad960747c33b65b1cb65d01a62ac73fa39b72f08a2e1efa832529b01ed"}, + {file = "aiohttp-3.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:bb34001fc1f05f6b323e02c278090c07a47645caae3aa77ed7ed8a3ce6abcce9"}, + {file = "aiohttp-3.13.0-cp314-cp314t-win32.whl", hash = "sha256:dea698b64235d053def7d2f08af9302a69fcd760d1c7bd9988fd5d3b6157e657"}, + {file = "aiohttp-3.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1f164699a060c0b3616459d13c1464a981fddf36f892f0a5027cbd45121fb14b"}, + {file = "aiohttp-3.13.0.tar.gz", hash = "sha256:378dbc57dd8cf341ce243f13fa1fa5394d68e2e02c15cd5f28eae35a70ec7f67"}, +] + +[[package]] +name = "aiosignal" +version = "1.4.0" +requires_python = ">=3.9" +summary = "aiosignal: a list of registered asynchronous callbacks" +groups = ["default"] +dependencies = [ + "frozenlist>=1.1.0", + "typing-extensions>=4.2; python_version < \"3.13\"", +] +files = [ + {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, + {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +requires_python = ">=3.8" +summary = "Reusable constraint types to use with typing.Annotated" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.9\"", +] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.11.0" +requires_python = ">=3.9" +summary = "High-level concurrency and networking framework on top of asyncio or Trio" +groups = ["default"] +dependencies = [ + "exceptiongroup>=1.0.2; python_version < \"3.11\"", + "idna>=2.8", + "sniffio>=1.1", + "typing-extensions>=4.5; python_version < \"3.13\"", +] +files = [ + {file = "anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc"}, + {file = "anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4"}, +] + +[[package]] +name = "asyncio" +version = "4.0.0" 
+requires_python = ">=3.4" +summary = "Deprecated backport of asyncio; use the stdlib package instead" +groups = ["default"] +files = [ + {file = "asyncio-4.0.0-py3-none-any.whl", hash = "sha256:c1eddb0659231837046809e68103969b2bef8b0400d59cfa6363f6b5ed8cc88b"}, + {file = "asyncio-4.0.0.tar.gz", hash = "sha256:570cd9e50db83bc1629152d4d0b7558d6451bb1bfd5dfc2e935d96fc2f40329b"}, +] + +[[package]] +name = "attrs" +version = "25.4.0" +requires_python = ">=3.9" +summary = "Classes Without Boilerplate" +groups = ["default"] +files = [ + {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, + {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, +] + +[[package]] +name = "audioop-lts" +version = "0.2.2" +requires_python = ">=3.13" +summary = "LTS Port of Python audioop" +groups = ["default"] +marker = "python_version >= \"3.13\"" +files = [ + {file = "audioop_lts-0.2.2-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd3d4602dc64914d462924a08c1a9816435a2155d74f325853c1f1ac3b2d9800"}, + {file = "audioop_lts-0.2.2-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:550c114a8df0aafe9a05442a1162dfc8fec37e9af1d625ae6060fed6e756f303"}, + {file = "audioop_lts-0.2.2-cp313-abi3-macosx_11_0_arm64.whl", hash = "sha256:9a13dc409f2564de15dd68be65b462ba0dde01b19663720c68c1140c782d1d75"}, + {file = "audioop_lts-0.2.2-cp313-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51c916108c56aa6e426ce611946f901badac950ee2ddaf302b7ed35d9958970d"}, + {file = "audioop_lts-0.2.2-cp313-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47eba38322370347b1c47024defbd36374a211e8dd5b0dcbce7b34fdb6f8847b"}, + {file = "audioop_lts-0.2.2-cp313-abi3-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba7c3a7e5f23e215cb271516197030c32aef2e754252c4c70a50aaff7031a2c8"}, + {file = "audioop_lts-0.2.2-cp313-abi3-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:def246fe9e180626731b26e89816e79aae2276f825420a07b4a647abaa84becc"}, + {file = "audioop_lts-0.2.2-cp313-abi3-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e160bf9df356d841bb6c180eeeea1834085464626dc1b68fa4e1d59070affdc3"}, + {file = "audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4b4cd51a57b698b2d06cb9993b7ac8dfe89a3b2878e96bc7948e9f19ff51dba6"}, + {file = "audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_ppc64le.whl", hash = "sha256:4a53aa7c16a60a6857e6b0b165261436396ef7293f8b5c9c828a3a203147ed4a"}, + {file = "audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_riscv64.whl", hash = "sha256:3fc38008969796f0f689f1453722a0f463da1b8a6fbee11987830bfbb664f623"}, + {file = "audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_s390x.whl", hash = "sha256:15ab25dd3e620790f40e9ead897f91e79c0d3ce65fe193c8ed6c26cffdd24be7"}, + {file = "audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:03f061a1915538fd96272bac9551841859dbb2e3bf73ebe4a23ef043766f5449"}, + {file = "audioop_lts-0.2.2-cp313-abi3-win32.whl", hash = "sha256:3bcddaaf6cc5935a300a8387c99f7a7fbbe212a11568ec6cf6e4bc458c048636"}, + {file = "audioop_lts-0.2.2-cp313-abi3-win_amd64.whl", hash = "sha256:a2c2a947fae7d1062ef08c4e369e0ba2086049a5e598fda41122535557012e9e"}, + {file = "audioop_lts-0.2.2-cp313-abi3-win_arm64.whl", hash = "sha256:5f93a5db13927a37d2d09637ccca4b2b6b48c19cd9eda7b17a2e9f77edee6a6f"}, + {file = 
"audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:73f80bf4cd5d2ca7814da30a120de1f9408ee0619cc75da87d0641273d202a09"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:106753a83a25ee4d6f473f2be6b0966fc1c9af7e0017192f5531a3e7463dce58"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fbdd522624141e40948ab3e8cdae6e04c748d78710e9f0f8d4dae2750831de19"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:143fad0311e8209ece30a8dbddab3b65ab419cbe8c0dde6e8828da25999be911"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfbbc74ec68a0fd08cfec1f4b5e8cca3d3cd7de5501b01c4b5d209995033cde9"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cfcac6aa6f42397471e4943e0feb2244549db5c5d01efcd02725b96af417f3fe"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:752d76472d9804ac60f0078c79cdae8b956f293177acd2316cd1e15149aee132"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:83c381767e2cc10e93e40281a04852facc4cd9334550e0f392f72d1c0a9c5753"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c0022283e9556e0f3643b7c3c03f05063ca72b3063291834cca43234f20c60bb"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a2d4f1513d63c795e82948e1305f31a6d530626e5f9f2605408b300ae6095093"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:c9c8e68d8b4a56fda8c025e538e639f8c5953f5073886b596c93ec9b620055e7"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:96f19de485a2925314f5020e85911fb447ff5fbef56e8c7c6927851b95533a1c"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e541c3ef484852ef36545f66209444c48b28661e864ccadb29daddb6a4b8e5f5"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-win32.whl", hash = "sha256:d5e73fa573e273e4f2e5ff96f9043858a5e9311e94ffefd88a3186a910c70917"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9191d68659eda01e448188f60364c7763a7ca6653ed3f87ebb165822153a8547"}, + {file = "audioop_lts-0.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c174e322bb5783c099aaf87faeb240c8d210686b04bd61dfd05a8e5a83d88969"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:f9ee9b52f5f857fbaf9d605a360884f034c92c1c23021fb90b2e39b8e64bede6"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:49ee1a41738a23e98d98b937a0638357a2477bc99e61b0f768a8f654f45d9b7a"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5b00be98ccd0fc123dcfad31d50030d25fcf31488cde9e61692029cd7394733b"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6d2e0f9f7a69403e388894d4ca5ada5c47230716a03f2847cfc7bd1ecb589d6"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9b0b8a03ef474f56d1a842af1a2e01398b8f7654009823c6d9e0ecff4d5cfbf"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:2b267b70747d82125f1a021506565bdc5609a2b24bcb4773c16d79d2bb260bbd"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0337d658f9b81f4cd0fdb1f47635070cc084871a3d4646d9de74fdf4e7c3d24a"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:167d3b62586faef8b6b2275c3218796b12621a60e43f7e9d5845d627b9c9b80e"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0d9385e96f9f6da847f4d571ce3cb15b5091140edf3db97276872647ce37efd7"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:48159d96962674eccdca9a3df280e864e8ac75e40a577cc97c5c42667ffabfc5"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8fefe5868cd082db1186f2837d64cfbfa78b548ea0d0543e9b28935ccce81ce9"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:58cf54380c3884fb49fdd37dfb7a772632b6701d28edd3e2904743c5e1773602"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:088327f00488cdeed296edd9215ca159f3a5a5034741465789cad403fcf4bec0"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-win32.whl", hash = "sha256:068aa17a38b4e0e7de771c62c60bbca2455924b67a8814f3b0dee92b5820c0b3"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:a5bf613e96f49712073de86f20dbdd4014ca18efd4d34ed18c75bd808337851b"}, + {file = "audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd"}, + {file = "audioop_lts-0.2.2.tar.gz", hash = "sha256:64d0c62d88e67b98a1a5e71987b7aa7b5bcffc7dcee65b635823dbdd0a8dbbd0"}, +] + +[[package]] +name = "backoff" +version = "2.2.1" +requires_python = ">=3.7,<4.0" +summary = "Function decoration for backoff and retry" +groups = ["default"] +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + +[[package]] +name = "black" +version = "25.9.0" +requires_python = ">=3.9" +summary = "The uncompromising code formatter." 
+groups = ["dev"] +dependencies = [ + "click>=8.0.0", + "mypy-extensions>=0.4.3", + "packaging>=22.0", + "pathspec>=0.9.0", + "platformdirs>=2", + "pytokens>=0.1.10", + "tomli>=1.1.0; python_version < \"3.11\"", + "typing-extensions>=4.0.1; python_version < \"3.11\"", +] +files = [ + {file = "black-25.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:456386fe87bad41b806d53c062e2974615825c7a52159cde7ccaeb0695fa28fa"}, + {file = "black-25.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a16b14a44c1af60a210d8da28e108e13e75a284bf21a9afa6b4571f96ab8bb9d"}, + {file = "black-25.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aaf319612536d502fdd0e88ce52d8f1352b2c0a955cc2798f79eeca9d3af0608"}, + {file = "black-25.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:c0372a93e16b3954208417bfe448e09b0de5cc721d521866cd9e0acac3c04a1f"}, + {file = "black-25.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1b9dc70c21ef8b43248f1d86aedd2aaf75ae110b958a7909ad8463c4aa0880b0"}, + {file = "black-25.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8e46eecf65a095fa62e53245ae2795c90bdecabd53b50c448d0a8bcd0d2e74c4"}, + {file = "black-25.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9101ee58ddc2442199a25cb648d46ba22cd580b00ca4b44234a324e3ec7a0f7e"}, + {file = "black-25.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:77e7060a00c5ec4b3367c55f39cf9b06e68965a4f2e61cecacd6d0d9b7ec945a"}, + {file = "black-25.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0172a012f725b792c358d57fe7b6b6e8e67375dd157f64fa7a3097b3ed3e2175"}, + {file = "black-25.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3bec74ee60f8dfef564b573a96b8930f7b6a538e846123d5ad77ba14a8d7a64f"}, + {file = "black-25.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b756fc75871cb1bcac5499552d771822fd9db5a2bb8db2a7247936ca48f39831"}, + {file = "black-25.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:846d58e3ce7879ec1ffe816bb9df6d006cd9590515ed5d17db14e17666b2b357"}, + {file = "black-25.9.0-py3-none-any.whl", hash = "sha256:474b34c1342cdc157d307b56c4c65bce916480c4a8f6551fdc6bf9b486a7c4ae"}, + {file = "black-25.9.0.tar.gz", hash = "sha256:0474bca9a0dd1b51791fcc507a4e02078a1c63f6d4e4ae5544b9848c7adfb619"}, +] + +[[package]] +name = "brotli" +version = "1.1.0" +summary = "Python bindings for the Brotli compression library" +groups = ["default"] +files = [ + {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"}, + {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327"}, + {file = 
"Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"}, + {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, + {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"}, + {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, + {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"}, + {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"}, + {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"}, + {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, +] + +[[package]] +name = "certifi" +version = "2025.10.5" +requires_python = ">=3.7" +summary = "Python package for providing Mozilla's CA Bundle." +groups = ["default"] +files = [ + {file = "certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"}, + {file = "certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43"}, +] + +[[package]] +name = "cffi" +version = "2.0.0" +requires_python = ">=3.9" +summary = "Foreign Function Interface for Python calling C code." 
+groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "pycparser; implementation_name != \"PyPy\"", +] +files = [ + {file = "cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe"}, + {file = "cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743"}, + {file = "cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5"}, + {file = "cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5"}, + {file = "cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d"}, + {file = "cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d"}, + {file = "cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba"}, + {file = "cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94"}, + {file = "cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187"}, + 
{file = "cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18"}, + {file = "cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5"}, + {file = "cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6"}, + {file = "cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb"}, + {file = "cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26"}, + {file = "cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c"}, + {file = "cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b"}, + {file = "cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27"}, + {file = "cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75"}, + {file = "cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91"}, + {file = "cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5"}, + {file = "cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775"}, + {file = "cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205"}, + {file = "cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1"}, + {file = "cffi-2.0.0-cp314-cp314-win32.whl", hash = 
"sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f"}, + {file = "cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25"}, + {file = "cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad"}, + {file = "cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9"}, + {file = "cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592"}, + {file = "cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512"}, + {file = "cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4"}, + {file = "cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e"}, + {file = "cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6"}, + {file = "cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9"}, + {file = "cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.3" +requires_python = ">=3.7" +summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+groups = ["default"] +files = [ + {file = "charset_normalizer-3.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b256ee2e749283ef3ddcff51a675ff43798d92d746d1a6e4631bf8c707d22d0b"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13faeacfe61784e2559e690fc53fa4c5ae97c6fcedb8eb6fb8d0a15b475d2c64"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00237675befef519d9af72169d8604a067d92755e84fe76492fef5441db05b91"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:585f3b2a80fbd26b048a0be90c5aae8f06605d3c92615911c3a2b03a8a3b796f"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e78314bdc32fa80696f72fa16dc61168fda4d6a0c014e0380f9d02f0e5d8a07"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:96b2b3d1a83ad55310de8c7b4a2d04d9277d5591f40761274856635acc5fcb30"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:939578d9d8fd4299220161fdd76e86c6a251987476f5243e8864a7844476ba14"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fd10de089bcdcd1be95a2f73dbe6254798ec1bda9f450d5828c96f93e2536b9c"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-win32.whl", hash = "sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849"}, + {file = "charset_normalizer-3.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f"}, + {file = 
"charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c"}, + {file = "charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a"}, + {file = "charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14"}, +] + +[[package]] +name = "click" +version = "8.3.0" +requires_python = ">=3.10" +summary = "Composable command line interface toolkit" +groups = ["default", "dev"] +dependencies = [ + "colorama; platform_system == \"Windows\"", +] +files = [ + {file = "click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc"}, + {file = "click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +summary = "Cross-platform colored terminal text." +groups = ["default", "dev"] +marker = "platform_system == \"Windows\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "contourpy" +version = "1.3.3" +requires_python = ">=3.11" +summary = "Python library for calculating contours of 2D quadrilateral grids" +groups = ["default"] +dependencies = [ + "numpy>=1.25", +] +files = [ + {file = "contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1"}, + {file = "contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db"}, + {file = "contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620"}, + {file = "contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f"}, + {file = "contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff"}, + {file = "contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = 
"sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42"}, + {file = "contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470"}, + {file = "contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb"}, + {file = "contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1"}, + {file = "contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7"}, + {file = "contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411"}, + {file = "contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69"}, + {file = "contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b"}, + {file = "contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc"}, + {file = "contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5"}, + {file = "contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9"}, + {file = "contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659"}, + {file = "contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7"}, + {file = "contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d"}, + {file = "contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263"}, + {file = "contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = 
"sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9"}, + {file = "contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d"}, + {file = "contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b"}, + {file = "contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a"}, + {file = "contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e"}, + {file = "contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3"}, + {file = "contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8"}, + {file = "contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301"}, + {file = "contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a"}, + {file = "contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3"}, + {file = "contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b"}, + {file = "contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36"}, + {file = "contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d"}, + {file = "contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd"}, + {file = "contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339"}, + {file = "contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772"}, + {file = "contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0"}, + {file = "contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4"}, + {file = "contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f"}, + {file = "contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae"}, + {file = "contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc"}, + {file = "contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77"}, + {file = "contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880"}, +] + +[[package]] +name = "cua-agent" +version = "0.4.12" +requires_python = ">=3.11" +summary = "CUA (Computer Use) Agent for AI-driven computer interaction" +groups = ["default"] +dependencies = [ + "aiohttp>=3.9.3", + "anyio>=4.4.1", + "asyncio", + "certifi>=2024.2.2", + "cua-computer<0.5.0,>=0.4.0", + "cua-core<0.2.0,>=0.1.8", + "httpx>=0.27.0", + "litellm>=1.74.12", + "pydantic>=2.6.4", + "python-dotenv>=1.0.1", + "rich>=13.7.1", + "typing-extensions>=4.12.2", +] +files = [ + {file = "cua_agent-0.4.12-py3-none-any.whl", hash = "sha256:f5d356173a9ad0ddfd6342c9b5a1aa76c1ed9e5ce09504bd247e4740e0c7f5bf"}, + {file = "cua_agent-0.4.12.tar.gz", hash = "sha256:3543ac44e6b8a8d0e817f3d7db16d66aa7101f1e2d3c2c9d10a4a9790a9c6e70"}, +] + +[[package]] +name = "cua-agent" +version = "0.4.12" +extras = ["all"] +requires_python = ">=3.11" +summary = "CUA (Computer Use) Agent for AI-driven computer interaction" +groups = ["default"] +dependencies = [ + "accelerate", + "cua-agent==0.4.12", + 
"cua-som<0.2.0,>=0.1.0", + "gradio>=5.23.3", + "mlx-vlm>=0.1.27; sys_platform == \"darwin\"", + "python-dotenv>=1.0.1", + "torch", + "transformers>=4.54.0", + "ultralytics>=8.0.0", + "yaspin>=3.1.0", +] +files = [ + {file = "cua_agent-0.4.12-py3-none-any.whl", hash = "sha256:f5d356173a9ad0ddfd6342c9b5a1aa76c1ed9e5ce09504bd247e4740e0c7f5bf"}, + {file = "cua_agent-0.4.12.tar.gz", hash = "sha256:3543ac44e6b8a8d0e817f3d7db16d66aa7101f1e2d3c2c9d10a4a9790a9c6e70"}, +] + +[[package]] +name = "cua-computer" +version = "0.4.7" +requires_python = ">=3.11" +summary = "Computer-Use Interface (CUI) framework powering Cua" +groups = ["default"] +dependencies = [ + "aiohttp>=3.9.0", + "cua-core<0.2.0,>=0.1.0", + "pillow>=10.0.0", + "pydantic>=2.11.1", + "websocket-client>=1.8.0", + "websockets>=12.0", +] +files = [ + {file = "cua_computer-0.4.7-py3-none-any.whl", hash = "sha256:e3392f66a94594e73d31ff772de8440196439d6cbcaf1d103a31ba13980d2ccf"}, + {file = "cua_computer-0.4.7.tar.gz", hash = "sha256:a25ed65b93d86bfb7401bd3bd382bf11ab892fc76cfab7d5971c08615646d62d"}, +] + +[[package]] +name = "cua-core" +version = "0.1.9" +requires_python = ">=3.11" +summary = "Core functionality for Cua including telemetry and shared utilities" +groups = ["default"] +dependencies = [ + "httpx>=0.24.0", + "posthog>=3.20.0", + "pydantic>=2.0.0", +] +files = [ + {file = "cua_core-0.1.9-py3-none-any.whl", hash = "sha256:f587020380293215b96a457b0f80557252517c130d509fce27c3c25e68ccc933"}, + {file = "cua_core-0.1.9.tar.gz", hash = "sha256:d00da9d516b95f1b6c35c0bedf6d8310437f457d84406dbd3feeb3b5b89bd79c"}, +] + +[[package]] +name = "cua-som" +version = "0.1.3" +requires_python = ">=3.10" +summary = "Computer Vision and OCR library for detecting and analyzing UI elements" +groups = ["default"] +dependencies = [ + "easyocr>=1.7.1", + "huggingface-hub>=0.21.4", + "matplotlib>=3.8.3", + "numpy>=1.26.4", + "opencv-python-headless>=4.11.0.86", + "pillow>=10.2.0", + "pydantic>=2.6.3", + "setuptools>=75.8.1", + "supervision>=0.25.1", + "torch>=2.2.1", + "torchvision>=0.17.1", + "typing-extensions>=4.9.0", + "ultralytics>=8.1.28", +] +files = [ + {file = "cua_som-0.1.3-py3-none-any.whl", hash = "sha256:3ee85282dfbbd51b8ca51c51818fe5aa60bcf049f8c48dda477ba09cbb5cfa32"}, + {file = "cua_som-0.1.3.tar.gz", hash = "sha256:28b92a7c3de46249f4953d36d398db991f79e87436866d7fe554917732a93fc7"}, +] + +[[package]] +name = "cycler" +version = "0.12.1" +requires_python = ">=3.8" +summary = "Composable style cycles" +groups = ["default"] +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[[package]] +name = "datasets" +version = "4.1.1" +requires_python = ">=3.9.0" +summary = "HuggingFace community-driven open-source library of datasets" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "dill<0.4.1,>=0.3.0", + "filelock", + "fsspec[http]<=2025.9.0,>=2023.1.0", + "huggingface-hub>=0.24.0", + "multiprocess<0.70.17", + "numpy>=1.17", + "packaging", + "pandas", + "pyarrow>=21.0.0", + "pyyaml>=5.1", + "requests>=2.32.2", + "tqdm>=4.66.3", + "xxhash", +] +files = [ + {file = "datasets-4.1.1-py3-none-any.whl", hash = "sha256:62e4f6899a36be9ec74a7e759a6951253cc85b3fcfa0a759b0efa8353b149dac"}, + {file = "datasets-4.1.1.tar.gz", hash = "sha256:7d8d5ba8b12861d2c44bfff9c83484ebfafff1ff553371e5901a8d3aab5450e2"}, +] + +[[package]] 
+name = "defusedxml" +version = "0.7.1" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "XML bomb protection for Python stdlib modules" +groups = ["default"] +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + +[[package]] +name = "dill" +version = "0.4.0" +requires_python = ">=3.8" +summary = "serialize all of Python" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +files = [ + {file = "dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049"}, + {file = "dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0"}, +] + +[[package]] +name = "distro" +version = "1.9.0" +requires_python = ">=3.6" +summary = "Distro - an OS platform information API" +groups = ["default"] +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + +[[package]] +name = "easyocr" +version = "1.7.2" +summary = "End-to-End Multi-Lingual Optical Character Recognition (OCR) Solution" +groups = ["default"] +dependencies = [ + "Pillow", + "PyYAML", + "Shapely", + "ninja", + "numpy", + "opencv-python-headless", + "pyclipper", + "python-bidi", + "scikit-image", + "scipy", + "torch", + "torchvision>=0.5", +] +files = [ + {file = "easyocr-1.7.2-py3-none-any.whl", hash = "sha256:5be12f9b0e595d443c9c3d10b0542074b50f0ec2d98b141a109cd961fd1c177c"}, +] + +[[package]] +name = "fastapi" +version = "0.118.0" +requires_python = ">=3.8" +summary = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +groups = ["default"] +dependencies = [ + "pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4", + "starlette<0.49.0,>=0.40.0", + "typing-extensions>=4.8.0", +] +files = [ + {file = "fastapi-0.118.0-py3-none-any.whl", hash = "sha256:705137a61e2ef71019d2445b123aa8845bd97273c395b744d5a7dfe559056855"}, + {file = "fastapi-0.118.0.tar.gz", hash = "sha256:5e81654d98c4d2f53790a7d32d25a7353b30c81441be7d0958a26b5d761fa1c8"}, +] + +[[package]] +name = "fastuuid" +version = "0.13.5" +requires_python = ">=3.8" +summary = "Python bindings to Rust's UUID library." 
+groups = ["default"] +files = [ + {file = "fastuuid-0.13.5-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:c122558ca4b5487e2bd0863467e4ccfe636afd1274803741487d48f2e32ea0e1"}, + {file = "fastuuid-0.13.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d7abd42a03a17a681abddd19aa4d44ca2747138cf8a48373b395cf1341a10de2"}, + {file = "fastuuid-0.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2705cf7c2d6f7c03053404b75a4c44f872a73f6f9d5ea34f1dc6bba400c4a97c"}, + {file = "fastuuid-0.13.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d220a056fcbad25932c1f25304261198612f271f4d150b2a84e81adb877daf7"}, + {file = "fastuuid-0.13.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f29f93b5a0c5f5579f97f77d5319e9bfefd61d8678ec59d850201544faf33bf"}, + {file = "fastuuid-0.13.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:399d86623fb806151b1feb9fdd818ebfc1d50387199a35f7264f98dfc1540af5"}, + {file = "fastuuid-0.13.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:689e8795a1edd573b2c9a455024e4edf605a9690339bba29709857f7180894ea"}, + {file = "fastuuid-0.13.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:25e82c4a1734da168b36f7308e397afbe9c9b353799a9c69563a605f11dd4641"}, + {file = "fastuuid-0.13.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f62299e3cca69aad6a6fb37e26e45055587954d498ad98903fea24382377ea0e"}, + {file = "fastuuid-0.13.5-cp311-cp311-win32.whl", hash = "sha256:68227f2230381b89fb1ad362ca6e433de85c6c11c36312b41757cad47b8a8e32"}, + {file = "fastuuid-0.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:4a32306982bd031cb20d5d1a726b7b958a55babebd2300ce6c8e352d3496e931"}, + {file = "fastuuid-0.13.5-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:35fe8045e866bc6846f8de6fa05acb1de0c32478048484a995e96d31e21dff2a"}, + {file = "fastuuid-0.13.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:02a460333f52d731a006d18a52ef6fcb2d295a1f5b1a5938d30744191b2f77b7"}, + {file = "fastuuid-0.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:74b0e4f8c307b9f477a5d7284db4431ce53a3c1e3f4173db7a97db18564a6202"}, + {file = "fastuuid-0.13.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6955a99ef455c2986f3851f4e0ccc35dec56ac1a7720f2b92e88a75d6684512e"}, + {file = "fastuuid-0.13.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f10c77b826738c1a27dcdaa92ea4dc1ec9d869748a99e1fde54f1379553d4854"}, + {file = "fastuuid-0.13.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bb25dccbeb249d16d5e664f65f17ebec05136821d5ef462c4110e3f76b86fb86"}, + {file = "fastuuid-0.13.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a5becc646a3eeafb76ce0a6783ba190cd182e3790a8b2c78ca9db2b5e87af952"}, + {file = "fastuuid-0.13.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:69b34363752d06e9bb0dbdf02ae391ec56ac948c6f2eb00be90dad68e80774b9"}, + {file = "fastuuid-0.13.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57d0768afcad0eab8770c9b8cf904716bd3c547e8b9a4e755ee8a673b060a3a3"}, + {file = "fastuuid-0.13.5-cp312-cp312-win32.whl", hash = "sha256:8ac6c6f5129d52eaa6ef9ea4b6e2f7c69468a053f3ab8e439661186b9c06bb85"}, + {file = "fastuuid-0.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:ad630e97715beefef07ec37c9c162336e500400774e2c1cbe1a0df6f80d15b9a"}, + {file = 
"fastuuid-0.13.5-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:ea17dfd35e0e91920a35d91e65e5f9c9d1985db55ac4ff2f1667a0f61189cefa"}, + {file = "fastuuid-0.13.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:be6ad91e5fefbcc2a4b478858a2715e386d405834ea3ae337c3b6b95cc0e47d6"}, + {file = "fastuuid-0.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ea6df13a306aab3e0439d58c312ff1e6f4f07f09f667579679239b4a6121f64a"}, + {file = "fastuuid-0.13.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2354c1996d3cf12dc2ba3752e2c4d6edc46e1a38c63893146777b1939f3062d4"}, + {file = "fastuuid-0.13.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6cf9b7469fc26d1f9b1c43ac4b192e219e85b88fdf81d71aa755a6c08c8a817"}, + {file = "fastuuid-0.13.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92ba539170097b9047551375f1ca09d8d2b4aefcc79eeae3e1c43fe49b42072e"}, + {file = "fastuuid-0.13.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:dbb81d05617bc2970765c1ad82db7e8716f6a2b7a361a14b83de5b9240ade448"}, + {file = "fastuuid-0.13.5-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:d973bd6bf9d754d3cca874714ac0a6b22a47f239fb3d3c8687569db05aac3471"}, + {file = "fastuuid-0.13.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e725ceef79486423f05ee657634d4b4c1ca5fb2c8a94e0708f5d6356a83f2a83"}, + {file = "fastuuid-0.13.5-cp313-cp313-win32.whl", hash = "sha256:a1c430a332ead0b2674f1ef71b17f43b8139ec5a4201182766a21f131a31e021"}, + {file = "fastuuid-0.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:241fdd362fd96e6b337db62a65dd7cb3dfac20adf854573247a47510e192db6f"}, + {file = "fastuuid-0.13.5.tar.gz", hash = "sha256:d4976821ab424d41542e1ea39bc828a9d454c3f8a04067c06fca123c5b95a1a1"}, +] + +[[package]] +name = "ffmpy" +version = "0.6.1" +requires_python = ">=3.9" +summary = "A simple Python wrapper for FFmpeg" +groups = ["default"] +files = [ + {file = "ffmpy-0.6.1-py3-none-any.whl", hash = "sha256:69a37e2d7d6feb840e233d5640f3499a8b0a8657336774c86e4c52a3219222d4"}, + {file = "ffmpy-0.6.1.tar.gz", hash = "sha256:b5830fd05f72bace05b8fb28724d54a7a63c5119d7f74ca36a75df33f749142d"}, +] + +[[package]] +name = "filelock" +version = "3.19.1" +requires_python = ">=3.9" +summary = "A platform independent file lock." 
+groups = ["default"] +files = [ + {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"}, + {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"}, +] + +[[package]] +name = "fonttools" +version = "4.60.1" +requires_python = ">=3.9" +summary = "Tools to manipulate font files" +groups = ["default"] +files = [ + {file = "fonttools-4.60.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7b4c32e232a71f63a5d00259ca3d88345ce2a43295bb049d21061f338124246f"}, + {file = "fonttools-4.60.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3630e86c484263eaac71d117085d509cbcf7b18f677906824e4bace598fb70d2"}, + {file = "fonttools-4.60.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5c1015318e4fec75dd4943ad5f6a206d9727adf97410d58b7e32ab644a807914"}, + {file = "fonttools-4.60.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e6c58beb17380f7c2ea181ea11e7db8c0ceb474c9dd45f48e71e2cb577d146a1"}, + {file = "fonttools-4.60.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec3681a0cb34c255d76dd9d865a55f260164adb9fa02628415cdc2d43ee2c05d"}, + {file = "fonttools-4.60.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f4b5c37a5f40e4d733d3bbaaef082149bee5a5ea3156a785ff64d949bd1353fa"}, + {file = "fonttools-4.60.1-cp311-cp311-win32.whl", hash = "sha256:398447f3d8c0c786cbf1209711e79080a40761eb44b27cdafffb48f52bcec258"}, + {file = "fonttools-4.60.1-cp311-cp311-win_amd64.whl", hash = "sha256:d066ea419f719ed87bc2c99a4a4bfd77c2e5949cb724588b9dd58f3fd90b92bf"}, + {file = "fonttools-4.60.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7b0c6d57ab00dae9529f3faf187f2254ea0aa1e04215cf2f1a8ec277c96661bc"}, + {file = "fonttools-4.60.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:839565cbf14645952d933853e8ade66a463684ed6ed6c9345d0faf1f0e868877"}, + {file = "fonttools-4.60.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8177ec9676ea6e1793c8a084a90b65a9f778771998eb919d05db6d4b1c0b114c"}, + {file = "fonttools-4.60.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:996a4d1834524adbb423385d5a629b868ef9d774670856c63c9a0408a3063401"}, + {file = "fonttools-4.60.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a46b2f450bc79e06ef3b6394f0c68660529ed51692606ad7f953fc2e448bc903"}, + {file = "fonttools-4.60.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ec722ee589e89a89f5b7574f5c45604030aa6ae24cb2c751e2707193b466fed"}, + {file = "fonttools-4.60.1-cp312-cp312-win32.whl", hash = "sha256:b2cf105cee600d2de04ca3cfa1f74f1127f8455b71dbad02b9da6ec266e116d6"}, + {file = "fonttools-4.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:992775c9fbe2cf794786fa0ffca7f09f564ba3499b8fe9f2f80bd7197db60383"}, + {file = "fonttools-4.60.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f68576bb4bbf6060c7ab047b1574a1ebe5c50a17de62830079967b211059ebb"}, + {file = "fonttools-4.60.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:eedacb5c5d22b7097482fa834bda0dafa3d914a4e829ec83cdea2a01f8c813c4"}, + {file = "fonttools-4.60.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b33a7884fabd72bdf5f910d0cf46be50dce86a0362a65cfc746a4168c67eb96c"}, + {file = 
"fonttools-4.60.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2409d5fb7b55fd70f715e6d34e7a6e4f7511b8ad29a49d6df225ee76da76dd77"}, + {file = "fonttools-4.60.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8651e0d4b3bdeda6602b85fdc2abbefc1b41e573ecb37b6779c4ca50753a199"}, + {file = "fonttools-4.60.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:145daa14bf24824b677b9357c5e44fd8895c2a8f53596e1b9ea3496081dc692c"}, + {file = "fonttools-4.60.1-cp313-cp313-win32.whl", hash = "sha256:2299df884c11162617a66b7c316957d74a18e3758c0274762d2cc87df7bc0272"}, + {file = "fonttools-4.60.1-cp313-cp313-win_amd64.whl", hash = "sha256:a3db56f153bd4c5c2b619ab02c5db5192e222150ce5a1bc10f16164714bc39ac"}, + {file = "fonttools-4.60.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:a884aef09d45ba1206712c7dbda5829562d3fea7726935d3289d343232ecb0d3"}, + {file = "fonttools-4.60.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8a44788d9d91df72d1a5eac49b31aeb887a5f4aab761b4cffc4196c74907ea85"}, + {file = "fonttools-4.60.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e852d9dda9f93ad3651ae1e3bb770eac544ec93c3807888798eccddf84596537"}, + {file = "fonttools-4.60.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:154cb6ee417e417bf5f7c42fe25858c9140c26f647c7347c06f0cc2d47eff003"}, + {file = "fonttools-4.60.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5664fd1a9ea7f244487ac8f10340c4e37664675e8667d6fee420766e0fb3cf08"}, + {file = "fonttools-4.60.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:583b7f8e3c49486e4d489ad1deacfb8d5be54a8ef34d6df824f6a171f8511d99"}, + {file = "fonttools-4.60.1-cp314-cp314-win32.whl", hash = "sha256:66929e2ea2810c6533a5184f938502cfdaea4bc3efb7130d8cc02e1c1b4108d6"}, + {file = "fonttools-4.60.1-cp314-cp314-win_amd64.whl", hash = "sha256:f3d5be054c461d6a2268831f04091dc82753176f6ea06dc6047a5e168265a987"}, + {file = "fonttools-4.60.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b6379e7546ba4ae4b18f8ae2b9bc5960936007a1c0e30b342f662577e8bc3299"}, + {file = "fonttools-4.60.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9d0ced62b59e0430b3690dbc5373df1c2aa7585e9a8ce38eff87f0fd993c5b01"}, + {file = "fonttools-4.60.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:875cb7764708b3132637f6c5fb385b16eeba0f7ac9fa45a69d35e09b47045801"}, + {file = "fonttools-4.60.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a184b2ea57b13680ab6d5fbde99ccef152c95c06746cb7718c583abd8f945ccc"}, + {file = "fonttools-4.60.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:026290e4ec76583881763fac284aca67365e0be9f13a7fb137257096114cb3bc"}, + {file = "fonttools-4.60.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f0e8817c7d1a0c2eedebf57ef9a9896f3ea23324769a9a2061a80fe8852705ed"}, + {file = "fonttools-4.60.1-cp314-cp314t-win32.whl", hash = "sha256:1410155d0e764a4615774e5c2c6fc516259fe3eca5882f034eb9bfdbee056259"}, + {file = "fonttools-4.60.1-cp314-cp314t-win_amd64.whl", hash = "sha256:022beaea4b73a70295b688f817ddc24ed3e3418b5036ffcd5658141184ef0d0c"}, + {file = "fonttools-4.60.1-py3-none-any.whl", hash = "sha256:906306ac7afe2156fcf0042173d6ebbb05416af70f6b370967b47f8f00103bbb"}, + {file = "fonttools-4.60.1.tar.gz", hash = 
"sha256:ef00af0439ebfee806b25f24c8f92109157ff3fac5731dc7867957812e87b8d9"}, +] + +[[package]] +name = "frozenlist" +version = "1.8.0" +requires_python = ">=3.9" +summary = "A list-like structure which implements collections.abc.MutableSequence" +groups = ["default"] +files = [ + {file = "frozenlist-1.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84"}, + {file = "frozenlist-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:17c883ab0ab67200b5f964d2b9ed6b00971917d5d8a92df149dc2c9779208ee9"}, + {file = "frozenlist-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2552f44204b744fba866e573be4c1f9048d6a324dfe14475103fd51613eb1d1f"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:957e7c38f250991e48a9a73e6423db1bb9dd14e722a10f6b8bb8e16a0f55f695"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8585e3bb2cdea02fc88ffa245069c36555557ad3609e83be0ec71f54fd4abb52"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:edee74874ce20a373d62dc28b0b18b93f645633c2943fd90ee9d898550770581"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c9a63152fe95756b85f31186bddf42e4c02c6321207fd6601a1c89ebac4fe567"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b6db2185db9be0a04fecf2f241c70b63b1a242e2805be291855078f2b404dd6b"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f4be2e3d8bc8aabd566f8d5b8ba7ecc09249d74ba3c9ed52e54dc23a293f0b92"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c8d1634419f39ea6f5c427ea2f90ca85126b54b50837f31497f3bf38266e853d"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1a7fa382a4a223773ed64242dbe1c9c326ec09457e6b8428efb4118c685c3dfd"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:11847b53d722050808926e785df837353bd4d75f1d494377e59b23594d834967"}, + {file = "frozenlist-1.8.0-cp311-cp311-win32.whl", hash = "sha256:27c6e8077956cf73eadd514be8fb04d77fc946a7fe9f7fe167648b0b9085cc25"}, + {file = "frozenlist-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac913f8403b36a2c8610bbfd25b8013488533e71e62b4b4adce9c86c8cea905b"}, + {file = "frozenlist-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:d4d3214a0f8394edfa3e303136d0575eece0745ff2b47bd2cb2e66dd92d4351a"}, + {file = "frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1"}, + {file = "frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b"}, + {file = "frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383"}, + {file = 
"frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa"}, + {file = "frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf"}, + {file = "frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746"}, + {file = "frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd"}, + {file = "frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a"}, + {file = "frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7"}, + {file = "frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11"}, + {file = 
"frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed"}, + {file = "frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496"}, + {file = "frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231"}, + {file = "frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62"}, + {file = "frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94"}, + {file = "frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c"}, + {file = "frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41"}, + {file = "frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b"}, + {file = "frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888"}, + {file = "frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = 
"sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042"}, + {file = "frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0"}, + {file = "frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f"}, + {file = "frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7"}, + {file = "frozenlist-1.8.0-cp314-cp314-win32.whl", hash = "sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806"}, + {file = "frozenlist-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0"}, + {file = "frozenlist-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b"}, + {file = "frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d"}, + {file = "frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed"}, + {file = "frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24"}, + {file = 
"frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e"}, + {file = "frozenlist-1.8.0-cp314-cp314t-win32.whl", hash = "sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df"}, + {file = "frozenlist-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd"}, + {file = "frozenlist-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79"}, + {file = "frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d"}, + {file = "frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad"}, +] + +[[package]] +name = "fsspec" +version = "2025.9.0" +requires_python = ">=3.9" +summary = "File-system specification" +groups = ["default"] +files = [ + {file = "fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7"}, + {file = "fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19"}, +] + +[[package]] +name = "fsspec" +version = "2025.9.0" +extras = ["http"] +requires_python = ">=3.9" +summary = "File-system specification" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "aiohttp!=4.0.0a0,!=4.0.0a1", + "fsspec==2025.9.0", +] +files = [ + {file = "fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7"}, + {file = "fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19"}, +] + +[[package]] +name = "gradio" +version = "5.49.0" +requires_python = ">=3.10" +summary = "Python library for easily interacting with trained machine learning models" +groups = ["default"] +dependencies = [ + "aiofiles<25.0,>=22.0", + "anyio<5.0,>=3.0", + "audioop-lts<1.0; python_version >= \"3.13\"", + "brotli>=1.1.0", + "fastapi<1.0,>=0.115.2", + "ffmpy", + "gradio-client==1.13.3", + "groovy~=0.1", + "httpx<1.0,>=0.24.1", + "huggingface-hub<2.0,>=0.33.5", + "jinja2<4.0", + "markupsafe<4.0,>=2.0", + "numpy<3.0,>=1.0", + "orjson~=3.0", + "packaging", + "pandas<3.0,>=1.0", + "pillow<12.0,>=8.0", + 
"pydantic<2.12,>=2.0", + "pydub", + "python-multipart>=0.0.18", + "pyyaml<7.0,>=5.0", + "ruff>=0.9.3", + "safehttpx<0.2.0,>=0.1.6", + "semantic-version~=2.0", + "starlette<1.0,>=0.40.0", + "tomlkit<0.14.0,>=0.12.0", + "typer<1.0,>=0.12", + "typing-extensions~=4.0", + "uvicorn>=0.14.0", +] +files = [ + {file = "gradio-5.49.0-py3-none-any.whl", hash = "sha256:50cba9411c02e89768ad24ee3cd5898783cb60e8abf426bda9e04bef53e0d966"}, + {file = "gradio-5.49.0.tar.gz", hash = "sha256:19702c824350640201d9f4b150b3efd219363ebe6390bd3c94c9894880d20d15"}, +] + +[[package]] +name = "gradio-client" +version = "1.13.3" +requires_python = ">=3.10" +summary = "Python library for easily interacting with trained machine learning models" +groups = ["default"] +dependencies = [ + "fsspec", + "httpx>=0.24.1", + "huggingface-hub<2.0,>=0.19.3", + "packaging", + "typing-extensions~=4.0", + "websockets<16.0,>=13.0", +] +files = [ + {file = "gradio_client-1.13.3-py3-none-any.whl", hash = "sha256:3f63e4d33a2899c1a12b10fe3cf77b82a6919ff1a1fb6391f6aa225811aa390c"}, + {file = "gradio_client-1.13.3.tar.gz", hash = "sha256:869b3e67e0f7a0f40df8c48c94de99183265cf4b7b1d9bd4623e336d219ffbe7"}, +] + +[[package]] +name = "groovy" +version = "0.1.2" +requires_python = ">3.9" +summary = "A small Python library created to help developers protect their applications from Server Side Request Forgery (SSRF) attacks." +groups = ["default"] +files = [ + {file = "groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64"}, + {file = "groovy-0.1.2.tar.gz", hash = "sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083"}, +] + +[[package]] +name = "h11" +version = "0.16.0" +requires_python = ">=3.8" +summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +groups = ["default"] +files = [ + {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, + {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, +] + +[[package]] +name = "hf-xet" +version = "1.1.10" +requires_python = ">=3.8" +summary = "Fast transfer of large files with the Hugging Face Hub." 
+groups = ["default"] +marker = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" +files = [ + {file = "hf_xet-1.1.10-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d"}, + {file = "hf_xet-1.1.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b"}, + {file = "hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435"}, + {file = "hf_xet-1.1.10-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c"}, + {file = "hf_xet-1.1.10-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06"}, + {file = "hf_xet-1.1.10-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f"}, + {file = "hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045"}, + {file = "hf_xet-1.1.10.tar.gz", hash = "sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97"}, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +requires_python = ">=3.8" +summary = "A minimal low-level HTTP client." +groups = ["default"] +dependencies = [ + "certifi", + "h11>=0.16", +] +files = [ + {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, + {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, +] + +[[package]] +name = "httpx" +version = "0.28.1" +requires_python = ">=3.8" +summary = "The next generation HTTP client." +groups = ["default"] +dependencies = [ + "anyio", + "certifi", + "httpcore==1.*", + "idna", +] +files = [ + {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, + {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, +] + +[[package]] +name = "httpx-sse" +version = "0.4.1" +requires_python = ">=3.9" +summary = "Consume Server-Sent Event (SSE) messages with HTTPX." 
+groups = ["default"] +files = [ + {file = "httpx_sse-0.4.1-py3-none-any.whl", hash = "sha256:cba42174344c3a5b06f255ce65b350880f962d99ead85e776f23c6618a377a37"}, + {file = "httpx_sse-0.4.1.tar.gz", hash = "sha256:8f44d34414bc7b21bf3602713005c5df4917884f76072479b21f68befa4ea26e"}, +] + +[[package]] +name = "huggingface-hub" +version = "0.35.3" +requires_python = ">=3.8.0" +summary = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +groups = ["default"] +dependencies = [ + "filelock", + "fsspec>=2023.5.0", + "hf-xet<2.0.0,>=1.1.3; platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"", + "packaging>=20.9", + "pyyaml>=5.1", + "requests", + "tqdm>=4.42.1", + "typing-extensions>=3.7.4.3", +] +files = [ + {file = "huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba"}, + {file = "huggingface_hub-0.35.3.tar.gz", hash = "sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a"}, +] + +[[package]] +name = "idna" +version = "3.10" +requires_python = ">=3.6" +summary = "Internationalized Domain Names in Applications (IDNA)" +groups = ["default"] +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[[package]] +name = "imageio" +version = "2.37.0" +requires_python = ">=3.9" +summary = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." +groups = ["default"] +dependencies = [ + "numpy", + "pillow>=8.3.2", +] +files = [ + {file = "imageio-2.37.0-py3-none-any.whl", hash = "sha256:11efa15b87bc7871b61590326b2d635439acc321cf7f8ce996f812543ce10eed"}, + {file = "imageio-2.37.0.tar.gz", hash = "sha256:71b57b3669666272c818497aebba2b4c5f20d5b37c81720e5e1a56d59c492996"}, +] + +[[package]] +name = "importlib-metadata" +version = "8.7.0" +requires_python = ">=3.9" +summary = "Read metadata from Python packages" +groups = ["default"] +dependencies = [ + "typing-extensions>=3.6.4; python_version < \"3.8\"", + "zipp>=3.20", +] +files = [ + {file = "importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd"}, + {file = "importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000"}, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +requires_python = ">=3.7" +summary = "A very fast and expressive template engine." +groups = ["default"] +dependencies = [ + "MarkupSafe>=2.0", +] +files = [ + {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, + {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, +] + +[[package]] +name = "jiter" +version = "0.11.0" +requires_python = ">=3.9" +summary = "Fast iterable JSON parser." 
+groups = ["default"] +files = [ + {file = "jiter-0.11.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:cb5d9db02979c3f49071fce51a48f4b4e4cf574175fb2b11c7a535fa4867b222"}, + {file = "jiter-0.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1dc6a123f3471c4730db7ca8ba75f1bb3dcb6faeb8d46dd781083e7dee88b32d"}, + {file = "jiter-0.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09858f8d230f031c7b8e557429102bf050eea29c77ad9c34c8fe253c5329acb7"}, + {file = "jiter-0.11.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dbe2196c4a0ce760925a74ab4456bf644748ab0979762139626ad138f6dac72d"}, + {file = "jiter-0.11.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5beb56d22b63647bafd0b74979216fdee80c580c0c63410be8c11053860ffd09"}, + {file = "jiter-0.11.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97025d09ef549795d8dc720a824312cee3253c890ac73c621721ddfc75066789"}, + {file = "jiter-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d50880a6da65d8c23a2cf53c412847d9757e74cc9a3b95c5704a1d1a24667347"}, + {file = "jiter-0.11.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:452d80a1c86c095a242007bd9fc5d21b8a8442307193378f891cb8727e469648"}, + {file = "jiter-0.11.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e84e58198d4894668eec2da660ffff60e0f3e60afa790ecc50cb12b0e02ca1d4"}, + {file = "jiter-0.11.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:df64edcfc5dd5279a791eea52aa113d432c933119a025b0b5739f90d2e4e75f1"}, + {file = "jiter-0.11.0-cp311-cp311-win32.whl", hash = "sha256:144fc21337d21b1d048f7f44bf70881e1586401d405ed3a98c95a114a9994982"}, + {file = "jiter-0.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:b0f32e644d241293b892b1a6dd8f0b9cc029bfd94c97376b2681c36548aabab7"}, + {file = "jiter-0.11.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2fb7b377688cc3850bbe5c192a6bd493562a0bc50cbc8b047316428fbae00ada"}, + {file = "jiter-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a1b7cbe3f25bd0d8abb468ba4302a5d45617ee61b2a7a638f63fee1dc086be99"}, + {file = "jiter-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0a7f0ec81d5b7588c5cade1eb1925b91436ae6726dc2df2348524aeabad5de6"}, + {file = "jiter-0.11.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07630bb46ea2a6b9c6ed986c6e17e35b26148cce2c535454b26ee3f0e8dcaba1"}, + {file = "jiter-0.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7764f27d28cd4a9cbc61704dfcd80c903ce3aad106a37902d3270cd6673d17f4"}, + {file = "jiter-0.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4a6c4a737d486f77f842aeb22807edecb4a9417e6700c7b981e16d34ba7c72"}, + {file = "jiter-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf408d2a0abd919b60de8c2e7bc5eeab72d4dafd18784152acc7c9adc3291591"}, + {file = "jiter-0.11.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cdef53eda7d18e799625023e1e250dbc18fbc275153039b873ec74d7e8883e09"}, + {file = "jiter-0.11.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:53933a38ef7b551dd9c7f1064f9d7bb235bb3168d0fa5f14f0798d1b7ea0d9c5"}, + {file = "jiter-0.11.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11840d2324c9ab5162fc1abba23bc922124fedcff0d7b7f85fffa291e2f69206"}, + {file = "jiter-0.11.0-cp312-cp312-win32.whl", hash = 
"sha256:4f01a744d24a5f2bb4a11657a1b27b61dc038ae2e674621a74020406e08f749b"}, + {file = "jiter-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:29fff31190ab3a26de026da2f187814f4b9c6695361e20a9ac2123e4d4378a4c"}, + {file = "jiter-0.11.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:4441a91b80a80249f9a6452c14b2c24708f139f64de959943dfeaa6cb915e8eb"}, + {file = "jiter-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ff85fc6d2a431251ad82dbd1ea953affb5a60376b62e7d6809c5cd058bb39471"}, + {file = "jiter-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5e86126d64706fd28dfc46f910d496923c6f95b395138c02d0e252947f452bd"}, + {file = "jiter-0.11.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ad8bd82165961867a10f52010590ce0b7a8c53da5ddd8bbb62fef68c181b921"}, + {file = "jiter-0.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b42c2cd74273455ce439fd9528db0c6e84b5623cb74572305bdd9f2f2961d3df"}, + {file = "jiter-0.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0062dab98172dd0599fcdbf90214d0dcde070b1ff38a00cc1b90e111f071982"}, + {file = "jiter-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb948402821bc76d1f6ef0f9e19b816f9b09f8577844ba7140f0b6afe994bc64"}, + {file = "jiter-0.11.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:25a5b1110cca7329fd0daf5060faa1234be5c11e988948e4f1a1923b6a457fe1"}, + {file = "jiter-0.11.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bf11807e802a214daf6c485037778843fadd3e2ec29377ae17e0706ec1a25758"}, + {file = "jiter-0.11.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbb57da40631c267861dd0090461222060960012d70fd6e4c799b0f62d0ba166"}, + {file = "jiter-0.11.0-cp313-cp313-win32.whl", hash = "sha256:8e36924dad32c48d3c5e188d169e71dc6e84d6cb8dedefea089de5739d1d2f80"}, + {file = "jiter-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:452d13e4fd59698408087235259cebe67d9d49173b4dacb3e8d35ce4acf385d6"}, + {file = "jiter-0.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:089f9df9f69532d1339e83142438668f52c97cd22ee2d1195551c2b1a9e6cf33"}, + {file = "jiter-0.11.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29ed1fe69a8c69bf0f2a962d8d706c7b89b50f1332cd6b9fbda014f60bd03a03"}, + {file = "jiter-0.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a4d71d7ea6ea8786291423fe209acf6f8d398a0759d03e7f24094acb8ab686ba"}, + {file = "jiter-0.11.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9a6dff27eca70930bdbe4cbb7c1a4ba8526e13b63dc808c0670083d2d51a4a72"}, + {file = "jiter-0.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b1ae2a7593a62132c7d4c2abbee80bbbb94fdc6d157e2c6cc966250c564ef774"}, + {file = "jiter-0.11.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b13a431dba4b059e9e43019d3022346d009baf5066c24dcdea321a303cde9f0"}, + {file = "jiter-0.11.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:af62e84ca3889604ebb645df3b0a3f3bcf6b92babbff642bd214616f57abb93a"}, + {file = "jiter-0.11.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6f3b32bb723246e6b351aecace52aba78adb8eeb4b2391630322dc30ff6c773"}, + {file = "jiter-0.11.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:adcab442f4a099a358a7f562eaa54ed6456fb866e922c6545a717be51dbed7d7"}, + {file = 
"jiter-0.11.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9967c2ab338ee2b2c0102fd379ec2693c496abf71ffd47e4d791d1f593b68e2"}, + {file = "jiter-0.11.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e7d0bed3b187af8b47a981d9742ddfc1d9b252a7235471ad6078e7e4e5fe75c2"}, + {file = "jiter-0.11.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:f6fe0283e903ebc55f1a6cc569b8c1f3bf4abd026fed85e3ff8598a9e6f982f0"}, + {file = "jiter-0.11.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:4ee5821e3d66606b29ae5b497230b304f1376f38137d69e35f8d2bd5f310ff73"}, + {file = "jiter-0.11.0-cp314-cp314-win32.whl", hash = "sha256:c2d13ba7567ca8799f17c76ed56b1d49be30df996eb7fa33e46b62800562a5e2"}, + {file = "jiter-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fb4790497369d134a07fc763cc88888c46f734abdd66f9fdf7865038bf3a8f40"}, + {file = "jiter-0.11.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e2bbf24f16ba5ad4441a9845e40e4ea0cb9eed00e76ba94050664ef53ef4406"}, + {file = "jiter-0.11.0.tar.gz", hash = "sha256:1d9637eaf8c1d6a63d6562f2a6e5ab3af946c66037eb1b894e8fad75422266e4"}, +] + +[[package]] +name = "jsonschema" +version = "4.25.1" +requires_python = ">=3.9" +summary = "An implementation of JSON Schema validation for Python" +groups = ["default"] +dependencies = [ + "attrs>=22.2.0", + "jsonschema-specifications>=2023.03.6", + "referencing>=0.28.4", + "rpds-py>=0.7.1", +] +files = [ + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +requires_python = ">=3.9" +summary = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +groups = ["default"] +dependencies = [ + "referencing>=0.31.0", +] +files = [ + {file = "jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe"}, + {file = "jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.9" +requires_python = ">=3.10" +summary = "A fast implementation of the Cassowary constraint solver" +groups = ["default"] +files = [ + {file = "kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16"}, + {file = "kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089"}, + {file = "kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464"}, + {file = "kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2"}, + {file = "kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145"}, + {file = "kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54"}, + {file = "kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c"}, + {file = "kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d"}, + {file = "kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386"}, + {file = 
"kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce"}, + {file = "kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7"}, + {file = "kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1"}, + {file = "kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d"}, +] + +[[package]] +name = "lazy-loader" +version = "0.4" +requires_python = ">=3.7" +summary = "Makes it easy to load subpackages and functions on demand." +groups = ["default"] +dependencies = [ + "importlib-metadata; python_version < \"3.8\"", + "packaging", +] +files = [ + {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"}, + {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"}, +] + +[[package]] +name = "litellm" +version = "1.77.7" +requires_python = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" +summary = "Library to easily interface with LLM API providers" +groups = ["default"] +dependencies = [ + "aiohttp>=3.10", + "click", + "fastuuid>=0.13.0", + "httpx>=0.23.0", + "importlib-metadata>=6.8.0", + "jinja2<4.0.0,>=3.1.2", + "jsonschema<5.0.0,>=4.22.0", + "openai>=1.99.5", + "pydantic<3.0.0,>=2.5.0", + "python-dotenv>=0.2.0", + "tiktoken>=0.7.0", + "tokenizers", +] +files = [ + {file = "litellm-1.77.7-py3-none-any.whl", hash = "sha256:1b3a1b17bd521a0ad25226fb62a912602c803922aabb4a16adf83834673be574"}, + {file = "litellm-1.77.7.tar.gz", hash = "sha256:e3398fb2575b98726e787c0a1481daed5938d58cafdcd96fbca80c312221af3e"}, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +requires_python = ">=3.10" +summary = "Python port of markdown-it. Markdown parsing, done right!" +groups = ["default"] +dependencies = [ + "mdurl~=0.1", +] +files = [ + {file = "markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147"}, + {file = "markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3"}, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +requires_python = ">=3.9" +summary = "Safely add untrusted strings to HTML/XML markup." 
+groups = ["default"] +files = [ + {file = "markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad"}, + {file = "markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a"}, + {file = "markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19"}, + {file = "markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01"}, + {file = "markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b"}, + {file = "markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", 
hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12"}, + {file = "markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe"}, + 
{file = "markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe"}, + {file = "markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa"}, + {file = "markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698"}, +] + +[[package]] +name = "matplotlib" +version = "3.10.6" +requires_python = ">=3.10" +summary = "Python plotting 
package" +groups = ["default"] +dependencies = [ + "contourpy>=1.0.1", + "cycler>=0.10", + "fonttools>=4.22.0", + "kiwisolver>=1.3.1", + "numpy>=1.23", + "packaging>=20.0", + "pillow>=8", + "pyparsing>=2.3.1", + "python-dateutil>=2.7", +] +files = [ + {file = "matplotlib-3.10.6-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:905b60d1cb0ee604ce65b297b61cf8be9f4e6cfecf95a3fe1c388b5266bc8f4f"}, + {file = "matplotlib-3.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7bac38d816637343e53d7185d0c66677ff30ffb131044a81898b5792c956ba76"}, + {file = "matplotlib-3.10.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:942a8de2b5bfff1de31d95722f702e2966b8a7e31f4e68f7cd963c7cd8861cf6"}, + {file = "matplotlib-3.10.6-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3276c85370bc0dfca051ec65c5817d1e0f8f5ce1b7787528ec8ed2d524bbc2f"}, + {file = "matplotlib-3.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9df5851b219225731f564e4b9e7f2ac1e13c9e6481f941b5631a0f8e2d9387ce"}, + {file = "matplotlib-3.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:abb5d9478625dd9c9eb51a06d39aae71eda749ae9b3138afb23eb38824026c7e"}, + {file = "matplotlib-3.10.6-cp311-cp311-win_arm64.whl", hash = "sha256:886f989ccfae63659183173bb3fced7fd65e9eb793c3cc21c273add368536951"}, + {file = "matplotlib-3.10.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31ca662df6a80bd426f871105fdd69db7543e28e73a9f2afe80de7e531eb2347"}, + {file = "matplotlib-3.10.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1678bb61d897bb4ac4757b5ecfb02bfb3fddf7f808000fb81e09c510712fda75"}, + {file = "matplotlib-3.10.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:56cd2d20842f58c03d2d6e6c1f1cf5548ad6f66b91e1e48f814e4fb5abd1cb95"}, + {file = "matplotlib-3.10.6-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:662df55604a2f9a45435566d6e2660e41efe83cd94f4288dfbf1e6d1eae4b0bb"}, + {file = "matplotlib-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:08f141d55148cd1fc870c3387d70ca4df16dee10e909b3b038782bd4bda6ea07"}, + {file = "matplotlib-3.10.6-cp312-cp312-win_amd64.whl", hash = "sha256:590f5925c2d650b5c9d813c5b3b5fc53f2929c3f8ef463e4ecfa7e052044fb2b"}, + {file = "matplotlib-3.10.6-cp312-cp312-win_arm64.whl", hash = "sha256:f44c8d264a71609c79a78d50349e724f5d5fc3684ead7c2a473665ee63d868aa"}, + {file = "matplotlib-3.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:819e409653c1106c8deaf62e6de6b8611449c2cd9939acb0d7d4e57a3d95cc7a"}, + {file = "matplotlib-3.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:59c8ac8382fefb9cb71308dde16a7c487432f5255d8f1fd32473523abecfecdf"}, + {file = "matplotlib-3.10.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:84e82d9e0fd70c70bc55739defbd8055c54300750cbacf4740c9673a24d6933a"}, + {file = "matplotlib-3.10.6-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25f7a3eb42d6c1c56e89eacd495661fc815ffc08d9da750bca766771c0fd9110"}, + {file = "matplotlib-3.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f9c862d91ec0b7842920a4cfdaaec29662195301914ea54c33e01f1a28d014b2"}, + {file = "matplotlib-3.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:1b53bd6337eba483e2e7d29c5ab10eee644bc3a2491ec67cc55f7b44583ffb18"}, + {file = "matplotlib-3.10.6-cp313-cp313-win_arm64.whl", hash = "sha256:cbd5eb50b7058b2892ce45c2f4e92557f395c9991f5c886d1bb74a1582e70fd6"}, + {file = "matplotlib-3.10.6-cp313-cp313t-macosx_10_13_x86_64.whl", 
hash = "sha256:acc86dd6e0e695c095001a7fccff158c49e45e0758fdf5dcdbb0103318b59c9f"}, + {file = "matplotlib-3.10.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e228cd2ffb8f88b7d0b29e37f68ca9aaf83e33821f24a5ccc4f082dd8396bc27"}, + {file = "matplotlib-3.10.6-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:658bc91894adeab669cf4bb4a186d049948262987e80f0857216387d7435d833"}, + {file = "matplotlib-3.10.6-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8913b7474f6dd83ac444c9459c91f7f0f2859e839f41d642691b104e0af056aa"}, + {file = "matplotlib-3.10.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:091cea22e059b89f6d7d1a18e2c33a7376c26eee60e401d92a4d6726c4e12706"}, + {file = "matplotlib-3.10.6-cp313-cp313t-win_amd64.whl", hash = "sha256:491e25e02a23d7207629d942c666924a6b61e007a48177fdd231a0097b7f507e"}, + {file = "matplotlib-3.10.6-cp313-cp313t-win_arm64.whl", hash = "sha256:3d80d60d4e54cda462e2cd9a086d85cd9f20943ead92f575ce86885a43a565d5"}, + {file = "matplotlib-3.10.6-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:70aaf890ce1d0efd482df969b28a5b30ea0b891224bb315810a3940f67182899"}, + {file = "matplotlib-3.10.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1565aae810ab79cb72e402b22facfa6501365e73ebab70a0fdfb98488d2c3c0c"}, + {file = "matplotlib-3.10.6-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3b23315a01981689aa4e1a179dbf6ef9fbd17143c3eea77548c2ecfb0499438"}, + {file = "matplotlib-3.10.6-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:30fdd37edf41a4e6785f9b37969de57aea770696cb637d9946eb37470c94a453"}, + {file = "matplotlib-3.10.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bc31e693da1c08012c764b053e702c1855378e04102238e6a5ee6a7117c53a47"}, + {file = "matplotlib-3.10.6-cp314-cp314-win_amd64.whl", hash = "sha256:05be9bdaa8b242bc6ff96330d18c52f1fc59c6fb3a4dd411d953d67e7e1baf98"}, + {file = "matplotlib-3.10.6-cp314-cp314-win_arm64.whl", hash = "sha256:f56a0d1ab05d34c628592435781d185cd99630bdfd76822cd686fb5a0aecd43a"}, + {file = "matplotlib-3.10.6-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:94f0b4cacb23763b64b5dace50d5b7bfe98710fed5f0cef5c08135a03399d98b"}, + {file = "matplotlib-3.10.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cc332891306b9fb39462673d8225d1b824c89783fee82840a709f96714f17a5c"}, + {file = "matplotlib-3.10.6-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee1d607b3fb1590deb04b69f02ea1d53ed0b0bf75b2b1a5745f269afcbd3cdd3"}, + {file = "matplotlib-3.10.6-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:376a624a218116461696b27b2bbf7a8945053e6d799f6502fc03226d077807bf"}, + {file = "matplotlib-3.10.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:83847b47f6524c34b4f2d3ce726bb0541c48c8e7692729865c3df75bfa0f495a"}, + {file = "matplotlib-3.10.6-cp314-cp314t-win_amd64.whl", hash = "sha256:c7e0518e0d223683532a07f4b512e2e0729b62674f1b3a1a69869f98e6b1c7e3"}, + {file = "matplotlib-3.10.6-cp314-cp314t-win_arm64.whl", hash = "sha256:4dd83e029f5b4801eeb87c64efd80e732452781c16a9cf7415b7b63ec8f374d7"}, + {file = "matplotlib-3.10.6-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f2d684c3204fa62421bbf770ddfebc6b50130f9cad65531eeba19236d73bb488"}, + {file = "matplotlib-3.10.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:6f4a69196e663a41d12a728fab8751177215357906436804217d6d9cf0d4d6cf"}, + {file = 
"matplotlib-3.10.6-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d6ca6ef03dfd269f4ead566ec6f3fb9becf8dab146fb999022ed85ee9f6b3eb"}, + {file = "matplotlib-3.10.6.tar.gz", hash = "sha256:ec01b645840dd1996df21ee37f208cd8ba57644779fa20464010638013d3203c"}, +] + +[[package]] +name = "mcp" +version = "1.16.0" +requires_python = ">=3.10" +summary = "Model Context Protocol SDK" +groups = ["default"] +dependencies = [ + "anyio>=4.5", + "httpx-sse>=0.4", + "httpx>=0.27.1", + "jsonschema>=4.20.0", + "pydantic-settings>=2.5.2", + "pydantic<3.0.0,>=2.11.0", + "python-multipart>=0.0.9", + "pywin32>=310; sys_platform == \"win32\"", + "sse-starlette>=1.6.1", + "starlette>=0.27", + "uvicorn>=0.31.1; sys_platform != \"emscripten\"", +] +files = [ + {file = "mcp-1.16.0-py3-none-any.whl", hash = "sha256:ec917be9a5d31b09ba331e1768aa576e0af45470d657a0319996a20a57d7d633"}, + {file = "mcp-1.16.0.tar.gz", hash = "sha256:39b8ca25460c578ee2cdad33feeea122694cfdf73eef58bee76c42f6ef0589df"}, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +requires_python = ">=3.7" +summary = "Markdown URL utilities" +groups = ["default"] +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "mlx" +version = "0.29.2" +requires_python = ">=3.9" +summary = "A framework for machine learning on Apple silicon." +groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "mlx-metal==0.29.2; platform_system == \"Darwin\"", +] +files = [ + {file = "mlx-0.29.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:e74965369227230374b3e8e8c8d46e209e5221a9b76bbb0fa788617e2c68f73c"}, + {file = "mlx-0.29.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0f79194eeac78e85b96439d3bbc17aae5aba045a2af083c000b4fbbc501f253e"}, + {file = "mlx-0.29.2-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:33bbbb0fd24895d5ff080bb4d10e3e77017bba675d9a12466c8866eaf9b47854"}, + {file = "mlx-0.29.2-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:32e159f2772be893bec580d2d50c0e6b32ad71a19ded7307bf6c871c8aaa9cf2"}, + {file = "mlx-0.29.2-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:eec950bf7118ad0865d0fc4686bd85d99bf8463fc717d836a5132e1a08b4f129"}, + {file = "mlx-0.29.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bef7333268d6d02e50a9ac6b10f661b711cd02da4a5e2d7619cf198a7e530308"}, + {file = "mlx-0.29.2-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:f622fc6a84542a08ad2136e9251822d2c08106e5a1a0bd5d249a2d72bccd6577"}, + {file = "mlx-0.29.2-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:a1aa1aee8e1b6bd1e51361e6b692c70d281b8187b2e859e70ecc11daab306dac"}, + {file = "mlx-0.29.2-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:05ea54173f4bde11b2c93e673d65d72523f5d850f5112d3874156a6fc74ca591"}, + {file = "mlx-0.29.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:199dd029b5e55b6d94f1ce366d0137824e46e4333891424dd00413c739f50ae9"}, + {file = "mlx-0.29.2-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:b6dd4e5f227414882b1676d99250d99389228d1bdc14e4e4e88c95d4903810b7"}, + {file = "mlx-0.29.2-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:c3b9a9aee13f346d060966472954eebe99d9f1b295c9a237c9a000f1ef9adf2c"}, +] + +[[package]] +name = "mlx-lm" +version = "0.28.2" +requires_python = ">=3.8" +summary = "LLMs with MLX and the Hugging Face Hub" +groups = ["default"] 
+marker = "sys_platform == \"darwin\"" +dependencies = [ + "jinja2", + "mlx>=0.29.2", + "numpy", + "protobuf", + "pyyaml", + "transformers>=4.39.3", +] +files = [ + {file = "mlx_lm-0.28.2-py3-none-any.whl", hash = "sha256:1501529e625d0d648216f7bb543b8b449d5fd17bd598f635536dbc1fbde6d1d6"}, + {file = "mlx_lm-0.28.2.tar.gz", hash = "sha256:d28752635ed5c89ff2b41361916c928e6b16f765c07b2908044e1dcaf921ed9b"}, +] + +[[package]] +name = "mlx-metal" +version = "0.29.2" +requires_python = ">=3.9" +summary = "A framework for machine learning on Apple silicon." +groups = ["default"] +marker = "platform_system == \"Darwin\" and sys_platform == \"darwin\"" +files = [ + {file = "mlx_metal-0.29.2-py3-none-macosx_13_0_arm64.whl", hash = "sha256:cf8f83a521e620357185c57945142718d526b9312ee112e5a89eb5600480f4d6"}, + {file = "mlx_metal-0.29.2-py3-none-macosx_14_0_arm64.whl", hash = "sha256:fa944001970813b296e8aff5616f2fa9daeda6bc1d190c17fbe8a7ca838ecef0"}, + {file = "mlx_metal-0.29.2-py3-none-macosx_15_0_arm64.whl", hash = "sha256:08d8b7fe305425a14b74ebf36cee176575bfd4cd8d34a2aaae8f05b9983d2d71"}, +] + +[[package]] +name = "mlx-vlm" +version = "0.3.3" +requires_python = ">=3.8" +summary = "MLX-VLM is a package for inference and fine-tuning of Vision Language Models (VLMs) and Omni Models (VLMs with audio and video support) on your Mac using MLX." +groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "Pillow>=10.3.0", + "datasets>=2.19.1", + "fastapi>=0.95.1", + "mlx-lm>=0.23.0", + "mlx>=0.26.0", + "numpy", + "opencv-python>=4.12.0.88", + "requests>=2.31.0", + "scipy>=1.15.3", + "soundfile>=0.13.1", + "tqdm>=4.66.2", + "transformers>=4.53.0", + "uvicorn", +] +files = [ + {file = "mlx_vlm-0.3.3-py3-none-any.whl", hash = "sha256:50f977e989613c846c08cac847e8c43bc7eaf074892bb00e439d07d14ee79823"}, + {file = "mlx_vlm-0.3.3.tar.gz", hash = "sha256:5a08c802d1bf32cc47bd6aebe348d3554ce21bfce417a585bba83f9d213a6e66"}, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +summary = "Python library for arbitrary-precision floating-point arithmetic" +groups = ["default"] +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[[package]] +name = "multidict" +version = "6.7.0" +requires_python = ">=3.9" +summary = "multidict implementation" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.1.0; python_version < \"3.11\"", +] +files = [ + {file = "multidict-6.7.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4d409aa42a94c0b3fa617708ef5276dfe81012ba6753a0370fcc9d0195d0a1fc"}, + {file = "multidict-6.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14c9e076eede3b54c636f8ce1c9c252b5f057c62131211f0ceeec273810c9721"}, + {file = "multidict-6.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c09703000a9d0fa3c3404b27041e574cc7f4df4c6563873246d0e11812a94b6"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a265acbb7bb33a3a2d626afbe756371dce0279e7b17f4f4eda406459c2b5ff1c"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51cb455de290ae462593e5b1cb1118c5c22ea7f0d3620d9940bf695cea5a4bd7"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:db99677b4457c7a5c5a949353e125ba72d62b35f74e26da141530fbb012218a7"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f470f68adc395e0183b92a2f4689264d1ea4b40504a24d9882c27375e6662bb9"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0db4956f82723cc1c270de9c6e799b4c341d327762ec78ef82bb962f79cc07d8"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e56d780c238f9e1ae66a22d2adf8d16f485381878250db8d496623cd38b22bd"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d14baca2ee12c1a64740d4531356ba50b82543017f3ad6de0deb943c5979abb"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:295a92a76188917c7f99cda95858c822f9e4aae5824246bba9b6b44004ddd0a6"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39f1719f57adbb767ef592a50ae5ebb794220d1188f9ca93de471336401c34d2"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0a13fb8e748dfc94749f622de065dd5c1def7e0d2216dba72b1d8069a389c6ff"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e3aa16de190d29a0ea1b48253c57d99a68492c8dd8948638073ab9e74dc9410b"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a048ce45dcdaaf1defb76b2e684f997fb5abf74437b6cb7b22ddad934a964e34"}, + {file = "multidict-6.7.0-cp311-cp311-win32.whl", hash = "sha256:a90af66facec4cebe4181b9e62a68be65e45ac9b52b67de9eec118701856e7ff"}, + {file = "multidict-6.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:95b5ffa4349df2887518bb839409bcf22caa72d82beec453216802f475b23c81"}, + {file = "multidict-6.7.0-cp311-cp311-win_arm64.whl", hash = "sha256:329aa225b085b6f004a4955271a7ba9f1087e39dcb7e65f6284a988264a63912"}, + {file = "multidict-6.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8a3862568a36d26e650a19bb5cbbba14b71789032aebc0423f8cc5f150730184"}, + {file = "multidict-6.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:960c60b5849b9b4f9dcc9bea6e3626143c252c74113df2c1540aebce70209b45"}, + {file = "multidict-6.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2049be98fb57a31b4ccf870bf377af2504d4ae35646a19037ec271e4c07998aa"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0934f3843a1860dd465d38895c17fce1f1cb37295149ab05cd1b9a03afacb2a7"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3e34f3a1b8131ba06f1a73adab24f30934d148afcd5f5de9a73565a4404384e"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:efbb54e98446892590dc2458c19c10344ee9a883a79b5cec4bc34d6656e8d546"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a35c5fc61d4f51eb045061e7967cfe3123d622cd500e8868e7c0c592a09fedc4"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29fe6740ebccba4175af1b9b87bf553e9c15cd5868ee967e010efcf94e4fd0f1"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:123e2a72e20537add2f33a79e605f6191fba2afda4cbb876e35c1a7074298a7d"}, 
+ {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b284e319754366c1aee2267a2036248b24eeb17ecd5dc16022095e747f2f4304"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:803d685de7be4303b5a657b76e2f6d1240e7e0a8aa2968ad5811fa2285553a12"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c04a328260dfd5db8c39538f999f02779012268f54614902d0afc775d44e0a62"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8a19cdb57cd3df4cd865849d93ee14920fb97224300c88501f16ecfa2604b4e0"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b2fd74c52accced7e75de26023b7dccee62511a600e62311b918ec5c168fc2a"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e8bfdd0e487acf992407a140d2589fe598238eaeffa3da8448d63a63cd363f8"}, + {file = "multidict-6.7.0-cp312-cp312-win32.whl", hash = "sha256:dd32a49400a2c3d52088e120ee00c1e3576cbff7e10b98467962c74fdb762ed4"}, + {file = "multidict-6.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:92abb658ef2d7ef22ac9f8bb88e8b6c3e571671534e029359b6d9e845923eb1b"}, + {file = "multidict-6.7.0-cp312-cp312-win_arm64.whl", hash = "sha256:490dab541a6a642ce1a9d61a4781656b346a55c13038f0b1244653828e3a83ec"}, + {file = "multidict-6.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bee7c0588aa0076ce77c0ea5d19a68d76ad81fcd9fe8501003b9a24f9d4000f6"}, + {file = "multidict-6.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7ef6b61cad77091056ce0e7ce69814ef72afacb150b7ac6a3e9470def2198159"}, + {file = "multidict-6.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c0359b1ec12b1d6849c59f9d319610b7f20ef990a6d454ab151aa0e3b9f78ca"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cd240939f71c64bd658f186330603aac1a9a81bf6273f523fca63673cb7378a8"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60a4d75718a5efa473ebd5ab685786ba0c67b8381f781d1be14da49f1a2dc60"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53a42d364f323275126aff81fb67c5ca1b7a04fda0546245730a55c8c5f24bc4"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3b29b980d0ddbecb736735ee5bef69bb2ddca56eff603c86f3f29a1128299b4f"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8a93b1c0ed2d04b97a5e9336fd2d33371b9a6e29ab7dd6503d63407c20ffbaf"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ff96e8815eecacc6645da76c413eb3b3d34cfca256c70b16b286a687d013c32"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7516c579652f6a6be0e266aec0acd0db80829ca305c3d771ed898538804c2036"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:040f393368e63fb0f3330e70c26bfd336656bed925e5cbe17c9da839a6ab13ec"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b3bc26a951007b1057a1c543af845f1c7e3e71cc240ed1ace7bf4484aa99196e"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7b022717c748dd1992a83e219587aabe45980d88969f01b316e78683e6285f64"}, + {file = 
"multidict-6.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9600082733859f00d79dee64effc7aef1beb26adb297416a4ad2116fd61374bd"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94218fcec4d72bc61df51c198d098ce2b378e0ccbac41ddbed5ef44092913288"}, + {file = "multidict-6.7.0-cp313-cp313-win32.whl", hash = "sha256:a37bd74c3fa9d00be2d7b8eca074dc56bd8077ddd2917a839bd989612671ed17"}, + {file = "multidict-6.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:30d193c6cc6d559db42b6bcec8a5d395d34d60c9877a0b71ecd7c204fcf15390"}, + {file = "multidict-6.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:ea3334cabe4d41b7ccd01e4d349828678794edbc2d3ae97fc162a3312095092e"}, + {file = "multidict-6.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ad9ce259f50abd98a1ca0aa6e490b58c316a0fce0617f609723e40804add2c00"}, + {file = "multidict-6.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07f5594ac6d084cbb5de2df218d78baf55ef150b91f0ff8a21cc7a2e3a5a58eb"}, + {file = "multidict-6.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0591b48acf279821a579282444814a2d8d0af624ae0bc600aa4d1b920b6e924b"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:749a72584761531d2b9467cfbdfd29487ee21124c304c4b6cb760d8777b27f9c"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b4c3d199f953acd5b446bf7c0de1fe25d94e09e79086f8dc2f48a11a129cdf1"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9fb0211dfc3b51efea2f349ec92c114d7754dd62c01f81c3e32b765b70c45c9b"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a027ec240fe73a8d6281872690b988eed307cd7d91b23998ff35ff577ca688b5"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1d964afecdf3a8288789df2f5751dc0a8261138c3768d9af117ed384e538fad"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf53b15b1b7df9fbd0709aa01409000a2b4dd03a5f6f5cc548183c7c8f8b63c"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:654030da3197d927f05a536a66186070e98765aa5142794c9904555d3a9d8fb5"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2090d3718829d1e484706a2f525e50c892237b2bf9b17a79b059cb98cddc2f10"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d2cfeec3f6f45651b3d408c4acec0ebf3daa9bc8a112a084206f5db5d05b754"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4ef089f985b8c194d341eb2c24ae6e7408c9a0e2e5658699c92f497437d88c3c"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e93a0617cd16998784bf4414c7e40f17a35d2350e5c6f0bd900d3a8e02bd3762"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0feece2ef8ebc42ed9e2e8c78fc4aa3cf455733b507c09ef7406364c94376c6"}, + {file = "multidict-6.7.0-cp313-cp313t-win32.whl", hash = "sha256:19a1d55338ec1be74ef62440ca9e04a2f001a04d0cc49a4983dc320ff0f3212d"}, + {file = "multidict-6.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3da4fb467498df97e986af166b12d01f05d2e04f978a9c1c680ea1988e0bc4b6"}, + {file = "multidict-6.7.0-cp313-cp313t-win_arm64.whl", hash = 
"sha256:b4121773c49a0776461f4a904cdf6264c88e42218aaa8407e803ca8025872792"}, + {file = "multidict-6.7.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bab1e4aff7adaa34410f93b1f8e57c4b36b9af0426a76003f441ee1d3c7e842"}, + {file = "multidict-6.7.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b8512bac933afc3e45fb2b18da8e59b78d4f408399a960339598374d4ae3b56b"}, + {file = "multidict-6.7.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:79dcf9e477bc65414ebfea98ffd013cb39552b5ecd62908752e0e413d6d06e38"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:31bae522710064b5cbeddaf2e9f32b1abab70ac6ac91d42572502299e9953128"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a0df7ff02397bb63e2fd22af2c87dfa39e8c7f12947bc524dbdc528282c7e34"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7a0222514e8e4c514660e182d5156a415c13ef0aabbd71682fc714e327b95e99"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2397ab4daaf2698eb51a76721e98db21ce4f52339e535725de03ea962b5a3202"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8891681594162635948a636c9fe0ff21746aeb3dd5463f6e25d9bea3a8a39ca1"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18706cc31dbf402a7945916dd5cddf160251b6dab8a2c5f3d6d5a55949f676b3"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f844a1bbf1d207dd311a56f383f7eda2d0e134921d45751842d8235e7778965d"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:d4393e3581e84e5645506923816b9cc81f5609a778c7e7534054091acc64d1c6"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:fbd18dc82d7bf274b37aa48d664534330af744e03bccf696d6f4c6042e7d19e7"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b6234e14f9314731ec45c42fc4554b88133ad53a09092cc48a88e771c125dadb"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:08d4379f9744d8f78d98c8673c06e202ffa88296f009c71bbafe8a6bf847d01f"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fe04da3f79387f450fd0061d4dd2e45a72749d31bf634aecc9e27f24fdc4b3f"}, + {file = "multidict-6.7.0-cp314-cp314-win32.whl", hash = "sha256:fbafe31d191dfa7c4c51f7a6149c9fb7e914dcf9ffead27dcfd9f1ae382b3885"}, + {file = "multidict-6.7.0-cp314-cp314-win_amd64.whl", hash = "sha256:2f67396ec0310764b9222a1728ced1ab638f61aadc6226f17a71dd9324f9a99c"}, + {file = "multidict-6.7.0-cp314-cp314-win_arm64.whl", hash = "sha256:ba672b26069957ee369cfa7fc180dde1fc6f176eaf1e6beaf61fbebbd3d9c000"}, + {file = "multidict-6.7.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:c1dcc7524066fa918c6a27d61444d4ee7900ec635779058571f70d042d86ed63"}, + {file = "multidict-6.7.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:27e0b36c2d388dc7b6ced3406671b401e84ad7eb0656b8f3a2f46ed0ce483718"}, + {file = "multidict-6.7.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a7baa46a22e77f0988e3b23d4ede5513ebec1929e34ee9495be535662c0dfe2"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = 
"sha256:7bf77f54997a9166a2f5675d1201520586439424c2511723a7312bdb4bcc034e"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e011555abada53f1578d63389610ac8a5400fc70ce71156b0aa30d326f1a5064"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:28b37063541b897fd6a318007373930a75ca6d6ac7c940dbe14731ffdd8d498e"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05047ada7a2fde2631a0ed706f1fd68b169a681dfe5e4cf0f8e4cb6618bbc2cd"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:716133f7d1d946a4e1b91b1756b23c088881e70ff180c24e864c26192ad7534a"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1bed1b467ef657f2a0ae62844a607909ef1c6889562de5e1d505f74457d0b96"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ca43bdfa5d37bd6aee89d85e1d0831fb86e25541be7e9d376ead1b28974f8e5e"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:44b546bd3eb645fd26fb949e43c02a25a2e632e2ca21a35e2e132c8105dc8599"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a6ef16328011d3f468e7ebc326f24c1445f001ca1dec335b2f8e66bed3006394"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5aa873cbc8e593d361ae65c68f85faadd755c3295ea2c12040ee146802f23b38"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3d7b6ccce016e29df4b7ca819659f516f0bc7a4b3efa3bb2012ba06431b044f9"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:171b73bd4ee683d307599b66793ac80981b06f069b62eea1c9e29c9241aa66b0"}, + {file = "multidict-6.7.0-cp314-cp314t-win32.whl", hash = "sha256:b2d7f80c4e1fd010b07cb26820aae86b7e73b681ee4889684fb8d2d4537aab13"}, + {file = "multidict-6.7.0-cp314-cp314t-win_amd64.whl", hash = "sha256:09929cab6fcb68122776d575e03c6cc64ee0b8fca48d17e135474b042ce515cd"}, + {file = "multidict-6.7.0-cp314-cp314t-win_arm64.whl", hash = "sha256:cc41db090ed742f32bd2d2c721861725e6109681eddf835d0a82bd3a5c382827"}, + {file = "multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3"}, + {file = "multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5"}, +] + +[[package]] +name = "multiprocess" +version = "0.70.16" +requires_python = ">=3.8" +summary = "better multiprocessing and multithreading in Python" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "dill>=0.3.8", +] +files = [ + {file = "multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a"}, + {file = "multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e"}, + {file = "multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +requires_python = ">=3.8" +summary = "Type system extensions for programs checked with the mypy type checker." 
+groups = ["dev"] +files = [ + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, +] + +[[package]] +name = "networkx" +version = "3.5" +requires_python = ">=3.11" +summary = "Python package for creating and manipulating graphs and networks" +groups = ["default"] +files = [ + {file = "networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec"}, + {file = "networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037"}, +] + +[[package]] +name = "ninja" +version = "1.13.0" +requires_python = ">=3.8" +summary = "Ninja is a small build system with a focus on speed" +groups = ["default"] +files = [ + {file = "ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1"}, + {file = "ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630"}, + {file = "ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c"}, + {file = "ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e"}, + {file = "ninja-1.13.0-py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988"}, + {file = "ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb46acf6b93b8dd0322adc3a4945452a4e774b75b91293bafcc7b7f8e6517dfa"}, + {file = "ninja-1.13.0-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4be9c1b082d244b1ad7ef41eb8ab088aae8c109a9f3f0b3e56a252d3e00f42c1"}, + {file = "ninja-1.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:6739d3352073341ad284246f81339a384eec091d9851a886dfa5b00a6d48b3e2"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aa45b4037b313c2f698bc13306239b8b93b4680eb47e287773156ac9e9304714"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f8e1e8a1a30835eeb51db05cf5a67151ad37542f5a4af2a438e9490915e5b72"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:3d7d7779d12cb20c6d054c61b702139fd23a7a964ec8f2c823f1ab1b084150db"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96"}, + {file = "ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200"}, + {file = "ninja-1.13.0-py3-none-win32.whl", hash = "sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9"}, + {file = "ninja-1.13.0-py3-none-win_amd64.whl", hash = "sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e"}, + {file = "ninja-1.13.0-py3-none-win_arm64.whl", hash = 
"sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9"}, + {file = "ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978"}, +] + +[[package]] +name = "numpy" +version = "2.2.6" +requires_python = ">=3.10" +summary = "Fundamental package for array computing in Python" +groups = ["default"] +files = [ + {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"}, + {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"}, + {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"}, + {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"}, + {file = "numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"}, + {file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"}, + {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"}, + {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"}, + {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"}, + {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"}, +] + +[[package]] +name = "nvidia-cublas-cu12" +version = "12.6.4.1" +requires_python = ">=3" +summary = "CUBLAS native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-win_amd64.whl", hash = 
"sha256:9e4fa264f4d8a4eb0cdbd34beadc029f453b3bafae02401e999cf3d5a5af75f8"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.6.80" +requires_python = ">=3" +summary = "CUDA profiling tools runtime libs." +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-win_amd64.whl", hash = "sha256:bbe6ae76e83ce5251b56e8c8e61a964f757175682bbad058b170b136266ab00a"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.6.77" +requires_python = ">=3" +summary = "NVRTC native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:f7007dbd914c56bd80ea31bc43e8e149da38f68158f423ba845fc3292684e45a"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.6.77" +requires_python = ">=3" +summary = "CUDA Runtime native Libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:86c58044c824bf3c173c49a2dbc7a6c8b53cb4e4dca50068be0bf64e9dab3f7f"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.5.1.17" +requires_python = ">=3" +summary = "cuDNN runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-cublas-cu12", +] +files = [ + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = 
"sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-win_amd64.whl", hash = "sha256:d7af0f8a4f3b4b9dbb3122f2ef553b45694ed9c384d5a75bab197b8eefb79ab8"}, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.0.4" +requires_python = ">=3" +summary = "CUFFT native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-nvjitlink-cu12", +] +files = [ + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-win_amd64.whl", hash = "sha256:6048ebddfb90d09d2707efb1fd78d4e3a77cb3ae4dc60e19aab6be0ece2ae464"}, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.11.1.6" +requires_python = ">=3" +summary = "cuFile GPUDirect libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, + {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.7.77" +requires_python = ">=3" +summary = "CURAND native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-win_amd64.whl", hash = "sha256:6d6d935ffba0f3d439b7cd968192ff068fafd9018dbf1b85b37261b13cfc9905"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.1.2" +requires_python = ">=3" +summary = "CUDA solver native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-cublas-cu12", + "nvidia-cusparse-cu12", + "nvidia-nvjitlink-cu12", +] +files = [ + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, + {file = 
"nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-win_amd64.whl", hash = "sha256:6813f9d8073f555444a8705f3ab0296d3e1cb37a16d694c5fc8b862a0d8706d7"}, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.4.2" +requires_python = ">=3" +summary = "CUSPARSE native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-nvjitlink-cu12", +] +files = [ + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-win_amd64.whl", hash = "sha256:4acb8c08855a26d737398cba8fb6f8f5045d93f82612b4cfd84645a2332ccf20"}, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.6.3" +summary = "NVIDIA cuSPARSELt" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7"}, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.26.2" +requires_python = ">=3" +summary = "NVIDIA Collective Communication Library (NCCL) Runtime" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, + {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.6.85" +requires_python = ">=3" +summary = "Nvidia JIT LTO Library" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, + {file = 
"nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.6.77" +requires_python = ">=3" +summary = "NVIDIA Tools Extension" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"}, +] + +[[package]] +name = "openai" +version = "2.2.0" +requires_python = ">=3.8" +summary = "The official Python library for the openai API" +groups = ["default"] +dependencies = [ + "anyio<5,>=3.5.0", + "distro<2,>=1.7.0", + "httpx<1,>=0.23.0", + "jiter<1,>=0.4.0", + "pydantic<3,>=1.9.0", + "sniffio", + "tqdm>4", + "typing-extensions<5,>=4.11", +] +files = [ + {file = "openai-2.2.0-py3-none-any.whl", hash = "sha256:d222e63436e33f3134a3d7ce490dc2d2f146fa98036eb65cc225df3ce163916f"}, + {file = "openai-2.2.0.tar.gz", hash = "sha256:bc49d077a8bf0e370eec4d038bc05e232c20855a19df0b58e5b3e5a8da7d33e0"}, +] + +[[package]] +name = "opencv-python" +version = "4.12.0.88" +requires_python = ">=3.6" +summary = "Wrapper package for OpenCV python bindings." +groups = ["default"] +dependencies = [ + "numpy<2.0; python_version < \"3.9\"", + "numpy<2.3.0,>=2; python_version >= \"3.9\"", +] +files = [ + {file = "opencv-python-4.12.0.88.tar.gz", hash = "sha256:8b738389cede219405f6f3880b851efa3415ccd674752219377353f017d2994d"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:f9a1f08883257b95a5764bf517a32d75aec325319c8ed0f89739a57fae9e92a5"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:812eb116ad2b4de43ee116fcd8991c3a687f099ada0b04e68f64899c09448e81"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:51fd981c7df6af3e8f70b1556696b05224c4e6b6777bdd2a46b3d4fb09de1a92"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:092c16da4c5a163a818f120c22c5e4a2f96e0db4f24e659c701f1fe629a690f9"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:ff554d3f725b39878ac6a2e1fa232ec509c36130927afc18a1719ebf4fbf4357"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:d98edb20aa932fd8ebd276a72627dad9dc097695b3d435a4257557bbb49a79d2"}, +] + +[[package]] +name = "opencv-python-headless" +version = "4.12.0.88" +requires_python = ">=3.6" +summary = "Wrapper package for OpenCV python bindings." 
+groups = ["default"] +dependencies = [ + "numpy<2.0; python_version < \"3.9\"", + "numpy<2.3.0,>=2; python_version >= \"3.9\"", +] +files = [ + {file = "opencv-python-headless-4.12.0.88.tar.gz", hash = "sha256:cfdc017ddf2e59b6c2f53bc12d74b6b0be7ded4ec59083ea70763921af2b6c09"}, + {file = "opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:1e58d664809b3350c1123484dd441e1667cd7bed3086db1b9ea1b6f6cb20b50e"}, + {file = "opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:365bb2e486b50feffc2d07a405b953a8f3e8eaa63865bc650034e5c71e7a5154"}, + {file = "opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:aeb4b13ecb8b4a0beb2668ea07928160ea7c2cd2d9b5ef571bbee6bafe9cc8d0"}, + {file = "opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:236c8df54a90f4d02076e6f9c1cc763d794542e886c576a6fee46ec8ff75a7a9"}, + {file = "opencv_python_headless-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:fde2cf5c51e4def5f2132d78e0c08f9c14783cd67356922182c6845b9af87dbd"}, + {file = "opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:86b413bdd6c6bf497832e346cd5371995de148e579b9774f8eba686dee3f5528"}, +] + +[[package]] +name = "orjson" +version = "3.11.3" +requires_python = ">=3.9" +summary = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +groups = ["default"] +files = [ + {file = "orjson-3.11.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9d2ae0cc6aeb669633e0124531f342a17d8e97ea999e42f12a5ad4adaa304c5f"}, + {file = "orjson-3.11.3-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:ba21dbb2493e9c653eaffdc38819b004b7b1b246fb77bfc93dc016fe664eac91"}, + {file = "orjson-3.11.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00f1a271e56d511d1569937c0447d7dce5a99a33ea0dec76673706360a051904"}, + {file = "orjson-3.11.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b67e71e47caa6680d1b6f075a396d04fa6ca8ca09aafb428731da9b3ea32a5a6"}, + {file = "orjson-3.11.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7d012ebddffcce8c85734a6d9e5f08180cd3857c5f5a3ac70185b43775d043d"}, + {file = "orjson-3.11.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd759f75d6b8d1b62012b7f5ef9461d03c804f94d539a5515b454ba3a6588038"}, + {file = "orjson-3.11.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6890ace0809627b0dff19cfad92d69d0fa3f089d3e359a2a532507bb6ba34efb"}, + {file = "orjson-3.11.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d4a5e041ae435b815e568537755773d05dac031fee6a57b4ba70897a44d9d2"}, + {file = "orjson-3.11.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2d68bf97a771836687107abfca089743885fb664b90138d8761cce61d5625d55"}, + {file = "orjson-3.11.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:bfc27516ec46f4520b18ef645864cee168d2a027dbf32c5537cb1f3e3c22dac1"}, + {file = "orjson-3.11.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f66b001332a017d7945e177e282a40b6997056394e3ed7ddb41fb1813b83e824"}, + {file = "orjson-3.11.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:212e67806525d2561efbfe9e799633b17eb668b8964abed6b5319b2f1cfbae1f"}, + {file = "orjson-3.11.3-cp311-cp311-win32.whl", hash = "sha256:6e8e0c3b85575a32f2ffa59de455f85ce002b8bdc0662d6b9c2ed6d80ab5d204"}, + 
{file = "orjson-3.11.3-cp311-cp311-win_amd64.whl", hash = "sha256:6be2f1b5d3dc99a5ce5ce162fc741c22ba9f3443d3dd586e6a1211b7bc87bc7b"}, + {file = "orjson-3.11.3-cp311-cp311-win_arm64.whl", hash = "sha256:fafb1a99d740523d964b15c8db4eabbfc86ff29f84898262bf6e3e4c9e97e43e"}, + {file = "orjson-3.11.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8c752089db84333e36d754c4baf19c0e1437012242048439c7e80eb0e6426e3b"}, + {file = "orjson-3.11.3-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:9b8761b6cf04a856eb544acdd82fc594b978f12ac3602d6374a7edb9d86fd2c2"}, + {file = "orjson-3.11.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b13974dc8ac6ba22feaa867fc19135a3e01a134b4f7c9c28162fed4d615008a"}, + {file = "orjson-3.11.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f83abab5bacb76d9c821fd5c07728ff224ed0e52d7a71b7b3de822f3df04e15c"}, + {file = "orjson-3.11.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6fbaf48a744b94091a56c62897b27c31ee2da93d826aa5b207131a1e13d4064"}, + {file = "orjson-3.11.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc779b4f4bba2847d0d2940081a7b6f7b5877e05408ffbb74fa1faf4a136c424"}, + {file = "orjson-3.11.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd4b909ce4c50faa2192da6bb684d9848d4510b736b0611b6ab4020ea6fd2d23"}, + {file = "orjson-3.11.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:524b765ad888dc5518bbce12c77c2e83dee1ed6b0992c1790cc5fb49bb4b6667"}, + {file = "orjson-3.11.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:84fd82870b97ae3cdcea9d8746e592b6d40e1e4d4527835fc520c588d2ded04f"}, + {file = "orjson-3.11.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fbecb9709111be913ae6879b07bafd4b0785b44c1eb5cac8ac76da048b3885a1"}, + {file = "orjson-3.11.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9dba358d55aee552bd868de348f4736ca5a4086d9a62e2bfbbeeb5629fe8b0cc"}, + {file = "orjson-3.11.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eabcf2e84f1d7105f84580e03012270c7e97ecb1fb1618bda395061b2a84a049"}, + {file = "orjson-3.11.3-cp312-cp312-win32.whl", hash = "sha256:3782d2c60b8116772aea8d9b7905221437fdf53e7277282e8d8b07c220f96cca"}, + {file = "orjson-3.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:79b44319268af2eaa3e315b92298de9a0067ade6e6003ddaef72f8e0bedb94f1"}, + {file = "orjson-3.11.3-cp312-cp312-win_arm64.whl", hash = "sha256:0e92a4e83341ef79d835ca21b8bd13e27c859e4e9e4d7b63defc6e58462a3710"}, + {file = "orjson-3.11.3-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:af40c6612fd2a4b00de648aa26d18186cd1322330bd3a3cc52f87c699e995810"}, + {file = "orjson-3.11.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:9f1587f26c235894c09e8b5b7636a38091a9e6e7fe4531937534749c04face43"}, + {file = "orjson-3.11.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61dcdad16da5bb486d7227a37a2e789c429397793a6955227cedbd7252eb5a27"}, + {file = "orjson-3.11.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:11c6d71478e2cbea0a709e8a06365fa63da81da6498a53e4c4f065881d21ae8f"}, + {file = "orjson-3.11.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff94112e0098470b665cb0ed06efb187154b63649403b8d5e9aedeb482b4548c"}, + {file = "orjson-3.11.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae8b756575aaa2a855a75192f356bbda11a89169830e1439cfb1a3e1a6dde7be"}, + {file = "orjson-3.11.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9416cc19a349c167ef76135b2fe40d03cea93680428efee8771f3e9fb66079d"}, + {file = "orjson-3.11.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b822caf5b9752bc6f246eb08124c3d12bf2175b66ab74bac2ef3bbf9221ce1b2"}, + {file = "orjson-3.11.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:414f71e3bdd5573893bf5ecdf35c32b213ed20aa15536fe2f588f946c318824f"}, + {file = "orjson-3.11.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:828e3149ad8815dc14468f36ab2a4b819237c155ee1370341b91ea4c8672d2ee"}, + {file = "orjson-3.11.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ac9e05f25627ffc714c21f8dfe3a579445a5c392a9c8ae7ba1d0e9fb5333f56e"}, + {file = "orjson-3.11.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e44fbe4000bd321d9f3b648ae46e0196d21577cf66ae684a96ff90b1f7c93633"}, + {file = "orjson-3.11.3-cp313-cp313-win32.whl", hash = "sha256:2039b7847ba3eec1f5886e75e6763a16e18c68a63efc4b029ddf994821e2e66b"}, + {file = "orjson-3.11.3-cp313-cp313-win_amd64.whl", hash = "sha256:29be5ac4164aa8bdcba5fa0700a3c9c316b411d8ed9d39ef8a882541bd452fae"}, + {file = "orjson-3.11.3-cp313-cp313-win_arm64.whl", hash = "sha256:18bd1435cb1f2857ceb59cfb7de6f92593ef7b831ccd1b9bfb28ca530e539dce"}, + {file = "orjson-3.11.3-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:cf4b81227ec86935568c7edd78352a92e97af8da7bd70bdfdaa0d2e0011a1ab4"}, + {file = "orjson-3.11.3-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:bc8bc85b81b6ac9fc4dae393a8c159b817f4c2c9dee5d12b773bddb3b95fc07e"}, + {file = "orjson-3.11.3-cp314-cp314-manylinux_2_34_aarch64.whl", hash = "sha256:88dcfc514cfd1b0de038443c7b3e6a9797ffb1b3674ef1fd14f701a13397f82d"}, + {file = "orjson-3.11.3-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d61cd543d69715d5fc0a690c7c6f8dcc307bc23abef9738957981885f5f38229"}, + {file = "orjson-3.11.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2b7b153ed90ababadbef5c3eb39549f9476890d339cf47af563aea7e07db2451"}, + {file = "orjson-3.11.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7909ae2460f5f494fecbcd10613beafe40381fd0316e35d6acb5f3a05bfda167"}, + {file = "orjson-3.11.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:2030c01cbf77bc67bee7eef1e7e31ecf28649353987775e3583062c752da0077"}, + {file = "orjson-3.11.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a0169ebd1cbd94b26c7a7ad282cf5c2744fce054133f959e02eb5265deae1872"}, + {file = "orjson-3.11.3-cp314-cp314-win32.whl", hash = "sha256:0c6d7328c200c349e3a4c6d8c83e0a5ad029bdc2d417f234152bf34842d0fc8d"}, + {file = "orjson-3.11.3-cp314-cp314-win_amd64.whl", hash = "sha256:317bbe2c069bbc757b1a2e4105b64aacd3bc78279b66a6b9e51e846e4809f804"}, + {file = "orjson-3.11.3-cp314-cp314-win_arm64.whl", hash = "sha256:e8f6a7a27d7b7bec81bd5924163e9af03d49bbb63013f107b48eb5d16db711bc"}, + {file = "orjson-3.11.3.tar.gz", hash = "sha256:1c0603b1d2ffcd43a411d64797a19556ef76958aef1c182f22dc30860152a98a"}, +] + +[[package]] +name = "packaging" +version = "25.0" +requires_python = ">=3.8" +summary = "Core utilities for Python packages" +groups = ["default", "dev"] +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = 
"sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pandas" +version = "2.3.3" +requires_python = ">=3.9" +summary = "Powerful data structures for data analysis, time series, and statistics" +groups = ["default"] +dependencies = [ + "numpy>=1.22.4; python_version < \"3.11\"", + "numpy>=1.23.2; python_version == \"3.11\"", + "numpy>=1.26.0; python_version >= \"3.12\"", + "python-dateutil>=2.8.2", + "pytz>=2020.1", + "tzdata>=2022.7", +] +files = [ + {file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"}, + {file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"}, + {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"}, + {file = 
"pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"}, + {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"}, + {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"}, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +requires_python = ">=3.8" +summary = "Utility library for gitignore style pattern matching of file paths." 
+groups = ["dev"] +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "pillow" +version = "11.3.0" +requires_python = ">=3.9" +summary = "Python Imaging Library (Fork)" +groups = ["default"] +files = [ + {file = "pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722"}, + {file = "pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f"}, + {file = "pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e"}, + {file = "pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94"}, + {file = "pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0"}, + {file = "pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac"}, + {file = "pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd"}, + {file = "pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4"}, + {file = "pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024"}, + {file = "pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809"}, + {file = "pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d"}, + {file = "pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149"}, + {file = 
"pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d"}, + {file = "pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f"}, + {file = "pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c"}, + {file = "pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8"}, + {file = "pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2"}, + {file = "pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b"}, + {file = "pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3"}, + {file = "pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51"}, + {file = "pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580"}, + {file = "pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e"}, + {file = "pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59"}, + {file = "pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe"}, + {file = 
"pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c"}, + {file = "pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788"}, + {file = "pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31"}, + {file = "pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e"}, + {file = "pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12"}, + {file = "pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77"}, + {file = "pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874"}, + {file = "pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a"}, + {file = "pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214"}, + {file = "pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635"}, + {file = "pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6"}, + {file = "pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae"}, + {file = "pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477"}, + {file = "pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50"}, + {file = "pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b"}, + {file = "pillow-11.3.0-cp314-cp314t-win32.whl", hash = 
"sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12"}, + {file = "pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db"}, + {file = "pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8"}, + {file = "pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523"}, +] + +[[package]] +name = "platformdirs" +version = "4.4.0" +requires_python = ">=3.9" +summary = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+groups = ["dev"] +files = [ + {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"}, + {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"}, +] + +[[package]] +name = "polars" +version = "1.34.0" +requires_python = ">=3.9" +summary = "Blazingly fast DataFrame library" +groups = ["default"] +dependencies = [ + "polars-runtime-32==1.34.0", +] +files = [ + {file = "polars-1.34.0-py3-none-any.whl", hash = "sha256:40d2f357b4d9e447ad28bd2c9923e4318791a7c18eb68f31f1fbf11180f41391"}, + {file = "polars-1.34.0.tar.gz", hash = "sha256:5de5f871027db4b11bcf39215a2d6b13b4a80baf8a55c5862d4ebedfd5cd4013"}, +] + +[[package]] +name = "polars-runtime-32" +version = "1.34.0" +requires_python = ">=3.9" +summary = "Blazingly fast DataFrame library" +groups = ["default"] +files = [ + {file = "polars_runtime_32-1.34.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2878f9951e91121afe60c25433ef270b9a221e6ebf3de5f6642346b38cab3f03"}, + {file = "polars_runtime_32-1.34.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:fbc329c7d34a924228cc5dcdbbd4696d94411a3a5b15ad8bb868634c204e1951"}, + {file = "polars_runtime_32-1.34.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93fa51d88a2d12ea996a5747aad5647d22a86cce73c80f208e61f487b10bc448"}, + {file = "polars_runtime_32-1.34.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:79e4d696392c6d8d51f4347f0b167c52eef303c9d87093c0c68e8651198735b7"}, + {file = "polars_runtime_32-1.34.0-cp39-abi3-win_amd64.whl", hash = "sha256:2501d6b29d9001ea5ea2fd9b598787e10ddf45d8c4a87c2bead75159e8a15711"}, + {file = "polars_runtime_32-1.34.0-cp39-abi3-win_arm64.whl", hash = "sha256:f9ed1765378dfe0bcd1ac5ec570dd9eab27ea728bbc980cc9a76eebc55586559"}, + {file = "polars_runtime_32-1.34.0.tar.gz", hash = "sha256:ebe6f865128a0d833f53a3f6828360761ad86d1698bceb22bef9fd999500dc1c"}, +] + +[[package]] +name = "posthog" +version = "6.7.6" +requires_python = ">=3.9" +summary = "Integrate PostHog into any python application." 
+groups = ["default"] +dependencies = [ + "backoff>=1.10.0", + "distro>=1.5.0", + "python-dateutil>=2.2", + "requests<3.0,>=2.7", + "six>=1.5", + "typing-extensions>=4.2.0", +] +files = [ + {file = "posthog-6.7.6-py3-none-any.whl", hash = "sha256:b09a7e65a042ec416c28874b397d3accae412a80a8b0ef3fa686fbffc99e4d4b"}, + {file = "posthog-6.7.6.tar.gz", hash = "sha256:ee5c5ad04b857d96d9b7a4f715e23916a2f206bfcf25e5a9d328a3d27664b0d3"}, +] + +[[package]] +name = "propcache" +version = "0.4.0" +requires_python = ">=3.9" +summary = "Accelerated property cache" +groups = ["default"] +files = [ + {file = "propcache-0.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6a6a36b94c09711d6397d79006ca47901539fbc602c853d794c39abd6a326549"}, + {file = "propcache-0.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:da47070e1340a1639aca6b1c18fe1f1f3d8d64d3a1f9ddc67b94475f44cd40f3"}, + {file = "propcache-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de536cf796abc5b58d11c0ad56580215d231d9554ea4bb6b8b1b3bed80aa3234"}, + {file = "propcache-0.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5c82af8e329c3cdc3e717dd3c7b2ff1a218b6de611f6ce76ee34967570a9de9"}, + {file = "propcache-0.4.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:abe04e7aa5ab2e4056fcf3255ebee2071e4a427681f76d4729519e292c46ecc1"}, + {file = "propcache-0.4.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:075ca32384294434344760fdcb95f7833e1d7cf7c4e55f0e726358140179da35"}, + {file = "propcache-0.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:626ec13592928b677f48ff5861040b604b635e93d8e2162fb638397ea83d07e8"}, + {file = "propcache-0.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:02e071548b6a376e173b0102c3f55dc16e7d055b5307d487e844c320e38cacf2"}, + {file = "propcache-0.4.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:2af6de831a26f42a3f94592964becd8d7f238551786d7525807f02e53defbd13"}, + {file = "propcache-0.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bd6c6dba1a3b8949e08c4280071c86e38cb602f02e0ed6659234108c7a7cd710"}, + {file = "propcache-0.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:783e91595cf9b66c2deda17f2e8748ae8591aa9f7c65dcab038872bfe83c5bb1"}, + {file = "propcache-0.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c3f4b125285d354a627eb37f3ea7c13b8842c7c0d47783581d0df0e272dbf5f0"}, + {file = "propcache-0.4.0-cp311-cp311-win32.whl", hash = "sha256:71c45f02ffbb8a21040ae816ceff7f6cd749ffac29fc0f9daa42dc1a9652d577"}, + {file = "propcache-0.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:7d51f70f77950f8efafed4383865d3533eeee52d8a0dd1c35b65f24de41de4e0"}, + {file = "propcache-0.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:858eaabd2191dd0da5272993ad08a748b5d3ae1aefabea8aee619b45c2af4a64"}, + {file = "propcache-0.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:381c84a445efb8c9168f1393a5a7c566de22edc42bfe207a142fff919b37f5d9"}, + {file = "propcache-0.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5a531d29d7b873b12730972237c48b1a4e5980b98cf21b3f09fa4710abd3a8c3"}, + {file = "propcache-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cd6e22255ed73efeaaeb1765505a66a48a9ec9ebc919fce5ad490fe5e33b1555"}, + {file = "propcache-0.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:d9a8d277dc218ddf04ec243a53ac309b1afcebe297c0526a8f82320139b56289"}, + {file = "propcache-0.4.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:399c73201d88c856a994916200d7cba41d7687096f8eb5139eb68f02785dc3f7"}, + {file = "propcache-0.4.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a1d5e474d43c238035b74ecf997f655afa67f979bae591ac838bb3fbe3076392"}, + {file = "propcache-0.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22f589652ee38de96aa58dd219335604e09666092bc250c1d9c26a55bcef9932"}, + {file = "propcache-0.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5227da556b2939da6125cda1d5eecf9e412e58bc97b41e2f192605c3ccbb7c2"}, + {file = "propcache-0.4.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:92bc43a1ab852310721ce856f40a3a352254aa6f5e26f0fad870b31be45bba2e"}, + {file = "propcache-0.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:83ae2f5343f6f06f4c91ae530d95f56b415f768f9c401a5ee2a10459cf74370b"}, + {file = "propcache-0.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:077a32977399dc05299b16e793210341a0b511eb0a86d1796873e83ce47334cc"}, + {file = "propcache-0.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:94a278c45e6463031b5a8278e40a07edf2bcc3b5379510e22b6c1a6e6498c194"}, + {file = "propcache-0.4.0-cp312-cp312-win32.whl", hash = "sha256:4c491462e1dc80f9deb93f428aad8d83bb286de212837f58eb48e75606e7726c"}, + {file = "propcache-0.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cdb0cecafb528ab15ed89cdfed183074d15912d046d3e304955513b50a34b907"}, + {file = "propcache-0.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:b2f29697d1110e8cdf7a39cc630498df0082d7898b79b731c1c863f77c6e8cfc"}, + {file = "propcache-0.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e2d01fd53e89cb3d71d20b8c225a8c70d84660f2d223afc7ed7851a4086afe6d"}, + {file = "propcache-0.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7dfa60953169d2531dd8ae306e9c27c5d4e5efe7a2ba77049e8afdaece062937"}, + {file = "propcache-0.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:227892597953611fce2601d49f1d1f39786a6aebc2f253c2de775407f725a3f6"}, + {file = "propcache-0.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e0a5bc019014531308fb67d86066d235daa7551baf2e00e1ea7b00531f6ea85"}, + {file = "propcache-0.4.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6ebc6e2e65c31356310ddb6519420eaa6bb8c30fbd809d0919129c89dcd70f4c"}, + {file = "propcache-0.4.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1927b78dd75fc31a7fdc76cc7039e39f3170cb1d0d9a271e60f0566ecb25211a"}, + {file = "propcache-0.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b113feeda47f908562d9a6d0e05798ad2f83d4473c0777dafa2bc7756473218"}, + {file = "propcache-0.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4596c12aa7e3bb2abf158ea8f79eb0fb4851606695d04ab846b2bb386f5690a1"}, + {file = "propcache-0.4.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:6d1f67dad8cc36e8abc2207a77f3f952ac80be7404177830a7af4635a34cbc16"}, + {file = "propcache-0.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e6229ad15366cd8b6d6b4185c55dd48debf9ca546f91416ba2e5921ad6e210a6"}, + {file = "propcache-0.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:2a4bf309d057327f1f227a22ac6baf34a66f9af75e08c613e47c4d775b06d6c7"}, + {file = "propcache-0.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c2e274f3d1cbb2ddcc7a55ce3739af0f8510edc68a7f37981b2258fa1eedc833"}, + {file = "propcache-0.4.0-cp313-cp313-win32.whl", hash = "sha256:f114a3e1f8034e2957d34043b7a317a8a05d97dfe8fddb36d9a2252c0117dbbc"}, + {file = "propcache-0.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:9ba68c57cde9c667f6b65b98bc342dfa7240b1272ffb2c24b32172ee61b6d281"}, + {file = "propcache-0.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:eb77a85253174bf73e52c968b689d64be62d71e8ac33cabef4ca77b03fb4ef92"}, + {file = "propcache-0.4.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c0e1c218fff95a66ad9f2f83ad41a67cf4d0a3f527efe820f57bde5fda616de4"}, + {file = "propcache-0.4.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:5710b1c01472542bb024366803812ca13e8774d21381bcfc1f7ae738eeb38acc"}, + {file = "propcache-0.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d7f008799682e8826ce98f25e8bc43532d2cd26c187a1462499fa8d123ae054f"}, + {file = "propcache-0.4.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0596d2ae99d74ca436553eb9ce11fe4163dc742fcf8724ebe07d7cb0db679bb1"}, + {file = "propcache-0.4.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab9c1bd95ebd1689f0e24f2946c495808777e9e8df7bb3c1dfe3e9eb7f47fe0d"}, + {file = "propcache-0.4.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a8ef2ea819549ae2e8698d2ec229ae948d7272feea1cb2878289f767b6c585a4"}, + {file = "propcache-0.4.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:71a400b2f0b079438cc24f9a27f02eff24d8ef78f2943f949abc518b844ade3d"}, + {file = "propcache-0.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4c2735d3305e6cecab6e53546909edf407ad3da5b9eeaf483f4cf80142bb21be"}, + {file = "propcache-0.4.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:72b51340047ac43b3cf388eebd362d052632260c9f73a50882edbb66e589fd44"}, + {file = "propcache-0.4.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:184c779363740d6664982ad05699f378f7694220e2041996f12b7c2a4acdcad0"}, + {file = "propcache-0.4.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a60634a9de41f363923c6adfb83105d39e49f7a3058511563ed3de6748661af6"}, + {file = "propcache-0.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c9b8119244d122241a9c4566bce49bb20408a6827044155856735cf14189a7da"}, + {file = "propcache-0.4.0-cp313-cp313t-win32.whl", hash = "sha256:515b610a364c8cdd2b72c734cc97dece85c416892ea8d5c305624ac8734e81db"}, + {file = "propcache-0.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7ea86eb32e74f9902df57e8608e8ac66f1e1e1d24d1ed2ddeb849888413b924d"}, + {file = "propcache-0.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:c1443fa4bb306461a3a8a52b7de0932a2515b100ecb0ebc630cc3f87d451e0a9"}, + {file = "propcache-0.4.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:de8e310d24b5a61de08812dd70d5234da1458d41b059038ee7895a9e4c8cae79"}, + {file = "propcache-0.4.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:55a54de5266bc44aa274915cdf388584fa052db8748a869e5500ab5993bac3f4"}, + {file = "propcache-0.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:88d50d662c917ec2c9d3858920aa7b9d5bfb74ab9c51424b775ccbe683cb1b4e"}, + {file = 
"propcache-0.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae3adf88a66f5863cf79394bc359da523bb27a2ed6ba9898525a6a02b723bfc5"}, + {file = "propcache-0.4.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7f088e21d15b3abdb9047e4b7b7a0acd79bf166893ac2b34a72ab1062feb219e"}, + {file = "propcache-0.4.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a4efbaf10793fd574c76a5732c75452f19d93df6e0f758c67dd60552ebd8614b"}, + {file = "propcache-0.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:681a168d06284602d56e97f09978057aa88bcc4177352b875b3d781df4efd4cb"}, + {file = "propcache-0.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a7f06f077fc4ef37e8a37ca6bbb491b29e29db9fb28e29cf3896aad10dbd4137"}, + {file = "propcache-0.4.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:082a643479f49a6778dcd68a80262fc324b14fd8e9b1a5380331fe41adde1738"}, + {file = "propcache-0.4.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:26692850120241a99bb4a4eec675cd7b4fdc431144f0d15ef69f7f8599f6165f"}, + {file = "propcache-0.4.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:33ad7d37b9a386f97582f5d042cc7b8d4b3591bb384cf50866b749a17e4dba90"}, + {file = "propcache-0.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1e7fd82d4a5b7583588f103b0771e43948532f1292105f13ee6f3b300933c4ca"}, + {file = "propcache-0.4.0-cp314-cp314-win32.whl", hash = "sha256:213eb0d3bc695a70cffffe11a1c2e1c2698d89ffd8dba35a49bc44a035d45c93"}, + {file = "propcache-0.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:087e2d3d7613e1b59b2ffca0daabd500c1a032d189c65625ee05ea114afcad0b"}, + {file = "propcache-0.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:94b0f7407d18001dbdcbb239512e753b1b36725a6e08a4983be1c948f5435f79"}, + {file = "propcache-0.4.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b730048ae8b875e2c0af1a09ca31b303fc7b5ed27652beec03fa22b29545aec9"}, + {file = "propcache-0.4.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f495007ada16a4e16312b502636fafff42a9003adf1d4fb7541e0a0870bc056f"}, + {file = "propcache-0.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:659a0ea6d9017558ed7af00fb4028186f64d0ba9adfc70a4d2c85fcd3d026321"}, + {file = "propcache-0.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d74aa60b1ec076d4d5dcde27c9a535fc0ebb12613f599681c438ca3daa68acac"}, + {file = "propcache-0.4.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:34000e31795bdcda9826e0e70e783847a42e3dcd0d6416c5d3cb717905ebaec0"}, + {file = "propcache-0.4.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bcb5bfac5b9635e6fc520c8af6efc7a0a56f12a1fe9e9d3eb4328537e316dd6a"}, + {file = "propcache-0.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ea11fceb31fa95b0fa2007037f19e922e2caceb7dc6c6cac4cb56e2d291f1a2"}, + {file = "propcache-0.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cd8684f628fe285ea5c86f88e1c30716239dc9d6ac55e7851a4b7f555b628da3"}, + {file = "propcache-0.4.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:790286d3d542c0ef9f6d0280d1049378e5e776dcba780d169298f664c39394db"}, + {file = "propcache-0.4.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:009093c9b5dbae114a5958e6a649f8a5d94dd6866b0f82b60395eb92c58002d4"}, + {file = "propcache-0.4.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:728d98179e92d77096937fdfecd2c555a3d613abe56c9909165c24196a3b5012"}, + {file = "propcache-0.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a9725d96a81e17e48a0fe82d0c3de2f5e623d7163fec70a6c7df90753edd1bec"}, + {file = "propcache-0.4.0-cp314-cp314t-win32.whl", hash = "sha256:0964c55c95625193defeb4fd85f8f28a9a754ed012cab71127d10e3dc66b1373"}, + {file = "propcache-0.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:24403152e41abf09488d3ae9c0c3bf7ff93e2fb12b435390718f21810353db28"}, + {file = "propcache-0.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0363a696a9f24b37a04ed5e34c2e07ccbe92798c998d37729551120a1bb744c4"}, + {file = "propcache-0.4.0-py3-none-any.whl", hash = "sha256:015b2ca2f98ea9e08ac06eecc409d5d988f78c5fd5821b2ad42bc9afcd6b1557"}, + {file = "propcache-0.4.0.tar.gz", hash = "sha256:c1ad731253eb738f9cadd9fa1844e019576c70bca6a534252e97cf33a57da529"}, +] + +[[package]] +name = "protobuf" +version = "6.32.1" +requires_python = ">=3.9" +summary = "" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +files = [ + {file = "protobuf-6.32.1-cp310-abi3-win32.whl", hash = "sha256:a8a32a84bc9f2aad712041b8b366190f71dde248926da517bde9e832e4412085"}, + {file = "protobuf-6.32.1-cp310-abi3-win_amd64.whl", hash = "sha256:b00a7d8c25fa471f16bc8153d0e53d6c9e827f0953f3c09aaa4331c718cae5e1"}, + {file = "protobuf-6.32.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8c7e6eb619ffdf105ee4ab76af5a68b60a9d0f66da3ea12d1640e6d8dab7281"}, + {file = "protobuf-6.32.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:2f5b80a49e1eb7b86d85fcd23fe92df154b9730a725c3b38c4e43b9d77018bf4"}, + {file = "protobuf-6.32.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:b1864818300c297265c83a4982fd3169f97122c299f56a56e2445c3698d34710"}, + {file = "protobuf-6.32.1-py3-none-any.whl", hash = "sha256:2601b779fc7d32a866c6b4404f9d42a3f67c5b9f3f15b4db3cccabe06b95c346"}, + {file = "protobuf-6.32.1.tar.gz", hash = "sha256:ee2469e4a021474ab9baafea6cd070e5bf27c7d29433504ddea1a4ee5850f68d"}, +] + +[[package]] +name = "psutil" +version = "7.1.0" +requires_python = ">=3.6" +summary = "Cross-platform lib for process and system monitoring." 
+groups = ["default"] +files = [ + {file = "psutil-7.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:76168cef4397494250e9f4e73eb3752b146de1dd950040b29186d0cce1d5ca13"}, + {file = "psutil-7.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:5d007560c8c372efdff9e4579c2846d71de737e4605f611437255e81efcca2c5"}, + {file = "psutil-7.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22e4454970b32472ce7deaa45d045b34d3648ce478e26a04c7e858a0a6e75ff3"}, + {file = "psutil-7.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70e113920d51e89f212dd7be06219a9b88014e63a4cec69b684c327bc474e3"}, + {file = "psutil-7.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d4a113425c037300de3ac8b331637293da9be9713855c4fc9d2d97436d7259d"}, + {file = "psutil-7.1.0-cp37-abi3-win32.whl", hash = "sha256:09ad740870c8d219ed8daae0ad3b726d3bf9a028a198e7f3080f6a1888b99bca"}, + {file = "psutil-7.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:57f5e987c36d3146c0dd2528cd42151cf96cd359b9d67cfff836995cc5df9a3d"}, + {file = "psutil-7.1.0-cp37-abi3-win_arm64.whl", hash = "sha256:6937cb68133e7c97b6cc9649a570c9a18ba0efebed46d8c5dae4c07fa1b67a07"}, + {file = "psutil-7.1.0.tar.gz", hash = "sha256:655708b3c069387c8b77b072fc429a57d0e214221d01c0a772df7dfedcb3bcd2"}, +] + +[[package]] +name = "pyarrow" +version = "21.0.0" +requires_python = ">=3.9" +summary = "Python library for Apache Arrow" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +files = [ + {file = "pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b"}, + {file = "pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10"}, + {file = "pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e"}, + {file = "pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569"}, + {file = "pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e"}, + {file = "pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c"}, + {file = "pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6"}, + {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd"}, + {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876"}, + {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d"}, + {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e"}, + {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82"}, + {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623"}, + {file = 
"pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18"}, + {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a"}, + {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe"}, + {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd"}, + {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61"}, + {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d"}, + {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99"}, + {file = "pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636"}, + {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da"}, + {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7"}, + {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6"}, + {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8"}, + {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503"}, + {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79"}, + {file = "pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10"}, + {file = "pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc"}, +] + +[[package]] +name = "pyclipper" +version = "1.3.0.post6" +summary = "Cython wrapper for the C++ translation of the Angus Johnson's Clipper library (ver. 
6.4.2)" +groups = ["default"] +files = [ + {file = "pyclipper-1.3.0.post6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4247e7c44b34c87acbf38f99d48fb1acaf5da4a2cf4dcd601a9b24d431be4ef"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:851b3e58106c62a5534a1201295fe20c21714dee2eda68081b37ddb0367e6caa"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16cc1705a915896d2aff52131c427df02265631279eac849ebda766432714cc0"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace1f0753cf71c5c5f6488b8feef5dd0fa8b976ad86b24bb51f708f513df4aac"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-win32.whl", hash = "sha256:dbc828641667142751b1127fd5c4291663490cf05689c85be4c5bcc89aaa236a"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-win_amd64.whl", hash = "sha256:1c03f1ae43b18ee07730c3c774cc3cf88a10c12a4b097239b33365ec24a0a14a"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:6363b9d79ba1b5d8f32d1623e797c1e9f994600943402e68d5266067bdde173e"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32cd7fb9c1c893eb87f82a072dbb5e26224ea7cebbad9dc306d67e1ac62dd229"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3aab10e3c10ed8fa60c608fb87c040089b83325c937f98f06450cf9fcfdaf1d"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58eae2ff92a8cae1331568df076c4c5775bf946afab0068b217f0cf8e188eb3c"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-win32.whl", hash = "sha256:793b0aa54b914257aa7dc76b793dd4dcfb3c84011d48df7e41ba02b571616eaf"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-win_amd64.whl", hash = "sha256:d3f9da96f83b8892504923beb21a481cd4516c19be1d39eb57a92ef1c9a29548"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f129284d2c7bcd213d11c0f35e1ae506a1144ce4954e9d1734d63b120b0a1b58"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:188fbfd1d30d02247f92c25ce856f5f3c75d841251f43367dbcf10935bc48f38"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6d129d0c2587f2f5904d201a4021f859afbb45fada4261c9fdedb2205b09d23"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c9c80b5c46eef38ba3f12dd818dc87f5f2a0853ba914b6f91b133232315f526"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-win32.whl", hash = "sha256:b15113ec4fc423b58e9ae80aa95cf5a0802f02d8f02a98a46af3d7d66ff0cc0e"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-win_amd64.whl", hash = "sha256:e5ff68fa770ac654c7974fc78792978796f068bd274e95930c0691c31e192889"}, + {file = "pyclipper-1.3.0.post6.tar.gz", hash = "sha256:42bff0102fa7a7f2abdd795a2594654d62b786d0c6cd67b72d469114fdeb608c"}, +] + +[[package]] +name = "pycparser" +version = "2.23" +requires_python = ">=3.8" +summary = "C parser in Python" +groups = ["default"] +marker = "implementation_name != \"PyPy\" and sys_platform == \"darwin\"" +files = [ + {file = "pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934"}, + {file = "pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2"}, +] + +[[package]] +name = "pydantic" +version = "2.11.10" 
+requires_python = ">=3.9" +summary = "Data validation using Python type hints" +groups = ["default"] +dependencies = [ + "annotated-types>=0.6.0", + "pydantic-core==2.33.2", + "typing-extensions>=4.12.2", + "typing-inspection>=0.4.0", +] +files = [ + {file = "pydantic-2.11.10-py3-none-any.whl", hash = "sha256:802a655709d49bd004c31e865ef37da30b540786a46bfce02333e0e24b5fe29a"}, + {file = "pydantic-2.11.10.tar.gz", hash = "sha256:dc280f0982fbda6c38fada4e476dc0a4f3aeaf9c6ad4c28df68a666ec3c61423"}, +] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +requires_python = ">=3.9" +summary = "Core functionality for Pydantic validation and serialization" +groups = ["default"] +dependencies = [ + "typing-extensions!=4.7.0,>=4.6.0", +] +files = [ + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"}, + {file = 
"pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1"}, + {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"}, +] + +[[package]] +name = "pydantic-settings" +version = "2.11.0" +requires_python = ">=3.9" +summary = "Settings management using Pydantic" +groups = ["default"] +dependencies = [ + "pydantic>=2.7.0", + "python-dotenv>=0.21.0", + "typing-inspection>=0.4.0", +] +files = [ + {file = "pydantic_settings-2.11.0-py3-none-any.whl", hash = "sha256:fe2cea3413b9530d10f3a5875adffb17ada5c1e1bab0b2885546d7310415207c"}, + {file = "pydantic_settings-2.11.0.tar.gz", hash = "sha256:d0e87a1c7d33593beb7194adb8470fc426e95ba02af83a0f23474a04c9a08180"}, +] + +[[package]] +name = "pydub" +version = "0.25.1" +summary = "Manipulate audio with an simple and easy high level interface" +groups = ["default"] +files = [ + {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"}, + {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, +] + 
+[[package]] +name = "pygments" +version = "2.19.2" +requires_python = ">=3.8" +summary = "Pygments is a syntax highlighting package written in Python." +groups = ["default"] +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[[package]] +name = "pyparsing" +version = "3.2.5" +requires_python = ">=3.9" +summary = "pyparsing - Classes and methods to define and execute parsing grammars" +groups = ["default"] +files = [ + {file = "pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e"}, + {file = "pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6"}, +] + +[[package]] +name = "python-bidi" +version = "0.6.6" +summary = "Python Bidi layout wrapping the Rust crate unicode-bidi" +groups = ["default"] +files = [ + {file = "python_bidi-0.6.6-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:da4949496e563b51f53ff34aad5a9f4c3aaf06f4180cf3bcb42bec649486c8f1"}, + {file = "python_bidi-0.6.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c48a755ca8ba3f2b242d6795d4a60e83ca580cc4fa270a3aaa8af05d93b7ba7f"}, + {file = "python_bidi-0.6.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76a1cd320993ba3e91a567e97f057a03f2c6b493096b3fff8b5630f51a38e7eb"}, + {file = "python_bidi-0.6.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8bf3e396f9ebe8f4f81e92fa4c98c50160d60c58964b89c8ff4ee0c482befaa"}, + {file = "python_bidi-0.6.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2a49b506ed21f762ebf332de6de689bc4912e24dcc3b85f120b34e5f01e541a"}, + {file = "python_bidi-0.6.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3428331e7ce0d58c15b5a57e18a43a12e28f8733086066e6fd75b0ded80e1cae"}, + {file = "python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35adfb9fed3e72b9043a5c00b6ab69e4b33d53d2d8f8b9f60d4df700f77bc2c0"}, + {file = "python_bidi-0.6.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:589c5b24a8c4b5e07a1e97654020734bf16ed01a4353911ab663a37aaf1c281d"}, + {file = "python_bidi-0.6.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:994534e47260d712c3b3291a6ab55b46cdbfd78a879ef95d14b27bceebfd4049"}, + {file = "python_bidi-0.6.6-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:00622f54a80826a918b22a2d6d5481bb3f669147e17bac85c81136b6ffbe7c06"}, + {file = "python_bidi-0.6.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:965e6f2182e7b9352f2d79221f6c49502a307a9778d7d87d82dc36bb1ffecbab"}, + {file = "python_bidi-0.6.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:53d7d3a550d176df99dd0bb0cc2da16b40634f11c8b9f5715777441d679c0a62"}, + {file = "python_bidi-0.6.6-cp311-cp311-win32.whl", hash = "sha256:b271cd05cb40f47eb4600de79a8e47f8579d81ce35f5650b39b7860d018c3ece"}, + {file = "python_bidi-0.6.6-cp311-cp311-win_amd64.whl", hash = "sha256:4ff1eba0ff87e04bd35d7e164203ad6e5ce19f0bac0bdf673134c0b78d919608"}, + {file = "python_bidi-0.6.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:166060a31c10aa3ffadd52cf10a3c9c2b8d78d844e0f2c5801e2ed511d3ec316"}, + {file = "python_bidi-0.6.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8706addd827840c2c3b3a9963060d9b979b43801cc9be982efa9644facd3ed26"}, + 
{file = "python_bidi-0.6.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69c02316a4f72a168ea6f66b90d845086e2f2d2de6b08eb32c576db36582177c"}, + {file = "python_bidi-0.6.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a525bcb77b8edbfdcf8b199dbed24556e6d1436af8f5fa392f6cdc93ed79b4af"}, + {file = "python_bidi-0.6.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb186c8da4bdc953893504bba93f41d5b412fd767ba5661ff606f22950ec609"}, + {file = "python_bidi-0.6.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fa21b46dc80ac7099d2dee424b634eb1f76b2308d518e505a626c55cdbf7b1"}, + {file = "python_bidi-0.6.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b31f5562839e7ecea881ba337f9d39716e2e0e6b3ba395e824620ee5060050ff"}, + {file = "python_bidi-0.6.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fb750d3d5ac028e8afd62d000928a2110dbca012fee68b1a325a38caa03dc50b"}, + {file = "python_bidi-0.6.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8b5f648ee8e9f4ac0400f71e671934b39837d7031496e0edde867a303344d758"}, + {file = "python_bidi-0.6.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c4c0255940e6ff98fb05f9d5de3ffcaab7b60d821d4ca072b50c4f871b036562"}, + {file = "python_bidi-0.6.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e7e36601edda15e67527560b1c00108b0d27831260b6b251cf7c6dd110645c03"}, + {file = "python_bidi-0.6.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07c9f000671b187319bacebb9e98d8b75005ccd16aa41b9d4411e66813c467bb"}, + {file = "python_bidi-0.6.6-cp312-cp312-win32.whl", hash = "sha256:57c0ca449a116c4f804422111b3345281c4e69c733c4556fa216644ec9907078"}, + {file = "python_bidi-0.6.6-cp312-cp312-win_amd64.whl", hash = "sha256:f60afe457a37bd908fdc7b520c07620b1a7cc006e08b6e3e70474025b4f5e5c7"}, + {file = "python_bidi-0.6.6-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:61cf12f6b7d0b9bb37838a5f045e6acbd91e838b57f0369c55319bb3969ffa4d"}, + {file = "python_bidi-0.6.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:33bd0ba5eedf18315a1475ac0f215b5134e48011b7320aedc2fb97df31d4e5bf"}, + {file = "python_bidi-0.6.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c9f798dd49b24bb1a9d90f065ef25c7bffa94c04c554f1fc02d0aea0a9b10b0"}, + {file = "python_bidi-0.6.6-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43a0409570c618d93706dc875b1d33b4adfe67144f6f2ebeb32d85d8bbdb85ed"}, + {file = "python_bidi-0.6.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ada1aecd32773c61b16f7c9f74d9ec1b57ea433e2083e08ca387c5cd4b0ceaed"}, + {file = "python_bidi-0.6.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:125a815f2b20313a2f6d331aa84abdd07de7d270985b056e6729390a4cda90df"}, + {file = "python_bidi-0.6.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:183fee39bd2de787f632376bd5ba0d5f1daf6a09d3ebfaa211df25d62223e531"}, + {file = "python_bidi-0.6.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c4e08753d32d633f5ecb5eb02624272eeffaa6d5c6f4f9ddf012637bcaabfc0a"}, + {file = "python_bidi-0.6.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d1dcd7a82ae00b86821fce627e310791f56da90924f15877cfda844e340679de"}, + {file = "python_bidi-0.6.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:5506ba56380140b3cb3504029de014d21eb8874c5e081d88495f8775f6ed90bc"}, + {file = 
"python_bidi-0.6.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:207b0a7082ec38045910d37700a0dd73c10d4ffccb22a4fd0391d7e9ce241672"}, + {file = "python_bidi-0.6.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:686642a52acdeffb1d9a593a284d07b175c63877c596fa3ccceeb2649ced1dd8"}, + {file = "python_bidi-0.6.6-cp313-cp313-win32.whl", hash = "sha256:485f2ee109e7aa73efc165b90a6d90da52546801413540c08b7133fe729d5e0a"}, + {file = "python_bidi-0.6.6-cp313-cp313-win_amd64.whl", hash = "sha256:63f7a9eaec31078e7611ab958b6e18e796c05b63ca50c1f7298311dc1e15ac3e"}, + {file = "python_bidi-0.6.6.tar.gz", hash = "sha256:07db4c7da502593bd6e39c07b3a38733704070de0cbf92a7b7277b7be8867dd9"}, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +summary = "Extensions to the standard Python datetime module" +groups = ["default"] +dependencies = [ + "six>=1.5", +] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[[package]] +name = "python-dotenv" +version = "1.1.1" +requires_python = ">=3.9" +summary = "Read key-value pairs from a .env file and set them as environment variables" +groups = ["default"] +files = [ + {file = "python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc"}, + {file = "python_dotenv-1.1.1.tar.gz", hash = "sha256:a8a6399716257f45be6a007360200409fce5cda2661e3dec71d23dc15f6189ab"}, +] + +[[package]] +name = "python-multipart" +version = "0.0.20" +requires_python = ">=3.8" +summary = "A streaming multipart parser for Python" +groups = ["default"] +files = [ + {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"}, + {file = "python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"}, +] + +[[package]] +name = "pytokens" +version = "0.1.10" +requires_python = ">=3.8" +summary = "A Fast, spec compliant Python 3.12+ tokenizer that runs on older Pythons." 
+groups = ["dev"] +files = [ + {file = "pytokens-0.1.10-py3-none-any.whl", hash = "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b"}, + {file = "pytokens-0.1.10.tar.gz", hash = "sha256:c9a4bfa0be1d26aebce03e6884ba454e842f186a59ea43a6d3b25af58223c044"}, +] + +[[package]] +name = "pytz" +version = "2025.2" +summary = "World timezone definitions, modern and historical" +groups = ["default"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + +[[package]] +name = "pywin32" +version = "311" +summary = "Python for Window Extensions" +groups = ["default"] +marker = "sys_platform == \"win32\"" +files = [ + {file = "pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151"}, + {file = "pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503"}, + {file = "pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2"}, + {file = "pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31"}, + {file = "pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067"}, + {file = "pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852"}, + {file = "pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d"}, + {file = "pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d"}, + {file = "pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a"}, + {file = "pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee"}, + {file = "pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87"}, + {file = "pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +requires_python = ">=3.8" +summary = "YAML parser and emitter for Python" +groups = ["default"] +files = [ + {file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"}, + {file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"}, + {file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"}, + {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"}, + {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = 
"sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"}, + {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, +] + +[[package]] +name = "referencing" +version = "0.36.2" +requires_python = ">=3.9" +summary = "JSON Referencing + Python" +groups = ["default"] +dependencies = [ + "attrs>=22.2.0", + "rpds-py>=0.7.0", + "typing-extensions>=4.4.0; python_version < \"3.13\"", +] +files = [ + {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, + {file = "referencing-0.36.2.tar.gz", hash = 
"sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, +] + +[[package]] +name = "regex" +version = "2025.9.18" +requires_python = ">=3.9" +summary = "Alternative regular expression module, to replace re." +groups = ["default"] +files = [ + {file = "regex-2025.9.18-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:51076980cd08cd13c88eb7365427ae27f0d94e7cebe9ceb2bb9ffdae8fc4d82a"}, + {file = "regex-2025.9.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:828446870bd7dee4e0cbeed767f07961aa07f0ea3129f38b3ccecebc9742e0b8"}, + {file = "regex-2025.9.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c28821d5637866479ec4cc23b8c990f5bc6dd24e5e4384ba4a11d38a526e1414"}, + {file = "regex-2025.9.18-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:726177ade8e481db669e76bf99de0b278783be8acd11cef71165327abd1f170a"}, + {file = "regex-2025.9.18-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f5cca697da89b9f8ea44115ce3130f6c54c22f541943ac8e9900461edc2b8bd4"}, + {file = "regex-2025.9.18-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dfbde38f38004703c35666a1e1c088b778e35d55348da2b7b278914491698d6a"}, + {file = "regex-2025.9.18-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f2f422214a03fab16bfa495cfec72bee4aaa5731843b771860a471282f1bf74f"}, + {file = "regex-2025.9.18-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a295916890f4df0902e4286bc7223ee7f9e925daa6dcdec4192364255b70561a"}, + {file = "regex-2025.9.18-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5db95ff632dbabc8c38c4e82bf545ab78d902e81160e6e455598014f0abe66b9"}, + {file = "regex-2025.9.18-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fb967eb441b0f15ae610b7069bdb760b929f267efbf522e814bbbfffdf125ce2"}, + {file = "regex-2025.9.18-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f04d2f20da4053d96c08f7fde6e1419b7ec9dbcee89c96e3d731fca77f411b95"}, + {file = "regex-2025.9.18-cp311-cp311-win32.whl", hash = "sha256:895197241fccf18c0cea7550c80e75f185b8bd55b6924fcae269a1a92c614a07"}, + {file = "regex-2025.9.18-cp311-cp311-win_amd64.whl", hash = "sha256:7e2b414deae99166e22c005e154a5513ac31493db178d8aec92b3269c9cce8c9"}, + {file = "regex-2025.9.18-cp311-cp311-win_arm64.whl", hash = "sha256:fb137ec7c5c54f34a25ff9b31f6b7b0c2757be80176435bf367111e3f71d72df"}, + {file = "regex-2025.9.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:436e1b31d7efd4dcd52091d076482031c611dde58bf9c46ca6d0a26e33053a7e"}, + {file = "regex-2025.9.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c190af81e5576b9c5fdc708f781a52ff20f8b96386c6e2e0557a78402b029f4a"}, + {file = "regex-2025.9.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e4121f1ce2b2b5eec4b397cc1b277686e577e658d8f5870b7eb2d726bd2300ab"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:300e25dbbf8299d87205e821a201057f2ef9aa3deb29caa01cd2cac669e508d5"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b47fcf9f5316c0bdaf449e879407e1b9937a23c3b369135ca94ebc8d74b1742"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:57a161bd3acaa4b513220b49949b07e252165e6b6dc910ee7617a37ff4f5b425"}, + {file = 
"regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f130c3a7845ba42de42f380fff3c8aebe89a810747d91bcf56d40a069f15352"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f96fa342b6f54dcba928dd452e8d8cb9f0d63e711d1721cd765bb9f73bb048d"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f0d676522d68c207828dcd01fb6f214f63f238c283d9f01d85fc664c7c85b56"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:40532bff8a1a0621e7903ae57fce88feb2e8a9a9116d341701302c9302aef06e"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:039f11b618ce8d71a1c364fdee37da1012f5a3e79b1b2819a9f389cd82fd6282"}, + {file = "regex-2025.9.18-cp312-cp312-win32.whl", hash = "sha256:e1dd06f981eb226edf87c55d523131ade7285137fbde837c34dc9d1bf309f459"}, + {file = "regex-2025.9.18-cp312-cp312-win_amd64.whl", hash = "sha256:3d86b5247bf25fa3715e385aa9ff272c307e0636ce0c9595f64568b41f0a9c77"}, + {file = "regex-2025.9.18-cp312-cp312-win_arm64.whl", hash = "sha256:032720248cbeeae6444c269b78cb15664458b7bb9ed02401d3da59fe4d68c3a5"}, + {file = "regex-2025.9.18-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2a40f929cd907c7e8ac7566ac76225a77701a6221bca937bdb70d56cb61f57b2"}, + {file = "regex-2025.9.18-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c90471671c2cdf914e58b6af62420ea9ecd06d1554d7474d50133ff26ae88feb"}, + {file = "regex-2025.9.18-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a351aff9e07a2dabb5022ead6380cff17a4f10e4feb15f9100ee56c4d6d06af"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc4b8e9d16e20ddfe16430c23468a8707ccad3365b06d4536142e71823f3ca29"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4b8cdbddf2db1c5e80338ba2daa3cfa3dec73a46fff2a7dda087c8efbf12d62f"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a276937d9d75085b2c91fb48244349c6954f05ee97bba0963ce24a9d915b8b68"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92a8e375ccdc1256401c90e9dc02b8642894443d549ff5e25e36d7cf8a80c783"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0dc6893b1f502d73037cf807a321cdc9be29ef3d6219f7970f842475873712ac"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a61e85bfc63d232ac14b015af1261f826260c8deb19401c0597dbb87a864361e"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1ef86a9ebc53f379d921fb9a7e42b92059ad3ee800fcd9e0fe6181090e9f6c23"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d3bc882119764ba3a119fbf2bd4f1b47bc56c1da5d42df4ed54ae1e8e66fdf8f"}, + {file = "regex-2025.9.18-cp313-cp313-win32.whl", hash = "sha256:3810a65675845c3bdfa58c3c7d88624356dd6ee2fc186628295e0969005f928d"}, + {file = "regex-2025.9.18-cp313-cp313-win_amd64.whl", hash = "sha256:16eaf74b3c4180ede88f620f299e474913ab6924d5c4b89b3833bc2345d83b3d"}, + {file = "regex-2025.9.18-cp313-cp313-win_arm64.whl", hash = "sha256:4dc98ba7dd66bd1261927a9f49bd5ee2bcb3660f7962f1ec02617280fc00f5eb"}, + {file = "regex-2025.9.18-cp313-cp313t-macosx_10_13_universal2.whl", hash = 
"sha256:fe5d50572bc885a0a799410a717c42b1a6b50e2f45872e2b40f4f288f9bce8a2"}, + {file = "regex-2025.9.18-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b9d9a2d6cda6621551ca8cf7a06f103adf72831153f3c0d982386110870c4d3"}, + {file = "regex-2025.9.18-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:13202e4c4ac0ef9a317fff817674b293c8f7e8c68d3190377d8d8b749f566e12"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:874ff523b0fecffb090f80ae53dc93538f8db954c8bb5505f05b7787ab3402a0"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d13ab0490128f2bb45d596f754148cd750411afc97e813e4b3a61cf278a23bb6"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:05440bc172bc4b4b37fb9667e796597419404dbba62e171e1f826d7d2a9ebcef"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5514b8e4031fdfaa3d27e92c75719cbe7f379e28cacd939807289bce76d0e35a"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:65d3c38c39efce73e0d9dc019697b39903ba25b1ad45ebbd730d2cf32741f40d"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ae77e447ebc144d5a26d50055c6ddba1d6ad4a865a560ec7200b8b06bc529368"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e3ef8cf53dc8df49d7e28a356cf824e3623764e9833348b655cfed4524ab8a90"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9feb29817df349c976da9a0debf775c5c33fc1c8ad7b9f025825da99374770b7"}, + {file = "regex-2025.9.18-cp313-cp313t-win32.whl", hash = "sha256:168be0d2f9b9d13076940b1ed774f98595b4e3c7fc54584bba81b3cc4181742e"}, + {file = "regex-2025.9.18-cp313-cp313t-win_amd64.whl", hash = "sha256:d59ecf3bb549e491c8104fea7313f3563c7b048e01287db0a90485734a70a730"}, + {file = "regex-2025.9.18-cp313-cp313t-win_arm64.whl", hash = "sha256:dbef80defe9fb21310948a2595420b36c6d641d9bea4c991175829b2cc4bc06a"}, + {file = "regex-2025.9.18-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c6db75b51acf277997f3adcd0ad89045d856190d13359f15ab5dda21581d9129"}, + {file = "regex-2025.9.18-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8f9698b6f6895d6db810e0bda5364f9ceb9e5b11328700a90cae573574f61eea"}, + {file = "regex-2025.9.18-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29cd86aa7cb13a37d0f0d7c21d8d949fe402ffa0ea697e635afedd97ab4b69f1"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c9f285a071ee55cd9583ba24dde006e53e17780bb309baa8e4289cd472bcc47"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5adf266f730431e3be9021d3e5b8d5ee65e563fec2883ea8093944d21863b379"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1137cabc0f38807de79e28d3f6e3e3f2cc8cfb26bead754d02e6d1de5f679203"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cc9e5525cada99699ca9223cce2d52e88c52a3d2a0e842bd53de5497c604164"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bbb9246568f72dce29bcd433517c2be22c7791784b223a810225af3b50d1aafb"}, + {file = 
"regex-2025.9.18-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6a52219a93dd3d92c675383efff6ae18c982e2d7651c792b1e6d121055808743"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ae9b3840c5bd456780e3ddf2f737ab55a79b790f6409182012718a35c6d43282"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d488c236ac497c46a5ac2005a952c1a0e22a07be9f10c3e735bc7d1209a34773"}, + {file = "regex-2025.9.18-cp314-cp314-win32.whl", hash = "sha256:0c3506682ea19beefe627a38872d8da65cc01ffa25ed3f2e422dffa1474f0788"}, + {file = "regex-2025.9.18-cp314-cp314-win_amd64.whl", hash = "sha256:57929d0f92bebb2d1a83af372cd0ffba2263f13f376e19b1e4fa32aec4efddc3"}, + {file = "regex-2025.9.18-cp314-cp314-win_arm64.whl", hash = "sha256:6a4b44df31d34fa51aa5c995d3aa3c999cec4d69b9bd414a8be51984d859f06d"}, + {file = "regex-2025.9.18-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b176326bcd544b5e9b17d6943f807697c0cb7351f6cfb45bf5637c95ff7e6306"}, + {file = "regex-2025.9.18-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0ffd9e230b826b15b369391bec167baed57c7ce39efc35835448618860995946"}, + {file = "regex-2025.9.18-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ec46332c41add73f2b57e2f5b642f991f6b15e50e9f86285e08ffe3a512ac39f"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b80fa342ed1ea095168a3f116637bd1030d39c9ff38dc04e54ef7c521e01fc95"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4d97071c0ba40f0cf2a93ed76e660654c399a0a04ab7d85472239460f3da84b"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0ac936537ad87cef9e0e66c5144484206c1354224ee811ab1519a32373e411f3"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dec57f96d4def58c422d212d414efe28218d58537b5445cf0c33afb1b4768571"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:48317233294648bf7cd068857f248e3a57222259a5304d32c7552e2284a1b2ad"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:274687e62ea3cf54846a9b25fc48a04459de50af30a7bd0b61a9e38015983494"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a78722c86a3e7e6aadf9579e3b0ad78d955f2d1f1a8ca4f67d7ca258e8719d4b"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:06104cd203cdef3ade989a1c45b6215bf42f8b9dd705ecc220c173233f7cba41"}, + {file = "regex-2025.9.18-cp314-cp314t-win32.whl", hash = "sha256:2e1eddc06eeaffd249c0adb6fafc19e2118e6308c60df9db27919e96b5656096"}, + {file = "regex-2025.9.18-cp314-cp314t-win_amd64.whl", hash = "sha256:8620d247fb8c0683ade51217b459cb4a1081c0405a3072235ba43a40d355c09a"}, + {file = "regex-2025.9.18-cp314-cp314t-win_arm64.whl", hash = "sha256:b7531a8ef61de2c647cdf68b3229b071e46ec326b3138b2180acb4275f470b01"}, + {file = "regex-2025.9.18.tar.gz", hash = "sha256:c5ba23274c61c6fef447ba6a39333297d0c247f53059dba0bca415cac511edc4"}, +] + +[[package]] +name = "requests" +version = "2.32.5" +requires_python = ">=3.9" +summary = "Python HTTP for Humans." 
+groups = ["default"] +dependencies = [ + "certifi>=2017.4.17", + "charset-normalizer<4,>=2", + "idna<4,>=2.5", + "urllib3<3,>=1.21.1", +] +files = [ + {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"}, + {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"}, +] + +[[package]] +name = "rich" +version = "14.1.0" +requires_python = ">=3.8.0" +summary = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +groups = ["default"] +dependencies = [ + "markdown-it-py>=2.2.0", + "pygments<3.0.0,>=2.13.0", +] +files = [ + {file = "rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f"}, + {file = "rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8"}, +] + +[[package]] +name = "rpds-py" +version = "0.27.1" +requires_python = ">=3.9" +summary = "Python bindings to Rust's persistent data structures (rpds)" +groups = ["default"] +files = [ + {file = "rpds_py-0.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:be898f271f851f68b318872ce6ebebbc62f303b654e43bf72683dbdc25b7c881"}, + {file = "rpds_py-0.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62ac3d4e3e07b58ee0ddecd71d6ce3b1637de2d373501412df395a0ec5f9beb5"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4708c5c0ceb2d034f9991623631d3d23cb16e65c83736ea020cdbe28d57c0a0e"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:abfa1171a9952d2e0002aba2ad3780820b00cc3d9c98c6630f2e93271501f66c"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b507d19f817ebaca79574b16eb2ae412e5c0835542c93fe9983f1e432aca195"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168b025f8fd8d8d10957405f3fdcef3dc20f5982d398f90851f4abc58c566c52"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb56c6210ef77caa58e16e8c17d35c63fe3f5b60fd9ba9d424470c3400bcf9ed"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:d252f2d8ca0195faa707f8eb9368955760880b2b42a8ee16d382bf5dd807f89a"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6e5e54da1e74b91dbc7996b56640f79b195d5925c2b78efaa8c5d53e1d88edde"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ffce0481cc6e95e5b3f0a47ee17ffbd234399e6d532f394c8dce320c3b089c21"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a205fdfe55c90c2cd8e540ca9ceba65cbe6629b443bc05db1f590a3db8189ff9"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:689fb5200a749db0415b092972e8eba85847c23885c8543a8b0f5c009b1a5948"}, + {file = "rpds_py-0.27.1-cp311-cp311-win32.whl", hash = "sha256:3182af66048c00a075010bc7f4860f33913528a4b6fc09094a6e7598e462fe39"}, + {file = "rpds_py-0.27.1-cp311-cp311-win_amd64.whl", hash = "sha256:b4938466c6b257b2f5c4ff98acd8128ec36b5059e5c8f8372d79316b1c36bb15"}, + {file = "rpds_py-0.27.1-cp311-cp311-win_arm64.whl", hash = "sha256:2f57af9b4d0793e53266ee4325535a31ba48e2f875da81a9177c9926dfa60746"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998"}, + {file = "rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e4b9fcfbc021633863a37e92571d6f91851fa656f0180246e84cbd8b3f6b329b"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1441811a96eadca93c517d08df75de45e5ffe68aa3089924f963c782c4b898cf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55266dafa22e672f5a4f65019015f90336ed31c6383bd53f5e7826d21a0e0b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78827d7ac08627ea2c8e02c9e5b41180ea5ea1f747e9db0915e3adf36b62dcf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae92443798a40a92dc5f0b01d8a7c93adde0c4dc965310a29ae7c64d72b9fad2"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c46c9dd2403b66a2a3b9720ec4b74d4ab49d4fabf9f03dfdce2d42af913fe8d0"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efe4eb1d01b7f5f1939f4ef30ecea6c6b3521eec451fb93191bf84b2a522418"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:15d3b4d83582d10c601f481eca29c3f138d44c92187d197aff663a269197c02d"}, + {file = 
"rpds_py-0.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4ed2e16abbc982a169d30d1a420274a709949e2cbdef119fe2ec9d870b42f274"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a75f305c9b013289121ec0f1181931975df78738cdf650093e6b86d74aa7d8dd"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:67ce7620704745881a3d4b0ada80ab4d99df390838839921f99e63c474f82cf2"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d992ac10eb86d9b6f369647b6a3f412fc0075cfd5d799530e84d335e440a002"}, + {file = "rpds_py-0.27.1-cp313-cp313-win32.whl", hash = "sha256:4f75e4bd8ab8db624e02c8e2fc4063021b58becdbe6df793a8111d9343aec1e3"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_amd64.whl", hash = "sha256:f9025faafc62ed0b75a53e541895ca272815bec18abe2249ff6501c8f2e12b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_arm64.whl", hash = "sha256:ed10dc32829e7d222b7d3b93136d25a406ba9788f6a7ebf6809092da1f4d279d"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:92022bbbad0d4426e616815b16bc4127f83c9a74940e1ccf3cfe0b387aba0228"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47162fdab9407ec3f160805ac3e154df042e577dd53341745fc7fb3f625e6d92"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb89bec23fddc489e5d78b550a7b773557c9ab58b7946154a10a6f7a214a48b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e48af21883ded2b3e9eb48cb7880ad8598b31ab752ff3be6457001d78f416723"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f5b7bd8e219ed50299e58551a410b64daafb5017d54bbe822e003856f06a802"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08f1e20bccf73b08d12d804d6e1c22ca5530e71659e6673bce31a6bb71c1e73f"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dc5dceeaefcc96dc192e3a80bbe1d6c410c469e97bdd47494a7d930987f18b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d76f9cc8665acdc0c9177043746775aa7babbf479b5520b78ae4002d889f5c21"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:134fae0e36022edad8290a6661edf40c023562964efea0cc0ec7f5d392d2aaef"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb11a4f1b2b63337cfd3b4d110af778a59aae51c81d195768e353d8b52f88081"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:13e608ac9f50a0ed4faec0e90ece76ae33b34c0e8656e3dceb9a7db994c692cd"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd2135527aa40f061350c3f8f89da2644de26cd73e4de458e79606384f4f68e7"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win32.whl", hash = "sha256:3020724ade63fe320a972e2ffd93b5623227e684315adce194941167fee02688"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8ee50c3e41739886606388ba3ab3ee2aae9f35fb23f833091833255a31740797"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:acb9aafccaae278f449d9c713b64a9e68662e7799dbd5859e2c6b3c67b56d334"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b7fb801aa7f845ddf601c49630deeeccde7ce10065561d92729bfe81bd21fb33"}, + {file = 
"rpds_py-0.27.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0dd05afb46597b9a2e11c351e5e4283c741237e7f617ffb3252780cca9336a"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b6dfb0e058adb12d8b1d1b25f686e94ffa65d9995a5157afe99743bf7369d62b"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed090ccd235f6fa8bb5861684567f0a83e04f52dfc2e5c05f2e4b1309fcf85e7"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf876e79763eecf3e7356f157540d6a093cef395b65514f17a356f62af6cc136"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12ed005216a51b1d6e2b02a7bd31885fe317e45897de81d86dcce7d74618ffff"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ee4308f409a40e50593c7e3bb8cbe0b4d4c66d1674a316324f0c2f5383b486f9"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b08d152555acf1f455154d498ca855618c1378ec810646fcd7c76416ac6dc60"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dce51c828941973a5684d458214d3a36fcd28da3e1875d659388f4f9f12cc33e"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c1476d6f29eb81aa4151c9a31219b03f1f798dc43d8af1250a870735516a1212"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3ce0cac322b0d69b63c9cdb895ee1b65805ec9ffad37639f291dd79467bee675"}, + {file = "rpds_py-0.27.1-cp314-cp314-win32.whl", hash = "sha256:dfbfac137d2a3d0725758cd141f878bf4329ba25e34979797c89474a89a8a3a3"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_amd64.whl", hash = "sha256:a6e57b0abfe7cc513450fcf529eb486b6e4d3f8aee83e92eb5f1ef848218d456"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_arm64.whl", hash = "sha256:faf8d146f3d476abfee026c4ae3bdd9ca14236ae4e4c310cbd1cf75ba33d24a3"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:ba81d2b56b6d4911ce735aad0a1d4495e808b8ee4dc58715998741a26874e7c2"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:84f7d509870098de0e864cad0102711c1e24e9b1a50ee713b65928adb22269e4"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e960fc78fecd1100539f14132425e1d5fe44ecb9239f8f27f079962021523e"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62f85b665cedab1a503747617393573995dac4600ff51869d69ad2f39eb5e817"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fed467af29776f6556250c9ed85ea5a4dd121ab56a5f8b206e3e7a4c551e48ec"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2729615f9d430af0ae6b36cf042cb55c0936408d543fb691e1a9e36648fd35a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b207d881a9aef7ba753d69c123a35d96ca7cb808056998f6b9e8747321f03b8"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:639fd5efec029f99b79ae47e5d7e00ad8a773da899b6309f6786ecaf22948c48"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fecc80cb2a90e28af8a9b366edacf33d7a91cbfe4c2c4544ea1246e949cfebeb"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:42a89282d711711d0a62d6f57d81aa43a1368686c45bc1c46b7f079d55692734"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:cf9931f14223de59551ab9d38ed18d92f14f055a5f78c1d8ad6493f735021bbb"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f39f58a27cc6e59f432b568ed8429c7e1641324fbe38131de852cd77b2d534b0"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win32.whl", hash = "sha256:d5fa0ee122dc09e23607a28e6d7b150da16c662e66409bbe85230e4c85bb528a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdfe4bb2f9fe7458b7453ad3c33e726d6d1c7c0a72960bcc23800d77384e42df"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8fabb8fd848a5f75a2324e4a84501ee3a5e3c78d8603f83475441866e60b94a3"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda8719d598f2f7f3e0f885cba8646644b55a187762bec091fa14a2b819746a9"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c64d07e95606ec402a0a1c511fe003873fa6af630bda59bac77fac8b4318ebc"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93a2ed40de81bcff59aabebb626562d48332f3d028ca2036f1d23cbb52750be4"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:387ce8c44ae94e0ec50532d9cb0edce17311024c9794eb196b90e1058aadeb66"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf94f812c95b5e60ebaf8bfb1898a7d7cb9c1af5744d4a67fa47796e0465d4e"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:4848ca84d6ded9b58e474dfdbad4b8bfb450344c0551ddc8d958bf4b36aa837c"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bde09cbcf2248b73c7c323be49b280180ff39fadcfe04e7b6f54a678d02a7cf"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:94c44ee01fd21c9058f124d2d4f0c9dc7634bec93cd4b38eefc385dabe71acbf"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:df8b74962e35c9249425d90144e721eed198e6555a0e22a563d29fe4486b51f6"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dc23e6820e3b40847e2f4a7726462ba0cf53089512abe9ee16318c366494c17a"}, + {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"}, +] + +[[package]] +name = "ruff" +version = "0.13.3" +requires_python = ">=3.7" +summary = "An extremely fast Python linter and code formatter, written in Rust." 
+groups = ["default", "dev"] +files = [ + {file = "ruff-0.13.3-py3-none-linux_armv6l.whl", hash = "sha256:311860a4c5e19189c89d035638f500c1e191d283d0cc2f1600c8c80d6dcd430c"}, + {file = "ruff-0.13.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2bdad6512fb666b40fcadb65e33add2b040fc18a24997d2e47fee7d66f7fcae2"}, + {file = "ruff-0.13.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fc6fa4637284708d6ed4e5e970d52fc3b76a557d7b4e85a53013d9d201d93286"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c9e6469864f94a98f412f20ea143d547e4c652f45e44f369d7b74ee78185838"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5bf62b705f319476c78891e0e97e965b21db468b3c999086de8ffb0d40fd2822"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78cc1abed87ce40cb07ee0667ce99dbc766c9f519eabfd948ed87295d8737c60"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4fb75e7c402d504f7a9a259e0442b96403fa4a7310ffe3588d11d7e170d2b1e3"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b951f9d9afb39330b2bdd2dd144ce1c1335881c277837ac1b50bfd99985ed3"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6052f8088728898e0a449f0dde8fafc7ed47e4d878168b211977e3e7e854f662"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc742c50f4ba72ce2a3be362bd359aef7d0d302bf7637a6f942eaa763bd292af"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:8e5640349493b378431637019366bbd73c927e515c9c1babfea3e932f5e68e1d"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6b139f638a80eae7073c691a5dd8d581e0ba319540be97c343d60fb12949c8d0"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6b547def0a40054825de7cfa341039ebdfa51f3d4bfa6a0772940ed351d2746c"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9cc48a3564423915c93573f1981d57d101e617839bef38504f85f3677b3a0a3e"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1a993b17ec03719c502881cb2d5f91771e8742f2ca6de740034433a97c561989"}, + {file = "ruff-0.13.3-py3-none-win32.whl", hash = "sha256:f14e0d1fe6460f07814d03c6e32e815bff411505178a1f539a38f6097d3e8ee3"}, + {file = "ruff-0.13.3-py3-none-win_amd64.whl", hash = "sha256:621e2e5812b691d4f244638d693e640f188bacbb9bc793ddd46837cea0503dd2"}, + {file = "ruff-0.13.3-py3-none-win_arm64.whl", hash = "sha256:9e9e9d699841eaf4c2c798fa783df2fabc680b72059a02ca0ed81c460bc58330"}, + {file = "ruff-0.13.3.tar.gz", hash = "sha256:5b0ba0db740eefdfbcce4299f49e9eaefc643d4d007749d77d047c2bab19908e"}, +] + +[[package]] +name = "safehttpx" +version = "0.1.6" +requires_python = ">3.9" +summary = "A small Python library created to help developers protect their applications from Server Side Request Forgery (SSRF) attacks." 
+groups = ["default"] +dependencies = [ + "httpx", +] +files = [ + {file = "safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c"}, + {file = "safehttpx-0.1.6.tar.gz", hash = "sha256:b356bfc82cee3a24c395b94a2dbeabbed60aff1aa5fa3b5fe97c4f2456ebce42"}, +] + +[[package]] +name = "safetensors" +version = "0.6.2" +requires_python = ">=3.9" +summary = "" +groups = ["default"] +files = [ + {file = "safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba"}, + {file = "safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac"}, + {file = "safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1"}, + {file = "safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c"}, + {file = "safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9"}, +] + +[[package]] +name = "scikit-image" +version = "0.25.2" +requires_python = ">=3.10" +summary = "Image processing in Python" +groups = ["default"] +dependencies = [ + "imageio!=2.35.0,>=2.33", + "lazy-loader>=0.4", + "networkx>=3.0", + "numpy>=1.24", + "packaging>=21", + "pillow>=10.1", + "scipy>=1.11.4", + "tifffile>=2022.8.12", +] +files = [ + {file = "scikit_image-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f4bac9196fb80d37567316581c6060763b0f4893d3aca34a9ede3825bc035b17"}, + {file = "scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d989d64ff92e0c6c0f2018c7495a5b20e2451839299a018e0e5108b2680f71e0"}, + {file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b2cfc96b27afe9a05bc92f8c6235321d3a66499995675b27415e0d0c76625173"}, + {file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24cc986e1f4187a12aa319f777b36008764e856e5013666a4a83f8df083c2641"}, + {file = "scikit_image-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4f6b61fc2db6340696afe3db6b26e0356911529f5f6aee8c322aa5157490c9b"}, + {file = "scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8db8dd03663112783221bf01ccfc9512d1cc50ac9b5b0fe8f4023967564719fb"}, + {file = "scikit_image-0.25.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:483bd8cc10c3d8a7a37fae36dfa5b21e239bd4ee121d91cad1f81bba10cfb0ed"}, + {file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d1e80107bcf2bf1291acfc0bf0425dceb8890abe9f38d8e94e23497cbf7ee0d"}, + {file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17e17eb8562660cc0d31bb55643a4da996a81944b82c54805c91b3fe66f4824"}, + {file = "scikit_image-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:bdd2b8c1de0849964dbc54037f36b4e9420157e67e45a8709a80d727f52c7da2"}, + {file = "scikit_image-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7efa888130f6c548ec0439b1a7ed7295bc10105458a421e9bf739b457730b6da"}, + {file = "scikit_image-0.25.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dd8011efe69c3641920614d550f5505f83658fe33581e49bed86feab43a180fc"}, + {file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28182a9d3e2ce3c2e251383bdda68f8d88d9fff1a3ebe1eb61206595c9773341"}, + {file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147"}, + {file = "scikit_image-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:64785a8acefee460ec49a354706db0b09d1f325674107d7fa3eadb663fb56d6f"}, + {file = "scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd"}, + {file = "scikit_image-0.25.2.tar.gz", hash = "sha256:e5a37e6cd4d0c018a7a55b9d601357e3382826d3888c10d0213fc63bff977dde"}, +] + +[[package]] +name = "scipy" +version = "1.16.2" +requires_python = ">=3.11" +summary = "Fundamental algorithms for scientific computing in Python" +groups = ["default"] +dependencies = [ + "numpy<2.6,>=1.25.2", +] +files = [ + {file = "scipy-1.16.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6ab88ea43a57da1af33292ebd04b417e8e2eaf9d5aa05700be8d6e1b6501cd92"}, + {file = "scipy-1.16.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c95e96c7305c96ede73a7389f46ccd6c659c4da5ef1b2789466baeaed3622b6e"}, + {file = "scipy-1.16.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:87eb178db04ece7c698220d523c170125dbffebb7af0345e66c3554f6f60c173"}, + {file = "scipy-1.16.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:4e409eac067dcee96a57fbcf424c13f428037827ec7ee3cb671ff525ca4fc34d"}, + {file = "scipy-1.16.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e574be127bb760f0dad24ff6e217c80213d153058372362ccb9555a10fc5e8d2"}, + {file = "scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f5db5ba6188d698ba7abab982ad6973265b74bb40a1efe1821b58c87f73892b9"}, + {file = "scipy-1.16.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:ec6e74c4e884104ae006d34110677bfe0098203a3fec2f3faf349f4cb05165e3"}, + {file = "scipy-1.16.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:912f46667d2d3834bc3d57361f854226475f695eb08c08a904aadb1c936b6a88"}, + {file = "scipy-1.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:91e9e8a37befa5a69e9cacbe0bcb79ae5afb4a0b130fd6db6ee6cc0d491695fa"}, + {file = "scipy-1.16.2-cp311-cp311-win_arm64.whl", hash = "sha256:f3bf75a6dcecab62afde4d1f973f1692be013110cad5338007927db8da73249c"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:89d6c100fa5c48472047632e06f0876b3c4931aac1f4291afc81a3644316bb0d"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ca748936cd579d3f01928b30a17dc474550b01272d8046e3e1ee593f23620371"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:fac4f8ce2ddb40e2e3d0f7ec36d2a1e7f92559a2471e59aec37bd8d9de01fec0"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:033570f1dcefd79547a88e18bccacff025c8c647a330381064f561d43b821232"}, + {file = "scipy-1.16.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ea3421209bf00c8a5ef2227de496601087d8f638a2363ee09af059bd70976dc1"}, + {file = "scipy-1.16.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f66bd07ba6f84cd4a380b41d1bf3c59ea488b590a2ff96744845163309ee8e2f"}, + {file = "scipy-1.16.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e9feab931bd2aea4a23388c962df6468af3d808ddf2d40f94a81c5dc38f32ef"}, + {file = "scipy-1.16.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:03dfc75e52f72cf23ec2ced468645321407faad8f0fe7b1f5b49264adbc29cb1"}, + {file = "scipy-1.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:0ce54e07bbb394b417457409a64fd015be623f36e330ac49306433ffe04bc97e"}, + {file = "scipy-1.16.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a8ffaa4ac0df81a0b94577b18ee079f13fecdb924df3328fc44a7dc5ac46851"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:84f7bf944b43e20b8a894f5fe593976926744f6c185bacfcbdfbb62736b5cc70"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5c39026d12edc826a1ef2ad35ad1e6d7f087f934bb868fc43fa3049c8b8508f9"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e52729ffd45b68777c5319560014d6fd251294200625d9d70fd8626516fc49f5"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:024dd4a118cccec09ca3209b7e8e614931a6ffb804b2a601839499cb88bdf925"}, + {file = "scipy-1.16.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7a5dc7ee9c33019973a470556081b0fd3c9f4c44019191039f9769183141a4d9"}, + {file = "scipy-1.16.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c2275ff105e508942f99d4e3bc56b6ef5e4b3c0af970386ca56b777608ce95b7"}, + {file = "scipy-1.16.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:af80196eaa84f033e48444d2e0786ec47d328ba00c71e4299b602235ffef9acb"}, + {file = "scipy-1.16.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9fb1eb735fe3d6ed1f89918224e3385fbf6f9e23757cacc35f9c78d3b712dd6e"}, + {file = "scipy-1.16.2-cp313-cp313-win_amd64.whl", hash = "sha256:fda714cf45ba43c9d3bae8f2585c777f64e3f89a2e073b668b32ede412d8f52c"}, + {file = "scipy-1.16.2-cp313-cp313-win_arm64.whl", hash = "sha256:2f5350da923ccfd0b00e07c3e5cfb316c1c0d6c1d864c07a72d092e9f20db104"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_10_14_x86_64.whl", hash = 
"sha256:53d8d2ee29b925344c13bda64ab51785f016b1b9617849dac10897f0701b20c1"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:9e05e33657efb4c6a9d23bd8300101536abd99c85cca82da0bffff8d8764d08a"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:7fe65b36036357003b3ef9d37547abeefaa353b237e989c21027b8ed62b12d4f"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6406d2ac6d40b861cccf57f49592f9779071655e9f75cd4f977fa0bdd09cb2e4"}, + {file = "scipy-1.16.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ff4dc42bd321991fbf611c23fc35912d690f731c9914bf3af8f417e64aca0f21"}, + {file = "scipy-1.16.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:654324826654d4d9133e10675325708fb954bc84dae6e9ad0a52e75c6b1a01d7"}, + {file = "scipy-1.16.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63870a84cd15c44e65220eaed2dac0e8f8b26bbb991456a033c1d9abfe8a94f8"}, + {file = "scipy-1.16.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fa01f0f6a3050fa6a9771a95d5faccc8e2f5a92b4a2e5440a0fa7264a2398472"}, + {file = "scipy-1.16.2-cp313-cp313t-win_amd64.whl", hash = "sha256:116296e89fba96f76353a8579820c2512f6e55835d3fad7780fece04367de351"}, + {file = "scipy-1.16.2-cp313-cp313t-win_arm64.whl", hash = "sha256:98e22834650be81d42982360382b43b17f7ba95e0e6993e2a4f5b9ad9283a94d"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:567e77755019bb7461513c87f02bb73fb65b11f049aaaa8ca17cfaa5a5c45d77"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:17d9bb346194e8967296621208fcdfd39b55498ef7d2f376884d5ac47cec1a70"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0a17541827a9b78b777d33b623a6dcfe2ef4a25806204d08ead0768f4e529a88"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:d7d4c6ba016ffc0f9568d012f5f1eb77ddd99412aea121e6fa8b4c3b7cbad91f"}, + {file = "scipy-1.16.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9702c4c023227785c779cba2e1d6f7635dbb5b2e0936cdd3a4ecb98d78fd41eb"}, + {file = "scipy-1.16.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1cdf0ac28948d225decdefcc45ad7dd91716c29ab56ef32f8e0d50657dffcc7"}, + {file = "scipy-1.16.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:70327d6aa572a17c2941cdfb20673f82e536e91850a2e4cb0c5b858b690e1548"}, + {file = "scipy-1.16.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5221c0b2a4b58aa7c4ed0387d360fd90ee9086d383bb34d9f2789fafddc8a936"}, + {file = "scipy-1.16.2-cp314-cp314-win_amd64.whl", hash = "sha256:f5a85d7b2b708025af08f060a496dd261055b617d776fc05a1a1cc69e09fe9ff"}, + {file = "scipy-1.16.2-cp314-cp314-win_arm64.whl", hash = "sha256:2cc73a33305b4b24556957d5857d6253ce1e2dcd67fa0ff46d87d1670b3e1e1d"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:9ea2a3fed83065d77367775d689401a703d0f697420719ee10c0780bcab594d8"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7280d926f11ca945c3ef92ba960fa924e1465f8d07ce3a9923080363390624c4"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8afae1756f6a1fe04636407ef7dbece33d826a5d462b74f3d0eb82deabefd831"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:5c66511f29aa8d233388e7416a3f20d5cae7a2744d5cee2ecd38c081f4e861b3"}, + {file = 
"scipy-1.16.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efe6305aeaa0e96b0ccca5ff647a43737d9a092064a3894e46c414db84bc54ac"}, + {file = "scipy-1.16.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f3a337d9ae06a1e8d655ee9d8ecb835ea5ddcdcbd8d23012afa055ab014f374"}, + {file = "scipy-1.16.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bab3605795d269067d8ce78a910220262711b753de8913d3deeaedb5dded3bb6"}, + {file = "scipy-1.16.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b0348d8ddb55be2a844c518cd8cc8deeeb8aeba707cf834db5758fc89b476a2c"}, + {file = "scipy-1.16.2-cp314-cp314t-win_amd64.whl", hash = "sha256:26284797e38b8a75e14ea6631d29bda11e76ceaa6ddb6fdebbfe4c4d90faf2f9"}, + {file = "scipy-1.16.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d2a4472c231328d4de38d5f1f68fdd6d28a615138f842580a8a321b5845cf779"}, + {file = "scipy-1.16.2.tar.gz", hash = "sha256:af029b153d243a80afb6eabe40b0a07f8e35c9adc269c019f364ad747f826a6b"}, +] + +[[package]] +name = "semantic-version" +version = "2.10.0" +requires_python = ">=2.7" +summary = "A library implementing the 'SemVer' scheme." +groups = ["default"] +files = [ + {file = "semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177"}, + {file = "semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c"}, +] + +[[package]] +name = "setuptools" +version = "80.9.0" +requires_python = ">=3.9" +summary = "Easily download, build, install, upgrade, and uninstall Python packages" +groups = ["default"] +files = [ + {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, + {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, +] + +[[package]] +name = "shapely" +version = "2.1.2" +requires_python = ">=3.10" +summary = "Manipulation and analysis of geometric objects" +groups = ["default"] +dependencies = [ + "numpy>=1.21", +] +files = [ + {file = "shapely-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91121757b0a36c9aac3427a651a7e6567110a4a67c97edf04f8d55d4765f6618"}, + {file = "shapely-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a9c722ba774cf50b5d4541242b4cce05aafd44a015290c82ba8a16931ff63d"}, + {file = "shapely-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cc4f7397459b12c0b196c9efe1f9d7e92463cbba142632b4cc6d8bbbbd3e2b09"}, + {file = "shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:136ab87b17e733e22f0961504d05e77e7be8c9b5a8184f685b4a91a84efe3c26"}, + {file = "shapely-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:16c5d0fc45d3aa0a69074979f4f1928ca2734fb2e0dde8af9611e134e46774e7"}, + {file = "shapely-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ddc759f72b5b2b0f54a7e7cde44acef680a55019eb52ac63a7af2cf17cb9cd2"}, + {file = "shapely-2.1.2-cp311-cp311-win32.whl", hash = "sha256:2fa78b49485391224755a856ed3b3bd91c8455f6121fee0db0e71cefb07d0ef6"}, + {file = "shapely-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:c64d5c97b2f47e3cd9b712eaced3b061f2b71234b3fc263e0fcf7d889c6559dc"}, + {file = "shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94"}, + {file = "shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359"}, + {file = "shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3"}, + {file = "shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b"}, + {file = "shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc"}, + {file = "shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d"}, + {file = "shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454"}, + {file = "shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179"}, + {file = "shapely-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b510dda1a3672d6879beb319bc7c5fd302c6c354584690973c838f46ec3e0fa8"}, + {file = "shapely-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8cff473e81017594d20ec55d86b54bc635544897e13a7cfc12e36909c5309a2a"}, + {file = "shapely-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe7b77dc63d707c09726b7908f575fc04ff1d1ad0f3fb92aec212396bc6cfe5e"}, + {file = "shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ed1a5bbfb386ee8332713bf7508bc24e32d24b74fc9a7b9f8529a55db9f4ee6"}, + {file = "shapely-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a84e0582858d841d54355246ddfcbd1fce3179f185da7470f41ce39d001ee1af"}, + {file = "shapely-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc3487447a43d42adcdf52d7ac73804f2312cbfa5d433a7d2c506dcab0033dfd"}, + {file = "shapely-2.1.2-cp313-cp313-win32.whl", hash = "sha256:9c3a3c648aedc9f99c09263b39f2d8252f199cb3ac154fadc173283d7d111350"}, + {file = "shapely-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:ca2591bff6645c216695bdf1614fca9c82ea1144d4a7591a466fef64f28f0715"}, + {file = "shapely-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2d93d23bdd2ed9dc157b46bc2f19b7da143ca8714464249bef6771c679d5ff40"}, + {file = "shapely-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01d0d304b25634d60bd7cf291828119ab55a3bab87dc4af1e44b07fb225f188b"}, + {file = "shapely-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8d8382dd120d64b03698b7298b89611a6ea6f55ada9d39942838b79c9bc89801"}, + {file = "shapely-2.1.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19efa3611eef966e776183e338b2d7ea43569ae99ab34f8d17c2c054d3205cc0"}, + {file = "shapely-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:346ec0c1a0fcd32f57f00e4134d1200e14bf3f5ae12af87ba83ca275c502498c"}, + {file = "shapely-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6305993a35989391bd3476ee538a5c9a845861462327efe00dd11a5c8c709a99"}, + {file = "shapely-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:c8876673449f3401f278c86eb33224c5764582f72b653a415d0e6672fde887bf"}, + {file = "shapely-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:4a44bc62a10d84c11a7a3d7c1c4fe857f7477c3506e24c9062da0db0ae0c449c"}, + {file = "shapely-2.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9a522f460d28e2bf4e12396240a5fc1518788b2fcd73535166d748399ef0c223"}, + {file = 
"shapely-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ff629e00818033b8d71139565527ced7d776c269a49bd78c9df84e8f852190c"}, + {file = "shapely-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f67b34271dedc3c653eba4e3d7111aa421d5be9b4c4c7d38d30907f796cb30df"}, + {file = "shapely-2.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21952dc00df38a2c28375659b07a3979d22641aeb104751e769c3ee825aadecf"}, + {file = "shapely-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1f2f33f486777456586948e333a56ae21f35ae273be99255a191f5c1fa302eb4"}, + {file = "shapely-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cf831a13e0d5a7eb519e96f58ec26e049b1fad411fc6fc23b162a7ce04d9cffc"}, + {file = "shapely-2.1.2-cp314-cp314-win32.whl", hash = "sha256:61edcd8d0d17dd99075d320a1dd39c0cb9616f7572f10ef91b4b5b00c4aeb566"}, + {file = "shapely-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:a444e7afccdb0999e203b976adb37ea633725333e5b119ad40b1ca291ecf311c"}, + {file = "shapely-2.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5ebe3f84c6112ad3d4632b1fd2290665aa75d4cef5f6c5d77c4c95b324527c6a"}, + {file = "shapely-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5860eb9f00a1d49ebb14e881f5caf6c2cf472c7fd38bd7f253bbd34f934eb076"}, + {file = "shapely-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b705c99c76695702656327b819c9660768ec33f5ce01fa32b2af62b56ba400a1"}, + {file = "shapely-2.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a1fd0ea855b2cf7c9cddaf25543e914dd75af9de08785f20ca3085f2c9ca60b0"}, + {file = "shapely-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:df90e2db118c3671a0754f38e36802db75fe0920d211a27481daf50a711fdf26"}, + {file = "shapely-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:361b6d45030b4ac64ddd0a26046906c8202eb60d0f9f53085f5179f1d23021a0"}, + {file = "shapely-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:b54df60f1fbdecc8ebc2c5b11870461a6417b3d617f555e5033f1505d36e5735"}, + {file = "shapely-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:0036ac886e0923417932c2e6369b6c52e38e0ff5d9120b90eef5cd9a5fc5cae9"}, + {file = "shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9"}, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +requires_python = ">=3.7" +summary = "Tool to Detect Surrounding Shell" +groups = ["default"] +files = [ + {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, + {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, +] + +[[package]] +name = "six" +version = "1.17.0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +summary = "Python 2 and 3 compatibility utilities" +groups = ["default"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +requires_python = ">=3.7" +summary = "Sniff out which async library your code is running under" +groups = ["default"] +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", 
hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "soundfile" +version = "0.13.1" +summary = "An audio library based on libsndfile, CFFI and NumPy" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +dependencies = [ + "cffi>=1.0", + "numpy", +] +files = [ + {file = "soundfile-0.13.1-py2.py3-none-any.whl", hash = "sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445"}, + {file = "soundfile-0.13.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33"}, + {file = "soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593"}, + {file = "soundfile-0.13.1-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb"}, + {file = "soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618"}, + {file = "soundfile-0.13.1-py2.py3-none-win32.whl", hash = "sha256:c734564fab7c5ddf8e9be5bf70bab68042cd17e9c214c06e365e20d64f9a69d5"}, + {file = "soundfile-0.13.1-py2.py3-none-win_amd64.whl", hash = "sha256:1e70a05a0626524a69e9f0f4dd2ec174b4e9567f4d8b6c11d38b5c289be36ee9"}, + {file = "soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b"}, +] + +[[package]] +name = "sse-starlette" +version = "3.0.2" +requires_python = ">=3.9" +summary = "SSE plugin for Starlette" +groups = ["default"] +dependencies = [ + "anyio>=4.7.0", +] +files = [ + {file = "sse_starlette-3.0.2-py3-none-any.whl", hash = "sha256:16b7cbfddbcd4eaca11f7b586f3b8a080f1afe952c15813455b162edea619e5a"}, + {file = "sse_starlette-3.0.2.tar.gz", hash = "sha256:ccd60b5765ebb3584d0de2d7a6e4f745672581de4f5005ab31c3a25d10b52b3a"}, +] + +[[package]] +name = "starlette" +version = "0.48.0" +requires_python = ">=3.9" +summary = "The little ASGI library that shines." 
+groups = ["default"] +dependencies = [ + "anyio<5,>=3.6.2", + "typing-extensions>=4.10.0; python_version < \"3.13\"", +] +files = [ + {file = "starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659"}, + {file = "starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46"}, +] + +[[package]] +name = "supervision" +version = "0.26.1" +requires_python = ">=3.9" +summary = "A set of easy-to-use utils that will come in handy in any Computer Vision project" +groups = ["default"] +dependencies = [ + "defusedxml>=0.7.1", + "matplotlib>=3.6.0", + "numpy>=1.21.2", + "opencv-python>=4.5.5.64", + "pillow>=9.4", + "pyyaml>=5.3", + "requests>=2.26.0", + "scipy>=1.10.0", + "tqdm>=4.62.3", +] +files = [ + {file = "supervision-0.26.1-py3-none-any.whl", hash = "sha256:43c55e2830f38f5750be7266208992dc16996da9c9478e067bc2617ebaf91c1a"}, + {file = "supervision-0.26.1.tar.gz", hash = "sha256:af0db9c5459bb640cf0d31e9a4df3296020b4cd0dd484d8659eafe7b475b68f2"}, +] + +[[package]] +name = "sympy" +version = "1.14.0" +requires_python = ">=3.9" +summary = "Computer algebra system (CAS) in Python" +groups = ["default"] +dependencies = [ + "mpmath<1.4,>=1.1.0", +] +files = [ + {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, + {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, +] + +[[package]] +name = "termcolor" +version = "3.1.0" +requires_python = ">=3.9" +summary = "ANSI color formatting for output in terminal" +groups = ["default"] +files = [ + {file = "termcolor-3.1.0-py3-none-any.whl", hash = "sha256:591dd26b5c2ce03b9e43f391264626557873ce1d379019786f99b0c2bee140aa"}, + {file = "termcolor-3.1.0.tar.gz", hash = "sha256:6a6dd7fbee581909eeec6a756cff1d7f7c376063b14e4a298dc4980309e55970"}, +] + +[[package]] +name = "tifffile" +version = "2025.10.4" +requires_python = ">=3.11" +summary = "Read and write TIFF files" +groups = ["default"] +dependencies = [ + "numpy", +] +files = [ + {file = "tifffile-2025.10.4-py3-none-any.whl", hash = "sha256:7687d691e49026053181470cec70fa9250e3a586b2041041297e38b10bbd34e1"}, + {file = "tifffile-2025.10.4.tar.gz", hash = "sha256:2e437c16ab211be5bcdc79f71b4907359115f1f83b5d919e7c297c29725d3e38"}, +] + +[[package]] +name = "tiktoken" +version = "0.12.0" +requires_python = ">=3.9" +summary = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +groups = ["default"] +dependencies = [ + "regex>=2022.1.18", + "requests>=2.26.0", +] +files = [ + {file = "tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb"}, + {file = "tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa"}, + {file = "tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc"}, + {file = "tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded"}, + {file = "tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd"}, + {file = "tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967"}, + {file = 
"tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def"}, + {file = "tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8"}, + {file = "tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b"}, + {file = "tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37"}, + {file = "tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad"}, + {file = "tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5"}, + {file = "tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3"}, + {file = "tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd"}, + {file = "tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3"}, + {file = "tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160"}, + {file = "tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa"}, + {file = "tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be"}, + {file = "tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a"}, + {file = "tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3"}, + {file = "tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697"}, + {file = "tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16"}, + {file = "tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a"}, + {file = "tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27"}, + {file = "tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb"}, + {file = "tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e"}, + {file = "tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25"}, + {file = "tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f"}, + {file = "tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646"}, + {file = "tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88"}, + {file = 
"tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff"}, + {file = "tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830"}, + {file = "tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b"}, + {file = "tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b"}, + {file = "tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3"}, + {file = "tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365"}, + {file = "tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e"}, + {file = "tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63"}, + {file = "tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0"}, + {file = "tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a"}, + {file = "tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0"}, + {file = "tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71"}, + {file = "tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931"}, +] + +[[package]] +name = "tokenizers" +version = "0.22.1" +requires_python = ">=3.9" +summary = "" +groups = ["default"] +dependencies = [ + "huggingface-hub<2.0,>=0.16.4", +] +files = [ + {file = "tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73"}, + {file = "tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879"}, + {file = 
"tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390"}, + {file = "tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82"}, + {file = "tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138"}, + {file = "tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9"}, +] + +[[package]] +name = "tomlkit" +version = "0.13.3" +requires_python = ">=3.8" +summary = "Style preserving TOML library" +groups = ["default"] +files = [ + {file = "tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0"}, + {file = "tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1"}, +] + +[[package]] +name = "torch" +version = "2.7.1" +requires_python = ">=3.9.0" +summary = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +groups = ["default"] +dependencies = [ + "filelock", + "fsspec", + "jinja2", + "networkx", + "nvidia-cublas-cu12==12.6.4.1; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cuda-cupti-cu12==12.6.80; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cuda-runtime-cu12==12.6.77; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cudnn-cu12==9.5.1.17; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cufft-cu12==11.3.0.4; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cufile-cu12==1.11.1.6; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-curand-cu12==10.3.7.77; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cusolver-cu12==11.7.1.2; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cusparse-cu12==12.5.4.2; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cusparselt-cu12==0.6.3; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-nccl-cu12==2.26.2; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-nvjitlink-cu12==12.6.85; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-nvtx-cu12==12.6.77; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "setuptools; python_version >= \"3.12\"", + "sympy>=1.13.3", + "triton==3.3.1; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "typing-extensions>=4.10.0", +] +files = [ + {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:236f501f2e383f1cb861337bdf057712182f910f10aeaf509065d54d339e49b2"}, + {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:06eea61f859436622e78dd0cdd51dbc8f8c6d76917a9cf0555a333f9eac31ec1"}, + {file = "torch-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:8273145a2e0a3c6f9fd2ac36762d6ee89c26d430e612b95a99885df083b04e52"}, + {file = 
"torch-2.7.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aea4fc1bf433d12843eb2c6b2204861f43d8364597697074c8d38ae2507f8730"}, + {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ea1e518df4c9de73af7e8a720770f3628e7f667280bce2be7a16292697e3fa"}, + {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c33360cfc2edd976c2633b3b66c769bdcbbf0e0b6550606d188431c81e7dd1fc"}, + {file = "torch-2.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:d8bf6e1856ddd1807e79dc57e54d3335f2b62e6f316ed13ed3ecfe1fc1df3d8b"}, + {file = "torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:787687087412c4bd68d315e39bc1223f08aae1d16a9e9771d95eabbb04ae98fb"}, + {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:03563603d931e70722dce0e11999d53aa80a375a3d78e6b39b9f6805ea0a8d28"}, + {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d632f5417b6980f61404a125b999ca6ebd0b8b4bbdbb5fbbba44374ab619a412"}, + {file = "torch-2.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:23660443e13995ee93e3d844786701ea4ca69f337027b05182f5ba053ce43b38"}, + {file = "torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0da4f4dba9f65d0d203794e619fe7ca3247a55ffdcbd17ae8fb83c8b2dc9b585"}, + {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e08d7e6f21a617fe38eeb46dd2213ded43f27c072e9165dc27300c9ef9570934"}, + {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:30207f672328a42df4f2174b8f426f354b2baa0b7cca3a0adb3d6ab5daf00dc8"}, + {file = "torch-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:79042feca1c634aaf6603fe6feea8c6b30dfa140a6bbc0b973e2260c7e79a22e"}, + {file = "torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:988b0cbc4333618a1056d2ebad9eb10089637b659eb645434d0809d8d937b946"}, +] + +[[package]] +name = "torchvision" +version = "0.22.1" +requires_python = ">=3.9" +summary = "image and video datasets and models for torch deep learning" +groups = ["default"] +dependencies = [ + "numpy", + "pillow!=8.3.*,>=5.3.0", + "torch==2.7.1", +] +files = [ + {file = "torchvision-0.22.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4addf626e2b57fc22fd6d329cf1346d474497672e6af8383b7b5b636fba94a53"}, + {file = "torchvision-0.22.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:8b4a53a6067d63adba0c52f2b8dd2290db649d642021674ee43c0c922f0c6a69"}, + {file = "torchvision-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7866a3b326413e67724ac46f1ee594996735e10521ba9e6cdbe0fa3cd98c2f2"}, + {file = "torchvision-0.22.1-cp311-cp311-win_amd64.whl", hash = "sha256:bb3f6df6f8fd415ce38ec4fd338376ad40c62e86052d7fc706a0dd51efac1718"}, + {file = "torchvision-0.22.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:153f1790e505bd6da123e21eee6e83e2e155df05c0fe7d56347303067d8543c5"}, + {file = "torchvision-0.22.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:964414eef19459d55a10e886e2fca50677550e243586d1678f65e3f6f6bac47a"}, + {file = "torchvision-0.22.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:699c2d70d33951187f6ed910ea05720b9b4aaac1dcc1135f53162ce7d42481d3"}, + {file = "torchvision-0.22.1-cp312-cp312-win_amd64.whl", hash = "sha256:75e0897da7a8e43d78632f66f2bdc4f6e26da8d3f021a7c0fa83746073c2597b"}, + {file = "torchvision-0.22.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c3ae3319624c43cc8127020f46c14aa878406781f0899bb6283ae474afeafbf"}, + {file = "torchvision-0.22.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = 
"sha256:4a614a6a408d2ed74208d0ea6c28a2fbb68290e9a7df206c5fef3f0b6865d307"}, + {file = "torchvision-0.22.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7ee682be589bb1a002b7704f06b8ec0b89e4b9068f48e79307d2c6e937a9fdf4"}, + {file = "torchvision-0.22.1-cp313-cp313-win_amd64.whl", hash = "sha256:2566cafcfa47ecfdbeed04bab8cef1307c8d4ef75046f7624b9e55f384880dfe"}, + {file = "torchvision-0.22.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:043d9e35ed69c2e586aff6eb9e2887382e7863707115668ac9d140da58f42cba"}, + {file = "torchvision-0.22.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:27142bcc8a984227a6dcf560985e83f52b82a7d3f5fe9051af586a2ccc46ef26"}, + {file = "torchvision-0.22.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef46e065502f7300ad6abc98554131c35dc4c837b978d91306658f1a65c00baa"}, + {file = "torchvision-0.22.1-cp313-cp313t-win_amd64.whl", hash = "sha256:7414eeacfb941fa21acddcd725f1617da5630ec822e498660a4b864d7d998075"}, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +requires_python = ">=3.7" +summary = "Fast, Extensible Progress Meter" +groups = ["default"] +dependencies = [ + "colorama; platform_system == \"Windows\"", +] +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[[package]] +name = "transformers" +version = "4.57.0" +requires_python = ">=3.9.0" +summary = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +groups = ["default"] +dependencies = [ + "filelock", + "huggingface-hub<1.0,>=0.34.0", + "numpy>=1.17", + "packaging>=20.0", + "pyyaml>=5.1", + "regex!=2019.12.17", + "requests", + "safetensors>=0.4.3", + "tokenizers<=0.23.0,>=0.22.0", + "tqdm>=4.27", +] +files = [ + {file = "transformers-4.57.0-py3-none-any.whl", hash = "sha256:9d7c6d098c026e40d897e017ed1f481ab803cbac041021dbc6ae6100e4949b55"}, + {file = "transformers-4.57.0.tar.gz", hash = "sha256:d045753f3d93f9216e693cdb168698dfd2e9d3aad1bb72579a5d60ebf1545a8b"}, +] + +[[package]] +name = "triton" +version = "3.3.1" +summary = "A language and compiler for custom Deep Learning operations" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "setuptools>=40.8.0", +] +files = [ + {file = "triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b31e3aa26f8cb3cc5bf4e187bf737cbacf17311e1112b781d4a059353dfd731b"}, + {file = "triton-3.3.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9999e83aba21e1a78c1f36f21bce621b77bcaa530277a50484a7cb4a822f6e43"}, + {file = "triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240"}, + {file = "triton-3.3.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3198adb9d78b77818a5388bff89fa72ff36f9da0bc689db2f0a651a67ce6a42"}, +] + +[[package]] +name = "typer" +version = "0.19.2" +requires_python = ">=3.8" +summary = "Typer, build great CLIs. Easy to code. Based on Python type hints." 
+groups = ["default"] +dependencies = [ + "click>=8.0.0", + "rich>=10.11.0", + "shellingham>=1.3.0", + "typing-extensions>=3.7.4.3", +] +files = [ + {file = "typer-0.19.2-py3-none-any.whl", hash = "sha256:755e7e19670ffad8283db353267cb81ef252f595aa6834a0d1ca9312d9326cb9"}, + {file = "typer-0.19.2.tar.gz", hash = "sha256:9ad824308ded0ad06cc716434705f691d4ee0bfd0fb081839d2e426860e7fdca"}, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +requires_python = ">=3.9" +summary = "Backported and Experimental Type Hints for Python 3.9+" +groups = ["default"] +files = [ + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +requires_python = ">=3.9" +summary = "Runtime typing introspection tools" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.12.0", +] +files = [ + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, +] + +[[package]] +name = "tzdata" +version = "2025.2" +requires_python = ">=2" +summary = "Provider of IANA time zone data" +groups = ["default"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + +[[package]] +name = "ultralytics" +version = "8.3.205" +requires_python = ">=3.8" +summary = "Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification." +groups = ["default"] +dependencies = [ + "matplotlib>=3.3.0", + "numpy>=1.23.0", + "opencv-python>=4.6.0", + "pillow>=7.1.2", + "polars", + "psutil", + "pyyaml>=5.3.1", + "requests>=2.23.0", + "scipy>=1.4.1", + "torch!=2.4.0,>=1.8.0; sys_platform == \"win32\"", + "torch>=1.8.0", + "torchvision>=0.9.0", + "ultralytics-thop>=2.0.0", +] +files = [ + {file = "ultralytics-8.3.205-py3-none-any.whl", hash = "sha256:5dee679a5467ecf33de3c64973016ac64d02db3cc7611ef27aa45dc1c2c7492c"}, + {file = "ultralytics-8.3.205.tar.gz", hash = "sha256:1adef151e6e68e50993b8c1cda690f52ebb6c9c7e629fefc02283e3eb6d8e863"}, +] + +[[package]] +name = "ultralytics-thop" +version = "2.0.17" +requires_python = ">=3.8" +summary = "Ultralytics THOP package for fast computation of PyTorch model FLOPs and parameters." +groups = ["default"] +dependencies = [ + "numpy", + "torch", +] +files = [ + {file = "ultralytics_thop-2.0.17-py3-none-any.whl", hash = "sha256:36ba7bd297b26cfd193531f4b8f42075ecf2059d9c0f04907521fee1db94e8c7"}, + {file = "ultralytics_thop-2.0.17.tar.gz", hash = "sha256:f4572aeb7236939f35c72f966e4e0c3d42fd433ae2974d816865d43e29dc981b"}, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +requires_python = ">=3.9" +summary = "HTTP library with thread-safe connection pooling, file post, and more." 
+groups = ["default"] +files = [ + {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, + {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, +] + +[[package]] +name = "uvicorn" +version = "0.37.0" +requires_python = ">=3.9" +summary = "The lightning-fast ASGI server." +groups = ["default"] +dependencies = [ + "click>=7.0", + "h11>=0.8", + "typing-extensions>=4.0; python_version < \"3.11\"", +] +files = [ + {file = "uvicorn-0.37.0-py3-none-any.whl", hash = "sha256:913b2b88672343739927ce381ff9e2ad62541f9f8289664fa1d1d3803fa2ce6c"}, + {file = "uvicorn-0.37.0.tar.gz", hash = "sha256:4115c8add6d3fd536c8ee77f0e14a7fd2ebba939fed9b02583a97f80648f9e13"}, +] + +[[package]] +name = "websocket-client" +version = "1.8.0" +requires_python = ">=3.8" +summary = "WebSocket client for Python with low level API options" +groups = ["default"] +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[[package]] +name = "websockets" +version = "15.0.1" +requires_python = ">=3.9" +summary = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +groups = ["default"] +files = [ + {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431"}, + {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57"}, + {file = "websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905"}, + {file = "websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562"}, + {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792"}, + {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413"}, + {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8"}, + {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3"}, + {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf"}, + {file = "websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85"}, + {file = "websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065"}, + {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3"}, + {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665"}, + {file = 
"websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2"}, + {file = "websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215"}, + {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5"}, + {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65"}, + {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe"}, + {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4"}, + {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597"}, + {file = "websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9"}, + {file = "websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7"}, + {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931"}, + {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675"}, + {file = "websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151"}, + {file = "websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22"}, + {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f"}, + {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8"}, + {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375"}, + {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d"}, + {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4"}, + {file = "websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa"}, + {file = "websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561"}, + {file = "websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f"}, + {file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"}, +] + +[[package]] +name = "xxhash" +version = "3.6.0" +requires_python = 
">=3.7" +summary = "Python binding for xxHash" +groups = ["default"] +marker = "sys_platform == \"darwin\"" +files = [ + {file = "xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a"}, + {file = "xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3"}, + {file = "xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd"}, + {file = "xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef"}, + {file = "xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7"}, + {file = "xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c"}, + {file = "xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0"}, + {file = 
"xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae"}, + {file = "xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb"}, + {file = "xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c"}, + {file = "xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829"}, + {file = "xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec"}, + {file = "xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd"}, + {file = "xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799"}, + {file = "xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392"}, + {file = 
"xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6"}, + {file = "xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702"}, + {file = "xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033"}, + {file = "xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec"}, + {file = "xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8"}, + {file = "xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746"}, + {file = "xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e"}, + {file = "xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7"}, + {file = 
"xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5"}, + {file = "xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f"}, + {file = "xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad"}, + {file = "xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679"}, + {file = "xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4"}, + {file = "xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518"}, + {file = "xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119"}, + {file = "xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f"}, + 
{file = "xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d"}, + {file = "xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6"}, +] + +[[package]] +name = "yarl" +version = "1.22.0" +requires_python = ">=3.9" +summary = "Yet another URL library" +groups = ["default"] +dependencies = [ + "idna>=2.0", + "multidict>=4.0", + "propcache>=0.2.1", +] +files = [ + {file = "yarl-1.22.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ab72135b1f2db3fed3997d7e7dc1b80573c67138023852b6efb336a5eae6511"}, + {file = "yarl-1.22.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:669930400e375570189492dc8d8341301578e8493aec04aebc20d4717f899dd6"}, + {file = "yarl-1.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:792a2af6d58177ef7c19cbf0097aba92ca1b9cb3ffdd9c7470e156c8f9b5e028"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea66b1c11c9150f1372f69afb6b8116f2dd7286f38e14ea71a44eee9ec51b9d"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3e2daa88dc91870215961e96a039ec73e4937da13cf77ce17f9cad0c18df3503"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba440ae430c00eee41509353628600212112cd5018d5def7e9b05ea7ac34eb65"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e6438cc8f23a9c1478633d216b16104a586b9761db62bfacb6425bac0a36679e"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c52a6e78aef5cf47a98ef8e934755abf53953379b7d53e68b15ff4420e6683d"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3b06bcadaac49c70f4c88af4ffcfbe3dc155aab3163e75777818092478bcbbe7"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:6944b2dc72c4d7f7052683487e3677456050ff77fcf5e6204e98caf785ad1967"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d5372ca1df0f91a86b047d1277c2aaf1edb32d78bbcefffc81b40ffd18f027ed"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:51af598701f5299012b8416486b40fceef8c26fc87dc6d7d1f6fc30609ea0aa6"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b266bd01fedeffeeac01a79ae181719ff848a5a13ce10075adbefc8f1daee70e"}, + {file = "yarl-1.22.0-cp311-cp311-win32.whl", hash = 
"sha256:a9b1ba5610a4e20f655258d5a1fdc7ebe3d837bb0e45b581398b99eb98b1f5ca"}, + {file = "yarl-1.22.0-cp311-cp311-win_amd64.whl", hash = "sha256:078278b9b0b11568937d9509b589ee83ef98ed6d561dfe2020e24a9fd08eaa2b"}, + {file = "yarl-1.22.0-cp311-cp311-win_arm64.whl", hash = "sha256:b6a6f620cfe13ccec221fa312139135166e47ae169f8253f72a0abc0dae94376"}, + {file = "yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f"}, + {file = "yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2"}, + {file = "yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520"}, + {file = "yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8"}, + {file = "yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c"}, + {file = "yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74"}, + {file = "yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53"}, + {file = "yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a"}, + {file = "yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601"}, + {file = 
"yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67"}, + {file = "yarl-1.22.0-cp313-cp313-win32.whl", hash = "sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95"}, + {file = "yarl-1.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d"}, + {file = "yarl-1.22.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b"}, + {file = "yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10"}, + {file = "yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3"}, + {file = "yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8"}, + 
{file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62"}, + {file = "yarl-1.22.0-cp313-cp313t-win32.whl", hash = "sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03"}, + {file = "yarl-1.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249"}, + {file = "yarl-1.22.0-cp313-cp313t-win_arm64.whl", hash = "sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b"}, + {file = "yarl-1.22.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4"}, + {file = "yarl-1.22.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683"}, + {file = "yarl-1.22.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da"}, + {file = "yarl-1.22.0-cp314-cp314-win32.whl", hash = "sha256:6a635ea45ba4ea8238463b4f7d0e721bad669f80878b7bfd1f89266e2ae63da2"}, + {file = "yarl-1.22.0-cp314-cp314-win_amd64.whl", hash = "sha256:0d6e6885777af0f110b0e5d7e5dda8b704efed3894da26220b7f3d887b839a79"}, + {file = "yarl-1.22.0-cp314-cp314-win_arm64.whl", hash = "sha256:8218f4e98d3c10d683584cb40f0424f4b9fd6e95610232dd75e13743b070ee33"}, + {file = "yarl-1.22.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1"}, + {file = "yarl-1.22.0-cp314-cp314t-macosx_10_13_x86_64.whl", 
hash = "sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca"}, + {file = "yarl-1.22.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c"}, + {file = "yarl-1.22.0-cp314-cp314t-win32.whl", hash = "sha256:8009b3173bcd637be650922ac455946197d858b3630b6d8787aa9e5c4564533e"}, + {file = "yarl-1.22.0-cp314-cp314t-win_amd64.whl", hash = "sha256:9fb17ea16e972c63d25d4a97f016d235c78dd2344820eb35bc034bc32012ee27"}, + {file = "yarl-1.22.0-cp314-cp314t-win_arm64.whl", hash = "sha256:9f6d73c1436b934e3f01df1e1b21ff765cd1d28c77dfb9ace207f746d4610ee1"}, + {file = "yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff"}, + {file = "yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71"}, +] + +[[package]] +name = "yaspin" +version = "3.2.0" +requires_python = "<4.0,>=3.9" +summary = "Yet Another Terminal Spinner" +groups = ["default"] +dependencies = [ + "termcolor<4.0,>=3.1", +] +files = [ + {file = "yaspin-3.2.0-py3-none-any.whl", hash = "sha256:6a98053c75c0728271070bd6c99d0c83b6de76734bee34a294c2c2df00e9a06c"}, + {file = "yaspin-3.2.0.tar.gz", hash = "sha256:416fe8d6722d26e4d1a1f50498bb4f3bdd4c68b9cd54065d224a4b9d1228cce7"}, +] + +[[package]] +name = "zipp" +version = "3.23.0" +requires_python = ">=3.9" +summary = "Backport of pathlib-compatible object wrapper for zip files" +groups = ["default"] +files = [ + {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, + {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, +] diff --git a/libs/python/mcp-server/pyproject.toml b/libs/python/mcp-server/pyproject.toml index f80a1b6b..088ae9ec 100644 
--- a/libs/python/mcp-server/pyproject.toml +++ b/libs/python/mcp-server/pyproject.toml @@ -6,8 +6,8 @@ build-backend = "pdm.backend" name = "cua-mcp-server" description = "MCP Server for Computer-Use Agent (CUA)" readme = "README.md" -requires-python = ">=3.11" -version = "0.1.0" +requires-python = ">=3.12" +version = "0.1.15" authors = [ {name = "TryCua", email = "gh@trycua.com"} ] @@ -27,14 +27,4 @@ distribution = true dev = [ "black>=23.9.1", "ruff>=0.0.292", -] - -[tool.black] -line-length = 100 -target-version = ["py311"] - -[tool.ruff] -line-length = 100 -target-version = "py311" -select = ["E", "F", "B", "I"] -fix = true +] \ No newline at end of file diff --git a/libs/python/mcp-server/scripts/start_mcp_server.sh b/libs/python/mcp-server/scripts/start_mcp_server.sh index 13257351..95343ac9 100755 --- a/libs/python/mcp-server/scripts/start_mcp_server.sh +++ b/libs/python/mcp-server/scripts/start_mcp_server.sh @@ -1,14 +1,43 @@ -#!/bin/bash +#!/usr/bin/env bash +set -Eeuo pipefail -set -e - -# Set the CUA repository path based on script location +# --- Resolve repo root from this script's location --- SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )" -PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python" +CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../../.." &> /dev/null && pwd )" -# Set Python path to include all necessary libraries -export PYTHONPATH="${CUA_REPO_DIR}/libs/python/mcp-server:${CUA_REPO_DIR}/libs/python/agent:${CUA_REPO_DIR}/libs/python/computer:${CUA_REPO_DIR}/libs/python/core:${CUA_REPO_DIR}/libs/python/pylume" +# --- Choose a Python interpreter (prefer repo-root venv) --- +CANDIDATES=( + "$CUA_REPO_DIR/.venv/bin/python" + "$CUA_REPO_DIR/libs/.venv/bin/python" + "$(command -v python3 || true)" + "$(command -v python || true)" +) -# Run the MCP server directly as a module -$PYTHON_PATH -m mcp_server.server \ No newline at end of file +PYTHON_PATH="" +for p in "${CANDIDATES[@]}"; do + if [[ -n "$p" && -x "$p" ]]; then + PYTHON_PATH="$p" + break + fi +done + +if [[ -z "${PYTHON_PATH}" ]]; then + >&2 echo "[cua-mcp] ERROR: No suitable Python found. 
Tried:" + for p in "${CANDIDATES[@]}"; do >&2 echo " - $p"; done + >&2 echo "[cua-mcp] Tip: create venv: python3 -m venv $CUA_REPO_DIR/.venv && \"$CUA_REPO_DIR/.venv/bin/pip\" install -e \"$CUA_REPO_DIR/libs/python/mcp-server\"" + exit 127 +fi + +# --- Export PYTHONPATH so module imports work during dev --- +export PYTHONPATH="$CUA_REPO_DIR/libs/python/mcp-server:$CUA_REPO_DIR/libs/python/agent:$CUA_REPO_DIR/libs/python/computer:$CUA_REPO_DIR/libs/python/core:$CUA_REPO_DIR/libs/python/pylume" + +# --- Helpful startup log for Claude's mcp.log --- +>&2 echo "[cua-mcp] using python: $PYTHON_PATH" +>&2 echo "[cua-mcp] repo dir : $CUA_REPO_DIR" +>&2 echo "[cua-mcp] PYTHONPATH : $PYTHONPATH" +if [[ -n "${CUA_MODEL_NAME:-}" ]]; then + >&2 echo "[cua-mcp] CUA_MODEL_NAME=$CUA_MODEL_NAME" +fi + +# --- Run the MCP server module --- +exec "$PYTHON_PATH" -m mcp_server.server diff --git a/libs/python/pylume/.bumpversion.cfg b/libs/python/pylume/.bumpversion.cfg new file mode 100644 index 00000000..4a316b37 --- /dev/null +++ b/libs/python/pylume/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.2.1 +commit = True +tag = True +tag_name = pylume-v{new_version} +message = Bump pylume to v{new_version} + +[bumpversion:file:pylume/__init__.py] +search = __version__ = "{current_version}" +replace = __version__ = "{new_version}" diff --git a/libs/python/pylume/README.md b/libs/python/pylume/README.md index 4502c943..459d1ce5 100644 --- a/libs/python/pylume/README.md +++ b/libs/python/pylume/README.md @@ -8,14 +8,14 @@ - [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) - [![PyPI](https://img.shields.io/pypi/v/pylume?color=333333)](https://pypi.org/project/pylume/) +[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![PyPI](https://img.shields.io/pypi/v/pylume?color=333333)](https://pypi.org/project/pylume/) + - **pylume** is a lightweight Python library based on [lume](https://github.com/trycua/lume) to create, run and manage macOS and Linux virtual machines (VMs) natively on Apple Silicon. ```bash @@ -28,7 +28,7 @@ Please refer to this [Notebook](./samples/nb.ipynb) for a quickstart. More detai ## Prebuilt Images -Pre-built images are available on [ghcr.io/trycua](https://github.com/orgs/trycua/packages). +Pre-built images are available on [ghcr.io/trycua](https://github.com/orgs/trycua/packages). These images come pre-configured with an SSH server and auto-login enabled. ## Contributing diff --git a/libs/python/pylume/__init__.py b/libs/python/pylume/__init__.py index 65cacee1..128ce121 100644 --- a/libs/python/pylume/__init__.py +++ b/libs/python/pylume/__init__.py @@ -2,8 +2,8 @@ PyLume Python SDK - A client library for managing macOS VMs with PyLume. 
""" -from pylume.pylume import * -from pylume.models import * from pylume.exceptions import * +from pylume.models import * +from pylume.pylume import * __version__ = "0.1.0" diff --git a/libs/python/pylume/pylume/__init__.py b/libs/python/pylume/pylume/__init__.py index 5b3818ef..adfb15d9 100644 --- a/libs/python/pylume/pylume/__init__.py +++ b/libs/python/pylume/pylume/__init__.py @@ -35,7 +35,7 @@ from .models import ( # Import main class last to avoid circular imports from .pylume import PyLume -__version__ = "0.2.2" +__version__ = "0.2.1" __all__ = [ "PyLume", diff --git a/libs/python/pylume/pylume/client.py b/libs/python/pylume/pylume/client.py index 607ddd0a..101d5ee8 100644 --- a/libs/python/pylume/pylume/client.py +++ b/libs/python/pylume/pylume/client.py @@ -1,18 +1,19 @@ -import json import asyncio -import subprocess -from typing import Optional, Any, Dict +import json import shlex +import subprocess +from typing import Any, Dict, Optional from .exceptions import ( - LumeError, - LumeServerError, - LumeConnectionError, - LumeTimeoutError, - LumeNotFoundError, LumeConfigError, + LumeConnectionError, + LumeError, + LumeNotFoundError, + LumeServerError, + LumeTimeoutError, ) + class LumeClient: def __init__(self, base_url: str, timeout: float = 60.0, debug: bool = False): self.base_url = base_url @@ -26,7 +27,13 @@ class LumeClient: if kwargs: print(json.dumps(kwargs, indent=2)) - async def _run_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None) -> Any: + async def _run_curl( + self, + method: str, + path: str, + data: Optional[Dict[str, Any]] = None, + params: Optional[Dict[str, Any]] = None, + ) -> Any: """Execute a curl command and return the response.""" url = f"{self.base_url}{path}" if params: @@ -34,30 +41,28 @@ class LumeClient: url = f"{url}?{param_str}" cmd = ["curl", "-X", method, "-s", "-w", "%{http_code}", "-m", str(self.timeout)] - + if data is not None: cmd.extend(["-H", "Content-Type: application/json", "-d", json.dumps(data)]) - + cmd.append(url) - + self._log_debug(f"Running curl command: {' '.join(map(shlex.quote, cmd))}") - + try: process = await asyncio.create_subprocess_exec( - *cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE + *cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = await process.communicate() - + if process.returncode != 0: raise LumeConnectionError(f"Curl command failed: {stderr.decode()}") - + # The last 3 characters are the status code response = stdout.decode() status_code = int(response[-3:]) response_body = response[:-3] # Remove status code from response - + if status_code >= 400: if status_code == 404: raise LumeNotFoundError(f"Resource not found: {path}") @@ -67,9 +72,9 @@ class LumeClient: raise LumeServerError(f"Server error: {response_body}") else: raise LumeError(f"Request failed with status {status_code}: {response_body}") - + return json.loads(response_body) if response_body.strip() else None - + except asyncio.TimeoutError: raise LumeTimeoutError(f"Request timed out after {self.timeout} seconds") @@ -77,7 +82,9 @@ class LumeClient: """Make a GET request.""" return await self._run_curl("GET", path, params=params) - async def post(self, path: str, data: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None) -> Any: + async def post( + self, path: str, data: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None + ) -> Any: """Make a POST request.""" old_timeout = self.timeout if timeout is not None: @@ -99,14 +106,14 @@ 
class LumeClient: """Print equivalent curl command for debugging.""" curl_cmd = f"""curl -X {method} \\ '{self.base_url}{path}'""" - + if data: curl_cmd += f" \\\n -H 'Content-Type: application/json' \\\n -d '{json.dumps(data)}'" - + print("\nEquivalent curl command:") print(curl_cmd) print() async def close(self) -> None: """Close the client resources.""" - pass # No shared resources to clean up \ No newline at end of file + pass # No shared resources to clean up diff --git a/libs/python/pylume/pylume/exceptions.py b/libs/python/pylume/pylume/exceptions.py index 420b5d42..191718b0 100644 --- a/libs/python/pylume/pylume/exceptions.py +++ b/libs/python/pylume/pylume/exceptions.py @@ -1,36 +1,54 @@ from typing import Optional + class LumeError(Exception): """Base exception for all PyLume errors.""" + pass + class LumeServerError(LumeError): """Raised when there's an error with the PyLume server.""" - def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None): + + def __init__( + self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None + ): self.status_code = status_code self.response_text = response_text super().__init__(message) + class LumeConnectionError(LumeError): """Raised when there's an error connecting to the PyLume server.""" + pass + class LumeTimeoutError(LumeError): """Raised when a request to the PyLume server times out.""" + pass + class LumeNotFoundError(LumeError): """Raised when a requested resource is not found.""" + pass + class LumeConfigError(LumeError): """Raised when there's an error with the configuration.""" + pass + class LumeVMError(LumeError): """Raised when there's an error with a VM operation.""" + pass + class LumeImageError(LumeError): """Raised when there's an error with an image operation.""" - pass \ No newline at end of file + + pass diff --git a/libs/python/pylume/pylume/models.py b/libs/python/pylume/pylume/models.py index cd2ddb2b..021ea8aa 100644 --- a/libs/python/pylume/pylume/models.py +++ b/libs/python/pylume/pylume/models.py @@ -1,22 +1,26 @@ -from typing import Optional, List, Literal, Dict, Any import re -from pydantic import BaseModel, Field, computed_field, validator, ConfigDict, RootModel +from typing import Any, Dict, List, Literal, Optional + +from pydantic import BaseModel, ConfigDict, Field, RootModel, computed_field, validator + class DiskInfo(BaseModel): """Information about disk storage allocation. - + Attributes: total: Total disk space in bytes allocated: Currently allocated disk space in bytes """ + total: int allocated: int + class VMConfig(BaseModel): """Configuration for creating a new VM. - + Note: Memory and disk sizes should be specified with units (e.g., "4GB", "64GB") - + Attributes: name: Name of the virtual machine os: Operating system type, either "macOS" or "linux" @@ -26,6 +30,7 @@ class VMConfig(BaseModel): display: Display resolution in format "widthxheight" ipsw: IPSW path or 'latest' for macOS VMs, None for other OS types """ + name: str os: Literal["macOS", "linux"] = "macOS" cpu: int = Field(default=2, ge=1) @@ -37,46 +42,53 @@ class VMConfig(BaseModel): class Config: populate_by_alias = True + class SharedDirectory(BaseModel): """Configuration for a shared directory. 
- + Attributes: host_path: Path to the directory on the host system read_only: Whether the directory should be mounted as read-only """ + host_path: str = Field(..., alias="hostPath") # Allow host_path but serialize as hostPath read_only: bool = False - + class Config: populate_by_name = True # Allow both alias and original name - alias_generator = lambda s: ''.join(word.capitalize() if i else word for i, word in enumerate(s.split('_'))) + alias_generator = lambda s: "".join( + word.capitalize() if i else word for i, word in enumerate(s.split("_")) + ) + class VMRunOpts(BaseModel): """Configuration for running a VM. - + Args: no_display: Whether to not display the VNC client shared_directories: List of directories to share with the VM """ + no_display: bool = Field(default=False, alias="noDisplay") shared_directories: Optional[list[SharedDirectory]] = Field( - default=None, - alias="sharedDirectories" + default=None, alias="sharedDirectories" ) model_config = ConfigDict( populate_by_name=True, - alias_generator=lambda s: ''.join(word.capitalize() if i else word for i, word in enumerate(s.split('_'))) + alias_generator=lambda s: "".join( + word.capitalize() if i else word for i, word in enumerate(s.split("_")) + ), ) def model_dump(self, **kwargs): """Export model data with proper field name conversion. - + Converts shared directory fields to match API expectations when using aliases. - + Args: **kwargs: Keyword arguments passed to parent model_dump method - + Returns: dict: Model data with properly formatted field names """ @@ -84,19 +96,16 @@ class VMRunOpts(BaseModel): # Convert shared directory fields to match API expectations if self.shared_directories and "by_alias" in kwargs and kwargs["by_alias"]: data["sharedDirectories"] = [ - { - "hostPath": d.host_path, - "readOnly": d.read_only - } - for d in self.shared_directories + {"hostPath": d.host_path, "readOnly": d.read_only} for d in self.shared_directories ] # Remove the snake_case version if it exists data.pop("shared_directories", None) return data + class VMStatus(BaseModel): """Status information for a virtual machine. - + Attributes: name: Name of the virtual machine status: Current status of the VM @@ -107,6 +116,7 @@ class VMStatus(BaseModel): vnc_url: URL for VNC connection if available ip_address: IP address of the VM if available """ + name: str status: str os: Literal["macOS", "linux"] @@ -123,7 +133,7 @@ class VMStatus(BaseModel): @property def state(self) -> str: """Get the current state of the VM. - + Returns: str: Current VM status """ @@ -133,7 +143,7 @@ class VMStatus(BaseModel): @property def cpu(self) -> int: """Get the number of CPU cores. - + Returns: int: Number of CPU cores allocated to the VM """ @@ -143,7 +153,7 @@ class VMStatus(BaseModel): @property def memory(self) -> str: """Get memory allocation in human-readable format. - + Returns: str: Memory size formatted as "{size}GB" """ @@ -151,27 +161,31 @@ class VMStatus(BaseModel): gb = self.memory_size / (1024 * 1024 * 1024) return f"{int(gb)}GB" + class VMUpdateOpts(BaseModel): """Options for updating VM configuration. - + Attributes: cpu: Number of CPU cores to update to memory: Amount of memory to update to with units disk_size: Size of disk to update to with units """ + cpu: Optional[int] = None memory: Optional[str] = None disk_size: Optional[str] = None + class ImageRef(BaseModel): """Reference to a VM image. 
- + Attributes: image: Name of the image tag: Tag version of the image registry: Registry hostname where image is stored organization: Organization or namespace in the registry """ + image: str tag: str = "latest" registry: Optional[str] = "ghcr.io" @@ -179,47 +193,53 @@ class ImageRef(BaseModel): def model_dump(self, **kwargs): """Override model_dump to return just the image:tag format. - + Args: **kwargs: Keyword arguments (ignored) - + Returns: str: Image reference in "image:tag" format """ return f"{self.image}:{self.tag}" + class CloneSpec(BaseModel): """Specification for cloning a VM. - + Attributes: name: Name of the source VM to clone new_name: Name for the new cloned VM """ + name: str new_name: str = Field(alias="newName") class Config: populate_by_alias = True + class ImageInfo(BaseModel): """Model for individual image information. - + Attributes: imageId: Unique identifier for the image """ + imageId: str + class ImageList(RootModel): """Response model for the images endpoint. - + A list-like container for ImageInfo objects that provides iteration and indexing capabilities. """ + root: List[ImageInfo] def __iter__(self): """Iterate over the image list. - + Returns: Iterator over ImageInfo objects """ @@ -227,10 +247,10 @@ class ImageList(RootModel): def __getitem__(self, item): """Get an item from the image list by index. - + Args: item: Index or slice to retrieve - + Returns: ImageInfo or list of ImageInfo objects """ @@ -238,8 +258,8 @@ class ImageList(RootModel): def __len__(self): """Get the number of images in the list. - + Returns: int: Number of images in the list """ - return len(self.root) \ No newline at end of file + return len(self.root) diff --git a/libs/python/pylume/pylume/pylume.py b/libs/python/pylume/pylume/pylume.py index 2073b883..1bbe34b2 100644 --- a/libs/python/pylume/pylume/pylume.py +++ b/libs/python/pylume/pylume/pylume.py @@ -1,36 +1,36 @@ -import os -import sys -import json -import time import asyncio -import subprocess -from typing import Optional, List, Union, Callable, TypeVar, Any -from functools import wraps +import json +import os import re import signal +import subprocess +import sys +import time +from functools import wraps +from typing import Any, Callable, List, Optional, TypeVar, Union -from .server import LumeServer from .client import LumeClient -from .models import ( - VMConfig, - VMStatus, - VMRunOpts, - VMUpdateOpts, - ImageRef, - CloneSpec, - SharedDirectory, - ImageList, -) from .exceptions import ( - LumeError, - LumeServerError, - LumeConnectionError, - LumeTimeoutError, - LumeNotFoundError, LumeConfigError, - LumeVMError, + LumeConnectionError, + LumeError, LumeImageError, + LumeNotFoundError, + LumeServerError, + LumeTimeoutError, + LumeVMError, ) +from .models import ( + CloneSpec, + ImageList, + ImageRef, + SharedDirectory, + VMConfig, + VMRunOpts, + VMStatus, + VMUpdateOpts, +) +from .server import LumeServer # Type variable for the decorator T = TypeVar("T") diff --git a/libs/python/pylume/pylume/server.py b/libs/python/pylume/pylume/server.py index 01c48084..cab5f627 100644 --- a/libs/python/pylume/pylume/server.py +++ b/libs/python/pylume/pylume/server.py @@ -1,18 +1,19 @@ -import os -import time import asyncio -import subprocess -import tempfile -import logging -import socket -from typing import Optional -import sys -from .exceptions import LumeConnectionError -import signal import json -import shlex +import logging +import os import random +import shlex +import signal +import socket +import subprocess +import sys +import 
tempfile +import time from logging import getLogger +from typing import Optional + +from .exceptions import LumeConnectionError class LumeServer: diff --git a/libs/python/pylume/pyproject.toml b/libs/python/pylume/pyproject.toml index f21f2bb2..976fe6ff 100644 --- a/libs/python/pylume/pyproject.toml +++ b/libs/python/pylume/pyproject.toml @@ -20,7 +20,7 @@ keywords = ["apple-silicon", "macos", "virtualization", "vm"] license = { text = "MIT" } name = "pylume" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.12" [tool.pdm.version] path = "pylume/__init__.py" @@ -41,29 +41,6 @@ dev = [ "pytest>=7.0.0", ] -[tool.black] -line-length = 100 -target-version = ["py311"] - -[tool.ruff] -fix = true -line-length = 100 -select = ["B", "E", "F", "I"] -target-version = "py311" - -[tool.ruff.format] -docstring-code-format = true - -[tool.mypy] -check_untyped_defs = true -disallow_untyped_defs = true -ignore_missing_imports = true -python_version = "3.11" -show_error_codes = true -strict = true -warn_return_any = true -warn_unused_ignores = false - [tool.pytest.ini_options] asyncio_mode = "auto" python_files = "test_*.py" @@ -71,4 +48,4 @@ testpaths = ["tests"] [tool.pdm.build] includes = ["pylume/"] -source-includes = ["LICENSE", "README.md", "tests/"] +source-includes = ["LICENSE", "README.md", "tests/"] \ No newline at end of file diff --git a/libs/python/som/.bumpversion.cfg b/libs/python/som/.bumpversion.cfg new file mode 100644 index 00000000..232fe9f9 --- /dev/null +++ b/libs/python/som/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.1.3 +commit = True +tag = True +tag_name = som-v{new_version} +message = Bump cua-som to v{new_version} + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" diff --git a/libs/python/som/README.md b/libs/python/som/README.md index 02802bfc..ad7e906a 100644 --- a/libs/python/som/README.md +++ b/libs/python/som/README.md @@ -8,10 +8,11 @@ - [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) - [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) - [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) - [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/) +[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#) +[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +[![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/) + @@ -33,7 +34,6 @@ - Uses Metal Performance Shaders (MPS) - Multi-scale detection enabled - ~0.4s average detection time - - **Supported**: Any Python 3.11+ environment - Falls back to CPU if no GPU available - Single-scale detection on CPU @@ -82,11 +82,13 @@ for elem in result.elements: ## Development ### Test Data + - Place test screenshots in `examples/test_data/` - Not tracked in git to keep repository size manageable - Default test image: `test_screen.png` (1920x1080) ### Running Tests + ```bash # Run benchmark with no OCR python examples/omniparser_examples.py examples/test_data/test_screen.png --runs 5 --ocr none diff --git a/libs/python/som/pyproject.toml 
b/libs/python/som/pyproject.toml index 10b29ff8..d5fac5c9 100644 --- a/libs/python/som/pyproject.toml +++ b/libs/python/som/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-som" -version = "0.1.0" +version = "0.1.3" description = "Computer Vision and OCR library for detecting and analyzing UI elements" authors = [ { name = "TryCua", email = "gh@trycua.com" } @@ -24,7 +24,7 @@ dependencies = [ "typing-extensions>=4.9.0", "pydantic>=2.6.3" ] -requires-python = ">=3.11" +requires-python = ">=3.12" readme = "README.md" license = {text = "AGPL-3.0-or-later"} keywords = ["computer-vision", "ocr", "ui-analysis", "icon-detection"] @@ -52,30 +52,7 @@ src-layout = false includes = ["som/"] source-includes = ["tests/", "README.md", "LICENSE"] -[tool.black] -line-length = 100 -target-version = ["py311"] - -[tool.ruff] -line-length = 100 -target-version = "py311" -select = ["E", "F", "B", "I"] -fix = true - -[tool.ruff.format] -docstring-code-format = true - -[tool.mypy] -strict = true -python_version = "3.11" -ignore_missing_imports = true -disallow_untyped_defs = true -check_untyped_defs = true -warn_return_any = true -show_error_codes = true -warn_unused_ignores = false - [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] -python_files = "test_*.py" +python_files = "test_*.py" \ No newline at end of file diff --git a/libs/python/som/som/__init__.py b/libs/python/som/som/__init__.py index 906da597..48b0c7e8 100644 --- a/libs/python/som/som/__init__.py +++ b/libs/python/som/som/__init__.py @@ -5,11 +5,11 @@ __version__ = "0.1.0" from .detect import OmniParser from .models import ( BoundingBox, - UIElement, IconElement, - TextElement, + ParseResult, ParserMetadata, - ParseResult + TextElement, + UIElement, ) __all__ = [ @@ -19,5 +19,5 @@ __all__ = [ "IconElement", "TextElement", "ParserMetadata", - "ParseResult" -] \ No newline at end of file + "ParseResult", +] diff --git a/libs/python/som/som/detect.py b/libs/python/som/som/detect.py index 79e64886..8b27d529 100644 --- a/libs/python/som/som/detect.py +++ b/libs/python/som/som/detect.py @@ -1,28 +1,35 @@ -from pathlib import Path -from typing import Union, List, Dict, Any, Tuple, Optional, cast +import argparse +import base64 +import io import logging -import torch -import torchvision.ops +import signal +import time +from contextlib import contextmanager +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union, cast + import cv2 import numpy as np -import time -import torchvision.transforms as T -from PIL import Image -import io -import base64 -import argparse -import signal -from contextlib import contextmanager - -from ultralytics import YOLO -from huggingface_hub import hf_hub_download import supervision as sv +import torch +import torchvision.ops +import torchvision.transforms as T +from huggingface_hub import hf_hub_download +from PIL import Image from supervision.detection.core import Detections +from ultralytics import YOLO from .detection import DetectionProcessor +from .models import ( + BoundingBox, + IconElement, + ParseResult, + ParserMetadata, + TextElement, + UIElement, +) from .ocr import OCRProcessor from .visualization import BoxAnnotator -from .models import BoundingBox, UIElement, IconElement, TextElement, ParserMetadata, ParseResult logger = logging.getLogger(__name__) @@ -50,8 +57,9 @@ def timeout(seconds: int): def process_text_box(box, image): """Process a single text box with OCR.""" try: + from typing import Any, List, Sequence, Tuple + import 
easyocr - from typing import List, Tuple, Any, Sequence x1 = int(min(point[0] for point in box)) y1 = int(min(point[1] for point in box)) @@ -100,6 +108,7 @@ def check_ocr_box(image_path: Union[str, Path]) -> Tuple[List[str], List[List[fl # Use EasyOCR import ssl + import easyocr # Create unverified SSL context for development @@ -231,7 +240,7 @@ class OmniParser: for i, det in enumerate(text_detections) ], ) - + if elements and text_elements: # Filter out non-OCR elements that have OCR elements with center points colliding with them filtered_elements = [] @@ -241,17 +250,21 @@ class OmniParser: # Calculate center point of the text element center_x = (text_elem.bbox.x1 + text_elem.bbox.x2) / 2 center_y = (text_elem.bbox.y1 + text_elem.bbox.y2) / 2 - + # Check if this center point is inside the non-OCR element - if (center_x >= elem.bbox.x1 and center_x <= elem.bbox.x2 and - center_y >= elem.bbox.y1 and center_y <= elem.bbox.y2): + if ( + center_x >= elem.bbox.x1 + and center_x <= elem.bbox.x2 + and center_y >= elem.bbox.y1 + and center_y <= elem.bbox.y2 + ): should_keep = False break - + if should_keep: filtered_elements.append(elem) elements = filtered_elements - + # Merge detections using NMS all_elements = elements + text_elements boxes = torch.tensor([elem.bbox.coordinates for elem in all_elements]) diff --git a/libs/python/som/som/detection.py b/libs/python/som/som/detection.py index 3b585d9e..bc7454fd 100644 --- a/libs/python/som/som/detection.py +++ b/libs/python/som/som/detection.py @@ -1,12 +1,13 @@ -from typing import List, Dict, Any, Tuple, Optional import logging +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np import torch import torchvision -from PIL import Image -import numpy as np -from ultralytics import YOLO from huggingface_hub import hf_hub_download -from pathlib import Path +from PIL import Image +from ultralytics import YOLO logger = logging.getLogger(__name__) diff --git a/libs/python/som/som/models.py b/libs/python/som/som/models.py index aa116106..fc1e3503 100644 --- a/libs/python/som/som/models.py +++ b/libs/python/som/som/models.py @@ -1,4 +1,5 @@ -from typing import List, Tuple, Optional, Literal, Dict, Any, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + from pydantic import BaseModel, Field, validator diff --git a/libs/python/som/som/ocr.py b/libs/python/som/som/ocr.py index 32f15bd1..73f677b9 100644 --- a/libs/python/som/som/ocr.py +++ b/libs/python/som/som/ocr.py @@ -1,12 +1,13 @@ -from typing import List, Dict, Any, Tuple, Union import logging import signal from contextlib import contextmanager from pathlib import Path +from typing import Any, Dict, List, Tuple, Union + import easyocr -from PIL import Image import numpy as np import torch +from PIL import Image logger = logging.getLogger(__name__) @@ -18,9 +19,10 @@ class TimeoutException(Exception): @contextmanager def timeout(seconds: int): import threading - + # Check if we're in the main thread if threading.current_thread() is threading.main_thread(): + def timeout_handler(signum, frame): raise TimeoutException("OCR process timed out") @@ -34,7 +36,9 @@ def timeout(seconds: int): signal.signal(signal.SIGALRM, original_handler) else: # In a non-main thread, we can't use signal - logger.warning("Timeout function called from non-main thread; signal-based timeout disabled") + logger.warning( + "Timeout function called from non-main thread; signal-based timeout disabled" + ) try: yield finally: @@ -80,7 +84,7 @@ class 
OCRProcessor: # Use GPU if available use_gpu = self.device in ["cuda", "mps"] self.reader = easyocr.Reader(["en"], gpu=use_gpu) - + # Verify reader initialization if self.reader is None: raise ValueError("Failed to initialize EasyOCR reader") diff --git a/libs/python/som/som/util/utils.py b/libs/python/som/som/util/utils.py index 4303b91d..e635adcd 100644 --- a/libs/python/som/som/util/utils.py +++ b/libs/python/som/som/util/utils.py @@ -1,13 +1,14 @@ -import easyocr +import logging +import signal +import time +from contextlib import contextmanager +from typing import Any, List, Optional, Sequence, Tuple, Union, cast + import cv2 +import easyocr import matplotlib.pyplot as plt import numpy as np from PIL import Image -from typing import Union, List, Tuple, Any, Optional, cast, Sequence -import time -import signal -from contextlib import contextmanager -import logging logger = logging.getLogger(__name__) diff --git a/libs/python/som/som/visualization.py b/libs/python/som/som/visualization.py index 038af0f5..2668b231 100644 --- a/libs/python/som/som/visualization.py +++ b/libs/python/som/som/visualization.py @@ -1,10 +1,11 @@ -from typing import List, Dict, Any, Tuple -import numpy as np -from PIL import Image, ImageDraw, ImageFont -import supervision as sv -import platform -import os import logging +import os +import platform +from typing import Any, Dict, List, Tuple + +import numpy as np +import supervision as sv +from PIL import Image, ImageDraw, ImageFont logger = logging.getLogger(__name__) @@ -184,7 +185,7 @@ class BoxAnnotator: new_box = (x, y, x + box_width, y + box_height) label_width = new_box[2] - new_box[0] label_height = new_box[3] - new_box[1] - + for used_box in used_areas: if not ( new_box[2] < used_box[0] # new box is left of used box @@ -195,9 +196,11 @@ class BoxAnnotator: # Calculate dimensions of the used box used_box_width = used_box[2] - used_box[0] used_box_height = used_box[3] - used_box[1] - + # Only consider as collision if used box is NOT more than 5x bigger in both dimensions - if not (used_box_width > 5 * label_width and used_box_height > 5 * label_height): + if not ( + used_box_width > 5 * label_width and used_box_height > 5 * label_height + ): return True return False diff --git a/libs/typescript/agent/README.md b/libs/typescript/agent/README.md index 76db192e..27c152fb 100644 --- a/libs/typescript/agent/README.md +++ b/libs/typescript/agent/README.md @@ -17,28 +17,27 @@ yarn add @trycua/agent ### Basic Usage ```typescript -import AgentClient from "@trycua/agent"; +import AgentClient from '@trycua/agent'; // Connect to local HTTP server -const client = new AgentClient("https://localhost:8000"); +const client = new AgentClient('https://localhost:8000'); // Connect to a cloud container (port 8443 over HTTPS) -const cloud = new AgentClient( - "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443", - { apiKey: process.env.NEXT_PUBLIC_CUA_API_KEY || "" } -); +const cloud = new AgentClient('https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443', { + apiKey: process.env.NEXT_PUBLIC_CUA_API_KEY || '', +}); // Connect to peer -const peerClient = new AgentClient("peer://my-agent-proxy"); +const peerClient = new AgentClient('peer://my-agent-proxy'); // Send a simple text request const response = await client.responses.create({ - model: "anthropic/claude-3-5-sonnet-20241022", - input: "Write a one-sentence bedtime story about a unicorn.", + model: 'anthropic/claude-3-5-sonnet-20241022', + input: 'Write a one-sentence bedtime story about a unicorn.', // 
Optional per-request env overrides env: { - OPENAI_API_KEY: "sk-..." - } + OPENAI_API_KEY: 'sk-...', + }, }); console.log(response.output); @@ -48,48 +47,48 @@ console.log(response.output); ```typescript const response = await client.responses.create({ - model: "anthropic/claude-3-5-sonnet-20241022", + model: 'anthropic/claude-3-5-sonnet-20241022', input: [ { - role: "user", + role: 'user', content: [ - { type: "input_text", text: "What is in this image?" }, - { - type: "input_image", - image_url: "https://example.com/image.jpg" - } - ] - } + { type: 'input_text', text: 'What is in this image?' }, + { + type: 'input_image', + image_url: 'https://example.com/image.jpg', + }, + ], + }, ], - env: { OPENROUTER_API_KEY: "sk-..." } + env: { OPENROUTER_API_KEY: 'sk-...' }, }); ``` ### Advanced Configuration ```typescript -const client = new AgentClient("https://localhost:8000", { +const client = new AgentClient('https://localhost:8000', { timeout: 60000, // 60 second timeout - retries: 5, // 5 retry attempts - apiKey: "cua_...", // sent as X-API-Key header when using HTTP/HTTPS + retries: 5, // 5 retry attempts + apiKey: 'cua_...', // sent as X-API-Key header when using HTTP/HTTPS }); const response = await client.responses.create({ - model: "anthropic/claude-3-5-sonnet-20241022", - input: "Hello, world!", + model: 'anthropic/claude-3-5-sonnet-20241022', + input: 'Hello, world!', agent_kwargs: { save_trajectory: true, - verbosity: 20 + verbosity: 20, }, computer_kwargs: { - os_type: "linux", - provider_type: "cloud" + os_type: 'linux', + provider_type: 'cloud', }, // Per-request env overrides env: { - ANTHROPIC_API_KEY: "sk-...", - OPENROUTER_API_KEY: "sk-..." - } + ANTHROPIC_API_KEY: 'sk-...', + OPENROUTER_API_KEY: 'sk-...', + }, }); ``` @@ -174,6 +173,7 @@ interface Usage { ``` The `output` array contains the conversation history including: + - User messages - Agent reasoning/thinking - Computer actions and their results @@ -189,16 +189,16 @@ Connect to a CUA agent proxy server: ```typescript // Local -const client = new AgentClient("https://my-agent-server.com:8000", { apiKey: "cua_..." }); +const client = new AgentClient('https://my-agent-server.com:8000', { apiKey: 'cua_...' }); // Cloud container (port 8443) -const cloud = new AgentClient( - "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443", - { apiKey: "cua_..." } -); +const cloud = new AgentClient('https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443', { + apiKey: 'cua_...', +}); ``` Notes: + - The client sends the API key as `X-API-Key` for HTTP/HTTPS connections. - Cloud containers listen on `:8443` with HTTPS. @@ -207,7 +207,7 @@ Notes: Connect directly to another peer using WebRTC: ```typescript -const client = new AgentClient("peer://agent-proxy-peer-id"); +const client = new AgentClient('peer://agent-proxy-peer-id'); ``` The client uses PeerJS with default configuration for peer connections. diff --git a/libs/typescript/agent/examples/README.md b/libs/typescript/agent/examples/README.md index 98939466..d27eac59 100644 --- a/libs/typescript/agent/examples/README.md +++ b/libs/typescript/agent/examples/README.md @@ -9,6 +9,7 @@ This directory contains examples demonstrating how to use the `@trycua/agent` cl A simple HTML page that demonstrates using the CUA Agent Client in a browser environment. 
**Features:** + - Connect to HTTP/HTTPS or P2P (peer://) agent proxies - Send text messages to any supported model - View responses in real-time @@ -18,19 +19,21 @@ A simple HTML page that demonstrates using the CUA Agent Client in a browser env **Usage:** 1. **Build the library first:** + ```bash cd ../ pnpm build ``` 2. **Start a local web server** (required for ES modules): + ```bash # Option 1: Using Python python -m http.server 8080 - + # Option 2: Using Node.js (if you have http-server installed) npx http-server -p 8080 - + # Option 3: Using any other local server ``` @@ -44,10 +47,12 @@ A simple HTML page that demonstrates using the CUA Agent Client in a browser env - View the response in the output textarea **Supported URLs:** + - **HTTP/HTTPS**: `https://localhost:8000`, `http://my-agent-server.com:8080` - **Peer-to-Peer**: `peer://computer-agent-proxy`, `peer://any-peer-id` **Example Models:** + - `anthropic/claude-3-5-sonnet-20241022` - `openai/gpt-4` - `huggingface-local/microsoft/UI-TARS-7B` diff --git a/libs/typescript/agent/package.json b/libs/typescript/agent/package.json index f77206df..0ef50e37 100644 --- a/libs/typescript/agent/package.json +++ b/libs/typescript/agent/package.json @@ -28,8 +28,8 @@ "access": "public" }, "scripts": { - "lint": "biome lint .", - "lint:fix": "biome lint --fix .", + "lint": "prettier --check .", + "lint:fix": "prettier --write .", "build": "tsdown", "dev": "tsdown --watch", "test": "vitest", @@ -43,7 +43,6 @@ "pino": "^9.7.0" }, "devDependencies": { - "@biomejs/biome": "^1.9.4", "@types/node": "^22.15.17", "bumpp": "^10.1.0", "happy-dom": "^17.4.7", diff --git a/libs/typescript/agent/src/client.ts b/libs/typescript/agent/src/client.ts index d25e698b..2ae4da66 100644 --- a/libs/typescript/agent/src/client.ts +++ b/libs/typescript/agent/src/client.ts @@ -1,10 +1,5 @@ -import {Peer} from "peerjs"; -import type { - AgentRequest, - AgentResponse, - ConnectionType, - AgentClientOptions, -} from "./types"; +import { Peer } from 'peerjs'; +import type { AgentRequest, AgentResponse, ConnectionType, AgentClientOptions } from './types'; export class AgentClient { private url: string; @@ -22,14 +17,12 @@ export class AgentClient { }; // Determine connection type from URL - if (url.startsWith("http://") || url.startsWith("https://")) { - this.connectionType = url.startsWith("https://") ? "https" : "http"; - } else if (url.startsWith("peer://")) { - this.connectionType = "peer"; + if (url.startsWith('http://') || url.startsWith('https://')) { + this.connectionType = url.startsWith('https://') ? 'https' : 'http'; + } else if (url.startsWith('peer://')) { + this.connectionType = 'peer'; } else { - throw new Error( - "Invalid URL format. Must start with http://, https://, or peer://" - ); + throw new Error('Invalid URL format. 
Must start with http://, https://, or peer://'); } } @@ -42,10 +35,10 @@ export class AgentClient { private async sendRequest(request: AgentRequest): Promise { switch (this.connectionType) { - case "http": - case "https": + case 'http': + case 'https': return this.sendHttpRequest(request); - case "peer": + case 'peer': return this.sendPeerRequest(request); default: throw new Error(`Unsupported connection type: ${this.connectionType}`); @@ -54,21 +47,18 @@ export class AgentClient { private async sendHttpRequest(request: AgentRequest): Promise { const controller = new AbortController(); - const timeoutId = setTimeout( - () => controller.abort(), - this.options.timeout - ); + const timeoutId = setTimeout(() => controller.abort(), this.options.timeout); try { const headers: Record = { - "Content-Type": "application/json", + 'Content-Type': 'application/json', }; if (this.options.apiKey) { - headers["X-API-Key"] = this.options.apiKey; + headers['X-API-Key'] = this.options.apiKey; } const response = await fetch(`${this.url}/responses`, { - method: "POST", + method: 'POST', headers, body: JSON.stringify(request), signal: controller.signal, @@ -93,7 +83,7 @@ export class AgentClient { private async sendPeerRequest(request: AgentRequest): Promise { // Extract peer ID from peer:// URL - const peerId = this.url.replace("peer://", ""); + const peerId = this.url.replace('peer://', ''); if (!this.peer) { // Initialize peer connection with default options as requested @@ -101,36 +91,35 @@ export class AgentClient { return new Promise((resolve, reject) => { const timeout = setTimeout(() => { - reject(new Error("Peer connection timeout")); + reject(new Error('Peer connection timeout')); }, this.options.timeout); - this.peer!.on("open", () => { + this.peer!.on('open', () => { // Connect to the target peer this.connection = this.peer!.connect(peerId); - this.connection.on("open", () => { + this.connection.on('open', () => { // Send the request this.connection!.send(JSON.stringify(request)); }); - this.connection.on("data", (data: any) => { + this.connection.on('data', (data: any) => { clearTimeout(timeout); try { - const response = - typeof data === "string" ? JSON.parse(data) : data; + const response = typeof data === 'string' ? JSON.parse(data) : data; resolve(response as AgentResponse); } catch (error) { - reject(new Error("Failed to parse peer response")); + reject(new Error('Failed to parse peer response')); } }); - this.connection.on("error", (error: any) => { + this.connection.on('error', (error: any) => { clearTimeout(timeout); reject(new Error(`Peer connection error: ${error}`)); }); }); - this.peer!.on("error", (error: any) => { + this.peer!.on('error', (error: any) => { clearTimeout(timeout); reject(new Error(`Peer error: ${error}`)); }); @@ -139,7 +128,7 @@ export class AgentClient { // Reuse existing connection return new Promise((resolve, reject) => { const timeout = setTimeout(() => { - reject(new Error("Peer request timeout")); + reject(new Error('Peer request timeout')); }, this.options.timeout); if (this.connection && this.connection.open) { @@ -147,20 +136,19 @@ export class AgentClient { const handleData = (data: any) => { clearTimeout(timeout); - this.connection!.off("data", handleData); + this.connection!.off('data', handleData); try { - const response = - typeof data === "string" ? JSON.parse(data) : data; + const response = typeof data === 'string' ? 
JSON.parse(data) : data; resolve(response as AgentResponse); } catch (error) { - reject(new Error("Failed to parse peer response")); + reject(new Error('Failed to parse peer response')); } }; - this.connection.on("data", handleData); + this.connection.on('data', handleData); } else { clearTimeout(timeout); - reject(new Error("Peer connection not available")); + reject(new Error('Peer connection not available')); } }); } @@ -168,18 +156,18 @@ export class AgentClient { // Health check method async health(): Promise<{ status: string }> { - if (this.connectionType === "peer") { - return { status: this.peer?.open ? "connected" : "disconnected" }; + if (this.connectionType === 'peer') { + return { status: this.peer?.open ? 'connected' : 'disconnected' }; } try { const response = await fetch(`${this.url}/health`); if (response.ok) { - return { status: "healthy" }; + return { status: 'healthy' }; } - return { status: "unhealthy" }; + return { status: 'unhealthy' }; } catch { - return { status: "unreachable" }; + return { status: 'unreachable' }; } } diff --git a/libs/typescript/agent/src/types.ts b/libs/typescript/agent/src/types.ts index 30e7340a..eec5b1f8 100644 --- a/libs/typescript/agent/src/types.ts +++ b/libs/typescript/agent/src/types.ts @@ -28,7 +28,6 @@ export interface AgentRequest { } // #endregion - // #region Response // Response types export interface AgentResponse { @@ -46,11 +45,9 @@ export interface Usage { } // #endregion - - // #region Messages // Agent message types - can be one of several different message types -export type AgentMessage = +export type AgentMessage = | UserMessage | AssistantMessage | ReasoningMessage @@ -104,8 +101,6 @@ export interface FunctionCallOutputMessage { } // #endregion - - // #region Message Content export interface InputContent { type: 'input_image' | 'input_text'; @@ -126,14 +121,10 @@ export interface ComputerResultContent { } // #endregion - - // #region Actions -export type ComputerAction = - | ComputerActionOpenAI - | ComputerActionAnthropic; +export type ComputerAction = ComputerActionOpenAI | ComputerActionAnthropic; // OpenAI Computer Actions -export type ComputerActionOpenAI = +export type ComputerActionOpenAI = | ClickAction | DoubleClickAction | DragAction @@ -187,9 +178,7 @@ export interface WaitAction { type: 'wait'; } // Anthropic Computer Actions -export type ComputerActionAnthropic = - | LeftMouseDownAction - | LeftMouseUpAction; +export type ComputerActionAnthropic = LeftMouseDownAction | LeftMouseUpAction; export interface LeftMouseDownAction { type: 'left_mouse_down'; x: number; @@ -200,4 +189,4 @@ export interface LeftMouseUpAction { x: number; y: number; } -// #endregion \ No newline at end of file +// #endregion diff --git a/libs/typescript/agent/tsconfig.json b/libs/typescript/agent/tsconfig.json index 8d56b691..cdcd74de 100644 --- a/libs/typescript/agent/tsconfig.json +++ b/libs/typescript/agent/tsconfig.json @@ -1,28 +1,22 @@ { - "compilerOptions": { - "target": "esnext", - "lib": [ - "es2023" - ], - "moduleDetection": "force", - "module": "preserve", - "moduleResolution": "bundler", - "allowImportingTsExtensions": true, - "resolveJsonModule": true, - "types": [ - "node" - ], - "allowSyntheticDefaultImports": true, - "strict": true, - "noUnusedLocals": true, - "declaration": true, - "emitDeclarationOnly": true, - "esModuleInterop": true, - "isolatedModules": true, - "verbatimModuleSyntax": true, - "skipLibCheck": true - }, - "include": [ - "src" - ] -} \ No newline at end of file + "compilerOptions": { + "target": 
"esnext", + "lib": ["es2023"], + "moduleDetection": "force", + "module": "preserve", + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "resolveJsonModule": true, + "types": ["node"], + "allowSyntheticDefaultImports": true, + "strict": true, + "noUnusedLocals": true, + "declaration": true, + "emitDeclarationOnly": true, + "esModuleInterop": true, + "isolatedModules": true, + "verbatimModuleSyntax": true, + "skipLibCheck": true + }, + "include": ["src"] +} diff --git a/libs/typescript/agent/tsdown.config.ts b/libs/typescript/agent/tsdown.config.ts index b837b6ee..efbd5ff2 100644 --- a/libs/typescript/agent/tsdown.config.ts +++ b/libs/typescript/agent/tsdown.config.ts @@ -1,12 +1,12 @@ -import { defineConfig } from "tsdown"; +import { defineConfig } from 'tsdown'; export default defineConfig({ - entry: ["src/index.ts"], - format: ["module"], - platform: "browser", + entry: ['src/index.ts'], + format: ['module'], + platform: 'browser', dts: true, clean: true, - // Remove if we don't need to support including the library via '