Mirror of https://github.com/trycua/computer.git (synced 2026-01-04 04:19:57 -06:00)
Merge branch 'main' into feature/mcp-claude-extension
82  .github/workflows/pypi-publish-pylume.yml (vendored)
@@ -1,82 +0,0 @@
name: Publish Pylume Package

on:
  push:
    tags:
      - "pylume-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.determine-version.outputs.version }}

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  determine-version:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        run: |
          if [ "${{ github.event_name }}" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "${{ github.ref }}" =~ ^refs/tags/pylume-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for pylume"
              exit 1
            fi
          elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            # Use version from workflow dispatch
            VERSION=${{ github.event.inputs.version }}
          else
            # Use version from workflow_call
            VERSION=${{ inputs.version }}
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> $GITHUB_OUTPUT

  validate-version:
    runs-on: macos-latest
    needs: determine-version
    steps:
      - uses: actions/checkout@v4
      - name: Validate version
        id: validate-version
        run: |
          CODE_VERSION=$(grep '__version__' libs/python/pylume/pylume/__init__.py | cut -d'"' -f2)
          if [ "${{ needs.determine-version.outputs.version }}" != "$CODE_VERSION" ]; then
            echo "Version mismatch: expected $CODE_VERSION, got ${{ needs.determine-version.outputs.version }}"
            exit 1
          fi
          echo "Version validated: $CODE_VERSION"

  publish:
    needs: determine-version
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "pylume"
      package_dir: "libs/python/pylume"
      version: ${{ needs.determine-version.outputs.version }}
      is_lume_package: true
      base_package_name: "pylume"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

6  .github/workflows/pypi-reusable-publish.yml (vendored)
@@ -4,11 +4,11 @@ on:
  workflow_call:
    inputs:
      package_name:
        description: "Name of the package (e.g. pylume, computer, agent)"
        description: "Name of the package (e.g. computer, agent)"
        required: true
        type: string
      package_dir:
        description: "Directory containing the package relative to workspace root (e.g. libs/python/pylume)"
        description: "Directory containing the package relative to workspace root (e.g. libs/python/computer)"
        required: true
        type: string
      version:
@@ -21,7 +21,7 @@ on:
        type: boolean
        default: false
      base_package_name:
        description: "PyPI package name (e.g. pylume, cua-agent)"
        description: "PyPI package name (e.g. cua-agent)"
        required: true
        type: string
      make_latest:

93  .github/workflows/python-tests.yml (vendored, new file)
@@ -0,0 +1,93 @@
name: Python Unit Tests

on:
  pull_request:
    paths:
      - "libs/python/**"
      - ".github/workflows/python-tests.yml"
  push:
    branches:
      - main
    paths:
      - "libs/python/**"
      - ".github/workflows/python-tests.yml"
  workflow_dispatch: # Allow manual trigger

jobs:
  test:
    name: Test ${{ matrix.package }}
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false # Test all packages even if one fails
      matrix:
        package:
          - core
          - agent
          - computer
          - computer-server
          - mcp-server
          - pylume
          - som

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install uv
        run: |
          pip install uv

      - name: Install package and dependencies
        run: |
          cd libs/python/${{ matrix.package }}
          # Install the package in editable mode with dev dependencies
          if [ -f pyproject.toml ]; then
            uv pip install --system -e .
            # Install test dependencies
            uv pip install --system pytest pytest-asyncio pytest-mock pytest-cov
          fi
        shell: bash

      - name: Run tests
        run: |
          cd libs/python/${{ matrix.package }}
          if [ -d tests ]; then
            python -m pytest tests/ -v --tb=short --cov --cov-report=term --cov-report=xml
          else
            echo "No tests directory found, skipping tests"
          fi
        shell: bash
        env:
          CUA_TELEMETRY_DISABLED: "1" # Disable telemetry during tests

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        if: always()
        with:
          file: ./libs/python/${{ matrix.package }}/coverage.xml
          flags: ${{ matrix.package }}
          name: codecov-${{ matrix.package }}
          fail_ci_if_error: false
        continue-on-error: true

  summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs: test
    if: always()

    steps:
      - name: Check test results
        run: |
          if [ "${{ needs.test.result }}" == "failure" ]; then
            echo "❌ Some tests failed. Please check the logs above."
            exit 1
          else
            echo "✅ All tests passed!"
          fi

376  .github/workflows/test-cua-models.yml (vendored, new file)
@@ -0,0 +1,376 @@
name: Test CUA Supporting Models

# This workflow tests all supported CUA models with API keys
# Run manually using workflow_dispatch with test_models=true

on:
  pull_request_target:
    branches: [main, master]
  workflow_dispatch:
    inputs:
      test_models:
        description: "Test all supported models (requires API keys)"
        required: false
        default: true
        type: boolean
  schedule:
    # Runs at 3 PM UTC (8 AM PDT) daily
    - cron: "0 15 * * *"

jobs:
  # Test all CUA models - runs on PRs, schedules, or when manually triggered
  test-all-models:
    if: ${{ github.event_name == 'pull_request_target' || github.event_name == 'schedule' || fromJSON(inputs.test_models || 'false') }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        model:
          # Claude Sonnet/Haiku
          - anthropic/claude-sonnet-4-5-20250929
          - anthropic/claude-haiku-4-5-20251001
          - anthropic/claude-opus-4-1-20250805

          # OpenAI CU Preview
          - openai/computer-use-preview

          # GLM-V
          - openrouter/z-ai/glm-4.5v
          # - huggingface-local/zai-org/GLM-4.5V # Requires local model setup

          # Gemini CU Preview
          # - gemini-2.5-computer-use-preview-10-2025

          # InternVL
          - huggingface-local/OpenGVLab/InternVL3_5-1B
          # - huggingface-local/OpenGVLab/InternVL3_5-2B
          # - huggingface-local/OpenGVLab/InternVL3_5-4B
          # - huggingface-local/OpenGVLab/InternVL3_5-8B

          # UI-TARS (supports full computer-use, can run standalone)
          - huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B

          # Note: OpenCUA, GTA, and Holo are grounding-only models
          # They only support predict_click(), not agent.run()
          # See composed agents section below for testing them

          # Moondream (typically used in composed agents)
          # Format: moondream3+{any-llm-with-tools}
          - moondream3+anthropic/claude-sonnet-4-5-20250929 # Claude has VLM + Tools
          # - moondream3+openai/gpt-4o # GPT-4o has VLM + Tools

          # OmniParser (typically used in composed agents)
          # Format: omniparser+{any-vlm-with-tools}
          - omniparser+anthropic/claude-sonnet-4-5-20250929 # Claude has VLM + Tools
          # - omniparser+openai/gpt-4o # GPT-4o has VLM + Tools

          # Other grounding models + VLM with tools
          # Format: {grounding-model}+{any-vlm-with-tools}
          # These grounding-only models (OpenCUA, GTA, Holo) must be used in composed form
          # since they only support predict_click(), not full agent.run()
          - huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929
          - huggingface-local/xlangai/OpenCUA-7B+anthropic/claude-sonnet-4-5-20250929
          - huggingface-local/Hcompany/Holo1.5-3B+anthropic/claude-sonnet-4-5-20250929

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up uv and Python
        uses: astral-sh/setup-uv@v4
        with:
          python-version: "3.12"

      - name: Cache system packages
        uses: actions/cache@v4
        with:
          path: /var/cache/apt
          key: ${{ runner.os }}-apt-${{ hashFiles('**/Dockerfile') }}
          restore-keys: |
            ${{ runner.os }}-apt-

      - name: Install system dependencies
        timeout-minutes: 20
        run: |
          sudo apt-get update
          sudo apt-get install -y libgl1-mesa-dri libglib2.0-0

      - name: Cache Python dependencies (uv)
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            .venv
          key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock', 'libs/python/**/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-uv-

      - name: Install CUA dependencies (uv)
        run: |
          # Remove existing venv if it exists (from cache restore) to avoid interactive prompt
          rm -rf .venv
          uv venv --python 3.12
          uv pip install -e libs/python/agent -e libs/python/computer
          uv pip install -e libs/python/core
          uv pip install "cua-agent[uitars-hf,internvl-hf,opencua-hf,moondream3,omni]"
          uv pip install pytest

      - name: Cache HuggingFace models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          key: ${{ runner.os }}-hf-models-v1
          restore-keys: |
            ${{ runner.os }}-hf-models-
        # Large cache - models can be several GB each and are reused across runs

      - name: Record test start time
        run: echo "TEST_START_TIME=$(date +%s)" >> $GITHUB_ENV
        env:
          # Ensure HuggingFace uses consistent cache location
          HF_HOME: ~/.cache/huggingface

      - name: Test model with agent loop
        id: test_model
        timeout-minutes: 20
        continue-on-error: true
        run: |
          cd tests/agent_loop_testing
          uv run python agent_test.py --model "${{ matrix.model }}"
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}

      - name: Calculate test duration and prepare message
        if: always()
        run: |
          TEST_END_TIME=$(date +%s)

          # Handle case where TEST_START_TIME might not be set
          if [ -z "$TEST_START_TIME" ]; then
            TEST_START_TIME=$TEST_END_TIME
          fi

          TEST_DURATION=$((TEST_END_TIME - TEST_START_TIME))

          # Convert seconds to minutes and seconds
          MINUTES=$((TEST_DURATION / 60))
          SECONDS=$((TEST_DURATION % 60))

          # Format duration
          if [ $MINUTES -gt 0 ]; then
            DURATION_STR="${MINUTES}m ${SECONDS}s"
          else
            DURATION_STR="${SECONDS}s"
          fi

          # Determine status icon based on test step outcome
          if [ "${{ steps.test_model.outcome }}" == "success" ]; then
            STATUS_ICON="✅"
            STATUS_TEXT="PASSED"
            SLACK_COLOR="#36a64f"
          else
            STATUS_ICON="❌"
            STATUS_TEXT="FAILED"
            SLACK_COLOR="#dc3545"
          fi

          # Prepare Slack message
          echo "TESTS_CONTENT<<EOF" >> $GITHUB_ENV
          echo "*CUA Model Test Results*" >> $GITHUB_ENV
          echo "" >> $GITHUB_ENV
          echo "*Model:* ${{ matrix.model }}" >> $GITHUB_ENV
          echo "*Status:* ${STATUS_ICON} ${STATUS_TEXT}" >> $GITHUB_ENV
          echo "*Duration:* ${DURATION_STR}" >> $GITHUB_ENV
          echo "*Run:* ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          # Set color based on outcome
          echo "SLACK_COLOR=${SLACK_COLOR}" >> $GITHUB_ENV

          # Save result to JSON file for summary
          mkdir -p test_summary
          MODEL_NAME="${{ matrix.model }}"
          # Sanitize model name for filename
          SAFE_MODEL_NAME=$(echo "$MODEL_NAME" | sed 's/[^a-zA-Z0-9]/_/g')

          # Determine pass status
          if [ "${{ steps.test_model.outcome }}" == "success" ]; then
            PASSED_VAL="true"
          else
            PASSED_VAL="false"
          fi

          # Create JSON file using printf to avoid YAML parsing issues
          printf '{\n  "model": "%s",\n  "status": "%s",\n  "status_icon": "%s",\n  "duration": "%s",\n  "duration_seconds": %d,\n  "passed": %s\n}' \
            "${MODEL_NAME}" "${STATUS_TEXT}" "${STATUS_ICON}" "${DURATION_STR}" "${TEST_DURATION}" "${PASSED_VAL}" \
            > "test_summary/${SAFE_MODEL_NAME}.json"
          # Expose safe model name for subsequent steps (artifact naming)
          echo "SAFE_MODEL_NAME=${SAFE_MODEL_NAME}" >> $GITHUB_ENV

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ matrix.model }}
          path: |
            tests/agent_loop_testing/test_images/
            *.log
          retention-days: 7

      - name: Upload test summary data
        if: always()
        uses: actions/upload-artifact@v4
        with:
          # Unique, slash-free artifact name per matrix entry
          name: test-summary-${{ env.SAFE_MODEL_NAME }}
          path: test_summary/
          retention-days: 1

      - name: Set default Slack color
        if: always() && env.SLACK_COLOR == ''
        run: echo "SLACK_COLOR=#36a64f" >> $GITHUB_ENV

      # Individual model notifications disabled - only summary is sent
      # - name: Notify Slack with test results
      #   if: always()
      #   uses: rtCamp/action-slack-notify@v2
      #   env:
      #     SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
      #     SLACK_CHANNEL: ${{ vars.SLACK_CHANNEL }}
      #     SLACK_TITLE: CUA Model Test Update
      #     SLACK_COLOR: ${{ env.SLACK_COLOR }}
      #     SLACK_MESSAGE: |
      #       ${{ env.TESTS_CONTENT }}

  # Summary job that aggregates all model test results
  test-summary:
    if: ${{ always() && (github.event_name == 'pull_request_target' || github.event_name == 'schedule' || fromJSON(inputs.test_models || 'false')) }}
    needs: test-all-models
    runs-on: ubuntu-latest
    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq

      - name: Download all test summary artifacts
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          pattern: test-summary-*
          merge-multiple: true
          path: all_summaries

      - name: Generate and send summary
        if: always()
        shell: bash
        run: |
          # Create directory if it doesn't exist
          mkdir -p all_summaries

          # Get list of models being tested in this run from the matrix
          # This helps filter out artifacts from previous runs when testing locally
          EXPECTED_MODELS="${{ join(matrix.model, ' ') }}"

          # Aggregate all results
          PASSED_COUNT=0
          FAILED_COUNT=0
          TOTAL_DURATION=0
          SUMMARY_MESSAGE="*🚀 Model Summaries*\n\n"

          # Process each JSON file (find all JSON files recursively)
          # Save to temp file first to avoid subshell issues
          find all_summaries -name "*.json" -type f 2>/dev/null > /tmp/json_files.txt || true

          # Use associative array to deduplicate by model name
          declare -A processed_models

          while IFS= read -r json_file; do
            if [ -f "$json_file" ]; then
              MODEL=$(jq -r '.model' "$json_file")

              # Skip if we've already processed this model
              if [ "${processed_models[$MODEL]}" = "1" ]; then
                echo "Skipping duplicate model: $MODEL"
                continue
              fi

              # Filter: Only include models that are in the current matrix
              # This prevents including artifacts from previous workflow runs
              if [ -n "$EXPECTED_MODELS" ]; then
                if ! echo "$EXPECTED_MODELS" | grep -q "$MODEL"; then
                  echo "Skipping model from previous run: $MODEL"
                  continue
                fi
              fi

              # Mark as processed
              processed_models[$MODEL]="1"

              STATUS_ICON=$(jq -r '.status_icon' "$json_file")
              STATUS=$(jq -r '.status' "$json_file")
              DURATION=$(jq -r '.duration' "$json_file")
              DURATION_SEC=$(jq -r '.duration_seconds' "$json_file")
              PASSED=$(jq -r '.passed' "$json_file")

              # Add to summary as clean line format
              SUMMARY_MESSAGE="${SUMMARY_MESSAGE}${STATUS_ICON} ${STATUS} - \`${MODEL}\` - ${DURATION}\n"

              if [ "$PASSED" = "true" ]; then
                PASSED_COUNT=$((PASSED_COUNT + 1))
              else
                FAILED_COUNT=$((FAILED_COUNT + 1))
              fi
              TOTAL_DURATION=$((TOTAL_DURATION + DURATION_SEC))
            fi
          done < /tmp/json_files.txt

          # Check if we found any results
          TOTAL_COUNT=$((PASSED_COUNT + FAILED_COUNT))
          if [ $TOTAL_COUNT -eq 0 ]; then
            SUMMARY_MESSAGE="${SUMMARY_MESSAGE}⚠️ No test results found (workflow may have been canceled)\n"
            SLACK_COLOR="#ffa500"
          else
            # Add summary stats
            SUMMARY_MESSAGE="${SUMMARY_MESSAGE}\n*Results:* ${PASSED_COUNT} passed, ${FAILED_COUNT} failed out of ${TOTAL_COUNT} models\n"

            # Calculate total duration
            TOTAL_MIN=$((TOTAL_DURATION / 60))
            TOTAL_SEC=$((TOTAL_DURATION % 60))
            if [ $TOTAL_MIN -gt 0 ]; then
              TOTAL_DURATION_STR="${TOTAL_MIN}m ${TOTAL_SEC}s"
            else
              TOTAL_DURATION_STR="${TOTAL_SEC}s"
            fi
            SUMMARY_MESSAGE="${SUMMARY_MESSAGE}*Total Duration:* ${TOTAL_DURATION_STR}\n"

            # Determine color based on results
            if [ $FAILED_COUNT -eq 0 ]; then
              SLACK_COLOR="#36a64f"
            elif [ $PASSED_COUNT -eq 0 ]; then
              SLACK_COLOR="#dc3545"
            else
              SLACK_COLOR="#ffa500"
            fi
          fi

          SUMMARY_MESSAGE="${SUMMARY_MESSAGE}*Run:* ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"

          # Export for use in next step
          echo "SUMMARY_MESSAGE<<EOF" >> $GITHUB_ENV
          echo -e "${SUMMARY_MESSAGE}" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
          echo "SLACK_COLOR=${SLACK_COLOR}" >> $GITHUB_ENV

      - name: Send summary to Slack
        if: always()
        uses: rtCamp/action-slack-notify@v2
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          SLACK_CHANNEL: ${{ vars.SLACK_CHANNEL }}
          SLACK_TITLE: CUA Models Test Summary
          SLACK_COLOR: ${{ env.SLACK_COLOR }}
          SLACK_MESSAGE: |
            ${{ env.SUMMARY_MESSAGE }}

4  .gitignore (vendored)
@@ -202,4 +202,6 @@ storage/
# Trashes
.Trashes
.Trash-1000/
post-provision
post-provision
# Local secrets for act
.secrets

27  .vscode/launch.json (vendored)
@@ -10,7 +10,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {
@@ -23,7 +23,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {
@@ -36,7 +36,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {
@@ -49,20 +49,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
      }
    },
    {
      "name": "Run PyLume Examples",
      "type": "debugpy",
      "request": "launch",
      "program": "examples/pylume_examples.py",
      "console": "integratedTerminal",
      "justMyCode": true,
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {
@@ -84,7 +71,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {
@@ -106,7 +93,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {
@@ -119,7 +106,7 @@
      "python": "${workspaceFolder:cua-root}/.venv/bin/python",
      "cwd": "${workspaceFolder:cua-root}",
      "env": {
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som:${workspaceFolder:cua-root}/libs/python/pylume"
        "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
      }
    },
    {

10  .vscode/py.code-workspace (vendored)
@@ -20,10 +20,6 @@
      "name": "computer-server",
      "path": "../libs/python/computer-server"
    },
    {
      "name": "pylume",
      "path": "../libs/python/pylume"
    },
    {
      "name": "core",
      "path": "../libs/python/core"
@@ -51,7 +47,6 @@
      "${workspaceFolder:cua-root}/libs/python/computer",
      "${workspaceFolder:cua-root}/libs/python/agent",
      "${workspaceFolder:cua-root}/libs/python/som",
      "${workspaceFolder:cua-root}/libs/python/pylume",
      "${workspaceFolder:cua-root}/.vscode/typings"
    ],
    "python.envFile": "${workspaceFolder:cua-root}/.env",
@@ -89,10 +84,6 @@
      "name": "som",
      "depth": 2
    },
    {
      "name": "pylume",
      "depth": 2
    },
    {
      "name": "core",
      "depth": 2
@@ -103,7 +94,6 @@
      "${workspaceFolder:cua-root}/libs/python/computer",
      "${workspaceFolder:cua-root}/libs/python/agent",
      "${workspaceFolder:cua-root}/libs/python/som",
      "${workspaceFolder:cua-root}/libs/python/pylume"
    ],
    "python.languageServer": "None",
    "[python]": {

2  .vscode/settings.json (vendored)
@@ -1,6 +1,6 @@
{
  "python-envs.pythonProjects": [],
  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
  "python.defaultInterpreterPath": "${workspaceFolder}/.venv",
  "editor.formatOnSave": true,
  "editor.codeActionsOnSave": {
    "source.organizeImports": "explicit",

@@ -5,7 +5,7 @@ ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/pylume:/app/libs/python/computer-server:/app/libs/python/mcp-server"
    PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/computer-server:/app/libs/python/mcp-server"

# Install system dependencies for ARM architecture
RUN apt-get update && apt-get install -y --no-install-recommends \

46  README.md
@@ -22,14 +22,14 @@

With the [Computer SDK](#computer-sdk), you can:

- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions)
- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [Cua cloud](https://www.trycua.com/)
- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://cua.ai/docs/docs/libraries/computer#interface-actions)
- create & manage VMs [locally](https://cua.ai/docs/docs/computer-sdk/computers#cua-local-containers) or using [Cua cloud](https://www.cua.ai/)

With the [Agent SDK](#agent-sdk), you can:

- run computer-use models with a [consistent schema](https://docs.trycua.com/docs/agent-sdk/message-format)
- benchmark on OSWorld-Verified, SheetBench-V2, and more [with a single line of code using HUD](https://docs.trycua.com/docs/agent-sdk/integrations/hud) ([Notebook](https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb))
- combine UI grounding models with any LLM using [composed agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)
- run computer-use models with a [consistent schema](https://cua.ai/docs/docs/agent-sdk/message-format)
- benchmark on OSWorld-Verified, SheetBench-V2, and more [with a single line of code using HUD](https://cua.ai/docs/docs/agent-sdk/integrations/hud) ([Notebook](https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb))
- combine UI grounding models with any LLM using [composed agents](https://cua.ai/docs/docs/agent-sdk/supported-agents/composed-agents)
- use new UI agent models and UI grounding models from the Model Zoo below with just a model string (e.g., `ComputerAgent(model="openai/computer-use-preview")`)
- use API or local inference by changing a prefix (e.g., `openai/`, `openrouter/`, `ollama/`, `huggingface-local/`, `mlx/`, [etc.](https://docs.litellm.ai/docs/providers))

@@ -96,8 +96,8 @@ Core utilities for Cua

# Quick Start

- [Clone a starter template and run the code in <1 min](https://github.com/trycua/agent-template)
- [Get started with the Cua SDKs](https://docs.trycua.com/docs/quickstart-devs)
- [Get started with the Cua CLI](https://docs.trycua.com/docs/quickstart-cli)
- [Get started with the Cua SDKs](https://cua.ai/docs/docs/quickstart-devs)
- [Get started with the Cua CLI](https://cua.ai/docs/docs/quickstart-cli)

# Agent SDK

@@ -197,9 +197,9 @@ These are the valid model configurations for `ComputerAgent(model="...")`:

| Configuration | Description |
| --- | --- |
| `{computer-use-model}` | A single model to perform all computer-use tasks |
| `{grounding-model}+{any-vlm-with-tools}` | [Composed](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) with VLM for captioning and grounding LLM for element detection |
| `moondream3+{any-llm-with-tools}` | [Composed](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents) with Moondream3 for captioning and UI element detection |
| `human/human` | A [human-in-the-loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop) in place of a model |
| `{grounding-model}+{any-vlm-with-tools}` | [Composed](https://cua.ai/docs/docs/agent-sdk/supported-agents/composed-agents) with VLM for captioning and grounding LLM for element detection |
| `moondream3+{any-llm-with-tools}` | [Composed](https://cua.ai/docs/docs/agent-sdk/supported-agents/composed-agents) with Moondream3 for captioning and UI element detection |
| `human/human` | A [human-in-the-loop](https://cua.ai/docs/docs/agent-sdk/supported-agents/human-in-the-loop) in place of a model |

### Model Capabilities

@@ -207,17 +207,17 @@ The following table shows which capabilities are supported by each model:

| Model | Computer-Use | Grounding | Tools | VLM |
| --- | :---: | :---: | :---: | :---: |
| [Claude Sonnet/Haiku](https://docs.claude.com/en/docs/agents-and-tools/tool-use/computer-use-tool#how-to-implement-computer-use) | ✓ | ✓ | ✓ | ✓ |
| [OpenAI CU Preview](https://platform.openai.com/docs/models/computer-use-preview) | ✓ | ✓ | | ✓ |
| [GLM-V](https://huggingface.co/THUDM/glm-4v-9b) | ✓ | ✓ | ✓ | ✓ |
| [Gemini CU Preview](https://ai.google.dev/gemini-api/docs/computer-use) | ✓ | ✓ | | ✓ |
| [InternVL](https://huggingface.co/OpenGVLab/InternVL3_5-1B) | ✓ | ✓ | ✓ | ✓ |
| [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | ✓ | ✓ | ✓ | ✓ |
| [OpenCUA](https://huggingface.co/xlangai/OpenCUA-7B) | | ✓ | | |
| [GTA](https://huggingface.co/HelloKKMe/GTA1-7B) | | ✓ | | |
| [Holo](https://huggingface.co/Hcompany/Holo1.5-3B) | | ✓ | | |
| [Moondream](https://huggingface.co/moondream/moondream3-preview) | | ✓ | | |
| [OmniParser](https://github.com/microsoft/OmniParser) | | ✓ | | |
| [Claude Sonnet/Haiku](https://docs.claude.com/en/docs/agents-and-tools/tool-use/computer-use-tool#how-to-implement-computer-use) | 🖥️ | 🎯 | 🛠️ | 👁️ |
| [OpenAI CU Preview](https://platform.openai.com/docs/models/computer-use-preview) | 🖥️ | 🎯 | | 👁️ |
| [GLM-V](https://huggingface.co/THUDM/glm-4v-9b) | 🖥️ | 🎯 | 🛠️ | 👁️ |
| [Gemini CU Preview](https://ai.google.dev/gemini-api/docs/computer-use) | 🖥️ | 🎯 | | 👁️ |
| [InternVL](https://huggingface.co/OpenGVLab/InternVL3_5-1B) | 🖥️ | 🎯 | 🛠️ | 👁️ |
| [UI-TARS](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | 🖥️ | 🎯 | 🛠️ | 👁️ |
| [OpenCUA](https://huggingface.co/xlangai/OpenCUA-7B) | | 🎯 | | |
| [GTA](https://huggingface.co/HelloKKMe/GTA1-7B) | | 🎯 | | |
| [Holo](https://huggingface.co/Hcompany/Holo1.5-3B) | | 🎯 | | |
| [Moondream](https://huggingface.co/moondream/moondream3-preview) | | 🎯 | | |
| [OmniParser](https://github.com/microsoft/OmniParser) | | 🎯 | | |

### Model IDs

@@ -333,8 +333,8 @@ Learn more in the [SOM documentation](./libs/python/som/README.md).

# Resources

- [Cua Blog](https://www.trycua.com/blog)
- [Cua Docs](https://docs.trycua.com)
- [Cua Blog](https://www.cua.ai/blog)
- [Cua Docs](https://cua.ai/docs)

# Community and Contributions

106  TESTING.md (new file)
@@ -0,0 +1,106 @@
# Testing Guide for CUA

Quick guide to running tests and understanding the test architecture.

## 🚀 Quick Start

```bash
# Install dependencies
pip install pytest pytest-asyncio pytest-mock pytest-cov

# Install package
cd libs/python/core
pip install -e .

# Run tests
export CUA_TELEMETRY_DISABLED=1  # or $env:CUA_TELEMETRY_DISABLED="1" on Windows
pytest tests/ -v
```

## 🧪 Running Tests

```bash
# All packages
pytest libs/python/*/tests/ -v

# Specific package
cd libs/python/core && pytest tests/ -v

# With coverage
pytest tests/ --cov --cov-report=html

# Specific test
pytest tests/test_telemetry.py::TestTelemetryEnabled::test_telemetry_enabled_by_default -v
```

## 🏗️ Test Architecture

**Principles**: SRP (Single Responsibility) + Vertical Slices + Testability

```
libs/python/
├── core/tests/      # Tests ONLY core
├── agent/tests/     # Tests ONLY agent
└── computer/tests/  # Tests ONLY computer
```

Each test file = ONE feature. Each test class = ONE concern.

## ➕ Adding New Tests

1. Create `test_*.py` in the appropriate package's `tests/` directory
2. Follow the pattern:

```python
"""Unit tests for my_feature."""
import pytest
from unittest.mock import patch

class TestMyFeature:
    """Test MyFeature class."""

    def test_initialization(self):
        """Test that feature initializes."""
        from my_package import MyFeature
        feature = MyFeature()
        assert feature is not None
```

3. Mock external dependencies:

```python
@pytest.fixture
def mock_api():
    with patch("my_package.api_client") as mock:
        yield mock
```

## 🔄 CI/CD

Tests run automatically on every PR via GitHub Actions (`.github/workflows/python-tests.yml`):

- Matrix strategy: each package tested separately
- Python 3.12
- ~2 minute runtime

## 🐛 Troubleshooting

**ModuleNotFoundError**: Run `pip install -e .` in the package directory

**Tests fail in CI but pass locally**: Set `CUA_TELEMETRY_DISABLED=1`

**Async tests error**: Install `pytest-asyncio` and use `@pytest.mark.asyncio`

**Mock not working**: Patch at the usage location, not the definition:

```python
# ✅ Right
@patch("my_package.module.external_function")

# ❌ Wrong
@patch("external_library.function")
```

---

**Questions?** Check existing tests for examples or open an issue.

@@ -30,7 +30,7 @@ By the end of this tutorial, you'll be able to:
- Node.js 16+ and npm/yarn/pnpm
- Basic JavaScript or TypeScript knowledge
- OpenAI API access (Tier 3+ for computer-use-preview)
- Cua cloud container credits ([get started here](https://trycua.com/pricing))
- Cua cloud container credits ([get started here](https://cua.ai/pricing))

**Estimated Time:** 45-60 minutes

@@ -51,7 +51,7 @@ Luckily, the `@trycua/computer` library can be used in conjunction with other mo

To follow this guide, you’ll need access to a Cua cloud container.

Getting access is simple: purchase credits from our [pricing page](https://trycua.com/pricing), then create and provision a new container instance from the [dashboard](https://trycua.com/dashboard/containers). With your container running, you'll be ready to leverage the web SDK and bring automation to your JavaScript or TypeScript applications.
Getting access is simple: purchase credits from our [pricing page](https://cua.ai/pricing), then create and provision a new container instance from the [dashboard](https://cua.ai/dashboard/containers). With your container running, you'll be ready to leverage the web SDK and bring automation to your JavaScript or TypeScript applications.

## Understanding the Flow

@@ -86,7 +86,7 @@ const res = await openai.responses.create({
      role: 'user',
      content: [
        // what we want the ai to do
        { type: 'input_text', text: 'Open firefox and go to trycua.com' },
        { type: 'input_text', text: 'Open firefox and go to cua.ai' },
        // first screenshot of the vm
        {
          type: 'input_image',
@@ -144,7 +144,7 @@ Each response contains:

### Provision a Cua Cloud Container

1. Visit [trycua.com](https://trycua.com), sign up, purchase [credits](https://trycua.com/pricing), and create a new container instance from the [dashboard](https://trycua.com/dashboard).
1. Visit [cua.ai](https://cua.ai), sign up, purchase [credits](https://cua.ai/pricing), and create a new container instance from the [dashboard](https://cua.ai/dashboard).
2. Create an API key from the dashboard — be sure to save it in a secure location before continuing.
3. Start the cloud container from the dashboard.

@@ -281,7 +281,7 @@ let res = await openai.responses.create({
      role: 'user',
      content: [
        // what we want the ai to do
        { type: 'input_text', text: 'open firefox and go to trycua.com' },
        { type: 'input_text', text: 'open firefox and go to cua.ai' },
        // current screenshot of the vm
        {
          type: 'input_image',

@@ -67,7 +67,7 @@ If you try out version 0.4.x, we'd love to hear how it goes. Join us on Discord

## Links

- **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)
- **Composite Agent Docs:** [https://cua.ai/docs/agent-sdk/supported-agents/composed-agents](https://cua.ai/docs/agent-sdk/supported-agents/composed-agents)
- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)

Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.

@@ -84,4 +84,4 @@ Bring a team, pick a model stack, and push what agents can do on real computers.

**Contact**
Questions on Hack the North? Email **hackthenorth@trycua.com**.

_P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels._
_P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at cua.ai/docs; we’ll share office-hour times in both Discord channels._

@@ -22,7 +22,7 @@ From day one, though, we knew we’d have to fight for sign-ups. This was a nich

Unfortunately, Hack the North (HTN) didn’t offer an interest form to help us estimate demand, which made capacity planning tricky—especially with early-stage infra. Stress-testing takes foresight, and multimodal language model usage is still costly (~1.5× to 3–4× the price of comparable text-only models).

On top of that, we were discouraged from external promotion on [lu.ma](http://lu.ma). So we spun up our own sign-up page at **trycua.com/hackathon** and built ad-hoc Discord channels to share track details. We emphasized—repeatedly—that only students already accepted to Hack the North should register.
On top of that, we were discouraged from external promotion on [lu.ma](http://lu.ma). So we spun up our own sign-up page at **cua.ai/hackathon** and built ad-hoc Discord channels to share track details. We emphasized—repeatedly—that only students already accepted to Hack the North should register.

_(Moral: the “measure-zero effect”—no matter how many times you say it, some people won’t see it. Plenty of invalid sign-ups still slipped through.)_

@@ -89,5 +89,5 @@ Customize your evaluation with these options:
## Learn more

- Notebook with end‑to‑end examples: https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb
- Docs: https://docs.trycua.com/docs/agent-sdk/integrations/hud
- Docs: https://cua.ai/docs/agent-sdk/integrations/hud
- Live traces: https://app.hud.so

@@ -216,4 +216,4 @@ Ready to put humans back in the loop? The most sophisticated AI system knows whe

---

_Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop)._
_Questions about human-in-the-loop agents? Join the conversation in our [Discord community](https://discord.gg/cua-ai) or check out our [documentation](https://cua.ai/docs/docs/agent-sdk/supported-agents/human-in-the-loop)._

@@ -32,7 +32,7 @@ The result? **Instant deployment** in seconds instead of hours, with no infrastr

### Step 1: Get Your API Key

Sign up at [**trycua.com**](https://trycua.com) to get your API key.
Sign up at [**cua.ai**](https://cua.ai) to get your API key.

```bash
# Set your API key in environment variables
@@ -226,6 +226,6 @@ Stay tuned for updates and join our [**Discord**](https://discord.gg/cua-ai) to

Ready to deploy your Computer-Use Agents in the cloud?

Visit [**trycua.com**](https://trycua.com) to sign up and get your API key. Join our [**Discord community**](https://discord.gg/cua-ai) for support and explore more examples on [**GitHub**](https://github.com/trycua/cua).
Visit [**cua.ai**](https://cua.ai) to sign up and get your API key. Join our [**Discord community**](https://discord.gg/cua-ai) for support and explore more examples on [**GitHub**](https://github.com/trycua/cua).

Happy RPA 2.0! 🚀

@@ -174,7 +174,7 @@ Apple's announcement confirms we're on the right path. Here's what we're looking
- [Apple Containerization Framework](https://github.com/apple/containerization)
- [Lume - Direct VM Management](https://github.com/trycua/cua/tree/main/libs/lume)
- [Lumier - Docker Interface for VMs](https://github.com/trycua/cua/tree/main/libs/lumier)
- [Cua Cloud Sandbox](https://trycua.com)
- [Cua Cloud Sandbox](https://cua.ai)
- [Join our Discord](https://discord.gg/cua-ai)

---

@@ -32,7 +32,7 @@ The viewer allows you to see exactly what your agent observed and how it interac

## Opening Trajectory Viewer in 3 Simple Steps

1. **Visit**: Open your browser and go to [https://www.trycua.com/trajectory-viewer](https://www.trycua.com/trajectory-viewer).
1. **Visit**: Open your browser and go to [https://cua.ai/trajectory-viewer](https://cua.ai/trajectory-viewer).
2. **Upload**: Drag and drop a trajectories folder or click Select Folder.
3. **Explore**: View your agent’s trajectories! All data stays in your browser unless you give permission otherwise.

@@ -174,10 +174,10 @@ await computer.run()

## Links

- **Docker Provider Docs:** [https://docs.trycua.com/computers/docker](https://docs.trycua.com/computers/docker)
- **Docker Provider Docs:** [https://cua.ai/docs/computers/docker](https://cua.ai/docs/computers/docker)
- **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC)
- **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm)
- **Computer SDK:** [https://docs.trycua.com/docs/computer-sdk/computers](https://docs.trycua.com/docs/computer-sdk/computers)
- **Computer SDK:** [https://cua.ai/docs/computer-sdk/computers](https://cua.ai/docs/computer-sdk/computers)
- **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)

Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.

@@ -144,7 +144,7 @@ The `ComputerAgent` constructor provides a wide range of options for customizing
  If set (float or dict), adds a budget manager callback that tracks usage costs and stops execution if the budget is exceeded. Dict allows advanced options (e.g., `{ "max_budget": 5.0, "raise_error": True }`).
- `instructions` (`str` | `list[str]`):
  System instructions for the agent. Can be a single string or multiple strings in a tuple/list for readability; they are concatenated into one system prompt.
- `**kwargs` (`any`):
  - `api_key` (`str`):
    Optional API key override for the model provider.
  - `api_base` (`str`):
    Optional API base URL override for the model provider.
  - `**additional_generation_kwargs` (`any`):
    Any additional keyword arguments are passed through to the agent loop or model provider.

**Example with advanced options:**

@@ -168,7 +172,9 @@ agent = ComputerAgent(
    instructions=(
        "You are a helpful computer-using agent"
        "Output computer calls until you complete the given task"
    )
    ),
    api_key="your-api-key",
    api_base="https://your-api-base.com/v1",
)
```

@@ -37,7 +37,7 @@ agent = ComputerAgent(
## View Trajectories Online

View trajectories in the browser at:
**[trycua.com/trajectory-viewer](http://trycua.com/trajectory-viewer)**
**[cua.ai/trajectory-viewer](https://cua.ai/trajectory-viewer)**

The viewer provides:

@@ -13,7 +13,7 @@ All examples require a CUA API key. You can obtain one from the [Dashboard](http

## List VMs

<Tabs items={["Python", "curl"]}>
<Tabs items={['Python', 'curl']}>
  <Tab value="Python">

```python
@@ -72,6 +72,8 @@ Status values:
- `terminated`: VM has been permanently destroyed
- `failed`: VM deployment or operation failed

---

  </Tab>

</Tabs>

@@ -18,7 +18,7 @@ Execute shell commands and get detailed results:
# Run shell command
result = await computer.interface.run_command(cmd)  # result.stdout, result.stderr, result.returncode
```

  </Tab>
  <Tab value="TypeScript">

@@ -30,6 +30,63 @@ Execute shell commands and get detailed results:
  </Tab>
</Tabs>

## Window Management

Control application launching and windows:

<Tabs items={['Python', 'TypeScript']}>
  <Tab value="Python">

```python
# Launch applications
await computer.interface.launch("xfce4-terminal")
await computer.interface.launch("libreoffice --writer")
await computer.interface.open("https://www.google.com")

# Window management
windows = await computer.interface.get_application_windows("xfce4-terminal")
window_id = windows[0]
await computer.interface.activate_window(window_id)

window_id = await computer.interface.get_current_window_id()  # get the current active window id
await computer.interface.window_size(window_id)
await computer.interface.get_window_title(window_id)
await computer.interface.get_window_position(window_id)
await computer.interface.set_window_size(window_id, 1200, 800)
await computer.interface.set_window_position(window_id, 100, 100)
await computer.interface.maximize_window(window_id)
await computer.interface.minimize_window(window_id)
await computer.interface.close_window(window_id)
```

  </Tab>
  <Tab value="TypeScript">

```typescript
// Launch applications
await computer.interface.launch("xfce4-terminal");
await computer.interface.launch("libreoffice --writer");
await computer.interface.open("https://www.google.com");

// Window management
const windows = await computer.interface.getApplicationWindows("xfce4-terminal");
let windowId = windows[0];
await computer.interface.activateWindow(windowId);

windowId = await computer.interface.getCurrentWindowId(); // current active window id
await computer.interface.getWindowSize(windowId);
await computer.interface.getWindowName(windowId);
await computer.interface.getWindowPosition(windowId);
await computer.interface.setWindowSize(windowId, 1200, 800);
await computer.interface.setWindowPosition(windowId, 100, 100);
await computer.interface.maximizeWindow(windowId);
await computer.interface.minimizeWindow(windowId);
await computer.interface.closeWindow(windowId);
```

  </Tab>
</Tabs>

## Mouse Actions

Precise mouse control and interaction:

@@ -162,6 +219,35 @@ Screen capture and display information:
  </Tab>
</Tabs>

## Desktop Actions

Control desktop environment features like wallpaper:

<Tabs items={['Python', 'TypeScript']}>
  <Tab value="Python">

```python
# Get current desktop environment (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows')
env = await computer.interface.get_desktop_environment()
print(env)  # "xfce4"

# Set desktop wallpaper to an image file accessible on the VM
await computer.interface.set_wallpaper("/home/cua/shared/wallpaper.png")
```

  </Tab>
  <Tab value="TypeScript">

```typescript
// Get current desktop environment
const env = await computer.interface.getDesktopEnvironment();
console.log(env); // "xfce4"

// Set desktop wallpaper to an image file accessible on the VM
await computer.interface.setWallpaper('/home/cua/shared/wallpaper.png');
```

  </Tab>
</Tabs>

## Clipboard Actions

System clipboard management:

@@ -23,7 +23,7 @@ Cua Computers are preconfigured virtual machines running the Computer Server. Th

**Easiest & safest way to get started - works on any host OS**

This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/).
This is a Cloud Sandbox running the Computer Server. Get a container at [cua.ai](https://cua.ai/).

<Tabs items={['Python', 'TypeScript']}>
  <Tab value="Python">

@@ -1,5 +1,5 @@
{
  "title": "Computer SDK",
  "description": "Build computer-using agents with the Computer SDK",
  "pages": ["computers", "cloud-vm-management", "commands", "computer-ui", "sandboxed-python"]
  "pages": ["computers", "commands", "computer-ui", "tracing-api", "sandboxed-python"]
}

350
docs/content/docs/computer-sdk/tracing-api.mdx
Normal file
350
docs/content/docs/computer-sdk/tracing-api.mdx
Normal file
@@ -0,0 +1,350 @@
|
||||
---
|
||||
title: Computer Tracing API
|
||||
description: Record computer interactions for debugging, training, and analysis
|
||||
---
|
||||
|
||||
# Computer Tracing API
|
||||
|
||||
The Computer tracing API provides a powerful way to record computer interactions for debugging, training, analysis, and compliance purposes. Inspired by Playwright's tracing functionality, it offers flexible recording options and standardized output formats.
|
||||
|
||||
<Callout>
|
||||
The tracing API addresses GitHub issue #299 by providing a unified recording interface that works
|
||||
with any Computer usage pattern, not just ComputerAgent.
|
||||
</Callout>
|
||||
|
||||
## Overview
|
||||
|
||||
The tracing API allows you to:
|
||||
|
||||
- Record screenshots at key moments
|
||||
- Log all API calls and their results
|
||||
- Capture accessibility tree snapshots
|
||||
- Add custom metadata
|
||||
- Export recordings in standardized formats
|
||||
- Support for both automated and human-in-the-loop workflows
|
||||
|
||||
## Basic Usage
|
||||
|
||||
### Starting and Stopping Traces
|
||||
|
||||
```python
|
||||
from computer import Computer
|
||||
|
||||
computer = Computer(os_type="macos")
|
||||
await computer.run()
|
||||
|
||||
# Start tracing with default options
|
||||
await computer.tracing.start()
|
||||
|
||||
# Perform some operations
|
||||
await computer.interface.left_click(100, 200)
|
||||
await computer.interface.type_text("Hello, World!")
|
||||
await computer.interface.press_key("enter")
|
||||
|
||||
# Stop tracing and save
|
||||
trace_path = await computer.tracing.stop()
|
||||
print(f"Trace saved to: {trace_path}")
|
||||
```
### Custom Configuration

```python
# Start tracing with custom configuration
await computer.tracing.start({
    'video': False,              # Record video frames (future feature; default: False)
    'screenshots': True,         # Record screenshots (default: True)
    'api_calls': True,           # Record API calls (default: True)
    'accessibility_tree': True,  # Record accessibility snapshots
    'metadata': True,            # Allow custom metadata (default: True)
    'name': 'my_custom_trace',   # Custom trace name
    'path': './my_traces'        # Custom output directory
})

# Add custom metadata during tracing
await computer.tracing.add_metadata('user_id', 'user123')
await computer.tracing.add_metadata('test_case', 'login_flow')

# Stop with custom options
trace_path = await computer.tracing.stop({
    'path': './exports/trace.zip',
    'format': 'zip'  # 'zip' or 'dir'
})
```
## Configuration Options

### Start Options

| Option               | Type | Default        | Description                           |
| -------------------- | ---- | -------------- | ------------------------------------- |
| `video`              | bool | `False`        | Record video frames (future feature)  |
| `screenshots`        | bool | `True`         | Capture screenshots after key actions |
| `api_calls`          | bool | `True`         | Log all interface method calls        |
| `accessibility_tree` | bool | `False`        | Record accessibility tree snapshots   |
| `metadata`           | bool | `True`         | Enable custom metadata recording      |
| `name`               | str  | auto-generated | Custom name for the trace             |
| `path`               | str  | auto-generated | Custom directory for trace files      |

### Stop Options

| Option   | Type | Default        | Description                        |
| -------- | ---- | -------------- | ---------------------------------- |
| `path`   | str  | auto-generated | Custom output path for final trace |
| `format` | str  | `'zip'`        | Output format: `'zip'` or `'dir'`  |

## Use Cases

### Custom Agent Development

```python
from computer import Computer

async def test_custom_agent():
    computer = Computer(os_type="linux")
    await computer.run()

    # Start tracing for this test session
    await computer.tracing.start({
        'name': 'custom_agent_test',
        'screenshots': True,
        'accessibility_tree': True
    })

    # Your custom agent logic here
    screenshot = await computer.interface.screenshot()
    await computer.interface.left_click(500, 300)
    await computer.interface.type_text("test input")

    # Add context about what the agent is doing
    await computer.tracing.add_metadata('action', 'filling_form')
    await computer.tracing.add_metadata('confidence', 0.95)

    # Save the trace
    trace_path = await computer.tracing.stop()
    return trace_path
```

### Training Data Collection

```python
async def collect_training_data():
    computer = Computer(os_type="macos")
    await computer.run()

    tasks = [
        "open_browser_and_search",
        "create_document",
        "send_email"
    ]

    for task in tasks:
        # Start a new trace for each task
        await computer.tracing.start({
            'name': f'training_{task}',
            'screenshots': True,
            'accessibility_tree': True,
            'metadata': True
        })

        # Add task metadata
        await computer.tracing.add_metadata('task_type', task)
        await computer.tracing.add_metadata('difficulty', 'beginner')

        # Perform the task (automated or human-guided)
        await perform_task(computer, task)

        # Save this training example
        await computer.tracing.stop({
            'path': f'./training_data/{task}.zip'
        })
```
### Human-in-the-Loop Recording

```python
async def record_human_demonstration():
    computer = Computer(os_type="windows")
    await computer.run()

    # Start recording human demonstration
    await computer.tracing.start({
        'name': 'human_demo_excel_workflow',
        'screenshots': True,
        'api_calls': True,  # Will capture any programmatic actions
        'metadata': True
    })

    print("Trace recording started. Perform your demonstration...")
    print("The system will record all computer interactions.")

    # Add metadata about the demonstration
    await computer.tracing.add_metadata('demonstrator', 'expert_user')
    await computer.tracing.add_metadata('workflow', 'excel_data_analysis')

    # Human performs actions manually or through other tools
    # Tracing will still capture any programmatic interactions

    input("Press Enter when demonstration is complete...")

    # Stop and save the demonstration
    trace_path = await computer.tracing.stop()
    print(f"Human demonstration saved to: {trace_path}")
```

### RPA Debugging

```python
async def debug_rpa_workflow():
    computer = Computer(os_type="linux")
    await computer.run()

    # Start tracing with full debugging info
    await computer.tracing.start({
        'name': 'rpa_debug_session',
        'screenshots': True,
        'accessibility_tree': True,
        'api_calls': True
    })

    try:
        # Your RPA workflow
        await rpa_login_sequence(computer)
        await rpa_data_entry(computer)
        await rpa_generate_report(computer)

        await computer.tracing.add_metadata('status', 'success')

    except Exception as e:
        # Record the error in the trace
        await computer.tracing.add_metadata('error', str(e))
        await computer.tracing.add_metadata('status', 'failed')
        raise
    finally:
        # Always save the debug trace
        trace_path = await computer.tracing.stop()
        print(f"Debug trace saved to: {trace_path}")
```

## Output Format

### Directory Structure

When using `format='dir'`, traces are saved with this structure:

```
trace_20240922_143052_abc123/
├── trace_metadata.json   # Overall trace information
├── event_000001_trace_start.json
├── event_000002_api_call.json
├── event_000003_api_call.json
├── 000001_initial_screenshot.png
├── 000002_after_left_click.png
├── 000003_after_type_text.png
└── event_000004_trace_end.json
```

### Metadata Format

The `trace_metadata.json` contains:

```json
{
  "trace_id": "trace_20240922_143052_abc123",
  "config": {
    "screenshots": true,
    "api_calls": true,
    "accessibility_tree": false,
    "metadata": true
  },
  "start_time": 1695392252.123,
  "end_time": 1695392267.456,
  "duration": 15.333,
  "total_events": 12,
  "screenshot_count": 5,
  "events": [...] // All events in chronological order
}
```

### Event Format

Individual events follow this structure:

```json
{
  "type": "api_call",
  "timestamp": 1695392255.789,
  "relative_time": 3.666,
  "data": {
    "method": "left_click",
    "args": { "x": 100, "y": 200, "delay": null },
    "result": null,
    "error": null,
    "screenshot": "000002_after_left_click.png",
    "success": true
  }
}
```
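
Because the layout and event schema above are standardized, traces are easy to post-process. The snippet below is a minimal sketch for inspecting a `zip`-format trace; it assumes only the file names and fields documented on this page (`trace_metadata.json`, `total_events`, `duration`, per-event `type`/`data`), which may differ in other versions.

```python
import json
import zipfile

def summarize_trace(trace_zip: str) -> None:
    """Print a quick summary of a trace saved with format='zip'."""
    with zipfile.ZipFile(trace_zip) as zf:
        # Find the metadata file regardless of the top-level trace folder name
        meta_name = next(n for n in zf.namelist() if n.endswith("trace_metadata.json"))
        metadata = json.loads(zf.read(meta_name))

        print(f"Trace {metadata['trace_id']}: "
              f"{metadata['total_events']} events in {metadata['duration']:.1f}s")

        # Walk events in chronological order and flag failed API calls
        for event in metadata.get("events", []):
            if event.get("type") == "api_call" and not event["data"].get("success", True):
                print(f"  FAILED {event['data']['method']} at +{event['relative_time']:.2f}s")

# Example: summarize_trace("./exports/trace.zip")
```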
## Integration with ComputerAgent

The tracing API works seamlessly with existing ComputerAgent workflows:

```python
from agent import ComputerAgent
from computer import Computer

# Create computer and start tracing
computer = Computer(os_type="macos")
await computer.run()

await computer.tracing.start({
    'name': 'agent_with_tracing',
    'screenshots': True,
    'metadata': True
})

# Create agent using the same computer
agent = ComputerAgent(
    model="openai/computer-use-preview",
    tools=[computer]
)

# Agent operations will be automatically traced
async for _ in agent.run("open cua.ai and navigate to docs"):
    pass

# Save the combined trace
trace_path = await computer.tracing.stop()
```

## Privacy Considerations

The tracing API is designed with privacy in mind:

- Clipboard content is not recorded (only content length)
- Screenshots can be disabled
- Sensitive text input can be filtered
- Custom metadata allows you to control what information is recorded
## Comparison with ComputerAgent Trajectories

| Feature                | ComputerAgent Trajectories | Computer.tracing     |
| ---------------------- | -------------------------- | -------------------- |
| **Scope**              | ComputerAgent only         | Any Computer usage   |
| **Flexibility**        | Fixed format               | Configurable options |
| **Custom Agents**      | Not supported              | Fully supported      |
| **Human-in-the-loop**  | Limited                    | Full support         |
| **Real-time Control**  | No                         | Start/stop anytime   |
| **Output Format**      | Agent-specific             | Standardized         |
| **Accessibility Data** | No                         | Optional             |

## Best Practices

1. **Start tracing early**: Begin recording before your main workflow to capture the complete session
2. **Use meaningful names**: Provide descriptive trace names for easier organization
3. **Add contextual metadata**: Include information about what you're testing or demonstrating
4. **Handle errors gracefully**: Always stop tracing in a `finally` block (see the sketch after this list)
5. **Choose appropriate options**: Only record what you need to minimize overhead
6. **Organize output**: Use custom paths to organize traces by project or use case
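
One way to make point 4 hard to forget is a small user-side helper. This is a sketch built only on the `start()`/`stop()` calls documented above; it is not part of the Computer API itself:

```python
from contextlib import asynccontextmanager

@asynccontextmanager
async def traced(computer, **start_options):
    """Guarantee tracing.stop() runs even if the traced block raises."""
    if start_options:
        await computer.tracing.start(start_options)
    else:
        await computer.tracing.start()
    try:
        yield computer.tracing
    finally:
        trace_path = await computer.tracing.stop()
        print(f"Trace saved to: {trace_path}")

# Usage:
# async with traced(computer, name='guarded_workflow', screenshots=True):
#     await computer.interface.left_click(100, 200)
```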
The Computer tracing API provides a powerful foundation for recording, analyzing, and improving computer automation workflows across all use cases.

@@ -30,7 +30,7 @@ Choose how you want to run your Cua computer. **Cloud Sandbox is recommended** f

**Easiest & safest way to get started - works on any host OS**

1. Go to [trycua.com/signin](https://www.trycua.com/signin)
1. Go to [cua.ai/signin](https://cua.ai/signin)
2. Navigate to **Dashboard > Containers > Create Instance**
3. Create a **Medium, Ubuntu 22** container
4. Note your container name and API key

@@ -312,7 +312,7 @@ python -m agent.cli omniparser+ollama_chat/llama3.2:latest

If you haven't set up environment variables, the CLI will guide you through the setup:

1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [trycua.com](https://www.trycua.com/))
1. **Sandbox Name**: Enter your Cua sandbox name (or get one at [cua.ai](https://cua.ai/))
2. **CUA API Key**: Enter your Cua API key
3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.)

@@ -24,7 +24,7 @@ You can run your Cua computer in the cloud (recommended for easiest setup), loca

Cua Cloud Sandbox provides virtual machines that run Ubuntu.

1. Go to [trycua.com/signin](https://www.trycua.com/signin)
1. Go to [cua.ai/signin](https://cua.ai/signin)
2. Navigate to **Dashboard > Containers > Create Instance**
3. Create a **Medium, Ubuntu 22** sandbox
4. Note your sandbox name and API key
@@ -19,6 +19,7 @@
    "posthog-js": "^1.276.0",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
    "react-icons": "^5.5.0",
    "remark": "^15.0.1",
    "remark-gfm": "^4.0.1",
    "remark-mdx": "^3.1.0",

12  docs/pnpm-lock.yaml  generated

@@ -38,6 +38,9 @@ importers:
      react-dom:
        specifier: ^19.1.0
        version: 19.1.0(react@19.1.0)
      react-icons:
        specifier: ^5.5.0
        version: 5.5.0(react@19.1.0)
      remark:
        specifier: ^15.0.1
        version: 15.0.1
@@ -2054,6 +2057,11 @@ packages:
    peerDependencies:
      react: ^19.1.0

  react-icons@5.5.0:
    resolution: {integrity: sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw==}
    peerDependencies:
      react: '*'

  react-medium-image-zoom@5.2.14:
    resolution: {integrity: sha512-nfTVYcAUnBzXQpPDcZL+cG/e6UceYUIG+zDcnemL7jtAqbJjVVkA85RgneGtJeni12dTyiRPZVM6Szkmwd/o8w==}
    peerDependencies:
@@ -4622,6 +4630,10 @@ snapshots:
      react: 19.1.0
      scheduler: 0.26.0

  react-icons@5.5.0(react@19.1.0):
    dependencies:
      react: 19.1.0

  react-medium-image-zoom@5.2.14(react-dom@19.1.0(react@19.1.0))(react@19.1.0):
    dependencies:
      react: 19.1.0
@@ -10,6 +10,7 @@ import type { Metadata } from 'next';
import Link from 'next/link';
import { notFound, redirect } from 'next/navigation';
import { PageFeedback } from '@/components/page-feedback';
import { DocActionsMenu } from '@/components/doc-actions-menu';

export default async function Page(props: { params: Promise<{ slug?: string[] }> }) {
  const params = await props.params;
@@ -177,14 +178,26 @@ export default async function Page(props: { params: Promise<{ slug?: string[] }>
    );
  };

  const tocFooter = () => {
    return (
      <div className="mt-4">
        <DocActionsMenu pageUrl={page.url} pageTitle={page.data.title} filePath={page.file.path} />
      </div>
    );
  };

  return (
    <DocsPage toc={page.data.toc} tableOfContent={{ header: tocHeader() }} full={page.data.full}>
    <DocsPage
      toc={page.data.toc}
      tableOfContent={{ header: tocHeader(), footer: tocFooter() }}
      full={page.data.full}
    >
      <div className="flex flex-row w-full items-start">
        <div className="flex-1">
          <div className="flex flex-row w-full">
            <DocsTitle>{page.data.title}</DocsTitle>

            <div className="ml-auto">
            <div className="ml-auto flex items-center gap-2">
              {apiSection && versionItems.length > 1 && (
                <Popover>
                  <PopoverTrigger
@@ -273,15 +286,99 @@ export async function generateMetadata(props: {
  if (page.url.includes('api')) title = `${page.data.title} | Cua API Docs`;
  if (page.url.includes('guide')) title = ` Guide: ${page.data.title} | Cua Docs`;

  // Canonical URL points to cua.ai to consolidate all SEO authority on main domain
  const canonicalUrl = `https://cua.ai${page.url}`;

  // Extract keywords from the page for SEO
  const keywords = [
    'computer use agent',
    'computer use',
    'AI automation',
    'visual automation',
    page.data.title,
  ];

  // Structured data for better Google indexing (TechArticle schema)
  const structuredData = {
    '@context': 'https://schema.org',
    '@type': 'TechArticle',
    headline: page.data.title,
    description: page.data.description,
    url: canonicalUrl,
    publisher: {
      '@type': 'Organization',
      name: 'Cua',
      url: 'https://cua.ai',
      logo: {
        '@type': 'ImageObject',
        url: 'https://cua.ai/cua_logo_black.svg',
      },
    },
    mainEntityOfPage: {
      '@type': 'WebPage',
      '@id': canonicalUrl,
    },
  };

  // Breadcrumb schema for better site structure understanding
  const breadcrumbSchema = {
    '@context': 'https://schema.org',
    '@type': 'BreadcrumbList',
    itemListElement: [
      {
        '@type': 'ListItem',
        position: 1,
        name: 'Cua',
        item: 'https://cua.ai',
      },
      {
        '@type': 'ListItem',
        position: 2,
        name: 'Documentation',
        item: 'https://cua.ai/docs',
      },
      {
        '@type': 'ListItem',
        position: 3,
        name: page.data.title,
        item: canonicalUrl,
      },
    ],
  };

  return {
    title,
    description: page.data.description,
    keywords,
    authors: [{ name: 'Cua', url: 'https://cua.ai' }],
    robots: {
      index: true,
      follow: true,
      googleBot: {
        index: true,
        follow: true,
        'max-image-preview': 'large',
        'max-snippet': -1,
      },
    },
    alternates: {
      canonical: canonicalUrl,
    },
    openGraph: {
      title,
      description: page.data.description,
      type: 'article',
      siteName: 'Cua Docs',
      url: 'https://trycua.com/docs',
      url: canonicalUrl,
    },
    twitter: {
      card: 'summary',
      title,
      description: page.data.description,
      creator: '@trycua',
    },
    other: {
      'script:ld+json': JSON.stringify([structuredData, breadcrumbSchema]),
    },
  };
}
@@ -41,15 +41,15 @@ export const baseOptions: BaseLayoutProps = {
  githubUrl: 'https://github.com/trycua/cua',
  links: [
    {
      url: 'https://trycua.com',
      text: 'Cua home',
      url: 'https://cua.ai',
      text: 'Cua Home',
      type: 'icon',
      icon: <HomeIcon />,
      external: false,
      external: true,
    },
    {
      url: 'https://discord.com/invite/mVnXXpdE85',
      text: 'Cua discord',
      text: 'Discord',
      type: 'icon',
      icon: (
        <>
@@ -69,6 +69,7 @@ export const baseOptions: BaseLayoutProps = {
          />
        </>
      ),
      external: true,
    },
  ],
};

13  docs/src/app/robots.ts  Normal file
@@ -0,0 +1,13 @@
import { MetadataRoute } from 'next';

export default function robots(): MetadataRoute.Robots {
  return {
    rules: {
      userAgent: '*',
      allow: ['/', '/llms.txt'],
      disallow: [],
    },
    sitemap: 'https://cua.ai/docs/sitemap.xml',
    host: 'https://cua.ai',
  };
}

32  docs/src/app/sitemap.ts  Normal file

@@ -0,0 +1,32 @@
import { MetadataRoute } from 'next';
import { source } from '@/lib/source';

export default function sitemap(): MetadataRoute.Sitemap {
  const baseUrl = 'https://cua.ai';

  // Get all pages from fumadocs source
  const pages = source.getPages();

  // Map pages to sitemap entries with /docs prefix
  const docPages = pages.map((page) => {
    // Ensure URL starts with /docs
    const url = page.url.startsWith('/docs') ? page.url : `/docs${page.url}`;

    return {
      url: `${baseUrl}${url}`,
      lastModified: new Date(),
      changeFrequency: 'weekly' as const,
      priority: url === '/docs' ? 1.0 : 0.8,
    };
  });

  // Add main docs page if not included
  const mainDocsPage = {
    url: `${baseUrl}/docs`,
    lastModified: new Date(),
    changeFrequency: 'weekly' as const,
    priority: 1.0,
  };

  return [mainDocsPage, ...docPages];
}

126  docs/src/components/doc-actions-menu.tsx  Normal file
@@ -0,0 +1,126 @@
'use client';

import { useState } from 'react';
import { SiOpenai, SiAnthropic, SiMarkdown, SiGithub } from 'react-icons/si';
import posthog from 'posthog-js';

interface DocActionsMenuProps {
  pageUrl: string;
  pageTitle: string;
  filePath: string;
}

export function DocActionsMenu({ pageUrl, pageTitle, filePath }: DocActionsMenuProps) {
  const [copied, setCopied] = useState(false);

  const handleCopyMarkdown = async () => {
    try {
      const githubRawUrl = `https://raw.githubusercontent.com/trycua/cua/refs/heads/main/docs/content/docs/${filePath}`;

      const response = await fetch(githubRawUrl);
      if (!response.ok) {
        throw new Error('Failed to fetch markdown');
      }
      const markdown = await response.text();

      await navigator.clipboard.writeText(markdown);

      setCopied(true);
      setTimeout(() => setCopied(false), 2000);

      posthog.capture('docs_copy_markdown_clicked', {
        page: pageUrl,
        page_title: pageTitle,
        success: true,
      });
    } catch (error) {
      console.error('Error copying markdown:', error);

      try {
        const urlWithUtm = `https://cua.ai${pageUrl}?utm_source=cua.ai/docs`;
        await navigator.clipboard.writeText(urlWithUtm);
        setCopied(true);
        setTimeout(() => setCopied(false), 2000);
      } catch (fallbackError) {
        console.error('Error copying URL:', fallbackError);
      }

      posthog.capture('docs_copy_markdown_clicked', {
        page: pageUrl,
        page_title: pageTitle,
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
    }
  };

  const handleEditGithub = () => {
    posthog.capture('docs_edit_github_clicked', {
      page: pageUrl,
      page_title: pageTitle,
    });

    const githubEditUrl = `https://github.com/trycua/cua/edit/main/docs/content/docs/${filePath}`;
    window.open(githubEditUrl, '_blank', 'noopener,noreferrer');
  };

  const handleOpenChatGPT = () => {
    posthog.capture('docs_open_chatgpt_clicked', {
      page: pageUrl,
      page_title: pageTitle,
    });

    const docUrl = `https://cua.ai${pageUrl}?utm_source=cua.ai/docs`;
    const prompt = `I need help understanding this cua.ai documentation page: "${pageTitle}". Please read and help me with: ${docUrl}`;
    const chatgptUrl = `https://chatgpt.com/?q=${encodeURIComponent(prompt)}`;
    window.open(chatgptUrl, '_blank', 'noopener,noreferrer');
  };

  const handleOpenClaude = () => {
    posthog.capture('docs_open_claude_clicked', {
      page: pageUrl,
      page_title: pageTitle,
    });

    const docUrl = `https://cua.ai${pageUrl}?utm_source=cua.ai/docs`;
    const prompt = `I need help understanding this cua.ai documentation page: "${pageTitle}". Please read and help me with: ${docUrl}`;
    const claudeUrl = `https://claude.ai/new?q=${encodeURIComponent(prompt)}`;
    window.open(claudeUrl, '_blank', 'noopener,noreferrer');
  };

  return (
    <div className="flex flex-col gap-2">
      <button
        onClick={handleCopyMarkdown}
        className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
      >
        <SiMarkdown className="w-2 h-4 flex-shrink-0" />
        <span>{copied ? 'Copied!' : 'Copy as markdown'}</span>
      </button>

      <button
        onClick={handleEditGithub}
        className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
      >
        <SiGithub className="w-4 h-4 flex-shrink-0" />
        <span>Edit on GitHub</span>
      </button>

      <button
        onClick={handleOpenChatGPT}
        className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
      >
        <SiOpenai className="w-4 h-4 flex-shrink-0" />
        <span>Open in ChatGPT</span>
      </button>

      <button
        onClick={handleOpenClaude}
        className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
      >
        <SiAnthropic className="w-4 h-4 flex-shrink-0" />
        <span>Open in Claude</span>
      </button>
    </div>
  );
}
@@ -1,15 +1,159 @@
export function Footer() {
  return (
    <footer className="mt-auto border-t border-fd-border py-4">
      <div className="container mx-auto px-4 flex justify-end">
        <a
          href="https://www.cua.ai/cookie-policy"
          target="_blank"
          rel="noopener noreferrer"
          className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
        >
          Cookie Policy
        </a>
    <footer className="mt-auto border-t border-fd-border py-8">
      <div className="container mx-auto px-4">
        <div className="grid grid-cols-1 md:grid-cols-4 gap-8 mb-6">
          {/* Product Links */}
          <div>
            <h3 className="font-semibold text-sm mb-3 text-fd-foreground">Product</h3>
            <ul className="space-y-2">
              <li>
                <a
                  href="https://cua.ai"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Home
                </a>
              </li>
              <li>
                <a
                  href="https://cua.ai/pricing"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Pricing
                </a>
              </li>
              <li>
                <a
                  href="https://cua.ai/#features"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Features
                </a>
              </li>
            </ul>
          </div>

          {/* Documentation Links */}
          <div>
            <h3 className="font-semibold text-sm mb-3 text-fd-foreground">Documentation</h3>
            <ul className="space-y-2">
              <li>
                <a
                  href="/docs"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Getting Started
                </a>
              </li>
              <li>
                <a
                  href="/docs/agent-sdk/agent-loops"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Agent Loops
                </a>
              </li>
              <li>
                <a
                  href="/docs/quickstart-devs"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Quick Start
                </a>
              </li>
            </ul>
          </div>

          {/* Resources Links */}
          <div>
            <h3 className="font-semibold text-sm mb-3 text-fd-foreground">Resources</h3>
            <ul className="space-y-2">
              <li>
                <a
                  href="https://cua.ai/blog"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Blog
                </a>
              </li>
              <li>
                <a
                  href="https://github.com/trycua/cua"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  GitHub
                </a>
              </li>
              <li>
                <a
                  href="https://discord.com/invite/mVnXXpdE85"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Discord Community
                </a>
              </li>
            </ul>
          </div>

          {/* Company Links */}
          <div>
            <h3 className="font-semibold text-sm mb-3 text-fd-foreground">Company</h3>
            <ul className="space-y-2">
              <li>
                <a
                  href="https://cua.ai/about"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  About
                </a>
              </li>
              <li>
                <a
                  href="mailto:hello@trycua.com"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Contact
                </a>
              </li>
              <li>
                <a
                  href="https://cua.ai/cookie-policy"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
                >
                  Cookie Policy
                </a>
              </li>
            </ul>
          </div>
        </div>

        {/* Bottom Bar */}
        <div className="pt-6 border-t border-fd-border flex flex-col md:flex-row justify-between items-center gap-4">
          <p className="text-sm text-fd-muted-foreground">
            © {new Date().getFullYear()} Cua. All rights reserved.
          </p>
          <div className="flex gap-4">
            <a
              href="https://cua.ai/privacy"
              className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
            >
              Privacy Policy
            </a>
            <a
              href="https://cua.ai/terms"
              className="text-sm text-fd-muted-foreground hover:text-fd-foreground transition-colors"
            >
              Terms of Service
            </a>
          </div>
        </div>
      </div>
    </footer>
  );
@@ -6,7 +6,7 @@ import 'dotenv/config';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

const COMPUTER_USE_PROMPT = 'Open firefox and go to trycua.com';
const COMPUTER_USE_PROMPT = 'Open firefox and go to cua.ai';

// Initialize the Computer Connection
const computer = new Computer({

@@ -38,8 +38,8 @@ def load_env_or_fail() -> None:
    """
    Build Agent Config
    - customize agent behavior, tool integration, callbacks, resource management, and more
    - https://docs.trycua.com/docs/agent-sdk/agent-loops#parameters
    - https://docs.trycua.com/docs/agent-sdk/supported-model-providers
    - https://cua.ai/docs/agent-sdk/agent-loops#parameters
    - https://cua.ai/docs/agent-sdk/supported-model-providers
    """

@@ -76,7 +76,7 @@ async def run_hud_eval() -> None:

    """
    Customize your hud eval below, check the doc for additional params
    - https://docs.trycua.com/docs/agent-sdk/integrations/hud#parameters-1
    - https://cua.ai/docs/agent-sdk/integrations/hud#parameters-1
    - recommend low max steps (5-10) for testing, then max 100 for benchmarking
    - also select specific tasks to run by splitting the dataset
    """

384  examples/tracing_examples.py  Normal file
@@ -0,0 +1,384 @@
"""
Examples demonstrating the Computer.tracing API for recording sessions.

This module shows various use cases for the new Computer.tracing functionality,
including training data collection, debugging, and compliance recording.
"""

import asyncio
import logging
from pathlib import Path

from agent import ComputerAgent
from computer import Computer


async def basic_tracing_example():
    """
    Basic example showing how to use Computer.tracing for recording a simple session.
    """
    print("=== Basic Tracing Example ===")

    # Initialize computer
    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start tracing with basic configuration
        await computer.tracing.start(
            {"screenshots": True, "api_calls": True, "metadata": True, "name": "basic_session"}
        )

        print("Tracing started...")

        # Perform some computer operations
        await computer.interface.move_cursor(100, 100)
        await computer.interface.left_click()
        await computer.interface.type_text("Hello, tracing!")
        await computer.interface.press_key("enter")

        # Add custom metadata
        await computer.tracing.add_metadata("session_type", "basic_demo")
        await computer.tracing.add_metadata("user_notes", "Testing basic functionality")

        # Stop tracing and save
        trace_path = await computer.tracing.stop({"format": "zip"})
        print(f"Trace saved to: {trace_path}")

    finally:
        await computer.stop()


async def agent_tracing_example():
    """
    Example showing how to use tracing with ComputerAgent for enhanced session recording.
    """
    print("=== Agent with Tracing Example ===")

    # Initialize computer and agent
    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start comprehensive tracing
        await computer.tracing.start(
            {
                "screenshots": True,
                "api_calls": True,
                "accessibility_tree": True,  # Include accessibility data for training
                "metadata": True,
                "name": "agent_session",
            }
        )

        # Create agent
        agent = ComputerAgent(
            model="openai/computer-use-preview", tools=[computer], verbosity=logging.INFO
        )

        # Add metadata about the agent session
        await computer.tracing.add_metadata("agent_model", "openai/computer-use-preview")
        await computer.tracing.add_metadata("task_type", "web_search")

        # Run agent task
        async for message in agent.run(
            "Open a web browser and search for 'computer use automation'"
        ):
            print(f"Agent: {message}")

        # Stop tracing
        trace_path = await computer.tracing.stop({"format": "zip"})
        print(f"Agent trace saved to: {trace_path}")

    finally:
        await computer.stop()


async def custom_agent_tracing_example():
    """
    Example showing tracing with custom agent implementations.
    """
    print("=== Custom Agent Tracing Example ===")

    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start tracing with custom path
        trace_dir = Path.cwd() / "custom_traces" / "my_agent_session"
        await computer.tracing.start(
            {
                "screenshots": True,
                "api_calls": True,
                "accessibility_tree": False,
                "metadata": True,
                "path": str(trace_dir),
            }
        )

        # Custom agent logic using direct computer calls
        await computer.tracing.add_metadata("session_type", "custom_agent")
        await computer.tracing.add_metadata("purpose", "RPA_workflow")

        # Take initial screenshot
        screenshot = await computer.interface.screenshot()

        # Simulate RPA workflow
        await computer.interface.move_cursor(500, 300)
        await computer.interface.left_click()
        await computer.interface.type_text("automation workflow test")

        # Add workflow checkpoint
        await computer.tracing.add_metadata("checkpoint", "text_input_complete")

        await computer.interface.hotkey("command", "a")  # Select all
        await computer.interface.hotkey("command", "c")  # Copy

        # Stop tracing and save as directory
        trace_path = await computer.tracing.stop({"format": "dir"})
        print(f"Custom agent trace saved to: {trace_path}")

    finally:
        await computer.stop()


async def training_data_collection_example():
    """
    Example for collecting training data with rich context.
    """
    print("=== Training Data Collection Example ===")

    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start tracing optimized for training data
        await computer.tracing.start(
            {
                "screenshots": True,  # Essential for visual training
                "api_calls": True,  # Capture action sequences
                "accessibility_tree": True,  # Rich semantic context
                "metadata": True,  # Custom annotations
                "name": "training_session",
            }
        )

        # Add training metadata
        await computer.tracing.add_metadata("data_type", "training")
        await computer.tracing.add_metadata("task_category", "ui_automation")
        await computer.tracing.add_metadata("difficulty", "intermediate")
        await computer.tracing.add_metadata("annotator", "human_expert")

        # Simulate human demonstration
        await computer.interface.screenshot()  # Baseline screenshot

        # Step 1: Navigate to application
        await computer.tracing.add_metadata("step", "1_navigate_to_app")
        await computer.interface.move_cursor(100, 50)
        await computer.interface.left_click()

        # Step 2: Input data
        await computer.tracing.add_metadata("step", "2_input_data")
        await computer.interface.type_text("training example data")

        # Step 3: Process
        await computer.tracing.add_metadata("step", "3_process")
        await computer.interface.press_key("tab")
        await computer.interface.press_key("enter")

        # Final metadata
        await computer.tracing.add_metadata("success", True)
        await computer.tracing.add_metadata("completion_time", "45_seconds")

        trace_path = await computer.tracing.stop()
        print(f"Training data collected: {trace_path}")

    finally:
        await computer.stop()


async def debugging_session_example():
    """
    Example for debugging agent behavior with detailed tracing.
    """
    print("=== Debugging Session Example ===")

    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start tracing for debugging
        await computer.tracing.start(
            {
                "screenshots": True,
                "api_calls": True,
                "accessibility_tree": True,
                "metadata": True,
                "name": "debug_session",
            }
        )

        # Debug metadata
        await computer.tracing.add_metadata("session_type", "debugging")
        await computer.tracing.add_metadata("issue", "click_target_detection")
        await computer.tracing.add_metadata("expected_behavior", "click_on_button")

        try:
            # Problematic sequence that needs debugging
            await computer.interface.move_cursor(200, 150)
            await computer.interface.left_click()

            # This might fail - let's trace it
            await computer.interface.type_text("debug test")
            await computer.tracing.add_metadata("action_result", "successful_typing")

        except Exception as e:
            # Record the error in tracing
            await computer.tracing.add_metadata("error_encountered", str(e))
            await computer.tracing.add_metadata("error_type", type(e).__name__)
            print(f"Error occurred: {e}")

        # Stop tracing
        trace_path = await computer.tracing.stop()
        print(f"Debug trace saved: {trace_path}")
        print("Use this trace to analyze the failure and improve the agent")

    finally:
        await computer.stop()


async def human_in_the_loop_example():
    """
    Example for recording mixed human/agent sessions.
    """
    print("=== Human-in-the-Loop Example ===")

    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start tracing for hybrid session
        await computer.tracing.start(
            {
                "screenshots": True,
                "api_calls": True,
                "metadata": True,
                "name": "human_agent_collaboration",
            }
        )

        # Initial agent phase
        await computer.tracing.add_metadata("phase", "agent_autonomous")
        await computer.tracing.add_metadata("agent_model", "computer-use-preview")

        # Agent performs initial task
        await computer.interface.move_cursor(300, 200)
        await computer.interface.left_click()
        await computer.interface.type_text("automated input")

        # Transition to human intervention
        await computer.tracing.add_metadata("phase", "human_intervention")
        await computer.tracing.add_metadata("intervention_reason", "complex_ui_element")

        print("Human intervention phase - manual actions will be recorded...")
        # At this point, human can take control while tracing continues

        # Simulate human input (in practice, this would be actual human interaction)
        await computer.interface.move_cursor(500, 400)
        await computer.interface.double_click()
        await computer.tracing.add_metadata("human_action", "double_click_complex_element")

        # Back to agent
        await computer.tracing.add_metadata("phase", "agent_completion")
        await computer.interface.press_key("enter")

        trace_path = await computer.tracing.stop()
        print(f"Human-agent collaboration trace saved: {trace_path}")

    finally:
        await computer.stop()


async def performance_monitoring_example():
    """
    Example for performance monitoring and analysis.
    """
    print("=== Performance Monitoring Example ===")

    computer = Computer(os_type="macos", provider_type="lume")
    await computer.run()

    try:
        # Start tracing for performance analysis
        await computer.tracing.start(
            {
                "screenshots": False,  # Skip screenshots for performance
                "api_calls": True,
                "metadata": True,
                "name": "performance_test",
            }
        )

        # Performance test metadata
        await computer.tracing.add_metadata("test_type", "performance_benchmark")
        await computer.tracing.add_metadata("expected_duration", "< 30 seconds")

        import time

        start_time = time.time()

        # Perform a series of rapid actions
        for i in range(10):
            await computer.tracing.add_metadata("iteration", i)
            await computer.interface.move_cursor(100 + i * 50, 100)
            await computer.interface.left_click()
            await computer.interface.type_text(f"Test {i}")
            await computer.interface.press_key("tab")

        end_time = time.time()

        # Record performance metrics
        await computer.tracing.add_metadata(
            "actual_duration", f"{end_time - start_time:.2f} seconds"
        )
        await computer.tracing.add_metadata(
            "actions_per_second", f"{40 / (end_time - start_time):.2f}"
        )

        trace_path = await computer.tracing.stop()
        print(f"Performance trace saved: {trace_path}")

    finally:
        await computer.stop()


async def main():
    """
    Run all tracing examples.
    """
    print("Computer.tracing API Examples")
    print("=" * 50)

    examples = [
        basic_tracing_example,
        agent_tracing_example,
        custom_agent_tracing_example,
        training_data_collection_example,
        debugging_session_example,
        human_in_the_loop_example,
        performance_monitoring_example,
    ]

    for example in examples:
        try:
            await example()
            print()
        except Exception as e:
            print(f"Error in {example.__name__}: {e}")
            print()

    print("All examples completed!")


if __name__ == "__main__":
    asyncio.run(main())
@@ -55,11 +55,11 @@ To get set up with Lume for development, read [these instructions](Development.m

## Docs

- [Installation](https://trycua.com/docs/libraries/lume/installation)
- [Prebuilt Images](https://trycua.com/docs/libraries/lume/prebuilt-images)
- [CLI Reference](https://trycua.com/docs/libraries/lume/cli-reference)
- [HTTP API](https://trycua.com/docs/libraries/lume/http-api)
- [FAQ](https://trycua.com/docs/libraries/lume/faq)
- [Installation](https://cua.ai/docs/libraries/lume/installation)
- [Prebuilt Images](https://cua.ai/docs/libraries/lume/prebuilt-images)
- [CLI Reference](https://cua.ai/docs/libraries/lume/cli-reference)
- [HTTP API](https://cua.ai/docs/libraries/lume/http-api)
- [FAQ](https://cua.ai/docs/libraries/lume/faq)

## Contributing

@@ -58,14 +58,14 @@ docker run -it --rm \

After running the command above, you can access your macOS VM through a web browser (e.g., http://localhost:8006).

> **Note:** With the basic setup above, your VM will be reset when you stop the container (ephemeral mode). This means any changes you make inside the macOS VM will be lost. See [the documentation](https://trycua.com/docs/libraries/lumier/docker) for how to save your VM state.
> **Note:** With the basic setup above, your VM will be reset when you stop the container (ephemeral mode). This means any changes you make inside the macOS VM will be lost. See [the documentation](https://cua.ai/docs/libraries/lumier/docker) for how to save your VM state.

## Docs

- [Installation](https://trycua.com/docs/libraries/lumier/installation)
- [Docker](https://trycua.com/docs/libraries/lumier/docker)
- [Docker Compose](https://trycua.com/docs/libraries/lumier/docker-compose)
- [Building Lumier](https://trycua.com/docs/libraries/lumier/building-lumier)
- [Installation](https://cua.ai/docs/libraries/lumier/installation)
- [Docker](https://cua.ai/docs/libraries/lumier/docker)
- [Docker Compose](https://cua.ai/docs/libraries/lumier/docker-compose)
- [Building Lumier](https://cua.ai/docs/libraries/lumier/building-lumier)

## Credits

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.35
current_version = 0.4.37
commit = True
tag = True
tag_name = agent-v{new_version}
@@ -72,16 +72,16 @@ if __name__ == "__main__":

## Docs

- [Agent Loops](https://trycua.com/docs/agent-sdk/agent-loops)
- [Supported Agents](https://trycua.com/docs/agent-sdk/supported-agents)
- [Supported Models](https://trycua.com/docs/agent-sdk/supported-models)
- [Chat History](https://trycua.com/docs/agent-sdk/chat-history)
- [Callbacks](https://trycua.com/docs/agent-sdk/callbacks)
- [Custom Tools](https://trycua.com/docs/agent-sdk/custom-tools)
- [Custom Computer Handlers](https://trycua.com/docs/agent-sdk/custom-computer-handlers)
- [Prompt Caching](https://trycua.com/docs/agent-sdk/prompt-caching)
- [Usage Tracking](https://trycua.com/docs/agent-sdk/usage-tracking)
- [Benchmarks](https://trycua.com/docs/agent-sdk/benchmarks)
- [Agent Loops](https://cua.ai/docs/agent-sdk/agent-loops)
- [Supported Agents](https://cua.ai/docs/agent-sdk/supported-agents/computer-use-agents)
- [Supported Models](https://cua.ai/docs/agent-sdk/supported-model-providers)
- [Chat History](https://cua.ai/docs/agent-sdk/chat-history)
- [Callbacks](https://cua.ai/docs/agent-sdk/callbacks)
- [Custom Tools](https://cua.ai/docs/agent-sdk/custom-tools)
- [Custom Computer Handlers](https://cua.ai/docs/agent-sdk/custom-computer-handlers)
- [Prompt Caching](https://cua.ai/docs/agent-sdk/prompt-caching)
- [Usage Tracking](https://cua.ai/docs/agent-sdk/usage-tracking)
- [Benchmarks](https://cua.ai/docs/agent-sdk/benchmarks)

## License
@@ -185,7 +185,9 @@ class ComputerAgent:
        max_trajectory_budget: Optional[float | dict] = None,
        telemetry_enabled: Optional[bool] = True,
        trust_remote_code: Optional[bool] = False,
        **kwargs,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **additional_generation_kwargs,
    ):
        """
        Initialize ComputerAgent.
@@ -205,7 +207,9 @@ class ComputerAgent:
            max_trajectory_budget: If set, adds BudgetManagerCallback to track usage costs and stop when budget is exceeded
            telemetry_enabled: If set, adds TelemetryCallback to track anonymized usage data. Enabled by default.
            trust_remote_code: If set, trust remote code when loading local models. Disabled by default.
            **kwargs: Additional arguments passed to the agent loop
            api_key: Optional API key override for the model provider
            api_base: Optional API base URL override for the model provider
            **additional_generation_kwargs: Additional arguments passed to the model provider
        """
        # If the loop is "human/human", we need to prefix a grounding model fallback
        if model in ["human/human", "human"]:
@@ -223,8 +227,10 @@ class ComputerAgent:
        self.screenshot_delay = screenshot_delay
        self.use_prompt_caching = use_prompt_caching
        self.telemetry_enabled = telemetry_enabled
        self.kwargs = kwargs
        self.kwargs = additional_generation_kwargs
        self.trust_remote_code = trust_remote_code
        self.api_key = api_key
        self.api_base = api_base

        # == Add built-in callbacks ==

@@ -593,7 +599,12 @@ class ComputerAgent:
    # ============================================================================

    async def run(
        self, messages: Messages, stream: bool = False, **kwargs
        self,
        messages: Messages,
        stream: bool = False,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **additional_generation_kwargs,
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Run the agent with the given messages using Computer protocol handler pattern.
@@ -601,7 +612,9 @@ class ComputerAgent:
        Args:
            messages: List of message dictionaries
            stream: Whether to stream the response
            **kwargs: Additional arguments
            api_key: Optional API key override for the model provider
            api_base: Optional API base URL override for the model provider
            **additional_generation_kwargs: Additional arguments passed to the model provider

        Returns:
            AsyncGenerator that yields response chunks
@@ -617,8 +630,12 @@ class ComputerAgent:

        await self._initialize_computers()

        # Merge kwargs
        merged_kwargs = {**self.kwargs, **kwargs}
        # Merge kwargs and thread api credentials (run overrides constructor)
        merged_kwargs = {**self.kwargs, **additional_generation_kwargs}
        if (api_key is not None) or (self.api_key is not None):
            merged_kwargs["api_key"] = api_key if api_key is not None else self.api_key
        if (api_base is not None) or (self.api_base is not None):
            merged_kwargs["api_base"] = api_base if api_base is not None else self.api_base

        old_items = self._process_input(messages)
        new_items = []
@@ -728,8 +745,14 @@ class ComputerAgent:
        if not self.computer_handler:
            raise ValueError("Computer tool or image_b64 is required for predict_click")
        image_b64 = await self.computer_handler.screenshot()
        # Pass along api credentials if available
        click_kwargs: Dict[str, Any] = {}
        if self.api_key is not None:
            click_kwargs["api_key"] = self.api_key
        if self.api_base is not None:
            click_kwargs["api_base"] = self.api_base
        return await self.agent_loop.predict_click(
            model=self.model, image_b64=image_b64, instruction=instruction
            model=self.model, image_b64=image_b64, instruction=instruction, **click_kwargs
        )
        return None
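
For readers skimming this diff, a minimal usage sketch of the new credential overrides introduced above; the model name, key, and endpoint are placeholders, and a run-level value wins over the constructor value, per the merge logic shown:

```python
import asyncio
from agent import ComputerAgent
from computer import Computer

async def main() -> None:
    computer = Computer(os_type="linux")
    await computer.run()

    # Constructor-level override applies to every provider call this agent makes
    agent = ComputerAgent(
        model="openai/computer-use-preview",      # placeholder model name
        tools=[computer],
        api_key="sk-example",                     # placeholder credential
        api_base="https://llm-proxy.example/v1",  # placeholder endpoint
    )

    # A run-level value overrides the constructor value for this call only
    async for _ in agent.run("open the settings page", api_key="sk-other-key"):
        pass

asyncio.run(main())
```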
@@ -297,6 +297,20 @@ Examples:
        help="Maximum number of retries for the LLM API calls",
    )

    # Provider override credentials
    parser.add_argument(
        "--api-key",
        dest="api_key",
        type=str,
        help="API key override for the model provider (passed to ComputerAgent)",
    )
    parser.add_argument(
        "--api-base",
        dest="api_base",
        type=str,
        help="API base URL override for the model provider (passed to ComputerAgent)",
    )

    args = parser.parse_args()

    # Check for required environment variables
@@ -307,7 +321,7 @@ Examples:
    if not container_name:
        if args.provider == "cloud":
            print_colored("CUA_CONTAINER_NAME not set.", dim=True)
            print_colored("You can get a CUA container at https://www.trycua.com/", dim=True)
            print_colored("You can get a CUA container at https://cua.ai/", dim=True)
            container_name = input("Enter your CUA container name: ").strip()
            if not container_name:
                print_colored("❌ Container name is required.")
@@ -380,6 +394,12 @@ Examples:
        "max_retries": args.max_retries,
    }

    # Thread API credentials to agent if provided
    if args.api_key:
        agent_kwargs["api_key"] = args.api_key
    if args.api_base:
        agent_kwargs["api_base"] = args.api_base

    if args.images > 0:
        agent_kwargs["only_n_most_recent_images"] = args.images

@@ -28,8 +28,12 @@ class AsyncComputerHandler(Protocol):
        """Get screen dimensions as (width, height)."""
        ...

    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string."""
    async def screenshot(self, text: Optional[str] = None) -> str:
        """Take a screenshot and return as base64 string.

        Args:
            text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
        """
        ...

    async def click(self, x: int, y: int, button: str = "left") -> None:

@@ -36,8 +36,12 @@ class cuaComputerHandler(AsyncComputerHandler):
        screen_size = await self.interface.get_screen_size()
        return screen_size["width"], screen_size["height"]

    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string."""
    async def screenshot(self, text: Optional[str] = None) -> str:
        """Take a screenshot and return as base64 string.

        Args:
            text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
        """
        assert self.interface is not None
        screenshot_bytes = await self.interface.screenshot()
        return base64.b64encode(screenshot_bytes).decode("utf-8")

@@ -122,8 +122,12 @@ class CustomComputerHandler(AsyncComputerHandler):

        return self._last_screenshot_size

    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string."""
    async def screenshot(self, text: Optional[str] = None) -> str:
        """Take a screenshot and return as base64 string.

        Args:
            text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
        """
        result = await self._call_function(self.functions["screenshot"])
        b64_str = self._to_b64_str(result)  # type: ignore

@@ -1615,6 +1615,11 @@ Task: Click {instruction}. Output ONLY a click action on the target element.""",
            "max_tokens": 100,  # Keep response short for click prediction
            "headers": {"anthropic-beta": tool_config["beta_flag"]},
        }
        # Thread optional API params
        if "api_key" in kwargs and kwargs.get("api_key") is not None:
            api_kwargs["api_key"] = kwargs.get("api_key")
        if "api_base" in kwargs and kwargs.get("api_base") is not None:
            api_kwargs["api_base"] = kwargs.get("api_base")

        # Use liteLLM acompletion
        response = await litellm.acompletion(**api_kwargs)

@@ -24,7 +24,7 @@ class AsyncAgentConfig(Protocol):
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
        **generation_config,
    ) -> Dict[str, Any]:
        """
        Predict the next step based on input items.
@@ -40,7 +40,9 @@ class AsyncAgentConfig(Protocol):
            _on_api_end: Callback for API end
            _on_usage: Callback for usage tracking
            _on_screenshot: Callback for screenshot events
            **kwargs: Additional arguments
            **generation_config: Additional arguments to pass to the model provider
                - api_key: Optional API key for the provider
                - api_base: Optional API base URL for the provider

        Returns:
            Dictionary with "output" (output items) and "usage" array
@@ -49,7 +51,7 @@ class AsyncAgentConfig(Protocol):

    @abstractmethod
    async def predict_click(
        self, model: str, image_b64: str, instruction: str
        self, model: str, image_b64: str, instruction: str, **generation_config
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates based on image and instruction.
@@ -58,6 +60,9 @@ class AsyncAgentConfig(Protocol):
            model: Model name to use
            image_b64: Base64 encoded image
            instruction: Instruction for where to click
            **generation_config: Additional arguments to pass to the model provider
                - api_key: Optional API key for the provider
                - api_base: Optional API base URL for the provider

        Returns:
            None or tuple with (x, y) coordinates

@@ -762,6 +762,7 @@ class Glm4vConfig(AsyncAgentConfig):
            # "skip_special_tokens": False,
            # }
        }
        api_kwargs.update({k: v for k, v in (kwargs or {}).items()})

        # Add API callbacks
        if _on_api_start:
@@ -852,6 +853,7 @@ Where x,y are coordinates normalized to 0-999 range."""
                "skip_special_tokens": False,
            },
        }
        api_kwargs.update({k: v for k, v in (kwargs or {}).items()})

        # Call liteLLM
        response = await litellm.acompletion(**api_kwargs)

@@ -14,67 +14,73 @@ import litellm

from ..decorators import register_agent
from ..loops.base import AsyncAgentConfig
from ..responses import (
    convert_completion_messages_to_responses_items,
    convert_responses_items_to_completion_messages,
)
from ..types import AgentCapability, AgentResponse, Messages, Tools

SOM_TOOL_SCHEMA = {
    "type": "function",
    "name": "computer",
    "description": "Control a computer by taking screenshots and interacting with UI elements. This tool shows screenshots with numbered elements overlaid on them. Each UI element has been assigned a unique ID number that you can see in the image. Use the element's ID number to interact with any element instead of pixel coordinates.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": [
                    "screenshot",
                    "click",
                    "double_click",
                    "drag",
                    "type",
                    "keypress",
                    "scroll",
                    "move",
                    "wait",
                    "get_current_url",
                    "get_dimensions",
                    "get_environment",
                ],
                "description": "The action to perform",
            },
            "element_id": {
                "type": "integer",
                "description": "The ID of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)",
            },
            "start_element_id": {
                "type": "integer",
                "description": "The ID of the element to start dragging from (required for drag action)",
            },
            "end_element_id": {
                "type": "integer",
                "description": "The ID of the element to drag to (required for drag action)",
            },
            "text": {
                "type": "string",
                "description": "The text to type (required for type action)",
            },
            "keys": {
                "type": "string",
                "description": "Key combination to press (required for keypress action). Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')",
            },
            "button": {
                "type": "string",
                "description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left",
            },
            "scroll_x": {
                "type": "integer",
                "description": "Horizontal scroll amount for scroll action (positive for right, negative for left)",
            },
            "scroll_y": {
                "type": "integer",
                "description": "Vertical scroll amount for scroll action (positive for down, negative for up)",
    "function": {
        "name": "computer",
        "description": "Control a computer by taking screenshots and interacting with UI elements. This tool shows screenshots with numbered elements overlaid on them. Each UI element has been assigned a unique ID number that you can see in the image. Use the element's ID number to interact with any element instead of pixel coordinates.",
        "parameters": {
            "type": "object",
            "properties": {
                "action": {
                    "type": "string",
                    "enum": [
                        "screenshot",
|
||||
"click",
|
||||
"double_click",
|
||||
"drag",
|
||||
"type",
|
||||
"keypress",
|
||||
"scroll",
|
||||
"move",
|
||||
"wait",
|
||||
"get_current_url",
|
||||
"get_dimensions",
|
||||
"get_environment",
|
||||
],
|
||||
"description": "The action to perform",
|
||||
},
|
||||
"element_id": {
|
||||
"type": "integer",
|
||||
"description": "The ID of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)",
|
||||
},
|
||||
"start_element_id": {
|
||||
"type": "integer",
|
||||
"description": "The ID of the element to start dragging from (required for drag action)",
|
||||
},
|
||||
"end_element_id": {
|
||||
"type": "integer",
|
||||
"description": "The ID of the element to drag to (required for drag action)",
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "The text to type (required for type action)",
|
||||
},
|
||||
"keys": {
|
||||
"type": "string",
|
||||
"description": "Key combination to press (required for keypress action). Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')",
|
||||
},
|
||||
"button": {
|
||||
"type": "string",
|
||||
"description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left",
|
||||
},
|
||||
"scroll_x": {
|
||||
"type": "integer",
|
||||
"description": "Horizontal scroll amount for scroll action (positive for right, negative for left)",
|
||||
},
|
||||
"scroll_y": {
|
||||
"type": "integer",
|
||||
"description": "Vertical scroll amount for scroll action (positive for down, negative for up)",
|
||||
},
|
||||
},
|
||||
"required": ["action", "element_id"],
|
||||
},
|
||||
"required": ["action"],
|
||||
},
|
||||
}
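For illustration only (all values invented), a model tool call against this schema might carry arguments like the ones below; later in the loop the agent resolves element IDs back to pixel coordinates through the id2xy mapping built from the OmniParser elements:

import json

# Hypothetical arguments emitted by the model for the "computer" tool above.
example_arguments = {"action": "click", "element_id": 12, "button": "left"}

# id2xy maps element IDs to screen-pixel centers; these values are made up for the sketch.
id2xy = {12: (640, 360)}

x, y = id2xy[example_arguments["element_id"]]
computer_call_action = {"type": "click", "x": x, "y": y, "button": example_arguments["button"]}
print(json.dumps(computer_call_action))  # {"type": "click", "x": 640, "y": 360, "button": "left"}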
|
||||
|
||||
@@ -243,18 +249,20 @@ async def replace_computer_call_with_function(
|
||||
"id": item.get("id"),
|
||||
"call_id": item.get("call_id"),
|
||||
"status": "completed",
|
||||
# Fall back to string representation
|
||||
"content": f"Used tool: {action_data.get("type")}({json.dumps(fn_args)})",
|
||||
}
|
||||
]
|
||||
|
||||
elif item_type == "computer_call_output":
|
||||
# Simple conversion: computer_call_output -> function_call_output
|
||||
output = item.get("output")
|
||||
|
||||
if isinstance(output, dict):
|
||||
output = [output]
|
||||
|
||||
return [
|
||||
{
|
||||
"type": "function_call_output",
|
||||
"call_id": item.get("call_id"),
|
||||
"content": [item.get("output")],
|
||||
"output": item.get("output"),
|
||||
"id": item.get("id"),
|
||||
"status": "completed",
|
||||
}
|
||||
@@ -296,6 +304,13 @@ class OmniparserConfig(AsyncAgentConfig):
|
||||
|
||||
llm_model = model.split("+")[-1]
|
||||
|
||||
# Get screen dimensions from computer handler
|
||||
try:
|
||||
width, height = await computer_handler.get_dimensions()
|
||||
except Exception:
|
||||
# Fallback to default dimensions if method fails
|
||||
width, height = 1024, 768
|
||||
|
||||
# Prepare tools for OpenAI API
|
||||
openai_tools, id2xy = _prepare_tools_for_omniparser(tools)
|
||||
|
||||
@@ -309,27 +324,43 @@ class OmniparserConfig(AsyncAgentConfig):
|
||||
result = parser.parse(image_data)
|
||||
if _on_screenshot:
|
||||
await _on_screenshot(result.annotated_image_base64, "annotated_image")
|
||||
for element in result.elements:
|
||||
id2xy[element.id] = (
|
||||
(element.bbox.x1 + element.bbox.x2) / 2,
|
||||
(element.bbox.y1 + element.bbox.y2) / 2,
|
||||
)
|
||||
|
||||
# handle computer calls -> function calls
|
||||
new_messages = []
|
||||
for message in messages:
|
||||
# Convert OmniParser normalized coordinates (0-1) to absolute pixel coordinates
|
||||
for element in result.elements:
|
||||
norm_x = (element.bbox.x1 + element.bbox.x2) / 2
|
||||
norm_y = (element.bbox.y1 + element.bbox.y2) / 2
|
||||
pixel_x = int(norm_x * width)
|
||||
pixel_y = int(norm_y * height)
|
||||
id2xy[element.id] = (pixel_x, pixel_y)
|
||||
|
||||
# Replace the original screenshot with the annotated image
|
||||
annotated_image_url = f"data:image/png;base64,{result.annotated_image_base64}"
|
||||
last_computer_call_output["output"]["image_url"] = annotated_image_url
|
||||
|
||||
xy2id = {v: k for k, v in id2xy.items()}
|
||||
messages_with_element_ids = []
|
||||
for i, message in enumerate(messages):
|
||||
if not isinstance(message, dict):
|
||||
message = message.__dict__
|
||||
new_messages += await replace_computer_call_with_function(message, id2xy) # type: ignore
|
||||
messages = new_messages
|
||||
|
||||
msg_type = message.get("type")
|
||||
|
||||
if msg_type == "computer_call" and "action" in message:
|
||||
action = message.get("action", {})
|
||||
|
||||
converted = await replace_computer_call_with_function(message, xy2id) # type: ignore
|
||||
messages_with_element_ids += converted
|
||||
|
||||
completion_messages = convert_responses_items_to_completion_messages(
|
||||
messages_with_element_ids, allow_images_in_tool_results=False
|
||||
)
|
||||
|
||||
# Prepare API call kwargs
|
||||
api_kwargs = {
|
||||
"model": llm_model,
|
||||
"input": messages,
|
||||
"messages": completion_messages,
|
||||
"tools": openai_tools if openai_tools else None,
|
||||
"stream": stream,
|
||||
"truncation": "auto",
|
||||
"num_retries": max_retries,
|
||||
**kwargs,
|
||||
}
|
||||
@@ -340,8 +371,8 @@ class OmniparserConfig(AsyncAgentConfig):
|
||||
|
||||
print(str(api_kwargs)[:1000])
|
||||
|
||||
# Use liteLLM responses
|
||||
response = await litellm.aresponses(**api_kwargs)
|
||||
# Use liteLLM completion
|
||||
response = await litellm.acompletion(**api_kwargs)
|
||||
|
||||
# Call API end hook
|
||||
if _on_api_end:
|
||||
@@ -355,12 +386,45 @@ class OmniparserConfig(AsyncAgentConfig):
|
||||
if _on_usage:
|
||||
await _on_usage(usage)
|
||||
|
||||
# handle som function calls -> xy computer calls
|
||||
new_output = []
|
||||
for i in range(len(response.output)): # type: ignore
|
||||
new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) # type: ignore
|
||||
response_dict = response.model_dump() # type: ignore
|
||||
choice_messages = [choice["message"] for choice in response_dict["choices"]]
|
||||
responses_items = []
|
||||
for choice_message in choice_messages:
|
||||
responses_items.extend(convert_completion_messages_to_responses_items([choice_message]))
|
||||
|
||||
return {"output": new_output, "usage": usage}
|
||||
# Convert element_id → x,y (similar to moondream's convert_computer_calls_desc2xy)
|
||||
final_output = []
|
||||
for item in responses_items:
|
||||
if item.get("type") == "computer_call" and "action" in item:
|
||||
action = item["action"].copy()
|
||||
|
||||
# Handle single element_id
|
||||
if "element_id" in action:
|
||||
element_id = action["element_id"]
|
||||
if element_id in id2xy:
|
||||
x, y = id2xy[element_id]
|
||||
action["x"] = x
|
||||
action["y"] = y
|
||||
del action["element_id"]
|
||||
|
||||
# Handle start_element_id and end_element_id for drag operations
|
||||
elif "start_element_id" in action and "end_element_id" in action:
|
||||
start_id = action["start_element_id"]
|
||||
end_id = action["end_element_id"]
|
||||
if start_id in id2xy and end_id in id2xy:
|
||||
start_x, start_y = id2xy[start_id]
|
||||
end_x, end_y = id2xy[end_id]
|
||||
action["path"] = [{"x": start_x, "y": start_y}, {"x": end_x, "y": end_y}]
|
||||
del action["start_element_id"]
|
||||
del action["end_element_id"]
|
||||
|
||||
converted_item = item.copy()
|
||||
converted_item["action"] = action
|
||||
final_output.append(converted_item)
|
||||
else:
|
||||
final_output.append(item)
|
||||
|
||||
return {"output": final_output, "usage": usage}
|
||||
|
||||
async def predict_click(
|
||||
self, model: str, image_b64: str, instruction: str, **kwargs
|
||||
|
||||
@@ -140,7 +140,7 @@ class OpenAIComputerUseConfig:
|
||||
return output_dict
|
||||
|
||||
async def predict_click(
|
||||
self, model: str, image_b64: str, instruction: str
|
||||
self, model: str, image_b64: str, instruction: str, **kwargs
|
||||
) -> Optional[Tuple[int, int]]:
|
||||
"""
|
||||
Predict click coordinates based on image and instruction.
|
||||
@@ -208,6 +208,7 @@ Task: Click {instruction}. Output ONLY a click action on the target element.""",
|
||||
"reasoning": {"summary": "concise"},
|
||||
"truncation": "auto",
|
||||
"max_tokens": 200, # Keep response short for click prediction
|
||||
**kwargs,
|
||||
}
|
||||
|
||||
# Use liteLLM responses
|
||||
|
||||
@@ -773,7 +773,7 @@ class UITARSConfig:
|
||||
return agent_response
|
||||
|
||||
async def predict_click(
|
||||
self, model: str, image_b64: str, instruction: str
|
||||
self, model: str, image_b64: str, instruction: str, **kwargs
|
||||
) -> Optional[Tuple[int, int]]:
|
||||
"""
|
||||
Predict click coordinates based on image and instruction.
|
||||
@@ -819,6 +819,7 @@ class UITARSConfig:
|
||||
"temperature": 0.0,
|
||||
"do_sample": False,
|
||||
}
|
||||
api_kwargs.update({k: v for k, v in (kwargs or {}).items()})
|
||||
|
||||
# Call liteLLM with UITARS model
|
||||
response = await litellm.acompletion(**api_kwargs)
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
|
||||
|
||||
[project]
|
||||
name = "cua-agent"
|
||||
version = "0.4.35"
|
||||
version = "0.4.37"
|
||||
description = "CUA (Computer Use) Agent for AI-driven computer interaction"
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
|
||||
84
libs/python/agent/tests/conftest.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Pytest configuration and shared fixtures for agent package tests.
|
||||
|
||||
This file contains shared fixtures and configuration for all agent tests.
|
||||
Following SRP: This file ONLY handles test setup/teardown.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_litellm():
|
||||
"""Mock liteLLM completion calls.
|
||||
|
||||
Use this fixture to avoid making real LLM API calls during tests.
|
||||
Returns a mock that simulates LLM responses.
|
||||
"""
|
||||
with patch("litellm.acompletion") as mock_completion:
|
||||
|
||||
async def mock_response(*args, **kwargs):
|
||||
"""Simulate a typical LLM response."""
|
||||
return {
|
||||
"id": "chatcmpl-test123",
|
||||
"object": "chat.completion",
|
||||
"created": 1234567890,
|
||||
"model": kwargs.get("model", "anthropic/claude-3-5-sonnet-20241022"),
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "This is a mocked response for testing.",
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 10,
|
||||
"completion_tokens": 20,
|
||||
"total_tokens": 30,
|
||||
},
|
||||
}
|
||||
|
||||
mock_completion.side_effect = mock_response
|
||||
yield mock_completion
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_computer():
|
||||
"""Mock Computer interface for agent tests.
|
||||
|
||||
Use this fixture to test agent logic without requiring a real Computer instance.
|
||||
"""
|
||||
computer = AsyncMock()
|
||||
computer.interface = AsyncMock()
|
||||
computer.interface.screenshot = AsyncMock(return_value=b"fake_screenshot_data")
|
||||
computer.interface.left_click = AsyncMock()
|
||||
computer.interface.type = AsyncMock()
|
||||
computer.interface.key = AsyncMock()
|
||||
|
||||
# Mock context manager
|
||||
computer.__aenter__ = AsyncMock(return_value=computer)
|
||||
computer.__aexit__ = AsyncMock()
|
||||
|
||||
return computer
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def disable_telemetry(monkeypatch):
|
||||
"""Disable telemetry for tests.
|
||||
|
||||
Use this fixture to ensure no telemetry is sent during tests.
|
||||
"""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_DISABLED", "1")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_messages():
|
||||
"""Provide sample messages for testing.
|
||||
|
||||
Returns a list of messages in the expected format.
|
||||
"""
|
||||
return [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
|
||||
139
libs/python/agent/tests/test_computer_agent.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""Unit tests for ComputerAgent class.
|
||||
|
||||
This file tests ONLY the ComputerAgent initialization and basic functionality.
|
||||
Following SRP: This file tests ONE class (ComputerAgent).
|
||||
All external dependencies (liteLLM, Computer) are mocked.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestComputerAgentInitialization:
|
||||
"""Test ComputerAgent initialization (SRP: Only tests initialization)."""
|
||||
|
||||
@patch("agent.agent.litellm")
|
||||
def test_agent_initialization_with_model(self, mock_litellm, disable_telemetry):
|
||||
"""Test that agent can be initialized with a model string."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
|
||||
|
||||
assert agent is not None
|
||||
assert hasattr(agent, "model")
|
||||
assert agent.model == "anthropic/claude-3-5-sonnet-20241022"
|
||||
|
||||
@patch("agent.agent.litellm")
|
||||
def test_agent_initialization_with_tools(self, mock_litellm, disable_telemetry, mock_computer):
|
||||
"""Test that agent can be initialized with tools."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022", tools=[mock_computer])
|
||||
|
||||
assert agent is not None
|
||||
assert hasattr(agent, "tools")
|
||||
|
||||
@patch("agent.agent.litellm")
|
||||
def test_agent_initialization_with_max_budget(self, mock_litellm, disable_telemetry):
|
||||
"""Test that agent can be initialized with max trajectory budget."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
budget = 5.0
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022", max_trajectory_budget=budget
|
||||
)
|
||||
|
||||
assert agent is not None
|
||||
|
||||
@patch("agent.agent.litellm")
|
||||
def test_agent_requires_model(self, mock_litellm, disable_telemetry):
|
||||
"""Test that agent requires a model parameter."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
# Should fail without model parameter - intentionally missing required argument
|
||||
ComputerAgent() # type: ignore[call-arg]
|
||||
|
||||
|
||||
class TestComputerAgentRun:
|
||||
"""Test ComputerAgent.run() method (SRP: Only tests run logic)."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch("agent.agent.litellm")
|
||||
async def test_agent_run_with_messages(self, mock_litellm, disable_telemetry, sample_messages):
|
||||
"""Test that agent.run() works with valid messages."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
# Mock liteLLM response
|
||||
mock_response = {
|
||||
"id": "chatcmpl-test",
|
||||
"choices": [
|
||||
{
|
||||
"message": {"role": "assistant", "content": "Test response"},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
|
||||
}
|
||||
|
||||
mock_litellm.acompletion = AsyncMock(return_value=mock_response)
|
||||
|
||||
agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
|
||||
|
||||
# Run should return an async generator
|
||||
result_generator = agent.run(sample_messages)
|
||||
|
||||
assert result_generator is not None
|
||||
# Check it's an async generator
|
||||
assert hasattr(result_generator, "__anext__")
|
||||
|
||||
def test_agent_has_run_method(self, disable_telemetry):
|
||||
"""Test that agent has run method available."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
|
||||
|
||||
# Verify run method exists
|
||||
assert hasattr(agent, "run")
|
||||
assert callable(agent.run)
|
||||
|
||||
def test_agent_has_agent_loop(self, disable_telemetry):
|
||||
"""Test that agent has agent_loop initialized."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022")
|
||||
|
||||
# Verify agent_loop is initialized
|
||||
assert hasattr(agent, "agent_loop")
|
||||
assert agent.agent_loop is not None
|
||||
|
||||
|
||||
class TestComputerAgentTypes:
|
||||
"""Test AgentResponse and Messages types (SRP: Only tests type definitions)."""
|
||||
|
||||
def test_messages_type_exists(self):
|
||||
"""Test that Messages type is exported."""
|
||||
from agent import Messages
|
||||
|
||||
assert Messages is not None
|
||||
|
||||
def test_agent_response_type_exists(self):
|
||||
"""Test that AgentResponse type is exported."""
|
||||
from agent import AgentResponse
|
||||
|
||||
assert AgentResponse is not None
|
||||
|
||||
|
||||
class TestComputerAgentIntegration:
|
||||
"""Test ComputerAgent integration with Computer tool (SRP: Integration within package)."""
|
||||
|
||||
def test_agent_accepts_computer_tool(self, disable_telemetry, mock_computer):
|
||||
"""Test that agent can be initialized with Computer tool."""
|
||||
from agent import ComputerAgent
|
||||
|
||||
agent = ComputerAgent(model="anthropic/claude-3-5-sonnet-20241022", tools=[mock_computer])
|
||||
|
||||
# Verify agent accepted the tool
|
||||
assert agent is not None
|
||||
assert hasattr(agent, "tools")
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.1.27
|
||||
current_version = 0.1.29
|
||||
commit = True
|
||||
tag = True
|
||||
tag_name = computer-server-v{new_version}
|
||||
|
||||
@@ -40,7 +40,7 @@ Refer to this notebook for a step-by-step guide on how to use the Computer-Use S
|
||||
|
||||
## Docs
|
||||
|
||||
- [Commands](https://trycua.com/docs/libraries/computer-server/Commands)
|
||||
- [REST-API](https://trycua.com/docs/libraries/computer-server/REST-API)
|
||||
- [WebSocket-API](https://trycua.com/docs/libraries/computer-server/WebSocket-API)
|
||||
- [Index](https://trycua.com/docs/libraries/computer-server/index)
|
||||
- [Commands](https://cua.ai/docs/libraries/computer-server/Commands)
|
||||
- [REST-API](https://cua.ai/docs/libraries/computer-server/REST-API)
|
||||
- [WebSocket-API](https://cua.ai/docs/libraries/computer-server/WebSocket-API)
|
||||
- [Index](https://cua.ai/docs/libraries/computer-server/index)
|
||||
|
||||
@@ -85,6 +85,102 @@ class BaseFileHandler(ABC):
|
||||
pass
|
||||
|
||||
|
||||
class BaseDesktopHandler(ABC):
|
||||
"""Abstract base class for OS-specific desktop handlers.
|
||||
|
||||
Categories:
|
||||
- Wallpaper Actions: Methods for wallpaper operations
|
||||
- Desktop shortcut actions: Methods for managing desktop shortcuts
|
||||
"""
|
||||
|
||||
# Wallpaper Actions
|
||||
@abstractmethod
|
||||
async def get_desktop_environment(self) -> Dict[str, Any]:
|
||||
"""Get the current desktop environment name."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def set_wallpaper(self, path: str) -> Dict[str, Any]:
|
||||
"""Set the desktop wallpaper to the file at path."""
|
||||
pass
|
||||
|
||||
|
||||
class BaseWindowHandler(ABC):
|
||||
"""Abstract class for OS-specific window management handlers.
|
||||
|
||||
Categories:
|
||||
- Window Management: Methods for application/window control
|
||||
"""
|
||||
|
||||
# Window Management
|
||||
@abstractmethod
|
||||
async def open(self, target: str) -> Dict[str, Any]:
|
||||
"""Open a file or URL with the default application."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]:
|
||||
"""Launch an application with optional arguments."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_current_window_id(self) -> Dict[str, Any]:
|
||||
"""Get the currently active window ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_application_windows(self, app: str) -> Dict[str, Any]:
|
||||
"""Get windows belonging to an application (by name or bundle)."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_window_name(self, window_id: str) -> Dict[str, Any]:
|
||||
"""Get the title/name of a window by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_window_size(self, window_id: str | int) -> Dict[str, Any]:
|
||||
"""Get the size of a window by ID as {width, height}."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def activate_window(self, window_id: str | int) -> Dict[str, Any]:
|
||||
"""Bring a window to the foreground by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def close_window(self, window_id: str | int) -> Dict[str, Any]:
|
||||
"""Close a window by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_window_position(self, window_id: str | int) -> Dict[str, Any]:
|
||||
"""Get the top-left position of a window as {x, y}."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def set_window_size(
|
||||
self, window_id: str | int, width: int, height: int
|
||||
) -> Dict[str, Any]:
|
||||
"""Set the size of a window by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def set_window_position(self, window_id: str | int, x: int, y: int) -> Dict[str, Any]:
|
||||
"""Set the position of a window by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def maximize_window(self, window_id: str | int) -> Dict[str, Any]:
|
||||
"""Maximize a window by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def minimize_window(self, window_id: str | int) -> Dict[str, Any]:
|
||||
"""Minimize a window by ID."""
|
||||
pass
|
||||
|
||||
|
||||
class BaseAutomationHandler(ABC):
|
||||
"""Abstract base class for OS-specific automation handlers.
|
||||
|
||||
|
||||
@@ -4,7 +4,13 @@ from typing import Tuple, Type
|
||||
|
||||
from computer_server.diorama.base import BaseDioramaHandler
|
||||
|
||||
from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler
|
||||
from .base import (
|
||||
BaseAccessibilityHandler,
|
||||
BaseAutomationHandler,
|
||||
BaseDesktopHandler,
|
||||
BaseFileHandler,
|
||||
BaseWindowHandler,
|
||||
)
|
||||
|
||||
# Conditionally import platform-specific handlers
|
||||
system = platform.system().lower()
|
||||
@@ -17,7 +23,7 @@ elif system == "linux":
|
||||
elif system == "windows":
|
||||
from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
|
||||
|
||||
from .generic import GenericFileHandler
|
||||
from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler
|
||||
|
||||
|
||||
class HandlerFactory:
|
||||
@@ -49,9 +55,14 @@ class HandlerFactory:
|
||||
raise RuntimeError(f"Failed to determine current OS: {str(e)}")
|
||||
|
||||
@staticmethod
|
||||
def create_handlers() -> (
|
||||
Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]
|
||||
):
|
||||
def create_handlers() -> Tuple[
|
||||
BaseAccessibilityHandler,
|
||||
BaseAutomationHandler,
|
||||
BaseDioramaHandler,
|
||||
BaseFileHandler,
|
||||
BaseDesktopHandler,
|
||||
BaseWindowHandler,
|
||||
]:
|
||||
"""Create and return appropriate handlers for the current OS.
|
||||
|
||||
Returns:
|
||||
@@ -70,6 +81,8 @@ class HandlerFactory:
|
||||
MacOSAutomationHandler(),
|
||||
MacOSDioramaHandler(),
|
||||
GenericFileHandler(),
|
||||
GenericDesktopHandler(),
|
||||
GenericWindowHandler(),
|
||||
)
|
||||
elif os_type == "linux":
|
||||
return (
|
||||
@@ -77,6 +90,8 @@ class HandlerFactory:
|
||||
LinuxAutomationHandler(),
|
||||
BaseDioramaHandler(),
|
||||
GenericFileHandler(),
|
||||
GenericDesktopHandler(),
|
||||
GenericWindowHandler(),
|
||||
)
|
||||
elif os_type == "windows":
|
||||
return (
|
||||
@@ -84,6 +99,8 @@ class HandlerFactory:
|
||||
WindowsAutomationHandler(),
|
||||
BaseDioramaHandler(),
|
||||
GenericFileHandler(),
|
||||
GenericDesktopHandler(),
|
||||
GenericWindowHandler(),
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(f"OS '{os_type}' is not supported")
|
||||
|
||||
@@ -2,15 +2,26 @@
|
||||
Generic handlers for all OSes.
|
||||
|
||||
Includes:
|
||||
- DesktopHandler
|
||||
- FileHandler
- WindowHandler
|
||||
|
||||
"""
|
||||
|
||||
import base64
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from .base import BaseFileHandler
|
||||
from ..utils import wallpaper
|
||||
from .base import BaseDesktopHandler, BaseFileHandler, BaseWindowHandler
|
||||
|
||||
try:
|
||||
import pywinctl as pwc
|
||||
except Exception: # pragma: no cover
|
||||
pwc = None # type: ignore
|
||||
|
||||
|
||||
def resolve_path(path: str) -> Path:
|
||||
@@ -25,6 +36,233 @@ def resolve_path(path: str) -> Path:
|
||||
return Path(path).expanduser().resolve()
|
||||
|
||||
|
||||
# ===== Cross-platform Desktop command handlers =====
|
||||
|
||||
|
||||
class GenericDesktopHandler(BaseDesktopHandler):
|
||||
"""
|
||||
Generic desktop handler providing desktop-related operations.
|
||||
|
||||
Implements:
|
||||
- get_desktop_environment: detect current desktop environment
|
||||
- set_wallpaper: set desktop wallpaper path
|
||||
"""
|
||||
|
||||
async def get_desktop_environment(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get the current desktop environment.
|
||||
|
||||
Returns:
|
||||
Dict containing 'success' boolean and either 'environment' string or 'error' string
|
||||
"""
|
||||
try:
|
||||
env = wallpaper.get_desktop_environment()
|
||||
return {"success": True, "environment": env}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def set_wallpaper(self, path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Set the desktop wallpaper to the specified path.
|
||||
|
||||
Args:
|
||||
path: The file path to set as wallpaper
|
||||
|
||||
Returns:
|
||||
Dict containing 'success' boolean and optionally 'error' string
|
||||
"""
|
||||
try:
|
||||
file_path = resolve_path(path)
|
||||
ok = wallpaper.set_wallpaper(str(file_path))
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
# ===== Cross-platform window control command handlers =====
|
||||
|
||||
|
||||
class GenericWindowHandler(BaseWindowHandler):
|
||||
"""
|
||||
Cross-platform window management using pywinctl where possible.
|
||||
"""
|
||||
|
||||
async def open(self, target: str) -> Dict[str, Any]:
|
||||
try:
|
||||
if target.startswith("http://") or target.startswith("https://"):
|
||||
ok = webbrowser.open(target)
|
||||
return {"success": bool(ok)}
|
||||
path = str(resolve_path(target))
|
||||
sys = platform.system().lower()
|
||||
if sys == "darwin":
|
||||
subprocess.Popen(["open", path])
|
||||
elif sys == "linux":
|
||||
subprocess.Popen(["xdg-open", path])
|
||||
elif sys == "windows":
|
||||
os.startfile(path) # type: ignore[attr-defined]
|
||||
else:
|
||||
return {"success": False, "error": f"Unsupported OS: {sys}"}
|
||||
return {"success": True}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def launch(self, app: str, args: Optional[list[str]] = None) -> Dict[str, Any]:
|
||||
try:
|
||||
if args:
|
||||
proc = subprocess.Popen([app, *args])
|
||||
else:
|
||||
# allow shell command like "libreoffice --writer"
|
||||
proc = subprocess.Popen(app, shell=True)
|
||||
return {"success": True, "pid": proc.pid}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
def _get_window_by_id(self, window_id: int | str) -> Optional[Any]:
|
||||
if pwc is None:
|
||||
raise RuntimeError("pywinctl not available")
|
||||
# Find by native handle among Window objects; getAllWindowsDict keys are titles
|
||||
try:
|
||||
for w in pwc.getAllWindows():
|
||||
if str(w.getHandle()) == str(window_id):
|
||||
return w
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
async def get_current_window_id(self) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
win = pwc.getActiveWindow()
|
||||
if not win:
|
||||
return {"success": False, "error": "No active window"}
|
||||
return {"success": True, "window_id": win.getHandle()}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def get_application_windows(self, app: str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
wins = pwc.getWindowsWithTitle(app, condition=pwc.Re.CONTAINS, flags=pwc.Re.IGNORECASE)
|
||||
ids = [w.getHandle() for w in wins]
|
||||
return {"success": True, "windows": ids}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def get_window_name(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
return {"success": True, "name": w.title}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def get_window_size(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
width, height = w.size
|
||||
return {"success": True, "width": int(width), "height": int(height)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def get_window_position(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
x, y = w.position
|
||||
return {"success": True, "x": int(x), "y": int(y)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def set_window_size(
|
||||
self, window_id: int | str, width: int, height: int
|
||||
) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
ok = w.resizeTo(int(width), int(height))
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def set_window_position(self, window_id: int | str, x: int, y: int) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
ok = w.moveTo(int(x), int(y))
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def maximize_window(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
ok = w.maximize()
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def minimize_window(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
ok = w.minimize()
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def activate_window(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
ok = w.activate()
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def close_window(self, window_id: int | str) -> Dict[str, Any]:
|
||||
try:
|
||||
if pwc is None:
|
||||
return {"success": False, "error": "pywinctl not available"}
|
||||
w = self._get_window_by_id(window_id)
|
||||
if not w:
|
||||
return {"success": False, "error": "Window not found"}
|
||||
ok = w.close()
|
||||
return {"success": bool(ok)}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
# ===== Cross-platform file system command handlers =====
|
||||
|
||||
|
||||
class GenericFileHandler(BaseFileHandler):
|
||||
"""
|
||||
Generic file handler that provides file system operations for all operating systems.
|
||||
|
||||
@@ -75,9 +75,14 @@ except Exception:
|
||||
except Exception:
|
||||
package_version = "unknown"
|
||||
|
||||
accessibility_handler, automation_handler, diorama_handler, file_handler = (
|
||||
HandlerFactory.create_handlers()
|
||||
)
|
||||
(
|
||||
accessibility_handler,
|
||||
automation_handler,
|
||||
diorama_handler,
|
||||
file_handler,
|
||||
desktop_handler,
|
||||
window_handler,
|
||||
) = HandlerFactory.create_handlers()
|
||||
handlers = {
|
||||
"version": lambda: {"protocol": protocol_version, "package": package_version},
|
||||
# App-Use commands
|
||||
@@ -99,6 +104,23 @@ handlers = {
|
||||
"delete_file": file_handler.delete_file,
|
||||
"create_dir": file_handler.create_dir,
|
||||
"delete_dir": file_handler.delete_dir,
|
||||
# Desktop commands
|
||||
"get_desktop_environment": desktop_handler.get_desktop_environment,
|
||||
"set_wallpaper": desktop_handler.set_wallpaper,
|
||||
# Window management
|
||||
"open": window_handler.open,
|
||||
"launch": window_handler.launch,
|
||||
"get_current_window_id": window_handler.get_current_window_id,
|
||||
"get_application_windows": window_handler.get_application_windows,
|
||||
"get_window_name": window_handler.get_window_name,
|
||||
"get_window_size": window_handler.get_window_size,
|
||||
"get_window_position": window_handler.get_window_position,
|
||||
"set_window_size": window_handler.set_window_size,
|
||||
"set_window_position": window_handler.set_window_position,
|
||||
"maximize_window": window_handler.maximize_window,
|
||||
"minimize_window": window_handler.minimize_window,
|
||||
"activate_window": window_handler.activate_window,
|
||||
"close_window": window_handler.close_window,
|
||||
# Mouse commands
|
||||
"mouse_down": automation_handler.mouse_down,
|
||||
"mouse_up": automation_handler.mouse_up,
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from . import wallpaper
|
||||
|
||||
__all__ = ["wallpaper"]
|
||||
321
libs/python/computer-server/computer_server/utils/wallpaper.py
Normal file
@@ -0,0 +1,321 @@
|
||||
"""Set the desktop wallpaper."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_desktop_environment() -> str:
|
||||
"""
|
||||
Returns the name of the current desktop environment.
|
||||
"""
|
||||
# From https://stackoverflow.com/a/21213358/2624876
|
||||
# which takes from:
|
||||
# http://stackoverflow.com/questions/2035657/what-is-my-current-desktop-environment
|
||||
# and http://ubuntuforums.org/showthread.php?t=652320
|
||||
# and http://ubuntuforums.org/showthread.php?t=1139057
|
||||
if sys.platform in ["win32", "cygwin"]:
|
||||
return "windows"
|
||||
elif sys.platform == "darwin":
|
||||
return "mac"
|
||||
else: # Most likely either a POSIX system or something less common
|
||||
desktop_session = os.environ.get("DESKTOP_SESSION")
|
||||
if (
|
||||
desktop_session is not None
|
||||
): # easier to match if we don't have to deal with character cases
|
||||
desktop_session = desktop_session.lower()
|
||||
if desktop_session in [
|
||||
"gnome",
|
||||
"unity",
|
||||
"cinnamon",
|
||||
"mate",
|
||||
"xfce4",
|
||||
"lxde",
|
||||
"fluxbox",
|
||||
"blackbox",
|
||||
"openbox",
|
||||
"icewm",
|
||||
"jwm",
|
||||
"afterstep",
|
||||
"trinity",
|
||||
"kde",
|
||||
]:
|
||||
return desktop_session
|
||||
## Special cases ##
|
||||
# Canonical sets $DESKTOP_SESSION to Lubuntu rather than LXDE if using LXDE.
|
||||
# There is no guarantee that they will not do the same with the other desktop environments.
|
||||
elif "xfce" in desktop_session or desktop_session.startswith("xubuntu"):
|
||||
return "xfce4"
|
||||
elif desktop_session.startswith("ubuntustudio"):
|
||||
return "kde"
|
||||
elif desktop_session.startswith("ubuntu"):
|
||||
return "gnome"
|
||||
elif desktop_session.startswith("lubuntu"):
|
||||
return "lxde"
|
||||
elif desktop_session.startswith("kubuntu"):
|
||||
return "kde"
|
||||
elif desktop_session.startswith("razor"): # e.g. razorkwin
|
||||
return "razor-qt"
|
||||
elif desktop_session.startswith("wmaker"): # e.g. wmaker-common
|
||||
return "windowmaker"
|
||||
gnome_desktop_session_id = os.environ.get("GNOME_DESKTOP_SESSION_ID")
|
||||
if os.environ.get("KDE_FULL_SESSION") == "true":
|
||||
return "kde"
|
||||
elif gnome_desktop_session_id:
|
||||
if "deprecated" not in gnome_desktop_session_id:
|
||||
return "gnome2"
|
||||
# From http://ubuntuforums.org/showthread.php?t=652320
|
||||
elif is_running("xfce-mcs-manage"):
|
||||
return "xfce4"
|
||||
elif is_running("ksmserver"):
|
||||
return "kde"
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_running(process: str) -> bool:
|
||||
"""Returns whether a process with the given name is (likely) currently running.
|
||||
|
||||
Uses a basic text search, and so may have false positives.
|
||||
"""
|
||||
# From http://www.bloggerpolis.com/2011/05/how-to-check-if-a-process-is-running-using-python/
|
||||
# and http://richarddingwall.name/2009/06/18/windows-equivalents-of-ps-and-kill-commands/
|
||||
try: # Linux/Unix
|
||||
s = subprocess.Popen(["ps", "axw"], stdout=subprocess.PIPE)
|
||||
except Exception: # Windows
|
||||
s = subprocess.Popen(["tasklist", "/v"], stdout=subprocess.PIPE)
|
||||
assert s.stdout is not None
|
||||
for x in s.stdout:
|
||||
# if re.search(process, x):
|
||||
if process in str(x):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def set_wallpaper(file_loc: str, first_run: bool = True):
|
||||
"""Sets the wallpaper to the given file location."""
|
||||
# From https://stackoverflow.com/a/21213504/2624876
|
||||
# I have not personally tested most of this. -- @1j01
|
||||
# -----------------------------------------
|
||||
|
||||
# Note: There are two common Linux desktop environments where
|
||||
# I have not been able to set the desktop background from
|
||||
# command line: KDE, Enlightenment
|
||||
desktop_env = get_desktop_environment()
|
||||
if desktop_env in ["gnome", "unity", "cinnamon"]:
|
||||
# Tested on Ubuntu 22 -- @1j01
|
||||
uri = Path(file_loc).as_uri()
|
||||
SCHEMA = "org.gnome.desktop.background"
|
||||
KEY = "picture-uri"
|
||||
# Needed for Ubuntu 22 in dark mode
|
||||
# Might be better to set only one or the other, depending on the current theme
|
||||
# In the settings it will say "This background selection only applies to the dark style"
|
||||
# even if it's set for both, arguably referring to the selection that you can make on that page.
|
||||
# -- @1j01
|
||||
KEY_DARK = "picture-uri-dark"
|
||||
try:
|
||||
from gi.repository import Gio # type: ignore
|
||||
|
||||
gsettings = Gio.Settings.new(SCHEMA) # type: ignore
|
||||
gsettings.set_string(KEY, uri)
|
||||
gsettings.set_string(KEY_DARK, uri)
|
||||
except Exception:
|
||||
# Fallback tested on Ubuntu 22 -- @1j01
|
||||
args = ["gsettings", "set", SCHEMA, KEY, uri]
|
||||
subprocess.Popen(args)
|
||||
args = ["gsettings", "set", SCHEMA, KEY_DARK, uri]
|
||||
subprocess.Popen(args)
|
||||
elif desktop_env == "mate":
|
||||
try: # MATE >= 1.6
|
||||
# info from http://wiki.mate-desktop.org/docs:gsettings
|
||||
args = ["gsettings", "set", "org.mate.background", "picture-filename", file_loc]
|
||||
subprocess.Popen(args)
|
||||
except Exception: # MATE < 1.6
|
||||
# From https://bugs.launchpad.net/variety/+bug/1033918
|
||||
args = [
|
||||
"mateconftool-2",
|
||||
"-t",
|
||||
"string",
|
||||
"--set",
|
||||
"/desktop/mate/background/picture_filename",
|
||||
file_loc,
|
||||
]
|
||||
subprocess.Popen(args)
|
||||
elif desktop_env == "gnome2": # Not tested
|
||||
# From https://bugs.launchpad.net/variety/+bug/1033918
|
||||
args = [
|
||||
"gconftool-2",
|
||||
"-t",
|
||||
"string",
|
||||
"--set",
|
||||
"/desktop/gnome/background/picture_filename",
|
||||
file_loc,
|
||||
]
|
||||
subprocess.Popen(args)
|
||||
## KDE4 is difficult
|
||||
## see http://blog.zx2c4.com/699 for a solution that might work
|
||||
elif desktop_env in ["kde3", "trinity"]:
|
||||
# From http://ubuntuforums.org/archive/index.php/t-803417.html
|
||||
args = ["dcop", "kdesktop", "KBackgroundIface", "setWallpaper", "0", file_loc, "6"]
|
||||
subprocess.Popen(args)
|
||||
elif desktop_env == "xfce4":
|
||||
# Iterate over all wallpaper-related keys and set to file_loc
|
||||
try:
|
||||
list_proc = subprocess.run(
|
||||
["xfconf-query", "-c", "xfce4-desktop", "-l"],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
keys = []
|
||||
if list_proc.stdout:
|
||||
for line in list_proc.stdout.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
# Common keys: .../last-image and .../image-path
|
||||
if "/last-image" in line or "/image-path" in line:
|
||||
keys.append(line)
|
||||
# Fallback: known defaults if none were listed
|
||||
if not keys:
|
||||
keys = [
|
||||
"/backdrop/screen0/monitorVNC-0/workspace0/last-image",
|
||||
"/backdrop/screen0/monitor0/image-path",
|
||||
]
|
||||
for key in keys:
|
||||
subprocess.run(
|
||||
[
|
||||
"xfconf-query",
|
||||
"-c",
|
||||
"xfce4-desktop",
|
||||
"-p",
|
||||
key,
|
||||
"-s",
|
||||
file_loc,
|
||||
],
|
||||
check=False,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# Reload xfdesktop to apply changes
|
||||
subprocess.Popen(["xfdesktop", "--reload"])
|
||||
elif desktop_env == "razor-qt": # TODO: implement reload of desktop when possible
|
||||
if first_run:
|
||||
import configparser
|
||||
|
||||
desktop_conf = configparser.ConfigParser()
|
||||
# Development version
|
||||
desktop_conf_file = os.path.join(get_config_dir("razor"), "desktop.conf")
|
||||
if os.path.isfile(desktop_conf_file):
|
||||
config_option = R"screens\1\desktops\1\wallpaper"
|
||||
else:
|
||||
desktop_conf_file = os.path.join(get_home_dir(), ".razor/desktop.conf")
|
||||
config_option = R"desktops\1\wallpaper"
|
||||
desktop_conf.read(os.path.join(desktop_conf_file))
|
||||
try:
|
||||
if desktop_conf.has_option("razor", config_option): # only replacing a value
|
||||
desktop_conf.set("razor", config_option, file_loc)
|
||||
with open(desktop_conf_file, "w", encoding="utf-8", errors="replace") as f:
|
||||
desktop_conf.write(f)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
# TODO: reload desktop when possible
|
||||
pass
|
||||
elif desktop_env in ["fluxbox", "jwm", "openbox", "afterstep"]:
|
||||
# http://fluxbox-wiki.org/index.php/Howto_set_the_background
|
||||
# fbsetbg is used on JWM too, to avoid editing the XML configuration,
# since fbsetbg does the job well anyway.
# It is not clear how else the background can be set on Openbox and AfterStep,
# but fbsetbg works well there too.
|
||||
try:
|
||||
args = ["fbsetbg", file_loc]
|
||||
subprocess.Popen(args)
|
||||
except Exception:
|
||||
sys.stderr.write("ERROR: Failed to set wallpaper with fbsetbg!\n")
|
||||
sys.stderr.write("Please make sre that You have fbsetbg installed.\n")
|
||||
elif desktop_env == "icewm":
|
||||
# command found at http://urukrama.wordpress.com/2007/12/05/desktop-backgrounds-in-window-managers/
|
||||
args = ["icewmbg", file_loc]
|
||||
subprocess.Popen(args)
|
||||
elif desktop_env == "blackbox":
|
||||
# command found at http://blackboxwm.sourceforge.net/BlackboxDocumentation/BlackboxBackground
|
||||
args = ["bsetbg", "-full", file_loc]
|
||||
subprocess.Popen(args)
|
||||
elif desktop_env == "lxde":
|
||||
args = ["pcmanfm", "--set-wallpaper", file_loc, "--wallpaper-mode=scaled"]
|
||||
subprocess.Popen(args)
|
||||
elif desktop_env == "windowmaker":
|
||||
# From http://www.commandlinefu.com/commands/view/3857/set-wallpaper-on-windowmaker-in-one-line
|
||||
args = ["wmsetbg", "-s", "-u", file_loc]
|
||||
subprocess.Popen(args)
|
||||
# elif desktop_env == "enlightenment": # I have not been able to make it work on e17. On e16 it would have been something in this direction
|
||||
# args = ["enlightenment_remote", "-desktop-bg-add", "0", "0", "0", "0", file_loc]
|
||||
# subprocess.Popen(args)
|
||||
elif desktop_env == "windows":
|
||||
# From https://stackoverflow.com/questions/1977694/change-desktop-background
|
||||
# Tested on Windows 10. -- @1j01
|
||||
import ctypes
|
||||
|
||||
SPI_SETDESKWALLPAPER = 20
|
||||
ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, file_loc, 0) # type: ignore
|
||||
elif desktop_env == "mac":
|
||||
# From https://stackoverflow.com/questions/431205/how-can-i-programatically-change-the-background-in-mac-os-x
|
||||
try:
|
||||
# Tested on macOS 10.14.6 (Mojave) -- @1j01
|
||||
assert (
|
||||
sys.platform == "darwin"
|
||||
) # ignore `Import "appscript" could not be resolved` for other platforms
|
||||
from appscript import app, mactypes
|
||||
|
||||
app("Finder").desktop_picture.set(mactypes.File(file_loc))
|
||||
except ImportError:
|
||||
# Tested on macOS 10.14.6 (Mojave) -- @1j01
|
||||
# import subprocess
|
||||
# SCRIPT = f"""/usr/bin/osascript<<END
|
||||
# tell application "Finder" to set desktop picture to POSIX file "{file_loc}"
|
||||
# END"""
|
||||
# subprocess.Popen(SCRIPT, shell=True)
|
||||
|
||||
# Safer version, avoiding string interpolation,
|
||||
# to protect against command injection (both in the shell and in AppleScript):
|
||||
OSASCRIPT = """
|
||||
on run (clp)
|
||||
if clp's length is not 1 then error "Incorrect Parameters"
|
||||
local file_loc
|
||||
set file_loc to clp's item 1
|
||||
tell application "Finder" to set desktop picture to POSIX file file_loc
|
||||
end run
|
||||
"""
|
||||
subprocess.Popen(["osascript", "-e", OSASCRIPT, "--", file_loc])
|
||||
else:
|
||||
if first_run: # don't spam the user with the same message over and over again
|
||||
sys.stderr.write(
|
||||
"Warning: Failed to set wallpaper. Your desktop environment is not supported."
|
||||
)
|
||||
sys.stderr.write(f"You can try manually to set your wallpaper to {file_loc}")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def get_config_dir(app_name: str) -> str:
|
||||
"""Returns the configuration directory for the given application name."""
|
||||
if "XDG_CONFIG_HOME" in os.environ:
|
||||
config_home = os.environ["XDG_CONFIG_HOME"]
|
||||
elif "APPDATA" in os.environ: # On Windows
|
||||
config_home = os.environ["APPDATA"]
|
||||
else:
|
||||
try:
|
||||
from xdg import BaseDirectory
|
||||
|
||||
config_home = BaseDirectory.xdg_config_home
|
||||
except ImportError: # Most likely a Linux/Unix system anyway
|
||||
config_home = os.path.join(get_home_dir(), ".config")
|
||||
config_dir = os.path.join(config_home, app_name)
|
||||
return config_dir
|
||||
|
||||
|
||||
def get_home_dir() -> str:
|
||||
"""Returns the home directory of the current user."""
|
||||
return os.path.expanduser("~")
|
||||
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
|
||||
|
||||
[project]
|
||||
name = "cua-computer-server"
|
||||
version = "0.1.27"
|
||||
version = "0.1.29"
|
||||
|
||||
description = "Server component for the Computer-Use Interface (CUI) framework powering Cua"
|
||||
authors = [
|
||||
@@ -23,13 +23,14 @@ dependencies = [
|
||||
"aiohttp>=3.9.1",
|
||||
"pyperclip>=1.9.0",
|
||||
"websockets>=12.0",
|
||||
"pywinctl>=0.4.1",
|
||||
# OS-specific runtime deps
|
||||
"pyobjc-framework-Cocoa>=10.1; sys_platform == 'darwin'",
|
||||
"pyobjc-framework-Quartz>=10.1; sys_platform == 'darwin'",
|
||||
"pyobjc-framework-ApplicationServices>=10.1; sys_platform == 'darwin'",
|
||||
"python-xlib>=0.33; sys_platform == 'linux'",
|
||||
"pywin32>=310; sys_platform == 'win32'",
|
||||
"pip-system-certs; sys_platform == 'win32'",
|
||||
"python-certifi-win32; sys_platform == 'win32'",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
47
libs/python/computer-server/tests/conftest.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Pytest configuration and shared fixtures for computer-server package tests.
|
||||
|
||||
This file contains shared fixtures and configuration for all computer-server tests.
|
||||
Following SRP: This file ONLY handles test setup/teardown.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_websocket():
|
||||
"""Mock WebSocket connection for testing.
|
||||
|
||||
Use this fixture to test WebSocket logic without real connections.
|
||||
"""
|
||||
websocket = AsyncMock()
|
||||
websocket.send = AsyncMock()
|
||||
websocket.recv = AsyncMock()
|
||||
websocket.close = AsyncMock()
|
||||
|
||||
return websocket
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_computer_interface():
|
||||
"""Mock computer interface for server tests.
|
||||
|
||||
Use this fixture to test server logic without real computer operations.
|
||||
"""
|
||||
interface = AsyncMock()
|
||||
interface.screenshot = AsyncMock(return_value=b"fake_screenshot")
|
||||
interface.left_click = AsyncMock()
|
||||
interface.type = AsyncMock()
|
||||
interface.key = AsyncMock()
|
||||
|
||||
return interface
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def disable_telemetry(monkeypatch):
|
||||
"""Disable telemetry for tests.
|
||||
|
||||
Use this fixture to ensure no telemetry is sent during tests.
|
||||
"""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_DISABLED", "1")
|
||||
40
libs/python/computer-server/tests/test_server.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Unit tests for computer-server package.
|
||||
|
||||
This file tests ONLY basic server functionality.
|
||||
Following SRP: This file tests server initialization and basic operations.
|
||||
All external dependencies are mocked.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestServerImports:
|
||||
"""Test server module imports (SRP: Only tests imports)."""
|
||||
|
||||
def test_server_module_exists(self):
|
||||
"""Test that server module can be imported."""
|
||||
try:
|
||||
import computer_server
|
||||
|
||||
assert computer_server is not None
|
||||
except ImportError:
|
||||
pytest.skip("computer_server module not installed")
|
||||
|
||||
|
||||
class TestServerInitialization:
|
||||
"""Test server initialization (SRP: Only tests initialization)."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_server_can_be_imported(self):
|
||||
"""Basic smoke test: verify server components can be imported."""
|
||||
try:
|
||||
from computer_server import server
|
||||
|
||||
assert server is not None
|
||||
except ImportError:
|
||||
pytest.skip("Server module not available")
|
||||
except Exception as e:
|
||||
# Some initialization errors are acceptable in unit tests
|
||||
pytest.skip(f"Server initialization requires specific setup: {e}")
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.4.7
|
||||
current_version = 0.4.11
|
||||
commit = True
|
||||
tag = True
|
||||
tag_name = computer-v{new_version}
|
||||
|
||||
@@ -68,7 +68,7 @@ Refer to this notebook for a step-by-step guide on how to use the Computer-Use I

## Docs

- [Computers](https://trycua.com/docs/computer-sdk/computers)
- [Commands](https://trycua.com/docs/computer-sdk/commands)
- [Computer UI](https://trycua.com/docs/computer-sdk/computer-ui)
- [Sandboxed Python](https://trycua.com/docs/computer-sdk/sandboxed-python)
- [Computers](https://cua.ai/docs/computer-sdk/computers)
- [Commands](https://cua.ai/docs/computer-sdk/commands)
- [Computer UI](https://cua.ai/docs/computer-sdk/computer-ui)
- [Sandboxed Python](https://cua.ai/docs/computer-sdk/sandboxed-python)

@@ -17,6 +17,8 @@ from .interface.factory import InterfaceFactory
from .logger import Logger, LogLevel
from .models import Computer as ComputerConfig
from .models import Display
from .tracing import ComputerTracing
from .tracing_wrapper import TracingInterfaceWrapper

SYSTEM_INFO = {
    "os": platform.system().lower(),
@@ -208,8 +210,13 @@ class Computer:

        # Initialize with proper typing - None at first, will be set in run()
        self._interface = None
        self._original_interface = None  # Keep reference to original interface
        self._tracing_wrapper = None  # Tracing wrapper for interface
        self.use_host_computer_server = use_host_computer_server

        # Initialize tracing
        self._tracing = ComputerTracing(self)

        # Record initialization in telemetry (if enabled)
        if telemetry_enabled and is_telemetry_enabled():
            record_event("computer_initialized", SYSTEM_INFO)
@@ -259,12 +266,14 @@ class Computer:
            # Create the interface with explicit type annotation
            from .interface.base import BaseComputerInterface

            self._interface = cast(
            interface = cast(
                BaseComputerInterface,
                InterfaceFactory.create_interface_for_os(
                    os=self.os_type, ip_address=ip_address  # type: ignore[arg-type]
                ),
            )
            self._interface = interface
            self._original_interface = interface

            self.logger.info("Waiting for host computer server to be ready...")
            await self._interface.wait_for_ready()
@@ -493,7 +502,7 @@ class Computer:

            # Pass authentication credentials if using cloud provider
            if self.provider_type == VMProviderType.CLOUD and self.api_key and self.config.name:
                self._interface = cast(
                interface = cast(
                    BaseComputerInterface,
                    InterfaceFactory.create_interface_for_os(
                        os=self.os_type,
@@ -503,13 +512,16 @@ class Computer:
                    ),
                )
            else:
                self._interface = cast(
                interface = cast(
                    BaseComputerInterface,
                    InterfaceFactory.create_interface_for_os(
                        os=self.os_type, ip_address=ip_address
                    ),
                )

            self._interface = interface
            self._original_interface = interface

            # Wait for the WebSocket interface to be ready
            self.logger.info("Connecting to WebSocket interface...")

@@ -866,7 +878,7 @@ class Computer:
        """Get the computer interface for interacting with the VM.

        Returns:
            The computer interface
            The computer interface (wrapped with tracing if tracing is active)
        """
        if not hasattr(self, "_interface") or self._interface is None:
            error_msg = "Computer interface not initialized. Call run() first."
@@ -876,8 +888,34 @@ class Computer:
            )
            raise RuntimeError(error_msg)

        # Return tracing wrapper if tracing is active and we have an original interface
        if (
            self._tracing.is_tracing
            and hasattr(self, "_original_interface")
            and self._original_interface is not None
        ):
            # Create wrapper if it doesn't exist or if the original interface changed
            if (
                not hasattr(self, "_tracing_wrapper")
                or self._tracing_wrapper is None
                or self._tracing_wrapper._original_interface != self._original_interface
            ):
                self._tracing_wrapper = TracingInterfaceWrapper(
                    self._original_interface, self._tracing
                )
            return self._tracing_wrapper

        return self._interface

    @property
    def tracing(self) -> ComputerTracing:
        """Get the computer tracing instance for recording sessions.

        Returns:
            ComputerTracing: The tracing instance
        """
        return self._tracing

    @property
    def telemetry_enabled(self) -> bool:
        """Check if telemetry is enabled for this computer instance.

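To make the wrapping behaviour concrete, a minimal usage sketch; the constructor arguments (`os_type="linux"`) are illustrative assumptions, not values mandated by this change:

import asyncio

from computer import Computer


async def main() -> None:
    # Constructor arguments are assumptions; configure for your own provider/VM.
    async with Computer(os_type="linux") as computer:
        await computer.tracing.start({"screenshots": True, "api_calls": True})

        # While tracing is active, `computer.interface` returns the
        # TracingInterfaceWrapper, so this click is recorded automatically.
        await computer.interface.left_click(100, 200)

        trace_path = await computer.tracing.stop({"format": "zip"})
        print(f"Trace written to {trace_path}")


asyncio.run(main())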
@@ -436,6 +436,189 @@ class BaseComputerInterface(ABC):
        """
        pass

    # Desktop actions
    @abstractmethod
    async def get_desktop_environment(self) -> str:
        """Get the current desktop environment.

        Returns:
            The name of the current desktop environment.
        """
        pass

    @abstractmethod
    async def set_wallpaper(self, path: str) -> None:
        """Set the desktop wallpaper to the specified path.

        Args:
            path: The file path to set as wallpaper
        """
        pass

    # Window management
    @abstractmethod
    async def open(self, target: str) -> None:
        """Open a target using the system's default handler.

        Typically opens files, folders, or URLs with the associated application.

        Args:
            target: The file path, folder path, or URL to open.
        """
        pass

    @abstractmethod
    async def launch(self, app: str, args: List[str] | None = None) -> Optional[int]:
        """Launch an application with optional arguments.

        Args:
            app: The application executable or bundle identifier.
            args: Optional list of arguments to pass to the application.

        Returns:
            Optional process ID (PID) of the launched application if available, otherwise None.
        """
        pass

    @abstractmethod
    async def get_current_window_id(self) -> int | str:
        """Get the identifier of the currently active/focused window.

        Returns:
            A window identifier that can be used with other window management methods.
        """
        pass

    @abstractmethod
    async def get_application_windows(self, app: str) -> List[int | str]:
        """Get all window identifiers for a specific application.

        Args:
            app: The application name, executable, or identifier to query.

        Returns:
            A list of window identifiers belonging to the specified application.
        """
        pass

    @abstractmethod
    async def get_window_name(self, window_id: int | str) -> str:
        """Get the title/name of a window.

        Args:
            window_id: The window identifier.

        Returns:
            The window's title or name string.
        """
        pass

    @abstractmethod
    async def get_window_size(self, window_id: int | str) -> tuple[int, int]:
        """Get the size of a window in pixels.

        Args:
            window_id: The window identifier.

        Returns:
            A tuple of (width, height) representing the window size in pixels.
        """
        pass

    @abstractmethod
    async def get_window_position(self, window_id: int | str) -> tuple[int, int]:
        """Get the screen position of a window.

        Args:
            window_id: The window identifier.

        Returns:
            A tuple of (x, y) representing the window's top-left corner in screen coordinates.
        """
        pass

    @abstractmethod
    async def set_window_size(self, window_id: int | str, width: int, height: int) -> None:
        """Set the size of a window in pixels.

        Args:
            window_id: The window identifier.
            width: Desired width in pixels.
            height: Desired height in pixels.
        """
        pass

    @abstractmethod
    async def set_window_position(self, window_id: int | str, x: int, y: int) -> None:
        """Move a window to a specific position on the screen.

        Args:
            window_id: The window identifier.
            x: X coordinate for the window's top-left corner.
            y: Y coordinate for the window's top-left corner.
        """
        pass

    @abstractmethod
    async def maximize_window(self, window_id: int | str) -> None:
        """Maximize a window.

        Args:
            window_id: The window identifier.
        """
        pass

    @abstractmethod
    async def minimize_window(self, window_id: int | str) -> None:
        """Minimize a window.

        Args:
            window_id: The window identifier.
        """
        pass

    @abstractmethod
    async def activate_window(self, window_id: int | str) -> None:
        """Bring a window to the foreground and focus it.

        Args:
            window_id: The window identifier.
        """
        pass

    @abstractmethod
    async def close_window(self, window_id: int | str) -> None:
        """Close a window.

        Args:
            window_id: The window identifier.
        """
        pass

    # Convenience aliases
    async def get_window_title(self, window_id: int | str) -> str:
        """Convenience alias for get_window_name().

        Args:
            window_id: The window identifier.

        Returns:
            The window's title or name string.
        """
        return await self.get_window_name(window_id)

    async def window_size(self, window_id: int | str) -> tuple[int, int]:
        """Convenience alias for get_window_size().

        Args:
            window_id: The window identifier.

        Returns:
            A tuple of (width, height) representing the window size in pixels.
        """
        return await self.get_window_size(window_id)

    # Shell actions
    @abstractmethod
    async def run_command(self, command: str) -> CommandResult:
        """Run shell command and return structured result.
@@ -487,6 +487,104 @@ class GenericComputerInterface(BaseComputerInterface):
            raise RuntimeError(result.get("error", "Failed to list directory"))
        return result.get("files", [])

    # Desktop actions
    async def get_desktop_environment(self) -> str:
        result = await self._send_command("get_desktop_environment")
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to get desktop environment"))
        return result.get("environment", "unknown")

    async def set_wallpaper(self, path: str) -> None:
        result = await self._send_command("set_wallpaper", {"path": path})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to set wallpaper"))

    # Window management
    async def open(self, target: str) -> None:
        result = await self._send_command("open", {"target": target})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to open target"))

    async def launch(self, app: str, args: list[str] | None = None) -> int | None:
        payload: dict[str, object] = {"app": app}
        if args is not None:
            payload["args"] = args
        result = await self._send_command("launch", payload)
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to launch application"))
        return result.get("pid")  # type: ignore[return-value]

    async def get_current_window_id(self) -> int | str:
        result = await self._send_command("get_current_window_id")
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to get current window id"))
        return result["window_id"]  # type: ignore[return-value]

    async def get_application_windows(self, app: str) -> list[int | str]:
        result = await self._send_command("get_application_windows", {"app": app})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to get application windows"))
        return list(result.get("windows", []))  # type: ignore[return-value]

    async def get_window_name(self, window_id: int | str) -> str:
        result = await self._send_command("get_window_name", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to get window name"))
        return result.get("name", "")  # type: ignore[return-value]

    async def get_window_size(self, window_id: int | str) -> tuple[int, int]:
        result = await self._send_command("get_window_size", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to get window size"))
        return int(result.get("width", 0)), int(result.get("height", 0))

    async def get_window_position(self, window_id: int | str) -> tuple[int, int]:
        result = await self._send_command("get_window_position", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to get window position"))
        return int(result.get("x", 0)), int(result.get("y", 0))

    async def set_window_size(self, window_id: int | str, width: int, height: int) -> None:
        result = await self._send_command(
            "set_window_size", {"window_id": window_id, "width": width, "height": height}
        )
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to set window size"))

    async def set_window_position(self, window_id: int | str, x: int, y: int) -> None:
        result = await self._send_command(
            "set_window_position", {"window_id": window_id, "x": x, "y": y}
        )
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to set window position"))

    async def maximize_window(self, window_id: int | str) -> None:
        result = await self._send_command("maximize_window", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to maximize window"))

    async def minimize_window(self, window_id: int | str) -> None:
        result = await self._send_command("minimize_window", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to minimize window"))

    async def activate_window(self, window_id: int | str) -> None:
        result = await self._send_command("activate_window", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to activate window"))

    async def close_window(self, window_id: int | str) -> None:
        result = await self._send_command("close_window", {"window_id": window_id})
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to close window"))

    # Convenience aliases
    async def get_window_title(self, window_id: int | str) -> str:
        return await self.get_window_name(window_id)

    async def window_size(self, window_id: int | str) -> tuple[int, int]:
        return await self.get_window_size(window_id)

    # Command execution
    async def run_command(self, command: str) -> CommandResult:
        result = await self._send_command("run_command", {"command": command})
@@ -10,6 +10,8 @@ import subprocess
import urllib.parse
from typing import Any, Dict, List, Optional

from computer.utils import safe_join

# Setup logging
logger = logging.getLogger(__name__)

@@ -59,7 +61,7 @@ def lume_api_get(
    # --max-time: Maximum time for the whole operation (20 seconds)
    # -f: Fail silently (no output at all) on server errors
    # Add single quotes around URL to ensure special characters are handled correctly
    cmd = ["curl", "--connect-timeout", "15", "--max-time", "20", "-s", "-f", f"'{api_url}'"]
    cmd = ["curl", "--connect-timeout", "15", "--max-time", "20", "-s", "-f", api_url]

    # For logging and display, show the properly escaped URL
    display_cmd = ["curl", "--connect-timeout", "15", "--max-time", "20", "-s", "-f", api_url]
@@ -71,7 +73,7 @@ def lume_api_get(
    # Execute the command - for execution we need to use shell=True to handle URLs with special characters
    try:
        # Use a single string with shell=True for proper URL handling
        shell_cmd = " ".join(cmd)
        shell_cmd = safe_join(cmd)
        result = subprocess.run(shell_cmd, shell=True, capture_output=True, text=True)

        # Handle curl exit codes
@@ -514,7 +516,7 @@ def lume_api_delete(
        "-s",
        "-X",
        "DELETE",
        f"'{api_url}'",
        api_url,
    ]

    # For logging and display, show the properly escaped URL
@@ -537,7 +539,7 @@ def lume_api_delete(
    # Execute the command - for execution we need to use shell=True to handle URLs with special characters
    try:
        # Use a single string with shell=True for proper URL handling
        shell_cmd = " ".join(cmd)
        shell_cmd = safe_join(cmd)
        result = subprocess.run(shell_cmd, shell=True, capture_output=True, text=True)

        # Handle curl exit codes
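To see why swapping " ".join(cmd) for safe_join(cmd) matters before shell=True, a small standalone sketch; the URL is a made-up example of attacker-influenced input:

import shlex

api_url = "http://localhost:7777/lume/vms/demo;rm -rf ~"  # hypothetical malicious value
cmd = ["curl", "-s", "-f", api_url]

# Naive joining lets the shell split on `;` and run a second command.
print(" ".join(cmd))
# shlex.join (what safe_join uses on POSIX) quotes the URL into a single argument.
print(shlex.join(cmd))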
355 libs/python/computer/computer/tracing.py Normal file
@@ -0,0 +1,355 @@
|
||||
"""
|
||||
Computer tracing functionality for recording sessions.
|
||||
|
||||
This module provides a Computer.tracing API inspired by Playwright's tracing functionality,
|
||||
allowing users to record computer interactions for debugging, training, and analysis.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class ComputerTracing:
|
||||
"""
|
||||
Computer tracing class that records computer interactions and saves them to disk.
|
||||
|
||||
This class provides a flexible API for recording computer sessions with configurable
|
||||
options for what to record (screenshots, API calls, video, etc.).
|
||||
"""
|
||||
|
||||
def __init__(self, computer_instance):
|
||||
"""
|
||||
Initialize the tracing instance.
|
||||
|
||||
Args:
|
||||
computer_instance: The Computer instance to trace
|
||||
"""
|
||||
self._computer = computer_instance
|
||||
self._is_tracing = False
|
||||
self._trace_config: Dict[str, Any] = {}
|
||||
self._trace_data: List[Dict[str, Any]] = []
|
||||
self._trace_start_time: Optional[float] = None
|
||||
self._trace_id: Optional[str] = None
|
||||
self._trace_dir: Optional[Path] = None
|
||||
self._screenshot_count = 0
|
||||
|
||||
@property
|
||||
def is_tracing(self) -> bool:
|
||||
"""Check if tracing is currently active."""
|
||||
return self._is_tracing
|
||||
|
||||
async def start(self, config: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""
|
||||
Start tracing with the specified configuration.
|
||||
|
||||
Args:
|
||||
config: Tracing configuration dict with options:
|
||||
- video: bool - Record video frames (default: False)
|
||||
- screenshots: bool - Record screenshots (default: True)
|
||||
- api_calls: bool - Record API calls and results (default: True)
|
||||
- accessibility_tree: bool - Record accessibility tree snapshots (default: False)
|
||||
- metadata: bool - Record custom metadata (default: True)
|
||||
- name: str - Custom trace name (default: auto-generated)
|
||||
- path: str - Custom trace directory path (default: auto-generated)
|
||||
"""
|
||||
if self._is_tracing:
|
||||
raise RuntimeError("Tracing is already active. Call stop() first.")
|
||||
|
||||
# Set default configuration
|
||||
default_config = {
|
||||
"video": False,
|
||||
"screenshots": True,
|
||||
"api_calls": True,
|
||||
"accessibility_tree": False,
|
||||
"metadata": True,
|
||||
"name": None,
|
||||
"path": None,
|
||||
}
|
||||
|
||||
self._trace_config = {**default_config, **(config or {})}
|
||||
|
||||
# Generate trace ID and directory
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
self._trace_id = (
|
||||
self._trace_config.get("name") or f"trace_{timestamp}_{str(uuid.uuid4())[:8]}"
|
||||
)
|
||||
|
||||
if self._trace_config.get("path"):
|
||||
self._trace_dir = Path(self._trace_config["path"])
|
||||
else:
|
||||
self._trace_dir = Path.cwd() / "traces" / self._trace_id
|
||||
|
||||
# Create trace directory
|
||||
self._trace_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize trace data
|
||||
self._trace_data = []
|
||||
self._trace_start_time = time.time()
|
||||
self._screenshot_count = 0
|
||||
self._is_tracing = True
|
||||
|
||||
# Record initial metadata
|
||||
await self._record_event(
|
||||
"trace_start",
|
||||
{
|
||||
"trace_id": self._trace_id,
|
||||
"config": self._trace_config,
|
||||
"timestamp": self._trace_start_time,
|
||||
"computer_info": {
|
||||
"os_type": self._computer.os_type,
|
||||
"provider_type": str(self._computer.provider_type),
|
||||
"image": self._computer.image,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
# Take initial screenshot if enabled
|
||||
if self._trace_config.get("screenshots"):
|
||||
await self._take_screenshot("initial_screenshot")
|
||||
|
||||
async def stop(self, options: Optional[Dict[str, Any]] = None) -> str:
|
||||
"""
|
||||
Stop tracing and save the trace data.
|
||||
|
||||
Args:
|
||||
options: Stop options dict with:
|
||||
- path: str - Custom output path for the trace archive
|
||||
- format: str - Output format ('zip' or 'dir', default: 'zip')
|
||||
|
||||
Returns:
|
||||
str: Path to the saved trace file or directory
|
||||
"""
|
||||
if not self._is_tracing:
|
||||
raise RuntimeError("Tracing is not active. Call start() first.")
|
||||
|
||||
if self._trace_start_time is None or self._trace_dir is None or self._trace_id is None:
|
||||
raise RuntimeError("Tracing state is invalid.")
|
||||
|
||||
# Record final metadata
|
||||
await self._record_event(
|
||||
"trace_end",
|
||||
{
|
||||
"timestamp": time.time(),
|
||||
"duration": time.time() - self._trace_start_time,
|
||||
"total_events": len(self._trace_data),
|
||||
"screenshot_count": self._screenshot_count,
|
||||
},
|
||||
)
|
||||
|
||||
# Take final screenshot if enabled
|
||||
if self._trace_config.get("screenshots"):
|
||||
await self._take_screenshot("final_screenshot")
|
||||
|
||||
# Save trace metadata
|
||||
metadata_path = self._trace_dir / "trace_metadata.json"
|
||||
with open(metadata_path, "w") as f:
|
||||
json.dump(
|
||||
{
|
||||
"trace_id": self._trace_id,
|
||||
"config": self._trace_config,
|
||||
"start_time": self._trace_start_time,
|
||||
"end_time": time.time(),
|
||||
"duration": time.time() - self._trace_start_time,
|
||||
"total_events": len(self._trace_data),
|
||||
"screenshot_count": self._screenshot_count,
|
||||
"events": self._trace_data,
|
||||
},
|
||||
f,
|
||||
indent=2,
|
||||
default=str,
|
||||
)
|
||||
|
||||
# Determine output format and path
|
||||
output_format = options.get("format", "zip") if options else "zip"
|
||||
custom_path = options.get("path") if options else None
|
||||
|
||||
if output_format == "zip":
|
||||
# Create zip file
|
||||
if custom_path:
|
||||
zip_path = Path(custom_path)
|
||||
else:
|
||||
zip_path = self._trace_dir.parent / f"{self._trace_id}.zip"
|
||||
|
||||
await self._create_zip_archive(zip_path)
|
||||
output_path = str(zip_path)
|
||||
else:
|
||||
# Return directory path
|
||||
if custom_path:
|
||||
# Move directory to custom path
|
||||
custom_dir = Path(custom_path)
|
||||
if custom_dir.exists():
|
||||
import shutil
|
||||
|
||||
shutil.rmtree(custom_dir)
|
||||
self._trace_dir.rename(custom_dir)
|
||||
output_path = str(custom_dir)
|
||||
else:
|
||||
output_path = str(self._trace_dir)
|
||||
|
||||
# Reset tracing state
|
||||
self._is_tracing = False
|
||||
self._trace_config = {}
|
||||
self._trace_data = []
|
||||
self._trace_start_time = None
|
||||
self._trace_id = None
|
||||
self._screenshot_count = 0
|
||||
|
||||
return output_path
|
||||
|
||||
async def _record_event(self, event_type: str, data: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Record a trace event.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (e.g., 'click', 'type', 'screenshot')
|
||||
data: Event data
|
||||
"""
|
||||
if not self._is_tracing or self._trace_start_time is None or self._trace_dir is None:
|
||||
return
|
||||
|
||||
event = {
|
||||
"type": event_type,
|
||||
"timestamp": time.time(),
|
||||
"relative_time": time.time() - self._trace_start_time,
|
||||
"data": data,
|
||||
}
|
||||
|
||||
self._trace_data.append(event)
|
||||
|
||||
# Save event to individual file for large traces
|
||||
event_file = self._trace_dir / f"event_{len(self._trace_data):06d}_{event_type}.json"
|
||||
with open(event_file, "w") as f:
|
||||
json.dump(event, f, indent=2, default=str)
|
||||
|
||||
async def _take_screenshot(self, name: str = "screenshot") -> Optional[str]:
|
||||
"""
|
||||
Take a screenshot and save it to the trace.
|
||||
|
||||
Args:
|
||||
name: Name for the screenshot
|
||||
|
||||
Returns:
|
||||
Optional[str]: Path to the saved screenshot, or None if screenshots disabled
|
||||
"""
|
||||
if (
|
||||
not self._trace_config.get("screenshots")
|
||||
or not self._computer.interface
|
||||
or self._trace_dir is None
|
||||
):
|
||||
return None
|
||||
|
||||
try:
|
||||
screenshot_bytes = await self._computer.interface.screenshot()
|
||||
self._screenshot_count += 1
|
||||
|
||||
screenshot_filename = f"{self._screenshot_count:06d}_{name}.png"
|
||||
screenshot_path = self._trace_dir / screenshot_filename
|
||||
|
||||
with open(screenshot_path, "wb") as f:
|
||||
f.write(screenshot_bytes)
|
||||
|
||||
return str(screenshot_path)
|
||||
except Exception as e:
|
||||
# Log error but don't fail the trace
|
||||
if hasattr(self._computer, "logger"):
|
||||
self._computer.logger.warning(f"Failed to take screenshot: {e}")
|
||||
return None
|
||||
|
||||
async def _create_zip_archive(self, zip_path: Path) -> None:
|
||||
"""
|
||||
Create a zip archive of the trace directory.
|
||||
|
||||
Args:
|
||||
zip_path: Path where to save the zip file
|
||||
"""
|
||||
if self._trace_dir is None:
|
||||
raise RuntimeError("Trace directory is not set")
|
||||
|
||||
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
|
||||
for file_path in self._trace_dir.rglob("*"):
|
||||
if file_path.is_file():
|
||||
arcname = file_path.relative_to(self._trace_dir)
|
||||
zipf.write(file_path, arcname)
|
||||
|
||||
async def record_api_call(
|
||||
self,
|
||||
method: str,
|
||||
args: Dict[str, Any],
|
||||
result: Any = None,
|
||||
error: Optional[Exception] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Record an API call event.
|
||||
|
||||
Args:
|
||||
method: The method name that was called
|
||||
args: Arguments passed to the method
|
||||
result: Result returned by the method
|
||||
error: Exception raised by the method, if any
|
||||
"""
|
||||
if not self._trace_config.get("api_calls"):
|
||||
return
|
||||
|
||||
# Take screenshot after certain actions if enabled
|
||||
screenshot_path = None
|
||||
screenshot_actions = [
|
||||
"left_click",
|
||||
"right_click",
|
||||
"double_click",
|
||||
"type_text",
|
||||
"press_key",
|
||||
"hotkey",
|
||||
]
|
||||
if method in screenshot_actions and self._trace_config.get("screenshots"):
|
||||
screenshot_path = await self._take_screenshot(f"after_{method}")
|
||||
|
||||
# Record accessibility tree after certain actions if enabled
|
||||
if method in screenshot_actions and self._trace_config.get("accessibility_tree"):
|
||||
await self.record_accessibility_tree()
|
||||
|
||||
await self._record_event(
|
||||
"api_call",
|
||||
{
|
||||
"method": method,
|
||||
"args": args,
|
||||
"result": str(result) if result is not None else None,
|
||||
"error": str(error) if error else None,
|
||||
"screenshot": screenshot_path,
|
||||
"success": error is None,
|
||||
},
|
||||
)
|
||||
|
||||
async def record_accessibility_tree(self) -> None:
|
||||
"""Record the current accessibility tree if enabled."""
|
||||
if not self._trace_config.get("accessibility_tree") or not self._computer.interface:
|
||||
return
|
||||
|
||||
try:
|
||||
accessibility_tree = await self._computer.interface.get_accessibility_tree()
|
||||
await self._record_event("accessibility_tree", {"tree": accessibility_tree})
|
||||
except Exception as e:
|
||||
if hasattr(self._computer, "logger"):
|
||||
self._computer.logger.warning(f"Failed to record accessibility tree: {e}")
|
||||
|
||||
async def add_metadata(self, key: str, value: Any) -> None:
|
||||
"""
|
||||
Add custom metadata to the trace.
|
||||
|
||||
Args:
|
||||
key: Metadata key
|
||||
value: Metadata value
|
||||
"""
|
||||
if not self._trace_config.get("metadata"):
|
||||
return
|
||||
|
||||
await self._record_event("metadata", {"key": key, "value": value})
|
||||
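Given the layout stop() writes above (numbered screenshots, per-event JSON files, and a trace_metadata.json summary), a hedged sketch of inspecting a finished zip trace offline; the archive name is a made-up example:

import json
import zipfile

trace_zip = "traces/trace_20250101_120000_abcd1234.zip"  # path returned by tracing.stop()

with zipfile.ZipFile(trace_zip) as zf:
    metadata = json.loads(zf.read("trace_metadata.json"))
    print(f"{metadata['total_events']} events over {metadata['duration']:.1f}s")
    screenshots = [name for name in zf.namelist() if name.endswith(".png")]
    print(f"{len(screenshots)} screenshots captured")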
334 libs/python/computer/computer/tracing_wrapper.py Normal file
@@ -0,0 +1,334 @@
|
||||
"""
|
||||
Tracing wrapper for computer interface that records API calls.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .interface.base import BaseComputerInterface
|
||||
|
||||
|
||||
class TracingInterfaceWrapper:
|
||||
"""
|
||||
Wrapper class that intercepts computer interface calls and records them for tracing.
|
||||
"""
|
||||
|
||||
def __init__(self, original_interface: BaseComputerInterface, tracing_instance):
|
||||
"""
|
||||
Initialize the tracing wrapper.
|
||||
|
||||
Args:
|
||||
original_interface: The original computer interface
|
||||
tracing_instance: The ComputerTracing instance
|
||||
"""
|
||||
self._original_interface = original_interface
|
||||
self._tracing = tracing_instance
|
||||
|
||||
def __getattr__(self, name):
|
||||
"""
|
||||
Delegate attribute access to the original interface if not found in wrapper.
|
||||
"""
|
||||
return getattr(self._original_interface, name)
|
||||
|
||||
async def _record_call(
|
||||
self,
|
||||
method_name: str,
|
||||
args: Dict[str, Any],
|
||||
result: Any = None,
|
||||
error: Optional[Exception] = None,
|
||||
):
|
||||
"""
|
||||
Record an API call for tracing.
|
||||
|
||||
Args:
|
||||
method_name: Name of the method called
|
||||
args: Arguments passed to the method
|
||||
result: Result returned by the method
|
||||
error: Exception raised, if any
|
||||
"""
|
||||
if self._tracing.is_tracing:
|
||||
await self._tracing.record_api_call(method_name, args, result, error)
|
||||
|
||||
# Mouse Actions
|
||||
async def left_click(
|
||||
self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None
|
||||
) -> None:
|
||||
"""Perform a left mouse button click."""
|
||||
args = {"x": x, "y": y, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.left_click(x, y, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("left_click", args, None, error)
|
||||
|
||||
async def right_click(
|
||||
self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None
|
||||
) -> None:
|
||||
"""Perform a right mouse button click."""
|
||||
args = {"x": x, "y": y, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.right_click(x, y, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("right_click", args, None, error)
|
||||
|
||||
async def double_click(
|
||||
self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None
|
||||
) -> None:
|
||||
"""Perform a double left mouse button click."""
|
||||
args = {"x": x, "y": y, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.double_click(x, y, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("double_click", args, None, error)
|
||||
|
||||
async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
||||
"""Move the cursor to the specified screen coordinates."""
|
||||
args = {"x": x, "y": y, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.move_cursor(x, y, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("move_cursor", args, None, error)
|
||||
|
||||
async def drag_to(
|
||||
self,
|
||||
x: int,
|
||||
y: int,
|
||||
button: str = "left",
|
||||
duration: float = 0.5,
|
||||
delay: Optional[float] = None,
|
||||
) -> None:
|
||||
"""Drag from current position to specified coordinates."""
|
||||
args = {"x": x, "y": y, "button": button, "duration": duration, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.drag_to(x, y, button, duration, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("drag_to", args, None, error)
|
||||
|
||||
async def drag(
|
||||
self,
|
||||
path: List[Tuple[int, int]],
|
||||
button: str = "left",
|
||||
duration: float = 0.5,
|
||||
delay: Optional[float] = None,
|
||||
) -> None:
|
||||
"""Drag the cursor along a path of coordinates."""
|
||||
args = {"path": path, "button": button, "duration": duration, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.drag(path, button, duration, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("drag", args, None, error)
|
||||
|
||||
# Keyboard Actions
|
||||
async def key_down(self, key: str, delay: Optional[float] = None) -> None:
|
||||
"""Press and hold a key."""
|
||||
args = {"key": key, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.key_down(key, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("key_down", args, None, error)
|
||||
|
||||
async def key_up(self, key: str, delay: Optional[float] = None) -> None:
|
||||
"""Release a previously pressed key."""
|
||||
args = {"key": key, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.key_up(key, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("key_up", args, None, error)
|
||||
|
||||
async def type_text(self, text: str, delay: Optional[float] = None) -> None:
|
||||
"""Type the specified text string."""
|
||||
args = {"text": text, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.type_text(text, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("type_text", args, None, error)
|
||||
|
||||
async def press_key(self, key: str, delay: Optional[float] = None) -> None:
|
||||
"""Press and release a single key."""
|
||||
args = {"key": key, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.press_key(key, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("press_key", args, None, error)
|
||||
|
||||
async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
|
||||
"""Press multiple keys simultaneously (keyboard shortcut)."""
|
||||
args = {"keys": keys, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.hotkey(*keys, delay=delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("hotkey", args, None, error)
|
||||
|
||||
# Scrolling Actions
|
||||
async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
||||
"""Scroll the mouse wheel by specified amounts."""
|
||||
args = {"x": x, "y": y, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.scroll(x, y, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("scroll", args, None, error)
|
||||
|
||||
async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
||||
"""Scroll down by the specified number of clicks."""
|
||||
args = {"clicks": clicks, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.scroll_down(clicks, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("scroll_down", args, None, error)
|
||||
|
||||
async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
||||
"""Scroll up by the specified number of clicks."""
|
||||
args = {"clicks": clicks, "delay": delay}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.scroll_up(clicks, delay)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("scroll_up", args, None, error)
|
||||
|
||||
# Screen Actions
|
||||
async def screenshot(self) -> bytes:
|
||||
"""Take a screenshot."""
|
||||
args = {}
|
||||
error = None
|
||||
result = None
|
||||
try:
|
||||
result = await self._original_interface.screenshot()
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
# For screenshots, we don't want to include the raw bytes in the trace args
|
||||
await self._record_call(
|
||||
"screenshot", args, "screenshot_taken" if result else None, error
|
||||
)
|
||||
|
||||
async def get_screen_size(self) -> Dict[str, int]:
|
||||
"""Get the screen dimensions."""
|
||||
args = {}
|
||||
error = None
|
||||
result = None
|
||||
try:
|
||||
result = await self._original_interface.get_screen_size()
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("get_screen_size", args, result, error)
|
||||
|
||||
async def get_cursor_position(self) -> Dict[str, int]:
|
||||
"""Get the current cursor position on screen."""
|
||||
args = {}
|
||||
error = None
|
||||
result = None
|
||||
try:
|
||||
result = await self._original_interface.get_cursor_position()
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("get_cursor_position", args, result, error)
|
||||
|
||||
# Clipboard Actions
|
||||
async def copy_to_clipboard(self) -> str:
|
||||
"""Get the current clipboard content."""
|
||||
args = {}
|
||||
error = None
|
||||
result = None
|
||||
try:
|
||||
result = await self._original_interface.copy_to_clipboard()
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
# Don't include clipboard content in trace for privacy
|
||||
await self._record_call(
|
||||
"copy_to_clipboard",
|
||||
args,
|
||||
f"content_length_{len(result)}" if result else None,
|
||||
error,
|
||||
)
|
||||
|
||||
async def set_clipboard(self, text: str) -> None:
|
||||
"""Set the clipboard content to the specified text."""
|
||||
# Don't include clipboard content in trace for privacy
|
||||
args = {"text_length": len(text)}
|
||||
error = None
|
||||
try:
|
||||
result = await self._original_interface.set_clipboard(text)
|
||||
return result
|
||||
except Exception as e:
|
||||
error = e
|
||||
raise
|
||||
finally:
|
||||
await self._record_call("set_clipboard", args, None, error)
|
||||
@@ -1207,7 +1207,7 @@ def create_gradio_ui():
                label="Container Name",
                placeholder="Enter your container name",
                visible=False,
                info="Get your container from [trycua.com](https://trycua.com/)",
                info="Get your container from [cua.ai](https://cua.ai/)",
            )

            # Check if CUA_API_KEY is set in environment

@@ -1,7 +1,10 @@
import base64
import io
import os
import shlex
from typing import Any, Dict, Optional, Tuple

import mslex
from PIL import Image, ImageDraw


@@ -104,3 +107,25 @@ def parse_vm_info(vm_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Parse VM info from pylume response."""
    if not vm_info:
        return None


def safe_join(argv: list[str]) -> str:
    """
    Return a platform-correct string that safely quotes `argv` for shell execution.

    - On POSIX: uses `shlex.join`.
    - On Windows: uses `mslex.join`.

    Args:
        argv: list of argument strings.

    Returns:
        A safely quoted command-line string appropriate for the current platform that
        protects against shell injection vulnerabilities.
    """
    if os.name == "nt":
        # On Windows, use mslex for proper quoting
        return mslex.join(argv)
    else:
        # On POSIX systems, use shlex
        return shlex.join(argv)

@@ -4,7 +4,7 @@ build-backend = "pdm.backend"

[project]
name = "cua-computer"
version = "0.4.10"
version = "0.4.11"
description = "Computer-Use Interface (CUI) framework powering Cua"
readme = "README.md"
authors = [
@@ -16,7 +16,8 @@ dependencies = [
    "websockets>=12.0",
    "aiohttp>=3.9.0",
    "cua-core>=0.1.0,<0.2.0",
    "pydantic>=2.11.1"
    "pydantic>=2.11.1",
    "mslex>=1.3.0",
]
requires-python = ">=3.12"

@@ -47,4 +48,4 @@ source-includes = ["tests/", "README.md", "LICENSE"]
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
python_files = "test_*.py"
python_files = "test_*.py"
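With asyncio_mode = "auto" set above, coroutine tests under tests/ run on an event loop without an explicit marker (assuming pytest-asyncio is installed, which the setting implies); a minimal illustrative test:

import asyncio


async def test_runs_without_an_asyncio_marker():
    # pytest-asyncio's auto mode wraps this coroutine in an event loop for us.
    await asyncio.sleep(0)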
69 libs/python/computer/tests/conftest.py Normal file
@@ -0,0 +1,69 @@
|
||||
"""Pytest configuration and shared fixtures for computer package tests.
|
||||
|
||||
This file contains shared fixtures and configuration for all computer tests.
|
||||
Following SRP: This file ONLY handles test setup/teardown.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_interface():
|
||||
"""Mock computer interface for testing.
|
||||
|
||||
Use this fixture to test Computer logic without real OS calls.
|
||||
"""
|
||||
interface = AsyncMock()
|
||||
interface.screenshot = AsyncMock(return_value=b"fake_screenshot")
|
||||
interface.left_click = AsyncMock()
|
||||
interface.right_click = AsyncMock()
|
||||
interface.middle_click = AsyncMock()
|
||||
interface.double_click = AsyncMock()
|
||||
interface.type = AsyncMock()
|
||||
interface.key = AsyncMock()
|
||||
interface.move_mouse = AsyncMock()
|
||||
interface.scroll = AsyncMock()
|
||||
interface.get_screen_size = AsyncMock(return_value=(1920, 1080))
|
||||
|
||||
return interface
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_cloud_provider():
|
||||
"""Mock cloud provider for testing.
|
||||
|
||||
Use this fixture to test cloud provider logic without real API calls.
|
||||
"""
|
||||
provider = AsyncMock()
|
||||
provider.start = AsyncMock()
|
||||
provider.stop = AsyncMock()
|
||||
provider.get_status = AsyncMock(return_value="running")
|
||||
provider.execute_command = AsyncMock(return_value="command output")
|
||||
|
||||
return provider
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_local_provider():
|
||||
"""Mock local provider for testing.
|
||||
|
||||
Use this fixture to test local provider logic without real VM operations.
|
||||
"""
|
||||
provider = AsyncMock()
|
||||
provider.start = AsyncMock()
|
||||
provider.stop = AsyncMock()
|
||||
provider.get_status = AsyncMock(return_value="running")
|
||||
provider.execute_command = AsyncMock(return_value="command output")
|
||||
|
||||
return provider
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def disable_telemetry(monkeypatch):
|
||||
"""Disable telemetry for tests.
|
||||
|
||||
Use this fixture to ensure no telemetry is sent during tests.
|
||||
"""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_DISABLED", "1")
|
||||
67 libs/python/computer/tests/test_computer.py Normal file
@@ -0,0 +1,67 @@
|
||||
"""Unit tests for Computer class.
|
||||
|
||||
This file tests ONLY the Computer class initialization and context manager.
|
||||
Following SRP: This file tests ONE class (Computer).
|
||||
All external dependencies (providers, interfaces) are mocked.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestComputerImport:
|
||||
"""Test Computer module imports (SRP: Only tests imports)."""
|
||||
|
||||
def test_computer_class_exists(self):
|
||||
"""Test that Computer class can be imported."""
|
||||
from computer import Computer
|
||||
|
||||
assert Computer is not None
|
||||
|
||||
def test_vm_provider_type_exists(self):
|
||||
"""Test that VMProviderType enum can be imported."""
|
||||
from computer import VMProviderType
|
||||
|
||||
assert VMProviderType is not None
|
||||
|
||||
|
||||
class TestComputerInitialization:
|
||||
"""Test Computer initialization (SRP: Only tests initialization)."""
|
||||
|
||||
def test_computer_class_can_be_imported(self, disable_telemetry):
|
||||
"""Test that Computer class can be imported without errors."""
|
||||
from computer import Computer
|
||||
|
||||
assert Computer is not None
|
||||
|
||||
def test_computer_has_required_methods(self, disable_telemetry):
|
||||
"""Test that Computer class has required methods."""
|
||||
from computer import Computer
|
||||
|
||||
assert hasattr(Computer, "__aenter__")
|
||||
assert hasattr(Computer, "__aexit__")
|
||||
|
||||
|
||||
class TestComputerContextManager:
|
||||
"""Test Computer context manager protocol (SRP: Only tests context manager)."""
|
||||
|
||||
def test_computer_is_async_context_manager(self, disable_telemetry):
|
||||
"""Test that Computer has async context manager methods."""
|
||||
from computer import Computer
|
||||
|
||||
assert hasattr(Computer, "__aenter__")
|
||||
assert hasattr(Computer, "__aexit__")
|
||||
assert callable(Computer.__aenter__)
|
||||
assert callable(Computer.__aexit__)
|
||||
|
||||
|
||||
class TestComputerInterface:
|
||||
"""Test Computer.interface property (SRP: Only tests interface access)."""
|
||||
|
||||
def test_computer_class_structure(self, disable_telemetry):
|
||||
"""Test that Computer class has expected structure."""
|
||||
from computer import Computer
|
||||
|
||||
# Verify Computer is a class
|
||||
assert isinstance(Computer, type)
|
||||
43 libs/python/core/tests/conftest.py Normal file
@@ -0,0 +1,43 @@
|
||||
"""Pytest configuration and shared fixtures for core package tests.
|
||||
|
||||
This file contains shared fixtures and configuration for all core tests.
|
||||
Following SRP: This file ONLY handles test setup/teardown.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_httpx_client():
|
||||
"""Mock httpx.AsyncClient for API calls.
|
||||
|
||||
Use this fixture to avoid making real HTTP requests during tests.
|
||||
"""
|
||||
with patch("httpx.AsyncClient") as mock_client:
|
||||
mock_instance = AsyncMock()
|
||||
mock_client.return_value.__aenter__.return_value = mock_instance
|
||||
yield mock_instance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_posthog():
|
||||
"""Mock PostHog client for telemetry tests.
|
||||
|
||||
Use this fixture to avoid sending real telemetry during tests.
|
||||
"""
|
||||
with patch("posthog.Posthog") as mock_ph:
|
||||
mock_instance = Mock()
|
||||
mock_ph.return_value = mock_instance
|
||||
yield mock_instance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def disable_telemetry(monkeypatch):
|
||||
"""Disable telemetry for tests that don't need it.
|
||||
|
||||
Use this fixture to ensure telemetry is disabled during tests.
|
||||
"""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_DISABLED", "1")
|
||||
yield
|
||||
255 libs/python/core/tests/test_telemetry.py Normal file
@@ -0,0 +1,255 @@
|
||||
"""Unit tests for core telemetry functionality.
|
||||
|
||||
This file tests ONLY telemetry logic, following SRP.
|
||||
All external dependencies (PostHog, file system) are mocked.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, Mock, mock_open, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestTelemetryEnabled:
|
||||
"""Test telemetry enable/disable logic (SRP: Only tests enable/disable)."""
|
||||
|
||||
def test_telemetry_enabled_by_default(self, monkeypatch):
|
||||
"""Test that telemetry is enabled by default."""
|
||||
# Remove any environment variables that might affect the test
|
||||
monkeypatch.delenv("CUA_TELEMETRY", raising=False)
|
||||
monkeypatch.delenv("CUA_TELEMETRY_ENABLED", raising=False)
|
||||
|
||||
from core.telemetry import is_telemetry_enabled
|
||||
|
||||
assert is_telemetry_enabled() is True
|
||||
|
||||
def test_telemetry_disabled_with_legacy_flag(self, monkeypatch):
|
||||
"""Test that telemetry can be disabled with legacy CUA_TELEMETRY=off."""
|
||||
monkeypatch.setenv("CUA_TELEMETRY", "off")
|
||||
|
||||
from core.telemetry import is_telemetry_enabled
|
||||
|
||||
assert is_telemetry_enabled() is False
|
||||
|
||||
def test_telemetry_disabled_with_new_flag(self, monkeypatch):
|
||||
"""Test that telemetry can be disabled with CUA_TELEMETRY_ENABLED=false."""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_ENABLED", "false")
|
||||
|
||||
from core.telemetry import is_telemetry_enabled
|
||||
|
||||
assert is_telemetry_enabled() is False
|
||||
|
||||
@pytest.mark.parametrize("value", ["0", "false", "no", "off"])
|
||||
def test_telemetry_disabled_with_various_values(self, monkeypatch, value):
|
||||
"""Test that telemetry respects various disable values."""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_ENABLED", value)
|
||||
|
||||
from core.telemetry import is_telemetry_enabled
|
||||
|
||||
assert is_telemetry_enabled() is False
|
||||
|
||||
@pytest.mark.parametrize("value", ["1", "true", "yes", "on"])
|
||||
def test_telemetry_enabled_with_various_values(self, monkeypatch, value):
|
||||
"""Test that telemetry respects various enable values."""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_ENABLED", value)
|
||||
|
||||
from core.telemetry import is_telemetry_enabled
|
||||
|
||||
assert is_telemetry_enabled() is True
|
||||
|
||||
|
||||
class TestPostHogTelemetryClient:
|
||||
"""Test PostHogTelemetryClient class (SRP: Only tests client logic)."""
|
||||
|
||||
@patch("core.telemetry.posthog.posthog")
|
||||
@patch("core.telemetry.posthog.Path")
|
||||
def test_client_initialization(self, mock_path, mock_posthog, disable_telemetry):
|
||||
"""Test that client initializes correctly."""
|
||||
from core.telemetry.posthog import PostHogTelemetryClient
|
||||
|
||||
# Mock the storage directory
|
||||
mock_storage_dir = MagicMock()
|
||||
mock_storage_dir.exists.return_value = False
|
||||
mock_path.return_value.parent.parent = MagicMock()
|
||||
mock_path.return_value.parent.parent.__truediv__.return_value = mock_storage_dir
|
||||
|
||||
# Reset singleton
|
||||
PostHogTelemetryClient.destroy_client()
|
||||
|
||||
client = PostHogTelemetryClient()
|
||||
|
||||
assert client is not None
|
||||
assert hasattr(client, "installation_id")
|
||||
assert hasattr(client, "initialized")
|
||||
assert hasattr(client, "queued_events")
|
||||
|
||||
@patch("core.telemetry.posthog.posthog")
|
||||
@patch("core.telemetry.posthog.Path")
|
||||
def test_installation_id_generation(self, mock_path, mock_posthog, disable_telemetry):
|
||||
"""Test that installation ID is generated if not exists."""
|
||||
from core.telemetry.posthog import PostHogTelemetryClient
|
||||
|
||||
# Mock file system
|
||||
mock_id_file = MagicMock()
|
||||
mock_id_file.exists.return_value = False
|
||||
mock_storage_dir = MagicMock()
|
||||
mock_storage_dir.__truediv__.return_value = mock_id_file
|
||||
|
||||
mock_core_dir = MagicMock()
|
||||
mock_core_dir.__truediv__.return_value = mock_storage_dir
|
||||
mock_path.return_value.parent.parent = mock_core_dir
|
||||
|
||||
# Reset singleton
|
||||
PostHogTelemetryClient.destroy_client()
|
||||
|
||||
client = PostHogTelemetryClient()
|
||||
|
||||
# Should have generated a new UUID
|
||||
assert client.installation_id is not None
|
||||
assert len(client.installation_id) == 36 # UUID format
|
||||
|
||||
@patch("core.telemetry.posthog.posthog")
|
||||
@patch("core.telemetry.posthog.Path")
|
||||
def test_installation_id_persistence(self, mock_path, mock_posthog, disable_telemetry):
|
||||
"""Test that installation ID is read from file if exists."""
|
||||
from core.telemetry.posthog import PostHogTelemetryClient
|
||||
|
||||
existing_id = "test-installation-id-123"
|
||||
|
||||
# Mock file system
|
||||
mock_id_file = MagicMock()
|
||||
mock_id_file.exists.return_value = True
|
||||
mock_id_file.read_text.return_value = existing_id
|
||||
|
||||
mock_storage_dir = MagicMock()
|
||||
mock_storage_dir.__truediv__.return_value = mock_id_file
|
||||
|
||||
mock_core_dir = MagicMock()
|
||||
mock_core_dir.__truediv__.return_value = mock_storage_dir
|
||||
mock_path.return_value.parent.parent = mock_core_dir
|
||||
|
||||
# Reset singleton
|
||||
PostHogTelemetryClient.destroy_client()
|
||||
|
||||
client = PostHogTelemetryClient()
|
||||
|
||||
assert client.installation_id == existing_id
|
||||
|
||||
@patch("core.telemetry.posthog.posthog")
|
||||
@patch("core.telemetry.posthog.Path")
|
||||
def test_record_event_when_disabled(self, mock_path, mock_posthog, monkeypatch):
|
||||
"""Test that events are not recorded when telemetry is disabled."""
|
||||
from core.telemetry.posthog import PostHogTelemetryClient
|
||||
|
||||
# Disable telemetry explicitly using the correct environment variable
|
||||
monkeypatch.setenv("CUA_TELEMETRY_ENABLED", "false")
|
||||
|
||||
# Mock file system
|
||||
mock_storage_dir = MagicMock()
|
||||
mock_storage_dir.exists.return_value = False
|
||||
mock_path.return_value.parent.parent = MagicMock()
|
||||
mock_path.return_value.parent.parent.__truediv__.return_value = mock_storage_dir
|
||||
|
||||
# Reset singleton
|
||||
PostHogTelemetryClient.destroy_client()
|
||||
|
||||
client = PostHogTelemetryClient()
|
||||
client.record_event("test_event", {"key": "value"})
|
||||
|
||||
# PostHog capture should not be called at all when telemetry is disabled
|
||||
mock_posthog.capture.assert_not_called()
|
||||
|
||||
@patch("core.telemetry.posthog.posthog")
|
||||
@patch("core.telemetry.posthog.Path")
|
||||
def test_record_event_when_enabled(self, mock_path, mock_posthog, monkeypatch):
|
||||
"""Test that events are recorded when telemetry is enabled."""
|
||||
from core.telemetry.posthog import PostHogTelemetryClient
|
||||
|
||||
# Enable telemetry
|
||||
monkeypatch.setenv("CUA_TELEMETRY_ENABLED", "true")
|
||||
|
||||
# Mock file system
|
||||
mock_storage_dir = MagicMock()
|
||||
mock_storage_dir.exists.return_value = False
|
||||
mock_path.return_value.parent.parent = MagicMock()
|
||||
mock_path.return_value.parent.parent.__truediv__.return_value = mock_storage_dir
|
||||
|
||||
# Reset singleton
|
||||
PostHogTelemetryClient.destroy_client()
|
||||
|
||||
client = PostHogTelemetryClient()
|
||||
client.initialized = True # Pretend it's initialized
|
||||
|
||||
event_name = "test_event"
|
||||
event_props = {"key": "value"}
|
||||
client.record_event(event_name, event_props)
|
||||
|
||||
# PostHog capture should be called
|
||||
assert mock_posthog.capture.call_count >= 1
|
||||
|
||||
@patch("core.telemetry.posthog.posthog")
|
||||
@patch("core.telemetry.posthog.Path")
|
||||
def test_singleton_pattern(self, mock_path, mock_posthog, disable_telemetry):
|
||||
"""Test that get_client returns the same instance."""
|
||||
from core.telemetry.posthog import PostHogTelemetryClient
|
||||
|
||||
# Mock file system
|
||||
mock_storage_dir = MagicMock()
|
||||
mock_storage_dir.exists.return_value = False
|
||||
mock_path.return_value.parent.parent = MagicMock()
|
||||
mock_path.return_value.parent.parent.__truediv__.return_value = mock_storage_dir
|
||||
|
||||
# Reset singleton
|
||||
PostHogTelemetryClient.destroy_client()
|
||||
|
||||
client1 = PostHogTelemetryClient.get_client()
|
||||
client2 = PostHogTelemetryClient.get_client()
|
||||
|
||||
assert client1 is client2
|
||||
|
||||
|
||||
class TestRecordEvent:
|
||||
"""Test the public record_event function (SRP: Only tests public API)."""
|
||||
|
||||
@patch("core.telemetry.posthog.PostHogTelemetryClient")
|
||||
def test_record_event_calls_client(self, mock_client_class, disable_telemetry):
|
||||
"""Test that record_event delegates to the client."""
|
||||
from core.telemetry import record_event
|
||||
|
||||
mock_client_instance = Mock()
|
||||
mock_client_class.get_client.return_value = mock_client_instance
|
||||
|
||||
event_name = "test_event"
|
||||
event_props = {"key": "value"}
|
||||
|
||||
record_event(event_name, event_props)
|
||||
|
||||
mock_client_instance.record_event.assert_called_once_with(event_name, event_props)
|
||||
|
||||
@patch("core.telemetry.posthog.PostHogTelemetryClient")
|
||||
def test_record_event_without_properties(self, mock_client_class, disable_telemetry):
|
||||
"""Test that record_event works without properties."""
|
||||
from core.telemetry import record_event
|
||||
|
||||
mock_client_instance = Mock()
|
||||
mock_client_class.get_client.return_value = mock_client_instance
|
||||
|
||||
event_name = "test_event"
|
||||
|
||||
record_event(event_name)
|
||||
|
||||
mock_client_instance.record_event.assert_called_once_with(event_name, {})
|
||||
|
||||
|
||||
class TestDestroyTelemetryClient:
|
||||
"""Test client destruction (SRP: Only tests cleanup)."""
|
||||
|
||||
@patch("core.telemetry.posthog.PostHogTelemetryClient")
|
||||
def test_destroy_client_calls_class_method(self, mock_client_class):
|
||||
"""Test that destroy_telemetry_client delegates correctly."""
|
||||
from core.telemetry import destroy_telemetry_client
|
||||
|
||||
destroy_telemetry_client()
|
||||
|
||||
mock_client_class.destroy_client.assert_called_once()
|
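Taken together, these tests pin down the public surface of `core.telemetry`: an opt-in flag read from the environment, a `record_event(name, properties)` entry point that delegates to the PostHog-backed client, and an explicit teardown. A minimal usage sketch, assuming the module layout implied by the imports above (the event name and properties are illustrative):

```python
import os

from core.telemetry import record_event, destroy_telemetry_client

# Telemetry is gated on this environment variable in the tests above.
os.environ["CUA_TELEMETRY_ENABLED"] = "true"

# Delegates to PostHogTelemetryClient.get_client().record_event(...)
record_event("example_event", {"key": "value"})

# Tear down the singleton client when the process is done.
destroy_telemetry_client()
```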
||||
@@ -129,12 +129,12 @@ See [desktop-extension/README.md](desktop-extension/README.md) for more details.
|
||||
|
||||
## Documentation
|
||||
|
||||
- Installation: https://trycua.com/docs/libraries/mcp-server/installation
|
||||
- Configuration: https://trycua.com/docs/libraries/mcp-server/configuration
|
||||
- Usage: https://trycua.com/docs/libraries/mcp-server/usage
|
||||
- Tools: https://trycua.com/docs/libraries/mcp-server/tools
|
||||
- Client Integrations: https://trycua.com/docs/libraries/mcp-server/client-integrations
|
||||
- LLM Integrations: https://trycua.com/docs/libraries/mcp-server/llm-integrations
|
||||
- Installation: https://cua.ai/docs/libraries/mcp-server/installation
|
||||
- Configuration: https://cua.ai/docs/libraries/mcp-server/configuration
|
||||
- Usage: https://cua.ai/docs/libraries/mcp-server/usage
|
||||
- Tools: https://cua.ai/docs/libraries/mcp-server/tools
|
||||
- Client Integrations: https://cua.ai/docs/libraries/mcp-server/client-integrations
|
||||
- LLM Integrations: https://cua.ai/docs/libraries/mcp-server/llm-integrations
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
|
||||
51
libs/python/mcp-server/tests/conftest.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Pytest configuration and shared fixtures for mcp-server package tests.
|
||||
|
||||
This file contains shared fixtures and configuration for all mcp-server tests.
|
||||
Following SRP: This file ONLY handles test setup/teardown.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_mcp_context():
|
||||
"""Mock MCP context for testing.
|
||||
|
||||
Use this fixture to test MCP server logic without real MCP connections.
|
||||
"""
|
||||
context = AsyncMock()
|
||||
context.request_context = AsyncMock()
|
||||
context.session = Mock()
|
||||
context.session.send_resource_updated = AsyncMock()
|
||||
|
||||
return context
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_computer():
|
||||
"""Mock Computer instance for MCP server tests.
|
||||
|
||||
Use this fixture to test MCP logic without real Computer operations.
|
||||
"""
|
||||
computer = AsyncMock()
|
||||
computer.interface = AsyncMock()
|
||||
computer.interface.screenshot = AsyncMock(return_value=b"fake_screenshot")
|
||||
computer.interface.left_click = AsyncMock()
|
||||
computer.interface.type = AsyncMock()
|
||||
|
||||
# Mock context manager
|
||||
computer.__aenter__ = AsyncMock(return_value=computer)
|
||||
computer.__aexit__ = AsyncMock()
|
||||
|
||||
return computer
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def disable_telemetry(monkeypatch):
|
||||
"""Disable telemetry for tests.
|
||||
|
||||
Use this fixture to ensure no telemetry is sent during tests.
|
||||
"""
|
||||
monkeypatch.setenv("CUA_TELEMETRY_DISABLED", "1")
|
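A sketch of how a test might combine these fixtures; the test body is illustrative and not part of the repository, but it relies only on the fixture behaviour defined above (`mock_computer` yields an `AsyncMock` whose `screenshot` returns `b"fake_screenshot"`):

```python
import pytest


@pytest.mark.asyncio
async def test_click_flow(mock_computer, disable_telemetry):
    # Enter the mocked async context manager defined in the fixture.
    async with mock_computer as computer:
        shot = await computer.interface.screenshot()
        assert shot == b"fake_screenshot"

        await computer.interface.left_click(100, 200)
        computer.interface.left_click.assert_awaited_once_with(100, 200)
```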
||||
44
libs/python/mcp-server/tests/test_mcp_server.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Unit tests for mcp-server package.
|
||||
|
||||
This file tests ONLY basic MCP server functionality.
|
||||
Following SRP: This file tests MCP server initialization.
|
||||
All external dependencies are mocked.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestMCPServerImports:
|
||||
"""Test MCP server module imports (SRP: Only tests imports)."""
|
||||
|
||||
def test_mcp_server_module_exists(self):
|
||||
"""Test that mcp_server module can be imported."""
|
||||
try:
|
||||
import mcp_server
|
||||
|
||||
assert mcp_server is not None
|
||||
except ImportError:
|
||||
pytest.skip("mcp_server module not installed")
|
||||
except SystemExit:
|
||||
pytest.skip("MCP dependencies (mcp.server.fastmcp) not available")
|
||||
|
||||
|
||||
class TestMCPServerInitialization:
|
||||
"""Test MCP server initialization (SRP: Only tests initialization)."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_mcp_server_can_be_imported(self):
|
||||
"""Basic smoke test: verify MCP server components can be imported."""
|
||||
try:
|
||||
from mcp_server import server
|
||||
|
||||
assert server is not None
|
||||
except ImportError:
|
||||
pytest.skip("MCP server module not available")
|
||||
except SystemExit:
|
||||
pytest.skip("MCP dependencies (mcp.server.fastmcp) not available")
|
||||
except Exception as e:
|
||||
# Some initialization errors are acceptable in unit tests
|
||||
pytest.skip(f"MCP server initialization requires specific setup: {e}")
|
||||
@@ -1,10 +0,0 @@
|
||||
[bumpversion]
|
||||
current_version = 0.2.1
|
||||
commit = True
|
||||
tag = True
|
||||
tag_name = pylume-v{new_version}
|
||||
message = Bump pylume to v{new_version}
|
||||
|
||||
[bumpversion:file:pylume/__init__.py]
|
||||
search = __version__ = "{current_version}"
|
||||
replace = __version__ = "{new_version}"
|
||||
@@ -1,46 +0,0 @@
|
||||
<div align="center">
|
||||
<h1>
|
||||
<div class="image-wrapper" style="display: inline-block;">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;">
|
||||
<source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;">
|
||||
<img alt="Shows my svg">
|
||||
</picture>
|
||||
</div>
|
||||
|
||||
[](#)
|
||||
[](#)
|
||||
[](https://discord.com/invite/mVnXXpdE85)
|
||||
[](https://pypi.org/project/pylume/)
|
||||
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
**pylume** is a lightweight Python library based on [lume](https://github.com/trycua/lume) to create, run and manage macOS and Linux virtual machines (VMs) natively on Apple Silicon.
|
||||
|
||||
```bash
|
||||
pip install pylume
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Please refer to this [Notebook](./samples/nb.ipynb) for a quickstart. More details about the underlying API used by pylume are available [here](https://github.com/trycua/lume/docs/API-Reference.md).
|
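For readers who skip the notebook, here is a minimal sketch of the async API, mirroring the example docstring shipped with the package (the VM name and sizes are illustrative):

```python
import asyncio

from pylume import PyLume, VMConfig


async def main():
    # PyLume starts (or connects to) the bundled lume server for the
    # duration of the context manager.
    async with PyLume() as client:
        config = VMConfig(name="my-vm", cpu=4, memory="8GB", disk_size="64GB")
        await client.create_vm(config)
        await client.run_vm("my-vm")


asyncio.run(main())
```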
||||
|
||||
## Prebuilt Images
|
||||
|
||||
Pre-built images are available on [ghcr.io/trycua](https://github.com/orgs/trycua/packages).
|
||||
These images come pre-configured with an SSH server and auto-login enabled.
|
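Continuing the sketch above, pulling a prebuilt image resolves to `ghcr.io/trycua/<image>:<tag>` by default; the image name below is illustrative, so check the packages page for current tags:

```python
async with PyLume() as client:
    # registry/organization default to ghcr.io / trycua
    await client.pull_image("macos-sequoia-vanilla:latest", name="my-vm")
```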
||||
|
||||
## Contributing
|
||||
|
||||
We welcome and greatly appreciate contributions to pylume! Whether you're improving documentation, adding new features, fixing bugs, or adding new VM images, your efforts help make pylume better for everyone.
|
||||
|
||||
Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas or get assistance.
|
||||
|
||||
## License
|
||||
|
||||
pylume is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||
|
||||
## Stargazers over time
|
||||
|
||||
[](https://starchart.cc/trycua/pylume)
|
||||
@@ -1,9 +0,0 @@
|
||||
"""
|
||||
PyLume Python SDK - A client library for managing macOS VMs with PyLume.
|
||||
"""
|
||||
|
||||
from pylume.exceptions import *
|
||||
from pylume.models import *
|
||||
from pylume.pylume import *
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -1,59 +0,0 @@
|
||||
"""
|
||||
PyLume Python SDK - A client library for managing macOS VMs with PyLume.
|
||||
|
||||
Example:
|
||||
>>> from pylume import PyLume, VMConfig
|
||||
>>> client = PyLume()
|
||||
>>> config = VMConfig(name="my-vm", cpu=4, memory="8GB", disk_size="64GB")
|
||||
>>> client.create_vm(config)
|
||||
>>> client.run_vm("my-vm")
|
||||
"""
|
||||
|
||||
# Import exceptions then all models
|
||||
from .exceptions import (
|
||||
LumeConfigError,
|
||||
LumeConnectionError,
|
||||
LumeError,
|
||||
LumeImageError,
|
||||
LumeNotFoundError,
|
||||
LumeServerError,
|
||||
LumeTimeoutError,
|
||||
LumeVMError,
|
||||
)
|
||||
from .models import (
|
||||
CloneSpec,
|
||||
ImageInfo,
|
||||
ImageList,
|
||||
ImageRef,
|
||||
SharedDirectory,
|
||||
VMConfig,
|
||||
VMRunOpts,
|
||||
VMStatus,
|
||||
VMUpdateOpts,
|
||||
)
|
||||
|
||||
# Import main class last to avoid circular imports
|
||||
from .pylume import PyLume
|
||||
|
||||
__version__ = "0.2.1"
|
||||
|
||||
__all__ = [
|
||||
"PyLume",
|
||||
"VMConfig",
|
||||
"VMStatus",
|
||||
"VMRunOpts",
|
||||
"VMUpdateOpts",
|
||||
"ImageRef",
|
||||
"CloneSpec",
|
||||
"SharedDirectory",
|
||||
"ImageList",
|
||||
"ImageInfo",
|
||||
"LumeError",
|
||||
"LumeServerError",
|
||||
"LumeConnectionError",
|
||||
"LumeTimeoutError",
|
||||
"LumeNotFoundError",
|
||||
"LumeConfigError",
|
||||
"LumeVMError",
|
||||
"LumeImageError",
|
||||
]
|
||||
@@ -1,119 +0,0 @@
|
||||
import asyncio
|
||||
import json
|
||||
import shlex
|
||||
import subprocess
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from .exceptions import (
|
||||
LumeConfigError,
|
||||
LumeConnectionError,
|
||||
LumeError,
|
||||
LumeNotFoundError,
|
||||
LumeServerError,
|
||||
LumeTimeoutError,
|
||||
)
|
||||
|
||||
|
||||
class LumeClient:
|
||||
def __init__(self, base_url: str, timeout: float = 60.0, debug: bool = False):
|
||||
self.base_url = base_url
|
||||
self.timeout = timeout
|
||||
self.debug = debug
|
||||
|
||||
def _log_debug(self, message: str, **kwargs) -> None:
|
||||
"""Log debug information if debug mode is enabled."""
|
||||
if self.debug:
|
||||
print(f"DEBUG: {message}")
|
||||
if kwargs:
|
||||
print(json.dumps(kwargs, indent=2))
|
||||
|
||||
async def _run_curl(
|
||||
self,
|
||||
method: str,
|
||||
path: str,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
) -> Any:
|
||||
"""Execute a curl command and return the response."""
|
||||
url = f"{self.base_url}{path}"
|
||||
if params:
|
||||
param_str = "&".join(f"{k}={v}" for k, v in params.items())
|
||||
url = f"{url}?{param_str}"
|
||||
|
||||
cmd = ["curl", "-X", method, "-s", "-w", "%{http_code}", "-m", str(self.timeout)]
|
||||
|
||||
if data is not None:
|
||||
cmd.extend(["-H", "Content-Type: application/json", "-d", json.dumps(data)])
|
||||
|
||||
cmd.append(url)
|
||||
|
||||
self._log_debug(f"Running curl command: {' '.join(map(shlex.quote, cmd))}")
|
||||
|
||||
try:
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
raise LumeConnectionError(f"Curl command failed: {stderr.decode()}")
|
||||
|
||||
# The last 3 characters are the status code
|
||||
response = stdout.decode()
|
||||
status_code = int(response[-3:])
|
||||
response_body = response[:-3] # Remove status code from response
|
||||
|
||||
if status_code >= 400:
|
||||
if status_code == 404:
|
||||
raise LumeNotFoundError(f"Resource not found: {path}")
|
||||
elif status_code == 400:
|
||||
raise LumeConfigError(f"Invalid request: {response_body}")
|
||||
elif status_code >= 500:
|
||||
raise LumeServerError(f"Server error: {response_body}")
|
||||
else:
|
||||
raise LumeError(f"Request failed with status {status_code}: {response_body}")
|
||||
|
||||
return json.loads(response_body) if response_body.strip() else None
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise LumeTimeoutError(f"Request timed out after {self.timeout} seconds")
|
||||
|
||||
async def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||
"""Make a GET request."""
|
||||
return await self._run_curl("GET", path, params=params)
|
||||
|
||||
async def post(
|
||||
self, path: str, data: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None
|
||||
) -> Any:
|
||||
"""Make a POST request."""
|
||||
old_timeout = self.timeout
|
||||
if timeout is not None:
|
||||
self.timeout = timeout
|
||||
try:
|
||||
return await self._run_curl("POST", path, data=data)
|
||||
finally:
|
||||
self.timeout = old_timeout
|
||||
|
||||
async def patch(self, path: str, data: Dict[str, Any]) -> None:
|
||||
"""Make a PATCH request."""
|
||||
await self._run_curl("PATCH", path, data=data)
|
||||
|
||||
async def delete(self, path: str) -> None:
|
||||
"""Make a DELETE request."""
|
||||
await self._run_curl("DELETE", path)
|
||||
|
||||
def print_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""Print equivalent curl command for debugging."""
|
||||
curl_cmd = f"""curl -X {method} \\
|
||||
'{self.base_url}{path}'"""
|
||||
|
||||
if data:
|
||||
curl_cmd += f" \\\n -H 'Content-Type: application/json' \\\n -d '{json.dumps(data)}'"
|
||||
|
||||
print("\nEquivalent curl command:")
|
||||
print(curl_cmd)
|
||||
print()
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the client resources."""
|
||||
pass # No shared resources to clean up
|
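The client is a thin curl wrapper: `-w "%{http_code}"` appends the HTTP status to the body, the last three characters are peeled off, and 4xx/5xx codes are mapped onto the exception hierarchy. A usage sketch, assuming a lume server is already listening (the port is illustrative; `LumeServer` builds the same `http://<host>:<port>/lume` base URL):

```python
import asyncio

from pylume.client import LumeClient


async def main():
    client = LumeClient("http://localhost:7777/lume", timeout=30.0, debug=True)
    vms = await client.get("/vms")          # GET  /lume/vms
    print(vms)
    await client.post("/vms/my-vm/stop")    # POST /lume/vms/my-vm/stop
    await client.close()


asyncio.run(main())
```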
||||
@@ -1,54 +0,0 @@
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class LumeError(Exception):
|
||||
"""Base exception for all PyLume errors."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class LumeServerError(LumeError):
|
||||
"""Raised when there's an error with the PyLume server."""
|
||||
|
||||
def __init__(
|
||||
self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None
|
||||
):
|
||||
self.status_code = status_code
|
||||
self.response_text = response_text
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class LumeConnectionError(LumeError):
|
||||
"""Raised when there's an error connecting to the PyLume server."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class LumeTimeoutError(LumeError):
|
||||
"""Raised when a request to the PyLume server times out."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class LumeNotFoundError(LumeError):
|
||||
"""Raised when a requested resource is not found."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class LumeConfigError(LumeError):
|
||||
"""Raised when there's an error with the configuration."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class LumeVMError(LumeError):
|
||||
"""Raised when there's an error with a VM operation."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class LumeImageError(LumeError):
|
||||
"""Raised when there's an error with an image operation."""
|
||||
|
||||
pass
|
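A sketch of how this hierarchy is typically consumed by SDK callers (the VM name is illustrative; `get_vm` is defined on the `PyLume` wrapper later in this diff):

```python
from pylume import PyLume, LumeNotFoundError, LumeServerError, LumeError


async def describe(client: PyLume, name: str) -> None:
    try:
        print(await client.get_vm(name))
    except LumeNotFoundError:
        print(f"VM {name!r} does not exist")
    except LumeServerError as e:
        # status_code / response_text are populated when the server replied
        print(f"server error {e.status_code}: {e.response_text}")
    except LumeError as e:
        # catch-all for any other pylume failure (connection, timeout, ...)
        print(f"lume operation failed: {e}")
```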
||||
Binary file not shown.
@@ -1,265 +0,0 @@
|
||||
import re
|
||||
from typing import Any, Dict, List, Literal, Optional
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, RootModel, computed_field, validator
|
||||
|
||||
|
||||
class DiskInfo(BaseModel):
|
||||
"""Information about disk storage allocation.
|
||||
|
||||
Attributes:
|
||||
total: Total disk space in bytes
|
||||
allocated: Currently allocated disk space in bytes
|
||||
"""
|
||||
|
||||
total: int
|
||||
allocated: int
|
||||
|
||||
|
||||
class VMConfig(BaseModel):
|
||||
"""Configuration for creating a new VM.
|
||||
|
||||
Note: Memory and disk sizes should be specified with units (e.g., "4GB", "64GB")
|
||||
|
||||
Attributes:
|
||||
name: Name of the virtual machine
|
||||
os: Operating system type, either "macOS" or "linux"
|
||||
cpu: Number of CPU cores to allocate
|
||||
memory: Amount of memory to allocate with units
|
||||
disk_size: Size of the disk to create with units
|
||||
display: Display resolution in format "widthxheight"
|
||||
ipsw: IPSW path or 'latest' for macOS VMs, None for other OS types
|
||||
"""
|
||||
|
||||
name: str
|
||||
os: Literal["macOS", "linux"] = "macOS"
|
||||
cpu: int = Field(default=2, ge=1)
|
||||
memory: str = "4GB"
|
||||
disk_size: str = Field(default="64GB", alias="diskSize")
|
||||
display: str = "1024x768"
|
||||
ipsw: Optional[str] = Field(default=None, description="IPSW path or 'latest', for macOS VMs")
|
||||
|
||||
class Config:
|
||||
populate_by_alias = True
|
||||
|
||||
|
||||
class SharedDirectory(BaseModel):
|
||||
"""Configuration for a shared directory.
|
||||
|
||||
Attributes:
|
||||
host_path: Path to the directory on the host system
|
||||
read_only: Whether the directory should be mounted as read-only
|
||||
"""
|
||||
|
||||
host_path: str = Field(..., alias="hostPath") # Allow host_path but serialize as hostPath
|
||||
read_only: bool = False
|
||||
|
||||
class Config:
|
||||
populate_by_name = True # Allow both alias and original name
|
||||
alias_generator = lambda s: "".join(
|
||||
word.capitalize() if i else word for i, word in enumerate(s.split("_"))
|
||||
)
|
||||
|
||||
|
||||
class VMRunOpts(BaseModel):
|
||||
"""Configuration for running a VM.
|
||||
|
||||
Args:
|
||||
no_display: Whether to not display the VNC client
|
||||
shared_directories: List of directories to share with the VM
|
||||
"""
|
||||
|
||||
no_display: bool = Field(default=False, alias="noDisplay")
|
||||
shared_directories: Optional[list[SharedDirectory]] = Field(
|
||||
default=None, alias="sharedDirectories"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(
|
||||
populate_by_name=True,
|
||||
alias_generator=lambda s: "".join(
|
||||
word.capitalize() if i else word for i, word in enumerate(s.split("_"))
|
||||
),
|
||||
)
|
||||
|
||||
def model_dump(self, **kwargs):
|
||||
"""Export model data with proper field name conversion.
|
||||
|
||||
Converts shared directory fields to match API expectations when using aliases.
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments passed to parent model_dump method
|
||||
|
||||
Returns:
|
||||
dict: Model data with properly formatted field names
|
||||
"""
|
||||
data = super().model_dump(**kwargs)
|
||||
# Convert shared directory fields to match API expectations
|
||||
if self.shared_directories and "by_alias" in kwargs and kwargs["by_alias"]:
|
||||
data["sharedDirectories"] = [
|
||||
{"hostPath": d.host_path, "readOnly": d.read_only} for d in self.shared_directories
|
||||
]
|
||||
# Remove the snake_case version if it exists
|
||||
data.pop("shared_directories", None)
|
||||
return data
|
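A sketch of how these two models combine when starting a VM with a shared folder; the resulting payload matches what the `model_dump` override above produces (paths are illustrative):

```python
opts = VMRunOpts(
    no_display=True,
    shared_directories=[
        SharedDirectory(host_path="/Users/me/shared", read_only=True),
    ],
)

payload = opts.model_dump(by_alias=True, exclude_none=True)
# payload["noDisplay"] is True and the shared directory is serialized as
# {"hostPath": "/Users/me/shared", "readOnly": True},
# ready to POST to /vms/<name>/run
```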
||||
|
||||
|
||||
class VMStatus(BaseModel):
|
||||
"""Status information for a virtual machine.
|
||||
|
||||
Attributes:
|
||||
name: Name of the virtual machine
|
||||
status: Current status of the VM
|
||||
os: Operating system type
|
||||
cpu_count: Number of CPU cores allocated
|
||||
memory_size: Amount of memory allocated in bytes
|
||||
disk_size: Disk storage information
|
||||
vnc_url: URL for VNC connection if available
|
||||
ip_address: IP address of the VM if available
|
||||
"""
|
||||
|
||||
name: str
|
||||
status: str
|
||||
os: Literal["macOS", "linux"]
|
||||
cpu_count: int = Field(alias="cpuCount")
|
||||
memory_size: int = Field(alias="memorySize") # API returns memory size in bytes
|
||||
disk_size: DiskInfo = Field(alias="diskSize")
|
||||
vnc_url: Optional[str] = Field(default=None, alias="vncUrl")
|
||||
ip_address: Optional[str] = Field(default=None, alias="ipAddress")
|
||||
|
||||
class Config:
|
||||
populate_by_alias = True
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def state(self) -> str:
|
||||
"""Get the current state of the VM.
|
||||
|
||||
Returns:
|
||||
str: Current VM status
|
||||
"""
|
||||
return self.status
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def cpu(self) -> int:
|
||||
"""Get the number of CPU cores.
|
||||
|
||||
Returns:
|
||||
int: Number of CPU cores allocated to the VM
|
||||
"""
|
||||
return self.cpu_count
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def memory(self) -> str:
|
||||
"""Get memory allocation in human-readable format.
|
||||
|
||||
Returns:
|
||||
str: Memory size formatted as "{size}GB"
|
||||
"""
|
||||
# Convert bytes to GB
|
||||
gb = self.memory_size / (1024 * 1024 * 1024)
|
||||
return f"{int(gb)}GB"
|
||||
|
||||
|
||||
class VMUpdateOpts(BaseModel):
|
||||
"""Options for updating VM configuration.
|
||||
|
||||
Attributes:
|
||||
cpu: Number of CPU cores to update to
|
||||
memory: Amount of memory to update to with units
|
||||
disk_size: Size of disk to update to with units
|
||||
"""
|
||||
|
||||
cpu: Optional[int] = None
|
||||
memory: Optional[str] = None
|
||||
disk_size: Optional[str] = None
|
||||
|
||||
|
||||
class ImageRef(BaseModel):
|
||||
"""Reference to a VM image.
|
||||
|
||||
Attributes:
|
||||
image: Name of the image
|
||||
tag: Tag version of the image
|
||||
registry: Registry hostname where image is stored
|
||||
organization: Organization or namespace in the registry
|
||||
"""
|
||||
|
||||
image: str
|
||||
tag: str = "latest"
|
||||
registry: Optional[str] = "ghcr.io"
|
||||
organization: Optional[str] = "trycua"
|
||||
|
||||
def model_dump(self, **kwargs):
|
||||
"""Override model_dump to return just the image:tag format.
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments (ignored)
|
||||
|
||||
Returns:
|
||||
str: Image reference in "image:tag" format
|
||||
"""
|
||||
return f"{self.image}:{self.tag}"
|
||||
|
||||
|
||||
class CloneSpec(BaseModel):
|
||||
"""Specification for cloning a VM.
|
||||
|
||||
Attributes:
|
||||
name: Name of the source VM to clone
|
||||
new_name: Name for the new cloned VM
|
||||
"""
|
||||
|
||||
name: str
|
||||
new_name: str = Field(alias="newName")
|
||||
|
||||
class Config:
|
||||
populate_by_alias = True
|
||||
|
||||
|
||||
class ImageInfo(BaseModel):
|
||||
"""Model for individual image information.
|
||||
|
||||
Attributes:
|
||||
imageId: Unique identifier for the image
|
||||
"""
|
||||
|
||||
imageId: str
|
||||
|
||||
|
||||
class ImageList(RootModel):
|
||||
"""Response model for the images endpoint.
|
||||
|
||||
A list-like container for ImageInfo objects that provides
|
||||
iteration and indexing capabilities.
|
||||
"""
|
||||
|
||||
root: List[ImageInfo]
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterate over the image list.
|
||||
|
||||
Returns:
|
||||
Iterator over ImageInfo objects
|
||||
"""
|
||||
return iter(self.root)
|
||||
|
||||
def __getitem__(self, item):
|
||||
"""Get an item from the image list by index.
|
||||
|
||||
Args:
|
||||
item: Index or slice to retrieve
|
||||
|
||||
Returns:
|
||||
ImageInfo or list of ImageInfo objects
|
||||
"""
|
||||
return self.root[item]
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of images in the list.
|
||||
|
||||
Returns:
|
||||
int: Number of images in the list
|
||||
"""
|
||||
return len(self.root)
|
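In practice `get_images()` (shown below in `pylume.py`) returns one of these, and it behaves like a plain list of `ImageInfo` entries. A small illustrative sketch (the image id is made up):

```python
images = ImageList(root=[ImageInfo(imageId="macos-sequoia-vanilla:latest")])

for img in images:            # __iter__
    print(img.imageId)

print(len(images))            # __len__     -> 1
print(images[0].imageId)      # __getitem__
```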
||||
@@ -1,315 +0,0 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from functools import wraps
|
||||
from typing import Any, Callable, List, Optional, TypeVar, Union
|
||||
|
||||
from .client import LumeClient
|
||||
from .exceptions import (
|
||||
LumeConfigError,
|
||||
LumeConnectionError,
|
||||
LumeError,
|
||||
LumeImageError,
|
||||
LumeNotFoundError,
|
||||
LumeServerError,
|
||||
LumeTimeoutError,
|
||||
LumeVMError,
|
||||
)
|
||||
from .models import (
|
||||
CloneSpec,
|
||||
ImageList,
|
||||
ImageRef,
|
||||
SharedDirectory,
|
||||
VMConfig,
|
||||
VMRunOpts,
|
||||
VMStatus,
|
||||
VMUpdateOpts,
|
||||
)
|
||||
from .server import LumeServer
|
||||
|
||||
# Type variable for the decorator
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def ensure_server(func: Callable[..., T]) -> Callable[..., T]:
|
||||
"""Decorator to ensure server is running before executing the method."""
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(self: "PyLume", *args: Any, **kwargs: Any) -> T:
|
||||
# ensure_running is an async method, so we need to await it
|
||||
await self.server.ensure_running()
|
||||
# Initialize client if needed
|
||||
await self._init_client()
|
||||
return await func(self, *args, **kwargs) # type: ignore
|
||||
|
||||
return wrapper # type: ignore
|
||||
|
||||
|
||||
class PyLume:
|
||||
def __init__(
|
||||
self,
|
||||
debug: bool = False,
|
||||
server_start_timeout: int = 60,
|
||||
port: Optional[int] = None,
|
||||
use_existing_server: bool = False,
|
||||
host: str = "localhost",
|
||||
):
|
||||
"""Initialize the async PyLume client.
|
||||
|
||||
Args:
|
||||
debug: Enable debug logging
|
||||
|
||||
server_start_timeout: Timeout in seconds to wait for server to start
|
||||
port: Port number for the lume server. Required when use_existing_server is True.
|
||||
use_existing_server: If True, will try to connect to an existing server on the specified port
|
||||
instead of starting a new one.
|
||||
host: Host to use for connections (e.g., "localhost", "127.0.0.1", "host.docker.internal")
|
||||
"""
|
||||
if use_existing_server and port is None:
|
||||
raise LumeConfigError("Port must be specified when using an existing server")
|
||||
|
||||
self.server = LumeServer(
|
||||
debug=debug,
|
||||
server_start_timeout=server_start_timeout,
|
||||
port=port,
|
||||
use_existing_server=use_existing_server,
|
||||
host=host,
|
||||
)
|
||||
self.client = None
|
||||
|
||||
async def __aenter__(self) -> "PyLume":
|
||||
"""Async context manager entry."""
|
||||
if self.server.use_existing_server:
|
||||
# Just ensure base_url is set for existing server
|
||||
if self.server.requested_port is None:
|
||||
raise LumeConfigError("Port must be specified when using an existing server")
|
||||
|
||||
if not self.server.base_url:
|
||||
self.server.port = self.server.requested_port
|
||||
self.server.base_url = f"http://{self.server.host}:{self.server.port}/lume"
|
||||
|
||||
# Ensure the server is running (will connect to existing or start new as needed)
|
||||
await self.server.ensure_running()
|
||||
|
||||
# Initialize the client
|
||||
await self._init_client()
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||
"""Async context manager exit."""
|
||||
if self.client is not None:
|
||||
await self.client.close()
|
||||
await self.server.stop()
|
||||
|
||||
async def _init_client(self) -> None:
|
||||
"""Initialize the client if not already initialized."""
|
||||
if self.client is None:
|
||||
if self.server.base_url is None:
|
||||
raise RuntimeError("Server base URL not set")
|
||||
self.client = LumeClient(self.server.base_url, debug=self.server.debug)
|
||||
|
||||
def _log_debug(self, message: str, **kwargs) -> None:
|
||||
"""Log debug information if debug mode is enabled."""
|
||||
if self.server.debug:
|
||||
print(f"DEBUG: {message}")
|
||||
if kwargs:
|
||||
print(json.dumps(kwargs, indent=2))
|
||||
|
||||
async def _handle_api_error(self, e: Exception, operation: str) -> None:
|
||||
"""Handle API errors and raise appropriate custom exceptions."""
|
||||
if isinstance(e, subprocess.SubprocessError):
|
||||
raise LumeConnectionError(f"Failed to connect to PyLume server: {str(e)}")
|
||||
elif isinstance(e, asyncio.TimeoutError):
|
||||
raise LumeTimeoutError(f"Request timed out: {str(e)}")
|
||||
|
||||
if not hasattr(e, "status") and not isinstance(e, subprocess.CalledProcessError):
|
||||
raise LumeServerError(f"Unknown error during {operation}: {str(e)}")
|
||||
|
||||
status_code = getattr(e, "status", 500)
|
||||
response_text = str(e)
|
||||
|
||||
self._log_debug(
|
||||
f"{operation} request failed", status_code=status_code, response_text=response_text
|
||||
)
|
||||
|
||||
if status_code == 404:
|
||||
raise LumeNotFoundError(f"Resource not found during {operation}")
|
||||
elif status_code == 400:
|
||||
raise LumeConfigError(f"Invalid configuration for {operation}: {response_text}")
|
||||
elif status_code >= 500:
|
||||
raise LumeServerError(
|
||||
f"Server error during {operation}",
|
||||
status_code=status_code,
|
||||
response_text=response_text,
|
||||
)
|
||||
else:
|
||||
raise LumeServerError(
|
||||
f"Error during {operation}", status_code=status_code, response_text=response_text
|
||||
)
|
||||
|
||||
async def _read_output(self) -> None:
|
||||
"""Read and log server output."""
|
||||
try:
|
||||
while True:
|
||||
if not self.server.server_process or self.server.server_process.poll() is not None:
|
||||
self._log_debug("Server process ended")
|
||||
break
|
||||
|
||||
# Read stdout without blocking
|
||||
if self.server.server_process.stdout:
|
||||
while True:
|
||||
line = self.server.server_process.stdout.readline()
|
||||
if not line:
|
||||
break
|
||||
line = line.strip()
|
||||
self._log_debug(f"Server stdout: {line}")
|
||||
if "Server started" in line.decode("utf-8"):
|
||||
self._log_debug("Detected server started message")
|
||||
return
|
||||
|
||||
# Read stderr without blocking
|
||||
if self.server.server_process.stderr:
|
||||
while True:
|
||||
line = self.server.server_process.stderr.readline()
|
||||
if not line:
|
||||
break
|
||||
line = line.strip()
|
||||
self._log_debug(f"Server stderr: {line}")
|
||||
if "error" in line.decode("utf-8").lower():
|
||||
raise RuntimeError(f"Server error: {line}")
|
||||
|
||||
await asyncio.sleep(0.1) # Small delay to prevent CPU spinning
|
||||
except Exception as e:
|
||||
self._log_debug(f"Error in output reader: {str(e)}")
|
||||
raise
|
||||
|
||||
@ensure_server
|
||||
async def create_vm(self, spec: Union[VMConfig, dict]) -> None:
|
||||
"""Create a VM with the given configuration."""
|
||||
# Ensure client is initialized
|
||||
await self._init_client()
|
||||
|
||||
if isinstance(spec, VMConfig):
|
||||
spec = spec.model_dump(by_alias=True, exclude_none=True)
|
||||
|
||||
# Suppress optional attribute access errors
|
||||
self.client.print_curl("POST", "/vms", spec) # type: ignore[attr-defined]
|
||||
await self.client.post("/vms", spec) # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def run_vm(self, name: str, opts: Optional[Union[VMRunOpts, dict]] = None) -> None:
|
||||
"""Run a VM."""
|
||||
if opts is None:
|
||||
opts = VMRunOpts(no_display=False) # type: ignore[attr-defined]
|
||||
elif isinstance(opts, dict):
|
||||
opts = VMRunOpts(**opts)
|
||||
|
||||
payload = opts.model_dump(by_alias=True, exclude_none=True)
|
||||
self.client.print_curl("POST", f"/vms/{name}/run", payload) # type: ignore[attr-defined]
|
||||
await self.client.post(f"/vms/{name}/run", payload) # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def list_vms(self) -> List[VMStatus]:
|
||||
"""List all VMs."""
|
||||
data = await self.client.get("/vms") # type: ignore[attr-defined]
|
||||
return [VMStatus.model_validate(vm) for vm in data]
|
||||
|
||||
@ensure_server
|
||||
async def get_vm(self, name: str) -> VMStatus:
|
||||
"""Get VM details."""
|
||||
data = await self.client.get(f"/vms/{name}") # type: ignore[attr-defined]
|
||||
return VMStatus.model_validate(data)
|
||||
|
||||
@ensure_server
|
||||
async def update_vm(self, name: str, params: Union[VMUpdateOpts, dict]) -> None:
|
||||
"""Update VM settings."""
|
||||
if isinstance(params, dict):
|
||||
params = VMUpdateOpts(**params)
|
||||
|
||||
payload = params.model_dump(by_alias=True, exclude_none=True)
|
||||
self.client.print_curl("PATCH", f"/vms/{name}", payload) # type: ignore[attr-defined]
|
||||
await self.client.patch(f"/vms/{name}", payload) # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def stop_vm(self, name: str) -> None:
|
||||
"""Stop a VM."""
|
||||
await self.client.post(f"/vms/{name}/stop") # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def delete_vm(self, name: str) -> None:
|
||||
"""Delete a VM."""
|
||||
await self.client.delete(f"/vms/{name}") # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def pull_image(
|
||||
self, spec: Union[ImageRef, dict, str], name: Optional[str] = None
|
||||
) -> None:
|
||||
"""Pull a VM image."""
|
||||
await self._init_client()
|
||||
if isinstance(spec, str):
|
||||
if ":" in spec:
|
||||
image_str = spec
|
||||
else:
|
||||
image_str = f"{spec}:latest"
|
||||
registry = "ghcr.io"
|
||||
organization = "trycua"
|
||||
elif isinstance(spec, dict):
|
||||
image = spec.get("image", "")
|
||||
tag = spec.get("tag", "latest")
|
||||
image_str = f"{image}:{tag}"
|
||||
registry = spec.get("registry", "ghcr.io")
|
||||
organization = spec.get("organization", "trycua")
|
||||
else:
|
||||
image_str = f"{spec.image}:{spec.tag}"
|
||||
registry = spec.registry
|
||||
organization = spec.organization
|
||||
|
||||
payload = {
|
||||
"image": image_str,
|
||||
"name": name,
|
||||
"registry": registry,
|
||||
"organization": organization,
|
||||
}
|
||||
|
||||
self.client.print_curl("POST", "/pull", payload) # type: ignore[attr-defined]
|
||||
await self.client.post("/pull", payload, timeout=300.0) # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def clone_vm(self, name: str, new_name: str) -> None:
|
||||
"""Clone a VM with the given name to a new VM with new_name."""
|
||||
config = CloneSpec(name=name, newName=new_name)
|
||||
self.client.print_curl("POST", "/vms/clone", config.model_dump()) # type: ignore[attr-defined]
|
||||
await self.client.post("/vms/clone", config.model_dump()) # type: ignore[attr-defined]
|
||||
|
||||
@ensure_server
|
||||
async def get_latest_ipsw_url(self) -> str:
|
||||
"""Get the latest IPSW URL."""
|
||||
await self._init_client()
|
||||
data = await self.client.get("/ipsw") # type: ignore[attr-defined]
|
||||
return data["url"]
|
||||
|
||||
@ensure_server
|
||||
async def get_images(self, organization: Optional[str] = None) -> ImageList:
|
||||
"""Get list of available images."""
|
||||
await self._init_client()
|
||||
params = {"organization": organization} if organization else None
|
||||
data = await self.client.get("/images", params) # type: ignore[attr-defined]
|
||||
return ImageList(root=data)
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the client and stop the server."""
|
||||
if self.client is not None:
|
||||
await self.client.close()
|
||||
self.client = None
|
||||
await asyncio.sleep(1)
|
||||
await self.server.stop()
|
||||
|
||||
async def _ensure_client(self) -> None:
|
||||
"""Ensure client is initialized."""
|
||||
if self.client is None:
|
||||
await self._init_client()
|
||||
@@ -1,481 +0,0 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import shlex
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from logging import getLogger
|
||||
from typing import Optional
|
||||
|
||||
from .exceptions import LumeConnectionError
|
||||
|
||||
|
||||
class LumeServer:
|
||||
def __init__(
|
||||
self,
|
||||
debug: bool = False,
|
||||
server_start_timeout: int = 60,
|
||||
port: Optional[int] = None,
|
||||
use_existing_server: bool = False,
|
||||
host: str = "localhost",
|
||||
):
|
||||
"""Initialize the LumeServer.
|
||||
|
||||
Args:
|
||||
debug: Enable debug logging
|
||||
server_start_timeout: Timeout in seconds to wait for server to start
|
||||
port: Specific port to use for the server
|
||||
use_existing_server: If True, will try to connect to an existing server
|
||||
instead of starting a new one
|
||||
host: Host to use for connections (e.g., "localhost", "127.0.0.1", "host.docker.internal")
|
||||
"""
|
||||
self.debug = debug
|
||||
self.server_start_timeout = server_start_timeout
|
||||
self.server_process = None
|
||||
self.output_file = None
|
||||
self.requested_port = port
|
||||
self.port = None
|
||||
self.base_url = None
|
||||
self.use_existing_server = use_existing_server
|
||||
self.host = host
|
||||
|
||||
# Configure logging
|
||||
self.logger = getLogger("pylume.server")
|
||||
if not self.logger.handlers:
|
||||
handler = logging.StreamHandler()
|
||||
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
handler.setFormatter(formatter)
|
||||
self.logger.addHandler(handler)
|
||||
self.logger.setLevel(logging.DEBUG if debug else logging.INFO)
|
||||
|
||||
self.logger.debug(f"Server initialized with host: {self.host}")
|
||||
|
||||
def _check_port_available(self, port: int) -> bool:
|
||||
"""Check if a port is available."""
|
||||
try:
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.settimeout(0.5)
|
||||
result = s.connect_ex(("127.0.0.1", port))
|
||||
if result == 0: # Port is in use on localhost
|
||||
return False
|
||||
except:
|
||||
pass
|
||||
|
||||
# Check the specified host (e.g., "host.docker.internal") if it's not a localhost alias
|
||||
if self.host not in ["localhost", "127.0.0.1"]:
|
||||
try:
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.settimeout(0.5)
|
||||
result = s.connect_ex((self.host, port))
|
||||
if result == 0: # Port is in use on host
|
||||
return False
|
||||
except:
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
def _get_server_port(self) -> int:
|
||||
"""Get an available port for the server."""
|
||||
# Use requested port if specified
|
||||
if self.requested_port is not None:
|
||||
if not self._check_port_available(self.requested_port):
|
||||
raise RuntimeError(f"Requested port {self.requested_port} is not available")
|
||||
return self.requested_port
|
||||
|
||||
# Find a free port
|
||||
for _ in range(10): # Try up to 10 times
|
||||
port = random.randint(49152, 65535)
|
||||
if self._check_port_available(port):
|
||||
return port
|
||||
|
||||
raise RuntimeError("Could not find an available port")
|
||||
|
||||
async def _ensure_server_running(self) -> None:
|
||||
"""Ensure the lume server is running, start it if it's not."""
|
||||
try:
|
||||
self.logger.debug("Checking if lume server is running...")
|
||||
# Try to connect to the server with a short timeout
|
||||
cmd = ["curl", "-s", "-w", "%{http_code}", "-m", "5", f"{self.base_url}/vms"]
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
response = stdout.decode()
|
||||
status_code = int(response[-3:])
|
||||
if status_code == 200:
|
||||
self.logger.debug("PyLume server is running")
|
||||
return
|
||||
|
||||
self.logger.debug("PyLume server not running, attempting to start it")
|
||||
# Server not running, try to start it
|
||||
lume_path = os.path.join(os.path.dirname(__file__), "lume")
|
||||
if not os.path.exists(lume_path):
|
||||
raise RuntimeError(f"Could not find lume binary at {lume_path}")
|
||||
|
||||
# Make sure the file is executable
|
||||
os.chmod(lume_path, 0o755)
|
||||
|
||||
# Create a temporary file for server output
|
||||
self.output_file = tempfile.NamedTemporaryFile(mode="w+", delete=False)
|
||||
self.logger.debug(f"Using temporary file for server output: {self.output_file.name}")
|
||||
|
||||
# Start the server
|
||||
self.logger.debug(f"Starting lume server with: {lume_path} serve --port {self.port}")
|
||||
|
||||
# Start server in background using subprocess.Popen
|
||||
try:
|
||||
self.server_process = subprocess.Popen(
|
||||
[lume_path, "serve", "--port", str(self.port)],
|
||||
stdout=self.output_file,
|
||||
stderr=self.output_file,
|
||||
cwd=os.path.dirname(lume_path),
|
||||
start_new_session=True, # Run in new session to avoid blocking
|
||||
)
|
||||
except Exception as e:
|
||||
self.output_file.close()
|
||||
os.unlink(self.output_file.name)
|
||||
raise RuntimeError(f"Failed to start lume server process: {str(e)}")
|
||||
|
||||
# Wait for server to start
|
||||
self.logger.debug(
|
||||
f"Waiting up to {self.server_start_timeout} seconds for server to start..."
|
||||
)
|
||||
start_time = time.time()
|
||||
server_ready = False
|
||||
last_size = 0
|
||||
|
||||
while time.time() - start_time < self.server_start_timeout:
|
||||
if self.server_process.poll() is not None:
|
||||
# Process has terminated
|
||||
self.output_file.seek(0)
|
||||
output = self.output_file.read()
|
||||
self.output_file.close()
|
||||
os.unlink(self.output_file.name)
|
||||
error_msg = (
|
||||
f"Server process terminated unexpectedly.\n"
|
||||
f"Exit code: {self.server_process.returncode}\n"
|
||||
f"Output: {output}"
|
||||
)
|
||||
raise RuntimeError(error_msg)
|
||||
|
||||
# Check output file for server ready message
|
||||
self.output_file.seek(0, os.SEEK_END)
|
||||
size = self.output_file.tell()
|
||||
if size > last_size: # Only read if there's new content
|
||||
self.output_file.seek(last_size)
|
||||
new_output = self.output_file.read()
|
||||
if new_output.strip(): # Only log non-empty output
|
||||
self.logger.debug(f"Server output: {new_output.strip()}")
|
||||
last_size = size
|
||||
|
||||
if "Server started" in new_output:
|
||||
server_ready = True
|
||||
self.logger.debug("Server startup detected")
|
||||
break
|
||||
|
||||
# Try to connect to the server periodically
|
||||
try:
|
||||
cmd = ["curl", "-s", "-w", "%{http_code}", "-m", "5", f"{self.base_url}/vms"]
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
response = stdout.decode()
|
||||
status_code = int(response[-3:])
|
||||
if status_code == 200:
|
||||
server_ready = True
|
||||
self.logger.debug("Server is responding to requests")
|
||||
break
|
||||
except:
|
||||
pass # Server not ready yet
|
||||
|
||||
await asyncio.sleep(1.0)
|
||||
|
||||
if not server_ready:
|
||||
# Cleanup if server didn't start
|
||||
if self.server_process:
|
||||
self.server_process.terminate()
|
||||
try:
|
||||
self.server_process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.server_process.kill()
|
||||
self.output_file.close()
|
||||
os.unlink(self.output_file.name)
|
||||
raise RuntimeError(
|
||||
f"Failed to start lume server after {self.server_start_timeout} seconds. "
|
||||
"Check the debug output for more details."
|
||||
)
|
||||
|
||||
# Give the server a moment to fully initialize
|
||||
await asyncio.sleep(2.0)
|
||||
|
||||
# Verify server is responding
|
||||
try:
|
||||
cmd = ["curl", "-s", "-w", "%{http_code}", "-m", "10", f"{self.base_url}/vms"]
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
raise RuntimeError(f"Curl command failed: {stderr.decode()}")
|
||||
|
||||
response = stdout.decode()
|
||||
status_code = int(response[-3:])
|
||||
|
||||
if status_code != 200:
|
||||
raise RuntimeError(f"Server returned status code {status_code}")
|
||||
|
||||
self.logger.debug("PyLume server started successfully")
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Server verification failed: {str(e)}")
|
||||
if self.server_process:
|
||||
self.server_process.terminate()
|
||||
try:
|
||||
self.server_process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.server_process.kill()
|
||||
self.output_file.close()
|
||||
os.unlink(self.output_file.name)
|
||||
raise RuntimeError(f"Server started but is not responding: {str(e)}")
|
||||
|
||||
self.logger.debug("Server startup completed successfully")
|
||||
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to start lume server: {str(e)}")
|
||||
|
||||
async def _start_server(self) -> None:
|
||||
"""Start the lume server using the lume executable."""
|
||||
self.logger.debug("Starting PyLume server")
|
||||
|
||||
# Get absolute path to lume executable in the same directory as this file
|
||||
lume_path = os.path.join(os.path.dirname(__file__), "lume")
|
||||
if not os.path.exists(lume_path):
|
||||
raise RuntimeError(f"Could not find lume binary at {lume_path}")
|
||||
|
||||
try:
|
||||
# Make executable
|
||||
os.chmod(lume_path, 0o755)
|
||||
|
||||
# Get and validate port
|
||||
self.port = self._get_server_port()
|
||||
self.base_url = f"http://{self.host}:{self.port}/lume"
|
||||
|
||||
# Set up output handling
|
||||
self.output_file = tempfile.NamedTemporaryFile(mode="w+", delete=False)
|
||||
|
||||
# Start the server process with the lume executable
|
||||
env = os.environ.copy()
|
||||
env["RUST_BACKTRACE"] = "1" # Enable backtrace for better error reporting
|
||||
|
||||
# Specify the host to bind to (0.0.0.0 to allow external connections)
|
||||
self.server_process = subprocess.Popen(
|
||||
[lume_path, "serve", "--port", str(self.port)],
|
||||
stdout=self.output_file,
|
||||
stderr=subprocess.STDOUT,
|
||||
cwd=os.path.dirname(lume_path), # Run from same directory as executable
|
||||
env=env,
|
||||
)
|
||||
|
||||
# Wait for server to initialize
|
||||
await asyncio.sleep(2)
|
||||
await self._wait_for_server()
|
||||
|
||||
except Exception as e:
|
||||
await self._cleanup()
|
||||
raise RuntimeError(f"Failed to start lume server process: {str(e)}")
|
||||
|
||||
async def _tail_log(self) -> None:
|
||||
"""Read and display server log output in debug mode."""
|
||||
while True:
|
||||
try:
|
||||
self.output_file.seek(0, os.SEEK_END) # type: ignore[attr-defined]
|
||||
line = self.output_file.readline() # type: ignore[attr-defined]
|
||||
if line:
|
||||
line = line.strip()
|
||||
if line:
|
||||
print(f"SERVER: {line}")
|
||||
if self.server_process.poll() is not None: # type: ignore[attr-defined]
|
||||
print("Server process ended")
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
except Exception as e:
|
||||
print(f"Error reading log: {e}")
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
async def _wait_for_server(self) -> None:
|
||||
"""Wait for server to start and become responsive with increased timeout."""
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < self.server_start_timeout:
|
||||
if self.server_process.poll() is not None: # type: ignore[attr-defined]
|
||||
error_msg = await self._get_error_output()
|
||||
await self._cleanup()
|
||||
raise RuntimeError(error_msg)
|
||||
|
||||
try:
|
||||
await self._verify_server()
|
||||
self.logger.debug("Server is now responsive")
|
||||
return
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Server not ready yet: {str(e)}")
|
||||
await asyncio.sleep(1.0)
|
||||
|
||||
await self._cleanup()
|
||||
raise RuntimeError(f"Server failed to start after {self.server_start_timeout} seconds")
|
||||
|
||||
async def _verify_server(self) -> None:
|
||||
"""Verify server is responding to requests."""
|
||||
try:
|
||||
cmd = [
|
||||
"curl",
|
||||
"-s",
|
||||
"-w",
|
||||
"%{http_code}",
|
||||
"-m",
|
||||
"10",
|
||||
f"http://{self.host}:{self.port}/lume/vms",
|
||||
]
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
raise RuntimeError(f"Curl command failed: {stderr.decode()}")
|
||||
|
||||
response = stdout.decode()
|
||||
status_code = int(response[-3:])
|
||||
|
||||
if status_code != 200:
|
||||
raise RuntimeError(f"Server returned status code {status_code}")
|
||||
|
||||
self.logger.debug("PyLume server started successfully")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Server not responding: {str(e)}")
|
||||
|
||||
async def _get_error_output(self) -> str:
|
||||
"""Get error output from the server process."""
|
||||
if not self.output_file:
|
||||
return "No output available"
|
||||
self.output_file.seek(0)
|
||||
output = self.output_file.read()
|
||||
return (
|
||||
f"Server process terminated unexpectedly.\n"
|
||||
f"Exit code: {self.server_process.returncode}\n" # type: ignore[attr-defined]
|
||||
f"Output: {output}"
|
||||
)
|
||||
|
||||
async def _cleanup(self) -> None:
|
||||
"""Clean up all server resources."""
|
||||
if self.server_process:
|
||||
try:
|
||||
self.server_process.terminate()
|
||||
try:
|
||||
self.server_process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.server_process.kill()
|
||||
except:
|
||||
pass
|
||||
self.server_process = None
|
||||
|
||||
# Clean up output file
|
||||
if self.output_file:
|
||||
try:
|
||||
self.output_file.close()
|
||||
os.unlink(self.output_file.name)
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Error cleaning up output file: {e}")
|
||||
self.output_file = None
|
||||
|
||||
async def ensure_running(self) -> None:
|
||||
"""Ensure the server is running.
|
||||
|
||||
If use_existing_server is True, will only try to connect to an existing server.
|
||||
Otherwise will:
|
||||
1. Try to connect to an existing server on the specified port
|
||||
2. If that fails and not in Docker, start a new server
|
||||
3. If in Docker and no existing server is found, raise an error
|
||||
"""
|
||||
# First check if we're in Docker
|
||||
in_docker = os.path.exists("/.dockerenv") or (
|
||||
os.path.exists("/proc/1/cgroup") and "docker" in open("/proc/1/cgroup", "r").read()
|
||||
)
|
||||
|
||||
# If using a non-localhost host like host.docker.internal, set up the connection details
|
||||
if self.host not in ["localhost", "127.0.0.1"]:
|
||||
if self.requested_port is None:
|
||||
raise RuntimeError("Port must be specified when using a remote host")
|
||||
|
||||
self.port = self.requested_port
|
||||
self.base_url = f"http://{self.host}:{self.port}/lume"
|
||||
self.logger.debug(f"Using remote host server at {self.base_url}")
|
||||
|
||||
# Try to verify the server is accessible
|
||||
try:
|
||||
await self._verify_server()
|
||||
self.logger.debug("Successfully connected to remote server")
|
||||
return
|
||||
except Exception as e:
|
||||
if self.use_existing_server or in_docker:
|
||||
# If explicitly requesting an existing server or in Docker, we can't start a new one
|
||||
raise RuntimeError(
|
||||
f"Failed to connect to remote server at {self.base_url}: {str(e)}"
|
||||
)
|
||||
else:
|
||||
self.logger.debug(f"Remote server not available at {self.base_url}: {str(e)}")
|
||||
# Fall back to localhost for starting a new server
|
||||
self.host = "localhost"
|
||||
|
||||
# If explicitly using an existing server, verify it's running
|
||||
if self.use_existing_server:
|
||||
if self.requested_port is None:
|
||||
raise RuntimeError("Port must be specified when using an existing server")
|
||||
|
||||
self.port = self.requested_port
|
||||
self.base_url = f"http://{self.host}:{self.port}/lume"
|
||||
|
||||
try:
|
||||
await self._verify_server()
|
||||
self.logger.debug("Successfully connected to existing server")
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"Failed to connect to existing server at {self.base_url}: {str(e)}"
|
||||
)
|
||||
else:
|
||||
# Try to connect to an existing server first
|
||||
if self.requested_port is not None:
|
||||
self.port = self.requested_port
|
||||
self.base_url = f"http://{self.host}:{self.port}/lume"
|
||||
|
||||
try:
|
||||
await self._verify_server()
|
||||
self.logger.debug("Successfully connected to existing server")
|
||||
return
|
||||
except Exception:
|
||||
self.logger.debug(f"No existing server found at {self.base_url}")
|
||||
|
||||
# If in Docker and can't connect to existing server, raise an error
|
||||
if in_docker:
|
||||
raise RuntimeError(
|
||||
f"Failed to connect to server at {self.base_url} and cannot start a new server in Docker"
|
||||
)
|
||||
|
||||
# Start a new server
|
||||
self.logger.debug("Starting a new server instance")
|
||||
await self._start_server()
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Stop the server if we're managing it."""
|
||||
if not self.use_existing_server:
|
||||
self.logger.debug("Stopping lume server...")
|
||||
await self._cleanup()
|
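The `ensure_running()` logic above is what makes the Docker scenario work: inside a container the SDK never tries to spawn `lume` itself, it only connects to a server already listening on the host. A sketch of that configuration through the `PyLume` wrapper (the port is illustrative):

```python
import asyncio

from pylume import PyLume


async def main():
    # Inside Docker, point at the host's lume server instead of starting one.
    async with PyLume(port=7777, use_existing_server=True,
                      host="host.docker.internal") as client:
        print(await client.list_vms())


asyncio.run(main())
```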
||||
@@ -1,51 +0,0 @@
|
||||
[build-system]
|
||||
build-backend = "pdm.backend"
|
||||
requires = ["pdm-backend"]
|
||||
|
||||
[project]
|
||||
authors = [{ name = "TryCua", email = "gh@trycua.com" }]
|
||||
classifiers = [
|
||||
"Intended Audience :: Developers",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: MacOS :: MacOS X",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
]
|
||||
dependencies = ["pydantic>=2.11.1"]
|
||||
description = "Python SDK for lume - run macOS and Linux VMs on Apple Silicon"
|
||||
dynamic = ["version"]
|
||||
keywords = ["apple-silicon", "macos", "virtualization", "vm"]
|
||||
license = { text = "MIT" }
|
||||
name = "pylume"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
|
||||
[tool.pdm.version]
|
||||
path = "pylume/__init__.py"
|
||||
source = "file"
|
||||
|
||||
[project.urls]
|
||||
homepage = "https://github.com/trycua/pylume"
|
||||
repository = "https://github.com/trycua/pylume"
|
||||
|
||||
[tool.pdm]
|
||||
distribution = true
|
||||
|
||||
[tool.pdm.dev-dependencies]
|
||||
dev = [
|
||||
"black>=23.0.0",
|
||||
"isort>=5.12.0",
|
||||
"pytest-asyncio>=0.23.0",
|
||||
"pytest>=7.0.0",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_mode = "auto"
|
||||
python_files = "test_*.py"
|
||||
testpaths = ["tests"]
|
||||
|
||||
[tool.pdm.build]
|
||||
includes = ["pylume/"]
|
||||
source-includes = ["LICENSE", "README.md", "tests/"]
|
||||
23
libs/python/pylume/tests/conftest.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""Pytest configuration for pylume tests.
|
||||
|
||||
This module provides test fixtures for the pylume package.
|
||||
Note: This package has macOS-specific dependencies and will skip tests
|
||||
if the required modules are not available.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_subprocess():
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = Mock(returncode=0, stdout="", stderr="")
|
||||
yield mock_run
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_requests():
|
||||
with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
|
||||
yield {"get": mock_get, "post": mock_post}
|
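A sketch of how a test could lean on these fixtures; the test itself is illustrative, not part of the repository:

```python
import subprocess


def test_cli_invocation_is_mocked(mock_subprocess):
    # subprocess.run is patched by the fixture, so nothing is actually executed.
    result = subprocess.run(["lume", "ls"])
    assert result.returncode == 0
    mock_subprocess.assert_called_once()
```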
||||
Some files were not shown because too many files have changed in this diff.