From 2daa6a9df880c83a41b4dd018124d9213685879d Mon Sep 17 00:00:00 2001 From: bowman Date: Mon, 6 Oct 2025 17:54:23 -0700 Subject: [PATCH 01/37] bump hud version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index baa2567a..874d9ed2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dev = [ "mypy>=1.10.0", "ruff>=0.9.2", "types-requests>=2.31.0", - "hud-python[agent]==0.4.26" + "hud-python[agent]==0.4.52" ] docs = ["mkdocs-material>=9.2.0", "mkdocs>=1.5.0"] test = [ From 1fe41d57f4f128ab33f2f7f93380ef498934a1e9 Mon Sep 17 00:00:00 2001 From: bowman Date: Mon, 6 Oct 2025 20:24:21 -0700 Subject: [PATCH 02/37] update hud in agent pyproject.toml --- libs/python/agent/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index 6fea439c..718b8404 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -68,7 +68,7 @@ cli = [ "yaspin>=3.1.0", ] hud = [ - "hud-python==0.4.26", + "hud-python==0.4.52", ] all = [ # uitars requirements @@ -88,7 +88,7 @@ all = [ # cli requirements "yaspin>=3.1.0", # hud requirements - "hud-python==0.4.26", + "hud-python==0.4.52", ] [tool.uv] From b0da328323621298f74e5a0fc8673689cdd9b9e4 Mon Sep 17 00:00:00 2001 From: bowman Date: Mon, 6 Oct 2025 20:27:01 -0700 Subject: [PATCH 03/37] switch demo to gpt-5 in eval_osworld.ipynb --- notebooks/eval_osworld.ipynb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index 1bc58e48..27a56020 100644 --- a/notebooks/eval_osworld.ipynb +++ b/notebooks/eval_osworld.ipynb @@ -39,6 +39,7 @@ "outputs": [], "source": [ "from dotenv import load_dotenv\n", + "import os\n", "\n", "# Load environment variables from ../.env\n", "load_dotenv(dotenv_path='../.env')\n", @@ -47,6 +48,8 @@ "# - HUD_API_KEY (for HUD access)\n", "# - 
ANTHROPIC_API_KEY (for Claude models)\n", "# - OPENAI_API_KEY (for OpenAI models)\n", + "assert os.getenv('HUD_API_KEY') is not None\n", + "assert os.getenv('ANTHROPIC_API_KEY') is not None or os.getenv('OPENAI_API_KEY') is not None\n", "\n", "from pprint import pprint" ] @@ -72,7 +75,7 @@ "# You can swap \"hud-evals/OSWorld-Verified\" -> \"hud-evals/SheetBench-V2\" to test SheetBench.\n", "await run_single_task(\n", " dataset=\"hud-evals/OSWorld-Verified\",\n", - " model=\"openai/computer-use-preview+openai/gpt-5-nano\", # or any supported model string\n", + " model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n", " task_id=155 # open last tab task (easy)\n", ")" ] From 78901aed48d5d0a9cb8abdaa81a9478ea079cbd0 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 7 Oct 2025 11:04:21 -0400 Subject: [PATCH 04/37] Add --provider and --prompt-file to the CLI --- libs/python/agent/agent/cli.py | 76 +++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/libs/python/agent/agent/cli.py b/libs/python/agent/agent/cli.py index 0ea840d2..7edc99e1 100644 --- a/libs/python/agent/agent/cli.py +++ b/libs/python/agent/agent/cli.py @@ -226,6 +226,13 @@ Examples: help="Model string (e.g., 'openai/computer-use-preview', 'anthropic/claude-3-5-sonnet-20241022')" ) + parser.add_argument( + "--provider", + choices=["cloud", "lume", "winsandbox", "docker"], + default="cloud", + help="Computer provider to use: cloud (default), lume, winsandbox, or docker" + ) + parser.add_argument( "--images", type=int, @@ -257,6 +264,12 @@ Examples: help="Initial prompt to send to the agent. Leave blank for interactive mode." ) + parser.add_argument( + "--prompt-file", + type=Path, + help="Path to a UTF-8 text file whose contents will be used as the initial prompt. If provided, overrides --prompt." 
+ ) + parser.add_argument( "--predict-click", dest="predict_click", @@ -289,33 +302,35 @@ Examples: container_name = os.getenv("CUA_CONTAINER_NAME") cua_api_key = os.getenv("CUA_API_KEY") - # Prompt for missing environment variables + # Prompt for missing environment variables (container name always required) if not container_name: - print_colored("CUA_CONTAINER_NAME not set.", dim=True) - print_colored("You can get a CUA container at https://www.trycua.com/", dim=True) - container_name = input("Enter your CUA container name: ").strip() - if not container_name: - print_colored("❌ Container name is required.") - sys.exit(1) - - if not cua_api_key: + if args.provider == "cloud": + print_colored("CUA_CONTAINER_NAME not set.", dim=True) + print_colored("You can get a CUA container at https://www.trycua.com/", dim=True) + container_name = input("Enter your CUA container name: ").strip() + if not container_name: + print_colored("❌ Container name is required.") + sys.exit(1) + else: + container_name = "cli-sandbox" + + # Only require API key for cloud provider + if args.provider == "cloud" and not cua_api_key: print_colored("CUA_API_KEY not set.", dim=True) cua_api_key = input("Enter your CUA API key: ").strip() if not cua_api_key: - print_colored("❌ API key is required.") + print_colored("❌ API key is required for cloud provider.") sys.exit(1) # Check for provider-specific API keys based on model provider_api_keys = { "openai/": "OPENAI_API_KEY", "anthropic/": "ANTHROPIC_API_KEY", - "omniparser+": "OPENAI_API_KEY", - "omniparser+": "ANTHROPIC_API_KEY", } # Find matching provider and check for API key for prefix, env_var in provider_api_keys.items(): - if args.model.startswith(prefix): + if prefix in args.model: if not os.getenv(env_var): print_colored(f"{env_var} not set.", dim=True) api_key = input(f"Enter your {env_var.replace('_', ' ').title()}: ").strip() @@ -335,13 +350,25 @@ Examples: print_colored("Make sure agent and computer libraries are installed.", 
Colors.YELLOW) sys.exit(1) + # Resolve provider -> os_type, provider_type, api key requirement + provider_map = { + "cloud": ("linux", "cloud", True), + "lume": ("macos", "lume", False), + "winsandbox": ("windows", "winsandbox", False), + "docker": ("linux", "docker", False), + } + os_type, provider_type, needs_api_key = provider_map[args.provider] + + computer_kwargs = { + "os_type": os_type, + "provider_type": provider_type, + "name": container_name, + } + if needs_api_key: + computer_kwargs["api_key"] = cua_api_key # type: ignore + # Create computer instance - async with Computer( - os_type="linux", - provider_type="cloud", - name=container_name, - api_key=cua_api_key - ) as computer: + async with Computer(**computer_kwargs) as computer: # type: ignore # Create agent agent_kwargs = { @@ -442,8 +469,17 @@ Examples: # Done sys.exit(0) + # Resolve initial prompt from --prompt-file or --prompt + initial_prompt = args.prompt or "" + if args.prompt_file: + try: + initial_prompt = args.prompt_file.read_text(encoding="utf-8") + except Exception as e: + print_colored(f"❌ Failed to read --prompt-file: {e}", Colors.RED, bold=True) + sys.exit(1) + # Start chat loop (default interactive mode) - await chat_loop(agent, args.model, container_name, args.prompt, args.usage) + await chat_loop(agent, args.model, container_name, initial_prompt, args.usage) From f54d5537f90815a1119d30a496f65e997fadb8b4 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Tue, 7 Oct 2025 15:29:16 -0700 Subject: [PATCH 05/37] Refactor the developer quickstart documentation --- docs/content/docs/quickstart-devs.mdx | 268 ++++++++++++++++++-------- 1 file changed, 192 insertions(+), 76 deletions(-) diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index 37367709..e61c4b9b 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -1,61 +1,60 @@ --- -title: Quickstart (for Developers) -description: Get started with cua in 5 steps 
+title: Quickstart +description: Get started with cua in four steps icon: Rocket --- import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -Get up and running with cua in 5 simple steps. +The steps below will guide you through the process of creating a computer environment, connecting to it programmatically, and automating tasks. - - -## Introduction - -cua combines Computer (interface) + Agent (AI) for automating desktop apps. Computer handles clicks/typing, Agent provides the intelligence. - - ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute. - - - - **Easiest & safest way to get started** +You can run your Cua computer in the cloud (recommended for easiest setup), locally on macOS with Lume, locally on Windows with a Windows Sandbox, or in a Docker container on any platform. Choose the option that matches your system and needs. + + + + Cua cloud containers are virtual machines that run Ubuntu. + 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** 3. Create a **Medium, Ubuntu 22** container 4. Note your container name and API key - + Your cloud container will be automatically configured and ready to use. - + + + Lume containers are macOS virtual machines that run on a macOS host machine. - 1. Install lume cli + 1. Install the Lume CLI: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` - 2. Start a local cua container + 2. Start a local Cua container: ```bash lume run macos-sequoia-cua:latest ``` - + + + Windows Sandbox provides Windows virtual environments that run on a Windows host machine. - 1. 
Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency + 1. Enable [Windows Sandbox](https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/windows-sandbox-install) (requires Windows 10 Pro/Enterprise or Windows 11) + 2. Install the `pywinsandbox` dependency: ```bash pip install -U git+git://github.com/karkason/pywinsandbox.git @@ -64,11 +63,13 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + Docker provides a way to run Ubuntu containers on any host machine. + + 1. Install Docker Desktop or Docker Engine: + + 2. Pull the CUA Ubuntu container: ```bash docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest @@ -81,81 +82,190 @@ Choose how you want to run your cua computer. **Cloud containers are recommended -## Install cua +## Using Computer + +Connect to your Cua computer and perform basic interactions, such as taking screenshots or simulating user input. 
+ Install the Cua computer Python SDK: ```bash - pip install "cua-agent[all]" cua-computer - - # or install specific providers - pip install "cua-agent[openai]" # OpenAI computer-use-preview support - pip install "cua-agent[anthropic]" # Anthropic Claude support - pip install "cua-agent[omni]" # Omniparser + any LLM support - pip install "cua-agent[uitars]" # UI-TARS - pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support - pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support - pip install "cua-agent[glm45v-hf]" # GLM-4.5V + Huggingface support - pip install "cua-agent[ui]" # Gradio UI support + pip install cua-computer ``` + + Then, connect to your desired computer environment: + + + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="cloud", + name="your-container-name", + api_key="your-api-key" + ) + await computer.run() # Connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" + ) + await computer.run() # Launch & connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="windows", + provider_type="winsandbox" + ) + await computer.run() # Launch & connect to the container + ``` + + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="docker", + name="trycua/cua-ubuntu:latest" + ) + await computer.run() # Launch & connect to the container + ``` + + + Install and run `cua-computer-server`: + ```bash + pip install cua-computer-server + python -m computer_server + ``` + + Then, use the `Computer` object to connect: + ```python + from computer import Computer + + computer = Computer(use_host_computer_server=True) + await computer.run() # Connect to the host desktop + ``` + + + + Once connected, you can perform interactions: + ```python + try: + # Take a 
screenshot of the computer's current display + screenshot = await computer.interface.screenshot() + # Simulate a left-click at coordinates (100, 100) + await computer.interface.left_click(100, 100) + # Type "Hello!" into the active application + await computer.interface.type("Hello!") + finally: + await computer.close() + ``` + + You can automate these actions using an agent. + + Install the Cua computer TypeScript SDK: ```bash npm install @trycua/computer ``` - - - + Then, connect to your desired computer environment: - + + + ```typescript + import { Computer, OSType } from '@trycua/computer'; -## Using Computer + const computer = new Computer({ + osType: OSType.LINUX, + name: "your-container-name", + apiKey: "your-api-key" + }); + await computer.run(); // Connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - - - ```python - from computer import Computer + const computer = new Computer({ + osType: OSType.MACOS, + providerType: ProviderType.LUME, + name: "macos-sequoia-cua:latest" + }); + await computer.run(); // Launch & connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - async with Computer( - os_type="linux", - provider_type="cloud", - name="your-container-name", - api_key="your-api-key" - ) as computer: - # Take screenshot - screenshot = await computer.interface.screenshot() + const computer = new Computer({ + osType: OSType.WINDOWS, + providerType: ProviderType.WINDOWS_SANDBOX + }); + await computer.run(); // Launch & connect to the container + ``` + + + ```typescript + import { Computer, OSType, ProviderType } from '@trycua/computer'; - # Click and type - await computer.interface.left_click(100, 100) - await computer.interface.type("Hello!") - ``` + const computer = new Computer({ + osType: OSType.LINUX, + providerType: ProviderType.DOCKER, + name: "trycua/cua-ubuntu:latest" + }); + await computer.run(); // Launch & connect 
to the container + ``` + + + First, install and run `cua-computer-server`: + ```bash + pip install cua-computer-server + python -m computer_server + ``` - - + Then, use the `Computer` object to connect: + ```typescript + import { Computer } from '@trycua/computer'; + + const computer = new Computer({ useHostComputerServer: true }); + await computer.run(); // Connect to the host desktop + ``` + + + + Once connected, you can perform interactions: ```typescript - import { Computer, OSType } from '@trycua/computer'; - - const computer = new Computer({ - osType: OSType.LINUX, - name: "your-container-name", - apiKey: "your-api-key" - }); - - await computer.run(); - try { - // Take screenshot + // Take a screenshot of the computer's current display const screenshot = await computer.interface.screenshot(); - - // Click and type + // Simulate a left-click at coordinates (100, 100) await computer.interface.leftClick(100, 100); + // Type "Hello!" into the active application await computer.interface.typeText("Hello!"); } finally { await computer.close(); } ``` + You can automate these actions using an agent. + @@ -165,6 +275,14 @@ Choose how you want to run your cua computer. **Cloud containers are recommended ## Using Agent +Utilize an `Agent` to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment. 
+ +Install the Cua agent Python SDK: +```bash +pip install "cua-agent[all]" +``` + +Then, use the `ComputerAgent` object: ```python from agent import ComputerAgent @@ -187,7 +305,5 @@ async for result in agent.run(messages): ## Next Steps -{/* - Explore the [SDK documentation](/sdk) for advanced features */} - - Learn about [trajectory tracking](/agent-sdk/callbacks/trajectories) and [callbacks](/agent-sdk/callbacks/agent-lifecycle) - Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for support From 36bdd399fab8f7ce0250e3acb84a2fd2810e742c Mon Sep 17 00:00:00 2001 From: James Murdza Date: Tue, 7 Oct 2025 15:29:40 -0700 Subject: [PATCH 06/37] Remove the UI quickstart documentation --- docs/content/docs/meta.json | 3 +- docs/content/docs/quickstart-ui.mdx | 216 ---------------------------- 2 files changed, 1 insertion(+), 218 deletions(-) delete mode 100644 docs/content/docs/quickstart-ui.mdx diff --git a/docs/content/docs/meta.json b/docs/content/docs/meta.json index 9aea034a..bee0e1c8 100644 --- a/docs/content/docs/meta.json +++ b/docs/content/docs/meta.json @@ -5,9 +5,8 @@ "defaultOpen": true, "pages": [ "index", - "quickstart-ui", - "quickstart-cli", "quickstart-devs", + "quickstart-cli", "telemetry", "---[BookCopy]Computer Playbook---", "...computer-sdk", diff --git a/docs/content/docs/quickstart-ui.mdx b/docs/content/docs/quickstart-ui.mdx deleted file mode 100644 index 72bac935..00000000 --- a/docs/content/docs/quickstart-ui.mdx +++ /dev/null @@ -1,216 +0,0 @@ ---- -title: Quickstart (GUI) -description: Get started with the cua Agent UI in 3 steps -icon: Rocket ---- - -import { Step, Steps } from 'fumadocs-ui/components/steps'; -import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; - -Get up and running with the cua Agent UI in 3 simple steps. - - - - -## Introduction - -cua combines Computer (interface) + Agent (AI) for automating desktop apps. 
The Agent UI provides a simple chat interface to control your remote computer using natural language. - - - - - -## Set Up Your Computer Environment - -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: - - - - - **Easiest & safest way to get started** - - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) - 2. Navigate to **Dashboard > Containers > Create Instance** - 3. Create a **Medium, Ubuntu 22** container - 4. Note your container name and API key - - Your cloud container will be automatically configured and ready to use. - - - - - 1. Install lume cli - - ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua container - - ```bash - lume run macos-sequoia-cua:latest - ``` - - - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency - - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` - - 3. Windows Sandbox will be automatically configured when you run the CLI - - - - - 1. Install Docker Desktop or Docker Engine - - 2. 
Pull the CUA Ubuntu container - - ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest - ``` - - - - - - - - -## Install and Run cua - - - - - -### Install uv - - - - -```bash -# Use curl to download the script and execute it with sh: -curl -LsSf https://astral.sh/uv/install.sh | sh - -# If your system doesn't have curl, you can use wget: -# wget -qO- https://astral.sh/uv/install.sh | sh -``` - - - - -```powershell -# Use irm to download the script and execute it with iex: -powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" -``` - - - - -### Install Python 3.12 - -```bash -uv python install 3.12 -``` - -### Run cua - -```bash -uv run --with "cua-agent[ui]" -m agent.ui -``` - - - - - -### Install conda - - - - -```bash -mkdir -p ~/miniconda3 -curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o ~/miniconda3/miniconda.sh -bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 -rm ~/miniconda3/miniconda.sh -source ~/miniconda3/bin/activate -``` - - - - -```bash -mkdir -p ~/miniconda3 -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh -bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 -rm ~/miniconda3/miniconda.sh -source ~/miniconda3/bin/activate -``` - - - - -```powershell -wget "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" -outfile ".\miniconda.exe" -Start-Process -FilePath ".\miniconda.exe" -ArgumentList "/S" -Wait -del .\miniconda.exe -``` - - - - -### Create and activate Python 3.12 environment - -```bash -conda create -n cua python=3.12 -conda activate cua -``` - -### Install and run cua - -```bash -pip install "cua-agent[ui]" cua-computer -python -m agent.ui -``` - - - - - -### Install cua - -```bash -pip install "cua-agent[ui]" cua-computer -``` - -### Run the Agent UI - -```bash -python -m agent.ui -``` - - - - - -### Start Chatting - -Open your browser to the displayed URL and start chatting with your 
computer-using agent. - -You can ask your agent to perform actions like: - -- "Open Firefox and go to github.com" -- "Take a screenshot and tell me what's on the screen" -- "Type 'Hello world' into the terminal" - - - - ---- - -For advanced Python usage, see the [Quickstart for Developers](/quickstart-devs). From 118fdf39545f82c40fea37273eefdedf8bb57619 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Tue, 7 Oct 2025 15:48:10 -0700 Subject: [PATCH 07/37] Improve quickstart docs wording --- docs/content/docs/quickstart-devs.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index e61c4b9b..1f273e35 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -1,13 +1,13 @@ --- title: Quickstart -description: Get started with cua in four steps +description: Get started with Cua in three steps icon: Rocket --- import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -The steps below will guide you through the process of creating a computer environment, connecting to it programmatically, and automating tasks. +This quickstart guides you through setting up your environment, programmatic control with Cua Computer, and task automation with Cua Agent: From 7253d39fe76d48def60916b6b1bd78183c2b7633 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 7 Oct 2025 22:48:29 +0000 Subject: [PATCH 08/37] Updated `docs/content/docs/agent-sdk/agent-loops.mdx:16-46` with the complete working example that includes: - `asyncio` import and `asyncio.run()` - `async with Computer()` context manager - Full `Computer()` initialization with all required parameters - Proper message format as a list of dictionaries - Complete `async for` loop with item iteration Remove sensitive API key from documentation Removed sensitive API key from agent-loops.mdx. 
--- docs/content/docs/agent-sdk/agent-loops.mdx | 34 +++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx b/docs/content/docs/agent-sdk/agent-loops.mdx index 33bf66e2..6cd8daef 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -15,20 +15,34 @@ To run an agent loop simply do: ```python from agent import ComputerAgent +import asyncio from computer import Computer -computer = Computer() # Connect to a cua container -agent = ComputerAgent( - model="anthropic/claude-3-5-sonnet-20241022", - tools=[computer] -) +async def take_screenshot(): + async with Computer( + os_type="linux", + provider_type="cloud", + name="your-container-name", + api_key="your-api-key" + ) as computer: -prompt = "Take a screenshot and tell me what you see" + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + max_trajectory_budget=5.0 + ) -async for result in agent.run(prompt): - if result["output"][-1]["type"] == "message": - print("Agent:", result["output"][-1]["content"][0]["text"]) + messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] + + async for result in agent.run(messages): + for item in result["output"]: + if item["type"] == "message": + print(item["content"][0]["text"]) + + +if __name__ == "__main__": + asyncio.run(take_screenshot()) ``` For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page. 
@@ -170,4 +184,4 @@ except BudgetExceededException: print("Budget limit exceeded") except Exception as e: print(f"Agent error: {e}") -``` \ No newline at end of file +``` From dcb849ea2623899e9bb66c4e422ea1d40894c749 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Tue, 7 Oct 2025 15:48:21 -0700 Subject: [PATCH 09/37] Add links to the quickstart guide --- docs/content/docs/quickstart-devs.mdx | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index 1f273e35..d442f3f0 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -7,7 +7,7 @@ icon: Rocket import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; -This quickstart guides you through setting up your environment, programmatic control with Cua Computer, and task automation with Cua Agent: +This quickstart guides you through setting up your [computer environment](#set-up-your-computer-environment), programmatic control with a [Cua computer](#using-computer), and task automation with a [Cua agent](#using-agent): @@ -173,9 +173,6 @@ Connect to your Cua computer and perform basic interactions, such as taking scre finally: await computer.close() ``` - - You can automate these actions using an agent. - Install the Cua computer TypeScript SDK: @@ -263,19 +260,18 @@ Connect to your Cua computer and perform basic interactions, such as taking scre await computer.close(); } ``` - - You can automate these actions using an agent. - +Learn more about computers in the [Cua computers documentation](/computer-sdk/computers). You will see how to automate computers with agents in the next step. + ## Using Agent -Utilize an `Agent` to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment. 
+Utilize an Agent to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment. Install the Cua agent Python SDK: ```bash @@ -300,10 +296,13 @@ async for result in agent.run(messages): print(item["content"][0]["text"]) ``` +Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available models in [Supported Models](/agent-sdk/supported-model-providers/). + ## Next Steps -- Learn about [trajectory tracking](/agent-sdk/callbacks/trajectories) and [callbacks](/agent-sdk/callbacks/agent-lifecycle) -- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for support +- Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands) +- Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/) +- Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help From 9d8dd768bfcaa9e93b0dea377ef22ecdd028e472 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Wed, 8 Oct 2025 18:14:39 -0700 Subject: [PATCH 10/37] update container to sandbox --- blog/introducing-cua-cloud-containers.md | 22 ++-- blog/lume-to-containerization.md | 4 +- blog/sandboxed-python-execution.md | 2 +- docs/content/docs/computer-sdk/computers.mdx | 126 +++++++++---------- docs/content/docs/quickstart-devs.mdx | 18 +-- examples/computer-example-ts/README.md | 14 +-- notebooks/README.md | 2 +- notebooks/agent_nb.ipynb | 72 ++--------- notebooks/computer_nb.ipynb | 50 ++------ notebooks/sota_hackathon_cloud.ipynb | 36 +----- scripts/playground-docker.sh | 20 +-- scripts/playground.sh | 24 ++-- 12 files changed, 141 insertions(+), 249 deletions(-) diff --git a/blog/introducing-cua-cloud-containers.md b/blog/introducing-cua-cloud-containers.md index 86cbd400..1555da43 100644 --- a/blog/introducing-cua-cloud-containers.md +++ b/blog/introducing-cua-cloud-containers.md @@ -1,8 +1,8 @@ -# 
Introducing Cua Cloud Containers: Computer-Use Agents in the Cloud +# Introducing Cua Cloud Sandbox: Computer-Use Agents in the Cloud *Published on May 28, 2025 by Francesco Bonacci* -Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Containers** – the easiest way to deploy Computer-Use Agents at scale. +Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./build-your-own-operator-on-macos-1), we showed you how to build your own Operator on macOS. In [Part 2](./build-your-own-operator-on-macos-2), we explored the cua-agent framework. Today, we're excited to introduce **Cua Cloud Sandbox** – the easiest way to deploy Computer-Use Agents at scale.
@@ -10,9 +10,9 @@ Welcome to the next chapter in our Computer-Use Agent journey! In [Part 1](./bui ## What is Cua Cloud? -Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured cloud containers with a single command. Each container comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs. +Think of Cua Cloud as **Docker for Computer-Use Agents**. Instead of managing VMs, installing dependencies, and configuring environments, you can launch pre-configured Cloud Sandbox instances with a single command. Each sandbox comes with a **full desktop environment** accessible via browser (via noVNC), all CUA-related dependencies pre-configured (with a PyAutoGUI-compatible server), and **pay-per-use pricing** that scales with your needs. -## Why Cua Cloud Containers? +## Why Cua Cloud Sandbox? Four months ago, we launched [**Lume**](https://github.com/trycua/cua/tree/main/libs/lume) and [**Cua**](https://github.com/trycua/cua) with the goal to bring sandboxed VMs and Computer-Use Agents on Apple Silicon. The developer's community response was incredible 🎉 @@ -40,7 +40,7 @@ export CUA_API_KEY=your_api_key_here export CUA_CONTAINER_NAME=my-agent-container ``` -### Step 2: Launch Your First Container +### Step 2: Launch Your First Sandbox ```python import asyncio @@ -80,7 +80,7 @@ We're launching with **three compute tiers** to match your workload needs: - **Medium** (2 vCPU, 8GB RAM) - Ideal for most production workloads - **Large** (8 vCPU, 32GB RAM) - Built for complex, resource-intensive operations -Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic cleanup on termination. 
+Each tier includes a **full Linux with Xfce desktop environment** with pre-configured browser, **secure VNC access** with SSL, persistent storage during your session, and automatic sandbox cleanup on termination. ## How some customers are using Cua Cloud today @@ -202,23 +202,23 @@ asyncio.run(parallel_scraping()) ## Cost Optimization Tips -To optimize your costs, use appropriate container sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize container spin-up time, and always remember to terminate containers when your work is complete. +To optimize your costs, use appropriate sandbox sizes for your workload and implement timeouts to prevent runaway tasks. Batch related operations together to minimize sandbox spin-up time, and always remember to terminate sandboxes when your work is complete. ## Security Considerations -Cua Cloud runs all containers in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories. +Cua Cloud runs all sandboxes in isolated environments with encrypted VNC connections. Your API keys are never exposed in trajectories. ## What's Next for Cua Cloud We're just getting started! Here's what's coming in the next few months: -### Elastic Autoscaled Container Pools +### Elastic Autoscaled Sandbox Pools -Soon you'll be able to create elastic container pools that automatically scale based on demand. Define minimum and maximum container counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention. +Soon you'll be able to create elastic sandbox pools that automatically scale based on demand. Define minimum and maximum sandbox counts, and let Cua Cloud handle the rest. Perfect for batch processing, scheduled automations, and handling traffic spikes without manual intervention. 
### Windows and macOS Cloud Support -While we're launching with Linux containers, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API. +While we're launching with Linux sandboxes, Windows and macOS cloud machines are coming soon. Run Windows-specific automations, test cross-platform workflows, or leverage macOS-exclusive applications – all in the cloud with the same simple API. Stay tuned for updates and join our [**Discord**](https://discord.gg/cua-ai) to vote on which features you'd like to see first! diff --git a/blog/lume-to-containerization.md b/blog/lume-to-containerization.md index cf468e0e..7a779536 100644 --- a/blog/lume-to-containerization.md +++ b/blog/lume-to-containerization.md @@ -2,7 +2,7 @@ *Published on June 10, 2025 by Francesco Bonacci* -Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Containers](./introducing-cua-cloud-containers). Here's our take on Apple's announcement. +Yesterday, Apple announced their new [Containerization framework](https://github.com/apple/containerization) at WWDC. Since then, our Discord and X users have been asking what this means for Cua virtualization capabilities on Apple Silicon. We've been working in this space for months - from [Lume](https://github.com/trycua/cua/tree/main/libs/lume) to [Lumier](https://github.com/trycua/cua/tree/main/libs/lumier) to [Cua Cloud Sandbox](./introducing-cua-cloud-containers). Here's our take on Apple's announcement. 
## Our Story @@ -168,7 +168,7 @@ Apple's announcement confirms we're on the right path. Here's what we're looking - [Apple Containerization Framework](https://github.com/apple/containerization) - [Lume - Direct VM Management](https://github.com/trycua/cua/tree/main/libs/lume) - [Lumier - Docker Interface for VMs](https://github.com/trycua/cua/tree/main/libs/lumier) -- [Cua Cloud Containers](https://trycua.com) +- [Cua Cloud Sandbox](https://trycua.com) - [Join our Discord](https://discord.gg/cua-ai) --- diff --git a/blog/sandboxed-python-execution.md b/blog/sandboxed-python-execution.md index 9261e955..c7b115c4 100644 --- a/blog/sandboxed-python-execution.md +++ b/blog/sandboxed-python-execution.md @@ -235,7 +235,7 @@ print(f"Security audit: {audit_result}") ### Desktop automation in the cloud -Here's where things get really interesting. Cua cloud containers come with full desktop environments, so you can automate GUIs: +Here's where things get really interesting. Cua Cloud Sandbox comes with full desktop environments, so you can automate GUIs: ```python @sandboxed("desktop_env") diff --git a/docs/content/docs/computer-sdk/computers.mdx b/docs/content/docs/computer-sdk/computers.mdx index 0b11d20d..047ad78b 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -9,9 +9,9 @@ Before we can automate apps using AI, we need to first connect to a Computer Ser Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop. -## cua cloud container +## Cua Cloud Sandbox -This is a cloud container running the Computer Server. This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website. +This is a Cloud Sandbox running the Computer Server. 
This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website. @@ -25,7 +25,7 @@ This is a cloud container running the Computer Server. This is the easiest & saf api_key="your-api-key" ) - await computer.run() # Connect to the container + await computer.run() # Connect to the sandbox ``` @@ -39,75 +39,22 @@ This is a cloud container running the Computer Server. This is the easiest & saf apiKey: "your-api-key" }); - await computer.run(); // Connect to the container + await computer.run(); // Connect to the sandbox ``` -## cua local containers +## Cua Local Sandbox -cua provides local containers using different providers depending on your host operating system: +Cua provides local sandboxes using different providers depending on your host operating system: - - - - 1. Install lume cli - - ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua container - - ```bash - lume run macos-sequoia-cua:latest - ``` - - 3. Connect with Computer - - ```python - from computer import Computer - - computer = Computer( - os_type="macos", - provider_type="lume", - name="macos-sequoia-cua:latest" - ) - - await computer.run() # Launch & connect to the container - ``` - - - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency - - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` - - 3. Windows Sandbox will be automatically configured when you run the CLI - - ```python - from computer import Computer - - computer = Computer( - os_type="windows", - provider_type="winsandbox", - ephemeral=True # Windows Sandbox is always ephemeral - ) - - await computer.run() # Launch & connect to Windows Sandbox - ``` - - + - + 1. Install Docker Desktop or Docker Engine - 2. Build or pull the CUA Ubuntu container + 2. 
Build or pull the CUA Ubuntu sandbox ```bash # Option 1: Pull from Docker Hub @@ -130,7 +77,60 @@ cua provides local containers using different providers depending on your host o name="my-cua-container" ) - await computer.run() # Launch & connect to Docker container + await computer.run() # Launch & connect to Docker sandbox + ``` + + + + + 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) + 2. Install pywinsandbox dependency + + ```bash + pip install -U git+https://github.com/karkason/pywinsandbox.git + ``` + + 3. Windows Sandbox will be automatically configured when you run the CLI + + ```python + from computer import Computer + + computer = Computer( + os_type="windows", + provider_type="winsandbox", + ephemeral=True # Windows Sandbox is always ephemeral + ) + + await computer.run() # Launch & connect to Windows Sandbox + ``` + + + + + 1. Install lume cli + + ```bash + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + ``` + + 2. Start a local Cua sandbox + + ```bash + lume run macos-sequoia-cua:latest + ``` + + 3. Connect with Computer + + ```python + from computer import Computer + + computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" + ) + + await computer.run() # Launch & connect to the sandbox ``` diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index d442f3f0..5b7b28a3 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -21,15 +21,15 @@ You can run your Cua computer in the cloud (recommended for easiest setup), loca - - Cua cloud containers are virtual machines that run Ubuntu. + + Cua Cloud Sandbox provides virtual machines that run Ubuntu. 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** - 3. Create a **Medium, Ubuntu 22** container - 4. 
Note your container name and API key + 3. Create a **Medium, Ubuntu 22** sandbox + 4. Note your sandbox name and API key - Your cloud container will be automatically configured and ready to use. + Your Cloud Sandbox will be automatically configured and ready to use. @@ -42,7 +42,7 @@ You can run your Cua computer in the cloud (recommended for easiest setup), loca /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` - 2. Start a local Cua container: + 2. Start a local Cua sandbox: ```bash lume run macos-sequoia-cua:latest @@ -69,7 +69,7 @@ You can run your Cua computer in the cloud (recommended for easiest setup), loca 1. Install Docker Desktop or Docker Engine: - 2. Pull the CUA Ubuntu container: + 2. Pull the CUA Ubuntu sandbox: ```bash docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest @@ -106,7 +106,7 @@ Connect to your Cua computer and perform basic interactions, such as taking scre name="your-container-name", api_key="your-api-key" ) - await computer.run() # Connect to the container + await computer.run() # Connect to the sandbox ``` @@ -192,7 +192,7 @@ Connect to your Cua computer and perform basic interactions, such as taking scre name: "your-container-name", apiKey: "your-api-key" }); - await computer.run(); // Connect to the container + await computer.run(); // Connect to the sandbox ``` diff --git a/examples/computer-example-ts/README.md b/examples/computer-example-ts/README.md index 500362c9..7e7fc81e 100644 --- a/examples/computer-example-ts/README.md +++ b/examples/computer-example-ts/README.md @@ -1,13 +1,13 @@ # cua-cloud-openai Example -This example demonstrates how to control a cua Cloud container using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. +This example demonstrates how to control a Cua Cloud Sandbox using the OpenAI `computer-use-preview` model and the `@trycua/computer` TypeScript library. 
## Overview -- Connects to a cua Cloud container via the `@trycua/computer` library +- Connects to a Cua Cloud Sandbox via the `@trycua/computer` library - Sends screenshots and instructions to OpenAI's computer-use model -- Executes AI-generated actions (clicks, typing, etc.) inside the container -- Designed for Linux containers, but can be adapted for other OS types +- Executes AI-generated actions (clicks, typing, etc.) inside the sandbox +- Designed for Linux sandboxes, but can be adapted for other OS types ## Getting Started @@ -20,8 +20,8 @@ This example demonstrates how to control a cua Cloud container using the OpenAI 2. **Set up environment variables:** Create a `.env` file with the following variables: - `OPENAI_API_KEY` — your OpenAI API key - - `CUA_API_KEY` — your cua Cloud API key - - `CUA_CONTAINER_NAME` — the name of your provisioned container + - `CUA_API_KEY` — your Cua Cloud API key + - `CUA_CONTAINER_NAME` — the name of your provisioned sandbox 3. **Run the example:** @@ -38,7 +38,7 @@ This example demonstrates how to control a cua Cloud container using the OpenAI For a step-by-step tutorial and more detailed explanation, see the accompanying blog post: -➡️ [Controlling a cua Cloud Container with JavaScript](https://placeholder-url-to-blog-post.com) +➡️ [Controlling a Cua Cloud Sandbox with JavaScript](https://placeholder-url-to-blog-post.com) _(This link will be updated once the article is published.)_ diff --git a/notebooks/README.md b/notebooks/README.md index 0a7f4890..25978415 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -5,7 +5,7 @@ This folder contains Jupyter notebooks that demonstrate the core functionality o ## Available Notebooks ### Core Components -- **`computer_nb.ipynb`** - Demonstrates the Computer API for programmatically operating sandbox VMs using either Cua Cloud Containers or local Lume VMs on Apple Silicon macOS systems +- **`computer_nb.ipynb`** - Demonstrates the Computer API for programmatically 
operating sandbox VMs using either Cua Cloud Sandbox or local Lume VMs on Apple Silicon macOS systems - **`agent_nb.ipynb`** - Shows how to use CUA's Agent to run automated workflows in virtual sandboxes with various AI models (OpenAI, Anthropic, local models) - **`pylume_nb.ipynb`** - Quickstart guide for the pylume Python library, which handles VM creation, management, and image operations - **`computer_server_nb.ipynb`** - Demonstrates how to host and configure the Computer server that powers the Computer API diff --git a/notebooks/agent_nb.ipynb b/notebooks/agent_nb.ipynb index 30746780..7523e25d 100644 --- a/notebooks/agent_nb.ipynb +++ b/notebooks/agent_nb.ipynb @@ -3,11 +3,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Agent\n", - "\n", - "This notebook demonstrates how to use Cua's Agent to run workflows in virtual sandboxes, either using Cua Cloud Containers or local VMs on Apple Silicon Macs." - ] + "source": "## Agent\n\nThis notebook demonstrates how to use Cua's Agent to run workflows in virtual sandboxes, either using Cua Cloud Sandbox or local VMs on Apple Silicon Macs." }, { "cell_type": "markdown", @@ -61,9 +57,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Agent allows you to run an agentic workflow in virtual sandbox instances. You can choose between cloud containers or local VMs." - ] + "source": "Agent allows you to run an agentic workflow in virtual sandbox instances. You can choose between Cloud Sandbox or local VMs." }, { "cell_type": "code", @@ -96,32 +90,17 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Option 1: Agent with Cua Cloud Containers\n", - "\n", - "Use cloud containers for running agents from any system without local setup." - ] + "source": "## Option 1: Agent with Cua Cloud Sandbox\n\nUse Cloud Sandbox for running agents from any system without local setup." 
}, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Prerequisites for Cloud Containers\n", - "\n", - "To use Cua Cloud Containers, you need to:\n", - "1. Sign up at https://trycua.com\n", - "2. Create a Cloud Container\n", - "3. Generate an API Key\n", - "\n", - "Once you have these, you can connect to your cloud container and run agents on it." - ] + "source": "### Prerequisites for Cloud Sandbox\n\nTo use Cua Cloud Sandbox, you need to:\n1. Sign up at https://trycua.com\n2. Create a Cloud Sandbox\n3. Generate an API Key\n\nOnce you have these, you can connect to your Cloud Sandbox and run agents on it." }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "Get Cua API credentials and container details" - ] + "source": "Get Cua API credentials and sandbox details" }, { "cell_type": "code", @@ -138,60 +117,31 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Choose the OS type for your container (linux or macos)" - ] + "source": "Choose the OS type for your sandbox (linux or macos)" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\"" - ] + "source": "os_type = input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\"" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Create an agent with cloud container" - ] + "source": "### Create an agent with Cloud Sandbox" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import logging\n", - "from pathlib import Path\n", - "\n", - "# Connect to your existing cloud container\n", - "computer = Computer(\n", - " os_type=os_type,\n", - " api_key=cua_api_key,\n", - " name=container_name,\n", - " provider_type=VMProviderType.CLOUD,\n", - " verbosity=logging.INFO\n", - ")\n", - "\n", - "# Create agent\n", - "agent = ComputerAgent(\n", - 
" model=\"openai/computer-use-preview\",\n", - " tools=[computer],\n", - " trajectory_dir=str(Path(\"trajectories\")),\n", - " only_n_most_recent_images=3,\n", - " verbosity=logging.INFO\n", - ")\n" - ] + "source": "import logging\nfrom pathlib import Path\n\n# Connect to your existing Cloud Sandbox\ncomputer = Computer(\n os_type=os_type,\n api_key=cua_api_key,\n name=container_name,\n provider_type=VMProviderType.CLOUD,\n verbosity=logging.INFO\n)\n\n# Create agent\nagent = ComputerAgent(\n model=\"openai/computer-use-preview\",\n tools=[computer],\n trajectory_dir=str(Path(\"trajectories\")),\n only_n_most_recent_images=3,\n verbosity=logging.INFO\n)\n" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "Run tasks on cloud container" - ] + "source": "Run tasks on Cloud Sandbox" }, { "cell_type": "code", @@ -565,4 +515,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/notebooks/computer_nb.ipynb b/notebooks/computer_nb.ipynb index 0cf35175..740bad2c 100644 --- a/notebooks/computer_nb.ipynb +++ b/notebooks/computer_nb.ipynb @@ -3,11 +3,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Computer\n", - "\n", - "This notebook demonstrates how to use Computer to operate sandbox VMs programmatically, either using Cua Cloud Containers or local Lume VMs on Apple Silicon macOS systems." - ] + "source": "## Computer\n\nThis notebook demonstrates how to use Computer to operate sandbox VMs programmatically, either using Cua Cloud Sandbox or local Lume VMs on Apple Silicon macOS systems." }, { "cell_type": "markdown", @@ -513,25 +509,12 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Option 1: Cua Cloud Containers\n", - "\n", - "Cua Cloud Containers provide remote VMs that can be accessed from any system without local setup." - ] + "source": "## Option 1: Cua Cloud Sandbox\n\nCua Cloud Sandbox provides remote VMs that can be accessed from any system without local setup." 
}, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Prerequisites for Cloud Containers\n", - "\n", - "To use Cua Cloud Containers, you need to:\n", - "1. Sign up at https://trycua.com\n", - "2. Create a Cloud Container\n", - "3. Generate an API Key\n", - "\n", - "Once you have these, you can connect to your cloud container using its name." - ] + "source": "### Prerequisites for Cloud Sandbox\n\nTo use Cua Cloud Sandbox, you need to:\n1. Sign up at https://trycua.com\n2. Create a Cloud Sandbox\n3. Generate an API Key\n\nOnce you have these, you can connect to your Cloud Sandbox using its name." }, { "cell_type": "code", @@ -551,25 +534,19 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Choose the OS type for your container (linux or macos)" - ] + "source": "Choose the OS type for your sandbox (linux or macos)" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\"" - ] + "source": "os_type = input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\"" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "### Connect to your Cloud Container" - ] + "source": "### Connect to your Cloud Sandbox" }, { "cell_type": "code", @@ -583,23 +560,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "Connect to your existing Cua Cloud Container" - ] + "source": "Connect to your existing Cua Cloud Sandbox" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "computer = Computer(\n", - " os_type=os_type, # Must match the OS type of your cloud container\n", - " api_key=cua_api_key,\n", - " name=container_name,\n", - " provider_type=VMProviderType.CLOUD,\n", - ")" - ] + "source": "computer = Computer(\n os_type=os_type, # Must match the OS type of your Cloud Sandbox\n api_key=cua_api_key,\n 
name=container_name,\n provider_type=VMProviderType.CLOUD,\n)" }, { "cell_type": "markdown", @@ -1106,4 +1074,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/notebooks/sota_hackathon_cloud.ipynb b/notebooks/sota_hackathon_cloud.ipynb index d6298e94..575d15ea 100644 --- a/notebooks/sota_hackathon_cloud.ipynb +++ b/notebooks/sota_hackathon_cloud.ipynb @@ -56,12 +56,7 @@ "cell_type": "markdown", "id": "47171dc3", "metadata": {}, - "source": [ - "1. Create a Cua account at https://www.trycua.com/\n", - "2. Start a small Cua container at https://www.trycua.com/dashboard/containers (If you need credits, ask us!)\n", - "3. Create a HUD account at https://www.hud.so/\n", - "4. Create a .env file:" - ] + "source": "1. Create a Cua account at https://www.trycua.com/\n2. Start a small Cua sandbox at https://www.trycua.com/dashboard/containers (If you need credits, ask us!)\n3. Create a HUD account at https://www.hud.so/\n4. Create a .env file:" }, { "cell_type": "code", @@ -151,21 +146,13 @@ "cell_type": "markdown", "id": "a07b09ee", "metadata": {}, - "source": [ - "## 🖱️ Test your agent\n", - "\n", - "Run your agent on a test scenario in a Cua cloud container." - ] + "source": "## 🖱️ Test your agent\n\nRun your agent on a test scenario in a Cua Cloud Sandbox." }, { "cell_type": "markdown", "id": "12b9c22c", "metadata": {}, - "source": [ - "Connect to an existing cloud container through the Cua SDK.\n", - "\n", - "You can access the computer through VNC on the [Cua Dashboard](https://www.trycua.com/dashboard)." - ] + "source": "Connect to an existing Cloud Sandbox through the Cua SDK.\n\nYou can access the computer through VNC on the [Cua Dashboard](https://www.trycua.com/dashboard)." 
}, { "cell_type": "code", @@ -173,20 +160,7 @@ "id": "a210e959", "metadata": {}, "outputs": [], - "source": [ - "from computer import Computer, VMProviderType\n", - "\n", - "# Connect to your existing cloud container\n", - "computer = Computer(\n", - " os_type=\"linux\",\n", - " provider_type=VMProviderType.CLOUD,\n", - " name=os.getenv(\"CUA_CONTAINER_NAME\") or \"\",\n", - " api_key=os.getenv(\"CUA_API_KEY\"),\n", - " verbosity=logging.INFO\n", - ")\n", - "\n", - "agent_config[\"tools\"] = [ computer ]" - ] + "source": "from computer import Computer, VMProviderType\n\n# Connect to your existing Cloud Sandbox\ncomputer = Computer(\n os_type=\"linux\",\n provider_type=VMProviderType.CLOUD,\n name=os.getenv(\"CUA_CONTAINER_NAME\") or \"\",\n api_key=os.getenv(\"CUA_API_KEY\"),\n verbosity=logging.INFO\n)\n\nagent_config[\"tools\"] = [ computer ]" }, { "cell_type": "markdown", @@ -283,4 +257,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/scripts/playground-docker.sh b/scripts/playground-docker.sh index 5cfae574..8a798464 100644 --- a/scripts/playground-docker.sh +++ b/scripts/playground-docker.sh @@ -80,16 +80,16 @@ trap cleanup EXIT echo "" echo "Choose your Cua setup:" -echo "1) ☁️ Cua Cloud Containers (works on any system)" +echo "1) ☁️ Cua Cloud Sandbox (works on any system)" echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)" echo "3) 🖥️ Local Windows VMs (requires Windows 10 / 11)" echo "" read -p "Enter your choice (1, 2, or 3): " CHOICE if [[ "$CHOICE" == "1" ]]; then - # Cua Cloud Container setup + # Cua Cloud Sandbox setup echo "" - print_info "Setting up Cua Cloud Containers..." + print_info "Setting up Cua Cloud Sandbox..." 
echo "" # Check if existing .env.local already has CUA_API_KEY @@ -116,15 +116,15 @@ if [[ "$CHOICE" == "1" ]]; then # If no valid API key found, prompt for one if [[ -z "$CUA_API_KEY" ]]; then - echo "To use Cua Cloud Containers, you need to:" + echo "To use Cua Cloud Sandbox, you need to:" echo "1. Sign up at https://trycua.com" - echo "2. Create a Cloud Container" + echo "2. Create a Cloud Sandbox" echo "3. Generate an Api Key" echo "" read -p "Enter your Cua Api Key: " CUA_API_KEY if [[ -z "$CUA_API_KEY" ]]; then - print_error "Cua Api Key is required for Cloud Containers." + print_error "Cua Api Key is required for Cloud Sandbox." exit 1 fi else @@ -142,7 +142,7 @@ elif [[ "$CHOICE" == "2" ]]; then # Check for Apple Silicon Mac if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then print_error "Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -150,7 +150,7 @@ elif [[ "$CHOICE" == "2" ]]; then OSVERSION=$(sw_vers -productVersion) if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then print_error "Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -165,7 +165,7 @@ elif [[ "$CHOICE" == "3" ]]; then # Check if we're on Windows if [[ $(uname -s) != MINGW* && $(uname -s) != CYGWIN* && $(uname -s) != MSYS* ]]; then print_error "Local Windows VMs require Windows 10 or 11." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." 
echo "" echo "🔗 If you are using WSL, refer to the blog post to get started: https://www.trycua.com/blog/windows-sandbox" exit 1 @@ -303,7 +303,7 @@ chmod +x "$DEMO_DIR/start_ui.sh" print_success "Setup complete!" if [[ "$USE_CLOUD" == "true" ]]; then - echo "☁️ Cua Cloud Container setup complete!" + echo "☁️ Cua Cloud Sandbox setup complete!" else echo "🖥️ Cua Local VM setup complete!" fi diff --git a/scripts/playground.sh b/scripts/playground.sh index 0cde5a25..58bc2da2 100755 --- a/scripts/playground.sh +++ b/scripts/playground.sh @@ -22,18 +22,18 @@ TMP_DIR=$(mktemp -d) cd "$TMP_DIR" trap cleanup EXIT -# Ask user to choose between local macOS VMs or Cua Cloud Containers +# Ask user to choose between local macOS VMs or Cua Cloud Sandbox echo "" echo "Choose your Cua setup:" -echo "1) ☁️ Cua Cloud Containers (works on any system)" +echo "1) ☁️ Cua Cloud Sandbox (works on any system)" echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)" echo "" read -p "Enter your choice (1 or 2): " CHOICE if [[ "$CHOICE" == "1" ]]; then - # Cua Cloud Container setup + # Cua Cloud Sandbox setup echo "" - echo "☁️ Setting up Cua Cloud Containers..." + echo "☁️ Setting up Cua Cloud Sandbox..." echo "" # Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir) @@ -61,15 +61,15 @@ if [[ "$CHOICE" == "1" ]]; then # If no valid API key found, prompt for one if [[ -z "$CUA_API_KEY" ]]; then - echo "To use Cua Cloud Containers, you need to:" + echo "To use Cua Cloud Sandbox, you need to:" echo "1. Sign up at https://trycua.com" - echo "2. Create a Cloud Container" + echo "2. Create a Cloud Sandbox" echo "3. Generate an Api Key" echo "" read -p "Enter your Cua Api Key: " CUA_API_KEY if [[ -z "$CUA_API_KEY" ]]; then - echo "❌ Cua Api Key is required for Cloud Containers." + echo "❌ Cua Api Key is required for Cloud Sandbox." 
exit 1 fi fi @@ -84,7 +84,7 @@ elif [[ "$CHOICE" == "2" ]]; then # Check for Apple Silicon Mac if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then echo "❌ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -92,7 +92,7 @@ elif [[ "$CHOICE" == "2" ]]; then OSVERSION=$(sw_vers -productVersion) if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then echo "❌ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION." - echo "💡 Consider using Cua Cloud Containers instead (option 1)." + echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi @@ -249,7 +249,7 @@ chmod +x "$DEMO_DIR/start_ui.sh" echo "✅ Setup complete!" if [[ "$USE_CLOUD" == "true" ]]; then - # Create run_demo.py for cloud containers + # Create run_demo.py for cloud sandbox cat > "$DEMO_DIR/run_demo.py" << 'EOF' import asyncio import os @@ -276,7 +276,7 @@ if not openai_key and not anthropic_key: print("\n⚠️ No OpenAI or Anthropic API keys found in .env.local.") print("Please add at least one API key to use AI agents.") -print("🚀 Starting CUA playground with Cloud Containers...") +print("🚀 Starting CUA playground with Cloud Sandbox...") print("📝 Edit .env.local to update your API keys") # Launch the Gradio UI and open it in the browser @@ -314,7 +314,7 @@ app.launch(share=False, inbrowser=True) EOF fi -echo "☁️ CUA Cloud Container setup complete!" +echo "☁️ CUA Cloud Sandbox setup complete!" 
echo "📝 Edit $DEMO_DIR/.env.local to update your API keys" echo "🖥️ Start the playground by running: $DEMO_DIR/start_ui.sh" From 003c10a84615286d152bdc76276d4385d03bbce5 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Wed, 8 Oct 2025 18:27:15 -0700 Subject: [PATCH 11/37] Fix computer tabs --- docs/content/docs/computer-sdk/computers.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/computer-sdk/computers.mdx b/docs/content/docs/computer-sdk/computers.mdx index 047ad78b..2a653f46 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -50,7 +50,7 @@ This is a Cloud Sandbox running the Computer Server. This is the easiest & safes Cua provides local sandboxes using different providers depending on your host operating system: - + 1. Install Docker Desktop or Docker Engine @@ -105,7 +105,7 @@ Cua provides local sandboxes using different providers depending on your host op ``` - + 1. Install lume cli From 0ede822990485f097312e72c19a79ea802c746f4 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 9 Oct 2025 12:01:45 -0400 Subject: [PATCH 12/37] Add /start, /stop, and /restart to cloud provider --- .../computer/providers/cloud/provider.py | 169 ++++++++++++++++-- 1 file changed, 157 insertions(+), 12 deletions(-) diff --git a/libs/python/computer/computer/providers/cloud/provider.py b/libs/python/computer/computer/providers/cloud/provider.py index 1cfba161..a54e01c4 100644 --- a/libs/python/computer/computer/providers/cloud/provider.py +++ b/libs/python/computer/computer/providers/cloud/provider.py @@ -1,6 +1,11 @@ -"""Cloud VM provider implementation. +"""Cloud VM provider implementation using CUA Public API. -This module contains a stub implementation for a future cloud VM provider. 
+Implements the following public API endpoints: + +- GET /v1/vms +- POST /v1/vms/:name/start +- POST /v1/vms/:name/stop +- POST /v1/vms/:name/restart """ import logging @@ -14,6 +19,10 @@ logger = logging.getLogger(__name__) import asyncio import aiohttp from urllib.parse import urlparse +import os + + +DEFAULT_API_BASE = os.getenv("CUA_API_BASE", "https://api.cua.ai") class CloudProvider(BaseVMProvider): """Cloud VM Provider implementation.""" @@ -21,6 +30,7 @@ class CloudProvider(BaseVMProvider): self, api_key: str, verbose: bool = False, + api_base: Optional[str] = None, **kwargs, ): """ @@ -32,6 +42,7 @@ class CloudProvider(BaseVMProvider): assert api_key, "api_key required for CloudProvider" self.api_key = api_key self.verbose = verbose + self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/") @property def provider_type(self) -> VMProviderType: @@ -44,24 +55,158 @@ class CloudProvider(BaseVMProvider): pass async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: - """Get VM VNC URL by name using the cloud API.""" - return {"name": name, "hostname": f"{name}.containers.cloud.trycua.com"} + """Get VM information by querying the VM status endpoint. 
+ + - Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com" + - Probe https://{hostname}:8443/status with a short timeout + - If JSON contains a "status" field, return it; otherwise infer + - Fallback to DNS resolve check to distinguish unknown vs not_found + """ + hostname = await self.get_ip(name=name) + + # Try HTTPS probe to the computer-server status endpoint (8443) + try: + timeout = aiohttp.ClientTimeout(total=3) + async with aiohttp.ClientSession(timeout=timeout) as session: + url = f"https://{hostname}:8443/status" + async with session.get(url, allow_redirects=False) as resp: + status_code = resp.status + vm_status: str + vm_os_type: Optional[str] = None + if status_code == 200: + try: + data = await resp.json(content_type=None) + vm_status = str(data.get("status", "ok")) + if isinstance(data, dict) and "os_type" in data: + vm_os_type = str(data.get("os_type")) + except Exception: + vm_status = "unknown" + elif status_code < 500: + vm_status = "unknown" + else: + vm_status = "unknown" + return { + "name": name, + "status": "running" if vm_status == "ok" else vm_status, + "hostname": hostname, + "os_type": vm_os_type, + } + except Exception: + # Fall back to a DNS resolve check + try: + loop = asyncio.get_event_loop() + await loop.getaddrinfo(hostname, 443) + # Host resolves, but HTTPS probe failed → treat as unknown + return { + "name": name, + "status": "unknown", + "hostname": hostname, + } + except Exception: + # Host does not resolve → not found + return {"name": name, "status": "not_found", "hostname": hostname} async def list_vms(self) -> List[Dict[str, Any]]: - logger.warning("CloudProvider.list_vms is not implemented") - return [] + url = f"{self.api_base}/v1/vms" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as resp: + if resp.status == 200: + try: + data = await 
resp.json(content_type=None) + except Exception: + text = await resp.text() + logger.error(f"Failed to parse list_vms JSON: {text}") + return [] + if isinstance(data, list): + return data + logger.warning("Unexpected response for list_vms; expected list") + return [] + elif resp.status == 401: + logger.error("Unauthorized: invalid CUA API key for list_vms") + return [] + else: + text = await resp.text() + logger.error(f"list_vms failed: HTTP {resp.status} - {text}") + return [] async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: - # logger.warning("CloudProvider.run_vm is not implemented") - return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"} + """Start a VM via public API. Returns a minimal status.""" + url = f"{self.api_base}/v1/vms/{name}/start" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 201, 202, 204): + return {"name": name, "status": "starting"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: - logger.warning("CloudProvider.stop_vm is not implemented. 
To clean up resources, please use Computer.disconnect()") - return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"} + """Stop a VM via public API.""" + url = f"{self.api_base}/v1/vms/{name}/stop" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 202): + # Spec says 202 with {"status":"stopping"} + body_status: Optional[str] = None + try: + data = await resp.json(content_type=None) + body_status = data.get("status") if isinstance(data, dict) else None + except Exception: + body_status = None + return {"name": name, "status": body_status or "stopping"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + """Restart a VM via public API.""" + url = f"{self.api_base}/v1/vms/{name}/restart" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Accept": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers) as resp: + if resp.status in (200, 202): + # Spec says 202 with {"status":"restarting"} + body_status: Optional[str] = None + try: + data = await resp.json(content_type=None) + body_status = data.get("status") if isinstance(data, dict) else None + except Exception: + body_status = None + return {"name": name, "status": body_status or "restarting"} + elif resp.status == 404: + return {"name": name, "status": "not_found"} + elif resp.status == 401: + return {"name": name, "status": "unauthorized"} + else: + text = await resp.text() + return {"name": name, "status": "error", "message": text} async def 
update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: - logger.warning("CloudProvider.update_vm is not implemented") - return {"name": name, "status": "unchanged", "message": "CloudProvider is not implemented"} + logger.warning("CloudProvider.update_vm is not implemented via public API") + return {"name": name, "status": "unchanged", "message": "update_vm not supported by public API"} async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str: """ From 844f138881672436c2a362f0e56519cff665c3f2 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 9 Oct 2025 12:08:39 -0400 Subject: [PATCH 13/37] Added VM management example to /examples --- examples/cloud_api_examples.py | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 examples/cloud_api_examples.py diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py new file mode 100644 index 00000000..a5777932 --- /dev/null +++ b/examples/cloud_api_examples.py @@ -0,0 +1,55 @@ +import asyncio +import os +from typing import List, Dict, Any + +from computer.providers.cloud.provider import CloudProvider + +async def main() -> None: + api_key = os.getenv("CUA_API_KEY") + if not api_key: + raise RuntimeError( + "CUA_API_KEY environment variable is not set.\n" + "Set it and re-run, e.g.:\n" + " PowerShell: $env:CUA_API_KEY='your-key'\n" + " bash: export CUA_API_KEY=your-key" + ) + # List VMs + provider = CloudProvider(api_key=api_key, verbose=True) + async with provider: + vms = await provider.list_vms() + if not vms: + print("No VMs returned (check API key or account).") + else: + print(f"Found {len(vms)} VM(s):") + for i, vm in enumerate(vms, start=1): + # Typical fields: name, status (if available), region, os_type, etc. 
(depends on account) + print(f"[{i}] {vm}") + + # --- Additional operations (commented out) --- + # To stop a VM by name: + # api_key = os.getenv("CUA_API_KEY") + # provider = CloudProvider(api_key=api_key, verbose=True) + # async with provider: + # name = "your-vm-name-here" + # resp = await provider.stop_vm(name) + # print("stop_vm response:", resp) + + # To restart a VM by name: + # api_key = os.getenv("CUA_API_KEY") + # provider = CloudProvider(api_key=api_key, verbose=True) + # async with provider: + # name = "your-vm-name-here" + # resp = await provider.restart_vm(name) + # print("restart_vm response:", resp) + + # To probe a VM's status via its public hostname (if you know the name): + # api_key = os.getenv("CUA_API_KEY") + # provider = CloudProvider(api_key=api_key, verbose=True) + # async with provider: + # name = "your-vm-name-here" + # info = await provider.get_vm(name) + # print("get_vm info:", info) + + +if __name__ == "__main__": + asyncio.run(main()) From b29f89597f4728a20f43b5b69c4a515769539ad3 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 9 Oct 2025 12:36:43 -0400 Subject: [PATCH 14/37] added shared provider vm type --- examples/cloud_api_examples.py | 30 +++++----- .../computer/computer/providers/base.py | 27 ++++++++- .../computer/providers/cloud/provider.py | 22 ++++++- .../computer/providers/cloud/types.py | 57 +++++++++++++++++++ .../computer/computer/providers/types.py | 35 ++++++++++++ 5 files changed, 152 insertions(+), 19 deletions(-) create mode 100644 libs/python/computer/computer/providers/cloud/types.py create mode 100644 libs/python/computer/computer/providers/types.py diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py index a5777932..47844a55 100644 --- a/examples/cloud_api_examples.py +++ b/examples/cloud_api_examples.py @@ -1,29 +1,31 @@ import asyncio import os -from typing import List, Dict, Any +from utils import load_dotenv_files + +load_dotenv_files() from computer.providers.cloud.provider 
import CloudProvider async def main() -> None: api_key = os.getenv("CUA_API_KEY") if not api_key: - raise RuntimeError( - "CUA_API_KEY environment variable is not set.\n" - "Set it and re-run, e.g.:\n" - " PowerShell: $env:CUA_API_KEY='your-key'\n" - " bash: export CUA_API_KEY=your-key" - ) + raise RuntimeError("CUA_API_KEY environment variable is not set") + api_base = os.getenv("CUA_API_BASE") + if api_base: + print(f"Using API base: {api_base}") + # List VMs provider = CloudProvider(api_key=api_key, verbose=True) async with provider: vms = await provider.list_vms() - if not vms: - print("No VMs returned (check API key or account).") - else: - print(f"Found {len(vms)} VM(s):") - for i, vm in enumerate(vms, start=1): - # Typical fields: name, status (if available), region, os_type, etc. (depends on account) - print(f"[{i}] {vm}") + print(f"Found {len(vms)} VM(s)") + for vm in vms: + print( + f"name: {vm['name']}\n", + f"status: {vm['status']}\n", # pending, running, stopped, terminated, failed + f"api_url: {vm.get('api_url')}\n", + f"vnc_url: {vm.get('vnc_url')}\n", + ) # --- Additional operations (commented out) --- # To stop a VM by name: diff --git a/libs/python/computer/computer/providers/base.py b/libs/python/computer/computer/providers/base.py index 23526097..0c36c913 100644 --- a/libs/python/computer/computer/providers/base.py +++ b/libs/python/computer/computer/providers/base.py @@ -2,7 +2,9 @@ import abc from enum import StrEnum -from typing import Dict, List, Optional, Any, AsyncContextManager +from typing import Dict, Optional, Any, AsyncContextManager + +from .types import ListVMsResponse class VMProviderType(StrEnum): @@ -42,8 +44,13 @@ class BaseVMProvider(AsyncContextManager): pass @abc.abstractmethod - async def list_vms(self) -> List[Dict[str, Any]]: - """List all available VMs.""" + async def list_vms(self) -> ListVMsResponse: + """List all available VMs. 
+ + Returns: + ListVMsResponse: A list of minimal VM objects as defined in + `computer.providers.types.MinimalVM`. + """ pass @abc.abstractmethod @@ -76,6 +83,20 @@ class BaseVMProvider(AsyncContextManager): """ pass + @abc.abstractmethod + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + """Restart a VM by name. + + Args: + name: Name of the VM to restart + storage: Optional storage path override. If provided, this will be used + instead of the provider's default storage path. + + Returns: + Dictionary with VM restart status and information + """ + pass + @abc.abstractmethod async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: """Update VM configuration. diff --git a/libs/python/computer/computer/providers/cloud/provider.py b/libs/python/computer/computer/providers/cloud/provider.py index a54e01c4..0e3dfd83 100644 --- a/libs/python/computer/computer/providers/cloud/provider.py +++ b/libs/python/computer/computer/providers/cloud/provider.py @@ -12,6 +12,7 @@ import logging from typing import Dict, List, Optional, Any from ..base import BaseVMProvider, VMProviderType +from ..types import ListVMsResponse, MinimalVM # Setup logging logger = logging.getLogger(__name__) @@ -106,7 +107,7 @@ class CloudProvider(BaseVMProvider): # Host does not resolve → not found return {"name": name, "status": "not_found", "hostname": hostname} - async def list_vms(self) -> List[Dict[str, Any]]: + async def list_vms(self) -> ListVMsResponse: url = f"{self.api_base}/v1/vms" headers = { "Authorization": f"Bearer {self.api_key}", @@ -122,7 +123,24 @@ class CloudProvider(BaseVMProvider): logger.error(f"Failed to parse list_vms JSON: {text}") return [] if isinstance(data, list): - return data + # Enrich with convenience URLs when possible. 
+ enriched: List[Dict[str, Any]] = [] + for item in data: + vm = dict(item) if isinstance(item, dict) else {} + name = vm.get("name") + password = vm.get("password") + if isinstance(name, str) and name: + host = f"https://{name}.containers.cloud.trycua.com:8443" + # api_url: always set if missing + if not vm.get("api_url"): + vm["api_url"] = host + # vnc_url: only when password available + if not vm.get("vnc_url") and isinstance(password, str) and password: + vm[ + "vnc_url" + ] = f"https://{host}/vnc.html?autoconnect=true&password={password}" + enriched.append(vm) + return enriched # type: ignore[return-value] logger.warning("Unexpected response for list_vms; expected list") return [] elif resp.status == 401: diff --git a/libs/python/computer/computer/providers/cloud/types.py b/libs/python/computer/computer/providers/cloud/types.py new file mode 100644 index 00000000..a289701d --- /dev/null +++ b/libs/python/computer/computer/providers/cloud/types.py @@ -0,0 +1,57 @@ +"""Pydantic models for the CUA Cloud provider API. + +Documents the response shape for the Cloud list VMs endpoint. + +List VMs +- Method: GET +- Path: `/v1/vms` +- Description: Returns all VMs owned by the API key's user. 
+- Responses: + - 200: Array of minimal VM objects with fields `{ name, password, status }` + - 401: Unauthorized (missing/invalid API key) + +Example curl: + curl -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms" + +Response shape: +[ + { + "name": "s-windows-x4snp46ebf", + "password": "49b8daa3", + "status": "running" + } +] + +Status values: +- pending : VM deployment in progress +- running : VM is active and accessible +- stopped : VM is stopped but not terminated +- terminated: VM has been permanently destroyed +- failed : VM deployment or operation failed +""" +from __future__ import annotations + +from typing import Literal, Optional + +# Require pydantic for typed models in provider APIs +from pydantic import BaseModel + + +CloudVMStatus = Literal["pending", "running", "stopped", "terminated", "failed"] + + +class CloudVM(BaseModel): + """Minimal VM object returned by CUA Cloud list API. + + Additional optional fields (like URLs) may be filled by callers based on + their environment but are not guaranteed by the API. + """ + + name: str + password: str + status: CloudVMStatus + + # Optional, not guaranteed by the list API, but useful when known/derived + vnc_url: Optional[str] = None + api_url: Optional[str] = None diff --git a/libs/python/computer/computer/providers/types.py b/libs/python/computer/computer/providers/types.py new file mode 100644 index 00000000..48e9f1f2 --- /dev/null +++ b/libs/python/computer/computer/providers/types.py @@ -0,0 +1,35 @@ +"""Shared provider type definitions for VM metadata and responses. + +These base types describe the common shape of objects returned by provider +methods like `list_vms()`. 
+""" +from __future__ import annotations + +from typing import Literal, TypedDict, NotRequired + +# Core status values per product docs +VMStatus = Literal[ + "pending", # VM deployment in progress + "running", # VM is active and accessible + "stopped", # VM is stopped but not terminated + "terminated", # VM has been permanently destroyed + "failed", # VM deployment or operation failed +] + + +class MinimalVM(TypedDict): + """Minimal VM object shape returned by list calls. + + Providers may include additional fields. Optional fields below are + common extensions some providers expose or that callers may compute. + """ + name: str + status: VMStatus + # Not always included by all providers + password: NotRequired[str] + vnc_url: NotRequired[str] + api_url: NotRequired[str] + + +# Convenience alias for list_vms() responses +ListVMsResponse = list[MinimalVM] From 9595f5affae006901fbfbe6ad26b30c07ec2028d Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 9 Oct 2025 13:12:55 -0400 Subject: [PATCH 15/37] updated api examples --- examples/cloud_api_examples.py | 58 ++++++++++++------- .../computer/providers/cloud/provider.py | 25 ++------ .../computer/providers/cloud/types.py | 57 ------------------ 3 files changed, 42 insertions(+), 98 deletions(-) delete mode 100644 libs/python/computer/computer/providers/cloud/types.py diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py index 47844a55..705dca8b 100644 --- a/examples/cloud_api_examples.py +++ b/examples/cloud_api_examples.py @@ -17,6 +17,8 @@ async def main() -> None: # List VMs provider = CloudProvider(api_key=api_key, verbose=True) async with provider: + + # List all VMs vms = await provider.list_vms() print(f"Found {len(vms)} VM(s)") for vm in vms: @@ -27,31 +29,43 @@ async def main() -> None: f"vnc_url: {vm.get('vnc_url')}\n", ) - # --- Additional operations (commented out) --- - # To stop a VM by name: - # api_key = os.getenv("CUA_API_KEY") - # provider = CloudProvider(api_key=api_key, 
verbose=True) - # async with provider: - # name = "your-vm-name-here" - # resp = await provider.stop_vm(name) - # print("stop_vm response:", resp) + # # --- Additional operations (commented out) --- + # # To stop a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.stop_vm(name) + # print( + # "stop_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # stopping + # ) - # To restart a VM by name: - # api_key = os.getenv("CUA_API_KEY") - # provider = CloudProvider(api_key=api_key, verbose=True) - # async with provider: - # name = "your-vm-name-here" - # resp = await provider.restart_vm(name) - # print("restart_vm response:", resp) + # # To start a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.run_vm(name) + # print( + # "run_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # starting + # ) - # To probe a VM's status via its public hostname (if you know the name): - # api_key = os.getenv("CUA_API_KEY") - # provider = CloudProvider(api_key=api_key, verbose=True) - # async with provider: - # name = "your-vm-name-here" - # info = await provider.get_vm(name) - # print("get_vm info:", info) + # # To restart a VM by name: + # name = "m-linux-96lcxd2c2k" + # resp = await provider.restart_vm(name) + # print( + # "restart_vm response:\n", + # f"name: {resp['name']}\n", + # f"status: {resp['status']}\n", # restarting + # ) + # # To probe a VM's status via its public hostname (if you know the name): + name = "m-linux-96lcxd2c2k" + info = await provider.get_vm(name) + print("get_vm info:\n", + f"name: {info['name']}\n", + f"status: {info['status']}\n", # running + f"api_url: {info.get('api_url')}\n", + f"os_type: {info.get('os_type')}\n", + ) if __name__ == "__main__": asyncio.run(main()) diff --git a/libs/python/computer/computer/providers/cloud/provider.py b/libs/python/computer/computer/providers/cloud/provider.py index 0e3dfd83..5e4e7c51 100644 --- 
a/libs/python/computer/computer/providers/cloud/provider.py +++ b/libs/python/computer/computer/providers/cloud/provider.py @@ -78,8 +78,7 @@ class CloudProvider(BaseVMProvider): try: data = await resp.json(content_type=None) vm_status = str(data.get("status", "ok")) - if isinstance(data, dict) and "os_type" in data: - vm_os_type = str(data.get("os_type")) + vm_os_type = str(data.get("os_type")) except Exception: vm_status = "unknown" elif status_code < 500: @@ -89,23 +88,11 @@ class CloudProvider(BaseVMProvider): return { "name": name, "status": "running" if vm_status == "ok" else vm_status, - "hostname": hostname, + "api_url": f"https://{hostname}:8443", "os_type": vm_os_type, } except Exception: - # Fall back to a DNS resolve check - try: - loop = asyncio.get_event_loop() - await loop.getaddrinfo(hostname, 443) - # Host resolves, but HTTPS probe failed → treat as unknown - return { - "name": name, - "status": "unknown", - "hostname": hostname, - } - except Exception: - # Host does not resolve → not found - return {"name": name, "status": "not_found", "hostname": hostname} + return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:8443"} async def list_vms(self) -> ListVMsResponse: url = f"{self.api_base}/v1/vms" @@ -130,10 +117,10 @@ class CloudProvider(BaseVMProvider): name = vm.get("name") password = vm.get("password") if isinstance(name, str) and name: - host = f"https://{name}.containers.cloud.trycua.com:8443" + host = f"{name}.containers.cloud.trycua.com" # api_url: always set if missing if not vm.get("api_url"): - vm["api_url"] = host + vm["api_url"] = f"https://{host}:8443" # vnc_url: only when password available if not vm.get("vnc_url") and isinstance(password, str) and password: vm[ @@ -151,7 +138,7 @@ class CloudProvider(BaseVMProvider): logger.error(f"list_vms failed: HTTP {resp.status} - {text}") return [] - async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: + 
async def run_vm(self, name: str, image: Optional[str] = None, run_opts: Optional[Dict[str, Any]] = None, storage: Optional[str] = None) -> Dict[str, Any]: """Start a VM via public API. Returns a minimal status.""" url = f"{self.api_base}/v1/vms/{name}/start" headers = { diff --git a/libs/python/computer/computer/providers/cloud/types.py b/libs/python/computer/computer/providers/cloud/types.py deleted file mode 100644 index a289701d..00000000 --- a/libs/python/computer/computer/providers/cloud/types.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Pydantic models for the CUA Cloud provider API. - -Documents the response shape for the Cloud list VMs endpoint. - -List VMs -- Method: GET -- Path: `/v1/vms` -- Description: Returns all VMs owned by the API key's user. -- Responses: - - 200: Array of minimal VM objects with fields `{ name, password, status }` - - 401: Unauthorized (missing/invalid API key) - -Example curl: - curl -H "Authorization: Bearer $CUA_API_KEY" \ - "https://api.cua.ai/v1/vms" - -Response shape: -[ - { - "name": "s-windows-x4snp46ebf", - "password": "49b8daa3", - "status": "running" - } -] - -Status values: -- pending : VM deployment in progress -- running : VM is active and accessible -- stopped : VM is stopped but not terminated -- terminated: VM has been permanently destroyed -- failed : VM deployment or operation failed -""" -from __future__ import annotations - -from typing import Literal, Optional - -# Require pydantic for typed models in provider APIs -from pydantic import BaseModel - - -CloudVMStatus = Literal["pending", "running", "stopped", "terminated", "failed"] - - -class CloudVM(BaseModel): - """Minimal VM object returned by CUA Cloud list API. - - Additional optional fields (like URLs) may be filled by callers based on - their environment but are not guaranteed by the API. 
- """ - - name: str - password: str - status: CloudVMStatus - - # Optional, not guaranteed by the list API, but useful when known/derived - vnc_url: Optional[str] = None - api_url: Optional[str] = None From 50d8521e3ee314a7f4fecf9f91de9dcf2f297e82 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 9 Oct 2025 13:27:24 -0400 Subject: [PATCH 16/37] added computer.start(), computer.restart() --- examples/cloud_api_examples.py | 1 - libs/python/computer/computer/computer.py | 98 +++++++++++++++++++ .../computer/computer/providers/types.py | 1 + 3 files changed, 99 insertions(+), 1 deletion(-) diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py index 705dca8b..4fc71770 100644 --- a/examples/cloud_api_examples.py +++ b/examples/cloud_api_examples.py @@ -14,7 +14,6 @@ async def main() -> None: if api_base: print(f"Using API base: {api_base}") - # List VMs provider = CloudProvider(api_key=api_key, verbose=True) async with provider: diff --git a/libs/python/computer/computer/computer.py b/libs/python/computer/computer/computer.py index 6841f2f2..0d132ab0 100644 --- a/libs/python/computer/computer/computer.py +++ b/libs/python/computer/computer/computer.py @@ -1,3 +1,4 @@ +import traceback from typing import Optional, List, Literal, Dict, Any, Union, TYPE_CHECKING, cast import asyncio from .models import Computer as ComputerConfig, Display @@ -451,6 +452,7 @@ class Computer: raise RuntimeError(f"VM failed to become ready: {wait_error}") except Exception as e: self.logger.error(f"Failed to initialize computer: {e}") + self.logger.error(traceback.format_exc()) raise RuntimeError(f"Failed to initialize computer: {e}") try: @@ -558,6 +560,102 @@ class Computer: self.logger.debug(f"Computer stop process took {duration_ms:.2f}ms") return + async def start(self) -> None: + """Start the computer.""" + await self.run() + + async def restart(self) -> None: + """Restart the computer. 
+ + If using a VM provider that supports restart, this will issue a restart + without tearing down the provider context, then reconnect the interface. + Falls back to stop()+run() when a provider restart is not available. + """ + # Host computer server: just disconnect and run again + if self.use_host_computer_server: + try: + await self.disconnect() + finally: + await self.run() + return + + # If no VM provider context yet, fall back to full run + if not getattr(self, "_provider_context", None) or self.config.vm_provider is None: + self.logger.info("No provider context active; performing full restart via run()") + await self.run() + return + + # Gracefully close current interface connection if present + if self._interface: + try: + self._interface.close() + except Exception as e: + self.logger.debug(f"Error closing interface prior to restart: {e}") + + # Attempt provider-level restart if implemented + try: + storage_param = "ephemeral" if self.ephemeral else self.storage + if hasattr(self.config.vm_provider, "restart_vm"): + self.logger.info(f"Restarting VM {self.config.name} via provider...") + await self.config.vm_provider.restart_vm(name=self.config.name, storage=storage_param) + else: + # Fallback: stop then start without leaving provider context + self.logger.info(f"Provider has no restart_vm; performing stop+start for {self.config.name}...") + await self.config.vm_provider.stop_vm(name=self.config.name, storage=storage_param) + await self.config.vm_provider.run_vm(image=self.image, name=self.config.name, run_opts={}, storage=storage_param) + except Exception as e: + self.logger.error(f"Failed to restart VM via provider: {e}") + # As a last resort, do a full stop (with provider context exit) and run + try: + await self.stop() + finally: + await self.run() + return + + # Wait for VM to be ready and reconnect interface + try: + self.logger.info("Waiting for VM to be ready after restart...") + if self.provider_type == VMProviderType.LUMIER: + max_retries = 60 + 
retry_delay = 3 + else: + max_retries = 30 + retry_delay = 2 + ip_address = await self.get_ip(max_retries=max_retries, retry_delay=retry_delay) + + self.logger.info(f"Re-initializing interface for {self.os_type} at {ip_address}") + from .interface.base import BaseComputerInterface + + if self.provider_type == VMProviderType.CLOUD and self.api_key and self.config.name: + self._interface = cast( + BaseComputerInterface, + InterfaceFactory.create_interface_for_os( + os=self.os_type, + ip_address=ip_address, + api_key=self.api_key, + vm_name=self.config.name, + ), + ) + else: + self._interface = cast( + BaseComputerInterface, + InterfaceFactory.create_interface_for_os( + os=self.os_type, + ip_address=ip_address, + ), + ) + + self.logger.info("Connecting to WebSocket interface after restart...") + await self._interface.wait_for_ready(timeout=30) + self.logger.info("Computer reconnected and ready after restart") + except Exception as e: + self.logger.error(f"Failed to reconnect after restart: {e}") + # Try a full reset if reconnection failed + try: + await self.stop() + finally: + await self.run() + # @property async def get_ip(self, max_retries: int = 15, retry_delay: int = 3) -> str: """Get the IP address of the VM or localhost if using host computer server. diff --git a/libs/python/computer/computer/providers/types.py b/libs/python/computer/computer/providers/types.py index 48e9f1f2..2db44230 100644 --- a/libs/python/computer/computer/providers/types.py +++ b/libs/python/computer/computer/providers/types.py @@ -16,6 +16,7 @@ VMStatus = Literal[ "failed", # VM deployment or operation failed ] +OSType = Literal["macos", "linux", "windows"] class MinimalVM(TypedDict): """Minimal VM object shape returned by list calls. 
From 237f4ab477ed84568104fe229ab58a10848030ad Mon Sep 17 00:00:00 2001 From: r33drichards Date: Thu, 9 Oct 2025 21:37:28 -0700 Subject: [PATCH 17/37] create a github action to build and publish docker image (#453) This pull request introduces a new GitHub Actions workflow for building and publishing the CUA Ubuntu Docker container. The workflow automates image building and publishing for both pull requests and pushes to the main branch, supporting multiple platforms and leveraging Docker Hub for image distribution. Docker workflow automation: Added .github/workflows/docker-publish-kasm.yml to automate building and publishing the cua-ubuntu Docker image for PRs and the main branch, including support for multi-platform builds (linux/amd64, linux/arm64). Integrated Docker Buildx for multi-platform builds and caching to optimize build times and resource usage. Docker Hub integration: Configured workflow to authenticate with Docker Hub using a secret token and push images to the trycua organization. Automatically tags images based on the event type (commit SHA for PRs, latest for main branch) and provides image digest output for traceability. 
--- .github/workflows/docker-publish-kasm.yml | 149 ++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 .github/workflows/docker-publish-kasm.yml diff --git a/.github/workflows/docker-publish-kasm.yml b/.github/workflows/docker-publish-kasm.yml new file mode 100644 index 00000000..882d6869 --- /dev/null +++ b/.github/workflows/docker-publish-kasm.yml @@ -0,0 +1,149 @@ +name: Build and Publish CUA Ubuntu Container + +on: + push: + branches: + - main + tags: + - "docker-kasm-v*.*.*" + paths: + - "libs/kasm/**" + - ".github/workflows/docker-publish-kasm.yml" + pull_request: + paths: + - "libs/kasm/**" + - ".github/workflows/docker-publish-kasm.yml" + + +env: + IMAGE_NAME: cua-ubuntu + DOCKER_HUB_ORG: trycua + +jobs: + build-and-push: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + # todo unsupported base image + # - windows/amd64 + # - darwin/amd64 + # - darwin/arm64 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Prepare platform tag + id: platform + run: | + # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) + PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') + echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: trycua + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Extract metadata (PR) + if: github.event_name == 'pull_request' + id: meta-pr + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} + tags: | + type=raw,value=${{ github.sha }} + + - name: Extract metadata (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + id: meta-main + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} + tags: | + type=raw,value=latest + + 
- name: Extract metadata (semantic version tag) + if: startsWith(github.ref, 'refs/tags/docker-kasm-v') + id: meta-semver + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},prefix=docker-kasm-v + type=semver,pattern={{major}}.{{minor}},prefix=docker-kasm-v + type=semver,pattern={{major}},prefix=docker-kasm-v + type=raw,value=latest + + - name: Build and push Docker image (PR) + if: github.event_name == 'pull_request' + uses: docker/build-push-action@v5 + with: + context: ./libs/kasm + file: ./libs/kasm/Dockerfile + push: true + tags: ${{ steps.meta-pr.outputs.tags }} + labels: ${{ steps.meta-pr.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest + cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + uses: docker/build-push-action@v5 + with: + context: ./libs/kasm + file: ./libs/kasm/Dockerfile + push: true + tags: ${{ steps.meta-main.outputs.tags }} + labels: ${{ steps.meta-main.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest + cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (semantic version tag) + if: startsWith(github.ref, 'refs/tags/docker-kasm-v') + uses: docker/build-push-action@v5 + with: + context: ./libs/kasm + file: 
./libs/kasm/Dockerfile + push: true + tags: ${{ steps.meta-semver.outputs.tags }} + labels: ${{ steps.meta-semver.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest + cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Image digest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/docker-kasm-v') + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" + elif [[ "${{ github.ref }}" == refs/tags/docker-kasm-v* ]]; then + echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" + else + echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" + fi + + - name: print image tags + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" + elif [[ "${{ github.ref }}" == refs/tags/docker-kasm-v* ]]; then + echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" + else + echo "Image tags: ${{ steps.meta-main.outputs.tags }}" + fi From ef69c4431a1708e3300d5d739cbfa8f73c49896d Mon Sep 17 00:00:00 2001 From: f-trycua Date: Thu, 9 Oct 2025 23:41:51 -0700 Subject: [PATCH 18/37] Add vanilla Docker XFCE container for CUA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a lightweight alternative to Kasm-based container with minimal dependencies. Features vanilla Ubuntu 22.04 with XFCE, TigerVNC, noVNC, and computer-server pre-installed. 
Key features: - Vanilla XFCE desktop environment - TigerVNC server (port 5901) - noVNC web interface (port 6901) - computer-server WebSocket API (port 8000) - Python 3.11 with automation tools - Firefox with telemetry disabled - Supervisord for process management - Persistent storage support Benefits over Kasm: - Reduced dependencies (no KasmWeb infrastructure) - Smaller image size - Full control over all components - Easy customization - Independent maintenance Includes: - Comprehensive README and quickstart guide - Makefile for common operations - docker-compose.yml for orchestration - Example Python scripts - Startup scripts for all services 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- libs/docker-xfce/.dockerignore | 5 + libs/docker-xfce/.gitignore | 4 + libs/docker-xfce/Dockerfile | 122 +++++++++ libs/docker-xfce/LICENSE | 21 ++ libs/docker-xfce/Makefile | 127 ++++++++++ libs/docker-xfce/QUICKSTART.md | 166 ++++++++++++ libs/docker-xfce/README.md | 238 ++++++++++++++++++ libs/docker-xfce/docker-compose.yml | 44 ++++ libs/docker-xfce/example.py | 213 ++++++++++++++++ .../src/scripts/start-computer-server.sh | 13 + libs/docker-xfce/src/scripts/start-novnc.sh | 15 ++ libs/docker-xfce/src/scripts/start-vnc.sh | 22 ++ libs/docker-xfce/src/scripts/xstartup.sh | 21 ++ .../src/supervisor/supervisord.conf | 30 +++ 14 files changed, 1041 insertions(+) create mode 100644 libs/docker-xfce/.dockerignore create mode 100644 libs/docker-xfce/.gitignore create mode 100644 libs/docker-xfce/Dockerfile create mode 100644 libs/docker-xfce/LICENSE create mode 100644 libs/docker-xfce/Makefile create mode 100644 libs/docker-xfce/QUICKSTART.md create mode 100644 libs/docker-xfce/README.md create mode 100644 libs/docker-xfce/docker-compose.yml create mode 100644 libs/docker-xfce/example.py create mode 100644 libs/docker-xfce/src/scripts/start-computer-server.sh create mode 100644 libs/docker-xfce/src/scripts/start-novnc.sh create mode 100644 
libs/docker-xfce/src/scripts/start-vnc.sh create mode 100644 libs/docker-xfce/src/scripts/xstartup.sh create mode 100644 libs/docker-xfce/src/supervisor/supervisord.conf diff --git a/libs/docker-xfce/.dockerignore b/libs/docker-xfce/.dockerignore new file mode 100644 index 00000000..d4352f88 --- /dev/null +++ b/libs/docker-xfce/.dockerignore @@ -0,0 +1,5 @@ +README.md +.git +.gitignore +*.md +LICENSE diff --git a/libs/docker-xfce/.gitignore b/libs/docker-xfce/.gitignore new file mode 100644 index 00000000..0a2449a1 --- /dev/null +++ b/libs/docker-xfce/.gitignore @@ -0,0 +1,4 @@ +storage/ +shared/ +*.log +.DS_Store diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile new file mode 100644 index 00000000..94326ae2 --- /dev/null +++ b/libs/docker-xfce/Dockerfile @@ -0,0 +1,122 @@ +# CUA Docker XFCE Container +# Vanilla XFCE desktop with noVNC and computer-server + +FROM ubuntu:22.04 + +# Avoid prompts from apt +ENV DEBIAN_FRONTEND=noninteractive + +# Set environment variables +ENV HOME=/home/cua +ENV DISPLAY=:1 +ENV VNC_PORT=5901 +ENV NOVNC_PORT=6901 +ENV API_PORT=8000 +ENV VNC_RESOLUTION=1920x1080 +ENV VNC_COL_DEPTH=24 + +# Create user +RUN useradd -m -s /bin/bash -G sudo cua && \ + echo "cua:password" | chpasswd && \ + echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + # Desktop environment + xfce4 \ + xfce4-terminal \ + xfce4-goodies \ + dbus-x11 \ + # VNC server + tigervnc-standalone-server \ + tigervnc-common \ + # noVNC dependencies + python3 \ + python3-pip \ + python3-numpy \ + git \ + net-tools \ + supervisor \ + # Computer-server dependencies + python3.11 \ + python3-tk \ + python3-dev \ + gnome-screenshot \ + wmctrl \ + ffmpeg \ + socat \ + xclip \ + # Browser + wget \ + software-properties-common \ + # Build tools + build-essential \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + tk-dev \ + libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + libffi-dev \ + 
zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Python 3.11 +RUN add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y python3.11 python3.11-distutils && \ + rm -rf /var/lib/apt/lists/* + +# Install noVNC +RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ + git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ + ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html + +# Install computer-server +RUN pip3 install cua-computer-server + +# Install Firefox +RUN add-apt-repository -y ppa:mozillateam/ppa && \ + echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ + apt-get update && \ + apt-get install -y firefox && \ + rm -rf /var/lib/apt/lists/* + +# Configure Firefox defaults +RUN mkdir -p /etc/firefox/syspref.js && \ + echo 'pref("datareporting.policy.firstRunURL", "");' >> /etc/firefox/syspref.js && \ + echo 'pref("datareporting.policy.dataSubmissionEnabled", false);' >> /etc/firefox/syspref.js && \ + echo 'pref("datareporting.healthreport.service.enabled", false);' >> /etc/firefox/syspref.js && \ + echo 'pref("datareporting.healthreport.uploadEnabled", false);' >> /etc/firefox/syspref.js && \ + echo 'pref("browser.aboutwelcome.enabled", false);' >> /etc/firefox/syspref.js + +# Copy startup scripts +COPY src/supervisor/ /etc/supervisor/conf.d/ +COPY src/scripts/ /usr/local/bin/ + +# Make scripts executable +RUN chmod +x /usr/local/bin/*.sh + +# Setup VNC +USER cua +WORKDIR /home/cua + +# Create VNC password file +RUN mkdir -p $HOME/.vnc && \ + echo "password" | vncpasswd -f > $HOME/.vnc/passwd && \ + chmod 600 $HOME/.vnc/passwd + +# Configure XFCE for first start +RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml + +# Create storage and shared directories +RUN mkdir -p $HOME/storage $HOME/shared + +USER root + +# Expose ports +EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT + +# Start services via supervisor +CMD 
["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] diff --git a/libs/docker-xfce/LICENSE b/libs/docker-xfce/LICENSE new file mode 100644 index 00000000..6899a9db --- /dev/null +++ b/libs/docker-xfce/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 CUA + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/docker-xfce/Makefile b/libs/docker-xfce/Makefile new file mode 100644 index 00000000..e3aa2879 --- /dev/null +++ b/libs/docker-xfce/Makefile @@ -0,0 +1,127 @@ +.PHONY: build run stop push clean test logs shell + +IMAGE_NAME := trycua/cua-docker-xfce +TAG := latest +CONTAINER_NAME := cua-docker-xfce-test + +# Build the Docker image +build: + docker build -t $(IMAGE_NAME):$(TAG) . + +# Run the container +run: + docker run -d \ + --name $(CONTAINER_NAME) \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + $(IMAGE_NAME):$(TAG) + @echo "Container started!" 
+ @echo "noVNC: http://localhost:6901" + @echo "VNC: localhost:5901 (password: password)" + @echo "API: http://localhost:8000" + +# Run with custom resolution +run-hd: + docker run -d \ + --name $(CONTAINER_NAME) \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -e VNC_RESOLUTION=1280x720 \ + $(IMAGE_NAME):$(TAG) + +# Run with persistent storage +run-persist: + mkdir -p ./storage ./shared + docker run -d \ + --name $(CONTAINER_NAME) \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -v $(PWD)/storage:/home/cua/storage \ + -v $(PWD)/shared:/home/cua/shared \ + $(IMAGE_NAME):$(TAG) + +# Stop and remove the container +stop: + docker stop $(CONTAINER_NAME) || true + docker rm $(CONTAINER_NAME) || true + +# Push to Docker Hub +push: + docker push $(IMAGE_NAME):$(TAG) + +# Clean up everything +clean: stop + docker rmi $(IMAGE_NAME):$(TAG) || true + rm -rf ./storage ./shared + +# Run tests +test: build run + @echo "Waiting for services to start..." + @sleep 10 + @echo "Testing noVNC..." + @curl -f http://localhost:6901 > /dev/null && echo "✓ noVNC is running" || echo "✗ noVNC failed" + @echo "Testing API..." 
+ @curl -f http://localhost:8000 > /dev/null && echo "✓ API is running" || echo "✗ API failed" + @$(MAKE) stop + +# View logs +logs: + docker logs -f $(CONTAINER_NAME) + +# View supervisor logs +logs-supervisor: + docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/supervisord.log + +# View individual service logs +logs-vnc: + docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/vncserver.log + +logs-novnc: + docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/novnc.log + +logs-api: + docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/computer-server.log + +# Open a shell in the container +shell: + docker exec -it $(CONTAINER_NAME) /bin/bash + +# Check supervisor status +status: + docker exec $(CONTAINER_NAME) supervisorctl status + +# Restart services +restart-services: + docker exec $(CONTAINER_NAME) supervisorctl restart all + +# Create a snapshot +snapshot: + docker commit $(CONTAINER_NAME) $(IMAGE_NAME):snapshot + @echo "Snapshot created: $(IMAGE_NAME):snapshot" + +# Build and run +dev: build run logs + +# Help +help: + @echo "Available targets:" + @echo " build - Build the Docker image" + @echo " run - Run the container" + @echo " run-hd - Run with 720p resolution" + @echo " run-persist - Run with persistent storage" + @echo " stop - Stop and remove container" + @echo " push - Push to Docker Hub" + @echo " clean - Remove image and container" + @echo " test - Build, run tests, and stop" + @echo " logs - View container logs" + @echo " logs-* - View specific service logs" + @echo " shell - Open shell in container" + @echo " status - Check supervisor status" + @echo " snapshot - Create container snapshot" + @echo " dev - Build, run, and follow logs" diff --git a/libs/docker-xfce/QUICKSTART.md b/libs/docker-xfce/QUICKSTART.md new file mode 100644 index 00000000..9cd8082b --- /dev/null +++ b/libs/docker-xfce/QUICKSTART.md @@ -0,0 +1,166 @@ +# Quick Start Guide + +Get up and running with CUA Docker XFCE in 5 minutes. 
+ +## Prerequisites + +- Docker installed and running +- Python 3.11+ (for using with CUA library) +- `cua-computer` package installed: `pip install cua-computer` + +## Quick Start + +### Option 1: Using Makefile (Recommended) + +```bash +# Build and run +make build +make run + +# Check if it's running +make status + +# View logs +make logs +``` + +Access: +- 🌐 **Web VNC**: http://localhost:6901 +- 🖥️ **VNC Client**: localhost:5901 (password: `password`) +- 🔌 **API**: http://localhost:8000 + +### Option 2: Using Docker Compose + +```bash +# Start the container +docker-compose up -d + +# View logs +docker-compose logs -f + +# Stop the container +docker-compose down +``` + +### Option 3: Docker Command + +```bash +docker run -d \ + --name cua-desktop \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + trycua/cua-docker-xfce:latest +``` + +## Using with Python + +```python +import asyncio +from computer import Computer + +async def main(): + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest" + ) + + async with computer: + # Take a screenshot + screenshot = await computer.interface.screenshot() + + # Open terminal + await computer.interface.hotkey("ctrl", "alt", "t") + await asyncio.sleep(1) + + # Type and execute command + await computer.interface.type_text("echo 'Hello!'") + await computer.interface.press_key("Return") + +asyncio.run(main()) +``` + +## Common Tasks + +### Run with custom resolution +```bash +make run-hd # 1280x720 +# or +docker run -e VNC_RESOLUTION=1280x720 ... +``` + +### Run with persistent storage +```bash +make run-persist +# or +docker run -v $(pwd)/storage:/home/cua/storage ... 
+``` + +### View specific logs +```bash +make logs-vnc # VNC server logs +make logs-novnc # noVNC proxy logs +make logs-api # Computer-server logs +``` + +### Open shell in container +```bash +make shell +# or +docker exec -it cua-desktop /bin/bash +``` + +### Create a snapshot +```bash +make snapshot +``` + +## Troubleshooting + +### Container won't start +```bash +# Check if ports are already in use +lsof -i :6901 +lsof -i :8000 + +# View container logs +docker logs cua-desktop +``` + +### Black screen in noVNC +```bash +# Restart VNC server +docker exec cua-desktop supervisorctl restart vncserver +``` + +### API not responding +```bash +# Check if computer-server is running +docker exec cua-desktop supervisorctl status computer-server + +# Restart computer-server +docker exec cua-desktop supervisorctl restart computer-server +``` + +## Next Steps + +- Read the [full README](README.md) for detailed documentation +- Check out [example.py](example.py) for more usage examples +- Customize the [Dockerfile](Dockerfile) for your needs + +## Clean Up + +```bash +# Using Makefile +make clean + +# Using docker-compose +docker-compose down -v + +# Manual +docker stop cua-desktop +docker rm cua-desktop +docker rmi trycua/cua-docker-xfce:latest +``` diff --git a/libs/docker-xfce/README.md b/libs/docker-xfce/README.md new file mode 100644 index 00000000..489d0042 --- /dev/null +++ b/libs/docker-xfce/README.md @@ -0,0 +1,238 @@ +# CUA Docker XFCE Container + +Vanilla XFCE desktop container for Computer-Using Agents (CUA) with noVNC and computer-server. This is a lightweight alternative to the Kasm-based container with minimal dependencies. 
+ +## Features + +- Ubuntu 22.04 (Jammy) with vanilla XFCE desktop environment +- TigerVNC server for remote desktop access +- noVNC for web-based VNC access (no client required) +- Pre-installed computer-server for remote computer control +- Python 3.11 with necessary libraries +- Screen capture tools (gnome-screenshot, wmctrl, ffmpeg) +- Clipboard utilities (xclip, socat) +- Firefox browser with telemetry disabled + +## Architecture + +``` +┌─────────────────────────────────────────┐ +│ Docker Container (Ubuntu 22.04) │ +├─────────────────────────────────────────┤ +│ XFCE Desktop Environment │ +│ ├── Firefox │ +│ ├── XFCE Terminal │ +│ └── Desktop utilities │ +├─────────────────────────────────────────┤ +│ TigerVNC Server (Port 5901) │ +│ └── X11 Display :1 │ +├─────────────────────────────────────────┤ +│ noVNC Web Interface (Port 6901) │ +│ └── WebSocket proxy to VNC │ +├─────────────────────────────────────────┤ +│ CUA Computer Server (Port 8000) │ +│ └── WebSocket API for automation │ +└─────────────────────────────────────────┘ +``` + +## Building the Container + +```bash +docker build -t cua-docker-xfce:latest . 
+``` + +## Pushing to Registry + +```bash +# Tag for Docker Hub (replace 'trycua' with your Docker Hub username) +docker tag cua-docker-xfce:latest trycua/cua-docker-xfce:latest + +# Login to Docker Hub +docker login + +# Push to Docker Hub +docker push trycua/cua-docker-xfce:latest +``` + +## Running the Container Manually + +### Basic Usage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce:latest +``` + +### With Custom Resolution + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -e VNC_RESOLUTION=1280x720 \ + cua-docker-xfce:latest +``` + +### With Persistent Storage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -v $(pwd)/storage:/home/cua/storage \ + cua-docker-xfce:latest +``` + +## Accessing the Container + +- **noVNC Web Interface**: Open `http://localhost:6901` in your browser +- **VNC Client**: Connect to `localhost:5901` (password: `password`) +- **Computer Server API**: Available at `http://localhost:8000` + +## Using with CUA Docker Provider + +This container is designed to work with the CUA Docker provider: + +```python +from computer import Computer + +# Create computer with docker-xfce container +computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", + display="1920x1080", + memory="4GB", + cpu="2" +) + +# Use the computer +async with computer: + # Take a screenshot + screenshot = await computer.interface.screenshot() + + # Click and type + await computer.interface.left_click(100, 100) + await computer.interface.type_text("Hello from CUA!") + + # Run commands + result = await computer.interface.run_command("ls -la") + print(result.stdout) +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `VNC_RESOLUTION` | `1920x1080` | Screen resolution | +| 
`VNC_COL_DEPTH` | `24` | Color depth | +| `VNC_PORT` | `5901` | VNC server port | +| `NOVNC_PORT` | `6901` | noVNC web interface port | +| `API_PORT` | `8000` | Computer-server API port | +| `DISPLAY` | `:1` | X11 display number | + +## Exposed Ports + +- **5901**: TigerVNC server +- **6901**: noVNC web interface +- **8000**: Computer-server WebSocket API + +## Volume Mount Points + +- `/home/cua/storage`: Persistent storage mount point +- `/home/cua/shared`: Shared folder mount point + +## User Credentials + +- **Username**: `cua` +- **Password**: `password` +- **Sudo access**: Enabled without password + +## Creating Snapshots + +### Filesystem Snapshot +```bash +docker commit <container_id> cua-docker-xfce-snapshot:latest +``` + +### Running from Snapshot +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce-snapshot:latest +``` + +## Comparison with Kasm Container + +| Feature | Kasm Container | Docker XFCE Container | +|---------|---------------|----------------------| +| Base Image | KasmWeb Ubuntu | Vanilla Ubuntu | +| VNC Server | KasmVNC | TigerVNC | +| Dependencies | Higher | Lower | +| Configuration | Pre-configured | Minimal | +| Size | Larger | Smaller | +| Maintenance | Depends on Kasm | Independent | + +## Process Management + +The container uses `supervisord` to manage three main processes: + +1. **VNC Server** (Priority 10): TigerVNC with XFCE desktop +2. **noVNC** (Priority 20): WebSocket proxy for browser access +3. **Computer Server** (Priority 30): CUA automation API + +All processes are automatically restarted on failure. 
+ +## Troubleshooting + +### VNC server won't start +Remove any stale X11 lock files: +```bash +docker exec <container_id> rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 +``` + +### noVNC shows black screen +Ensure VNC server is running: +```bash +docker exec <container_id> supervisorctl status vncserver +``` + +### Computer-server not responding +Check if X server is accessible: +```bash +docker exec <container_id> env DISPLAY=:1 xdpyinfo +``` + +### View logs +```bash +docker exec <container_id> tail -f /var/log/supervisor/supervisord.log +docker exec <container_id> supervisorctl status +``` + +## Integration with CUA System + +This container provides the same functionality as the Kasm container but with: +- **Reduced dependencies**: No reliance on KasmWeb infrastructure +- **Smaller image size**: Minimal base configuration +- **Full control**: Direct access to all components +- **Easy customization**: Simple to modify and extend + +The container integrates seamlessly with: +- CUA Computer library (via WebSocket API) +- Docker provider for lifecycle management +- Standard VNC clients for debugging +- Web browsers for visual monitoring + +## License + +MIT License - See LICENSE file for details diff --git a/libs/docker-xfce/docker-compose.yml b/libs/docker-xfce/docker-compose.yml new file mode 100644 index 00000000..bdc1ba2d --- /dev/null +++ b/libs/docker-xfce/docker-compose.yml @@ -0,0 +1,44 @@ +version: '3.8' + +services: + cua-desktop: + build: . 
+ image: trycua/cua-docker-xfce:latest + container_name: cua-docker-xfce + shm_size: '512m' + ports: + - "5901:5901" # VNC + - "6901:6901" # noVNC + - "8000:8000" # Computer API + environment: + - VNC_RESOLUTION=1920x1080 + - VNC_COL_DEPTH=24 + - VNC_PORT=5901 + - NOVNC_PORT=6901 + - API_PORT=8000 + volumes: + - ./storage:/home/cua/storage + - ./shared:/home/cua/shared + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + # Optional: Multiple instances for parallel testing + cua-desktop-2: + build: . + image: trycua/cua-docker-xfce:latest + container_name: cua-docker-xfce-2 + shm_size: '512m' + ports: + - "5902:5901" + - "6902:6901" + - "8001:8000" + environment: + - VNC_RESOLUTION=1280x720 + restart: unless-stopped + profiles: + - multi diff --git a/libs/docker-xfce/example.py b/libs/docker-xfce/example.py new file mode 100644 index 00000000..6c42bbc2 --- /dev/null +++ b/libs/docker-xfce/example.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +""" +Example script demonstrating how to use the CUA Docker XFCE container +with the Computer library. 
+""" + +import asyncio +from computer import Computer + + +async def basic_example(): + """Basic example: Take a screenshot and click around""" + print("=== Basic Example ===") + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", + display="1920x1080", + memory="4GB", + cpu="2", + port=8000, + noVNC_port=6901 + ) + + async with computer: + print("Computer is ready!") + print(f"noVNC available at: http://localhost:6901") + + # Get screen info + screen = await computer.interface.get_screen_size() + print(f"Screen size: {screen['width']}x{screen['height']}") + + # Take a screenshot + screenshot = await computer.interface.screenshot() + with open("screenshot.png", "wb") as f: + f.write(screenshot) + print("Screenshot saved to screenshot.png") + + # Click and type + await computer.interface.left_click(100, 100) + await computer.interface.type_text("Hello from CUA!") + + print("Done!") + + +async def file_operations_example(): + """Example: File system operations""" + print("\n=== File Operations Example ===") + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest" + ) + + async with computer: + # Create a file + await computer.interface.write_text( + "/home/cua/test.txt", + "Hello from CUA!" 
+ ) + print("Created test.txt") + + # Read it back + content = await computer.interface.read_text("/home/cua/test.txt") + print(f"File content: {content}") + + # List directory + files = await computer.interface.list_dir("/home/cua") + print(f"Files in home directory: {files}") + + +async def command_execution_example(): + """Example: Running shell commands""" + print("\n=== Command Execution Example ===") + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest" + ) + + async with computer: + # Run a command + result = await computer.interface.run_command("uname -a") + print(f"System info:\n{result.stdout}") + + # Check Firefox is installed + result = await computer.interface.run_command("which firefox") + print(f"Firefox location: {result.stdout.strip()}") + + # Get Python version + result = await computer.interface.run_command("python3 --version") + print(f"Python version: {result.stdout.strip()}") + + +async def browser_automation_example(): + """Example: Opening Firefox and navigating""" + print("\n=== Browser Automation Example ===") + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest" + ) + + async with computer: + # Open Firefox + await computer.interface.run_command("firefox https://example.com &") + print("Firefox opened") + + # Wait for it to load + await asyncio.sleep(5) + + # Take a screenshot + screenshot = await computer.interface.screenshot() + with open("browser_screenshot.png", "wb") as f: + f.write(screenshot) + print("Browser screenshot saved") + + +async def persistent_storage_example(): + """Example: Using persistent storage""" + print("\n=== Persistent Storage Example ===") + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", + shared_directories=["./storage"] + ) + + async with computer: + # Write to persistent storage + await computer.interface.write_text( + 
"/home/cua/storage/persistent.txt", + "This file persists across container restarts!" + ) + print("Written to persistent storage") + + # Read it back + content = await computer.interface.read_text( + "/home/cua/storage/persistent.txt" + ) + print(f"Content: {content}") + + +async def multi_action_example(): + """Example: Complex interaction sequence""" + print("\n=== Multi-Action Example ===") + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest" + ) + + async with computer: + # Open terminal + await computer.interface.hotkey("ctrl", "alt", "t") + await asyncio.sleep(2) + + # Type a command + await computer.interface.type_text("echo 'Hello from CUA!'") + await computer.interface.press_key("Return") + await asyncio.sleep(1) + + # Take screenshot + screenshot = await computer.interface.screenshot() + with open("terminal_screenshot.png", "wb") as f: + f.write(screenshot) + print("Terminal screenshot saved") + + +async def main(): + """Run all examples""" + examples = [ + ("Basic", basic_example), + ("File Operations", file_operations_example), + ("Command Execution", command_execution_example), + ("Browser Automation", browser_automation_example), + ("Persistent Storage", persistent_storage_example), + ("Multi-Action", multi_action_example), + ] + + print("Available examples:") + for i, (name, _) in enumerate(examples, 1): + print(f"{i}. {name}") + print(f"{len(examples) + 1}. 
Run all") + + choice = input("\nSelect an example (1-7): ").strip() + + try: + if choice == str(len(examples) + 1): + # Run all examples + for name, func in examples: + try: + await func() + except Exception as e: + print(f"Error in {name}: {e}") + else: + idx = int(choice) - 1 + if 0 <= idx < len(examples): + await examples[idx][1]() + else: + print("Invalid choice") + except ValueError: + print("Invalid input") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/libs/docker-xfce/src/scripts/start-computer-server.sh b/libs/docker-xfce/src/scripts/start-computer-server.sh new file mode 100644 index 00000000..bc27a3db --- /dev/null +++ b/libs/docker-xfce/src/scripts/start-computer-server.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +# Wait for X server to be ready +echo "Waiting for X server to start..." +while ! xdpyinfo -display :1 >/dev/null 2>&1; do + sleep 1 +done +echo "X server is ready" + +# Start computer-server +export DISPLAY=:1 +python3 -m computer_server --port ${API_PORT:-8000} diff --git a/libs/docker-xfce/src/scripts/start-novnc.sh b/libs/docker-xfce/src/scripts/start-novnc.sh new file mode 100644 index 00000000..4f95c644 --- /dev/null +++ b/libs/docker-xfce/src/scripts/start-novnc.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +# Wait for VNC server to be ready +echo "Waiting for VNC server to start..." +while ! 
nc -z localhost ${VNC_PORT:-5901}; do + sleep 1 +done +echo "VNC server is ready" + +# Start noVNC +cd /opt/noVNC +/opt/noVNC/utils/novnc_proxy \ + --vnc localhost:${VNC_PORT:-5901} \ + --listen ${NOVNC_PORT:-6901} diff --git a/libs/docker-xfce/src/scripts/start-vnc.sh b/libs/docker-xfce/src/scripts/start-vnc.sh new file mode 100644 index 00000000..1cbba98f --- /dev/null +++ b/libs/docker-xfce/src/scripts/start-vnc.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -e + +# Clean up any existing VNC lock files +rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 + +# Start VNC server +vncserver :1 \ + -geometry ${VNC_RESOLUTION:-1920x1080} \ + -depth ${VNC_COL_DEPTH:-24} \ + -rfbport ${VNC_PORT:-5901} \ + -localhost no \ + -SecurityTypes None \ + -AlwaysShared \ + -AcceptPointerEvents \ + -AcceptKeyEvents \ + -AcceptCutText \ + -SendCutText \ + -xstartup /usr/local/bin/xstartup.sh + +# Keep the process running +tail -f /home/cua/.vnc/*.log diff --git a/libs/docker-xfce/src/scripts/xstartup.sh b/libs/docker-xfce/src/scripts/xstartup.sh new file mode 100644 index 00000000..49bb46a2 --- /dev/null +++ b/libs/docker-xfce/src/scripts/xstartup.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e + +# Start D-Bus +if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then + eval $(dbus-launch --sh-syntax --exit-with-session) +fi + +# Start XFCE +startxfce4 & + +# Wait for XFCE to start +sleep 2 + +# Disable screensaver and power management +xset s off +xset -dpms +xset s noblank + +# Wait for the session +wait diff --git a/libs/docker-xfce/src/supervisor/supervisord.conf b/libs/docker-xfce/src/supervisor/supervisord.conf new file mode 100644 index 00000000..fb367c4f --- /dev/null +++ b/libs/docker-xfce/src/supervisor/supervisord.conf @@ -0,0 +1,30 @@ +[supervisord] +nodaemon=true +user=root +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +childlogdir=/var/log/supervisor + +[program:vncserver] +command=/usr/local/bin/start-vnc.sh +user=cua +autorestart=true 
+stdout_logfile=/var/log/supervisor/vncserver.log +stderr_logfile=/var/log/supervisor/vncserver.error.log +priority=10 + +[program:novnc] +command=/usr/local/bin/start-novnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/novnc.log +stderr_logfile=/var/log/supervisor/novnc.error.log +priority=20 + +[program:computer-server] +command=/usr/local/bin/start-computer-server.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/computer-server.log +stderr_logfile=/var/log/supervisor/computer-server.error.log +priority=30 From f7bedbb8a0cbf1712433a0b9dca9211cf02f3cf9 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra <88108002+VinciGit00@users.noreply.github.com> Date: Fri, 10 Oct 2025 18:20:22 +0200 Subject: [PATCH 19/37] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e5451a0f..ce2726b6 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,7 @@ Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss i ## License -Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE) file for details. +Cua is open-sourced under the MIT License - see the [LICENSE](LICENSE.md) file for details. Portions of this project, specifically components adapted from Kasm Technologies Inc., are also licensed under the MIT License. See [libs/kasm/LICENSE](libs/kasm/LICENSE) for details. From 05c3356f1919f108cb0ef14e489925db7be0c335 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Fri, 10 Oct 2025 12:41:09 -0700 Subject: [PATCH 20/37] Fix Docker XFCE container build and VNC startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed two critical issues: 1. Firefox syspref.js was being created as directory instead of file - Changed mkdir to create parent directory only - Use > for first echo to create file 2. 
TigerVNC refusing to start without authentication - Changed SecurityTypes from None to VncAuth - Added --I-KNOW-THIS-IS-INSECURE flag for development Container now successfully: - Builds on ARM64 Mac - Starts all services (VNC, noVNC, computer-server) - Accessible on ports 5901 (VNC), 6901 (noVNC), 8000 (API) Also added Dockerfile.slim as lighter alternative. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- libs/docker-xfce/Dockerfile | 11 +---- libs/docker-xfce/Dockerfile.slim | 53 +++++++++++++++++++++++ libs/docker-xfce/src/scripts/start-vnc.sh | 5 ++- 3 files changed, 58 insertions(+), 11 deletions(-) create mode 100644 libs/docker-xfce/Dockerfile.slim diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index 94326ae2..d5459f31 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -38,7 +38,6 @@ RUN apt-get update && apt-get install -y \ net-tools \ supervisor \ # Computer-server dependencies - python3.11 \ python3-tk \ python3-dev \ gnome-screenshot \ @@ -62,12 +61,6 @@ RUN apt-get update && apt-get install -y \ zlib1g-dev \ && rm -rf /var/lib/apt/lists/* -# Install Python 3.11 -RUN add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get install -y python3.11 python3.11-distutils && \ - rm -rf /var/lib/apt/lists/* - # Install noVNC RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ @@ -84,8 +77,8 @@ RUN add-apt-repository -y ppa:mozillateam/ppa && \ rm -rf /var/lib/apt/lists/* # Configure Firefox defaults -RUN mkdir -p /etc/firefox/syspref.js && \ - echo 'pref("datareporting.policy.firstRunURL", "");' >> /etc/firefox/syspref.js && \ +RUN mkdir -p /etc/firefox && \ + echo 'pref("datareporting.policy.firstRunURL", "");' > /etc/firefox/syspref.js && \ echo 'pref("datareporting.policy.dataSubmissionEnabled", false);' >> /etc/firefox/syspref.js && \ echo 
'pref("datareporting.healthreport.service.enabled", false);' >> /etc/firefox/syspref.js && \ echo 'pref("datareporting.healthreport.uploadEnabled", false);' >> /etc/firefox/syspref.js && \ diff --git a/libs/docker-xfce/Dockerfile.slim b/libs/docker-xfce/Dockerfile.slim new file mode 100644 index 00000000..51c2233d --- /dev/null +++ b/libs/docker-xfce/Dockerfile.slim @@ -0,0 +1,53 @@ +# CUA Docker XFCE Container - Slim Version +# Uses existing VNC base to reduce build time + +FROM dorowu/ubuntu-desktop-lxde-vnc:focal + +# Switch to root +USER root + +# Set environment variables +ENV HOME=/home/cua +ENV DISPLAY=:1 +ENV API_PORT=8000 + +# Create cua user +RUN useradd -m -s /bin/bash -G sudo cua && \ + echo "cua:password" | chpasswd && \ + echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Install Python 3.11 and computer-server dependencies +RUN apt-get update && apt-get install -y \ + software-properties-common \ + gnome-screenshot \ + wmctrl \ + ffmpeg \ + socat \ + xclip \ + && add-apt-repository ppa:deadsnakes/ppa \ + && apt-get update \ + && apt-get install -y python3.11 python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# Install computer-server +RUN pip3 install cua-computer-server + +# Copy startup scripts +COPY src/scripts/start-computer-server.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/start-computer-server.sh + +# Create storage directories +RUN mkdir -p /home/cua/storage /home/cua/shared && \ + chown -R cua:cua /home/cua + +# Expose ports (VNC on 6080, computer-server on 8000) +EXPOSE 6080 8000 + +# Create startup wrapper +RUN echo '#!/bin/bash\n\ +/startup.sh &\n\ +sleep 5\n\ +su - cua -c "/usr/local/bin/start-computer-server.sh"' > /entrypoint.sh && \ + chmod +x /entrypoint.sh + +CMD ["/entrypoint.sh"] diff --git a/libs/docker-xfce/src/scripts/start-vnc.sh b/libs/docker-xfce/src/scripts/start-vnc.sh index 1cbba98f..f77afef0 100644 --- a/libs/docker-xfce/src/scripts/start-vnc.sh +++ b/libs/docker-xfce/src/scripts/start-vnc.sh @@ -10,13 +10,14 @@ 
vncserver :1 \ -depth ${VNC_COL_DEPTH:-24} \ -rfbport ${VNC_PORT:-5901} \ -localhost no \ - -SecurityTypes None \ + -SecurityTypes VncAuth \ -AlwaysShared \ -AcceptPointerEvents \ -AcceptKeyEvents \ -AcceptCutText \ -SendCutText \ - -xstartup /usr/local/bin/xstartup.sh + -xstartup /usr/local/bin/xstartup.sh \ + --I-KNOW-THIS-IS-INSECURE # Keep the process running tail -f /home/cua/.vnc/*.log From e63d3e32277ffa48c1312f28d28d3da8bbf9517c Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 10 Oct 2025 13:48:35 -0700 Subject: [PATCH 21/37] Add Sponsors section to README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index ce2726b6..df8ab727 100644 --- a/README.md +++ b/README.md @@ -223,3 +223,9 @@ This project is not affiliated with, endorsed by, or sponsored by Apple Inc., Ca Thank you to all our supporters! [![Stargazers over time](https://starchart.cc/trycua/cua.svg?variant=adaptive)](https://starchart.cc/trycua/cua) + +## Sponsors + +Thank you to all our [GitHub Sponsors](https://github.com/sponsors/trycua)! + +coderabbit-cli From 590c4a8753d497ea94899112d3c3fec60350c973 Mon Sep 17 00:00:00 2001 From: r33drichards Date: Fri, 10 Oct 2025 14:43:07 -0700 Subject: [PATCH 22/37] Add pyproject.toml version verification script and tests (#462) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add pyproject.toml version verification script and tests Adds get_pyproject_version.py script to verify that pyproject.toml versions match expected versions during git tag releases. Includes comprehensive pytest test suite with best practices. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * Revert "Add pyproject.toml version verification script and tests" This reverts commit 1d40e692ccf3d7b3ea9a8a44368769ab23001789. 
* Add pyproject.toml version verification script and tests Adds get_pyproject_version.py script to verify that pyproject.toml versions match expected versions during git tag releases. Includes comprehensive pytest test suite with best practices. Updates the GitHub Actions workflow to use the verification script, ensuring version consistency before publishing packages. Also removes the old version-setting step as pyproject.toml is now the source of truth for versions. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * f * add test for validation script to gha --------- Co-authored-by: Your Name Co-authored-by: Claude --- .github/scripts/get_pyproject_version.py | 68 ++++ .github/scripts/tests/README.md | 131 +++++++ .github/scripts/tests/__init__.py | 1 + .../tests/test_get_pyproject_version.py | 340 ++++++++++++++++++ .github/workflows/pypi-reusable-publish.yml | 25 +- .github/workflows/test-validation-script.yml | 36 ++ libs/python/computer-server/pyproject.toml | 2 +- 7 files changed, 587 insertions(+), 16 deletions(-) create mode 100755 .github/scripts/get_pyproject_version.py create mode 100644 .github/scripts/tests/README.md create mode 100644 .github/scripts/tests/__init__.py create mode 100644 .github/scripts/tests/test_get_pyproject_version.py create mode 100644 .github/workflows/test-validation-script.yml diff --git a/.github/scripts/get_pyproject_version.py b/.github/scripts/get_pyproject_version.py new file mode 100755 index 00000000..a00ea22c --- /dev/null +++ b/.github/scripts/get_pyproject_version.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +""" +Verifies that the version in pyproject.toml matches the expected version. 
+ +Usage: + python get_pyproject_version.py <pyproject_path> <expected_version> + +Exit codes: + 0 - Versions match + 1 - Versions don't match or error occurred +""" + +import sys +try: + import tomllib +except ImportError: + # Fallback for Python < 3.11 + import toml as tomllib + + +def main(): + if len(sys.argv) != 3: + print("Usage: python get_pyproject_version.py <pyproject_path> <expected_version>", file=sys.stderr) + sys.exit(1) + + pyproject_path = sys.argv[1] + expected_version = sys.argv[2] + + # tomllib requires binary mode + try: + with open(pyproject_path, 'rb') as f: + data = tomllib.load(f) + except FileNotFoundError: + print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr) + sys.exit(1) + except Exception as e: + # Fallback to toml if using the old library or handle other errors + try: + import toml + data = toml.load(pyproject_path) + except FileNotFoundError: + print(f"❌ ERROR: File not found: {pyproject_path}", file=sys.stderr) + sys.exit(1) + except Exception as toml_err: + print(f"❌ ERROR: Failed to parse TOML file: {e}", file=sys.stderr) + sys.exit(1) + + actual_version = data.get('project', {}).get('version') + + if not actual_version: + print("❌ ERROR: No version found in pyproject.toml", file=sys.stderr) + sys.exit(1) + + if actual_version != expected_version: + print("❌ Version mismatch detected!", file=sys.stderr) + print(f" pyproject.toml version: {actual_version}", file=sys.stderr) + print(f" Expected version: {expected_version}", file=sys.stderr) + print("", file=sys.stderr) + print("The version in pyproject.toml must match the version being published.", file=sys.stderr) + print(f"Please update pyproject.toml to version {expected_version} or use the correct tag.", file=sys.stderr) + sys.exit(1) + + print(f"✅ Version consistency check passed: {actual_version}") + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/.github/scripts/tests/README.md b/.github/scripts/tests/README.md new file mode 100644 index 00000000..2a440065 --- /dev/null +++ b/.github/scripts/tests/README.md @@ 
-0,0 +1,131 @@ +# Tests for .github/scripts + +This directory contains comprehensive tests for the GitHub workflow scripts using Python's built-in testing framework. + +## Requirements + +**No external dependencies required!** + +This test suite uses: +- `unittest` - Python's built-in testing framework +- `tomllib` - Python 3.11+ built-in TOML parser + +For Python < 3.11, the `toml` package is used as a fallback. + +## Running Tests + +### Run all tests +```bash +cd .github/scripts/tests +python3 -m unittest discover -v +``` + +### Run a specific test file +```bash +python3 -m unittest test_get_pyproject_version -v +``` + +### Run a specific test class +```bash +python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion -v +``` + +### Run a specific test method +```bash +python3 -m unittest test_get_pyproject_version.TestGetPyprojectVersion.test_matching_versions -v +``` + +### Run tests directly from the test file +```bash +python3 test_get_pyproject_version.py +``` + +## Test Structure + +### test_get_pyproject_version.py + +Comprehensive tests for `get_pyproject_version.py` covering: + +- ✅ **Version matching**: Tests successful version validation +- ✅ **Version mismatch**: Tests error handling when versions don't match +- ✅ **Missing version**: Tests handling of pyproject.toml without version field +- ✅ **Missing project section**: Tests handling of pyproject.toml without project section +- ✅ **File not found**: Tests handling of non-existent files +- ✅ **Malformed TOML**: Tests handling of invalid TOML syntax +- ✅ **Argument validation**: Tests proper argument count validation +- ✅ **Semantic versioning**: Tests various semantic version formats +- ✅ **Pre-release tags**: Tests versions with alpha, beta, rc tags +- ✅ **Build metadata**: Tests versions with build metadata +- ✅ **Edge cases**: Tests empty versions and other edge cases + +**Total Tests**: 17+ test cases covering all functionality + +## Best Practices Implemented + +1. 
**Fixture Management**: Uses `setUp()` and `tearDown()` for clean test isolation +2. **Helper Methods**: Provides reusable helpers for creating test fixtures +3. **Temporary Files**: Uses `tempfile` for file creation with proper cleanup +4. **Comprehensive Coverage**: Tests happy paths, error conditions, and edge cases +5. **Clear Documentation**: Each test has a descriptive docstring +6. **Output Capture**: Uses `unittest.mock.patch` and `StringIO` to test stdout/stderr +7. **Exit Code Validation**: Properly tests script exit codes with `assertRaises(SystemExit)` +8. **Type Hints**: Uses type hints in helper methods for clarity +9. **PEP 8 Compliance**: Follows Python style guidelines +10. **Zero External Dependencies**: Uses only Python standard library + +## Continuous Integration + +These tests can be integrated into GitHub Actions workflows with no additional dependencies: + +```yaml +- name: Run .github scripts tests + run: | + cd .github/scripts/tests + python3 -m unittest discover -v +``` + +## Test Output Example + +``` +test_empty_version_string (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of empty version string. ... ok +test_file_not_found (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of non-existent pyproject.toml file. ... ok +test_malformed_toml (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of malformed TOML file. ... ok +test_matching_versions (test_get_pyproject_version.TestGetPyprojectVersion) +Test that matching versions result in success. ... ok +test_missing_project_section (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of pyproject.toml without a project section. ... ok +test_missing_version_field (test_get_pyproject_version.TestGetPyprojectVersion) +Test handling of pyproject.toml without a version field. ... ok +test_no_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing no arguments results in usage error. ... 
ok +test_semantic_version_0_0_1 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 0.0.1. ... ok +test_semantic_version_1_0_0 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 1.0.0. ... ok +test_semantic_version_10_20_30 (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version 10.20.30. ... ok +test_semantic_version_alpha (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with alpha tag. ... ok +test_semantic_version_beta (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with beta tag. ... ok +test_semantic_version_rc_with_build (test_get_pyproject_version.TestGetPyprojectVersion) +Test semantic version with rc and build metadata. ... ok +test_too_few_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing too few arguments results in usage error. ... ok +test_too_many_arguments (test_get_pyproject_version.TestGetPyprojectVersion) +Test that providing too many arguments results in usage error. ... ok +test_version_mismatch (test_get_pyproject_version.TestGetPyprojectVersion) +Test that mismatched versions result in failure with appropriate error message. ... ok +test_version_with_build_metadata (test_get_pyproject_version.TestGetPyprojectVersion) +Test matching versions with build metadata. ... ok +test_version_with_prerelease_tags (test_get_pyproject_version.TestGetPyprojectVersion) +Test matching versions with pre-release tags like alpha, beta, rc. ... 
ok + +---------------------------------------------------------------------- +Ran 18 tests in 0.XXXs + +OK +``` diff --git a/.github/scripts/tests/__init__.py b/.github/scripts/tests/__init__.py new file mode 100644 index 00000000..cbc9d370 --- /dev/null +++ b/.github/scripts/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for .github/scripts.""" diff --git a/.github/scripts/tests/test_get_pyproject_version.py b/.github/scripts/tests/test_get_pyproject_version.py new file mode 100644 index 00000000..95c980a9 --- /dev/null +++ b/.github/scripts/tests/test_get_pyproject_version.py @@ -0,0 +1,340 @@ +""" +Comprehensive tests for get_pyproject_version.py script using unittest. + +This test suite covers: +- Version matching validation +- Error handling for missing versions +- Invalid input handling +- File not found scenarios +- Malformed TOML handling +""" + +import sys +import unittest +import tempfile +from pathlib import Path +from io import StringIO +from unittest.mock import patch + +# Add parent directory to path to import the module +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Import after path is modified +import get_pyproject_version + + +class TestGetPyprojectVersion(unittest.TestCase): + """Test suite for get_pyproject_version.py functionality.""" + + def setUp(self): + """Reset sys.argv before each test.""" + self.original_argv = sys.argv.copy() + + def tearDown(self): + """Restore sys.argv after each test.""" + sys.argv = self.original_argv + + def create_pyproject_toml(self, version: str) -> Path: + """Helper to create a temporary pyproject.toml file with a given version.""" + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(f""" +[project] +name = "test-project" +version = "{version}" +description = "A test project" +""") + temp_file.close() + return Path(temp_file.name) + + def create_pyproject_toml_no_version(self) -> Path: + """Helper to create a pyproject.toml without a version field.""" + 
temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(""" +[project] +name = "test-project" +description = "A test project without version" +""") + temp_file.close() + return Path(temp_file.name) + + def create_pyproject_toml_no_project(self) -> Path: + """Helper to create a pyproject.toml without a project section.""" + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(""" +[tool.poetry] +name = "test-project" +version = "1.0.0" +""") + temp_file.close() + return Path(temp_file.name) + + def create_malformed_toml(self) -> Path: + """Helper to create a malformed TOML file.""" + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) + temp_file.write(""" +[project +name = "test-project +version = "1.0.0" +""") + temp_file.close() + return Path(temp_file.name) + + # Test: Successful version match + def test_matching_versions(self): + """Test that matching versions result in success.""" + pyproject_file = self.create_pyproject_toml("1.2.3") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.2.3'] + + # Capture stdout + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Version mismatch + def test_version_mismatch(self): + """Test that mismatched versions result in failure with appropriate error message.""" + pyproject_file = self.create_pyproject_toml("1.2.3") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.2.4'] + + # Capture stderr + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + 
error_output = captured_error.getvalue() + self.assertIn("❌ Version mismatch detected!", error_output) + self.assertIn("pyproject.toml version: 1.2.3", error_output) + self.assertIn("Expected version: 1.2.4", error_output) + self.assertIn("Please update pyproject.toml to version 1.2.4", error_output) + finally: + pyproject_file.unlink() + + # Test: Missing version in pyproject.toml + def test_missing_version_field(self): + """Test handling of pyproject.toml without a version field.""" + pyproject_file = self.create_pyproject_toml_no_version() + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("❌ ERROR: No version found in pyproject.toml", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Missing project section + def test_missing_project_section(self): + """Test handling of pyproject.toml without a project section.""" + pyproject_file = self.create_pyproject_toml_no_project() + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("❌ ERROR: No version found in pyproject.toml", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + # Test: File not found + def test_file_not_found(self): + """Test handling of non-existent pyproject.toml file.""" + sys.argv = ['get_pyproject_version.py', '/nonexistent/pyproject.toml', '1.0.0'] + + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + + # Test: Malformed TOML + def test_malformed_toml(self): + """Test handling of malformed TOML file.""" + 
pyproject_file = self.create_malformed_toml() + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + finally: + pyproject_file.unlink() + + # Test: Incorrect number of arguments - too few + def test_too_few_arguments(self): + """Test that providing too few arguments results in usage error.""" + sys.argv = ['get_pyproject_version.py', 'pyproject.toml'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("Usage: python get_pyproject_version.py ", + captured_error.getvalue()) + + # Test: Incorrect number of arguments - too many + def test_too_many_arguments(self): + """Test that providing too many arguments results in usage error.""" + sys.argv = ['get_pyproject_version.py', 'pyproject.toml', '1.0.0', 'extra'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("Usage: python get_pyproject_version.py ", + captured_error.getvalue()) + + # Test: No arguments + def test_no_arguments(self): + """Test that providing no arguments results in usage error.""" + sys.argv = ['get_pyproject_version.py'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + self.assertIn("Usage: python get_pyproject_version.py ", + captured_error.getvalue()) + + # Test: Version with pre-release tags + def test_version_with_prerelease_tags(self): + """Test matching versions with pre-release tags like alpha, beta, rc.""" + pyproject_file = self.create_pyproject_toml("1.2.3-rc.1") + + try: + sys.argv = 
['get_pyproject_version.py', str(pyproject_file), '1.2.3-rc.1'] + + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3-rc.1", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Version with build metadata + def test_version_with_build_metadata(self): + """Test matching versions with build metadata.""" + pyproject_file = self.create_pyproject_toml("1.2.3+build.123") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.2.3+build.123'] + + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn("✅ Version consistency check passed: 1.2.3+build.123", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Various semantic version formats + def test_semantic_version_0_0_1(self): + """Test semantic version 0.0.1.""" + self._test_version_format("0.0.1") + + def test_semantic_version_1_0_0(self): + """Test semantic version 1.0.0.""" + self._test_version_format("1.0.0") + + def test_semantic_version_10_20_30(self): + """Test semantic version 10.20.30.""" + self._test_version_format("10.20.30") + + def test_semantic_version_alpha(self): + """Test semantic version with alpha tag.""" + self._test_version_format("1.2.3-alpha") + + def test_semantic_version_beta(self): + """Test semantic version with beta tag.""" + self._test_version_format("1.2.3-beta.1") + + def test_semantic_version_rc_with_build(self): + """Test semantic version with rc and build metadata.""" + self._test_version_format("1.2.3-rc.1+build.456") + + def _test_version_format(self, version: str): + """Helper method to test various semantic version formats.""" + pyproject_file = 
self.create_pyproject_toml(version) + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), version] + + captured_output = StringIO() + with patch('sys.stdout', captured_output): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 0) + self.assertIn(f"✅ Version consistency check passed: {version}", captured_output.getvalue()) + finally: + pyproject_file.unlink() + + # Test: Empty version string + def test_empty_version_string(self): + """Test handling of empty version string.""" + pyproject_file = self.create_pyproject_toml("") + + try: + sys.argv = ['get_pyproject_version.py', str(pyproject_file), '1.0.0'] + + captured_error = StringIO() + with patch('sys.stderr', captured_error): + with self.assertRaises(SystemExit) as cm: + get_pyproject_version.main() + + self.assertEqual(cm.exception.code, 1) + # Empty string is falsy, so it should trigger error + self.assertIn("❌", captured_error.getvalue()) + finally: + pyproject_file.unlink() + + +class TestSuiteInfo(unittest.TestCase): + """Test suite metadata.""" + + def test_suite_info(self): + """Display test suite information.""" + print("\n" + "="*70) + print("Test Suite: get_pyproject_version.py") + print("Framework: unittest (Python built-in)") + print("TOML Library: tomllib (Python 3.11+ built-in)") + print("="*70) + self.assertTrue(True) + + +if __name__ == '__main__': + # Run tests with verbose output + unittest.main(verbosity=2) diff --git a/.github/workflows/pypi-reusable-publish.yml b/.github/workflows/pypi-reusable-publish.yml index f1eb045e..4a220610 100644 --- a/.github/workflows/pypi-reusable-publish.yml +++ b/.github/workflows/pypi-reusable-publish.yml @@ -71,6 +71,16 @@ jobs: echo "VERSION=${{ inputs.version }}" >> $GITHUB_ENV echo "version=${{ inputs.version }}" >> $GITHUB_OUTPUT + - name: Verify version consistency + run: | + # Install toml parser + pip install toml + + # Verify version matches using script (exits with error 
if mismatch) + python ${GITHUB_WORKSPACE}/.github/scripts/get_pyproject_version.py \ + ${{ inputs.package_dir }}/pyproject.toml \ + ${{ inputs.version }} + - name: Initialize PDM in package directory run: | # Make sure we're working with a properly initialized PDM project @@ -82,21 +92,6 @@ jobs: pdm lock fi - - name: Set version in package - run: | - cd ${{ inputs.package_dir }} - # Replace pdm bump with direct edit of pyproject.toml - if [[ "$OSTYPE" == "darwin"* ]]; then - # macOS version of sed needs an empty string for -i - sed -i '' "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml - else - # Linux version - sed -i "s/version = \".*\"/version = \"$VERSION\"/" pyproject.toml - fi - # Verify version was updated - echo "Updated version in pyproject.toml:" - grep "version =" pyproject.toml - # Conditional step for lume binary download (only for pylume package) - name: Download and setup lume binary if: inputs.is_lume_package diff --git a/.github/workflows/test-validation-script.yml b/.github/workflows/test-validation-script.yml new file mode 100644 index 00000000..cc11dda7 --- /dev/null +++ b/.github/workflows/test-validation-script.yml @@ -0,0 +1,36 @@ +name: Test validation script + +on: + pull_request: + paths: + - '.github/scripts/**' + - '.github/workflows/test-scripts.yml' + push: + branches: + - main + paths: + - '.github/scripts/**' + - '.github/workflows/test-scripts.yml' + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest toml + + - name: Run tests + run: | + cd .github/scripts + pytest tests/ -v diff --git a/libs/python/computer-server/pyproject.toml b/libs/python/computer-server/pyproject.toml index 6e9e7240..941f43c5 100644 --- a/libs/python/computer-server/pyproject.toml +++ 
b/libs/python/computer-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer-server" -version = "0.1.0" +version = "0.1.24" description = "Server component for the Computer-Use Interface (CUI) framework powering Cua" authors = [ { name = "TryCua", email = "gh@trycua.com" } From 09049096f494cfb4d26255d9ef72e8375bd7674b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Fri, 10 Oct 2025 16:14:11 -0700 Subject: [PATCH 23/37] Fix noVNC startup by removing netcat dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed start-novnc.sh to use a simple sleep instead of checking VNC availability with netcat. This avoids the need to install netcat and simplifies the startup sequence. Since supervisor starts services in priority order (VNC=10, noVNC=20), a 5-second sleep is sufficient for VNC to be ready. Also added netcat to Dockerfile for future use if needed. Container now fully functional: - VNC server running on port 5901 - noVNC web interface on port 6901 - Computer-server API on port 8000 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- libs/docker-xfce/Dockerfile | 1 + libs/docker-xfce/src/scripts/start-novnc.sh | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index d5459f31..2721d599 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -36,6 +36,7 @@ RUN apt-get update && apt-get install -y \ python3-numpy \ git \ net-tools \ + netcat \ supervisor \ # Computer-server dependencies python3-tk \ diff --git a/libs/docker-xfce/src/scripts/start-novnc.sh b/libs/docker-xfce/src/scripts/start-novnc.sh index 4f95c644..07894acb 100644 --- a/libs/docker-xfce/src/scripts/start-novnc.sh +++ b/libs/docker-xfce/src/scripts/start-novnc.sh @@ -1,12 +1,9 @@ #!/bin/bash set -e -# Wait for VNC server to be ready +# Give VNC a moment to start 
(supervisor starts it with priority 10, this is priority 20) echo "Waiting for VNC server to start..." -while ! nc -z localhost ${VNC_PORT:-5901}; do - sleep 1 -done -echo "VNC server is ready" +sleep 5 # Start noVNC cd /opt/noVNC From 6e780ac3452128e6231f688fbfcd6297eb650582 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Fri, 10 Oct 2025 18:03:21 -0700 Subject: [PATCH 24/37] Remove XFCE power manager to prevent startup popup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uninstall xfce4-power-manager package which causes annoying popup dialog on container startup. Power management is not needed in a containerized environment. This provides a cleaner desktop experience on first launch. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- libs/docker-xfce/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index 2721d599..17c4b962 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -62,6 +62,9 @@ RUN apt-get update && apt-get install -y \ zlib1g-dev \ && rm -rf /var/lib/apt/lists/* +# Remove power manager to avoid popup in container +RUN apt-get remove -y xfce4-power-manager xfce4-power-manager-data || true + # Install noVNC RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ From a9866f831ae767135e84994d582674c8a67d5e5e Mon Sep 17 00:00:00 2001 From: f-trycua Date: Fri, 10 Oct 2025 18:07:36 -0700 Subject: [PATCH 25/37] Remove Firefox to speed up container build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Firefox installation was causing very slow builds (1200+ seconds) downloading 72MB on ARM. Removed it since it's not essential for the basic container functionality. 
Firefox can be installed later inside the running container if needed: docker exec cua-docker-xfce apt-get install -y firefox Build time reduced from 20+ minutes to under 1 minute. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- libs/docker-xfce/Dockerfile | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index 17c4b962..12b890a5 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -73,20 +73,8 @@ RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ # Install computer-server RUN pip3 install cua-computer-server -# Install Firefox -RUN add-apt-repository -y ppa:mozillateam/ppa && \ - echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ - apt-get update && \ - apt-get install -y firefox && \ - rm -rf /var/lib/apt/lists/* - -# Configure Firefox defaults -RUN mkdir -p /etc/firefox && \ - echo 'pref("datareporting.policy.firstRunURL", "");' > /etc/firefox/syspref.js && \ - echo 'pref("datareporting.policy.dataSubmissionEnabled", false);' >> /etc/firefox/syspref.js && \ - echo 'pref("datareporting.healthreport.service.enabled", false);' >> /etc/firefox/syspref.js && \ - echo 'pref("datareporting.healthreport.uploadEnabled", false);' >> /etc/firefox/syspref.js && \ - echo 'pref("browser.aboutwelcome.enabled", false);' >> /etc/firefox/syspref.js +# Firefox installation removed to speed up build +# Can be added back later if needed # Copy startup scripts COPY src/supervisor/ /etc/supervisor/conf.d/ From 86c5642128335241a47394192a8605b030e90847 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Fri, 10 Oct 2025 18:13:14 -0700 Subject: [PATCH 26/37] Add Firefox back using Ubuntu default repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Install Firefox from Ubuntu's default repository instead of the 
Mozilla PPA. This is much faster (3 minutes vs 20+ minutes) and installs the snap version which works well in containers. Container now includes: - XFCE desktop (no power manager popup) - Firefox browser - VNC/noVNC access - Computer-server API Build time: ~3-4 minutes total 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- libs/docker-xfce/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index 12b890a5..d854c876 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -47,6 +47,7 @@ RUN apt-get update && apt-get install -y \ socat \ xclip \ # Browser + firefox \ wget \ software-properties-common \ # Build tools From e88099aea417bb6ff6b8bdefd845a575164e023c Mon Sep 17 00:00:00 2001 From: f-trycua Date: Fri, 10 Oct 2025 21:02:42 -0700 Subject: [PATCH 27/37] Cleanup --- libs/docker-xfce/Dockerfile | 37 ++++++++++--- libs/docker-xfce/Dockerfile.slim | 53 ------------------- .../docker-xfce/src/scripts/resize-display.sh | 20 +++++++ libs/docker-xfce/src/xfce-config/helpers.rc | 2 + .../src/xfce-config/xfce4-session.xml | 51 ++++++++++++++++++ 5 files changed, 103 insertions(+), 60 deletions(-) delete mode 100644 libs/docker-xfce/Dockerfile.slim create mode 100644 libs/docker-xfce/src/scripts/resize-display.sh create mode 100644 libs/docker-xfce/src/xfce-config/helpers.rc create mode 100644 libs/docker-xfce/src/xfce-config/xfce4-session.xml diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index d854c876..02d48eb2 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -12,7 +12,7 @@ ENV DISPLAY=:1 ENV VNC_PORT=5901 ENV NOVNC_PORT=6901 ENV API_PORT=8000 -ENV VNC_RESOLUTION=1920x1080 +ENV VNC_RESOLUTION=1024x768 ENV VNC_COL_DEPTH=24 # Create user @@ -47,7 +47,6 @@ RUN apt-get update && apt-get install -y \ socat \ xclip \ # Browser - firefox \ wget \ software-properties-common \ # Build tools @@ 
-66,16 +65,31 @@ RUN apt-get update && apt-get install -y \ # Remove power manager to avoid popup in container RUN apt-get remove -y xfce4-power-manager xfce4-power-manager-data || true +# Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues +RUN apt-get update && \ + add-apt-repository -y ppa:mozillateam/ppa && \ + echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ + apt-get update && \ + apt-get install -y firefox && \ + echo 'pref("datareporting.policy.firstRunURL", "");\npref("datareporting.policy.dataSubmissionEnabled", false);\npref("datareporting.healthreport.service.enabled", false);\npref("datareporting.healthreport.uploadEnabled", false);\npref("trailhead.firstrun.branches", "nofirstrun-empty");\npref("browser.aboutwelcome.enabled", false);' > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \ + update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \ + update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \ + rm -rf /var/lib/apt/lists/* + # Install noVNC RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html +# Pre-create cache directory with correct ownership before pip install +RUN mkdir -p /home/cua/.cache && \ + chown -R cua:cua /home/cua/.cache + # Install computer-server RUN pip3 install cua-computer-server -# Firefox installation removed to speed up build -# Can be added back later if needed +# Fix any cache files created by pip +RUN chown -R cua:cua /home/cua/.cache # Copy startup scripts COPY src/supervisor/ /etc/supervisor/conf.d/ @@ -94,10 +108,19 @@ RUN mkdir -p $HOME/.vnc && \ chmod 600 $HOME/.vnc/passwd # Configure XFCE for first start -RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml +RUN mkdir -p 
$HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart -# Create storage and shared directories -RUN mkdir -p $HOME/storage $HOME/shared +# Copy XFCE config to disable browser launching and welcome screens +COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc +COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml + +# Disable panel plugins that might try to open browsers +RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \ + chown -R cua:cua $HOME/.config + +# Create storage and shared directories, and Firefox cache directory +RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \ + chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla $HOME/.vnc USER root diff --git a/libs/docker-xfce/Dockerfile.slim b/libs/docker-xfce/Dockerfile.slim deleted file mode 100644 index 51c2233d..00000000 --- a/libs/docker-xfce/Dockerfile.slim +++ /dev/null @@ -1,53 +0,0 @@ -# CUA Docker XFCE Container - Slim Version -# Uses existing VNC base to reduce build time - -FROM dorowu/ubuntu-desktop-lxde-vnc:focal - -# Switch to root -USER root - -# Set environment variables -ENV HOME=/home/cua -ENV DISPLAY=:1 -ENV API_PORT=8000 - -# Create cua user -RUN useradd -m -s /bin/bash -G sudo cua && \ - echo "cua:password" | chpasswd && \ - echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers - -# Install Python 3.11 and computer-server dependencies -RUN apt-get update && apt-get install -y \ - software-properties-common \ - gnome-screenshot \ - wmctrl \ - ffmpeg \ - socat \ - xclip \ - && add-apt-repository ppa:deadsnakes/ppa \ - && apt-get update \ - && apt-get install -y python3.11 python3-pip \ - && rm -rf /var/lib/apt/lists/* - -# Install computer-server -RUN pip3 install cua-computer-server - -# Copy startup scripts -COPY src/scripts/start-computer-server.sh /usr/local/bin/ 
-RUN chmod +x /usr/local/bin/start-computer-server.sh - -# Create storage directories -RUN mkdir -p /home/cua/storage /home/cua/shared && \ - chown -R cua:cua /home/cua - -# Expose ports (VNC on 6080, computer-server on 8000) -EXPOSE 6080 8000 - -# Create startup wrapper -RUN echo '#!/bin/bash\n\ -/startup.sh &\n\ -sleep 5\n\ -su - cua -c "/usr/local/bin/start-computer-server.sh"' > /entrypoint.sh && \ - chmod +x /entrypoint.sh - -CMD ["/entrypoint.sh"] diff --git a/libs/docker-xfce/src/scripts/resize-display.sh b/libs/docker-xfce/src/scripts/resize-display.sh new file mode 100644 index 00000000..ea663dce --- /dev/null +++ b/libs/docker-xfce/src/scripts/resize-display.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Dynamic display resolution script +# Can be called to change the VNC display resolution + +RESOLUTION=${1:-1920x1080} + +# Wait for display to be ready +for i in {1..10}; do + if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then + break + fi + sleep 1 +done + +# Change resolution using xrandr +DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \ +DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null || \ +echo "Failed to set resolution to $RESOLUTION" + +echo "Display resolution set to: $RESOLUTION" diff --git a/libs/docker-xfce/src/xfce-config/helpers.rc b/libs/docker-xfce/src/xfce-config/helpers.rc new file mode 100644 index 00000000..8fd42f69 --- /dev/null +++ b/libs/docker-xfce/src/xfce-config/helpers.rc @@ -0,0 +1,2 @@ +# XFCE preferred applications - disable browser to prevent error popups +WebBrowser= diff --git a/libs/docker-xfce/src/xfce-config/xfce4-session.xml b/libs/docker-xfce/src/xfce-config/xfce4-session.xml new file mode 100644 index 00000000..d7b834d9 --- /dev/null +++ b/libs/docker-xfce/src/xfce-config/xfce4-session.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 3053270f74420a97627a8866c3e48b28330b0889 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 11 Oct 2025 
11:03:50 -0700 Subject: [PATCH 28/37] Add XFCE provider --- libs/docker-xfce/README.md | 30 +++++++++++++--- .../computer/providers/docker/provider.py | 36 ++++++++++++++----- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/libs/docker-xfce/README.md b/libs/docker-xfce/README.md index 489d0042..5d8512c8 100644 --- a/libs/docker-xfce/README.md +++ b/libs/docker-xfce/README.md @@ -99,7 +99,7 @@ docker run --rm -it \ ## Using with CUA Docker Provider -This container is designed to work with the CUA Docker provider: +This container is designed to work with the CUA Docker provider. Simply specify the docker-xfce image: ```python from computer import Computer @@ -108,8 +108,8 @@ from computer import Computer computer = Computer( os_type="linux", provider_type="docker", - image="trycua/cua-docker-xfce:latest", - display="1920x1080", + image="trycua/cua-docker-xfce:latest", # Use docker-xfce instead of Kasm + display="1024x768", memory="4GB", cpu="2" ) @@ -128,11 +128,33 @@ async with computer: print(result.stdout) ``` +### Switching between Kasm and docker-xfce + +The Docker provider automatically detects which image you're using: + +```python +# Use Kasm-based container (default for Linux) +computer_kasm = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-ubuntu:latest", # Kasm image +) + +# Use docker-xfce container (vanilla XFCE) +computer_xfce = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # docker-xfce image +) +``` + +Both provide the same API and functionality - the provider automatically configures the correct paths and settings based on the image. 
+ ## Environment Variables | Variable | Default | Description | |----------|---------|-------------| -| `VNC_RESOLUTION` | `1920x1080` | Screen resolution | +| `VNC_RESOLUTION` | `1024x768` | Screen resolution | | `VNC_COL_DEPTH` | `24` | Color depth | | `VNC_PORT` | `5901` | VNC server port | | `NOVNC_PORT` | `6901` | noVNC web interface port | diff --git a/libs/python/computer/computer/providers/docker/provider.py b/libs/python/computer/computer/providers/docker/provider.py index 82ad411c..487edc28 100644 --- a/libs/python/computer/computer/providers/docker/provider.py +++ b/libs/python/computer/computer/providers/docker/provider.py @@ -36,7 +36,7 @@ class DockerProvider(BaseVMProvider): """ def __init__( - self, + self, port: Optional[int] = 8000, host: str = "localhost", storage: Optional[str] = None, @@ -47,13 +47,16 @@ class DockerProvider(BaseVMProvider): vnc_port: Optional[int] = 6901, ): """Initialize the Docker VM Provider. - + Args: port: Currently unused (VM provider port) host: Hostname for the API server (default: localhost) storage: Path for persistent VM storage shared_path: Path for shared folder between host and container image: Docker image to use (default: "trycua/cua-ubuntu:latest") + Supported images: + - "trycua/cua-ubuntu:latest" (Kasm-based) + - "trycua/cua-docker-xfce:latest" (vanilla XFCE) verbose: Enable verbose logging ephemeral: Use ephemeral (temporary) storage vnc_port: Port for VNC interface (default: 6901) @@ -62,19 +65,35 @@ class DockerProvider(BaseVMProvider): self.api_port = 8000 self.vnc_port = vnc_port self.ephemeral = ephemeral - + # Handle ephemeral storage (temporary directory) if ephemeral: self.storage = "ephemeral" else: self.storage = storage - + self.shared_path = shared_path self.image = image self.verbose = verbose self._container_id = None self._running_containers = {} # Track running containers by name + + # Detect image type and configure user directory accordingly + self._detect_image_config() + def 
_detect_image_config(self): + """Detect image type and configure paths accordingly.""" + # Detect if this is a docker-xfce image or Kasm image + if "docker-xfce" in self.image.lower() or "xfce" in self.image.lower(): + self._home_dir = "/home/cua" + self._image_type = "docker-xfce" + logger.info(f"Detected docker-xfce image: using {self._home_dir}") + else: + # Default to Kasm configuration + self._home_dir = "/home/kasm-user" + self._image_type = "kasm" + logger.info(f"Detected Kasm image: using {self._home_dir}") + @property def provider_type(self) -> VMProviderType: """Return the provider type.""" @@ -277,12 +296,13 @@ class DockerProvider(BaseVMProvider): # Add volume mounts if storage is specified storage_path = storage or self.storage if storage_path and storage_path != "ephemeral": - # Mount storage directory - cmd.extend(["-v", f"{storage_path}:/home/kasm-user/storage"]) - + # Mount storage directory using detected home directory + cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"]) + # Add shared path if specified if self.shared_path: - cmd.extend(["-v", f"{self.shared_path}:/home/kasm-user/shared"]) + # Mount shared directory using detected home directory + cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"]) # Add environment variables cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password From 7b286f77c6e1cf842df96ddd7b0a64fda8f10ff5 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 11 Oct 2025 21:51:00 -0700 Subject: [PATCH 29/37] Set default browser --- libs/docker-xfce/Dockerfile | 20 ++++++++++---------- libs/docker-xfce/README.md | 7 ++++--- libs/docker-xfce/src/scripts/start-vnc.sh | 4 ++-- libs/docker-xfce/src/xfce-config/helpers.rc | 4 ++-- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile index 02d48eb2..f6411919 100644 --- a/libs/docker-xfce/Dockerfile +++ b/libs/docker-xfce/Dockerfile @@ -15,13 +15,10 @@ ENV API_PORT=8000 ENV 
VNC_RESOLUTION=1024x768 ENV VNC_COL_DEPTH=24 -# Create user -RUN useradd -m -s /bin/bash -G sudo cua && \ - echo "cua:password" | chpasswd && \ - echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers - -# Install system dependencies +# Install system dependencies first (including sudo) RUN apt-get update && apt-get install -y \ + # System utilities + sudo \ # Desktop environment xfce4 \ xfce4-terminal \ @@ -65,6 +62,11 @@ RUN apt-get update && apt-get install -y \ # Remove power manager to avoid popup in container RUN apt-get remove -y xfce4-power-manager xfce4-power-manager-data || true +# Create user after sudo is installed +RUN useradd -m -s /bin/bash -G sudo cua && \ + echo "cua:password" | chpasswd && \ + echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + # Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues RUN apt-get update && \ add-apt-repository -y ppa:mozillateam/ppa && \ @@ -102,10 +104,8 @@ RUN chmod +x /usr/local/bin/*.sh USER cua WORKDIR /home/cua -# Create VNC password file -RUN mkdir -p $HOME/.vnc && \ - echo "password" | vncpasswd -f > $HOME/.vnc/passwd && \ - chmod 600 $HOME/.vnc/passwd +# Create VNC directory (no password needed with SecurityTypes None) +RUN mkdir -p $HOME/.vnc # Configure XFCE for first start RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart diff --git a/libs/docker-xfce/README.md b/libs/docker-xfce/README.md index 5d8512c8..9ecdff00 100644 --- a/libs/docker-xfce/README.md +++ b/libs/docker-xfce/README.md @@ -93,8 +93,8 @@ docker run --rm -it \ ## Accessing the Container -- **noVNC Web Interface**: Open `http://localhost:6901` in your browser -- **VNC Client**: Connect to `localhost:5901` (password: `password`) +- **noVNC Web Interface**: Open `http://localhost:6901` in your browser (no password required) +- **VNC Client**: Connect to `localhost:5901` (no password required) - **Computer Server API**: Available at `http://localhost:8000` ## Using with 
CUA Docker Provider @@ -175,8 +175,9 @@ Both provide the same API and functionality - the provider automatically configu ## User Credentials - **Username**: `cua` -- **Password**: `password` +- **Password**: `password` (for shell login only) - **Sudo access**: Enabled without password +- **VNC access**: No password required ## Creating Snapshots diff --git a/libs/docker-xfce/src/scripts/start-vnc.sh b/libs/docker-xfce/src/scripts/start-vnc.sh index f77afef0..934e6d3c 100644 --- a/libs/docker-xfce/src/scripts/start-vnc.sh +++ b/libs/docker-xfce/src/scripts/start-vnc.sh @@ -4,13 +4,13 @@ set -e # Clean up any existing VNC lock files rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 -# Start VNC server +# Start VNC server without password authentication vncserver :1 \ -geometry ${VNC_RESOLUTION:-1920x1080} \ -depth ${VNC_COL_DEPTH:-24} \ -rfbport ${VNC_PORT:-5901} \ -localhost no \ - -SecurityTypes VncAuth \ + -SecurityTypes None \ -AlwaysShared \ -AcceptPointerEvents \ -AcceptKeyEvents \ diff --git a/libs/docker-xfce/src/xfce-config/helpers.rc b/libs/docker-xfce/src/xfce-config/helpers.rc index 8fd42f69..b2270633 100644 --- a/libs/docker-xfce/src/xfce-config/helpers.rc +++ b/libs/docker-xfce/src/xfce-config/helpers.rc @@ -1,2 +1,2 @@ -# XFCE preferred applications - disable browser to prevent error popups -WebBrowser= +# XFCE preferred applications - set Firefox as default browser +WebBrowser=firefox From 1a83931587eece82aca0b83b522d39277e689cc2 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 11 Oct 2025 22:14:00 -0700 Subject: [PATCH 30/37] Remove unused --- libs/docker-xfce/LICENSE | 21 --- libs/docker-xfce/Makefile | 127 ----------------- libs/docker-xfce/QUICKSTART.md | 166 ---------------------- libs/docker-xfce/docker-compose.yml | 44 ------ libs/docker-xfce/example.py | 213 ---------------------------- 5 files changed, 571 deletions(-) delete mode 100644 libs/docker-xfce/LICENSE delete mode 100644 libs/docker-xfce/Makefile delete mode 100644 
libs/docker-xfce/QUICKSTART.md delete mode 100644 libs/docker-xfce/docker-compose.yml delete mode 100644 libs/docker-xfce/example.py diff --git a/libs/docker-xfce/LICENSE b/libs/docker-xfce/LICENSE deleted file mode 100644 index 6899a9db..00000000 --- a/libs/docker-xfce/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2025 CUA - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/libs/docker-xfce/Makefile b/libs/docker-xfce/Makefile deleted file mode 100644 index e3aa2879..00000000 --- a/libs/docker-xfce/Makefile +++ /dev/null @@ -1,127 +0,0 @@ -.PHONY: build run stop push clean test logs shell - -IMAGE_NAME := trycua/cua-docker-xfce -TAG := latest -CONTAINER_NAME := cua-docker-xfce-test - -# Build the Docker image -build: - docker build -t $(IMAGE_NAME):$(TAG) . - -# Run the container -run: - docker run -d \ - --name $(CONTAINER_NAME) \ - --shm-size=512m \ - -p 5901:5901 \ - -p 6901:6901 \ - -p 8000:8000 \ - $(IMAGE_NAME):$(TAG) - @echo "Container started!" 
- @echo "noVNC: http://localhost:6901" - @echo "VNC: localhost:5901 (password: password)" - @echo "API: http://localhost:8000" - -# Run with custom resolution -run-hd: - docker run -d \ - --name $(CONTAINER_NAME) \ - --shm-size=512m \ - -p 5901:5901 \ - -p 6901:6901 \ - -p 8000:8000 \ - -e VNC_RESOLUTION=1280x720 \ - $(IMAGE_NAME):$(TAG) - -# Run with persistent storage -run-persist: - mkdir -p ./storage ./shared - docker run -d \ - --name $(CONTAINER_NAME) \ - --shm-size=512m \ - -p 5901:5901 \ - -p 6901:6901 \ - -p 8000:8000 \ - -v $(PWD)/storage:/home/cua/storage \ - -v $(PWD)/shared:/home/cua/shared \ - $(IMAGE_NAME):$(TAG) - -# Stop and remove the container -stop: - docker stop $(CONTAINER_NAME) || true - docker rm $(CONTAINER_NAME) || true - -# Push to Docker Hub -push: - docker push $(IMAGE_NAME):$(TAG) - -# Clean up everything -clean: stop - docker rmi $(IMAGE_NAME):$(TAG) || true - rm -rf ./storage ./shared - -# Run tests -test: build run - @echo "Waiting for services to start..." - @sleep 10 - @echo "Testing noVNC..." - @curl -f http://localhost:6901 > /dev/null && echo "✓ noVNC is running" || echo "✗ noVNC failed" - @echo "Testing API..." 
- @curl -f http://localhost:8000 > /dev/null && echo "✓ API is running" || echo "✗ API failed" - @$(MAKE) stop - -# View logs -logs: - docker logs -f $(CONTAINER_NAME) - -# View supervisor logs -logs-supervisor: - docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/supervisord.log - -# View individual service logs -logs-vnc: - docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/vncserver.log - -logs-novnc: - docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/novnc.log - -logs-api: - docker exec $(CONTAINER_NAME) tail -f /var/log/supervisor/computer-server.log - -# Open a shell in the container -shell: - docker exec -it $(CONTAINER_NAME) /bin/bash - -# Check supervisor status -status: - docker exec $(CONTAINER_NAME) supervisorctl status - -# Restart services -restart-services: - docker exec $(CONTAINER_NAME) supervisorctl restart all - -# Create a snapshot -snapshot: - docker commit $(CONTAINER_NAME) $(IMAGE_NAME):snapshot - @echo "Snapshot created: $(IMAGE_NAME):snapshot" - -# Build and run -dev: build run logs - -# Help -help: - @echo "Available targets:" - @echo " build - Build the Docker image" - @echo " run - Run the container" - @echo " run-hd - Run with 720p resolution" - @echo " run-persist - Run with persistent storage" - @echo " stop - Stop and remove container" - @echo " push - Push to Docker Hub" - @echo " clean - Remove image and container" - @echo " test - Build, run tests, and stop" - @echo " logs - View container logs" - @echo " logs-* - View specific service logs" - @echo " shell - Open shell in container" - @echo " status - Check supervisor status" - @echo " snapshot - Create container snapshot" - @echo " dev - Build, run, and follow logs" diff --git a/libs/docker-xfce/QUICKSTART.md b/libs/docker-xfce/QUICKSTART.md deleted file mode 100644 index 9cd8082b..00000000 --- a/libs/docker-xfce/QUICKSTART.md +++ /dev/null @@ -1,166 +0,0 @@ -# Quick Start Guide - -Get up and running with CUA Docker XFCE in 5 minutes. 
- -## Prerequisites - -- Docker installed and running -- Python 3.11+ (for using with CUA library) -- `cua-computer` package installed: `pip install cua-computer` - -## Quick Start - -### Option 1: Using Makefile (Recommended) - -```bash -# Build and run -make build -make run - -# Check if it's running -make status - -# View logs -make logs -``` - -Access: -- 🌐 **Web VNC**: http://localhost:6901 -- 🖥️ **VNC Client**: localhost:5901 (password: `password`) -- 🔌 **API**: http://localhost:8000 - -### Option 2: Using Docker Compose - -```bash -# Start the container -docker-compose up -d - -# View logs -docker-compose logs -f - -# Stop the container -docker-compose down -``` - -### Option 3: Docker Command - -```bash -docker run -d \ - --name cua-desktop \ - --shm-size=512m \ - -p 5901:5901 \ - -p 6901:6901 \ - -p 8000:8000 \ - trycua/cua-docker-xfce:latest -``` - -## Using with Python - -```python -import asyncio -from computer import Computer - -async def main(): - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest" - ) - - async with computer: - # Take a screenshot - screenshot = await computer.interface.screenshot() - - # Open terminal - await computer.interface.hotkey("ctrl", "alt", "t") - await asyncio.sleep(1) - - # Type and execute command - await computer.interface.type_text("echo 'Hello!'") - await computer.interface.press_key("Return") - -asyncio.run(main()) -``` - -## Common Tasks - -### Run with custom resolution -```bash -make run-hd # 1280x720 -# or -docker run -e VNC_RESOLUTION=1280x720 ... -``` - -### Run with persistent storage -```bash -make run-persist -# or -docker run -v $(pwd)/storage:/home/cua/storage ... 
-``` - -### View specific logs -```bash -make logs-vnc # VNC server logs -make logs-novnc # noVNC proxy logs -make logs-api # Computer-server logs -``` - -### Open shell in container -```bash -make shell -# or -docker exec -it cua-desktop /bin/bash -``` - -### Create a snapshot -```bash -make snapshot -``` - -## Troubleshooting - -### Container won't start -```bash -# Check if ports are already in use -lsof -i :6901 -lsof -i :8000 - -# View container logs -docker logs cua-desktop -``` - -### Black screen in noVNC -```bash -# Restart VNC server -docker exec cua-desktop supervisorctl restart vncserver -``` - -### API not responding -```bash -# Check if computer-server is running -docker exec cua-desktop supervisorctl status computer-server - -# Restart computer-server -docker exec cua-desktop supervisorctl restart computer-server -``` - -## Next Steps - -- Read the [full README](README.md) for detailed documentation -- Check out [example.py](example.py) for more usage examples -- Customize the [Dockerfile](Dockerfile) for your needs - -## Clean Up - -```bash -# Using Makefile -make clean - -# Using docker-compose -docker-compose down -v - -# Manual -docker stop cua-desktop -docker rm cua-desktop -docker rmi trycua/cua-docker-xfce:latest -``` diff --git a/libs/docker-xfce/docker-compose.yml b/libs/docker-xfce/docker-compose.yml deleted file mode 100644 index bdc1ba2d..00000000 --- a/libs/docker-xfce/docker-compose.yml +++ /dev/null @@ -1,44 +0,0 @@ -version: '3.8' - -services: - cua-desktop: - build: . 
- image: trycua/cua-docker-xfce:latest - container_name: cua-docker-xfce - shm_size: '512m' - ports: - - "5901:5901" # VNC - - "6901:6901" # noVNC - - "8000:8000" # Computer API - environment: - - VNC_RESOLUTION=1920x1080 - - VNC_COL_DEPTH=24 - - VNC_PORT=5901 - - NOVNC_PORT=6901 - - API_PORT=8000 - volumes: - - ./storage:/home/cua/storage - - ./shared:/home/cua/shared - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - - # Optional: Multiple instances for parallel testing - cua-desktop-2: - build: . - image: trycua/cua-docker-xfce:latest - container_name: cua-docker-xfce-2 - shm_size: '512m' - ports: - - "5902:5901" - - "6902:6901" - - "8001:8000" - environment: - - VNC_RESOLUTION=1280x720 - restart: unless-stopped - profiles: - - multi diff --git a/libs/docker-xfce/example.py b/libs/docker-xfce/example.py deleted file mode 100644 index 6c42bbc2..00000000 --- a/libs/docker-xfce/example.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env python3 -""" -Example script demonstrating how to use the CUA Docker XFCE container -with the Computer library. 
-""" - -import asyncio -from computer import Computer - - -async def basic_example(): - """Basic example: Take a screenshot and click around""" - print("=== Basic Example ===") - - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest", - display="1920x1080", - memory="4GB", - cpu="2", - port=8000, - noVNC_port=6901 - ) - - async with computer: - print("Computer is ready!") - print(f"noVNC available at: http://localhost:6901") - - # Get screen info - screen = await computer.interface.get_screen_size() - print(f"Screen size: {screen['width']}x{screen['height']}") - - # Take a screenshot - screenshot = await computer.interface.screenshot() - with open("screenshot.png", "wb") as f: - f.write(screenshot) - print("Screenshot saved to screenshot.png") - - # Click and type - await computer.interface.left_click(100, 100) - await computer.interface.type_text("Hello from CUA!") - - print("Done!") - - -async def file_operations_example(): - """Example: File system operations""" - print("\n=== File Operations Example ===") - - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest" - ) - - async with computer: - # Create a file - await computer.interface.write_text( - "/home/cua/test.txt", - "Hello from CUA!" 
- ) - print("Created test.txt") - - # Read it back - content = await computer.interface.read_text("/home/cua/test.txt") - print(f"File content: {content}") - - # List directory - files = await computer.interface.list_dir("/home/cua") - print(f"Files in home directory: {files}") - - -async def command_execution_example(): - """Example: Running shell commands""" - print("\n=== Command Execution Example ===") - - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest" - ) - - async with computer: - # Run a command - result = await computer.interface.run_command("uname -a") - print(f"System info:\n{result.stdout}") - - # Check Firefox is installed - result = await computer.interface.run_command("which firefox") - print(f"Firefox location: {result.stdout.strip()}") - - # Get Python version - result = await computer.interface.run_command("python3 --version") - print(f"Python version: {result.stdout.strip()}") - - -async def browser_automation_example(): - """Example: Opening Firefox and navigating""" - print("\n=== Browser Automation Example ===") - - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest" - ) - - async with computer: - # Open Firefox - await computer.interface.run_command("firefox https://example.com &") - print("Firefox opened") - - # Wait for it to load - await asyncio.sleep(5) - - # Take a screenshot - screenshot = await computer.interface.screenshot() - with open("browser_screenshot.png", "wb") as f: - f.write(screenshot) - print("Browser screenshot saved") - - -async def persistent_storage_example(): - """Example: Using persistent storage""" - print("\n=== Persistent Storage Example ===") - - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest", - shared_directories=["./storage"] - ) - - async with computer: - # Write to persistent storage - await computer.interface.write_text( - 
"/home/cua/storage/persistent.txt", - "This file persists across container restarts!" - ) - print("Written to persistent storage") - - # Read it back - content = await computer.interface.read_text( - "/home/cua/storage/persistent.txt" - ) - print(f"Content: {content}") - - -async def multi_action_example(): - """Example: Complex interaction sequence""" - print("\n=== Multi-Action Example ===") - - computer = Computer( - os_type="linux", - provider_type="docker", - image="trycua/cua-docker-xfce:latest" - ) - - async with computer: - # Open terminal - await computer.interface.hotkey("ctrl", "alt", "t") - await asyncio.sleep(2) - - # Type a command - await computer.interface.type_text("echo 'Hello from CUA!'") - await computer.interface.press_key("Return") - await asyncio.sleep(1) - - # Take screenshot - screenshot = await computer.interface.screenshot() - with open("terminal_screenshot.png", "wb") as f: - f.write(screenshot) - print("Terminal screenshot saved") - - -async def main(): - """Run all examples""" - examples = [ - ("Basic", basic_example), - ("File Operations", file_operations_example), - ("Command Execution", command_execution_example), - ("Browser Automation", browser_automation_example), - ("Persistent Storage", persistent_storage_example), - ("Multi-Action", multi_action_example), - ] - - print("Available examples:") - for i, (name, _) in enumerate(examples, 1): - print(f"{i}. {name}") - print(f"{len(examples) + 1}. 
Run all") - - choice = input("\nSelect an example (1-7): ").strip() - - try: - if choice == str(len(examples) + 1): - # Run all examples - for name, func in examples: - try: - await func() - except Exception as e: - print(f"Error in {name}: {e}") - else: - idx = int(choice) - 1 - if 0 <= idx < len(examples): - await examples[idx][1]() - else: - print("Invalid choice") - except ValueError: - print("Invalid input") - - -if __name__ == "__main__": - asyncio.run(main()) From ea5c4a1cdcdd17fce096752badc8643fa58840f2 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 12 Oct 2025 14:35:20 -0700 Subject: [PATCH 31/37] Add xfce gh action, simplify folder name --- .github/workflows/docker-publish-kasm.yml | 144 ++-------------- .github/workflows/docker-publish-xfce.yml | 29 ++++ .github/workflows/docker-reusable-publish.yml | 155 ++++++++++++++++++ libs/{docker-xfce => xfce}/.dockerignore | 0 libs/{docker-xfce => xfce}/.gitignore | 0 libs/{docker-xfce => xfce}/Dockerfile | 0 libs/{docker-xfce => xfce}/README.md | 0 .../src/scripts/resize-display.sh | 0 .../src/scripts/start-computer-server.sh | 0 .../src/scripts/start-novnc.sh | 0 .../src/scripts/start-vnc.sh | 0 .../src/scripts/xstartup.sh | 0 .../src/supervisor/supervisord.conf | 0 .../src/xfce-config/helpers.rc | 0 .../src/xfce-config/xfce4-session.xml | 0 15 files changed, 196 insertions(+), 132 deletions(-) create mode 100644 .github/workflows/docker-publish-xfce.yml create mode 100644 .github/workflows/docker-reusable-publish.yml rename libs/{docker-xfce => xfce}/.dockerignore (100%) rename libs/{docker-xfce => xfce}/.gitignore (100%) rename libs/{docker-xfce => xfce}/Dockerfile (100%) rename libs/{docker-xfce => xfce}/README.md (100%) rename libs/{docker-xfce => xfce}/src/scripts/resize-display.sh (100%) rename libs/{docker-xfce => xfce}/src/scripts/start-computer-server.sh (100%) rename libs/{docker-xfce => xfce}/src/scripts/start-novnc.sh (100%) rename libs/{docker-xfce => xfce}/src/scripts/start-vnc.sh (100%) 
rename libs/{docker-xfce => xfce}/src/scripts/xstartup.sh (100%) rename libs/{docker-xfce => xfce}/src/supervisor/supervisord.conf (100%) rename libs/{docker-xfce => xfce}/src/xfce-config/helpers.rc (100%) rename libs/{docker-xfce => xfce}/src/xfce-config/xfce4-session.xml (100%) diff --git a/.github/workflows/docker-publish-kasm.yml b/.github/workflows/docker-publish-kasm.yml index 882d6869..d97dbf6f 100644 --- a/.github/workflows/docker-publish-kasm.yml +++ b/.github/workflows/docker-publish-kasm.yml @@ -9,141 +9,21 @@ on: paths: - "libs/kasm/**" - ".github/workflows/docker-publish-kasm.yml" + - ".github/workflows/docker-reusable-publish.yml" pull_request: paths: - "libs/kasm/**" - ".github/workflows/docker-publish-kasm.yml" - - -env: - IMAGE_NAME: cua-ubuntu - DOCKER_HUB_ORG: trycua + - ".github/workflows/docker-reusable-publish.yml" jobs: - build-and-push: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - platform: - - linux/amd64 - - linux/arm64 - # todo unsupported base image - # - windows/amd64 - # - darwin/amd64 - # - darwin/arm64 - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Prepare platform tag - id: platform - run: | - # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) - PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') - echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: trycua - password: ${{ secrets.DOCKER_HUB_TOKEN }} - - - name: Extract metadata (PR) - if: github.event_name == 'pull_request' - id: meta-pr - uses: docker/metadata-action@v5 - with: - images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} - tags: | - type=raw,value=${{ github.sha }} - - - name: Extract metadata (main branch) - if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' - id: meta-main - uses: 
docker/metadata-action@v5 - with: - images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} - tags: | - type=raw,value=latest - - - name: Extract metadata (semantic version tag) - if: startsWith(github.ref, 'refs/tags/docker-kasm-v') - id: meta-semver - uses: docker/metadata-action@v5 - with: - images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} - tags: | - type=semver,pattern={{version}},prefix=docker-kasm-v - type=semver,pattern={{major}}.{{minor}},prefix=docker-kasm-v - type=semver,pattern={{major}},prefix=docker-kasm-v - type=raw,value=latest - - - name: Build and push Docker image (PR) - if: github.event_name == 'pull_request' - uses: docker/build-push-action@v5 - with: - context: ./libs/kasm - file: ./libs/kasm/Dockerfile - push: true - tags: ${{ steps.meta-pr.outputs.tags }} - labels: ${{ steps.meta-pr.outputs.labels }} - platforms: ${{ matrix.platform }} - cache-from: | - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max - - - name: Build and push Docker image (main branch) - if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' - uses: docker/build-push-action@v5 - with: - context: ./libs/kasm - file: ./libs/kasm/Dockerfile - push: true - tags: ${{ steps.meta-main.outputs.tags }} - labels: ${{ steps.meta-main.outputs.labels }} - platforms: ${{ matrix.platform }} - cache-from: | - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max - - - name: Build and push Docker image (semantic version tag) - if: 
startsWith(github.ref, 'refs/tags/docker-kasm-v') - uses: docker/build-push-action@v5 - with: - context: ./libs/kasm - file: ./libs/kasm/Dockerfile - push: true - tags: ${{ steps.meta-semver.outputs.tags }} - labels: ${{ steps.meta-semver.outputs.labels }} - platforms: ${{ matrix.platform }} - cache-from: | - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max - - - name: Image digest - if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/docker-kasm-v') - run: | - if [ "${{ github.event_name }}" == "pull_request" ]; then - echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" - elif [[ "${{ github.ref }}" == refs/tags/docker-kasm-v* ]]; then - echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" - else - echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" - fi - - - name: print image tags - run: | - if [ "${{ github.event_name }}" == "pull_request" ]; then - echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" - elif [[ "${{ github.ref }}" == refs/tags/docker-kasm-v* ]]; then - echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" - else - echo "Image tags: ${{ steps.meta-main.outputs.tags }}" - fi + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-ubuntu + context_dir: libs/kasm + dockerfile_path: Dockerfile + tag_prefix: docker-kasm-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-publish-xfce.yml b/.github/workflows/docker-publish-xfce.yml new file mode 100644 index 00000000..fa64849e --- /dev/null +++ b/.github/workflows/docker-publish-xfce.yml @@ -0,0 +1,29 @@ +name: 
Build and Publish CUA XFCE Container + +on: + push: + branches: + - main + tags: + - "docker-xfce-v*.*.*" + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + pull_request: + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + +jobs: + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-xfce + context_dir: libs/xfce + dockerfile_path: Dockerfile + tag_prefix: docker-xfce-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-reusable-publish.yml b/.github/workflows/docker-reusable-publish.yml new file mode 100644 index 00000000..3472883f --- /dev/null +++ b/.github/workflows/docker-reusable-publish.yml @@ -0,0 +1,155 @@ +name: Reusable Docker Publish Workflow + +on: + workflow_call: + inputs: + image_name: + description: "Name of the Docker image (e.g. cua-ubuntu, cua-xfce)" + required: true + type: string + context_dir: + description: "Directory containing the Dockerfile relative to workspace root (e.g. libs/kasm, libs/xfce)" + required: true + type: string + dockerfile_path: + description: "Path to Dockerfile relative to context_dir (e.g. Dockerfile)" + required: false + type: string + default: "Dockerfile" + tag_prefix: + description: "Prefix for semantic version tags (e.g. 
docker-kasm-v, docker-xfce-v)" + required: true + type: string + docker_hub_org: + description: "Docker Hub organization name" + required: false + type: string + default: "trycua" + secrets: + DOCKER_HUB_TOKEN: + required: true + +jobs: + build-and-push: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Prepare platform tag + id: platform + run: | + # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) + PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') + echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ inputs.docker_hub_org }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Extract metadata (PR) + if: github.event_name == 'pull_request' + id: meta-pr + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=${{ github.sha }} + + - name: Extract metadata (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + id: meta-main + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=latest + + - name: Extract metadata (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + id: meta-semver + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }} + type=raw,value=latest + + - name: Build and push Docker image (PR) + if: github.event_name == 
'pull_request' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-pr.outputs.tags }} + labels: ${{ steps.meta-pr.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-main.outputs.tags }} + labels: ${{ steps.meta-main.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-semver.outputs.tags }} + labels: ${{ steps.meta-semver.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + 
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Image digest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" + else + echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" + fi + + - name: print image tags + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" + else + echo "Image tags: ${{ steps.meta-main.outputs.tags }}" + fi diff --git a/libs/docker-xfce/.dockerignore b/libs/xfce/.dockerignore similarity index 100% rename from libs/docker-xfce/.dockerignore rename to libs/xfce/.dockerignore diff --git a/libs/docker-xfce/.gitignore b/libs/xfce/.gitignore similarity index 100% rename from libs/docker-xfce/.gitignore rename to libs/xfce/.gitignore diff --git a/libs/docker-xfce/Dockerfile b/libs/xfce/Dockerfile similarity index 100% rename from libs/docker-xfce/Dockerfile rename to libs/xfce/Dockerfile diff --git a/libs/docker-xfce/README.md b/libs/xfce/README.md similarity index 100% rename from libs/docker-xfce/README.md rename to libs/xfce/README.md diff --git a/libs/docker-xfce/src/scripts/resize-display.sh b/libs/xfce/src/scripts/resize-display.sh similarity index 100% rename from libs/docker-xfce/src/scripts/resize-display.sh rename to libs/xfce/src/scripts/resize-display.sh diff 
--git a/libs/docker-xfce/src/scripts/start-computer-server.sh b/libs/xfce/src/scripts/start-computer-server.sh similarity index 100% rename from libs/docker-xfce/src/scripts/start-computer-server.sh rename to libs/xfce/src/scripts/start-computer-server.sh diff --git a/libs/docker-xfce/src/scripts/start-novnc.sh b/libs/xfce/src/scripts/start-novnc.sh similarity index 100% rename from libs/docker-xfce/src/scripts/start-novnc.sh rename to libs/xfce/src/scripts/start-novnc.sh diff --git a/libs/docker-xfce/src/scripts/start-vnc.sh b/libs/xfce/src/scripts/start-vnc.sh similarity index 100% rename from libs/docker-xfce/src/scripts/start-vnc.sh rename to libs/xfce/src/scripts/start-vnc.sh diff --git a/libs/docker-xfce/src/scripts/xstartup.sh b/libs/xfce/src/scripts/xstartup.sh similarity index 100% rename from libs/docker-xfce/src/scripts/xstartup.sh rename to libs/xfce/src/scripts/xstartup.sh diff --git a/libs/docker-xfce/src/supervisor/supervisord.conf b/libs/xfce/src/supervisor/supervisord.conf similarity index 100% rename from libs/docker-xfce/src/supervisor/supervisord.conf rename to libs/xfce/src/supervisor/supervisord.conf diff --git a/libs/docker-xfce/src/xfce-config/helpers.rc b/libs/xfce/src/xfce-config/helpers.rc similarity index 100% rename from libs/docker-xfce/src/xfce-config/helpers.rc rename to libs/xfce/src/xfce-config/helpers.rc diff --git a/libs/docker-xfce/src/xfce-config/xfce4-session.xml b/libs/xfce/src/xfce-config/xfce4-session.xml similarity index 100% rename from libs/docker-xfce/src/xfce-config/xfce4-session.xml rename to libs/xfce/src/xfce-config/xfce4-session.xml From bf57eee78b3202f2a87bec32347dea52eae6f49a Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 12 Oct 2025 15:13:08 -0700 Subject: [PATCH 32/37] Bump 0.4.8 --- libs/python/computer/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/python/computer/pyproject.toml b/libs/python/computer/pyproject.toml index 4a9b41bb..3cf06f41 100644 --- 
a/libs/python/computer/pyproject.toml +++ b/libs/python/computer/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer" -version = "0.4.0" +version = "0.4.8" description = "Computer-Use Interface (CUI) framework powering Cua" readme = "README.md" authors = [ From cd8aacdf8dd87e4e3f3702e487a901d3bbdbd858 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 12 Oct 2025 18:56:57 -0700 Subject: [PATCH 33/37] Fix XFCE Password Prompts and Lock Screen Issues --- libs/xfce/Dockerfile | 20 ++++++++++++++----- .../src/xfce-config/xfce4-power-manager.xml | 19 ++++++++++++++++++ libs/xfce/src/xfce-config/xfce4-session.xml | 4 ++++ 3 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 libs/xfce/src/xfce-config/xfce4-power-manager.xml diff --git a/libs/xfce/Dockerfile b/libs/xfce/Dockerfile index f6411919..d44bdb95 100644 --- a/libs/xfce/Dockerfile +++ b/libs/xfce/Dockerfile @@ -22,7 +22,6 @@ RUN apt-get update && apt-get install -y \ # Desktop environment xfce4 \ xfce4-terminal \ - xfce4-goodies \ dbus-x11 \ # VNC server tigervnc-standalone-server \ @@ -59,12 +58,19 @@ RUN apt-get update && apt-get install -y \ zlib1g-dev \ && rm -rf /var/lib/apt/lists/* -# Remove power manager to avoid popup in container -RUN apt-get remove -y xfce4-power-manager xfce4-power-manager-data || true +# Remove screensavers and power manager to avoid popups and lock screens +RUN apt-get remove -y \ + xfce4-power-manager \ + xfce4-power-manager-data \ + xfce4-power-manager-plugins \ + xfce4-screensaver \ + light-locker \ + xscreensaver \ + xscreensaver-data || true # Create user after sudo is installed RUN useradd -m -s /bin/bash -G sudo cua && \ - echo "cua:password" | chpasswd && \ + echo "cua:cua" | chpasswd && \ echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers # Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues @@ -113,9 +119,13 @@ RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 # Copy 
XFCE config to disable browser launching and welcome screens COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml +COPY --chown=cua:cua src/xfce-config/xfce4-power-manager.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-power-manager.xml -# Disable panel plugins that might try to open browsers +# Disable autostart for screensaver, lock screen, and power manager RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-screensaver.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/light-locker.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-power-manager.desktop && \ chown -R cua:cua $HOME/.config # Create storage and shared directories, and Firefox cache directory diff --git a/libs/xfce/src/xfce-config/xfce4-power-manager.xml b/libs/xfce/src/xfce-config/xfce4-power-manager.xml new file mode 100644 index 00000000..56447c1e --- /dev/null +++ b/libs/xfce/src/xfce-config/xfce4-power-manager.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/libs/xfce/src/xfce-config/xfce4-session.xml b/libs/xfce/src/xfce-config/xfce4-session.xml index d7b834d9..5af36711 100644 --- a/libs/xfce/src/xfce-config/xfce4-session.xml +++ b/libs/xfce/src/xfce-config/xfce4-session.xml @@ -48,4 +48,8 @@ + + + + From 5fd6c937e88c7f058cafd7a5265332db9fa6ed43 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 12 Oct 2025 21:56:08 -0700 Subject: [PATCH 34/37] Cleanup docs --- docs/content/docs/agent-sdk/agent-loops.mdx | 4 +- docs/content/docs/computer-sdk/computers.mdx | 158 ++++++++++++------- docs/content/docs/quickstart-cli.mdx | 54 ++++--- 3 files changed, 132 insertions(+), 84 deletions(-) diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx 
b/docs/content/docs/agent-sdk/agent-loops.mdx index 6cd8daef..67fa966d 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -23,8 +23,8 @@ async def take_screenshot(): async with Computer( os_type="linux", provider_type="cloud", - name="m-linux-h3sj2qbz2a", - api_key="" + name="your-container-name", + api_key="your-api-key" ) as computer: agent = ComputerAgent( diff --git a/docs/content/docs/computer-sdk/computers.mdx b/docs/content/docs/computer-sdk/computers.mdx index 2a653f46..fe9155ce 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -9,9 +9,11 @@ Before we can automate apps using AI, we need to first connect to a Computer Ser Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop. -## Cua Cloud Sandbox +## Cloud Sandbox -This is a Cloud Sandbox running the Computer Server. This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website. +**Easiest & safest way to get started - works on any host OS** + +This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/). @@ -45,16 +47,48 @@ This is a Cloud Sandbox running the Computer Server. This is the easiest & safes -## Cua Local Sandbox +## Linux on Docker -Cua provides local sandboxes using different providers depending on your host operating system: +**Run Linux desktop locally on macOS, Windows, or Linux hosts** - - +Cua provides two Docker images for running Linux desktops: + + + + + **Recommended for most use cases** - lightweight XFCE desktop with Firefox 1. Install Docker Desktop or Docker Engine - 2. Build or pull the CUA Ubuntu sandbox + 2. Pull the CUA XFCE image + + ```bash + docker pull --platform=linux/amd64 trycua/cua-xfce:latest + ``` + + 3. 
Connect with Computer + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-xfce:latest", + name="my-xfce-container" + ) + + await computer.run() # Launch & connect to Docker sandbox + ``` + + + + + **Full-featured Ubuntu desktop** with additional applications + + 1. Install Docker Desktop or Docker Engine + + 2. Build or pull the CUA KASM image ```bash # Option 1: Pull from Docker Hub @@ -74,68 +108,70 @@ Cua provides local sandboxes using different providers depending on your host op os_type="linux", provider_type="docker", image="trycua/cua-ubuntu:latest", - name="my-cua-container" + name="my-kasm-container" ) await computer.run() # Launch & connect to Docker sandbox ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency - - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` - - 3. Windows Sandbox will be automatically configured when you run the CLI - - ```python - from computer import Computer - - computer = Computer( - os_type="windows", - provider_type="winsandbox", - ephemeral=True # Windows Sandbox is always ephemeral - ) - - await computer.run() # Launch & connect to Windows Sandbox - ``` - - - - - 1. Install lume cli - - ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua sandbox - - ```bash - lume run macos-sequoia-cua:latest - ``` - - 3. Connect with Computer - - ```python - from computer import Computer - - computer = Computer( - os_type="macos", - provider_type="lume", - name="macos-sequoia-cua:latest" - ) - - await computer.run() # Launch & connect to the sandbox - ``` - - +## Windows Sandbox + +**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + +1. Enable Windows Sandbox +2. 
Install pywinsandbox dependency + +```bash +pip install -U git+https://github.com/karkason/pywinsandbox.git +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="windows", + provider_type="winsandbox", + ephemeral=True # Windows Sandbox is always ephemeral +) + +await computer.run() # Launch & connect to Windows Sandbox +``` + +## macOS VM + +**macOS hosts only - requires Lume CLI** + +1. Install lume cli + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" +``` + +2. Start a local cua macOS VM + +```bash +lume run macos-sequoia-cua:latest +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" +) + +await computer.run() # Launch & connect to the sandbox +``` + ## Your host desktop You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions. diff --git a/docs/content/docs/quickstart-cli.mdx b/docs/content/docs/quickstart-cli.mdx index a22907ce..88d5e893 100644 --- a/docs/content/docs/quickstart-cli.mdx +++ b/docs/content/docs/quickstart-cli.mdx @@ -23,39 +23,45 @@ cua combines Computer (interface) + Agent (AI) for automating desktop apps. The ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your cua computer. **Cloud Sandbox is recommended** for the easiest setup: + + + + + **Easiest & safest way to get started - works on any host OS** - - - - **Easiest & safest way to get started** - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** 3. Create a **Medium, Ubuntu 22** container 4. 
Note your container name and API key - + Your cloud container will be automatically configured and ready to use. - - - 1. Install lume cli + + + **Run Linux desktop locally on macOS, Windows, or Linux hosts** + + 1. Install Docker Desktop or Docker Engine + + 2. Pull the CUA XFCE container (lightweight desktop) ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + docker pull --platform=linux/amd64 trycua/cua-xfce:latest ``` - 2. Start a local cua container + Or use KASM for a full-featured desktop: ```bash - lume run macos-sequoia-cua:latest + docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) + + + **Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + + 1. Enable Windows Sandbox 2. Install pywinsandbox dependency ```bash @@ -65,14 +71,20 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + **macOS hosts only - requires Lume CLI** + + 1. Install lume cli ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + ``` + + 2. 
Start a local cua macOS VM + + ```bash + lume run macos-sequoia-cua:latest ``` From 192151112e28ecfbbca3d9ade3cfac2332282c79 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 12 Oct 2025 21:59:49 -0700 Subject: [PATCH 35/37] Rename to sandbox --- README.md | 2 +- docs/content/docs/agent-sdk/agent-loops.mdx | 2 +- docs/content/docs/computer-sdk/computers.mdx | 4 ++-- docs/content/docs/quickstart-cli.mdx | 2 +- docs/content/docs/quickstart-devs.mdx | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index df8ab727..5f32c316 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ from computer import Computer async with Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) as computer: # Take screenshot diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx b/docs/content/docs/agent-sdk/agent-loops.mdx index 67fa966d..db1d8455 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -23,7 +23,7 @@ async def take_screenshot(): async with Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) as computer: diff --git a/docs/content/docs/computer-sdk/computers.mdx b/docs/content/docs/computer-sdk/computers.mdx index fe9155ce..1c3558da 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -23,7 +23,7 @@ This is a Cloud Sandbox running the Computer Server. Get a container at [trycua. computer = Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) @@ -37,7 +37,7 @@ This is a Cloud Sandbox running the Computer Server. Get a container at [trycua. 
const computer = new Computer({ osType: OSType.LINUX, - name: "your-container-name", + name: "your-sandbox-name", apiKey: "your-api-key" }); diff --git a/docs/content/docs/quickstart-cli.mdx b/docs/content/docs/quickstart-cli.mdx index 88d5e893..7bf53773 100644 --- a/docs/content/docs/quickstart-cli.mdx +++ b/docs/content/docs/quickstart-cli.mdx @@ -312,7 +312,7 @@ python -m agent.cli omniparser+ollama_chat/llama3.2:latest If you haven't set up environment variables, the CLI will guide you through the setup: -1. **Container Name**: Enter your cua container name (or get one at [trycua.com](https://www.trycua.com/)) +1. **Sandbox Name**: Enter your cua sandbox name (or get one at [trycua.com](https://www.trycua.com/)) 2. **CUA API Key**: Enter your cua API key 3. **Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.) diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index 5b7b28a3..8a9adea7 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -103,7 +103,7 @@ Connect to your Cua computer and perform basic interactions, such as taking scre computer = Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) await computer.run() # Connect to the sandbox @@ -189,7 +189,7 @@ Connect to your Cua computer and perform basic interactions, such as taking scre const computer = new Computer({ osType: OSType.LINUX, - name: "your-container-name", + name: "your-sandbox-name", apiKey: "your-api-key" }); await computer.run(); // Connect to the sandbox From cbd54fd9d637e7dbc8cb2db3ac274f9d3d6b6820 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 14 Oct 2025 11:29:23 -0400 Subject: [PATCH 36/37] added cloud vm management to docs --- .../docs/computer-sdk/cloud-vm-management.mdx | 240 ++++++++++++++++++ docs/content/docs/computer-sdk/meta.json | 1 + 2 files changed, 241 insertions(+) create mode 
100644 docs/content/docs/computer-sdk/cloud-vm-management.mdx diff --git a/docs/content/docs/computer-sdk/cloud-vm-management.mdx b/docs/content/docs/computer-sdk/cloud-vm-management.mdx new file mode 100644 index 00000000..a48984ff --- /dev/null +++ b/docs/content/docs/computer-sdk/cloud-vm-management.mdx @@ -0,0 +1,240 @@ +--- +title: Cloud VM Management +description: Manage your Cua Cloud sandboxes (VMs) via Python SDK or HTTP API +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +Use these concise examples to manage your cloud sandboxes. Pick either the Python SDK or plain HTTP (curl) for each action. + +> You need a CUA Database API key. Set it as an environment variable `CUA_API_KEY`. + +## Status values +- `pending` – VM deployment in progress +- `running` – VM is active and accessible +- `stopped` – VM is stopped but not terminated +- `terminated` – VM has been permanently destroyed +- `failed` – VM deployment or operation failed + +--- + +## List VMs + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + # Optional: point to a different API base + # os.environ["CUA_API_BASE"] = "https://api.cua.ai" + + provider = CloudProvider(api_key=api_key, verbose=False) + async with provider: + vms = await provider.list_vms() + for vm in vms: + print({ + "name": vm["name"], + "status": vm["status"], + "api_url": vm.get("api_url"), + "vnc_url": vm.get("vnc_url"), + }) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms" + ``` + + Example response: + ```json + [ + { + "name": "s-windows-x4snp46ebf", + "status": "running" + } + ] + ``` + + + + +--- + +## Start a VM +Provide the VM name you want to start. 
+ + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" # e.g., "m-linux-96lcxd2c2k" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.run_vm(name) + print(resp) # { "name": name, "status": "starting" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/start" -i + ``` + + Example response headers (no body): + ```text + HTTP/1.1 204 No Content + ``` + + + + +--- + +## Stop a VM +Stops the VM asynchronously. + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.stop_vm(name) + print(resp) # { "name": name, "status": "stopping" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/stop" + ``` + + Example response: + ```json + { "status": "stopping" } + ``` + + + + +--- + +## Restart a VM +Restarts the VM asynchronously. 
+ + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + resp = await provider.restart_vm(name) + print(resp) # { "name": name, "status": "restarting" } + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl -X POST \ + -H "Authorization: Bearer $CUA_API_KEY" \ + "https://api.cua.ai/v1/vms/my-vm-name/restart" + ``` + + Example response: + ```json + { "status": "restarting" } + ``` + + + + +--- + +## Query a VM by name +Query the computer-server running on the VM. Useful for checking details like status or OS type. + + + + + ```python + import os + import asyncio + from computer.providers.cloud.provider import CloudProvider + + async def main(): + api_key = os.getenv("CUA_API_KEY") or "your-api-key" + name = "my-vm-name" + + provider = CloudProvider(api_key=api_key) + async with provider: + info = await provider.get_vm(name) + print(info) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + + + ```bash + curl "https://my-vm-name.containers.cloud.cua.ai:8443/status" + ``` + + Example response: + ```json + { "status": "ok", "os_type": "linux", "features": ["agent"] } + ``` + + + diff --git a/docs/content/docs/computer-sdk/meta.json b/docs/content/docs/computer-sdk/meta.json index 92e14612..f09c6057 100644 --- a/docs/content/docs/computer-sdk/meta.json +++ b/docs/content/docs/computer-sdk/meta.json @@ -3,6 +3,7 @@ "description": "Build computer-using agents with the Computer SDK", "pages": [ "computers", + "cloud-vm-management", "commands", "computer-ui", "sandboxed-python" From 86a339d1cd5a992e51ce144d573d7f1e36e68944 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 14 Oct 2025 11:30:40 -0400 Subject: [PATCH 37/37] add restart_vm to all providers --- examples/cloud_api_examples.py | 16 
++++++++-------- .../computer/providers/docker/provider.py | 3 +++ .../computer/computer/providers/lume/provider.py | 3 +++ .../computer/providers/lumier/provider.py | 3 +++ .../computer/providers/winsandbox/provider.py | 3 +++ 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/examples/cloud_api_examples.py b/examples/cloud_api_examples.py index 4fc71770..88b1ea66 100644 --- a/examples/cloud_api_examples.py +++ b/examples/cloud_api_examples.py @@ -57,14 +57,14 @@ async def main() -> None: # ) # # To probe a VM's status via its public hostname (if you know the name): - name = "m-linux-96lcxd2c2k" - info = await provider.get_vm(name) - print("get_vm info:\n", - f"name: {info['name']}\n", - f"status: {info['status']}\n", # running - f"api_url: {info.get('api_url')}\n", - f"os_type: {info.get('os_type')}\n", - ) + # name = "m-linux-96lcxd2c2k" + # info = await provider.get_vm(name) + # print("get_vm info:\n", + # f"name: {info['name']}\n", + # f"status: {info['status']}\n", # running + # f"api_url: {info.get('api_url')}\n", + # f"os_type: {info.get('os_type')}\n", + # ) if __name__ == "__main__": asyncio.run(main()) diff --git a/libs/python/computer/computer/providers/docker/provider.py b/libs/python/computer/computer/providers/docker/provider.py index 82ad411c..481bffaa 100644 --- a/libs/python/computer/computer/providers/docker/provider.py +++ b/libs/python/computer/computer/providers/docker/provider.py @@ -405,6 +405,9 @@ class DockerProvider(BaseVMProvider): "provider": "docker" } + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("DockerProvider does not support restarting VMs.") + async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: """Update VM configuration. 
diff --git a/libs/python/computer/computer/providers/lume/provider.py b/libs/python/computer/computer/providers/lume/provider.py index 5816e53e..d651d2eb 100644 --- a/libs/python/computer/computer/providers/lume/provider.py +++ b/libs/python/computer/computer/providers/lume/provider.py @@ -486,6 +486,9 @@ class LumeProvider(BaseVMProvider): """Update VM configuration.""" return self._lume_api_update(name, update_opts, debug=self.verbose) + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("LumeProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. diff --git a/libs/python/computer/computer/providers/lumier/provider.py b/libs/python/computer/computer/providers/lumier/provider.py index 67f348be..9b3e8c4d 100644 --- a/libs/python/computer/computer/providers/lumier/provider.py +++ b/libs/python/computer/computer/providers/lumier/provider.py @@ -836,6 +836,9 @@ class LumierProvider(BaseVMProvider): logger.error(error_msg) return error_msg + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("LumierProvider does not support restarting VMs.") + async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available. diff --git a/libs/python/computer/computer/providers/winsandbox/provider.py b/libs/python/computer/computer/providers/winsandbox/provider.py index e072d900..1c9aec3c 100644 --- a/libs/python/computer/computer/providers/winsandbox/provider.py +++ b/libs/python/computer/computer/providers/winsandbox/provider.py @@ -390,6 +390,9 @@ class WinSandboxProvider(BaseVMProvider): "error": "Windows Sandbox does not support runtime configuration updates. 
" "Please stop and restart the sandbox with new configuration." } + + async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: + raise NotImplementedError("WinSandboxProvider does not support restarting VMs.") async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str: """Get the IP address of a VM, waiting indefinitely until it's available.