diff --git a/README.md b/README.md index fc0fd796..fe89f8fa 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ trycua%2Fcua | Trendshift -**c/ua** (pronounced "koo-ah") enables AI agents to control full operating systems in high-performance virtual containers with near-native speed on Apple Silicon. +**c/ua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
@@ -21,15 +21,14 @@ # šŸš€ Quick Start -Get started with a Computer-Use Agent UI and a VM with a single command: - +Get started with a Computer-Use Agent UI with a single command: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/scripts/playground.sh)" ``` - This script will: +- Prompt you to choose between [C/ua Cloud Containers](https://trycua.com) or local macOS VMs - Install Lume CLI for VM management (if needed) - Pull the latest macOS CUA image (if needed) - Set up Python environment and install/update required packages @@ -81,21 +80,28 @@ from computer import Computer from agent import ComputerAgent, LLM async def main(): - # Start a local macOS VM with a 1024x768 display - async with Computer(os_type="macos", display="1024x768") as computer: + # Start a local macOS VM + computer = Computer(os_type="macos") - # Example: Direct control of a macOS VM with Computer - await computer.interface.left_click(100, 200) - await computer.interface.type_text("Hello, world!") - screenshot_bytes = await computer.interface.screenshot() - - # Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit - agent = ComputerAgent( - computer=computer, - loop="UITARS", - model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit") - ) - await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide") + # Or with C/ua Cloud Container + computer = Computer( + os_type="linux", + api_key="your_cua_api_key_here", + name="your_container_name_here" + ) + + # Example: Direct control of a macOS VM with Computer + await computer.interface.left_click(100, 200) + await computer.interface.type_text("Hello, world!") + screenshot_bytes = await computer.interface.screenshot() + + # Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit + agent = ComputerAgent( + computer=computer, + loop="UITARS", + model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit") + ) + await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide") main() ``` diff --git a/examples/agent_examples.py b/examples/agent_examples.py index 189ecddd..62573077 100644 --- a/examples/agent_examples.py +++ b/examples/agent_examples.py @@ -5,7 +5,7 @@ import logging import traceback import signal -from computer import Computer +from computer import Computer, VMProviderType # Import the unified agent class and types from agent import ComputerAgent, LLMProvider, LLM, AgentLoop @@ -23,76 +23,88 @@ async def run_agent_example(): print("\n=== Example: ComputerAgent with OpenAI and Omni provider ===") try: + # Create a local macOS computer + computer = Computer( + os_type="macos", + verbosity=logging.DEBUG, + ) + + # Create a remote Linux computer with C/ua + # computer = Computer( + # os_type="linux", + # api_key=os.getenv("CUA_API_KEY"), + # name=os.getenv("CUA_CONTAINER_NAME"), + # provider_type=VMProviderType.CLOUD, + # ) + # Create Computer instance with async context manager - async with Computer(verbosity=logging.DEBUG) as macos_computer: - # Create agent with loop and provider - agent = ComputerAgent( - computer=macos_computer, - # loop=AgentLoop.OPENAI, - # loop=AgentLoop.ANTHROPIC, - # loop=AgentLoop.UITARS, - loop=AgentLoop.OMNI, - # model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA - # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"), - # model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"), - # model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"), - # model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"), - model=LLM( - provider=LLMProvider.OAICOMPAT, - name="gemma-3-12b-it", - provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint - ), - save_trajectory=True, - only_n_most_recent_images=3, - verbosity=logging.DEBUG, - ) + agent = ComputerAgent( + computer=computer, + loop=AgentLoop.OPENAI, + # loop=AgentLoop.ANTHROPIC, + # loop=AgentLoop.UITARS, + # loop=AgentLoop.OMNI, + model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA + # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"), + # model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"), + # model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"), + # model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"), + # model=LLM( + # provider=LLMProvider.OAICOMPAT, + # name="gemma-3-12b-it", + # provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint + # ), + save_trajectory=True, + only_n_most_recent_images=3, + verbosity=logging.DEBUG, + ) - tasks = [ - "Look for a repository named trycua/cua on GitHub.", - "Check the open issues, open the most recent one and read it.", - "Clone the repository in users/lume/projects if it doesn't exist yet.", - "Open the repository with an app named Cursor (on the dock, black background and white cube icon).", - "From Cursor, open Composer if not already open.", - "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.", - ] + tasks = [ + "Look for a repository named trycua/cua on GitHub.", + "Check the open issues, open the most recent one and read it.", + "Clone the repository in users/lume/projects if it doesn't exist yet.", + "Open the repository with an app named Cursor (on the dock, black background and white cube icon).", + "From Cursor, open Composer if not already open.", + "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.", + ] - for i, task in enumerate(tasks): - print(f"\nExecuting task {i}/{len(tasks)}: {task}") - async for result in agent.run(task): - print("Response ID: ", result.get("id")) + for i, task in enumerate(tasks): + print(f"\nExecuting task {i}/{len(tasks)}: {task}") + async for result in agent.run(task): + print("Response ID: ", result.get("id")) - # Print detailed usage information - usage = result.get("usage") - if usage: - print("\nUsage Details:") - print(f" Input Tokens: {usage.get('input_tokens')}") - if "input_tokens_details" in usage: - print(f" Input Tokens Details: {usage.get('input_tokens_details')}") - print(f" Output Tokens: {usage.get('output_tokens')}") - if "output_tokens_details" in usage: - print(f" Output Tokens Details: {usage.get('output_tokens_details')}") - print(f" Total Tokens: {usage.get('total_tokens')}") + # Print detailed usage information + usage = result.get("usage") + if usage: + print("\nUsage Details:") + print(f" Input Tokens: {usage.get('input_tokens')}") + if "input_tokens_details" in usage: + print(f" Input Tokens Details: {usage.get('input_tokens_details')}") + print(f" Output Tokens: {usage.get('output_tokens')}") + if "output_tokens_details" in usage: + print(f" Output Tokens Details: {usage.get('output_tokens_details')}") + print(f" Total Tokens: {usage.get('total_tokens')}") - print("Response Text: ", result.get("text")) + print("Response Text: ", result.get("text")) - # Print tools information - tools = result.get("tools") - if tools: - print("\nTools:") - print(tools) + # Print tools information + tools = result.get("tools") + if tools: + print("\nTools:") + print(tools) - # Print reasoning and tool call outputs - outputs = result.get("output", []) - for output in outputs: - output_type = output.get("type") - if output_type == "reasoning": - print("\nReasoning Output:") - print(output) - elif output_type == "computer_call": - print("\nTool Call Output:") - print(output) + # Print reasoning and tool call outputs + outputs = result.get("output", []) + for output in outputs: + output_type = output.get("type") + if output_type == "reasoning": + print("\nReasoning Output:") + print(output) + elif output_type == "computer_call": + print("\nTool Call Output:") + print(output) - print(f"\nāœ… Task {i+1}/{len(tasks)} completed: {task}") + print(f"\nāœ… Task {i+1}/{len(tasks)} completed: {task}") except Exception as e: logger.error(f"Error in run_agent_example: {e}") diff --git a/examples/computer_examples.py b/examples/computer_examples.py index 7b9a1d4d..227beb8c 100644 --- a/examples/computer_examples.py +++ b/examples/computer_examples.py @@ -16,17 +16,18 @@ load_dotenv(env_file) pythonpath = os.environ.get("PYTHONPATH", "") for path in pythonpath.split(":"): if path and path not in sys.path: - sys.path.append(path) + sys.path.insert(0, path) # Insert at beginning to prioritize print(f"Added to sys.path: {path}") -from computer import Computer, VMProviderType +from computer.computer import Computer +from computer.providers.base import VMProviderType from computer.logger import LogLevel async def main(): try: print("\n=== Using direct initialization ===") - # Create computer with configured host + # Create a local macOS computer computer = Computer( display="1024x768", memory="8GB", @@ -41,7 +42,8 @@ async def main(): ], ephemeral=False, ) - + + # Create a remote Linux computer with C/ua # computer = Computer( # os_type="linux", # api_key=os.getenv("CUA_API_KEY"), @@ -54,8 +56,15 @@ async def main(): await computer.run() screenshot = await computer.interface.screenshot() - with open(Path("~/cua/examples/screenshot.png").expanduser(), "wb") as f: + + # Create output directory if it doesn't exist + output_dir = Path("./output") + output_dir.mkdir(exist_ok=True) + + screenshot_path = output_dir / "screenshot.png" + with open(screenshot_path, "wb") as f: f.write(screenshot) + print(f"Screenshot saved to: {screenshot_path.absolute()}") # await computer.interface.hotkey("command", "space") diff --git a/libs/computer/computer/interface/linux.py b/libs/computer/computer/interface/linux.py index e2389bdd..401730ca 100644 --- a/libs/computer/computer/interface/linux.py +++ b/libs/computer/computer/interface/linux.py @@ -26,6 +26,7 @@ class LinuxComputerInterface(BaseComputerInterface): self._reconnect_delay = 1 # Start with 1 second delay self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts self._log_connection_attempts = True # Flag to control connection attempt logging + self._authenticated = False # Track authentication status # Set logger name for Linux interface self.logger = Logger("cua.interface.linux", LogLevel.NORMAL) @@ -89,34 +90,14 @@ class LinuxComputerInterface(BaseComputerInterface): ) self.logger.info("WebSocket connection established") - # If api_key and vm_name are provided, perform authentication handshake - if self.api_key and self.vm_name: - self.logger.info("Performing authentication handshake...") - auth_message = { - "command": "authenticate", - "params": { - "api_key": self.api_key, - "container_name": self.vm_name - } - } - await self._ws.send(json.dumps(auth_message)) - - # Wait for authentication response - auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10) - auth_result = json.loads(auth_response) - - if not auth_result.get("success"): - error_msg = auth_result.get("error", "Authentication failed") - self.logger.error(f"Authentication failed: {error_msg}") - await self._ws.close() - self._ws = None - raise ConnectionError(f"Authentication failed: {error_msg}") - - self.logger.info("Authentication successful") + # Authentication will be handled by the first command that needs it + # Don't do authentication here to avoid recv conflicts self._reconnect_delay = 1 # Reset reconnect delay on successful connection self._last_ping = time.time() retry_count = 0 # Reset retry count on successful connection + self._authenticated = False # Reset auth status on new connection + except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e: next_retry = self._reconnect_delay @@ -140,13 +121,6 @@ class LinuxComputerInterface(BaseComputerInterface): pass self._ws = None - # Use exponential backoff for connection retries - await asyncio.sleep(self._reconnect_delay) - self._reconnect_delay = min( - self._reconnect_delay * 2, self._max_reconnect_delay - ) - continue - # Regular ping to check connection if self._ws and self._ws.state == websockets.protocol.State.OPEN: try: @@ -225,6 +199,31 @@ class LinuxComputerInterface(BaseComputerInterface): if not self._ws: raise ConnectionError("WebSocket connection is not established") + # Handle authentication if needed + if self.api_key and self.vm_name and not self._authenticated: + self.logger.info("Performing authentication handshake...") + auth_message = { + "command": "authenticate", + "params": { + "api_key": self.api_key, + "container_name": self.vm_name + } + } + await self._ws.send(json.dumps(auth_message)) + + # Wait for authentication response + auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10) + auth_result = json.loads(auth_response) + + if not auth_result.get("success"): + error_msg = auth_result.get("error", "Authentication failed") + self.logger.error(f"Authentication failed: {error_msg}") + self._authenticated = False + raise ConnectionError(f"Authentication failed: {error_msg}") + + self.logger.info("Authentication successful") + self._authenticated = True + message = {"command": command, "params": params or {}} await self._ws.send(json.dumps(message)) response = await asyncio.wait_for(self._ws.recv(), timeout=30) @@ -245,9 +244,7 @@ class LinuxComputerInterface(BaseComputerInterface): f"Failed to send command '{command}' after {max_retries} retries" ) self.logger.debug(f"Command failure details: {e}") - raise - - raise last_error if last_error else RuntimeError("Failed to send command") + raise last_error if last_error else RuntimeError("Failed to send command") async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0): """Wait for WebSocket connection to become available.""" diff --git a/scripts/playground.sh b/scripts/playground.sh index c3f33501..8193f022 100755 --- a/scripts/playground.sh +++ b/scripts/playground.sh @@ -2,63 +2,137 @@ set -e -echo "šŸš€ Setting up CUA playground environment..." +echo "šŸš€ Setting up C/ua playground environment..." -# Check for Apple Silicon Mac -if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then - echo "āŒ This script requires an Apple Silicon Mac (M1/M2/M3/M4)." - exit 1 -fi - -# Check for macOS 15 (Sequoia) or newer -OSVERSION=$(sw_vers -productVersion) -if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then - echo "āŒ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION." - exit 1 -fi - -# Create a temporary directory for our work -TMP_DIR=$(mktemp -d) -cd "$TMP_DIR" +# Save the original working directory +ORIGINAL_DIR="$(pwd)" # Function to clean up on exit cleanup() { cd ~ - rm -rf "$TMP_DIR" + rm -rf "$TMP_DIR" 2>/dev/null || true } + +# Create a temporary directory for our work +TMP_DIR=$(mktemp -d) +cd "$TMP_DIR" trap cleanup EXIT -# Install Lume if not already installed -if ! command -v lume &> /dev/null; then - echo "šŸ“¦ Installing Lume CLI..." - curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash +# Ask user to choose between local macOS VMs or C/ua Cloud Containers +echo "" +echo "Choose your C/ua setup:" +echo "1) ā˜ļø C/ua Cloud Containers (works on any system)" +echo "2) šŸ–„ļø Local macOS VMs (requires Apple Silicon Mac + macOS 15+)" +echo "" +read -p "Enter your choice (1 or 2): " CHOICE + +if [[ "$CHOICE" == "1" ]]; then + # C/ua Cloud Container setup + echo "" + echo "ā˜ļø Setting up C/ua Cloud Containers..." + echo "" - # Add lume to PATH for this session if it's not already there - if ! command -v lume &> /dev/null; then - export PATH="$PATH:$HOME/.local/bin" + # Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir) + DEMO_DIR="$HOME/.cua-demo" + # Look for .env.local in the original working directory (before cd to temp dir) + CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local" + DEMO_ENV_FILE="$DEMO_DIR/.env.local" + + CUA_API_KEY="" + + # First check current directory + if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then + EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs) + if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then + CUA_API_KEY="$EXISTING_CUA_KEY" + fi fi + + # Then check demo directory if not found in current dir + if [[ -z "$CUA_API_KEY" ]] && [[ -f "$DEMO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$DEMO_ENV_FILE"; then + EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$DEMO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs) + if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then + CUA_API_KEY="$EXISTING_CUA_KEY" + fi + fi + + # If no valid API key found, prompt for one + if [[ -z "$CUA_API_KEY" ]]; then + echo "To use C/ua Cloud Containers, you need to:" + echo "1. Sign up at https://trycua.com" + echo "2. Create a Cloud Container" + echo "3. Generate an Api Key" + echo "" + read -p "Enter your C/ua Api Key: " CUA_API_KEY + + if [[ -z "$CUA_API_KEY" ]]; then + echo "āŒ C/ua Api Key is required for Cloud Containers." + exit 1 + fi + fi + + USE_CLOUD=true + +elif [[ "$CHOICE" == "2" ]]; then + # Local macOS VM setup + echo "" + echo "šŸ–„ļø Setting up local macOS VMs..." + + # Check for Apple Silicon Mac + if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then + echo "āŒ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)." + echo "šŸ’” Consider using C/ua Cloud Containers instead (option 1)." + exit 1 + fi + + # Check for macOS 15 (Sequoia) or newer + OSVERSION=$(sw_vers -productVersion) + if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then + echo "āŒ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION." + echo "šŸ’” Consider using C/ua Cloud Containers instead (option 1)." + exit 1 + fi + + USE_CLOUD=false + +else + echo "āŒ Invalid choice. Please run the script again and choose 1 or 2." + exit 1 fi -# Pull the macOS CUA image if not already present -if ! lume ls | grep -q "macos-sequoia-cua"; then - # Check available disk space - IMAGE_SIZE_GB=30 - AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}') - AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024)) - - echo "šŸ“Š The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space." - echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system." - - # Prompt for confirmation - read -p " Continue? [y]/n: " CONTINUE - CONTINUE=${CONTINUE:-y} - - if [[ $CONTINUE =~ ^[Yy]$ ]]; then - echo "šŸ“„ Pulling macOS CUA image (this may take a while)..." - lume pull macos-sequoia-cua:latest - else - echo "āŒ Installation cancelled." - exit 1 +# Install Lume if not already installed (only for local VMs) +if [[ "$USE_CLOUD" == "false" ]]; then + if ! command -v lume &> /dev/null; then + echo "šŸ“¦ Installing Lume CLI..." + curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash + + # Add lume to PATH for this session if it's not already there + if ! command -v lume &> /dev/null; then + export PATH="$PATH:$HOME/.local/bin" + fi + fi + + # Pull the macOS CUA image if not already present + if ! lume ls | grep -q "macos-sequoia-cua"; then + # Check available disk space + IMAGE_SIZE_GB=30 + AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}') + AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024)) + + echo "šŸ“Š The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space." + echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system." + + # Prompt for confirmation + read -p " Continue? [y]/n: " CONTINUE + CONTINUE=${CONTINUE:-y} + + if [[ $CONTINUE =~ ^[Yy]$ ]]; then + echo "šŸ“„ Pulling macOS CUA image (this may take a while)..." + lume pull macos-sequoia-cua:latest + else + echo "āŒ Installation cancelled." + exit 1 + fi fi fi @@ -87,7 +161,7 @@ fi source "$VENV_DIR/bin/activate" # Install required packages -echo "šŸ“¦ Updating CUA packages..." +echo "šŸ“¦ Updating C/ua packages..." pip install -U pip setuptools wheel Cmake pip install -U cua-computer "cua-agent[all]" @@ -98,22 +172,30 @@ pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-posit DEMO_DIR="$HOME/.cua-demo" mkdir -p "$DEMO_DIR" -cat > "$DEMO_DIR/run_demo.py" << 'EOF' -import asyncio -import os -from computer import Computer -from agent import ComputerAgent, LLM, AgentLoop, LLMProvider -from agent.ui.gradio.app import create_gradio_ui - -# Try to load API keys from environment -api_key = os.environ.get("OPENAI_API_KEY", "") -if not api_key: - print("\nāš ļø No OpenAI API key found. You'll need to provide one in the UI.") - -# Launch the Gradio UI and open it in the browser -app = create_gradio_ui() -app.launch(share=False, inbrowser=True) +# Create .env.local file with API keys (only if it doesn't exist) +if [[ ! -f "$DEMO_DIR/.env.local" ]]; then + cat > "$DEMO_DIR/.env.local" << EOF +# Add your API keys here +OPENAI_API_KEY=your_openai_api_key_here +ANTHROPIC_API_KEY=your_anthropic_api_key_here +CUA_API_KEY=your_cua_api_key_here EOF + echo "šŸ“ Created .env.local file with API key placeholders" +else + echo "šŸ“ Found existing .env.local file - keeping your current settings" +fi + +if [[ "$USE_CLOUD" == "true" ]]; then + # Add CUA API key to .env.local if not already present + if ! grep -q "CUA_API_KEY" "$DEMO_DIR/.env.local"; then + echo "CUA_API_KEY=$CUA_API_KEY" >> "$DEMO_DIR/.env.local" + echo "šŸ”‘ Added CUA_API_KEY to .env.local" + elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$DEMO_DIR/.env.local"; then + # Update placeholder with actual key + sed -i.bak "s/CUA_API_KEY=your_cua_api_key_here/CUA_API_KEY=$CUA_API_KEY/" "$DEMO_DIR/.env.local" + echo "šŸ”‘ Updated CUA_API_KEY in .env.local" + fi +fi # Create a convenience script to run the demo cat > "$DEMO_DIR/start_demo.sh" << EOF @@ -125,20 +207,91 @@ EOF chmod +x "$DEMO_DIR/start_demo.sh" echo "āœ… Setup complete!" -echo "šŸ–„ļø You can start the CUA playground by running: $DEMO_DIR/start_demo.sh" -# Check if the VM is running -echo "šŸ” Checking if the macOS CUA VM is running..." -VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "") +if [[ "$USE_CLOUD" == "true" ]]; then + # Create run_demo.py for cloud containers + cat > "$DEMO_DIR/run_demo.py" << 'EOF' +import asyncio +import os +from pathlib import Path +from dotenv import load_dotenv +from computer import Computer +from agent import ComputerAgent, LLM, AgentLoop, LLMProvider +from agent.ui.gradio.app import create_gradio_ui -if [ -z "$VM_RUNNING" ]; then - echo "šŸš€ Starting the macOS CUA VM in the background..." - lume run macos-sequoia-cua:latest & - # Wait a moment for the VM to initialize - sleep 5 - echo "āœ… VM started successfully." +# Load environment variables from .env.local +load_dotenv(Path(__file__).parent / ".env.local") + +# Check for required API keys +cua_api_key = os.environ.get("CUA_API_KEY", "") +if not cua_api_key: + print("\nāŒ CUA_API_KEY not found in .env.local file.") + print("Please add your CUA API key to the .env.local file.") + exit(1) + +openai_key = os.environ.get("OPENAI_API_KEY", "") +anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "") + +if not openai_key and not anthropic_key: + print("\nāš ļø No OpenAI or Anthropic API keys found in .env.local.") + print("Please add at least one API key to use AI agents.") + +print("šŸš€ Starting CUA playground with Cloud Containers...") +print("šŸ“ Edit .env.local to update your API keys") + +# Launch the Gradio UI and open it in the browser +app = create_gradio_ui() +app.launch(share=False, inbrowser=True) +EOF else - echo "āœ… macOS CUA VM is already running." + # Create run_demo.py for local macOS VMs + cat > "$DEMO_DIR/run_demo.py" << 'EOF' +import asyncio +import os +from pathlib import Path +from dotenv import load_dotenv +from computer import Computer +from agent import ComputerAgent, LLM, AgentLoop, LLMProvider +from agent.ui.gradio.app import create_gradio_ui + +# Load environment variables from .env.local +load_dotenv(Path(__file__).parent / ".env.local") + +# Try to load API keys from environment +openai_key = os.environ.get("OPENAI_API_KEY", "") +anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "") + +if not openai_key and not anthropic_key: + print("\nāš ļø No OpenAI or Anthropic API keys found in .env.local.") + print("Please add at least one API key to use AI agents.") + +print("šŸš€ Starting CUA playground with local macOS VMs...") +print("šŸ“ Edit .env.local to update your API keys") + +# Launch the Gradio UI and open it in the browser +app = create_gradio_ui() +app.launch(share=False, inbrowser=True) +EOF +fi + +echo "ā˜ļø CUA Cloud Container setup complete!" +echo "šŸ“ Edit $DEMO_DIR/.env.local to update your API keys" +echo "šŸ–„ļø Start the playground by running: $DEMO_DIR/start_demo.sh" + +# Check if the VM is running (only for local setup) +if [[ "$USE_CLOUD" == "false" ]]; then + echo "šŸ” Checking if the macOS CUA VM is running..." + VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "") + + if [ -z "$VM_RUNNING" ]; then + echo "šŸš€ Starting the macOS CUA VM in the background..." + lume run macos-sequoia-cua:latest & + # Wait a moment for the VM to initialize + sleep 5 + echo "āœ… VM started successfully." + else + echo "āœ… macOS CUA VM is already running." + fi fi # Ask if the user wants to start the demo now