Merge branch 'main' into feat/multiplatform

2026-02-16 03:18:53 -06:00 · 2025-05-28 11:53:35 -04:00
parent c4130a3b9b 6a17e45c57
commit b752648458
6 changed files with 377 additions and 193 deletions
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
  <a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
 </div>

-**c/ua** (pronounced "koo-ah") enables AI agents to control full operating systems in high-performance virtual containers with near-native speed on Apple Silicon.
+**c/ua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.

 <div align="center">
  <video src="https://github.com/user-attachments/assets/c619b4ea-bb8e-4382-860e-f3757e36af20" width="800" controls></video>
@@ -21,15 +21,14 @@

 # 🚀 Quick Start

-Get started with a Computer-Use Agent UI and a VM with a single command:
-
+Launch a Computer-Use Agent UI with a single command:

 ```bash
 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/scripts/playground.sh)"
 ```

-
 This script will:
+- Prompt you to choose between [C/ua Cloud Containers](https://trycua.com) and local macOS VMs
 - Install Lume CLI for VM management (if needed)
 - Pull the latest macOS CUA image (if needed)
 - Set up Python environment and install/update required packages
@@ -81,21 +80,29 @@ from computer import Computer
 from agent import ComputerAgent, LLM

 async def main():
-    # Start a local macOS VM with a 1024x768 display
-    async with Computer(os_type="macos", display="1024x768") as computer:
+    # Start a local macOS VM
+    computer = Computer(os_type="macos")
+    await computer.run()

-        # Example: Direct control of a macOS VM with Computer
-        await computer.interface.left_click(100, 200)
-        await computer.interface.type_text("Hello, world!")
-        screenshot_bytes = await computer.interface.screenshot()
-        
-        # Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit
-        agent = ComputerAgent(
-          computer=computer,
-          loop="UITARS",
-          model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit")
-        )
-        await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide")
+    # Or with C/ua Cloud Container
+    computer = Computer(
+      os_type="linux",
+      api_key="your_cua_api_key_here",
+      name="your_container_name_here"
+    )
+
+    # Example: Direct control of the VM or cloud container through the Computer interface
+    await computer.interface.left_click(100, 200)
+    await computer.interface.type_text("Hello, world!")
+    screenshot_bytes = await computer.interface.screenshot()
+    
+    # Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit
+    agent = ComputerAgent(
+      computer=computer,
+      loop="UITARS",
+      model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit")
+    )
+    await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide")

 main()
 ```
--- a/examples/agent_examples.py
+++ b/examples/agent_examples.py
@@ -5,7 +5,7 @@ import logging
 import traceback
 import signal

-from computer import Computer
+from computer import Computer, VMProviderType

 # Import the unified agent class and types
 from agent import ComputerAgent, LLMProvider, LLM, AgentLoop
@@ -23,76 +23,88 @@ async def run_agent_example():
    print("\n=== Example: ComputerAgent with OpenAI and Omni provider ===")

    try:
+        # Create a local macOS computer
+        computer = Computer(
+            os_type="macos",
+            verbosity=logging.DEBUG,
+        )
+
+        # Create a remote Linux computer with C/ua
+        # computer = Computer(
+        #     os_type="linux",
+        #     api_key=os.getenv("CUA_API_KEY"),
+        #     name=os.getenv("CUA_CONTAINER_NAME"),
+        #     provider_type=VMProviderType.CLOUD,
+        # )
+
        # Create Computer instance with async context manager
-        async with Computer(verbosity=logging.DEBUG) as macos_computer:
-            # Create agent with loop and provider
-            agent = ComputerAgent(
-                computer=macos_computer,
-                # loop=AgentLoop.OPENAI,
-                # loop=AgentLoop.ANTHROPIC,
-                # loop=AgentLoop.UITARS,
-                loop=AgentLoop.OMNI,
-                # model=LLM(provider=LLMProvider.OPENAI),  # No model name for Operator CUA
-                # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
-                # model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
-                # model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
-                # model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
-                model=LLM(
-                    provider=LLMProvider.OAICOMPAT,
-                    name="gemma-3-12b-it",
-                    provider_base_url="http://localhost:1234/v1",  # LM Studio local endpoint
-                ),
-                save_trajectory=True,
-                only_n_most_recent_images=3,
-                verbosity=logging.DEBUG,
-            )
+        agent = ComputerAgent(
+            computer=computer,
+            loop=AgentLoop.OPENAI,
+            # loop=AgentLoop.ANTHROPIC,
+            # loop=AgentLoop.UITARS,
+            # loop=AgentLoop.OMNI,
+            model=LLM(provider=LLMProvider.OPENAI),  # No model name for Operator CUA
+            # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
+            # model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
+            # model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
+            # model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
+            # model=LLM(
+            #     provider=LLMProvider.OAICOMPAT,
+            #     name="gemma-3-12b-it",
+            #     provider_base_url="http://localhost:1234/v1",  # LM Studio local endpoint
+            # ),
+            save_trajectory=True,
+            only_n_most_recent_images=3,
+            verbosity=logging.DEBUG,
+        )

-            tasks = [
-                "Look for a repository named trycua/cua on GitHub.",
-                "Check the open issues, open the most recent one and read it.",
-                "Clone the repository in users/lume/projects if it doesn't exist yet.",
-                "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
-                "From Cursor, open Composer if not already open.",
-                "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
-            ]
+        tasks = [
+            "Look for a repository named trycua/cua on GitHub.",
+            "Check the open issues, open the most recent one and read it.",
+            "Clone the repository in users/lume/projects if it doesn't exist yet.",
+            "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
+            "From Cursor, open Composer if not already open.",
+            "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
+        ]

-            for i, task in enumerate(tasks):
-                print(f"\nExecuting task {i}/{len(tasks)}: {task}")
-                async for result in agent.run(task):
-                    print("Response ID: ", result.get("id"))
+        for i, task in enumerate(tasks):
+            print(f"\nExecuting task {i}/{len(tasks)}: {task}")
+            async for result in agent.run(task):
+                print("Response ID: ", result.get("id"))

-                    # Print detailed usage information
-                    usage = result.get("usage")
-                    if usage:
-                        print("\nUsage Details:")
-                        print(f"  Input Tokens: {usage.get('input_tokens')}")
-                        if "input_tokens_details" in usage:
-                            print(f"  Input Tokens Details: {usage.get('input_tokens_details')}")
-                        print(f"  Output Tokens: {usage.get('output_tokens')}")
-                        if "output_tokens_details" in usage:
-                            print(f"  Output Tokens Details: {usage.get('output_tokens_details')}")
-                        print(f"  Total Tokens: {usage.get('total_tokens')}")
+                # Print detailed usage information
+                usage = result.get("usage")
+                if usage:
+                    print("\nUsage Details:")
+                    print(f"  Input Tokens: {usage.get('input_tokens')}")
+                    if "input_tokens_details" in usage:
+                        print(f"  Input Tokens Details: {usage.get('input_tokens_details')}")
+                    print(f"  Output Tokens: {usage.get('output_tokens')}")
+                    if "output_tokens_details" in usage:
+                        print(f"  Output Tokens Details: {usage.get('output_tokens_details')}")
+                    print(f"  Total Tokens: {usage.get('total_tokens')}")

-                    print("Response Text: ", result.get("text"))
+                print("Response Text: ", result.get("text"))

-                    # Print tools information
-                    tools = result.get("tools")
-                    if tools:
-                        print("\nTools:")
-                        print(tools)
+                # Print tools information
+                tools = result.get("tools")
+                if tools:
+                    print("\nTools:")
+                    print(tools)

-                    # Print reasoning and tool call outputs
-                    outputs = result.get("output", [])
-                    for output in outputs:
-                        output_type = output.get("type")
-                        if output_type == "reasoning":
-                            print("\nReasoning Output:")
-                            print(output)
-                        elif output_type == "computer_call":
-                            print("\nTool Call Output:")
-                            print(output)
+                # Print reasoning and tool call outputs
+                outputs = result.get("output", [])
+                for output in outputs:
+                    output_type = output.get("type")
+                    if output_type == "reasoning":
+                        print("\nReasoning Output:")
+                        print(output)
+                    elif output_type == "computer_call":
+                        print("\nTool Call Output:")
+                        print(output)

-                print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
+            print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")

    except Exception as e:
        logger.error(f"Error in run_agent_example: {e}")
--- a/examples/computer_examples.py
+++ b/examples/computer_examples.py
@@ -16,17 +16,18 @@ load_dotenv(env_file)
 pythonpath = os.environ.get("PYTHONPATH", "")
 for path in pythonpath.split(":"):
    if path and path not in sys.path:
-        sys.path.append(path)
+        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

-from computer import Computer, VMProviderType
+from computer.computer import Computer
+from computer.providers.base import VMProviderType
 from computer.logger import LogLevel

 async def main():
    try:
        print("\n=== Using direct initialization ===")

-        # Create computer with configured host
+        # Create a local macOS computer
        computer = Computer(
            display="1024x768", 
            memory="8GB", 
@@ -41,7 +42,8 @@ async def main():
            ],
            ephemeral=False,
        )
-        
+
+        # Create a remote Linux computer with C/ua
        # computer = Computer(
        #     os_type="linux",
        #     api_key=os.getenv("CUA_API_KEY"),
@@ -54,8 +56,15 @@ async def main():
            await computer.run()
            
            screenshot = await computer.interface.screenshot()
-            with open(Path("~/cua/examples/screenshot.png").expanduser(), "wb") as f:
+            
+            # Create output directory if it doesn't exist
+            output_dir = Path("./output")
+            output_dir.mkdir(exist_ok=True)
+            
+            screenshot_path = output_dir / "screenshot.png"
+            with open(screenshot_path, "wb") as f:
                f.write(screenshot)
+            print(f"Screenshot saved to: {screenshot_path.absolute()}")
            
            # await computer.interface.hotkey("command", "space")

--- a/libs/agent/agent/ui/gradio/app.py
+++ b/libs/agent/agent/ui/gradio/app.py
@@ -446,7 +446,8 @@ def create_gradio_ui(
    # Check for API keys
    openai_api_key = os.environ.get("OPENAI_API_KEY", "")
    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
-
+    cua_api_key = os.environ.get("CUA_API_KEY", "")
+    
    # Always show models regardless of API key availability
    openai_models = ["OpenAI: Computer-Use Preview"]
    anthropic_models = [
@@ -464,9 +465,11 @@ def create_gradio_ui(
    # Check if API keys are available
    has_openai_key = bool(openai_api_key)
    has_anthropic_key = bool(anthropic_api_key)
+    has_cua_key = bool(cua_api_key)
    
    print("has_openai_key", has_openai_key)
    print("has_anthropic_key", has_anthropic_key)
+    print("has_cua_key", has_cua_key)

    # Get Ollama models for OMNI
    ollama_models = get_ollama_models()
@@ -752,6 +755,7 @@ if __name__ == "__main__":
                        value="",
                        type="password",
                        info="Required for cloud provider",
+                        visible=(not has_cua_key)
                    )
                    
                with gr.Accordion("Agent Configuration", open=True):
@@ -1176,6 +1180,8 @@ if __name__ == "__main__":
                        else:
                            # For Ollama or default OAICOMPAT (without custom key), no key needed/expected
                            api_key = ""
+                            
+                        cua_cloud_api_key = cua_cloud_api_key or os.environ.get("CUA_API_KEY", "")

                        # --- Save Settings Before Running Agent ---
                        current_settings = {
--- a/libs/computer/computer/interface/linux.py
+++ b/libs/computer/computer/interface/linux.py
@@ -26,6 +26,7 @@ class LinuxComputerInterface(BaseComputerInterface):
        self._reconnect_delay = 1  # Start with 1 second delay
        self._max_reconnect_delay = 30  # Maximum delay between reconnection attempts
        self._log_connection_attempts = True  # Flag to control connection attempt logging
+        self._authenticated = False  # Track authentication status

        # Set logger name for Linux interface
        self.logger = Logger("cua.interface.linux", LogLevel.NORMAL)
@@ -89,34 +90,14 @@ class LinuxComputerInterface(BaseComputerInterface):
                        )
                        self.logger.info("WebSocket connection established")
                        
-                        # If api_key and vm_name are provided, perform authentication handshake
-                        if self.api_key and self.vm_name:
-                            self.logger.info("Performing authentication handshake...")
-                            auth_message = {
-                                "command": "authenticate",
-                                "params": {
-                                    "api_key": self.api_key,
-                                    "container_name": self.vm_name
-                                }
-                            }
-                            await self._ws.send(json.dumps(auth_message))
-                            
-                            # Wait for authentication response
-                            auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
-                            auth_result = json.loads(auth_response)
-                            
-                            if not auth_result.get("success"):
-                                error_msg = auth_result.get("error", "Authentication failed")
-                                self.logger.error(f"Authentication failed: {error_msg}")
-                                await self._ws.close()
-                                self._ws = None
-                                raise ConnectionError(f"Authentication failed: {error_msg}")
-                            
-                            self.logger.info("Authentication successful")
+                        # Authentication will be handled by the first command that needs it
+                        # Don't do authentication here to avoid recv conflicts
                        
                        self._reconnect_delay = 1  # Reset reconnect delay on successful connection
                        self._last_ping = time.time()
                        retry_count = 0  # Reset retry count on successful connection
+                        self._authenticated = False  # Reset auth status on new connection
+
                    except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
                        next_retry = self._reconnect_delay

@@ -140,13 +121,6 @@ class LinuxComputerInterface(BaseComputerInterface):
                                pass
                        self._ws = None

-                        # Use exponential backoff for connection retries
-                        await asyncio.sleep(self._reconnect_delay)
-                        self._reconnect_delay = min(
-                            self._reconnect_delay * 2, self._max_reconnect_delay
-                        )
-                        continue
-
                # Regular ping to check connection
                if self._ws and self._ws.state == websockets.protocol.State.OPEN:
                    try:
@@ -225,6 +199,31 @@ class LinuxComputerInterface(BaseComputerInterface):
                if not self._ws:
                    raise ConnectionError("WebSocket connection is not established")

+                # Handle authentication if needed
+                if self.api_key and self.vm_name and not self._authenticated:
+                    self.logger.info("Performing authentication handshake...")
+                    auth_message = {
+                        "command": "authenticate",
+                        "params": {
+                            "api_key": self.api_key,
+                            "container_name": self.vm_name
+                        }
+                    }
+                    await self._ws.send(json.dumps(auth_message))
+                    
+                    # Wait for authentication response
+                    auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
+                    auth_result = json.loads(auth_response)
+                    
+                    if not auth_result.get("success"):
+                        error_msg = auth_result.get("error", "Authentication failed")
+                        self.logger.error(f"Authentication failed: {error_msg}")
+                        self._authenticated = False
+                        raise ConnectionError(f"Authentication failed: {error_msg}")
+                    
+                    self.logger.info("Authentication successful")
+                    self._authenticated = True
+
                message = {"command": command, "params": params or {}}
                await self._ws.send(json.dumps(message))
                response = await asyncio.wait_for(self._ws.recv(), timeout=30)
@@ -245,9 +244,7 @@ class LinuxComputerInterface(BaseComputerInterface):
                        f"Failed to send command '{command}' after {max_retries} retries"
                    )
                    self.logger.debug(f"Command failure details: {e}")
-                    raise
-
-        raise last_error if last_error else RuntimeError("Failed to send command")
+                raise last_error if last_error else RuntimeError("Failed to send command")

    async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
        """Wait for WebSocket connection to become available."""
--- a/scripts/playground.sh
+++ b/scripts/playground.sh
@@ -2,63 +2,137 @@

 set -e

-echo "🚀 Setting up CUA playground environment..."
+echo "🚀 Setting up C/ua playground environment..."

-# Check for Apple Silicon Mac
-if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
-  echo "❌ This script requires an Apple Silicon Mac (M1/M2/M3/M4)."
-  exit 1
-fi
-
-# Check for macOS 15 (Sequoia) or newer
-OSVERSION=$(sw_vers -productVersion)
-if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
-  echo "❌ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION."
-  exit 1
-fi
-
-# Create a temporary directory for our work
-TMP_DIR=$(mktemp -d)
-cd "$TMP_DIR"
+# Save the original working directory
+ORIGINAL_DIR="$(pwd)"

 # Function to clean up on exit
 cleanup() {
  cd ~
-  rm -rf "$TMP_DIR"
+  rm -rf "$TMP_DIR" 2>/dev/null || true
 }
+
+# Create a temporary directory for our work
+TMP_DIR=$(mktemp -d)
+cd "$TMP_DIR"
 trap cleanup EXIT

-# Install Lume if not already installed
-if ! command -v lume &> /dev/null; then
-  echo "📦 Installing Lume CLI..."
-  curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
+# Ask user to choose between local macOS VMs or C/ua Cloud Containers
+echo ""
+echo "Choose your C/ua setup:"
+echo "1) ☁️  C/ua Cloud Containers (works on any system)"
+echo "2) 🖥️  Local macOS VMs (requires Apple Silicon Mac + macOS 15+)"
+echo ""
+read -p "Enter your choice (1 or 2): " CHOICE
+
+if [[ "$CHOICE" == "1" ]]; then
+  # C/ua Cloud Container setup
+  echo ""
+  echo "☁️ Setting up C/ua Cloud Containers..."
+  echo ""
  
-  # Add lume to PATH for this session if it's not already there
-  if ! command -v lume &> /dev/null; then
-    export PATH="$PATH:$HOME/.local/bin"
+  # Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir)
+  DEMO_DIR="$HOME/.cua-demo"
+  # Look for .env.local in the original working directory (before cd to temp dir)
+  CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local"
+  DEMO_ENV_FILE="$DEMO_DIR/.env.local"
+  
+  CUA_API_KEY=""
+  
+  # First check current directory
+  if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then
+    EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
+    if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
+      CUA_API_KEY="$EXISTING_CUA_KEY"
+    fi
  fi
+  
+  # Then check demo directory if not found in current dir
+  if [[ -z "$CUA_API_KEY" ]] && [[ -f "$DEMO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$DEMO_ENV_FILE"; then
+    EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$DEMO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
+    if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
+      CUA_API_KEY="$EXISTING_CUA_KEY"
+    fi
+  fi
+  
+  # If no valid API key found, prompt for one
+  if [[ -z "$CUA_API_KEY" ]]; then
+    echo "To use C/ua Cloud Containers, you need to:"
+    echo "1. Sign up at https://trycua.com"
+    echo "2. Create a Cloud Container"
+    echo "3. Generate an Api Key"
+    echo ""
+    read -p "Enter your C/ua Api Key: " CUA_API_KEY
+    
+    if [[ -z "$CUA_API_KEY" ]]; then
+      echo "❌ C/ua Api Key is required for Cloud Containers."
+      exit 1
+    fi
+  fi
+  
+  USE_CLOUD=true
+
+elif [[ "$CHOICE" == "2" ]]; then
+  # Local macOS VM setup
+  echo ""
+  echo "🖥️ Setting up local macOS VMs..."
+  
+  # Check for Apple Silicon Mac
+  if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
+    echo "❌ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)."
+    echo "💡 Consider using C/ua Cloud Containers instead (option 1)."
+    exit 1
+  fi
+
+  # Check for macOS 15 (Sequoia) or newer
+  OSVERSION=$(sw_vers -productVersion)
+  if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
+    echo "❌ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION."
+    echo "💡 Consider using C/ua Cloud Containers instead (option 1)."
+    exit 1
+  fi
+
+  USE_CLOUD=false
+
+else
+  echo "❌ Invalid choice. Please run the script again and choose 1 or 2."
+  exit 1
 fi

-# Pull the macOS CUA image if not already present
-if ! lume ls | grep -q "macos-sequoia-cua"; then
-  # Check available disk space
-  IMAGE_SIZE_GB=30
-  AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
-  AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))
-  
-  echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
-  echo "   You currently have ${AVAILABLE_SPACE_GB}GB available on your system."
-  
-  # Prompt for confirmation
-  read -p "   Continue? [y]/n: " CONTINUE
-  CONTINUE=${CONTINUE:-y}
-  
-  if [[ $CONTINUE =~ ^[Yy]$ ]]; then
-    echo "📥 Pulling macOS CUA image (this may take a while)..."
-    lume pull macos-sequoia-cua:latest
-  else
-    echo "❌ Installation cancelled."
-    exit 1
+# Install Lume if not already installed (only for local VMs)
+if [[ "$USE_CLOUD" == "false" ]]; then
+  if ! command -v lume &> /dev/null; then
+    echo "📦 Installing Lume CLI..."
+    curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
+    
+    # Add lume to PATH for this session if it's not already there
+    if ! command -v lume &> /dev/null; then
+      export PATH="$PATH:$HOME/.local/bin"
+    fi
+  fi
+
+  # Pull the macOS CUA image if not already present
+  if ! lume ls | grep -q "macos-sequoia-cua"; then
+    # Check available disk space
+    IMAGE_SIZE_GB=30
+    AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
+    AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))
+    
+    echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
+    echo "   You currently have ${AVAILABLE_SPACE_GB}GB available on your system."
+    
+    # Prompt for confirmation
+    read -p "   Continue? [y]/n: " CONTINUE
+    CONTINUE=${CONTINUE:-y}
+    
+    if [[ $CONTINUE =~ ^[Yy]$ ]]; then
+      echo "📥 Pulling macOS CUA image (this may take a while)..."
+      lume pull macos-sequoia-cua:latest
+    else
+      echo "❌ Installation cancelled."
+      exit 1
+    fi
  fi
 fi

@@ -87,7 +161,7 @@ fi
 source "$VENV_DIR/bin/activate"

 # Install required packages
-echo "📦 Updating CUA packages..."
+echo "📦 Updating C/ua packages..."
 pip install -U pip setuptools wheel Cmake
 pip install -U cua-computer "cua-agent[all]"

@@ -98,22 +172,30 @@ pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-posit
 DEMO_DIR="$HOME/.cua-demo"
 mkdir -p "$DEMO_DIR"

-cat > "$DEMO_DIR/run_demo.py" << 'EOF'
-import asyncio
-import os
-from computer import Computer
-from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
-from agent.ui.gradio.app import create_gradio_ui
-
-# Try to load API keys from environment
-api_key = os.environ.get("OPENAI_API_KEY", "")
-if not api_key:
-    print("\n⚠️  No OpenAI API key found. You'll need to provide one in the UI.")
-
-# Launch the Gradio UI and open it in the browser
-app = create_gradio_ui()
-app.launch(share=False, inbrowser=True)
+# Create .env.local file with API keys (only if it doesn't exist)
+if [[ ! -f "$DEMO_DIR/.env.local" ]]; then
+  cat > "$DEMO_DIR/.env.local" << EOF
+# Uncomment and add your API keys here
+# OPENAI_API_KEY=your_openai_api_key_here
+# ANTHROPIC_API_KEY=your_anthropic_api_key_here
+CUA_API_KEY=your_cua_api_key_here
 EOF
+  echo "📝 Created .env.local file with API key placeholders"
+else
+  echo "📝 Found existing .env.local file - keeping your current settings"
+fi
+
+if [[ "$USE_CLOUD" == "true" ]]; then
+  # Add CUA API key to .env.local if not already present
+  if ! grep -q "CUA_API_KEY" "$DEMO_DIR/.env.local"; then
+    echo "CUA_API_KEY=$CUA_API_KEY" >> "$DEMO_DIR/.env.local"
+    echo "🔑 Added CUA_API_KEY to .env.local"
+  elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$DEMO_DIR/.env.local"; then
+    # Update placeholder with actual key
+    sed -i.bak "s/CUA_API_KEY=your_cua_api_key_here/CUA_API_KEY=$CUA_API_KEY/" "$DEMO_DIR/.env.local"
+    echo "🔑 Updated CUA_API_KEY in .env.local"
+  fi
+fi

 # Create a convenience script to run the demo
 cat > "$DEMO_DIR/start_demo.sh" << EOF
@@ -125,20 +207,91 @@ EOF
 chmod +x "$DEMO_DIR/start_demo.sh"

 echo "✅ Setup complete!"
-echo "🖥️  You can start the CUA playground by running: $DEMO_DIR/start_demo.sh"

-# Check if the VM is running
-echo "🔍 Checking if the macOS CUA VM is running..."
-VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")
+if [[ "$USE_CLOUD" == "true" ]]; then
+  # Create run_demo.py for cloud containers
+  cat > "$DEMO_DIR/run_demo.py" << 'EOF'
+import asyncio
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+from computer import Computer
+from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
+from agent.ui.gradio.app import create_gradio_ui

-if [ -z "$VM_RUNNING" ]; then
-  echo "🚀 Starting the macOS CUA VM in the background..."
-  lume run macos-sequoia-cua:latest &
-  # Wait a moment for the VM to initialize
-  sleep 5
-  echo "✅ VM started successfully."
+# Load environment variables from .env.local
+load_dotenv(Path(__file__).parent / ".env.local")
+
+# Check for required API keys
+cua_api_key = os.environ.get("CUA_API_KEY", "")
+if not cua_api_key:
+    print("\n❌ CUA_API_KEY not found in .env.local file.")
+    print("Please add your CUA API key to the .env.local file.")
+    exit(1)
+
+openai_key = os.environ.get("OPENAI_API_KEY", "")
+anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
+
+if not openai_key and not anthropic_key:
+    print("\n⚠️  No OpenAI or Anthropic API keys found in .env.local.")
+    print("Please add at least one API key to use AI agents.")
+
+print("🚀 Starting CUA playground with Cloud Containers...")
+print("📝 Edit .env.local to update your API keys")
+
+# Launch the Gradio UI and open it in the browser
+app = create_gradio_ui()
+app.launch(share=False, inbrowser=True)
+EOF
 else
-  echo "✅ macOS CUA VM is already running."
+  # Create run_demo.py for local macOS VMs
+  cat > "$DEMO_DIR/run_demo.py" << 'EOF'
+import asyncio
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+from computer import Computer
+from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
+from agent.ui.gradio.app import create_gradio_ui
+
+# Load environment variables from .env.local
+load_dotenv(Path(__file__).parent / ".env.local")
+
+# Try to load API keys from environment
+openai_key = os.environ.get("OPENAI_API_KEY", "")
+anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
+
+if not openai_key and not anthropic_key:
+    print("\n⚠️  No OpenAI or Anthropic API keys found in .env.local.")
+    print("Please add at least one API key to use AI agents.")
+
+print("🚀 Starting CUA playground with local macOS VMs...")
+print("📝 Edit .env.local to update your API keys")
+
+# Launch the Gradio UI and open it in the browser
+app = create_gradio_ui()
+app.launch(share=False, inbrowser=True)
+EOF
+fi
+
+echo "☁️  CUA Cloud Container setup complete!"
+echo "📝 Edit $DEMO_DIR/.env.local to update your API keys"
+echo "🖥️  Start the playground by running: $DEMO_DIR/start_demo.sh"
+
+# Check if the VM is running (only for local setup)
+if [[ "$USE_CLOUD" == "false" ]]; then
+  echo "🔍 Checking if the macOS CUA VM is running..."
+  VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")
+
+  if [ -z "$VM_RUNNING" ]; then
+    echo "🚀 Starting the macOS CUA VM in the background..."
+    lume run macos-sequoia-cua:latest &
+    # Wait a moment for the VM to initialize
+    sleep 5
+    echo "✅ VM started successfully."
+  else
+    echo "✅ macOS CUA VM is already running."
+  fi
 fi

 # Ask if the user wants to start the demo now