Merge branch 'main' into feat/multiplatform

This commit is contained in:
Dillon DuPont
2025-05-28 11:53:35 -04:00
6 changed files with 377 additions and 193 deletions

View File

@@ -13,7 +13,7 @@
<a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</div>
**c/ua** (pronounced "koo-ah") enables AI agents to control full operating systems in high-performance virtual containers with near-native speed on Apple Silicon.
**c/ua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
<div align="center">
<video src="https://github.com/user-attachments/assets/c619b4ea-bb8e-4382-860e-f3757e36af20" width="800" controls></video>
@@ -21,15 +21,14 @@
# 🚀 Quick Start
Get started with a Computer-Use Agent UI and a VM with a single command:
Get started with a Computer-Use Agent UI with a single command:
```bash
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/scripts/playground.sh)"
```
This script will:
- Prompt you to choose between [C/ua Cloud Containers](https://trycua.com) or local macOS VMs
- Install Lume CLI for VM management (if needed)
- Pull the latest macOS CUA image (if needed)
- Set up Python environment and install/update required packages
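The setup persists between runs. If you have already been through it once, you can relaunch the playground UI directly from the demo directory the script creates (a sketch, assuming the default `~/.cua-demo` location used by the script below):
```bash
# Relaunch the playground UI from a previous install
~/.cua-demo/start_demo.sh
```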
@@ -81,21 +80,29 @@ from computer import Computer
from agent import ComputerAgent, LLM
async def main():
# Start a local macOS VM with a 1024x768 display
async with Computer(os_type="macos", display="1024x768") as computer:
# Start a local macOS VM
computer = Computer(os_type="macos")
await computer.run()
# Or with a C/ua Cloud Container:
# computer = Computer(
#     os_type="linux",
#     api_key="your_cua_api_key_here",
#     name="your_container_name_here"
# )
# Example: Direct control of a macOS VM with Computer
await computer.interface.left_click(100, 200)
await computer.interface.type_text("Hello, world!")
screenshot_bytes = await computer.interface.screenshot()
# Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit
agent = ComputerAgent(
computer=computer,
loop="UITARS",
model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit")
)
await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide")
asyncio.run(main())
```
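To run this snippet outside the playground script, you need the same packages the script installs; a minimal setup, mirroring the pip commands that appear later in this commit:
```bash
pip install -U cua-computer "cua-agent[all]"
```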

View File

@@ -5,7 +5,7 @@ import logging
import traceback
import signal
from computer import Computer
from computer import Computer, VMProviderType
# Import the unified agent class and types
from agent import ComputerAgent, LLMProvider, LLM, AgentLoop
@@ -23,76 +23,88 @@ async def run_agent_example():
print("\n=== Example: ComputerAgent with OpenAI and Omni provider ===")
try:
# Create a local macOS computer
computer = Computer(
os_type="macos",
verbosity=logging.DEBUG,
)
# Create a remote Linux computer with C/ua
# computer = Computer(
# os_type="linux",
# api_key=os.getenv("CUA_API_KEY"),
# name=os.getenv("CUA_CONTAINER_NAME"),
# provider_type=VMProviderType.CLOUD,
# )
# Create Computer instance with async context manager
async with Computer(verbosity=logging.DEBUG) as macos_computer:
# Create agent with loop and provider
agent = ComputerAgent(
computer=macos_computer,
# loop=AgentLoop.OPENAI,
# loop=AgentLoop.ANTHROPIC,
# loop=AgentLoop.UITARS,
loop=AgentLoop.OMNI,
# model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA
# model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
# model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
# model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
model=LLM(
provider=LLMProvider.OAICOMPAT,
name="gemma-3-12b-it",
provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint
),
save_trajectory=True,
only_n_most_recent_images=3,
verbosity=logging.DEBUG,
)
agent = ComputerAgent(
computer=computer,
loop=AgentLoop.OPENAI,
# loop=AgentLoop.ANTHROPIC,
# loop=AgentLoop.UITARS,
# loop=AgentLoop.OMNI,
model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA
# model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
# model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
# model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
# model=LLM(
# provider=LLMProvider.OAICOMPAT,
# name="gemma-3-12b-it",
# provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint
# ),
save_trajectory=True,
only_n_most_recent_images=3,
verbosity=logging.DEBUG,
)
tasks = [
"Look for a repository named trycua/cua on GitHub.",
"Check the open issues, open the most recent one and read it.",
"Clone the repository in users/lume/projects if it doesn't exist yet.",
"Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
"From Cursor, open Composer if not already open.",
"Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
]
for i, task in enumerate(tasks):
print(f"\nExecuting task {i}/{len(tasks)}: {task}")
async for result in agent.run(task):
print("Response ID: ", result.get("id"))
# Print detailed usage information
usage = result.get("usage")
if usage:
print("\nUsage Details:")
print(f" Input Tokens: {usage.get('input_tokens')}")
if "input_tokens_details" in usage:
print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
print(f" Output Tokens: {usage.get('output_tokens')}")
if "output_tokens_details" in usage:
print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
print(f" Total Tokens: {usage.get('total_tokens')}")
print("Response Text: ", result.get("text"))
print("Response Text: ", result.get("text"))
# Print tools information
tools = result.get("tools")
if tools:
print("\nTools:")
print(tools)
# Print reasoning and tool call outputs
outputs = result.get("output", [])
for output in outputs:
output_type = output.get("type")
if output_type == "reasoning":
print("\nReasoning Output:")
print(output)
elif output_type == "computer_call":
print("\nTool Call Output:")
print(output)
print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
except Exception as e:
logger.error(f"Error in run_agent_example: {e}")
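The commented-out loop and model options above can be swapped in directly. For instance, a fully local configuration using the MLX UI-TARS checkpoint listed among the alternatives (a sketch reusing only names that appear in this example):
```python
agent = ComputerAgent(
    computer=computer,
    loop=AgentLoop.UITARS,
    model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
    save_trajectory=True,
    only_n_most_recent_images=3,
    verbosity=logging.DEBUG,
)
```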

View File

@@ -16,17 +16,18 @@ load_dotenv(env_file)
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
if path and path not in sys.path:
sys.path.append(path)
sys.path.insert(0, path) # Insert at beginning to prioritize
print(f"Added to sys.path: {path}")
from computer import Computer, VMProviderType
from computer.computer import Computer
from computer.providers.base import VMProviderType
from computer.logger import LogLevel
async def main():
try:
print("\n=== Using direct initialization ===")
# Create computer with configured host
# Create a local macOS computer
computer = Computer(
display="1024x768",
memory="8GB",
@@ -41,7 +42,8 @@ async def main():
],
ephemeral=False,
)
# Create a remote Linux computer with C/ua
# computer = Computer(
# os_type="linux",
# api_key=os.getenv("CUA_API_KEY"),
@@ -54,8 +56,15 @@ async def main():
await computer.run()
screenshot = await computer.interface.screenshot()
with open(Path("~/cua/examples/screenshot.png").expanduser(), "wb") as f:
# Create output directory if it doesn't exist
output_dir = Path("./output")
output_dir.mkdir(exist_ok=True)
screenshot_path = output_dir / "screenshot.png"
with open(screenshot_path, "wb") as f:
f.write(screenshot)
print(f"Screenshot saved to: {screenshot_path.absolute()}")
# await computer.interface.hotkey("command", "space")
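The example is driven by an async `main()`; the usual entry point, assumed here since it falls outside this hunk, would be:
```python
if __name__ == "__main__":
    asyncio.run(main())
```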

View File

@@ -446,7 +446,8 @@ def create_gradio_ui(
# Check for API keys
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
cua_api_key = os.environ.get("CUA_API_KEY", "")
# Always show models regardless of API key availability
openai_models = ["OpenAI: Computer-Use Preview"]
anthropic_models = [
@@ -464,9 +465,11 @@ def create_gradio_ui(
# Check if API keys are available
has_openai_key = bool(openai_api_key)
has_anthropic_key = bool(anthropic_api_key)
has_cua_key = bool(cua_api_key)
print("has_openai_key", has_openai_key)
print("has_anthropic_key", has_anthropic_key)
print("has_cua_key", has_cua_key)
# Get Ollama models for OMNI
ollama_models = get_ollama_models()
@@ -752,6 +755,7 @@ if __name__ == "__main__":
value="",
type="password",
info="Required for cloud provider",
visible=(not has_cua_key)
)
with gr.Accordion("Agent Configuration", open=True):
@@ -1176,6 +1180,8 @@ if __name__ == "__main__":
else:
# For Ollama or default OAICOMPAT (without custom key), no key needed/expected
api_key = ""
cua_cloud_api_key = cua_cloud_api_key or os.environ.get("CUA_API_KEY", "")
# --- Save Settings Before Running Agent ---
current_settings = {

View File

@@ -26,6 +26,7 @@ class LinuxComputerInterface(BaseComputerInterface):
self._reconnect_delay = 1 # Start with 1 second delay
self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
self._log_connection_attempts = True # Flag to control connection attempt logging
self._authenticated = False # Track authentication status
# Set logger name for Linux interface
self.logger = Logger("cua.interface.linux", LogLevel.NORMAL)
@@ -89,34 +90,14 @@ class LinuxComputerInterface(BaseComputerInterface):
)
self.logger.info("WebSocket connection established")
# If api_key and vm_name are provided, perform authentication handshake
if self.api_key and self.vm_name:
self.logger.info("Performing authentication handshake...")
auth_message = {
"command": "authenticate",
"params": {
"api_key": self.api_key,
"container_name": self.vm_name
}
}
await self._ws.send(json.dumps(auth_message))
# Wait for authentication response
auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
auth_result = json.loads(auth_response)
if not auth_result.get("success"):
error_msg = auth_result.get("error", "Authentication failed")
self.logger.error(f"Authentication failed: {error_msg}")
await self._ws.close()
self._ws = None
raise ConnectionError(f"Authentication failed: {error_msg}")
self.logger.info("Authentication successful")
# Authentication will be handled by the first command that needs it
# Don't do authentication here to avoid recv conflicts
self._reconnect_delay = 1 # Reset reconnect delay on successful connection
self._last_ping = time.time()
retry_count = 0 # Reset retry count on successful connection
self._authenticated = False # Reset auth status on new connection
except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
next_retry = self._reconnect_delay
@@ -140,13 +121,6 @@ class LinuxComputerInterface(BaseComputerInterface):
pass
self._ws = None
# Use exponential backoff for connection retries
await asyncio.sleep(self._reconnect_delay)
self._reconnect_delay = min(
self._reconnect_delay * 2, self._max_reconnect_delay
)
continue
# Regular ping to check connection
if self._ws and self._ws.state == websockets.protocol.State.OPEN:
try:
@@ -225,6 +199,31 @@ class LinuxComputerInterface(BaseComputerInterface):
if not self._ws:
raise ConnectionError("WebSocket connection is not established")
# Handle authentication if needed
if self.api_key and self.vm_name and not self._authenticated:
self.logger.info("Performing authentication handshake...")
auth_message = {
"command": "authenticate",
"params": {
"api_key": self.api_key,
"container_name": self.vm_name
}
}
await self._ws.send(json.dumps(auth_message))
# Wait for authentication response
auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
auth_result = json.loads(auth_response)
if not auth_result.get("success"):
error_msg = auth_result.get("error", "Authentication failed")
self.logger.error(f"Authentication failed: {error_msg}")
self._authenticated = False
raise ConnectionError(f"Authentication failed: {error_msg}")
self.logger.info("Authentication successful")
self._authenticated = True
message = {"command": command, "params": params or {}}
await self._ws.send(json.dumps(message))
response = await asyncio.wait_for(self._ws.recv(), timeout=30)
@@ -245,9 +244,7 @@ class LinuxComputerInterface(BaseComputerInterface):
f"Failed to send command '{command}' after {max_retries} retries"
)
self.logger.debug(f"Command failure details: {e}")
raise
raise last_error if last_error else RuntimeError("Failed to send command")
async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
"""Wait for WebSocket connection to become available."""

View File

@@ -2,63 +2,137 @@
set -e
echo "🚀 Setting up CUA playground environment..."
echo "🚀 Setting up C/ua playground environment..."
# Check for Apple Silicon Mac
if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
echo "❌ This script requires an Apple Silicon Mac (M1/M2/M3/M4)."
exit 1
fi
# Check for macOS 15 (Sequoia) or newer
OSVERSION=$(sw_vers -productVersion)
if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
echo "❌ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION."
exit 1
fi
# Save the original working directory
ORIGINAL_DIR="$(pwd)"
# Function to clean up on exit
cleanup() {
cd ~
rm -rf "$TMP_DIR"
rm -rf "$TMP_DIR" 2>/dev/null || true
}
# Create a temporary directory for our work
TMP_DIR=$(mktemp -d)
cd "$TMP_DIR"
trap cleanup EXIT
# Ask user to choose between local macOS VMs or C/ua Cloud Containers
echo ""
echo "Choose your C/ua setup:"
echo "1) ☁️ C/ua Cloud Containers (works on any system)"
echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)"
echo ""
read -p "Enter your choice (1 or 2): " CHOICE
if [[ "$CHOICE" == "1" ]]; then
# C/ua Cloud Container setup
echo ""
echo "☁️ Setting up C/ua Cloud Containers..."
echo ""
# Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir)
DEMO_DIR="$HOME/.cua-demo"
# Look for .env.local in the original working directory (before cd to temp dir)
CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local"
DEMO_ENV_FILE="$DEMO_DIR/.env.local"
CUA_API_KEY=""
# First check current directory
if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then
EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
CUA_API_KEY="$EXISTING_CUA_KEY"
fi
fi
# Then check demo directory if not found in current dir
if [[ -z "$CUA_API_KEY" ]] && [[ -f "$DEMO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$DEMO_ENV_FILE"; then
EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$DEMO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
CUA_API_KEY="$EXISTING_CUA_KEY"
fi
fi
# If no valid API key found, prompt for one
if [[ -z "$CUA_API_KEY" ]]; then
echo "To use C/ua Cloud Containers, you need to:"
echo "1. Sign up at https://trycua.com"
echo "2. Create a Cloud Container"
echo "3. Generate an Api Key"
echo ""
read -p "Enter your C/ua Api Key: " CUA_API_KEY
if [[ -z "$CUA_API_KEY" ]]; then
echo "❌ C/ua Api Key is required for Cloud Containers."
exit 1
fi
fi
USE_CLOUD=true
elif [[ "$CHOICE" == "2" ]]; then
# Local macOS VM setup
echo ""
echo "🖥️ Setting up local macOS VMs..."
# Check for Apple Silicon Mac
if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
echo "❌ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)."
echo "💡 Consider using C/ua Cloud Containers instead (option 1)."
exit 1
fi
# Check for macOS 15 (Sequoia) or newer
OSVERSION=$(sw_vers -productVersion)
if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
echo "❌ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION."
echo "💡 Consider using C/ua Cloud Containers instead (option 1)."
exit 1
fi
USE_CLOUD=false
else
echo "❌ Invalid choice. Please run the script again and choose 1 or 2."
exit 1
fi
# Install Lume if not already installed (only for local VMs)
if [[ "$USE_CLOUD" == "false" ]]; then
if ! command -v lume &> /dev/null; then
echo "📦 Installing Lume CLI..."
curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
# Add lume to PATH for this session if it's not already there
if ! command -v lume &> /dev/null; then
export PATH="$PATH:$HOME/.local/bin"
fi
fi
# Pull the macOS CUA image if not already present
if ! lume ls | grep -q "macos-sequoia-cua"; then
# Check available disk space
IMAGE_SIZE_GB=30
AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))
echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system."
# Prompt for confirmation
read -p " Continue? [y]/n: " CONTINUE
CONTINUE=${CONTINUE:-y}
if [[ $CONTINUE =~ ^[Yy]$ ]]; then
echo "📥 Pulling macOS CUA image (this may take a while)..."
lume pull macos-sequoia-cua:latest
else
echo "❌ Installation cancelled."
exit 1
fi
fi
fi
@@ -87,7 +161,7 @@ fi
source "$VENV_DIR/bin/activate"
# Install required packages
echo "📦 Updating CUA packages..."
echo "📦 Updating C/ua packages..."
pip install -U pip setuptools wheel Cmake
pip install -U cua-computer "cua-agent[all]"
@@ -98,22 +172,30 @@ pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-posit
DEMO_DIR="$HOME/.cua-demo"
mkdir -p "$DEMO_DIR"
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.app import create_gradio_ui
# Try to load API keys from environment
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
print("\n⚠ No OpenAI API key found. You'll need to provide one in the UI.")
# Launch the Gradio UI and open it in the browser
app = create_gradio_ui()
app.launch(share=False, inbrowser=True)
# Create .env.local file with API keys (only if it doesn't exist)
if [[ ! -f "$DEMO_DIR/.env.local" ]]; then
cat > "$DEMO_DIR/.env.local" << EOF
# Uncomment and add your API keys here
# OPENAI_API_KEY=your_openai_api_key_here
# ANTHROPIC_API_KEY=your_anthropic_api_key_here
CUA_API_KEY=your_cua_api_key_here
EOF
echo "📝 Created .env.local file with API key placeholders"
else
echo "📝 Found existing .env.local file - keeping your current settings"
fi
if [[ "$USE_CLOUD" == "true" ]]; then
# Add CUA API key to .env.local if not already present
if ! grep -q "CUA_API_KEY" "$DEMO_DIR/.env.local"; then
echo "CUA_API_KEY=$CUA_API_KEY" >> "$DEMO_DIR/.env.local"
echo "🔑 Added CUA_API_KEY to .env.local"
elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$DEMO_DIR/.env.local"; then
# Update placeholder with actual key
sed -i.bak "s/CUA_API_KEY=your_cua_api_key_here/CUA_API_KEY=$CUA_API_KEY/" "$DEMO_DIR/.env.local"
echo "🔑 Updated CUA_API_KEY in .env.local"
fi
fi
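After this step, a cloud setup's `.env.local` ends up looking roughly like this (the key value is whatever you entered; shown as a placeholder):
```bash
# Uncomment and add your API keys here
# OPENAI_API_KEY=your_openai_api_key_here
# ANTHROPIC_API_KEY=your_anthropic_api_key_here
CUA_API_KEY=your_actual_cua_api_key  # appended or substituted by the script above
```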
# Create a convenience script to run the demo
cat > "$DEMO_DIR/start_demo.sh" << EOF
@@ -125,20 +207,91 @@ EOF
chmod +x "$DEMO_DIR/start_demo.sh"
echo "✅ Setup complete!"
echo "🖥️ You can start the CUA playground by running: $DEMO_DIR/start_demo.sh"
if [[ "$USE_CLOUD" == "true" ]]; then
# Create run_demo.py for cloud containers
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from pathlib import Path
from dotenv import load_dotenv
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.app import create_gradio_ui
if [ -z "$VM_RUNNING" ]; then
echo "🚀 Starting the macOS CUA VM in the background..."
lume run macos-sequoia-cua:latest &
# Wait a moment for the VM to initialize
sleep 5
echo "✅ VM started successfully."
# Load environment variables from .env.local
load_dotenv(Path(__file__).parent / ".env.local")
# Check for required API keys
cua_api_key = os.environ.get("CUA_API_KEY", "")
if not cua_api_key:
print("\n❌ CUA_API_KEY not found in .env.local file.")
print("Please add your CUA API key to the .env.local file.")
exit(1)
openai_key = os.environ.get("OPENAI_API_KEY", "")
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
if not openai_key and not anthropic_key:
print("\n⚠ No OpenAI or Anthropic API keys found in .env.local.")
print("Please add at least one API key to use AI agents.")
print("🚀 Starting CUA playground with Cloud Containers...")
print("📝 Edit .env.local to update your API keys")
# Launch the Gradio UI and open it in the browser
app = create_gradio_ui()
app.launch(share=False, inbrowser=True)
EOF
else
echo "✅ macOS CUA VM is already running."
# Create run_demo.py for local macOS VMs
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from pathlib import Path
from dotenv import load_dotenv
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.app import create_gradio_ui
# Load environment variables from .env.local
load_dotenv(Path(__file__).parent / ".env.local")
# Try to load API keys from environment
openai_key = os.environ.get("OPENAI_API_KEY", "")
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
if not openai_key and not anthropic_key:
print("\n⚠ No OpenAI or Anthropic API keys found in .env.local.")
print("Please add at least one API key to use AI agents.")
print("🚀 Starting CUA playground with local macOS VMs...")
print("📝 Edit .env.local to update your API keys")
# Launch the Gradio UI and open it in the browser
app = create_gradio_ui()
app.launch(share=False, inbrowser=True)
EOF
fi
echo "☁️ CUA Cloud Container setup complete!"
echo "📝 Edit $DEMO_DIR/.env.local to update your API keys"
echo "🖥️ Start the playground by running: $DEMO_DIR/start_demo.sh"
# Check if the VM is running (only for local setup)
if [[ "$USE_CLOUD" == "false" ]]; then
echo "🔍 Checking if the macOS CUA VM is running..."
VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")
if [ -z "$VM_RUNNING" ]; then
echo "🚀 Starting the macOS CUA VM in the background..."
lume run macos-sequoia-cua:latest &
# Wait a moment for the VM to initialize
sleep 5
echo "✅ VM started successfully."
else
echo "✅ macOS CUA VM is already running."
fi
fi
# Ask if the user wants to start the demo now