mirror of
https://github.com/trycua/computer.git
synced 2026-01-01 19:10:30 -06:00
Merge branch 'main' into feat/multiplatform
This commit is contained in:
43
README.md
43
README.md
@@ -13,7 +13,7 @@
|
||||
<a href="https://trendshift.io/repositories/13685" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13685" alt="trycua%2Fcua | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
</div>
|
||||
|
||||
**c/ua** (pronounced "koo-ah") enables AI agents to control full operating systems in high-performance virtual containers with near-native speed on Apple Silicon.
|
||||
**c/ua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
|
||||
|
||||
<div align="center">
|
||||
<video src="https://github.com/user-attachments/assets/c619b4ea-bb8e-4382-860e-f3757e36af20" width="800" controls></video>
|
||||
@@ -21,15 +21,14 @@
|
||||
|
||||
# 🚀 Quick Start
|
||||
|
||||
Get started with a Computer-Use Agent UI and a VM with a single command:
|
||||
|
||||
Get started with a Computer-Use Agent UI with a single command:
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/scripts/playground.sh)"
|
||||
```
|
||||
|
||||
|
||||
This script will:
|
||||
- Prompt you to choose between [C/ua Cloud Containers](https://trycua.com) and local macOS VMs
|
||||
- Install Lume CLI for VM management (if needed)
|
||||
- Pull the latest macOS CUA image (if needed)
|
||||
- Set up Python environment and install/update required packages
|
||||
@@ -81,21 +80,29 @@ from computer import Computer
|
||||
from agent import ComputerAgent, LLM
|
||||
|
||||
async def main():
|
||||
# Start a local macOS VM with a 1024x768 display
|
||||
async with Computer(os_type="macos", display="1024x768") as computer:
|
||||
# Start a local macOS VM
|
||||
computer = Computer(os_type="macos")
|
||||
await computer.run()
|
||||
|
||||
# Example: Direct control of a macOS VM with Computer
|
||||
await computer.interface.left_click(100, 200)
|
||||
await computer.interface.type_text("Hello, world!")
|
||||
screenshot_bytes = await computer.interface.screenshot()
|
||||
|
||||
# Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit
|
||||
agent = ComputerAgent(
|
||||
computer=computer,
|
||||
loop="UITARS",
|
||||
model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit")
|
||||
)
|
||||
await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide")
|
||||
# Or with C/ua Cloud Container
|
||||
computer = Computer(
|
||||
os_type="linux",
|
||||
api_key="your_cua_api_key_here",
|
||||
name="your_container_name_here"
|
||||
)
|
||||
|
||||
# Example: Direct control of a macOS VM with Computer
|
||||
await computer.interface.left_click(100, 200)
|
||||
await computer.interface.type_text("Hello, world!")
|
||||
screenshot_bytes = await computer.interface.screenshot()
|
||||
|
||||
# Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit
|
||||
agent = ComputerAgent(
|
||||
computer=computer,
|
||||
loop="UITARS",
|
||||
model=LLM(provider="MLXVLM", name="mlx-community/UI-TARS-1.5-7B-6bit")
|
||||
)
|
||||
await agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide")
|
||||
|
||||
main()
|
||||
```
|
||||
|
||||
@@ -5,7 +5,7 @@ import logging
|
||||
import traceback
|
||||
import signal
|
||||
|
||||
from computer import Computer
|
||||
from computer import Computer, VMProviderType
|
||||
|
||||
# Import the unified agent class and types
|
||||
from agent import ComputerAgent, LLMProvider, LLM, AgentLoop
|
||||
@@ -23,76 +23,88 @@ async def run_agent_example():
|
||||
print("\n=== Example: ComputerAgent with OpenAI and Omni provider ===")
|
||||
|
||||
try:
|
||||
# Create a local macOS computer
|
||||
computer = Computer(
|
||||
os_type="macos",
|
||||
verbosity=logging.DEBUG,
|
||||
)
|
||||
|
||||
# Create a remote Linux computer with C/ua
|
||||
# computer = Computer(
|
||||
# os_type="linux",
|
||||
# api_key=os.getenv("CUA_API_KEY"),
|
||||
# name=os.getenv("CUA_CONTAINER_NAME"),
|
||||
# provider_type=VMProviderType.CLOUD,
|
||||
# )
|
||||
|
||||
# Create Computer instance with async context manager
|
||||
async with Computer(verbosity=logging.DEBUG) as macos_computer:
|
||||
# Create agent with loop and provider
|
||||
agent = ComputerAgent(
|
||||
computer=macos_computer,
|
||||
# loop=AgentLoop.OPENAI,
|
||||
# loop=AgentLoop.ANTHROPIC,
|
||||
# loop=AgentLoop.UITARS,
|
||||
loop=AgentLoop.OMNI,
|
||||
# model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA
|
||||
# model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
|
||||
# model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
|
||||
# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
|
||||
# model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
|
||||
model=LLM(
|
||||
provider=LLMProvider.OAICOMPAT,
|
||||
name="gemma-3-12b-it",
|
||||
provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint
|
||||
),
|
||||
save_trajectory=True,
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.DEBUG,
|
||||
)
|
||||
agent = ComputerAgent(
|
||||
computer=computer,
|
||||
loop=AgentLoop.OPENAI,
|
||||
# loop=AgentLoop.ANTHROPIC,
|
||||
# loop=AgentLoop.UITARS,
|
||||
# loop=AgentLoop.OMNI,
|
||||
model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA
|
||||
# model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
|
||||
# model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
|
||||
# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
|
||||
# model=LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit"),
|
||||
# model=LLM(
|
||||
# provider=LLMProvider.OAICOMPAT,
|
||||
# name="gemma-3-12b-it",
|
||||
# provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint
|
||||
# ),
|
||||
save_trajectory=True,
|
||||
only_n_most_recent_images=3,
|
||||
verbosity=logging.DEBUG,
|
||||
)
|
||||
|
||||
tasks = [
|
||||
"Look for a repository named trycua/cua on GitHub.",
|
||||
"Check the open issues, open the most recent one and read it.",
|
||||
"Clone the repository in users/lume/projects if it doesn't exist yet.",
|
||||
"Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
|
||||
"From Cursor, open Composer if not already open.",
|
||||
"Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
|
||||
]
|
||||
tasks = [
|
||||
"Look for a repository named trycua/cua on GitHub.",
|
||||
"Check the open issues, open the most recent one and read it.",
|
||||
"Clone the repository in users/lume/projects if it doesn't exist yet.",
|
||||
"Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
|
||||
"From Cursor, open Composer if not already open.",
|
||||
"Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
|
||||
]
|
||||
|
||||
for i, task in enumerate(tasks):
|
||||
print(f"\nExecuting task {i}/{len(tasks)}: {task}")
|
||||
async for result in agent.run(task):
|
||||
print("Response ID: ", result.get("id"))
|
||||
for i, task in enumerate(tasks):
|
||||
print(f"\nExecuting task {i}/{len(tasks)}: {task}")
|
||||
async for result in agent.run(task):
|
||||
print("Response ID: ", result.get("id"))
|
||||
|
||||
# Print detailed usage information
|
||||
usage = result.get("usage")
|
||||
if usage:
|
||||
print("\nUsage Details:")
|
||||
print(f" Input Tokens: {usage.get('input_tokens')}")
|
||||
if "input_tokens_details" in usage:
|
||||
print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
|
||||
print(f" Output Tokens: {usage.get('output_tokens')}")
|
||||
if "output_tokens_details" in usage:
|
||||
print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
|
||||
print(f" Total Tokens: {usage.get('total_tokens')}")
|
||||
# Print detailed usage information
|
||||
usage = result.get("usage")
|
||||
if usage:
|
||||
print("\nUsage Details:")
|
||||
print(f" Input Tokens: {usage.get('input_tokens')}")
|
||||
if "input_tokens_details" in usage:
|
||||
print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
|
||||
print(f" Output Tokens: {usage.get('output_tokens')}")
|
||||
if "output_tokens_details" in usage:
|
||||
print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
|
||||
print(f" Total Tokens: {usage.get('total_tokens')}")
|
||||
|
||||
print("Response Text: ", result.get("text"))
|
||||
print("Response Text: ", result.get("text"))
|
||||
|
||||
# Print tools information
|
||||
tools = result.get("tools")
|
||||
if tools:
|
||||
print("\nTools:")
|
||||
print(tools)
|
||||
# Print tools information
|
||||
tools = result.get("tools")
|
||||
if tools:
|
||||
print("\nTools:")
|
||||
print(tools)
|
||||
|
||||
# Print reasoning and tool call outputs
|
||||
outputs = result.get("output", [])
|
||||
for output in outputs:
|
||||
output_type = output.get("type")
|
||||
if output_type == "reasoning":
|
||||
print("\nReasoning Output:")
|
||||
print(output)
|
||||
elif output_type == "computer_call":
|
||||
print("\nTool Call Output:")
|
||||
print(output)
|
||||
# Print reasoning and tool call outputs
|
||||
outputs = result.get("output", [])
|
||||
for output in outputs:
|
||||
output_type = output.get("type")
|
||||
if output_type == "reasoning":
|
||||
print("\nReasoning Output:")
|
||||
print(output)
|
||||
elif output_type == "computer_call":
|
||||
print("\nTool Call Output:")
|
||||
print(output)
|
||||
|
||||
print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
|
||||
print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in run_agent_example: {e}")
|
||||
|
||||
@@ -16,17 +16,18 @@ load_dotenv(env_file)
|
||||
pythonpath = os.environ.get("PYTHONPATH", "")
|
||||
for path in pythonpath.split(":"):
|
||||
if path and path not in sys.path:
|
||||
sys.path.append(path)
|
||||
sys.path.insert(0, path) # Insert at beginning to prioritize
|
||||
print(f"Added to sys.path: {path}")
|
||||
|
||||
from computer import Computer, VMProviderType
|
||||
from computer.computer import Computer
|
||||
from computer.providers.base import VMProviderType
|
||||
from computer.logger import LogLevel
|
||||
|
||||
async def main():
|
||||
try:
|
||||
print("\n=== Using direct initialization ===")
|
||||
|
||||
# Create computer with configured host
|
||||
# Create a local macOS computer
|
||||
computer = Computer(
|
||||
display="1024x768",
|
||||
memory="8GB",
|
||||
@@ -41,7 +42,8 @@ async def main():
|
||||
],
|
||||
ephemeral=False,
|
||||
)
|
||||
|
||||
|
||||
# Create a remote Linux computer with C/ua
|
||||
# computer = Computer(
|
||||
# os_type="linux",
|
||||
# api_key=os.getenv("CUA_API_KEY"),
|
||||
@@ -54,8 +56,15 @@ async def main():
|
||||
await computer.run()
|
||||
|
||||
screenshot = await computer.interface.screenshot()
|
||||
with open(Path("~/cua/examples/screenshot.png").expanduser(), "wb") as f:
|
||||
|
||||
# Create output directory if it doesn't exist
|
||||
output_dir = Path("./output")
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
screenshot_path = output_dir / "screenshot.png"
|
||||
with open(screenshot_path, "wb") as f:
|
||||
f.write(screenshot)
|
||||
print(f"Screenshot saved to: {screenshot_path.absolute()}")
|
||||
|
||||
# await computer.interface.hotkey("command", "space")
|
||||
|
||||
|
||||
@@ -446,7 +446,8 @@ def create_gradio_ui(
|
||||
# Check for API keys
|
||||
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
|
||||
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
|
||||
|
||||
cua_api_key = os.environ.get("CUA_API_KEY", "")
|
||||
|
||||
# Always show models regardless of API key availability
|
||||
openai_models = ["OpenAI: Computer-Use Preview"]
|
||||
anthropic_models = [
|
||||
@@ -464,9 +465,11 @@ def create_gradio_ui(
|
||||
# Check if API keys are available
|
||||
has_openai_key = bool(openai_api_key)
|
||||
has_anthropic_key = bool(anthropic_api_key)
|
||||
has_cua_key = bool(cua_api_key)
|
||||
|
||||
print("has_openai_key", has_openai_key)
|
||||
print("has_anthropic_key", has_anthropic_key)
|
||||
print("has_cua_key", has_cua_key)
|
||||
|
||||
# Get Ollama models for OMNI
|
||||
ollama_models = get_ollama_models()
|
||||
@@ -752,6 +755,7 @@ if __name__ == "__main__":
|
||||
value="",
|
||||
type="password",
|
||||
info="Required for cloud provider",
|
||||
visible=(not has_cua_key)
|
||||
)
|
||||
|
||||
with gr.Accordion("Agent Configuration", open=True):
|
||||
@@ -1176,6 +1180,8 @@ if __name__ == "__main__":
|
||||
else:
|
||||
# For Ollama or default OAICOMPAT (without custom key), no key needed/expected
|
||||
api_key = ""
|
||||
|
||||
cua_cloud_api_key = cua_cloud_api_key or os.environ.get("CUA_API_KEY", "")
|
||||
|
||||
# --- Save Settings Before Running Agent ---
|
||||
current_settings = {
|
||||
|
||||
@@ -26,6 +26,7 @@ class LinuxComputerInterface(BaseComputerInterface):
|
||||
self._reconnect_delay = 1 # Start with 1 second delay
|
||||
self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
|
||||
self._log_connection_attempts = True # Flag to control connection attempt logging
|
||||
self._authenticated = False # Track authentication status
|
||||
|
||||
# Set logger name for Linux interface
|
||||
self.logger = Logger("cua.interface.linux", LogLevel.NORMAL)
|
||||
@@ -89,34 +90,14 @@ class LinuxComputerInterface(BaseComputerInterface):
|
||||
)
|
||||
self.logger.info("WebSocket connection established")
|
||||
|
||||
# If api_key and vm_name are provided, perform authentication handshake
|
||||
if self.api_key and self.vm_name:
|
||||
self.logger.info("Performing authentication handshake...")
|
||||
auth_message = {
|
||||
"command": "authenticate",
|
||||
"params": {
|
||||
"api_key": self.api_key,
|
||||
"container_name": self.vm_name
|
||||
}
|
||||
}
|
||||
await self._ws.send(json.dumps(auth_message))
|
||||
|
||||
# Wait for authentication response
|
||||
auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
|
||||
auth_result = json.loads(auth_response)
|
||||
|
||||
if not auth_result.get("success"):
|
||||
error_msg = auth_result.get("error", "Authentication failed")
|
||||
self.logger.error(f"Authentication failed: {error_msg}")
|
||||
await self._ws.close()
|
||||
self._ws = None
|
||||
raise ConnectionError(f"Authentication failed: {error_msg}")
|
||||
|
||||
self.logger.info("Authentication successful")
|
||||
# Authentication will be handled by the first command that needs it
|
||||
# Don't do authentication here to avoid recv conflicts
|
||||
|
||||
self._reconnect_delay = 1 # Reset reconnect delay on successful connection
|
||||
self._last_ping = time.time()
|
||||
retry_count = 0 # Reset retry count on successful connection
|
||||
self._authenticated = False # Reset auth status on new connection
|
||||
|
||||
except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
|
||||
next_retry = self._reconnect_delay
|
||||
|
||||
@@ -140,13 +121,6 @@ class LinuxComputerInterface(BaseComputerInterface):
|
||||
pass
|
||||
self._ws = None
|
||||
|
||||
# Use exponential backoff for connection retries
|
||||
await asyncio.sleep(self._reconnect_delay)
|
||||
self._reconnect_delay = min(
|
||||
self._reconnect_delay * 2, self._max_reconnect_delay
|
||||
)
|
||||
continue
|
||||
|
||||
# Regular ping to check connection
|
||||
if self._ws and self._ws.state == websockets.protocol.State.OPEN:
|
||||
try:
|
||||
@@ -225,6 +199,31 @@ class LinuxComputerInterface(BaseComputerInterface):
|
||||
if not self._ws:
|
||||
raise ConnectionError("WebSocket connection is not established")
|
||||
|
||||
# Handle authentication if needed
|
||||
if self.api_key and self.vm_name and not self._authenticated:
|
||||
self.logger.info("Performing authentication handshake...")
|
||||
auth_message = {
|
||||
"command": "authenticate",
|
||||
"params": {
|
||||
"api_key": self.api_key,
|
||||
"container_name": self.vm_name
|
||||
}
|
||||
}
|
||||
await self._ws.send(json.dumps(auth_message))
|
||||
|
||||
# Wait for authentication response
|
||||
auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
|
||||
auth_result = json.loads(auth_response)
|
||||
|
||||
if not auth_result.get("success"):
|
||||
error_msg = auth_result.get("error", "Authentication failed")
|
||||
self.logger.error(f"Authentication failed: {error_msg}")
|
||||
self._authenticated = False
|
||||
raise ConnectionError(f"Authentication failed: {error_msg}")
|
||||
|
||||
self.logger.info("Authentication successful")
|
||||
self._authenticated = True
|
||||
|
||||
message = {"command": command, "params": params or {}}
|
||||
await self._ws.send(json.dumps(message))
|
||||
response = await asyncio.wait_for(self._ws.recv(), timeout=30)
|
||||
@@ -245,9 +244,7 @@ class LinuxComputerInterface(BaseComputerInterface):
|
||||
f"Failed to send command '{command}' after {max_retries} retries"
|
||||
)
|
||||
self.logger.debug(f"Command failure details: {e}")
|
||||
raise
|
||||
|
||||
raise last_error if last_error else RuntimeError("Failed to send command")
|
||||
raise last_error if last_error else RuntimeError("Failed to send command")
|
||||
|
||||
async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
|
||||
"""Wait for WebSocket connection to become available."""
|
||||
|
||||
@@ -2,63 +2,137 @@
|
||||
|
||||
set -e
|
||||
|
||||
echo "🚀 Setting up CUA playground environment..."
|
||||
echo "🚀 Setting up C/ua playground environment..."
|
||||
|
||||
# Check for Apple Silicon Mac
|
||||
if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
|
||||
echo "❌ This script requires an Apple Silicon Mac (M1/M2/M3/M4)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for macOS 15 (Sequoia) or newer
|
||||
OSVERSION=$(sw_vers -productVersion)
|
||||
if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
|
||||
echo "❌ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create a temporary directory for our work
|
||||
TMP_DIR=$(mktemp -d)
|
||||
cd "$TMP_DIR"
|
||||
# Save the original working directory
|
||||
ORIGINAL_DIR="$(pwd)"
|
||||
|
||||
# Function to clean up on exit
|
||||
cleanup() {
|
||||
cd ~
|
||||
rm -rf "$TMP_DIR"
|
||||
rm -rf "$TMP_DIR" 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Create a temporary directory for our work
|
||||
TMP_DIR=$(mktemp -d)
|
||||
cd "$TMP_DIR"
|
||||
trap cleanup EXIT
|
||||
|
||||
# Install Lume if not already installed
|
||||
if ! command -v lume &> /dev/null; then
|
||||
echo "📦 Installing Lume CLI..."
|
||||
curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
|
||||
# Ask user to choose between local macOS VMs or C/ua Cloud Containers
|
||||
echo ""
|
||||
echo "Choose your C/ua setup:"
|
||||
echo "1) ☁️ C/ua Cloud Containers (works on any system)"
|
||||
echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)"
|
||||
echo ""
|
||||
read -p "Enter your choice (1 or 2): " CHOICE
|
||||
|
||||
if [[ "$CHOICE" == "1" ]]; then
|
||||
# C/ua Cloud Container setup
|
||||
echo ""
|
||||
echo "☁️ Setting up C/ua Cloud Containers..."
|
||||
echo ""
|
||||
|
||||
# Add lume to PATH for this session if it's not already there
|
||||
if ! command -v lume &> /dev/null; then
|
||||
export PATH="$PATH:$HOME/.local/bin"
|
||||
# Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir)
|
||||
DEMO_DIR="$HOME/.cua-demo"
|
||||
# Look for .env.local in the original working directory (before cd to temp dir)
|
||||
CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local"
|
||||
DEMO_ENV_FILE="$DEMO_DIR/.env.local"
|
||||
|
||||
CUA_API_KEY=""
|
||||
|
||||
# First check current directory
|
||||
if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then
|
||||
EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
|
||||
if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
|
||||
CUA_API_KEY="$EXISTING_CUA_KEY"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Then check demo directory if not found in current dir
|
||||
if [[ -z "$CUA_API_KEY" ]] && [[ -f "$DEMO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$DEMO_ENV_FILE"; then
|
||||
EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$DEMO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
|
||||
if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
|
||||
CUA_API_KEY="$EXISTING_CUA_KEY"
|
||||
fi
|
||||
fi
|
||||
|
||||
# If no valid API key found, prompt for one
|
||||
if [[ -z "$CUA_API_KEY" ]]; then
|
||||
echo "To use C/ua Cloud Containers, you need to:"
|
||||
echo "1. Sign up at https://trycua.com"
|
||||
echo "2. Create a Cloud Container"
|
||||
echo "3. Generate an Api Key"
|
||||
echo ""
|
||||
read -p "Enter your C/ua Api Key: " CUA_API_KEY
|
||||
|
||||
if [[ -z "$CUA_API_KEY" ]]; then
|
||||
echo "❌ C/ua Api Key is required for Cloud Containers."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
USE_CLOUD=true
|
||||
|
||||
elif [[ "$CHOICE" == "2" ]]; then
|
||||
# Local macOS VM setup
|
||||
echo ""
|
||||
echo "🖥️ Setting up local macOS VMs..."
|
||||
|
||||
# Check for Apple Silicon Mac
|
||||
if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
|
||||
echo "❌ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)."
|
||||
echo "💡 Consider using C/ua Cloud Containers instead (option 1)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for macOS 15 (Sequoia) or newer
|
||||
OSVERSION=$(sw_vers -productVersion)
|
||||
if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
|
||||
echo "❌ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION."
|
||||
echo "💡 Consider using C/ua Cloud Containers instead (option 1)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
USE_CLOUD=false
|
||||
|
||||
else
|
||||
echo "❌ Invalid choice. Please run the script again and choose 1 or 2."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Pull the macOS CUA image if not already present
|
||||
if ! lume ls | grep -q "macos-sequoia-cua"; then
|
||||
# Check available disk space
|
||||
IMAGE_SIZE_GB=30
|
||||
AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
|
||||
AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))
|
||||
|
||||
echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
|
||||
echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system."
|
||||
|
||||
# Prompt for confirmation
|
||||
read -p " Continue? [y]/n: " CONTINUE
|
||||
CONTINUE=${CONTINUE:-y}
|
||||
|
||||
if [[ $CONTINUE =~ ^[Yy]$ ]]; then
|
||||
echo "📥 Pulling macOS CUA image (this may take a while)..."
|
||||
lume pull macos-sequoia-cua:latest
|
||||
else
|
||||
echo "❌ Installation cancelled."
|
||||
exit 1
|
||||
# Install Lume if not already installed (only for local VMs)
|
||||
if [[ "$USE_CLOUD" == "false" ]]; then
|
||||
if ! command -v lume &> /dev/null; then
|
||||
echo "📦 Installing Lume CLI..."
|
||||
curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
|
||||
|
||||
# Add lume to PATH for this session if it's not already there
|
||||
if ! command -v lume &> /dev/null; then
|
||||
export PATH="$PATH:$HOME/.local/bin"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Pull the macOS CUA image if not already present
|
||||
if ! lume ls | grep -q "macos-sequoia-cua"; then
|
||||
# Check available disk space
|
||||
IMAGE_SIZE_GB=30
|
||||
AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
|
||||
AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))
|
||||
|
||||
echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
|
||||
echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system."
|
||||
|
||||
# Prompt for confirmation
|
||||
read -p " Continue? [y]/n: " CONTINUE
|
||||
CONTINUE=${CONTINUE:-y}
|
||||
|
||||
if [[ $CONTINUE =~ ^[Yy]$ ]]; then
|
||||
echo "📥 Pulling macOS CUA image (this may take a while)..."
|
||||
lume pull macos-sequoia-cua:latest
|
||||
else
|
||||
echo "❌ Installation cancelled."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -87,7 +161,7 @@ fi
|
||||
source "$VENV_DIR/bin/activate"
|
||||
|
||||
# Install required packages
|
||||
echo "📦 Updating CUA packages..."
|
||||
echo "📦 Updating C/ua packages..."
|
||||
pip install -U pip setuptools wheel Cmake
|
||||
pip install -U cua-computer "cua-agent[all]"
|
||||
|
||||
@@ -98,22 +172,30 @@ pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-posit
|
||||
DEMO_DIR="$HOME/.cua-demo"
|
||||
mkdir -p "$DEMO_DIR"
|
||||
|
||||
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
|
||||
import asyncio
|
||||
import os
|
||||
from computer import Computer
|
||||
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
|
||||
from agent.ui.gradio.app import create_gradio_ui
|
||||
|
||||
# Try to load API keys from environment
|
||||
api_key = os.environ.get("OPENAI_API_KEY", "")
|
||||
if not api_key:
|
||||
print("\n⚠️ No OpenAI API key found. You'll need to provide one in the UI.")
|
||||
|
||||
# Launch the Gradio UI and open it in the browser
|
||||
app = create_gradio_ui()
|
||||
app.launch(share=False, inbrowser=True)
|
||||
# Create .env.local file with API keys (only if it doesn't exist)
|
||||
if [[ ! -f "$DEMO_DIR/.env.local" ]]; then
|
||||
cat > "$DEMO_DIR/.env.local" << EOF
|
||||
# Uncomment and add your API keys here
|
||||
# OPENAI_API_KEY=your_openai_api_key_here
|
||||
# ANTHROPIC_API_KEY=your_anthropic_api_key_here
|
||||
CUA_API_KEY=your_cua_api_key_here
|
||||
EOF
|
||||
echo "📝 Created .env.local file with API key placeholders"
|
||||
else
|
||||
echo "📝 Found existing .env.local file - keeping your current settings"
|
||||
fi
|
||||
|
||||
if [[ "$USE_CLOUD" == "true" ]]; then
|
||||
# Add CUA API key to .env.local if not already present
|
||||
if ! grep -q "CUA_API_KEY" "$DEMO_DIR/.env.local"; then
|
||||
echo "CUA_API_KEY=$CUA_API_KEY" >> "$DEMO_DIR/.env.local"
|
||||
echo "🔑 Added CUA_API_KEY to .env.local"
|
||||
elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$DEMO_DIR/.env.local"; then
|
||||
# Update placeholder with actual key
|
||||
sed -i.bak "s/CUA_API_KEY=your_cua_api_key_here/CUA_API_KEY=$CUA_API_KEY/" "$DEMO_DIR/.env.local"
|
||||
echo "🔑 Updated CUA_API_KEY in .env.local"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Create a convenience script to run the demo
|
||||
cat > "$DEMO_DIR/start_demo.sh" << EOF
|
||||
@@ -125,20 +207,91 @@ EOF
|
||||
chmod +x "$DEMO_DIR/start_demo.sh"
|
||||
|
||||
echo "✅ Setup complete!"
|
||||
echo "🖥️ You can start the CUA playground by running: $DEMO_DIR/start_demo.sh"
|
||||
|
||||
# Check if the VM is running
|
||||
echo "🔍 Checking if the macOS CUA VM is running..."
|
||||
VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")
|
||||
if [[ "$USE_CLOUD" == "true" ]]; then
|
||||
# Create run_demo.py for cloud containers
|
||||
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from computer import Computer
|
||||
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
|
||||
from agent.ui.gradio.app import create_gradio_ui
|
||||
|
||||
if [ -z "$VM_RUNNING" ]; then
|
||||
echo "🚀 Starting the macOS CUA VM in the background..."
|
||||
lume run macos-sequoia-cua:latest &
|
||||
# Wait a moment for the VM to initialize
|
||||
sleep 5
|
||||
echo "✅ VM started successfully."
|
||||
# Load environment variables from .env.local
|
||||
load_dotenv(Path(__file__).parent / ".env.local")
|
||||
|
||||
# Check for required API keys
|
||||
cua_api_key = os.environ.get("CUA_API_KEY", "")
|
||||
if not cua_api_key:
|
||||
print("\n❌ CUA_API_KEY not found in .env.local file.")
|
||||
print("Please add your CUA API key to the .env.local file.")
|
||||
exit(1)
|
||||
|
||||
openai_key = os.environ.get("OPENAI_API_KEY", "")
|
||||
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
|
||||
|
||||
if not openai_key and not anthropic_key:
|
||||
print("\n⚠️ No OpenAI or Anthropic API keys found in .env.local.")
|
||||
print("Please add at least one API key to use AI agents.")
|
||||
|
||||
print("🚀 Starting CUA playground with Cloud Containers...")
|
||||
print("📝 Edit .env.local to update your API keys")
|
||||
|
||||
# Launch the Gradio UI and open it in the browser
|
||||
app = create_gradio_ui()
|
||||
app.launch(share=False, inbrowser=True)
|
||||
EOF
|
||||
else
|
||||
echo "✅ macOS CUA VM is already running."
|
||||
# Create run_demo.py for local macOS VMs
|
||||
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from computer import Computer
|
||||
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
|
||||
from agent.ui.gradio.app import create_gradio_ui
|
||||
|
||||
# Load environment variables from .env.local
|
||||
load_dotenv(Path(__file__).parent / ".env.local")
|
||||
|
||||
# Try to load API keys from environment
|
||||
openai_key = os.environ.get("OPENAI_API_KEY", "")
|
||||
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
|
||||
|
||||
if not openai_key and not anthropic_key:
|
||||
print("\n⚠️ No OpenAI or Anthropic API keys found in .env.local.")
|
||||
print("Please add at least one API key to use AI agents.")
|
||||
|
||||
print("🚀 Starting CUA playground with local macOS VMs...")
|
||||
print("📝 Edit .env.local to update your API keys")
|
||||
|
||||
# Launch the Gradio UI and open it in the browser
|
||||
app = create_gradio_ui()
|
||||
app.launch(share=False, inbrowser=True)
|
||||
EOF
|
||||
fi
|
||||
|
||||
echo "☁️ CUA Cloud Container setup complete!"
|
||||
echo "📝 Edit $DEMO_DIR/.env.local to update your API keys"
|
||||
echo "🖥️ Start the playground by running: $DEMO_DIR/start_demo.sh"
|
||||
|
||||
# Check if the VM is running (only for local setup)
|
||||
if [[ "$USE_CLOUD" == "false" ]]; then
|
||||
echo "🔍 Checking if the macOS CUA VM is running..."
|
||||
VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")
|
||||
|
||||
if [ -z "$VM_RUNNING" ]; then
|
||||
echo "🚀 Starting the macOS CUA VM in the background..."
|
||||
lume run macos-sequoia-cua:latest &
|
||||
# Wait a moment for the VM to initialize
|
||||
sleep 5
|
||||
echo "✅ VM started successfully."
|
||||
else
|
||||
echo "✅ macOS CUA VM is already running."
|
||||
fi
|
||||
fi
|
||||
|
||||
# Ask if the user wants to start the demo now
|
||||
|
||||
Reference in New Issue
Block a user