optimize onboarding
@@ -480,6 +480,83 @@ def create_gradio_ui(
        "Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks",
        "Configure SSH keys and set up a connection to a remote server",
    ]

    # Function to generate Python code based on configuration and tasks
    def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True):
        """Generate Python code for the current configuration and tasks.

        Args:
            agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI)
            provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT)
            model_name: The model name
            tasks: List of tasks to execute
            provider_url: The provider base URL for OAICOMPAT providers
            recent_images: Number of recent images to keep in context
            save_trajectory: Whether to save the agent trajectory

        Returns:
            Formatted Python code as a string
        """
        # Format the tasks as a Python list
        tasks_str = ""
        for task in tasks:
            if task and task.strip():
                tasks_str += f'            "{task}",\n'

        # Create the Python code template
        code = f'''import asyncio
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider


async def main():
    async with Computer() as macos_computer:
        agent = ComputerAgent(
            computer=macos_computer,
            loop=AgentLoop.{agent_loop_choice},
            only_n_most_recent_images={recent_images},
            save_trajectory={save_trajectory},'''

        # Add the model configuration based on provider
        if provider == LLMProvider.OAICOMPAT:
            code += f'''
            model=LLM(
                provider=LLMProvider.OAICOMPAT,
                name="{model_name}",
                provider_base_url="{provider_url}"
            )'''

        code += """
        )
"""

        # Add the tasks section if there are tasks
        if tasks_str:
            code += f'''
        # Prompts for the computer-use agent
        tasks = [
{tasks_str.rstrip()}
        ]

        for task in tasks:
            print(f"Executing task: {{task}}")
            async for result in agent.run(task):
                print(result)'''
        else:
            # If no tasks, just add a placeholder for a single task
            code += f'''
        # Execute a single task
        task = "Search for information about CUA on GitHub"
        print(f"Executing task: {{task}}")
        async for result in agent.run(task):
            print(result)'''

        # Add the main block
        code += '''


if __name__ == "__main__":
    asyncio.run(main())'''

        return code
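
    # Usage sketch (illustrative only; the argument values below are made-up
    # examples, not defaults from this codebase):
    #
    #     snippet = generate_python_code(
    #         agent_loop_choice="OPENAI",
    #         provider=LLMProvider.OPENAI,
    #         model_name="gpt-4o",
    #         tasks=["Open Safari and search for 'trycua'"],
    #         provider_url="https://openrouter.ai/api/v1",
    #     )
    #     print(snippet)  # a ready-to-run script targeting AgentLoop.OPENAI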

    # Function to update model choices based on agent loop selection
    def update_model_choices(loop):
@@ -537,50 +614,20 @@ def create_gradio_ui(
        """
    )

    # Add installation prerequisites as a collapsible section
    with gr.Accordion("Prerequisites & Installation", open=False):
        gr.Markdown(
            """
## Prerequisites

Before using the Computer-Use Agent, you need to set up the Lume daemon and pull the macOS VM image.

### 1. Install the Lume daemon

While a lume binary is included with Computer, we recommend installing the standalone version with the install script below, then starting the lume daemon service:

```bash
sudo /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
```

### 2. Start the Lume daemon service

In a separate terminal:

```bash
lume serve
```

### 3. Pull the pre-built macOS image

```bash
lume pull macos-sequoia-cua:latest
```

The initial download requires 80GB of free space; it shrinks to ~30GB after the first run thanks to macOS's sparse file system.

VMs are stored in `~/.lume`, and locally cached images are stored in `~/.lume/cache`.
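
To check how much disk space the image actually occupies once downloaded (the sparse size, not the apparent size), you can run, for example:

```bash
du -sh ~/.lume
```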

### 4. Test the sandbox

```bash
lume run macos-sequoia-cua:latest
```

For more detailed instructions, visit the [CUA GitHub repository](https://github.com/trycua/cua).
            """
        )

    # Add accordion for Python code
    with gr.Accordion("Python Code", open=False):
        code_display = gr.Code(
            language="python",
            value=generate_python_code(
                initial_loop,
                LLMProvider.OPENAI,
                "gpt-4o",
                [],
                "https://openrouter.ai/api/v1",
            ),
            interactive=False,
        )

    with gr.Accordion("Configuration", open=True):
        # Configuration options
        agent_loop = gr.Dropdown(
@@ -643,6 +690,7 @@ def create_gradio_ui(
            info="Number of recent images to keep in context",
            interactive=True,
        )

    # Right column for chat interface
    with gr.Column(scale=2):
@@ -900,6 +948,62 @@ def create_gradio_ui(
        queue=False,  # Process immediately without queueing
    )

    # Function to update the code display based on configuration and chat history
    def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
        # Extract user messages from the chat history
        messages = []
        if chat_history:
            for msg in chat_history:
                if msg.get("role") == "user":
                    messages.append(msg.get("content", ""))

        # Determine the provider and model name based on the selection
        model_string = custom_model_val if model_choice_val == "Custom model..." else model_choice_val
        provider, model_name, _ = get_provider_and_model(model_string, agent_loop)

        # Generate and return the code
        return generate_python_code(
            agent_loop,
            provider,
            model_name,
            messages,
            provider_base_url,
            recent_images_val,
            save_trajectory_val,
        )

    # Update the code display when the configuration changes
    agent_loop.change(
        update_code_display,
        inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
        outputs=[code_display],
    )
    model_choice.change(
        update_code_display,
        inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
        outputs=[code_display],
    )
    custom_model.change(
        update_code_display,
        inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
        outputs=[code_display],
    )
    chatbot_history.change(
        update_code_display,
        inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
        outputs=[code_display],
    )
    recent_images.change(
        update_code_display,
        inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
        outputs=[code_display],
    )
    save_trajectory.change(
        update_code_display,
        inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
        outputs=[code_display],
    )
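
    # Note: the six bindings above are identical except for the watched
    # component. A minimal consolidation sketch, assuming every watched
    # component exposes the same .change(fn, inputs=..., outputs=...) API
    # (a hypothetical refactor, not part of this commit):
    #
    #     _config_inputs = [agent_loop, model_choice, custom_model, chatbot_history,
    #                       provider_base_url, recent_images, save_trajectory]
    #     for _component in (agent_loop, model_choice, custom_model,
    #                        chatbot_history, recent_images, save_trajectory):
    #         _component.change(update_code_display,
    #                           inputs=_config_inputs, outputs=[code_display])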

    return demo
157
scripts/playground.sh
Executable file
@@ -0,0 +1,157 @@
#!/bin/bash

set -e

echo "🚀 Setting up CUA playground environment..."

# Check for an Apple Silicon Mac
if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
    echo "❌ This script requires an Apple Silicon Mac (M1/M2/M3/M4)."
    exit 1
fi

# Check for macOS 15 (Sequoia) or newer: sort -V sorts version strings, so if
# the smallest of "$OSVERSION 15.0" is not 15.0, the installed OS is older.
OSVERSION=$(sw_vers -productVersion)
if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
    echo "❌ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION."
    exit 1
fi

# Create a temporary directory for our work
TMP_DIR=$(mktemp -d)
cd "$TMP_DIR"

# Function to clean up on exit
cleanup() {
    cd ~
    rm -rf "$TMP_DIR"
}
trap cleanup EXIT

# Install Lume if not already installed
if ! command -v lume &> /dev/null; then
    echo "📦 Installing Lume CLI..."
    curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash

    # Add lume to PATH for this session if it's not already there
    if ! command -v lume &> /dev/null; then
        export PATH="$PATH:$HOME/.lume/bin"
    fi
fi

# Pull the macOS CUA image if not already present
if ! lume ls | grep -q "macos-sequoia-cua"; then
    # Check available disk space
    IMAGE_SIZE_GB=30
    AVAILABLE_SPACE_KB=$(df -k "$HOME" | tail -1 | awk '{print $4}')
    AVAILABLE_SPACE_GB=$((AVAILABLE_SPACE_KB / 1024 / 1024))

    echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
    echo "   You currently have ${AVAILABLE_SPACE_GB}GB available on your system."

    # Prompt for confirmation, defaulting to yes
    read -p "   Continue? [y]/n: " CONTINUE
    CONTINUE=${CONTINUE:-y}

    if [[ $CONTINUE =~ ^[Yy]$ ]]; then
        echo "📥 Pulling macOS CUA image (this may take a while)..."
        lume pull macos-sequoia-cua:latest
    else
        echo "❌ Installation cancelled."
        exit 1
    fi
fi

# Set up a Python environment
echo "🐍 Setting up Python environment..."
PYTHON_CMD="python3"

# Check that Python 3.11+ is available
PYTHON_VERSION=$($PYTHON_CMD --version 2>&1 | cut -d" " -f2)
PYTHON_MAJOR=$(echo "$PYTHON_VERSION" | cut -d. -f1)
PYTHON_MINOR=$(echo "$PYTHON_VERSION" | cut -d. -f2)

if [ "$PYTHON_MAJOR" -lt 3 ] || { [ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -lt 11 ]; }; then
    echo "❌ Python 3.11+ is required. You have $PYTHON_VERSION."
    echo "Please install Python 3.11+ and try again."
    exit 1
fi

# Create a virtual environment
VENV_DIR="$HOME/.cua-venv"
if [ ! -d "$VENV_DIR" ]; then
    $PYTHON_CMD -m venv "$VENV_DIR"
fi

# Activate the virtual environment
source "$VENV_DIR/bin/activate"

# Install required packages
echo "📦 Installing CUA packages..."
pip install -U pip
pip install cua-computer "cua-agent[all]"

# Set up the environment for the MCP server
echo "🔧 Setting up MCP server..."

# Create a simple demo script
DEMO_DIR="$HOME/.cua-demo"
mkdir -p "$DEMO_DIR"

cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.app import create_gradio_ui

# Try to load API keys from the environment
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
    print("\n⚠️ No OpenAI API key found. You'll need to provide one in the UI.")

# Launch the Gradio UI
app = create_gradio_ui()
app.launch(share=False)
EOF

# Create a convenience script to run the demo
cat > "$DEMO_DIR/start_demo.sh" << EOF
#!/bin/bash
source "$VENV_DIR/bin/activate"
cd "$DEMO_DIR"
python run_demo.py
EOF
chmod +x "$DEMO_DIR/start_demo.sh"

# Create a script to run the MCP server with the correct PYTHONPATH.
# $VENV_DIR expands now; the escaped parts below expand when the generated
# script runs, after its virtual environment is activated.
cat > "$DEMO_DIR/start_mcp_server.sh" << EOF
#!/bin/bash
source "$VENV_DIR/bin/activate"

# Set PYTHONPATH to include all necessary libraries
export PYTHONPATH="\$PYTHONPATH:\$(pip show cua-computer-server | grep Location | cut -d' ' -f2)"

# Run the MCP server using the Python module approach
python -m computer_server.mcp_server
EOF
chmod +x "$DEMO_DIR/start_mcp_server.sh"

# Create a desktop shortcut for the demo
cat > "$HOME/Desktop/CUA Playground.command" << EOF
#!/bin/bash
"$DEMO_DIR/start_demo.sh"
EOF
chmod +x "$HOME/Desktop/CUA Playground.command"

echo "✅ Setup complete!"
echo "🖥️ You can start the CUA playground by running: $DEMO_DIR/start_demo.sh"
echo "🖱️ Or double-click the 'CUA Playground' shortcut on your desktop"
echo "🤖 To run the MCP server: $DEMO_DIR/start_mcp_server.sh"

# Ask if the user wants to start the demo now
read -p "Would you like to start the CUA playground now? (y/n) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    "$DEMO_DIR/start_demo.sh"
fi