optimize onboarding

This commit is contained in:
Dillon DuPont
2025-05-09 14:54:33 -04:00
parent 116b4dc9a9
commit ce0fd05d6d
2 changed files with 304 additions and 43 deletions

View File

@@ -480,6 +480,83 @@ def create_gradio_ui(
"Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks",
"Configure SSH keys and set up a connection to a remote server",
]
# Function to generate Python code based on configuration and tasks
def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True):
"""Generate Python code for the current configuration and tasks.
Args:
agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI)
provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT)
model_name: The model name
tasks: List of tasks to execute
provider_url: The provider base URL for OAICOMPAT providers
recent_images: Number of recent images to keep in context
save_trajectory: Whether to save the agent trajectory
Returns:
Formatted Python code as a string
"""
# Format the tasks as a Python list
tasks_str = ""
for task in tasks:
if task and task.strip():
tasks_str += f' "{task}",\n'
# Create the Python code template
code = f'''import asyncio
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
async def main():
async with Computer() as macos_computer:
agent = ComputerAgent(
computer=macos_computer,
loop=AgentLoop.{agent_loop_choice},
only_n_most_recent_images={recent_images},
save_trajectory={save_trajectory},'''
# Add the model configuration based on provider
if provider == LLMProvider.OAICOMPAT:
code += f'''
model=LLM(
provider=LLMProvider.OAICOMPAT,
name="{model_name}",
provider_base_url="{provider_url}"
)'''
code += """
)
"""
# Add tasks section if there are tasks
if tasks_str:
code += f'''
# Prompts for the computer-use agent
tasks = [
{tasks_str.rstrip()}
]
for task in tasks:
print(f"Executing task: {{task}}")
async for result in agent.run(task):
print(result)'''
else:
# If no tasks, just add a placeholder for a single task
code += f'''
# Execute a single task
task = "Search for information about CUA on GitHub"
print(f"Executing task: {{task}}")
async for result in agent.run(task):
print(result)'''
# Add the main block
code += '''
if __name__ == "__main__":
asyncio.run(main())'''
return code
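    # Illustrative output (sketch): for AgentLoop.OMNI with an OAICOMPAT
    # provider, a hypothetical model name "qwen2.5-vl", a hypothetical local
    # endpoint, and no tasks, generate_python_code returns roughly:
    #
    #   import asyncio
    #   from computer import Computer
    #   from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
    #
    #   async def main():
    #       async with Computer() as macos_computer:
    #           agent = ComputerAgent(
    #               computer=macos_computer,
    #               loop=AgentLoop.OMNI,
    #               only_n_most_recent_images=3,
    #               save_trajectory=True,
    #               model=LLM(
    #                   provider=LLMProvider.OAICOMPAT,
    #                   name="qwen2.5-vl",
    #                   provider_base_url="http://localhost:1234/v1"
    #               )
    #           )
    #
    #           # Execute a single task
    #           task = "Search for information about CUA on GitHub"
    #           print(f"Executing task: {task}")
    #           async for result in agent.run(task):
    #               print(result)
    #
    #   if __name__ == "__main__":
    #       asyncio.run(main())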
    # Function to update model choices based on agent loop selection
    def update_model_choices(loop):
@@ -537,50 +614,20 @@ def create_gradio_ui(
"""
)
# Add installation prerequisites as a collapsible section
with gr.Accordion("Prerequisites & Installation", open=False):
gr.Markdown(
"""
## Prerequisites
Before using the Computer-Use Agent, you need to set up the Lume daemon and pull the macOS VM image.
### 1. Install Lume daemon
While a lume binary is included with Computer, we recommend installing the standalone version with brew, and starting the lume daemon service:
```bash
sudo /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
```
### 2. Start the Lume daemon service
In a separate terminal:
```bash
lume serve
```
### 3. Pull the pre-built macOS image
```bash
lume pull macos-sequoia-cua:latest
```
Initial download requires 80GB storage, but reduces to ~30GB after first run due to macOS's sparse file system.
VMs are stored in `~/.lume`, and locally cached images are stored in `~/.lume/cache`.
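                    To verify the image is available locally, you can list your VMs and images:

                    ```bash
                    lume ls
                    ```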
                    ### 4. Test the sandbox

                    ```bash
                    lume run macos-sequoia-cua:latest
                    ```

                    For more detailed instructions, visit the [CUA GitHub repository](https://github.com/trycua/cua).
                    """
                )

            # Add accordion for Python code
            with gr.Accordion("Python Code", open=False):
                code_display = gr.Code(
                    language="python",
                    value=generate_python_code(
                        initial_loop,
                        LLMProvider.OPENAI,
                        "gpt-4o",
                        [],
                        "https://openrouter.ai/api/v1",
                    ),
                    interactive=False,
                )

            with gr.Accordion("Configuration", open=True):
                # Configuration options
                agent_loop = gr.Dropdown(
@@ -643,6 +690,7 @@ def create_gradio_ui(
info="Number of recent images to keep in context",
interactive=True,
)
# Right column for chat interface
with gr.Column(scale=2):
@@ -900,6 +948,62 @@ def create_gradio_ui(
            queue=False,  # Process immediately without queueing
        )
        # Function to update the code display based on configuration and chat history
        def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
            # Extract messages from chat history
            messages = []
            if chat_history:
                for msg in chat_history:
                    if msg.get("role") == "user":
                        messages.append(msg.get("content", ""))

            # Determine provider and model name based on selection
            model_string = custom_model_val if model_choice_val == "Custom model..." else model_choice_val
            provider, model_name, _ = get_provider_and_model(model_string, agent_loop)

            # Generate and return the code
            return generate_python_code(
                agent_loop,
                provider,
                model_name,
                messages,
                provider_base_url,
                recent_images_val,
                save_trajectory_val,
            )
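        # The six .change() bindings below are identical apart from the source
        # component; an equivalent loop-based wiring would be (sketch, assuming
        # every component above is in scope):
        #
        #   for comp in (agent_loop, model_choice, custom_model,
        #                chatbot_history, recent_images, save_trajectory):
        #       comp.change(
        #           update_code_display,
        #           inputs=[agent_loop, model_choice, custom_model, chatbot_history,
        #                   provider_base_url, recent_images, save_trajectory],
        #           outputs=[code_display],
        #       )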
        # Update code display when configuration changes
        agent_loop.change(
            update_code_display,
            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
            outputs=[code_display],
        )
        model_choice.change(
            update_code_display,
            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
            outputs=[code_display],
        )
        custom_model.change(
            update_code_display,
            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
            outputs=[code_display],
        )
        chatbot_history.change(
            update_code_display,
            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
            outputs=[code_display],
        )
        recent_images.change(
            update_code_display,
            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
            outputs=[code_display],
        )
        save_trajectory.change(
            update_code_display,
            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
            outputs=[code_display],
        )

    return demo

scripts/playground.sh (157 lines) · Executable file
View File

@@ -0,0 +1,157 @@
#!/bin/bash
set -e
echo "🚀 Setting up CUA playground environment..."
# Check for Apple Silicon Mac
if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
    echo "❌ This script requires an Apple Silicon Mac (M1/M2/M3/M4)."
    exit 1
fi
# Check for macOS 15 (Sequoia) or newer
OSVERSION=$(sw_vers -productVersion)
if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
echo "❌ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION."
exit 1
fi
# Create a temporary directory for our work
TMP_DIR=$(mktemp -d)
cd "$TMP_DIR"
# Function to clean up on exit
cleanup() {
    cd ~
    rm -rf "$TMP_DIR"
}
trap cleanup EXIT
# Install Lume if not already installed
if ! command -v lume &> /dev/null; then
    echo "📦 Installing Lume CLI..."
    curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash

    # Add lume to PATH for this session if it's not already there
    if ! command -v lume &> /dev/null; then
        export PATH="$PATH:$HOME/.lume/bin"
    fi
fi
# Pull the macOS CUA image if not already present
if ! lume ls | grep -q "macos-sequoia-cua"; then
    # Check available disk space
    IMAGE_SIZE_GB=30
    AVAILABLE_SPACE_KB=$(df -k "$HOME" | tail -1 | awk '{print $4}')
    AVAILABLE_SPACE_GB=$((AVAILABLE_SPACE_KB / 1024 / 1024))

    echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
    echo "   You currently have ${AVAILABLE_SPACE_GB}GB available on your system."

    # Prompt for confirmation
    read -p "   Continue? [y]/n: " CONTINUE
    CONTINUE=${CONTINUE:-y}

    if [[ $CONTINUE =~ ^[Yy]$ ]]; then
        echo "📥 Pulling macOS CUA image (this may take a while)..."
        lume pull macos-sequoia-cua:latest
    else
        echo "❌ Installation cancelled."
        exit 1
    fi
fi
# Create a Python virtual environment
echo "🐍 Setting up Python environment..."
PYTHON_CMD="python3"
# Check if Python 3.11+ is available
PYTHON_VERSION=$($PYTHON_CMD --version 2>&1 | cut -d" " -f2)
PYTHON_MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1)
PYTHON_MINOR=$(echo $PYTHON_VERSION | cut -d. -f2)
if [ "$PYTHON_MAJOR" -lt 3 ] || ([ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -lt 11 ]); then
echo "❌ Python 3.11+ is required. You have $PYTHON_VERSION."
echo "Please install Python 3.11+ and try again."
exit 1
fi
# Create a virtual environment
VENV_DIR="$HOME/.cua-venv"
if [ ! -d "$VENV_DIR" ]; then
$PYTHON_CMD -m venv "$VENV_DIR"
fi
# Activate the virtual environment
source "$VENV_DIR/bin/activate"
# Install required packages
echo "📦 Installing CUA packages..."
pip install -U pip
pip install cua-computer cua-agent[all]
# Setup environment for MCP server
echo "🔧 Setting up MCP server..."
# Create a simple demo script
DEMO_DIR="$HOME/.cua-demo"
mkdir -p "$DEMO_DIR"
cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.app import create_gradio_ui
# Try to load API keys from environment
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
    print("\n⚠ No OpenAI API key found. You'll need to provide one in the UI.")
# Launch the Gradio UI
app = create_gradio_ui()
app.launch(share=False)
EOF
# Create a convenience script to run the demo
cat > "$DEMO_DIR/start_demo.sh" << EOF
#!/bin/bash
source "$VENV_DIR/bin/activate"
cd "$DEMO_DIR"
python run_demo.py
EOF
chmod +x "$DEMO_DIR/start_demo.sh"
# Create a script to run the MCP server with the correct PYTHONPATH
cat > "$DEMO_DIR/start_mcp_server.sh" << EOF
#!/bin/bash
source "$VENV_DIR/bin/activate"
# Set PYTHONPATH to include all necessary libraries
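# Note: this assumes cua-computer-server is present in the environment
# (e.g. pulled in as a dependency of the cua-agent[all] install above).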
export PYTHONPATH="$PYTHONPATH:$(pip show cua-computer-server | grep Location | cut -d' ' -f2)"
# Run the MCP server using the Python module approach
python -m computer_server.mcp_server
EOF
chmod +x "$DEMO_DIR/start_mcp_server.sh"
# Create a desktop shortcut for the demo
cat > "$HOME/Desktop/CUA Playground.command" << EOF
#!/bin/bash
"$DEMO_DIR/start_demo.sh"
EOF
chmod +x "$HOME/Desktop/CUA Playground.command"
echo "✅ Setup complete!"
echo "🖥️ You can start the CUA playground by running: $DEMO_DIR/start_demo.sh"
echo "🖱️ Or double-click the 'CUA Playground' shortcut on your desktop"
echo "🤖 To run the MCP server: $DEMO_DIR/start_mcp_server.sh"
# Ask if the user wants to start the demo now
read -p "Would you like to start the CUA playground now? (y/n) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    "$DEMO_DIR/start_demo.sh"
fi