def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True):
    """Generate a runnable Python snippet for the current configuration and tasks.

    Args:
        agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI).
        provider: The provider — an LLMProvider enum member, or its name as a string.
        model_name: The model name.
        tasks: List of task prompts to execute.
        provider_url: The provider base URL (only emitted for OAICOMPAT providers).
        recent_images: Number of recent images to keep in context.
        save_trajectory: Whether to save the agent trajectory.

    Returns:
        Formatted Python code as a string.
    """
    # Accept either an enum member (use its .name) or a bare provider-name
    # string — update_code_display may hand us either.
    provider_name = getattr(provider, "name", str(provider))

    # Format the tasks as a Python list. Escape backslashes and double
    # quotes so a task like: say "hi"  cannot break the generated literal.
    task_lines = []
    for task in tasks:
        if task and task.strip():
            escaped = task.strip().replace("\\", "\\\\").replace('"', '\\"')
            task_lines.append(f'        "{escaped}",')
    tasks_str = "\n".join(task_lines)

    # Create the Python code template.
    code = f'''import asyncio
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider

async def main():
    async with Computer() as macos_computer:
        agent = ComputerAgent(
            computer=macos_computer,
            loop=AgentLoop.{agent_loop_choice},
            only_n_most_recent_images={recent_images},
            save_trajectory={save_trajectory},'''

    # OAICOMPAT providers need an explicit base URL; every OTHER provider
    # still needs a model entry — the previous template omitted `model=`
    # entirely for non-OAICOMPAT providers, so the displayed snippet did
    # not reflect the user's provider/model selection.
    if provider_name == "OAICOMPAT":
        code += f'''
            model=LLM(
                provider=LLMProvider.OAICOMPAT,
                name="{model_name}",
                provider_base_url="{provider_url}"
            )'''
    else:
        code += f'''
            model=LLM(
                provider=LLMProvider.{provider_name},
                name="{model_name}"
            )'''

    code += '''
        )
'''

    # Add the tasks section if there are tasks; otherwise emit a single
    # placeholder task so the snippet is runnable either way.
    if tasks_str:
        code += f'''
        # Prompts for the computer-use agent
        tasks = [
{tasks_str}
        ]

        for task in tasks:
            print(f"Executing task: {{task}}")
            async for result in agent.run(task):
                print(result)'''
    else:
        # Plain string here — the old code used an f-string with doubled
        # braces even though nothing was interpolated.
        code += '''
        # Execute a single task
        task = "Search for information about CUA on GitHub"
        print(f"Executing task: {task}")
        async for result in agent.run(task):
            print(result)'''

    # Add the main block.
    code += '''

if __name__ == "__main__":
    asyncio.run(main())'''

    return code
create_gradio_ui( """ ) - # Add installation prerequisites as a collapsible section - with gr.Accordion("Prerequisites & Installation", open=False): - gr.Markdown( - """ - ## Prerequisites - - Before using the Computer-Use Agent, you need to set up the Lume daemon and pull the macOS VM image. - - ### 1. Install Lume daemon - - While a lume binary is included with Computer, we recommend installing the standalone version with brew, and starting the lume daemon service: - - ```bash - sudo /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - ### 2. Start the Lume daemon service - - In a separate terminal: - - ```bash - lume serve - ``` - - ### 3. Pull the pre-built macOS image - - ```bash - lume pull macos-sequoia-cua:latest - ``` - - Initial download requires 80GB storage, but reduces to ~30GB after first run due to macOS's sparse file system. - - VMs are stored in `~/.lume`, and locally cached images are stored in `~/.lume/cache`. - - ### 4. Test the sandbox - - ```bash - lume run macos-sequoia-cua:latest - ``` - - For more detailed instructions, visit the [CUA GitHub repository](https://github.com/trycua/cua). 
- """ + # Add accordion for Python code + with gr.Accordion("Python Code", open=False): + code_display = gr.Code( + language="python", + value=generate_python_code( + initial_loop, + LLMProvider.OPENAI, + "gpt-4o", + [], + "https://openrouter.ai/api/v1" + ), + interactive=False, ) - + with gr.Accordion("Configuration", open=True): # Configuration options agent_loop = gr.Dropdown( @@ -643,6 +690,7 @@ def create_gradio_ui( info="Number of recent images to keep in context", interactive=True, ) + # Right column for chat interface with gr.Column(scale=2): @@ -900,6 +948,62 @@ def create_gradio_ui( queue=False, # Process immediately without queueing ) + # Function to update the code display based on configuration and chat history + def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val): + # Extract messages from chat history + messages = [] + if chat_history: + for msg in chat_history: + if msg.get("role") == "user": + messages.append(msg.get("content", "")) + + # Determine provider and model name based on selection + model_string = custom_model_val if model_choice_val == "Custom model..." 
else model_choice_val + provider, model_name, _ = get_provider_and_model(model_string, agent_loop) + + # Generate and return the code + return generate_python_code( + agent_loop, + provider, + model_name, + messages, + provider_base_url, + recent_images_val, + save_trajectory_val + ) + + # Update code display when configuration changes + agent_loop.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + outputs=[code_display] + ) + model_choice.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + outputs=[code_display] + ) + custom_model.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + outputs=[code_display] + ) + chatbot_history.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + outputs=[code_display] + ) + recent_images.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + outputs=[code_display] + ) + save_trajectory.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + outputs=[code_display] + ) + return demo diff --git a/scripts/playground.sh b/scripts/playground.sh new file mode 100755 index 00000000..bad1df3b --- /dev/null +++ b/scripts/playground.sh @@ -0,0 +1,157 @@ +#!/bin/bash + +set -e + +echo "šŸš€ Setting up CUA playground environment..." + +# Check for Apple Silicon Mac +if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then + echo "āŒ This script requires an Apple Silicon Mac (M1/M2/M3/M4)." 
+ exit 1 +fi + +# Check for macOS 15 (Sequoia) or newer +OSVERSION=$(sw_vers -productVersion) +if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then + echo "āŒ This script requires macOS 15 (Sequoia) or newer. You have $OSVERSION." + exit 1 +fi + +# Create a temporary directory for our work +TMP_DIR=$(mktemp -d) +cd "$TMP_DIR" + +# Function to clean up on exit +cleanup() { + cd ~ + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +# Install Lume if not already installed +if ! command -v lume &> /dev/null; then + echo "šŸ“¦ Installing Lume CLI..." + curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash + + # Add lume to PATH for this session if it's not already there + if ! command -v lume &> /dev/null; then + export PATH="$PATH:$HOME/.lume/bin" + fi +fi + +# Pull the macOS CUA image if not already present +if ! lume ls | grep -q "macos-sequoia-cua"; then + # Check available disk space + IMAGE_SIZE_GB=30 + AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}') + AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024)) + + echo "šŸ“Š The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space." + echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system." + + # Prompt for confirmation + read -p " Continue? [y]/n: " CONTINUE + CONTINUE=${CONTINUE:-y} + + if [[ $CONTINUE =~ ^[Yy]$ ]]; then + echo "šŸ“„ Pulling macOS CUA image (this may take a while)..." + lume pull macos-sequoia-cua:latest + else + echo "āŒ Installation cancelled." + exit 1 + fi +fi + +# Create a Python virtual environment +echo "šŸ Setting up Python environment..." +PYTHON_CMD="python3" + +# Check if Python 3.11+ is available +PYTHON_VERSION=$($PYTHON_CMD --version 2>&1 | cut -d" " -f2) +PYTHON_MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1) +PYTHON_MINOR=$(echo $PYTHON_VERSION | cut -d. 
-f2) + +if [ "$PYTHON_MAJOR" -lt 3 ] || ([ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -lt 11 ]); then + echo "āŒ Python 3.11+ is required. You have $PYTHON_VERSION." + echo "Please install Python 3.11+ and try again." + exit 1 +fi + +# Create a virtual environment +VENV_DIR="$HOME/.cua-venv" +if [ ! -d "$VENV_DIR" ]; then + $PYTHON_CMD -m venv "$VENV_DIR" +fi + +# Activate the virtual environment +source "$VENV_DIR/bin/activate" + +# Install required packages +echo "šŸ“¦ Installing CUA packages..." +pip install -U pip +pip install cua-computer cua-agent[all] + +# Setup environment for MCP server +echo "šŸ”§ Setting up MCP server..." + +# Create a simple demo script +DEMO_DIR="$HOME/.cua-demo" +mkdir -p "$DEMO_DIR" + +cat > "$DEMO_DIR/run_demo.py" << 'EOF' +import asyncio +import os +from computer import Computer +from agent import ComputerAgent, LLM, AgentLoop, LLMProvider +from agent.ui.gradio.app import create_gradio_ui + +# Try to load API keys from environment +api_key = os.environ.get("OPENAI_API_KEY", "") +if not api_key: + print("\nāš ļø No OpenAI API key found. 
You'll need to provide one in the UI.") + +# Launch the Gradio UI +app = create_gradio_ui() +app.launch(share=False) +EOF + +# Create a convenience script to run the demo +cat > "$DEMO_DIR/start_demo.sh" << EOF +#!/bin/bash +source "$VENV_DIR/bin/activate" +cd "$DEMO_DIR" +python run_demo.py +EOF +chmod +x "$DEMO_DIR/start_demo.sh" + +# Create a script to run the MCP server with the correct PYTHONPATH +cat > "$DEMO_DIR/start_mcp_server.sh" << EOF +#!/bin/bash +source "$VENV_DIR/bin/activate" + +# Set PYTHONPATH to include all necessary libraries +export PYTHONPATH="$PYTHONPATH:$(pip show cua-computer-server | grep Location | cut -d' ' -f2)" + +# Run the MCP server using the Python module approach +python -m computer_server.mcp_server +EOF +chmod +x "$DEMO_DIR/start_mcp_server.sh" + +# Create a desktop shortcut for the demo +cat > "$HOME/Desktop/CUA Playground.command" << EOF +#!/bin/bash +"$DEMO_DIR/start_demo.sh" +EOF +chmod +x "$HOME/Desktop/CUA Playground.command" + +echo "āœ… Setup complete!" +echo "šŸ–„ļø You can start the CUA playground by running: $DEMO_DIR/start_demo.sh" +echo "šŸ–±ļø Or double-click the 'CUA Playground' shortcut on your desktop" +echo "šŸ¤– To run the MCP server: $DEMO_DIR/start_mcp_server.sh" + +# Ask if the user wants to start the demo now +read -p "Would you like to start the CUA playground now? (y/n) " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]]; then + "$DEMO_DIR/start_demo.sh" +fi