diff --git a/libs/python/agent2/README.md b/libs/python/agent2/README.md
index b76bb673..0c5595e1 100644
--- a/libs/python/agent2/README.md
+++ b/libs/python/agent2/README.md
@@ -147,12 +147,12 @@ agent = ComputerAgent(
 
 ## Callbacks System
 
-Agent2 provides a comprehensive callback system for extending functionality:
+agent provides a comprehensive callback system for extending functionality:
 
 ### Built-in Callbacks
 
 ```python
-from agent2.callbacks import (
+from agent.callbacks import (
     ImageRetentionCallback,
     TrajectorySaverCallback,
     BudgetManagerCallback,
@@ -174,7 +174,7 @@ agent = ComputerAgent(
 ### Custom Callbacks
 
 ```python
-from agent2.callbacks.base import AsyncCallbackHandler
+from agent.callbacks.base import AsyncCallbackHandler
 
 class CustomCallback(AsyncCallbackHandler):
     async def on_llm_start(self, messages):
diff --git a/libs/python/agent2/agent2/__init__.py b/libs/python/agent2/agent/__init__.py
similarity index 80%
rename from libs/python/agent2/agent2/__init__.py
rename to libs/python/agent2/agent/__init__.py
index 7125beb3..9515a9bb 100644
--- a/libs/python/agent2/agent2/__init__.py
+++ b/libs/python/agent2/agent/__init__.py
@@ -1,5 +1,5 @@
 """
-Agent2 - Decorator-based Computer Use Agent with liteLLM integration
+agent - Decorator-based Computer Use Agent with liteLLM integration
 """
 
 from .decorators import agent_loop
diff --git a/libs/python/agent2/agent/__main__.py b/libs/python/agent2/agent/__main__.py
new file mode 100644
index 00000000..1b4d6697
--- /dev/null
+++ b/libs/python/agent2/agent/__main__.py
@@ -0,0 +1,21 @@
+"""
+Entry point for running agent CLI module.
+
+Usage:
+    python -m agent.cli
+"""
+
+import sys
+import asyncio
+from .cli import main
+
+if __name__ == "__main__":
+    # Check if 'cli' is specified as the module
+    if len(sys.argv) > 1 and sys.argv[1] == "cli":
+        # Remove 'cli' from arguments and run CLI
+        sys.argv.pop(1)
+        asyncio.run(main())
+    else:
+        print("Usage: python -m agent.cli <model>")
+        print("Example: python -m agent.cli openai/computer-use-preview")
+        sys.exit(1)
diff --git a/libs/python/agent2/agent2/adapters/__init__.py b/libs/python/agent2/agent/adapters/__init__.py
similarity index 65%
rename from libs/python/agent2/agent2/adapters/__init__.py
rename to libs/python/agent2/agent/adapters/__init__.py
index c16120b6..2d9abbe3 100644
--- a/libs/python/agent2/agent2/adapters/__init__.py
+++ b/libs/python/agent2/agent/adapters/__init__.py
@@ -1,5 +1,5 @@
 """
-Adapters package for agent2 - Custom LLM adapters for LiteLLM
+Adapters package for agent - Custom LLM adapters for LiteLLM
 """
 
 from .huggingfacelocal_adapter import HuggingFaceLocalAdapter
diff --git a/libs/python/agent2/agent2/adapters/huggingfacelocal_adapter.py b/libs/python/agent2/agent/adapters/huggingfacelocal_adapter.py
similarity index 100%
rename from libs/python/agent2/agent2/adapters/huggingfacelocal_adapter.py
rename to libs/python/agent2/agent/adapters/huggingfacelocal_adapter.py
diff --git a/libs/python/agent2/agent2/agent.py b/libs/python/agent2/agent/agent.py
similarity index 96%
rename from libs/python/agent2/agent2/agent.py
rename to libs/python/agent2/agent/agent.py
index e14669a7..ba86a632 100644
--- a/libs/python/agent2/agent2/agent.py
+++ b/libs/python/agent2/agent/agent.py
@@ -192,7 +192,7 @@ class ComputerAgent:
         ]
 
         # == Initialize computer agent ==
-        
+
         # Find the appropriate agent loop
         if custom_loop:
             self.agent_loop = custom_loop
@@ -204,16 +204,26 @@ class ComputerAgent:
             self.agent_loop = loop_info.func
             self.agent_loop_info = loop_info
 
-        # Process tools and create tool schemas
-        self.tool_schemas = self._process_tools()
+        self.tool_schemas = []
+        self.computer_handler = None
 
-        # Find computer tool and create interface adapter
-        computer_handler = None
-        for schema in self.tool_schemas:
-            if schema["type"] == "computer":
-                computer_handler = OpenAIComputerHandler(schema["computer"].interface)
-                break
-        self.computer_handler = computer_handler
+    async def _initialize_computers(self):
+        """Initialize computer objects"""
+        if not self.tool_schemas:
+            for tool in self.tools:
+                if hasattr(tool, '_initialized') and not tool._initialized:
+                    await tool.run()
+
+            # Process tools and create tool schemas
+            self.tool_schemas = self._process_tools()
+
+            # Find computer tool and create interface adapter
+            computer_handler = None
+            for schema in self.tool_schemas:
+                if schema["type"] == "computer":
+                    computer_handler = OpenAIComputerHandler(schema["computer"].interface)
+                    break
+            self.computer_handler = computer_handler
 
     def _process_input(self, input: Messages) -> List[Dict[str, Any]]:
         """Process input messages and create schemas for the agent loop"""
@@ -484,6 +494,9 @@ class ComputerAgent:
         Returns:
             AsyncGenerator that yields response chunks
         """
+
+        await self._initialize_computers()
+
+
         # Merge kwargs
         merged_kwargs = {**self.kwargs, **kwargs}
 
diff --git a/libs/python/agent2/agent2/callbacks/__init__.py b/libs/python/agent2/agent/callbacks/__init__.py
similarity index 100%
rename from libs/python/agent2/agent2/callbacks/__init__.py
rename to libs/python/agent2/agent/callbacks/__init__.py
diff --git a/libs/python/agent2/agent2/callbacks/base.py b/libs/python/agent2/agent/callbacks/base.py
similarity index 100%
rename from libs/python/agent2/agent2/callbacks/base.py
rename to libs/python/agent2/agent/callbacks/base.py
diff --git a/libs/python/agent2/agent2/callbacks/budget_manager.py b/libs/python/agent2/agent/callbacks/budget_manager.py
similarity index 100%
rename from libs/python/agent2/agent2/callbacks/budget_manager.py
rename to libs/python/agent2/agent/callbacks/budget_manager.py
diff --git
a/libs/python/agent2/agent2/callbacks/image_retention.py b/libs/python/agent2/agent/callbacks/image_retention.py
similarity index 100%
rename from libs/python/agent2/agent2/callbacks/image_retention.py
rename to libs/python/agent2/agent/callbacks/image_retention.py
diff --git a/libs/python/agent2/agent2/callbacks/logging.py b/libs/python/agent2/agent/callbacks/logging.py
similarity index 99%
rename from libs/python/agent2/agent2/callbacks/logging.py
rename to libs/python/agent2/agent/callbacks/logging.py
index 5a64fd16..af171925 100644
--- a/libs/python/agent2/agent2/callbacks/logging.py
+++ b/libs/python/agent2/agent/callbacks/logging.py
@@ -54,10 +54,10 @@ class LoggingCallback(AsyncCallbackHandler):
         Initialize the logging callback.
 
         Args:
-            logger: Logger instance to use. If None, creates a logger named 'agent2.ComputerAgent'
+            logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent'
             level: Logging level (logging.DEBUG, logging.INFO, etc.)
         """
-        self.logger = logger or logging.getLogger('agent2.ComputerAgent')
+        self.logger = logger or logging.getLogger('agent.ComputerAgent')
         self.level = level
 
         # Set up logger if it doesn't have handlers
diff --git a/libs/python/agent2/agent2/callbacks/pii_anonymization.py b/libs/python/agent2/agent/callbacks/pii_anonymization.py
similarity index 100%
rename from libs/python/agent2/agent2/callbacks/pii_anonymization.py
rename to libs/python/agent2/agent/callbacks/pii_anonymization.py
diff --git a/libs/python/agent2/agent2/callbacks/trajectory_saver.py b/libs/python/agent2/agent/callbacks/trajectory_saver.py
similarity index 100%
rename from libs/python/agent2/agent2/callbacks/trajectory_saver.py
rename to libs/python/agent2/agent/callbacks/trajectory_saver.py
diff --git a/libs/python/agent2/agent/cli.py b/libs/python/agent2/agent/cli.py
new file mode 100644
index 00000000..1f029ba7
--- /dev/null
+++ b/libs/python/agent2/agent/cli.py
@@ -0,0 +1,290 @@
+"""
+CLI chat interface for agent - Computer Use Agent
+
+Usage:
+    python -m agent.cli
+
+Examples:
+    python -m agent.cli openai/computer-use-preview
+    python -m agent.cli anthropic/claude-3-5-sonnet-20241022
+    python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
+"""
+
+import asyncio
+import argparse
+import os
+import sys
+import json
+from typing import List, Dict, Any
+import dotenv
+from yaspin import yaspin
+
+# Load environment variables
+dotenv.load_dotenv()
+
+# Color codes for terminal output
+class Colors:
+    RESET = '\033[0m'
+    BOLD = '\033[1m'
+    DIM = '\033[2m'
+
+    # Text colors
+    RED = '\033[31m'
+    GREEN = '\033[32m'
+    YELLOW = '\033[33m'
+    BLUE = '\033[34m'
+    MAGENTA = '\033[35m'
+    CYAN = '\033[36m'
+    WHITE = '\033[37m'
+    GRAY = '\033[90m'
+
+    # Background colors
+    BG_RED = '\033[41m'
+    BG_GREEN = '\033[42m'
+    BG_YELLOW = '\033[43m'
+    BG_BLUE = '\033[44m'
+
+
+def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n"):
+    """Print colored text to terminal."""
+    prefix = ""
+    if bold:
+        prefix += Colors.BOLD
+    if dim:
+        prefix += Colors.DIM
+    if color:
+        prefix += color
+
+    print(f"{prefix}{text}{Colors.RESET}", end=end)
+
+
+def print_action(action_type: str, details: Dict[str, Any]):
+    """Print computer action with nice formatting."""
+    # Format action details
+    args_str = ""
+    if action_type == "click" and "x" in details and "y" in details:
+        args_str = f"({details['x']}, {details['y']})"
+    elif action_type == "type" and "text" in details:
+        text = details["text"]
+        if len(text) > 50:
+            text = text[:47] + "..."
+        args_str = f'"{text}"'
+    elif action_type == "key" and "key" in details:
+        args_str = f"'{details['key']}'"
+    elif action_type == "scroll" and "x" in details and "y" in details:
+        args_str = f"({details['x']}, {details['y']})"
+
+    print_colored(f"🛠️ {action_type}{args_str}", dim=True)
+
+
+def print_welcome(model: str, agent_loop: str, container_name: str):
+    """Print welcome message."""
+    print_colored(f"Connected to {container_name} ({model}, {agent_loop})")
+    print_colored("Type 'exit' to quit.", dim=True)
+
+async def ainput(prompt: str = ""):
+    return await asyncio.to_thread(input, prompt)
+
+async def chat_loop(agent, model: str, container_name: str):
+    """Main chat loop with the agent."""
+    print_welcome(model, agent.agent_loop.__name__, container_name)
+
+    history = []
+
+    while True:
+        # Get user input with prompt
+        print_colored("> ", end="")
+        user_input = await ainput()
+
+        if user_input.lower() in ['exit', 'quit', 'q']:
+            print_colored("\n👋 Goodbye!")
+            break
+
+        if not user_input:
+            continue
+
+        # Add user message to history
+        history.append({"role": "user", "content": user_input})
+
+        # Stream responses from the agent with spinner
+        with yaspin(text="Thinking...", spinner="line", attrs=["dark"]) as spinner:
+            spinner.hide()
+
+            async for result in agent.run(history):
+                # Add agent responses to history
+                history.extend(result.get("output", []))
+
+                # Process and display the output
+                for item in result.get("output", []):
+                    if item.get("type") == "message":
+                        # Display agent text response
+                        content = item.get("content", [])
+                        for content_part in content:
+                            if content_part.get("text"):
+                                text = content_part.get("text", "").strip()
+                                if text:
+                                    spinner.hide()
+                                    print_colored(text)
+
+                    elif item.get("type") == "computer_call":
+                        # Display computer action
+                        action = item.get("action", {})
+                        action_type = action.get("type", "")
+                        if action_type:
+                            spinner.hide()
+                            print_action(action_type, action)
+                            spinner.text = f"Performing {action_type}..."
+                            spinner.show()
+
+                    elif item.get("type") == "function_call":
+                        # Display function call
+                        function_name = item.get("name", "")
+                        spinner.hide()
+                        print_colored(f"🔧 Calling function: {function_name}", dim=True)
+                        spinner.text = f"Calling {function_name}..."
+                        spinner.show()
+
+                    elif item.get("type") == "function_call_output":
+                        # Display function output (dimmed)
+                        output = item.get("output", "")
+                        if output and len(output.strip()) > 0:
+                            spinner.hide()
+                            print_colored(f"📤 {output}", dim=True)
+
+            spinner.hide()
+
+
+async def main():
+    """Main CLI function."""
+    parser = argparse.ArgumentParser(
+        description="CUA Agent CLI - Interactive computer use assistant",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python -m agent.cli openai/computer-use-preview
+  python -m agent.cli anthropic/claude-3-5-sonnet-20241022
+  python -m agent.cli omniparser+anthropic/claude-3-5-sonnet-20241022
+  python -m agent.cli huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
+        """
+    )
+
+    parser.add_argument(
+        "model",
+        help="Model string (e.g., 'openai/computer-use-preview', 'anthropic/claude-3-5-sonnet-20241022')"
+    )
+
+    parser.add_argument(
+        "--images",
+        type=int,
+        default=3,
+        help="Number of recent images to keep in context (default: 3)"
+    )
+
+    parser.add_argument(
+        "--trajectory",
+        action="store_true",
+        help="Save trajectory for debugging"
+    )
+
+    parser.add_argument(
+        "--budget",
+        type=float,
+        help="Maximum budget for the session (in dollars)"
+    )
+
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging"
+    )
+
+    args = parser.parse_args()
+
+    # Check for required environment variables
+    container_name = os.getenv("CUA_CONTAINER_NAME")
+    cua_api_key = os.getenv("CUA_API_KEY")
+
+    # Prompt for missing environment variables
+    if not container_name:
+        print_colored("CUA_CONTAINER_NAME not set.", dim=True)
+        print_colored("You can get a CUA container at https://www.trycua.com/", dim=True)
+        container_name = input("Enter your CUA container name: ").strip()
+        if not container_name:
+            print_colored("❌ Container name is required.")
+            sys.exit(1)
+
+    if not cua_api_key:
+        print_colored("CUA_API_KEY not set.", dim=True)
+        cua_api_key = input("Enter your CUA API key: ").strip()
+        if not cua_api_key:
+            print_colored("❌ API key is required.")
+            sys.exit(1)
+
+    # Check for provider-specific API keys based on model
+    provider_api_keys = {
+        "openai/": "OPENAI_API_KEY",
+        "anthropic/": "ANTHROPIC_API_KEY",
+        "omniparser+openai/": "OPENAI_API_KEY",
+        "omniparser+anthropic/": "ANTHROPIC_API_KEY",
+    }
+
+    # Find matching provider and check for API key
+    for prefix, env_var in provider_api_keys.items():
+        if args.model.startswith(prefix):
+            if not os.getenv(env_var):
+                print_colored(f"{env_var} not set.", dim=True)
+                api_key = input(f"Enter your {env_var.replace('_', ' ').title()}: ").strip()
+                if not api_key:
+                    print_colored(f"❌ {env_var.replace('_', ' ').title()} is required.")
+                    sys.exit(1)
+                # Set the environment variable for the session
+                os.environ[env_var] = api_key
+            break
+
+    # Import here to avoid import errors if dependencies are missing
+    try:
+        from agent import ComputerAgent
+        from computer import Computer
+    except ImportError as e:
+        print_colored(f"❌ Import error: {e}", Colors.RED, bold=True)
+        print_colored("Make sure agent and computer libraries are installed.", Colors.YELLOW)
+        sys.exit(1)
+
+    # Create computer instance
+    async with Computer(
+        os_type="linux",
+        provider_type="cloud",
+        name=container_name,
+        api_key=cua_api_key
+    ) as computer:
+
+        # Create agent
+        agent_kwargs = {
+            "model": args.model,
+            "tools": [computer],
+            "only_n_most_recent_images": args.images,
+            "verbosity": 20 if args.verbose else 30,  # logging.INFO vs logging.WARNING
+        }
+
+        if args.trajectory:
+            agent_kwargs["trajectory_dir"] = "trajectories"
+
+        if args.budget:
+            agent_kwargs["max_trajectory_budget"] = {
+                "max_budget": args.budget,
+                "raise_error": True,
+                "reset_after_each_run": False
+            }
+
+        agent = ComputerAgent(**agent_kwargs)
+
+        # Start chat loop
+        await chat_loop(agent, args.model, container_name)
+
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except (KeyboardInterrupt, EOFError) as _:
+        print_colored("\n\n👋 Goodbye!")
\ No newline at end of file
diff --git a/libs/python/agent2/agent2/computer_handler.py b/libs/python/agent2/agent/computer_handler.py
similarity index 100%
rename from libs/python/agent2/agent2/computer_handler.py
rename to libs/python/agent2/agent/computer_handler.py
diff --git a/libs/python/agent2/agent2/decorators.py b/libs/python/agent2/agent/decorators.py
similarity index 98%
rename from libs/python/agent2/agent2/decorators.py
rename to libs/python/agent2/agent/decorators.py
index 102d26fb..0b31c25a 100644
--- a/libs/python/agent2/agent2/decorators.py
+++ b/libs/python/agent2/agent/decorators.py
@@ -1,5 +1,5 @@
 """
-Decorators for agent2 - agent_loop decorator
+Decorators for agent - agent_loop decorator
 """
 
 import asyncio
diff --git a/libs/python/agent2/agent2/loops/__init__.py b/libs/python/agent2/agent/loops/__init__.py
similarity index 89%
rename from libs/python/agent2/agent2/loops/__init__.py
rename to libs/python/agent2/agent/loops/__init__.py
index c02cdb5d..aa159411 100644
--- a/libs/python/agent2/agent2/loops/__init__.py
+++ b/libs/python/agent2/agent/loops/__init__.py
@@ -1,5 +1,5 @@
 """
-Agent loops for agent2
+Agent loops for agent
 """
 
 # Import the loops to register them
diff --git a/libs/python/agent2/agent2/loops/anthropic.py b/libs/python/agent2/agent/loops/anthropic.py
similarity index 100%
rename from libs/python/agent2/agent2/loops/anthropic.py
rename to libs/python/agent2/agent/loops/anthropic.py
diff --git a/libs/python/agent2/agent2/loops/omniparser.py b/libs/python/agent2/agent/loops/omniparser.py
similarity index 100%
rename from libs/python/agent2/agent2/loops/omniparser.py
rename to libs/python/agent2/agent/loops/omniparser.py
diff --git
a/libs/python/agent2/agent2/loops/openai.py b/libs/python/agent2/agent/loops/openai.py
similarity index 100%
rename from libs/python/agent2/agent2/loops/openai.py
rename to libs/python/agent2/agent/loops/openai.py
diff --git a/libs/python/agent2/agent2/loops/uitars.py b/libs/python/agent2/agent/loops/uitars.py
similarity index 100%
rename from libs/python/agent2/agent2/loops/uitars.py
rename to libs/python/agent2/agent/loops/uitars.py
diff --git a/libs/python/agent2/agent2/responses.py b/libs/python/agent2/agent/responses.py
similarity index 100%
rename from libs/python/agent2/agent2/responses.py
rename to libs/python/agent2/agent/responses.py
diff --git a/libs/python/agent2/agent2/types.py b/libs/python/agent2/agent/types.py
similarity index 98%
rename from libs/python/agent2/agent2/types.py
rename to libs/python/agent2/agent/types.py
index 2999fad1..2b07a6cf 100644
--- a/libs/python/agent2/agent2/types.py
+++ b/libs/python/agent2/agent/types.py
@@ -1,5 +1,5 @@
 """
-Type definitions for agent2
+Type definitions for agent
 """
 
 from typing import Dict, List, Any, Optional, Callable, Protocol, Literal
diff --git a/libs/python/agent2/agent/ui/__init__.py b/libs/python/agent2/agent/ui/__init__.py
new file mode 100644
index 00000000..1654ce93
--- /dev/null
+++ b/libs/python/agent2/agent/ui/__init__.py
@@ -0,0 +1,7 @@
+"""
+UI components for agent
+"""
+
+from .gradio import test_cua, create_gradio_ui
+
+__all__ = ["test_cua", "create_gradio_ui"]
diff --git a/libs/python/agent2/agent/ui/gradio/__init__.py b/libs/python/agent2/agent/ui/gradio/__init__.py
new file mode 100644
index 00000000..ce6cd3b8
--- /dev/null
+++ b/libs/python/agent2/agent/ui/gradio/__init__.py
@@ -0,0 +1,8 @@
+"""
+Gradio UI for agent
+"""
+
+from .app import test_cua
+from .ui_components import create_gradio_ui
+
+__all__ = ["test_cua", "create_gradio_ui"]
diff --git a/libs/python/agent2/agent/ui/gradio/app.py b/libs/python/agent2/agent/ui/gradio/app.py
new file mode 100644
index 00000000..d5c8a7f3
--- /dev/null
+++ b/libs/python/agent2/agent/ui/gradio/app.py
@@ -0,0 +1,248 @@
+"""
+Advanced Gradio UI for Computer-Use Agent (cua-agent)
+
+This is a Gradio interface for the Computer-Use Agent v0.4.x (cua-agent)
+with an advanced UI for model selection and configuration.
+
+Supported Agent Models:
+- OpenAI: openai/computer-use-preview
+- Anthropic: anthropic/claude-3-5-sonnet-20241022, anthropic/claude-3-7-sonnet-20250219
+- UI-TARS: huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
+- Omniparser: omniparser+anthropic/claude-3-5-sonnet-20241022, omniparser+ollama_chat/gemma3
+
+Requirements:
+    - Mac with Apple Silicon (M1/M2/M3/M4), Linux, or Windows
+    - macOS 14 (Sonoma) or newer / Ubuntu 20.04+
+    - Python 3.11+
+    - Lume CLI installed (https://github.com/trycua/cua)
+    - OpenAI or Anthropic API key
+"""
+
+import os
+import asyncio
+import logging
+import json
+import platform
+from pathlib import Path
+from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
+import gradio as gr
+from gradio.components.chatbot import MetadataDict
+from typing import cast
+
+# Import from agent package
+from agent import ComputerAgent
+from agent.types import Messages, AgentResponse
+from computer import Computer
+
+# Global variables
+global_agent = None
+global_computer = None
+SETTINGS_FILE = Path(".gradio_settings.json")
+
+
+import dotenv
+if dotenv.load_dotenv():
+    print(f"DEBUG - Loaded environment variables from {dotenv.find_dotenv()}")
+else:
+    print("DEBUG - No .env file found")
+
+# --- Settings Load/Save Functions ---
+def load_settings() -> Dict[str, Any]:
+    """Loads settings from the JSON file."""
+    if SETTINGS_FILE.exists():
+        try:
+            with open(SETTINGS_FILE, "r") as f:
+                settings = json.load(f)
+                if isinstance(settings, dict):
+                    print(f"DEBUG - Loaded settings from {SETTINGS_FILE}")
+                    return settings
+        except (json.JSONDecodeError, IOError) as e:
+            print(f"Warning: Could not load settings from {SETTINGS_FILE}: {e}")
+    return {}
+
+
+def save_settings(settings: Dict[str, Any]):
+    """Saves settings to the JSON file."""
+    settings.pop("provider_api_key", None)
+    try:
+        with open(SETTINGS_FILE, "w") as f:
+            json.dump(settings, f, indent=4)
+        print(f"DEBUG - Saved settings to {SETTINGS_FILE}")
+    except IOError as e:
+        print(f"Warning: Could not save settings to {SETTINGS_FILE}: {e}")
+
+
+# Custom Screenshot Handler for Gradio chat
+class GradioChatScreenshotHandler:
+    """Custom handler that adds screenshots to the Gradio chatbot."""
+
+    def __init__(self, chatbot_history: List[gr.ChatMessage]):
+        self.chatbot_history = chatbot_history
+        print("GradioChatScreenshotHandler initialized")
+
+    async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
+        """Add screenshot to chatbot when a screenshot is taken."""
+        image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})"
+
+        if self.chatbot_history is not None:
+            self.chatbot_history.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=image_markdown,
+                    metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
+                )
+            )
+
+
+# Detect platform capabilities
+is_mac = platform.system().lower() == "darwin"
+is_lume_available = is_mac or (os.environ.get("PYLUME_HOST", "localhost") != "localhost")
+
+print("PYLUME_HOST: ", os.environ.get("PYLUME_HOST", "localhost"))
+print("is_mac: ", is_mac)
+print("Lume available: ", is_lume_available)
+
+# Map model names to agent model strings
+MODEL_MAPPINGS = {
+    "openai": {
+        "default": "openai/computer-use-preview",
+        "OpenAI: Computer-Use Preview": "openai/computer-use-preview",
+    },
+    "anthropic": {
+        "default": "anthropic/claude-3-7-sonnet-20250219",
+        "Anthropic: Claude 4 Opus (20250514)": "anthropic/claude-opus-4-20250514",
+        "Anthropic: Claude 4 Sonnet (20250514)": "anthropic/claude-sonnet-4-20250514",
+        "Anthropic: Claude 3.7 Sonnet (20250219)": "anthropic/claude-3-7-sonnet-20250219",
+        "Anthropic: Claude 3.5 Sonnet (20240620)": "anthropic/claude-3-5-sonnet-20240620",
+    },
+    "omni": {
+        "default": "omniparser+openai/gpt-4o",
+        "OMNI: OpenAI GPT-4o": "omniparser+openai/gpt-4o",
+        "OMNI: OpenAI GPT-4o mini": "omniparser+openai/gpt-4o-mini",
+        "OMNI: Claude 3.7 Sonnet (20250219)": "omniparser+anthropic/claude-3-7-sonnet-20250219",
+        "OMNI: Claude 3.5 Sonnet (20240620)": "omniparser+anthropic/claude-3-5-sonnet-20240620",
+    },
+    "uitars": {
+        "default": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" if is_mac else "ui-tars",
+        "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
+    },
+}
+
+
+def get_model_string(model_name: str, loop_provider: str) -> str:
+    """Determine the agent model string based on the input."""
+    if model_name == "Custom model (OpenAI compatible API)":
+        return "custom_oaicompat"
+    elif model_name == "Custom model (ollama)":
+        return "custom_ollama"
+    elif loop_provider == "OMNI-OLLAMA" or model_name.startswith("OMNI: Ollama "):
+        if model_name.startswith("OMNI: Ollama "):
+            ollama_model = model_name.split("OMNI: Ollama ", 1)[1]
+            return f"omniparser+ollama_chat/{ollama_model}"
+        return "omniparser+ollama_chat/llama3"
+
+    # Map based on loop provider
+    mapping = MODEL_MAPPINGS.get(loop_provider.lower(), MODEL_MAPPINGS["openai"])
+    return mapping.get(model_name, mapping["default"])
+
+
+def get_ollama_models() -> List[str]:
+    """Get available models from Ollama if installed."""
+    try:
+        import subprocess
+        result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
+        if result.returncode == 0:
+            lines = result.stdout.strip().split("\n")
+            if len(lines) < 2:
+                return []
+            models = []
+            for line in lines[1:]:
+                parts = line.split()
+                if parts:
+                    model_name = parts[0]
+                    models.append(f"OMNI: Ollama {model_name}")
+            return models
+        return []
+    except Exception as e:
+        logging.error(f"Error getting Ollama models: {e}")
+        return []
+
+
+def create_computer_instance(
+    verbosity: int = logging.INFO,
+    os_type: str = "macos",
+    provider_type: str = "lume",
+    name: Optional[str] = None,
+    api_key: Optional[str] = None
+) -> Computer:
+    """Create or get the global Computer instance."""
+    global global_computer
+    if global_computer is None:
+        global_computer = Computer(
+            verbosity=verbosity,
+            os_type=os_type,
+            provider_type=provider_type,
+            name=name if name else "",
+            api_key=api_key
+        )
+    return global_computer
+
+
+def create_agent(
+    model_string: str,
+    save_trajectory: bool = True,
+    only_n_most_recent_images: int = 3,
+    verbosity: int = logging.INFO,
+    custom_model_name: Optional[str] = None,
+    computer_os: str = "macos",
+    computer_provider: str = "lume",
+    computer_name: Optional[str] = None,
+    computer_api_key: Optional[str] = None,
+    max_trajectory_budget: Optional[float] = None,
+) -> ComputerAgent:
+    """Create or update the global agent with the specified parameters."""
+    global global_agent
+
+    # Create the computer
+    computer = create_computer_instance(
+        verbosity=verbosity,
+        os_type=computer_os,
+        provider_type=computer_provider,
+        name=computer_name,
+        api_key=computer_api_key
+    )
+
+    # Handle custom models
+    if model_string == "custom_oaicompat" and custom_model_name:
+        model_string = custom_model_name
+    elif model_string == "custom_ollama" and custom_model_name:
+        model_string = f"omniparser+ollama_chat/{custom_model_name}"
+
+    # Create agent kwargs
+    agent_kwargs = {
+        "model": model_string,
+        "tools": [computer],
+        "only_n_most_recent_images": only_n_most_recent_images,
+        "verbosity": verbosity,
+    }
+
+    if save_trajectory:
+        agent_kwargs["trajectory_dir"] = "trajectories"
+
+    if max_trajectory_budget:
+        agent_kwargs["max_trajectory_budget"] = {"max_budget": max_trajectory_budget, "raise_error": True}
+
+    global_agent = ComputerAgent(**agent_kwargs)
+    return global_agent
+
+
+def test_cua():
+    """Standalone function to launch the Gradio app."""
+    from agent.ui.gradio.ui_components import create_gradio_ui
+    print(f"Starting Gradio app for CUA Agent...")
+    demo = create_gradio_ui()
+    demo.launch(share=False, inbrowser=True)
+
+
+if __name__ == "__main__":
+    test_cua()
diff --git a/libs/python/agent2/agent/ui/gradio/ui_components.py b/libs/python/agent2/agent/ui/gradio/ui_components.py
new file mode 100644
index 00000000..13b7c833
--- /dev/null
+++ b/libs/python/agent2/agent/ui/gradio/ui_components.py
@@ -0,0 +1,703 @@
+"""
+UI Components for the Gradio interface
+"""
+
+import os
+import asyncio
+import logging
+import json
+import platform
+from pathlib import Path
+from typing import Dict, List, Optional, Any, cast
+import gradio as gr
+from gradio.components.chatbot import MetadataDict
+
+from .app import (
+    load_settings, save_settings, create_agent, get_model_string,
+    get_ollama_models, GradioChatScreenshotHandler, global_agent, global_computer
+)
+
+
+def create_gradio_ui() -> gr.Blocks:
+    """Create a Gradio UI for the Computer-Use Agent."""
+
+    # Load settings
+    saved_settings = load_settings()
+
+    # Check for API keys
+    openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    cua_api_key = os.environ.get("CUA_API_KEY", "")
+
+    # Model choices
+    openai_models = ["OpenAI: Computer-Use Preview"]
+    anthropic_models = [
+        "Anthropic: Claude 4 Opus (20250514)",
+        "Anthropic: Claude 4 Sonnet (20250514)",
+        "Anthropic: Claude 3.7 Sonnet (20250219)",
+        "Anthropic: Claude 3.5 Sonnet (20240620)",
+    ]
+    omni_models = [
+        "OMNI: OpenAI GPT-4o",
+        "OMNI: OpenAI GPT-4o mini",
+        "OMNI: Claude 3.7 Sonnet (20250219)",
+        "OMNI: Claude 3.5 Sonnet (20240620)"
+    ]
+
+    # Check if API keys are available
+    has_openai_key = bool(openai_api_key)
+    has_anthropic_key = bool(anthropic_api_key)
+    has_cua_key = bool(cua_api_key)
+
+    # Get Ollama models for OMNI
+    ollama_models = get_ollama_models()
+    if ollama_models:
+        omni_models += ollama_models
+
+    # Detect platform
+    is_mac = platform.system().lower() == "darwin"
+
+    # Format model choices
+    provider_to_models = {
+
"OPENAI": openai_models, + "ANTHROPIC": anthropic_models, + "OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"], + "UITARS": ([ + "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", + ] if is_mac else []) + ["Custom model (OpenAI compatible API)"], + } + + # Apply saved settings + initial_loop = saved_settings.get("agent_loop", "OMNI") + available_models_for_loop = provider_to_models.get(initial_loop, []) + saved_model_choice = saved_settings.get("model_choice") + if saved_model_choice and saved_model_choice in available_models_for_loop: + initial_model = saved_model_choice + else: + if initial_loop == "OPENAI": + initial_model = openai_models[0] if openai_models else "No models available" + elif initial_loop == "ANTHROPIC": + initial_model = anthropic_models[0] if anthropic_models else "No models available" + else: # OMNI + initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)" + + initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct") + initial_provider_base_url = saved_settings.get("provider_base_url", "http://localhost:1234/v1") + initial_save_trajectory = saved_settings.get("save_trajectory", True) + initial_recent_images = saved_settings.get("recent_images", 3) + + # Example prompts + example_messages = [ + "Create a Python virtual environment, install pandas and matplotlib, then plot stock data", + "Open a PDF in Preview, add annotations, and save it as a compressed version", + "Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks", + "Configure SSH keys and set up a connection to a remote server", + ] + + def generate_python_code(agent_loop_choice, model_name, tasks, recent_images=3, save_trajectory=True, computer_os="linux", computer_provider="cloud", container_name="", cua_cloud_api_key="", max_budget=None): + """Generate Python code for the current configuration and tasks.""" + tasks_str = "" + for task in tasks: + 
if task and task.strip(): + tasks_str += f' "{task}",\n' + + model_string = get_model_string(model_name, agent_loop_choice) + + computer_args = [] + if computer_os != "macos": + computer_args.append(f'os_type="{computer_os}"') + if computer_provider != "lume": + computer_args.append(f'provider_type="{computer_provider}"') + if container_name: + computer_args.append(f'name="{container_name}"') + if cua_cloud_api_key: + computer_args.append(f'api_key="{cua_cloud_api_key}"') + + computer_args_str = ", ".join(computer_args) + if computer_args_str: + computer_args_str = f"({computer_args_str})" + else: + computer_args_str = "()" + + code = f'''import asyncio +from computer import Computer +from agent import ComputerAgent + +async def main(): + async with Computer{computer_args_str} as computer: + agent = ComputerAgent( + model="{model_string}", + tools=[computer], + only_n_most_recent_images={recent_images},''' + + if save_trajectory: + code += ''' + trajectory_dir="trajectories",''' + + if max_budget: + code += f''' + max_trajectory_budget={{"max_budget": {max_budget}, "raise_error": True}},''' + + code += ''' + ) + ''' + + if tasks_str: + code += f''' + # Prompts for the computer-use agent + tasks = [ +{tasks_str.rstrip()} + ] + + for task in tasks: + print(f"Executing task: {{task}}") + messages = [{{"role": "user", "content": task}}] + async for result in agent.run(messages): + for item in result["output"]: + if item["type"] == "message": + print(item["content"][0]["text"])''' + else: + code += f''' + # Execute a single task + task = "Search for information about CUA on GitHub" + print(f"Executing task: {{task}}") + messages = [{{"role": "user", "content": task}}] + async for result in agent.run(messages): + for item in result["output"]: + if item["type"] == "message": + print(item["content"][0]["text"])''' + + code += ''' + +if __name__ == "__main__": + asyncio.run(main())''' + + return code + + # Create the Gradio interface + with gr.Blocks(title="Computer-Use 
Agent") as demo: + with gr.Row(): + # Left column for settings + with gr.Column(scale=1): + # Logo + gr.HTML( + """ +
+ CUA Logo +
+ """ + ) + + # Python code accordion + with gr.Accordion("Python Code", open=False): + code_display = gr.Code( + language="python", + value=generate_python_code(initial_loop, "gpt-4o", []), + interactive=False, + ) + + with gr.Accordion("Computer Configuration", open=True): + computer_os = gr.Radio( + choices=["macos", "linux", "windows"], + label="Operating System", + value="macos", + info="Select the operating system for the computer", + ) + + is_windows = platform.system().lower() == "windows" + is_mac = platform.system().lower() == "darwin" + + providers = ["cloud"] + if is_mac: + providers += ["lume"] + if is_windows: + providers += ["winsandbox"] + + computer_provider = gr.Radio( + choices=providers, + label="Provider", + value="lume" if is_mac else "cloud", + info="Select the computer provider", + ) + + container_name = gr.Textbox( + label="Container Name", + placeholder="Enter container name (optional)", + value=os.environ.get("CUA_CONTAINER_NAME", ""), + info="Optional name for the container", + ) + + cua_cloud_api_key = gr.Textbox( + label="CUA Cloud API Key", + placeholder="Enter your CUA Cloud API key", + value=os.environ.get("CUA_API_KEY", ""), + type="password", + info="Required for cloud provider", + visible=(not has_cua_key) + ) + + with gr.Accordion("Agent Configuration", open=True): + agent_loop = gr.Dropdown( + choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"], + label="Agent Loop", + value=initial_loop, + info="Select the agent loop provider", + ) + + # Model selection dropdowns + with gr.Group() as model_selection_group: + openai_model_choice = gr.Dropdown( + choices=openai_models, + label="OpenAI Model", + value=openai_models[0] if openai_models else "No models available", + info="Select OpenAI model", + interactive=True, + visible=(initial_loop == "OPENAI") + ) + + anthropic_model_choice = gr.Dropdown( + choices=anthropic_models, + label="Anthropic Model", + value=anthropic_models[0] if anthropic_models else "No models available", + 
# API key handlers
def _export_api_key(env_var, key, provider_label):
    """Copy a non-blank key into ``os.environ[env_var]`` and log that it was set.

    The key is stored stripped of surrounding whitespace; the value returned is
    the original, unmodified ``key`` so the textbox content is left as typed.
    """
    cleaned = key.strip() if key else ""
    if cleaned:
        os.environ[env_var] = cleaned
        print(f"DEBUG - Set {provider_label} API key environment variable")
    return key


def set_openai_api_key(key):
    """Export ``key`` as ``OPENAI_API_KEY`` when it is non-blank; return ``key``."""
    return _export_api_key("OPENAI_API_KEY", key, "OpenAI")


def set_anthropic_api_key(key):
    """Export ``key`` as ``ANTHROPIC_API_KEY`` when it is non-blank; return ``key``."""
    return _export_api_key("ANTHROPIC_API_KEY", key, "Anthropic")
# UI update function
def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None):
    """Compute visibility/value updates after the loop or a model dropdown changes.

    Args:
        loop: Selected agent loop; falls back to ``agent_loop.value`` when falsy.
        openai_model, anthropic_model, omni_model, uitars_model: Current value
            of each per-loop model dropdown. Only the one matching ``loop`` is
            considered the active selection.

    Returns:
        A list of ten ``gr.update`` objects, in order: the four model dropdowns
        (visible flags), the OpenAI and Anthropic key groups, the custom model
        name box, the provider base URL box, the provider API key box, and the
        hidden ``model_choice`` textbox value.
    """
    loop = loop or agent_loop.value

    selection_by_loop = {
        "OPENAI": openai_model,
        "ANTHROPIC": anthropic_model,
        "OMNI": omni_model,
        "UITARS": uitars_model,
    }
    # A falsy selection (None, "") is treated as "nothing selected".
    model_value = selection_by_loop.get(loop) or None

    def omni_selection_mentions(vendor):
        # True when an OMNI, non-custom model name contains the vendor marker.
        return (
            loop == "OMNI"
            and bool(model_value)
            and vendor in model_value
            and "Custom" not in model_value
        )

    # Coerce to real booleans: a short-circuited None would make
    # gr.update(visible=None) leave the group as-is instead of hiding it.
    show_openai_key = bool(
        not has_openai_key and (loop == "OPENAI" or omni_selection_mentions("OpenAI"))
    )
    show_anthropic_key = bool(
        not has_anthropic_key and (loop == "ANTHROPIC" or omni_selection_mentions("Claude"))
    )

    is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)"
    is_custom_ollama = model_value == "Custom model (ollama)"
    is_any_custom = is_custom_openai_api or is_custom_ollama

    return [
        gr.update(visible=(loop == "OPENAI")),
        gr.update(visible=(loop == "ANTHROPIC")),
        gr.update(visible=(loop == "OMNI")),
        gr.update(visible=(loop == "UITARS")),
        gr.update(visible=show_openai_key),
        gr.update(visible=show_anthropic_key),
        gr.update(visible=is_any_custom),
        gr.update(visible=is_custom_openai_api),
        gr.update(visible=is_custom_openai_api),
        gr.update(value=model_value if model_value else ""),
    ]
placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)", + value=initial_custom_model, + visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"), + interactive=True, + ) + + provider_base_url = gr.Textbox( + label="Provider Base URL", + placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)", + value=initial_provider_base_url, + visible=(initial_model == "Custom model (OpenAI compatible API)"), + interactive=True, + ) + + provider_api_key = gr.Textbox( + label="Provider API Key", + placeholder="Enter provider API key (if required)", + value="", + visible=(initial_model == "Custom model (OpenAI compatible API)"), + interactive=True, + type="password", + ) + + # Connect UI update events + for dropdown in [agent_loop, omni_model_choice, uitars_model_choice, openai_model_choice, anthropic_model_choice]: + dropdown.change( + fn=update_ui, + inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice], + outputs=[ + openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, + openai_key_group, anthropic_key_group, + custom_model, provider_base_url, provider_api_key, + model_choice + ], + queue=False + ) + + save_trajectory = gr.Checkbox( + label="Save Trajectory", + value=initial_save_trajectory, + info="Save the agent's trajectory for debugging", + interactive=True, + ) + + recent_images = gr.Slider( + label="Recent Images", + minimum=1, + maximum=10, + value=initial_recent_images, + step=1, + info="Number of recent images to keep in context", + interactive=True, + ) + + max_budget = gr.Number( + label="Max Budget ($)", + value=lambda: None, + minimum=-1, + maximum=100.0, + step=0.1, + info="Optional budget limit for trajectory (0 = no limit)", + interactive=True, + ) + + # Right column for chat interface + with gr.Column(scale=2): + gr.Markdown( + "Ask me to perform tasks in a virtual environment.
# Chat submission function
def chat_submit(message, history):
    """Append the user's message to ``history`` and return ``("", history)``.

    The empty string clears the input textbox bound as the first output.
    """
    history.append(gr.ChatMessage(role="user", content=message))
    return "", history


# Cancel function
async def cancel_agent_task(history):
    """Append a cancellation (or "nothing to cancel") notice to ``history``.

    NOTE(review): this only reports on the module-level ``global_agent``; no
    call that stops a running agent is made here.
    """
    global global_agent
    if global_agent:
        print("DEBUG - Cancelling agent task")
        history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"}))
    else:
        history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"}))
    return history


def _chat_messages_for_item(item):
    """Translate one agent output item into zero or more ``gr.ChatMessage`` entries."""
    item_type = item.get("type")

    if item_type == "message":
        # One chat bubble per content part that carries text.
        return [
            gr.ChatMessage(
                role=item.get("role", "assistant"),
                content=part.get("text", ""),
                metadata=part.get("metadata", {}),
            )
            for part in item.get("content", [])
            if part.get("text")
        ]

    if item_type == "computer_call":
        action = item.get("action", {})
        action_type = action.get("type", "")
        if not action_type:
            return []
        action_title = f"🛠️ Performing {action_type}"
        # Compare against None so that a coordinate of 0 is still displayed.
        if action.get("x") is not None and action.get("y") is not None:
            action_title += f" at ({action['x']}, {action['y']})"
        return [
            gr.ChatMessage(
                role="assistant",
                content=f"```json\n{json.dumps(action)}\n```",
                metadata={"title": action_title},
            )
        ]

    if item_type == "function_call":
        function_name = item.get("name", "")
        arguments = item.get("arguments", "{}")
        return [
            gr.ChatMessage(
                role="assistant",
                content=f"🔧 Calling function: {function_name}\n```json\n{arguments}\n```",
                metadata={"title": f"Function Call: {function_name}"},
            )
        ]

    if item_type == "function_call_output":
        output = item.get("output", "")
        return [
            gr.ChatMessage(
                role="assistant",
                content=f"📤 Function output:\n```\n{output}\n```",
                metadata={"title": "Function Output"},
            )
        ]

    return []


# Process response function
async def process_response(
    history,
    openai_model_value,
    anthropic_model_value,
    omni_model_value,
    uitars_model_value,
    custom_model_value,
    agent_loop_choice,
    save_traj,
    recent_imgs,
    custom_url_value=None,
    custom_api_key=None,
    openai_key_input=None,
    anthropic_key_input=None,
    computer_os="linux",
    computer_provider="cloud",
    container_name="",
    cua_cloud_api_key="",
    max_budget_value=None,
):
    """Run the agent on the latest user message and stream chat updates.

    This is an async generator: it yields ``history`` after every result the
    agent produces, and once more after an error or an early exit.

    Side effects visible in this function: API keys that were supplied are
    exported to ``os.environ``, the current settings are persisted through
    ``save_settings``, and the created agent is stored in the module-level
    ``global_agent``.

    Any exception is caught, printed with its traceback, and reported to the
    user as an ``Error: ...`` chat message rather than propagated.
    """
    # Without this declaration the assignment below would create a local and
    # cancel_agent_task (which reads the global) would never see this agent.
    global global_agent

    # Local imports, hoisted so they are not re-executed per streamed result.
    import traceback
    from pprint import pprint

    if not history:
        yield history
        return

    # Get the last user message
    last_user_message = history[-1]["content"]

    # Get the appropriate model value based on the agent loop
    model_by_loop = {
        "OPENAI": openai_model_value,
        "ANTHROPIC": anthropic_model_value,
        "OMNI": omni_model_value,
        "UITARS": uitars_model_value,
    }
    if agent_loop_choice in model_by_loop:
        model_choice_value = model_by_loop[agent_loop_choice]
    else:
        model_choice_value = "No models available"

    # For custom selections the free-text model name is what gets resolved.
    is_custom_model_selected = model_choice_value in ["Custom model (OpenAI compatible API)", "Custom model (ollama)"]
    model_string_to_analyze = custom_model_value if is_custom_model_selected else model_choice_value

    try:
        # Get the model string
        model_string = get_model_string(model_string_to_analyze, agent_loop_choice)

        # Set API keys if provided
        if openai_key_input:
            os.environ["OPENAI_API_KEY"] = openai_key_input
        if anthropic_key_input:
            os.environ["ANTHROPIC_API_KEY"] = anthropic_key_input
        if cua_cloud_api_key:
            os.environ["CUA_API_KEY"] = cua_cloud_api_key

        # Save settings (API keys are deliberately not part of this dict)
        current_settings = {
            "agent_loop": agent_loop_choice,
            "model_choice": model_choice_value,
            "custom_model": custom_model_value,
            "provider_base_url": custom_url_value,
            "save_trajectory": save_traj,
            "recent_images": recent_imgs,
            "computer_os": computer_os,
            "computer_provider": computer_provider,
            "container_name": container_name,
        }
        save_settings(current_settings)

        # Create agent
        global_agent = create_agent(
            model_string=model_string,
            save_trajectory=save_traj,
            only_n_most_recent_images=recent_imgs,
            custom_model_name=custom_model_value if is_custom_model_selected else None,
            computer_os=computer_os,
            computer_provider=computer_provider,
            computer_name=container_name,
            computer_api_key=cua_cloud_api_key,
            verbosity=logging.DEBUG,
            # Only a strictly positive budget is forwarded; 0/negative/None disable it.
            max_trajectory_budget=max_budget_value if max_budget_value and max_budget_value > 0 else None,
        )

        if global_agent is None:
            history.append(
                gr.ChatMessage(
                    role="assistant",
                    content="Failed to create agent. Check API keys and configuration.",
                )
            )
            yield history
            return

        # Create message list for agent
        messages = [{"role": "user", "content": last_user_message}]

        # Stream responses from the agent
        async for result in global_agent.run(messages):
            print("DEBUG - Agent response ------- START")
            pprint(result)
            print("DEBUG - Agent response ------- END")

            # Process the result output
            for item in result.get("output", []):
                history.extend(_chat_messages_for_item(item))

            yield history

    except Exception as e:
        traceback.print_exc()
        history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
        yield history
# Code display update function
def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, recent_images_val, save_trajectory_val, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget_val):
    """Rebuild the generated-code preview from the current UI state.

    User prompts are collected from ``chat_history`` (dict entries whose
    ``role`` is ``"user"``), and the model falls back from the hidden model
    choice to the custom model name to ``"gpt-4o"``. The result of
    ``generate_python_code`` is returned unchanged.
    """
    user_prompts = [
        entry.get("content", "")
        for entry in (chat_history or [])
        if isinstance(entry, dict) and entry.get("role") == "user"
    ]
    effective_model = model_choice_val or custom_model_val or "gpt-4o"

    return generate_python_code(
        agent_loop,
        effective_model,
        user_prompts,
        recent_images_val,
        save_trajectory_val,
        computer_os,
        computer_provider,
        container_name,
        cua_cloud_api_key,
        max_budget_val,
    )
b/libs/python/agent2/example.py index 38f4fbcd..f686b790 100644 --- a/libs/python/agent2/example.py +++ b/libs/python/agent2/example.py @@ -1,12 +1,12 @@ """ -Example usage of the agent2 library with docstring-based tool definitions. +Example usage of the agent library with docstring-based tool definitions. """ import asyncio import logging -from agent2 import agent_loop, ComputerAgent -from agent2.types import Messages +from agent import agent_loop, ComputerAgent +from agent.types import Messages from computer import Computer from computer.helpers import sandboxed diff --git a/libs/python/agent2/pyproject.toml b/libs/python/agent2/pyproject.toml index 5656c40a..cd5530d3 100644 --- a/libs/python/agent2/pyproject.toml +++ b/libs/python/agent2/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-agent" -version = "0.4.0" +version = "0.4.0b1" description = "CUA (Computer Use) Agent for AI-driven computer interaction" readme = "README.md" authors = [ @@ -44,6 +44,9 @@ ui = [ "gradio>=5.23.3", "python-dotenv>=1.0.1", ] +cli = [ + "yaspin>=3.1.0", +] all = [ # omni requirements "ultralytics>=8.0.0", @@ -54,6 +57,8 @@ all = [ # ui requirements "gradio>=5.23.3", "python-dotenv>=1.0.1", + # cli requirements + "yaspin>=3.1.0", ] [tool.uv] @@ -63,4 +68,4 @@ constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"] distribution = true [tool.pdm.build] -includes = ["agent2/"] +includes = ["agent/"]