From b2ecf18a1ed67ddcbf6d55bb94145949dd13ca3b Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Sat, 24 May 2025 18:59:24 -0400 Subject: [PATCH] added cloud provider to agent ui --- libs/agent/agent/ui/gradio/app.py | 234 +++++++++++++++++++----------- 1 file changed, 148 insertions(+), 86 deletions(-) diff --git a/libs/agent/agent/ui/gradio/app.py b/libs/agent/agent/ui/gradio/app.py index b6b733f6..bf8f7b47 100644 --- a/libs/agent/agent/ui/gradio/app.py +++ b/libs/agent/agent/ui/gradio/app.py @@ -290,7 +290,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple: model_name_to_use = cleaned_model_name # agent_loop remains AgentLoop.OMNI elif agent_loop == AgentLoop.UITARS: - # For UITARS, use MLXVLM provider for the MLX models, OAICOMPAT for custom + # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom if model_name == "Custom model (OpenAI compatible API)": provider = LLMProvider.OAICOMPAT model_name_to_use = "tgi" @@ -333,12 +333,25 @@ def get_ollama_models() -> List[str]: logging.error(f"Error getting Ollama models: {e}") return [] -def create_computer_instance(verbosity: int = logging.INFO) -> Computer: + +def create_computer_instance( + verbosity: int = logging.INFO, + os_type: str = "macos", + provider_type: str = "lume", + name: Optional[str] = None, + api_key: Optional[str] = None +) -> Computer: """Create or get the global Computer instance.""" global global_computer if global_computer is None: - global_computer = Computer(verbosity=verbosity) + global_computer = Computer( + verbosity=verbosity, + os_type=os_type, + provider_type=provider_type, + name=name if name else "", + api_key=api_key + ) return global_computer @@ -353,12 +366,22 @@ def create_agent( verbosity: int = logging.INFO, use_oaicompat: bool = False, provider_base_url: Optional[str] = None, + computer_os: str = "macos", + computer_provider: str = "lume", + computer_name: Optional[str] = None, + computer_api_key: Optional[str] = None, ) -> ComputerAgent: """Create or update the global agent with the specified parameters.""" global global_agent # Create the computer if not already done - computer = create_computer_instance(verbosity=verbosity) + computer = create_computer_instance( + verbosity=verbosity, + os_type=computer_os, + provider_type=computer_provider, + name=computer_name, + api_key=computer_api_key + ) # Get API key from environment if not provided if api_key is None: @@ -401,6 +424,7 @@ def create_agent( return global_agent + def create_gradio_ui( provider_name: str = "openai", model_name: str = "gpt-4o", @@ -439,6 +463,9 @@ def create_gradio_ui( # Check if API keys are available has_openai_key = bool(openai_api_key) has_anthropic_key = bool(anthropic_api_key) + + print("has_openai_key", has_openai_key) + print("has_anthropic_key", has_anthropic_key) # Get Ollama models for OMNI ollama_models = get_ollama_models() @@ -473,7 +500,7 @@ def create_gradio_ui( elif initial_loop == "ANTHROPIC": initial_model = anthropic_models[0] if anthropic_models else "No models available" else: # OMNI - initial_model = omni_models[0] if omni_models else "No models available" + initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)" if "Custom model (OpenAI compatible API)" in available_models_for_loop: initial_model = ( "Custom model (OpenAI compatible API)" # Default to custom if available and no other default fits @@ -494,7 +521,7 @@ def create_gradio_ui( ] # Function to generate Python code based on configuration and tasks - def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True): + def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True, computer_os="macos", computer_provider="lume", vm_name="", cua_cloud_api_key=""): """Generate Python code for the current configuration and tasks. Args: @@ -505,6 +532,10 @@ def create_gradio_ui( provider_url: The provider base URL for OAICOMPAT providers recent_images: Number of recent images to keep in context save_trajectory: Whether to save the agent trajectory + computer_os: Operating system type for the computer + computer_provider: Provider type for the computer + vm_name: Optional VM name + cua_cloud_api_key: Optional CUA Cloud API key Returns: Formatted Python code as a string @@ -515,13 +546,29 @@ def create_gradio_ui( if task and task.strip(): tasks_str += f' "{task}",\n' - # Create the Python code template + # Create the Python code template with computer configuration + computer_args = [] + if computer_os != "macos": + computer_args.append(f'os_type="{computer_os}"') + if computer_provider != "lume": + computer_args.append(f'provider_type="{computer_provider}"') + if vm_name: + computer_args.append(f'name="{vm_name}"') + if cua_cloud_api_key: + computer_args.append(f'api_key="{cua_cloud_api_key}"') + + computer_args_str = ", ".join(computer_args) + if computer_args_str: + computer_args_str = f"({computer_args_str})" + else: + computer_args_str = "()" + code = f'''import asyncio from computer import Computer from agent import ComputerAgent, LLM, AgentLoop, LLMProvider async def main(): - async with Computer() as macos_computer: + async with Computer{computer_args_str} as macos_computer: agent = ComputerAgent( computer=macos_computer, loop=AgentLoop.{agent_loop_choice}, @@ -660,12 +707,49 @@ if __name__ == "__main__": LLMProvider.OPENAI, "gpt-4o", [], - "https://openrouter.ai/api/v1" + "https://openrouter.ai/api/v1", + 3, # recent_images default + True, # save_trajectory default + "macos", + "lume", + "", + "" ), interactive=False, ) - with gr.Accordion("Configuration", open=True): + with gr.Accordion("Computer Configuration", open=True): + # Computer configuration options + computer_os = gr.Dropdown( + choices=["macos", "linux"], + label="Operating System", + value="macos", + info="Select the operating system for the computer", + ) + + computer_provider = gr.Dropdown( + choices=["cloud", "lume"], + label="Provider", + value="lume", + info="Select the computer provider", + ) + + vm_name = gr.Textbox( + label="VM Name", + placeholder="Enter VM name (optional)", + value="", + info="Optional name for the virtual machine", + ) + + cua_cloud_api_key = gr.Textbox( + label="CUA Cloud API Key", + placeholder="Enter your CUA Cloud API key", + value="", + type="password", + info="Required for cloud provider", + ) + + with gr.Accordion("Agent Configuration", open=True): # Configuration options agent_loop = gr.Dropdown( choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"], @@ -986,6 +1070,10 @@ if __name__ == "__main__": custom_api_key=None, openai_key_input=None, anthropic_key_input=None, + computer_os="macos", + computer_provider="lume", + vm_name="", + cua_cloud_api_key="", ): if not history: yield history @@ -1092,6 +1180,10 @@ if __name__ == "__main__": "provider_base_url": custom_url_value, "save_trajectory": save_traj, "recent_images": recent_imgs, + "computer_os": computer_os, + "computer_provider": computer_provider, + "vm_name": vm_name, + "cua_cloud_api_key": cua_cloud_api_key, } save_settings(current_settings) # --- End Save Settings --- @@ -1109,6 +1201,10 @@ if __name__ == "__main__": use_oaicompat=is_oaicompat, # Set flag if custom model was selected # Pass custom URL only if custom model was selected provider_base_url=custom_url_value if is_oaicompat else None, + computer_os=computer_os, + computer_provider=computer_provider, + computer_name=vm_name, + computer_api_key=cua_cloud_api_key, verbosity=logging.DEBUG, # Added verbosity here ) @@ -1235,6 +1331,10 @@ if __name__ == "__main__": provider_api_key, openai_api_key_input, anthropic_api_key_input, + computer_os, + computer_provider, + vm_name, + cua_cloud_api_key, ], outputs=[chatbot_history], queue=True, @@ -1253,82 +1353,20 @@ if __name__ == "__main__": # Function to update the code display based on configuration and chat history - def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val): + def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val, computer_os, computer_provider, vm_name, cua_cloud_api_key): # Extract messages from chat history messages = [] if chat_history: for msg in chat_history: - if msg.get("role") == "user": + if isinstance(msg, dict) and msg.get("role") == "user": messages.append(msg.get("content", "")) - # Determine if this is a custom model selection and which type - is_custom_openai_api = model_choice_val == "Custom model (OpenAI compatible API)" - is_custom_ollama = model_choice_val == "Custom model (ollama)" - is_custom_model_selected = is_custom_openai_api or is_custom_ollama + # Determine provider and model based on current selection + provider, model_name, _ = get_provider_and_model( + model_choice_val or custom_model_val or "gpt-4o", + agent_loop + ) - # Determine provider and model name based on agent loop - if agent_loop == "OPENAI": - # For OPENAI loop, always use OPENAI provider with computer-use-preview - provider = LLMProvider.OPENAI - model_name = "computer-use-preview" - elif agent_loop == "ANTHROPIC": - # For ANTHROPIC loop, always use ANTHROPIC provider - provider = LLMProvider.ANTHROPIC - # Extract model name from the UI string - if model_choice_val.startswith("Anthropic: Claude "): - # Extract the model name based on the UI string - model_parts = model_choice_val.replace("Anthropic: Claude ", "").split(" (") - version = model_parts[0] # e.g., "3.7 Sonnet" - date = model_parts[1].replace(")", "") if len(model_parts) > 1 else "" # e.g., "20250219" - - # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620 - version = version.replace(".", "-").replace(" ", "-").lower() - model_name = f"claude-{version}-{date}" - else: - # Use the model_choice_val directly if it doesn't match the expected format - model_name = model_choice_val - elif agent_loop == "UITARS": - # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom - if model_choice_val == "Custom model (OpenAI compatible API)": - provider = LLMProvider.OAICOMPAT - model_name = custom_model_val - else: - provider = LLMProvider.MLXVLM - model_name = model_choice_val - elif agent_loop == "OMNI": - # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT - if is_custom_openai_api: - provider = LLMProvider.OAICOMPAT - model_name = custom_model_val - elif is_custom_ollama: - provider = LLMProvider.OLLAMA - model_name = custom_model_val - elif model_choice_val.startswith("OMNI: OpenAI "): - provider = LLMProvider.OPENAI - # Extract model name from UI string (e.g., "OMNI: OpenAI GPT-4o" -> "gpt-4o") - model_name = model_choice_val.replace("OMNI: OpenAI ", "").lower().replace(" ", "-") - elif model_choice_val.startswith("OMNI: Claude "): - provider = LLMProvider.ANTHROPIC - # Extract model name from UI string (similar to ANTHROPIC loop case) - model_parts = model_choice_val.replace("OMNI: Claude ", "").split(" (") - version = model_parts[0] # e.g., "3.7 Sonnet" - date = model_parts[1].replace(")", "") if len(model_parts) > 1 else "" # e.g., "20250219" - - # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620 - version = version.replace(".", "-").replace(" ", "-").lower() - model_name = f"claude-{version}-{date}" - elif model_choice_val.startswith("OMNI: Ollama "): - provider = LLMProvider.OLLAMA - # Extract model name from UI string (e.g., "OMNI: Ollama llama3" -> "llama3") - model_name = model_choice_val.replace("OMNI: Ollama ", "") - else: - # Fallback to get_provider_and_model for any other cases - provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop) - else: - # Fallback for any other agent loop - provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop) - - # Generate and return the code return generate_python_code( agent_loop, provider, @@ -1336,38 +1374,62 @@ if __name__ == "__main__": messages, provider_base_url, recent_images_val, - save_trajectory_val + save_trajectory_val, + computer_os, + computer_provider, + vm_name, + cua_cloud_api_key ) # Update code display when configuration changes agent_loop.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], outputs=[code_display] ) model_choice.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], outputs=[code_display] ) custom_model.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], outputs=[code_display] ) chatbot_history.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], outputs=[code_display] ) recent_images.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], outputs=[code_display] ) save_trajectory.change( update_code_display, - inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory], + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], + outputs=[code_display] + ) + computer_os.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], + outputs=[code_display] + ) + computer_provider.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], + outputs=[code_display] + ) + vm_name.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], + outputs=[code_display] + ) + cua_cloud_api_key.change( + update_code_display, + inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, vm_name, cua_cloud_api_key], outputs=[code_display] )