Gradio UI - Cancel button, fixed errors and incorrect Python

2026-01-05 12:59:58 -06:00 · 2025-05-10 20:40:34 -04:00
parent ad58052e8f
commit 8f37d0550c
1 changed file with 424 additions and 149 deletions
--- a/libs/agent/agent/ui/gradio/app.py
+++ b/libs/agent/agent/ui/gradio/app.py
@@ -6,7 +6,7 @@ with an advanced UI for model selection and configuration.

 Supported Agent Loops and Models:
 - AgentLoop.OPENAI: Uses OpenAI Operator CUA model
-  • computer_use_preview
+  • computer-use-preview

 - AgentLoop.ANTHROPIC: Uses Anthropic Computer-Use models
  • claude-3-5-sonnet-20240620
@@ -133,12 +133,12 @@ class GradioChatScreenshotHandler(DefaultCallbackHandler):
 MODEL_MAPPINGS = {
    "openai": {
        # Default to operator CUA model
-        "default": "computer_use_preview",
+        "default": "computer-use-preview",
        # Map standard OpenAI model names to CUA-specific model names
-        "gpt-4-turbo": "computer_use_preview",
-        "gpt-4o": "computer_use_preview",
-        "gpt-4": "computer_use_preview",
-        "gpt-4.5-preview": "computer_use_preview",
+        "gpt-4-turbo": "computer-use-preview",
+        "gpt-4o": "computer-use-preview",
+        "gpt-4": "computer-use-preview",
+        "gpt-4.5-preview": "computer-use-preview",
        "gpt-4o-mini": "gpt-4o-mini",
    },
    "anthropic": {
@@ -217,7 +217,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
        # Determine provider and clean model name based on the full string from UI
        cleaned_model_name = model_name  # Default to using the name as-is (for custom)

-        if model_name == "Custom model...":
+        if model_name == "Custom model (OpenAI compatible API)":
            # Actual model name comes from custom_model_value via model_to_use.
            # Assume OAICOMPAT for custom models unless overridden by URL/key later?
            # get_provider_and_model determines the *initial* provider/model.
@@ -278,8 +278,8 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
                    break
            # Note: No fallback needed here as we explicitly check against omni keys

-        else:  # Handles unexpected formats or the raw custom name if "Custom model..." selected
-            # Should only happen if user selected "Custom model..."
+        else:  # Handles unexpected formats or the raw custom name if "Custom model (OpenAI compatible API)" selected
+            # Should only happen if user selected "Custom model (OpenAI compatible API)"
            # Or if a model name format isn't caught above
            provider = LLMProvider.OAICOMPAT
            cleaned_model_name = (
@@ -291,7 +291,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
        # agent_loop remains AgentLoop.OMNI
    elif agent_loop == AgentLoop.UITARS:
        # For UITARS, use MLXVLM provider for the MLX models, OAICOMPAT for custom
-        if model_name == "Custom model...":
+        if model_name == "Custom model (OpenAI compatible API)":
            provider = LLMProvider.OAICOMPAT
            model_name_to_use = "tgi"
        else:
@@ -449,11 +449,11 @@ def create_gradio_ui(
    provider_to_models = {
        "OPENAI": openai_models,
        "ANTHROPIC": anthropic_models,
-        "OMNI": omni_models + ["Custom model..."],  # Add custom model option
+        "OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],  # Add custom model options
        "UITARS": [
            "mlx-community/UI-TARS-1.5-7B-4bit",
            "mlx-community/UI-TARS-1.5-7B-6bit",
-            "Custom model..."
+            "Custom model (OpenAI compatible API)"
        ],  # UI-TARS options with MLX models
    }

@@ -474,9 +474,9 @@ def create_gradio_ui(
            initial_model = anthropic_models[0] if anthropic_models else "No models available"
        else:  # OMNI
            initial_model = omni_models[0] if omni_models else "No models available"
-            if "Custom model..." in available_models_for_loop:
+            if "Custom model (OpenAI compatible API)" in available_models_for_loop:
                initial_model = (
-                    "Custom model..."  # Default to custom if available and no other default fits
+                    "Custom model (OpenAI compatible API)"  # Default to custom if available and no other default fits
                )

    initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
@@ -499,7 +499,7 @@ def create_gradio_ui(
        
        Args:
            agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI)
-            provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT)
+            provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT, MLXVLM)
            model_name: The model name
            tasks: List of tasks to execute
            provider_url: The provider base URL for OAICOMPAT providers
@@ -528,14 +528,58 @@ async def main():
            only_n_most_recent_images={recent_images},
            save_trajectory={save_trajectory},'''
        
-        # Add the model configuration based on provider
-        if provider == LLMProvider.OAICOMPAT:
+        # Add the model configuration based on provider and agent loop
+        if agent_loop_choice == "OPENAI":
+            # For OPENAI loop, always use OPENAI provider with computer-use-preview
            code += f'''
+            model=LLM(
+                provider=LLMProvider.OPENAI, 
+                name="computer-use-preview"
+            )'''
+        elif agent_loop_choice == "ANTHROPIC":
+            # For ANTHROPIC loop, always use ANTHROPIC provider
+            code += f'''
+            model=LLM(
+                provider=LLMProvider.ANTHROPIC, 
+                name="{model_name}"
+            )'''
+        elif agent_loop_choice == "UITARS":
+            # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for others
+            if provider == LLMProvider.MLXVLM:
+                code += f'''
+            model=LLM(
+                provider=LLMProvider.MLXVLM, 
+                name="{model_name}"
+            )'''
+            else:  # OAICOMPAT
+                code += f'''
            model=LLM(
                provider=LLMProvider.OAICOMPAT, 
                name="{model_name}",
                provider_base_url="{provider_url}"
            )'''
+        elif agent_loop_choice == "OMNI":
+            # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
+            if provider == LLMProvider.OAICOMPAT:
+                code += f'''
+            model=LLM(
+                provider=LLMProvider.OAICOMPAT, 
+                name="{model_name}",
+                provider_base_url="{provider_url}"
+            )'''
+            else:  # OPENAI, ANTHROPIC, OLLAMA
+                code += f'''
+            model=LLM(
+                provider=LLMProvider.{provider.name}, 
+                name="{model_name}"
+            )'''
+        else:
+            # Default case - just use the provided provider and model
+            code += f'''
+            model=LLM(
+                provider=LLMProvider.{provider.name}, 
+                name="{model_name}"
+            )'''
            
        code += """
        )
@@ -561,6 +605,8 @@ async def main():
        print(f"Executing task: {{task}}")
        async for result in agent.run(task):
            print(result)'''
+
+
        
        # Add the main block
        code += '''
@@ -570,62 +616,6 @@ if __name__ == "__main__":
        
        return code

-    # Function to update model choices based on agent loop selection
-    def update_model_choices(loop):
-        models = provider_to_models.get(loop, [])
-        if loop == "OMNI":
-            # For OMNI, include the custom model option
-            if not models:
-                models = ["Custom model..."]
-            elif "Custom model..." not in models:
-                models.append("Custom model...")
-
-            # Show both OpenAI and Anthropic key inputs for OMNI if keys aren't set
-            return [
-                gr.update(choices=models, value=models[0] if models else "Custom model...", interactive=True),
-                gr.update(visible=not has_openai_key),
-                gr.update(visible=not has_anthropic_key)
-            ]
-        elif loop == "OPENAI":
-            # Show only OpenAI key input for OPENAI loop if key isn't set
-            if not models:
-                return [
-                    gr.update(choices=["No models available"], value="No models available", interactive=True),
-                    gr.update(visible=not has_openai_key),
-                    gr.update(visible=False)
-                ]
-            return [
-                gr.update(choices=models, value=models[0] if models else None, interactive=True),
-                gr.update(visible=not has_openai_key),
-                gr.update(visible=False)
-            ]
-        elif loop == "ANTHROPIC":
-            # Show only Anthropic key input for ANTHROPIC loop if key isn't set
-            if not models:
-                return [
-                    gr.update(choices=["No models available"], value="No models available", interactive=True),
-                    gr.update(visible=False),
-                    gr.update(visible=not has_anthropic_key)
-                ]
-            return [
-                gr.update(choices=models, value=models[0] if models else None, interactive=True),
-                gr.update(visible=False),
-                gr.update(visible=not has_anthropic_key)
-            ]
-        else:
-            # For other providers (like UITARS), don't show API key inputs
-            if not models:
-                return [
-                    gr.update(choices=["No models available"], value="No models available", interactive=True),
-                    gr.update(visible=False),
-                    gr.update(visible=False)
-                ]
-            return [
-                gr.update(choices=models, value=models[0] if models else None, interactive=True),
-                gr.update(visible=False),
-                gr.update(visible=False)
-            ]
-
    # Create the Gradio interface with advanced UI
    with gr.Blocks(title="Computer-Use Agent") as demo:
        with gr.Row():
@@ -684,14 +674,52 @@ if __name__ == "__main__":
                        info="Select the agent loop provider",
                    )

-                    # Create model selection dropdown with custom value support for OMNI
-                    model_choice = gr.Dropdown(
-                        choices=provider_to_models.get(initial_loop, ["No models available"]),
-                        label="LLM Provider and Model",
-                        value=initial_model,
-                        info="Select model or choose 'Custom model...' to enter a custom name",
-                        interactive=True,
-                    )
+
+                    # Create separate model selection dropdowns for each provider type
+                    # Toggling their visibility, instead of replacing one dropdown's choices, avoids a Gradio bug with updating choices
+                    with gr.Group() as model_selection_group:
+                        # OpenAI models dropdown
+                        openai_model_choice = gr.Dropdown(
+                            choices=openai_models,
+                            label="OpenAI Model",
+                            value=openai_models[0] if openai_models else "No models available",
+                            info="Select OpenAI model",
+                            interactive=True,
+                            visible=(initial_loop == "OPENAI")
+                        )
+                        
+                        # Anthropic models dropdown
+                        anthropic_model_choice = gr.Dropdown(
+                            choices=anthropic_models,
+                            label="Anthropic Model",
+                            value=anthropic_models[0] if anthropic_models else "No models available",
+                            info="Select Anthropic model",
+                            interactive=True,
+                            visible=(initial_loop == "ANTHROPIC")
+                        )
+                        
+                        # OMNI models dropdown
+                        omni_model_choice = gr.Dropdown(
+                            choices=omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
+                            label="OMNI Model",
+                            value=omni_models[0] if omni_models else "Custom model (OpenAI compatible API)",
+                            info="Select OMNI model or choose a custom model option",
+                            interactive=True,
+                            visible=(initial_loop == "OMNI")
+                        )
+                        
+                        # UITARS models dropdown
+                        uitars_model_choice = gr.Dropdown(
+                            choices=provider_to_models.get("UITARS", ["No models available"]),
+                            label="UITARS Model",
+                            value=provider_to_models.get("UITARS", ["No models available"])[0] if provider_to_models.get("UITARS") else "No models available",
+                            info="Select UITARS model",
+                            interactive=True,
+                            visible=(initial_loop == "UITARS")
+                        )
+                        
+                        # Hidden field to store the selected model (for compatibility with existing code)
+                        model_choice = gr.Textbox(visible=False)

                    # Add API key inputs for OpenAI and Anthropic
                    with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group:
@@ -713,34 +741,177 @@ if __name__ == "__main__":
                            type="password",
                            info="Required for Anthropic models"
                        )
+                        
+                    # Function to set OpenAI API key environment variable
+                    def set_openai_api_key(key):
+                        if key and key.strip():
+                            os.environ["OPENAI_API_KEY"] = key.strip()
+                            print(f"DEBUG - Set OpenAI API key environment variable")
+                        return key
+                    
+                    # Function to set Anthropic API key environment variable
+                    def set_anthropic_api_key(key):
+                        if key and key.strip():
+                            os.environ["ANTHROPIC_API_KEY"] = key.strip()
+                            print(f"DEBUG - Set Anthropic API key environment variable")
+                        return key
+                    
+                    # Add change event handlers for API key inputs
+                    openai_api_key_input.change(
+                        fn=set_openai_api_key,
+                        inputs=[openai_api_key_input],
+                        outputs=[openai_api_key_input],
+                        queue=False
+                    )
+                    
+                    anthropic_api_key_input.change(
+                        fn=set_anthropic_api_key,
+                        inputs=[anthropic_api_key_input],
+                        outputs=[anthropic_api_key_input],
+                        queue=False
+                    )

-                    # Add custom model textbox (only visible when "Custom model..." is selected)
+                    # Combined function to update UI based on selections
+                    def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None):
+                        # Default values if not provided
+                        loop = loop or agent_loop.value
+                        
+                        # Determine which model value to use for custom model checks
+                        model_value = None
+                        if loop == "OPENAI" and openai_model:
+                            model_value = openai_model
+                        elif loop == "ANTHROPIC" and anthropic_model:
+                            model_value = anthropic_model
+                        elif loop == "OMNI" and omni_model:
+                            model_value = omni_model
+                        elif loop == "UITARS" and uitars_model:
+                            model_value = uitars_model
+                        
+                        # Show/hide appropriate model dropdown based on loop selection
+                        openai_visible = (loop == "OPENAI")
+                        anthropic_visible = (loop == "ANTHROPIC")
+                        omni_visible = (loop == "OMNI")
+                        uitars_visible = (loop == "UITARS")
+                        
+                        # Show/hide API key inputs based on the loop and, for OMNI, on the selected (non-custom) model
+                        show_openai_key = not has_openai_key and (loop == "OPENAI" or (loop == "OMNI" and model_value and "OpenAI" in model_value and "Custom" not in model_value))
+                        show_anthropic_key = not has_anthropic_key and (loop == "ANTHROPIC" or (loop == "OMNI" and model_value and "Claude" in model_value and "Custom" not in model_value))
+                        
+                        # Determine custom model visibility
+                        is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)"
+                        is_custom_ollama = model_value == "Custom model (ollama)"
+                        is_any_custom = is_custom_openai_api or is_custom_ollama
+                        
+                        # Update the hidden model_choice field based on the visible dropdown
+                        model_choice_value = model_value if model_value else ""
+                        
+                        # Return all UI updates
+                        return [
+                            # Model dropdowns visibility
+                            gr.update(visible=openai_visible),
+                            gr.update(visible=anthropic_visible),
+                            gr.update(visible=omni_visible),
+                            gr.update(visible=uitars_visible),
+                            # API key inputs visibility
+                            gr.update(visible=show_openai_key),
+                            gr.update(visible=show_anthropic_key),
+                            # Custom model fields visibility
+                            gr.update(visible=is_any_custom),  # Custom model name always visible for any custom option
+                            gr.update(visible=is_custom_openai_api),  # Provider base URL only for OpenAI compatible API
+                            gr.update(visible=is_custom_openai_api),   # Provider API key only for OpenAI compatible API
+                            # Update the hidden model_choice field
+                            gr.update(value=model_choice_value)
+                        ]
+                        
+                    # Add custom model textbox (visible for both custom model options)
                    custom_model = gr.Textbox(
                        label="Custom Model Name",
-                        placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct)",
+                        placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)",
                        value=initial_custom_model,
-                        visible=(initial_model == "Custom model..."),
+                        visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"),
                        interactive=True,
                    )

-                    # Add custom provider base URL textbox (only visible when "Custom model..." is selected)
+                    # Add custom provider base URL textbox (only visible for OpenAI compatible API)
                    provider_base_url = gr.Textbox(
                        label="Provider Base URL",
                        placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)",
                        value=initial_provider_base_url,
-                        visible=(initial_model == "Custom model..."),
+                        visible=(initial_model == "Custom model (OpenAI compatible API)"),
                        interactive=True,
                    )

-                    # Add custom API key textbox (only visible when "Custom model..." is selected)
+                    # Add custom API key textbox (only visible for OpenAI compatible API)
                    provider_api_key = gr.Textbox(
                        label="Provider API Key",
                        placeholder="Enter provider API key (if required)",
                        value="",
-                        visible=(initial_model == "Custom model..."),
+                        visible=(initial_model == "Custom model (OpenAI compatible API)"),
                        interactive=True,
                        type="password",
                    )
+                    
+                    # Connect agent_loop changes to update all UI elements
+                    agent_loop.change(
+                        fn=update_ui,
+                        inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
+                        outputs=[
+                            openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, 
+                            openai_key_group, anthropic_key_group,
+                            custom_model, provider_base_url, provider_api_key,
+                            model_choice  # Add model_choice to outputs
+                        ],
+                        queue=False  # Process immediately without queueing
+                    )
+
+                    # Connect each model dropdown to update UI
+                    omni_model_choice.change(
+                        fn=update_ui,
+                        inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],                        
+                        outputs=[
+                            openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, 
+                            openai_key_group, anthropic_key_group,
+                            custom_model, provider_base_url, provider_api_key,
+                            model_choice  # Add model_choice to outputs
+                        ],
+                        queue=False
+                    )
+                    
+                    uitars_model_choice.change(
+                        fn=update_ui,
+                        inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],             
+                        outputs=[
+                            openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, 
+                            openai_key_group, anthropic_key_group,
+                            custom_model, provider_base_url, provider_api_key,
+                            model_choice  # Add model_choice to outputs
+                        ],
+                        queue=False
+                    )
+                    
+                    openai_model_choice.change(
+                        fn=update_ui,
+                        inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],             
+                        outputs=[
+                            openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, 
+                            openai_key_group, anthropic_key_group,
+                            custom_model, provider_base_url, provider_api_key,
+                            model_choice  # Add model_choice to outputs
+                        ],
+                        queue=False
+                    )
+
+                    anthropic_model_choice.change(
+                        fn=update_ui,
+                        inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],             
+                        outputs=[
+                            openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice, 
+                            openai_key_group, anthropic_key_group,
+                            custom_model, provider_base_url, provider_api_key,
+                            model_choice  # Add model_choice to outputs
+                        ],
+                        queue=False
+                    )

                    save_trajectory = gr.Checkbox(
                        label="Save Trajectory",
@@ -772,6 +943,9 @@ if __name__ == "__main__":
                    placeholder="Ask me to perform tasks in a virtual macOS environment"
                )
                clear = gr.Button("Clear")
+                
+                # Add cancel button
+                cancel_button = gr.Button("Cancel", variant="stop")

                # Add examples
                example_group = gr.Examples(examples=example_messages, inputs=msg)
@@ -782,10 +956,28 @@ if __name__ == "__main__":
                    history.append(gr.ChatMessage(role="user", content=message))
                    return "", history

+                # Function to cancel the running agent
+                async def cancel_agent_task(history):
+                    global global_agent
+                    if global_agent and hasattr(global_agent, '_loop'):
+                        print("DEBUG - Cancelling agent task")
+                        # Cancel the agent loop
+                        if hasattr(global_agent._loop, 'cancel') and callable(global_agent._loop.cancel):
+                            await global_agent._loop.cancel()
+                            history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"}))
+                        else:
+                            history.append(gr.ChatMessage(role="assistant", content="Could not cancel task: cancel method not found", metadata={"title": "⚠️ Warning"}))
+                    else:
+                        history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"}))
+                    return history
+                
                # Function to process agent response after user input
                async def process_response(
                    history,
-                    model_choice_value,
+                    openai_model_value,
+                    anthropic_model_value,
+                    omni_model_value,
+                    uitars_model_value,
                    custom_model_value,
                    agent_loop_choice,
                    save_traj,
@@ -802,21 +994,47 @@ if __name__ == "__main__":
                    # Get the last user message
                    last_user_message = history[-1]["content"]

+                    # Get the appropriate model value based on the agent loop
+                    if agent_loop_choice == "OPENAI":
+                        model_choice_value = openai_model_value
+                    elif agent_loop_choice == "ANTHROPIC":
+                        model_choice_value = anthropic_model_value
+                    elif agent_loop_choice == "OMNI":
+                        model_choice_value = omni_model_value
+                    elif agent_loop_choice == "UITARS":
+                        model_choice_value = uitars_model_value
+                    else:
+                        model_choice_value = "No models available"
+                    
+                    # Determine if this is a custom model selection and which type
+                    is_custom_openai_api = model_choice_value == "Custom model (OpenAI compatible API)"
+                    is_custom_ollama = model_choice_value == "Custom model (ollama)"
+                    is_custom_model_selected = is_custom_openai_api or is_custom_ollama
+                    
                    # Determine the model name string to analyze: custom or from dropdown
-                    model_string_to_analyze = (
-                        custom_model_value
-                        if model_choice_value == "Custom model..."
-                        else model_choice_value  # Use the full UI string initially
-                    )
-
-                    # Determine if this is a custom model selection
-                    is_custom_model_selected = model_choice_value == "Custom model..."
+                    if is_custom_model_selected:
+                        model_string_to_analyze = custom_model_value
+                    else:
+                        model_string_to_analyze = model_choice_value  # Use the full UI string initially

                    try:
-                        # Get the provider, *cleaned* model name, and agent loop type
-                        provider, cleaned_model_name_from_func, agent_loop_type = (
-                            get_provider_and_model(model_string_to_analyze, agent_loop_choice)
-                        )
+                        # Special case for UITARS - always use MLXVLM provider, including for the custom OpenAI-compatible option (NOTE(review): confirm this is intended)
+                        if agent_loop_choice == "UITARS":
+                            provider = LLMProvider.MLXVLM
+                            cleaned_model_name_from_func = model_string_to_analyze
+                            agent_loop_type = AgentLoop.UITARS
+                            print(f"Using MLXVLM provider for UITARS model: {model_string_to_analyze}")
+                        # Special case for Ollama custom model
+                        elif is_custom_ollama and agent_loop_choice == "OMNI":
+                            provider = LLMProvider.OLLAMA
+                            cleaned_model_name_from_func = custom_model_value
+                            agent_loop_type = AgentLoop.OMNI
+                            print(f"Using Ollama provider for custom model: {custom_model_value}")
+                        else:
+                            # Get the provider, *cleaned* model name, and agent loop type
+                            provider, cleaned_model_name_from_func, agent_loop_type = (
+                                get_provider_and_model(model_string_to_analyze, agent_loop_choice)
+                            )
                        
                        print(f"provider={provider} cleaned_model_name_from_func={cleaned_model_name_from_func} agent_loop_type={agent_loop_type} agent_loop_choice={agent_loop_choice}")

@@ -828,26 +1046,34 @@ if __name__ == "__main__":
                            else cleaned_model_name_from_func
                        )

-                        # Determine if OAICOMPAT should be used (only if custom model explicitly selected)
-                        is_oaicompat = is_custom_model_selected
+                        # Determine if OAICOMPAT should be used (only for the OpenAI-compatible custom model, and never with the UITARS loop)
+                        is_oaicompat = is_custom_openai_api and agent_loop_choice != "UITARS"

                        # Get API key based on provider determined by get_provider_and_model
                        if is_oaicompat and custom_api_key:
-                            # Use custom API key if provided for custom model
+                            # Use custom API key if provided for OpenAI compatible API custom model
                            api_key = custom_api_key
                            print(
-                                f"DEBUG - Using custom API key for model: {final_model_name_to_send}"
+                                f"DEBUG - Using custom API key for OpenAI compatible API model: {final_model_name_to_send}"
                            )
+                        elif provider == LLMProvider.OLLAMA:
+                            # No API key needed for Ollama
+                            api_key = ""
+                            print(f"DEBUG - No API key needed for Ollama model: {final_model_name_to_send}")
                        elif provider == LLMProvider.OPENAI:
                            # Use OpenAI key from input if provided, otherwise use environment variable
                            api_key = openai_key_input if openai_key_input else (openai_api_key or os.environ.get("OPENAI_API_KEY", ""))
                            if openai_key_input:
-                                print(f"DEBUG - Using provided OpenAI API key from UI")
+                                # Set the environment variable for the OpenAI API key
+                                os.environ["OPENAI_API_KEY"] = openai_key_input
+                                print(f"DEBUG - Using provided OpenAI API key from UI and set as environment variable")
                        elif provider == LLMProvider.ANTHROPIC:
                            # Use Anthropic key from input if provided, otherwise use environment variable
                            api_key = anthropic_key_input if anthropic_key_input else (anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", ""))
                            if anthropic_key_input:
-                                print(f"DEBUG - Using provided Anthropic API key from UI")
+                                # Set the environment variable for the Anthropic API key
+                                os.environ["ANTHROPIC_API_KEY"] = anthropic_key_input
+                                print(f"DEBUG - Using provided Anthropic API key from UI and set as environment variable")
                        else:
                            # For Ollama or default OAICOMPAT (without custom key), no key needed/expected
                            api_key = ""
@@ -866,8 +1092,8 @@ if __name__ == "__main__":

                        # Create or update the agent
                        create_agent(
-                            # Provider determined by get_provider_and_model unless custom model selected
-                            provider=LLMProvider.OAICOMPAT if is_oaicompat else provider,
+                            # Provider determined by special cases and get_provider_and_model
+                            provider=provider,
                            agent_loop=agent_loop_type,
                            # Pass the FINAL determined model name (cleaned or custom)
                            model_name=final_model_name_to_send,
@@ -980,13 +1206,21 @@ if __name__ == "__main__":
                        # Update with error message
                        history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
                        yield history
-
-                # Connect the components
-                msg.submit(chat_submit, [msg, chatbot_history], [msg, chatbot_history]).then(
-                    process_response,
-                    [
+                        
+                # On msg submit: run chat_submit unqueued, then run process_response through the queue
+                submit_event = msg.submit(
+                    fn=chat_submit,
+                    inputs=[msg, chatbot_history],
+                    outputs=[msg, chatbot_history],
+                    queue=False,
+                ).then(
+                    fn=process_response,
+                    inputs=[
                        chatbot_history,
-                        model_choice,
+                        openai_model_choice,
+                        anthropic_model_choice,
+                        omni_model_choice,
+                        uitars_model_choice,
                        custom_model,
                        agent_loop,
                        save_trajectory,
@@ -996,44 +1230,22 @@ if __name__ == "__main__":
                        openai_api_key_input,
                        anthropic_api_key_input,
                    ],
-                    [chatbot_history],
+                    outputs=[chatbot_history],
+                    queue=True,
                )

                # Clear button functionality
                clear.click(lambda: None, None, chatbot_history, queue=False)
-
-                # Connect agent_loop changes to model selection
-                agent_loop.change(
-                    fn=update_model_choices,
-                    inputs=[agent_loop],
-                    outputs=[model_choice],
-                    queue=False,  # Process immediately without queueing
-                )
-
-                # Show/hide custom model, provider base URL, and API key textboxes based on dropdown selection
-                def update_custom_model_visibility(model_value):
-                    is_custom = model_value == "Custom model..."
-                    return (
-                        gr.update(visible=is_custom),
-                        gr.update(visible=is_custom),
-                        gr.update(visible=is_custom),
-                    )
-
-                model_choice.change(
-                    fn=update_custom_model_visibility,
-                    inputs=[model_choice],
-                    outputs=[custom_model, provider_base_url, provider_api_key],
-                    queue=False,  # Process immediately without queueing
-                )
                
-                # Connect agent_loop changes to model selection and API key visibility
-                agent_loop.change(
-                    fn=update_model_choices,
-                    inputs=[agent_loop],
-                    outputs=[model_choice, openai_key_group, anthropic_key_group],
-                    queue=False,  # Process immediately without queueing
+                # Connect cancel button to cancel function
+                cancel_button.click(
+                    cancel_agent_task,
+                    [chatbot_history],
+                    [chatbot_history],
+                    queue=False  # Process immediately without queueing
                )

+
                # Function to update the code display based on configuration and chat history
                def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
                    # Extract messages from chat history
@@ -1043,9 +1255,72 @@ if __name__ == "__main__":
                            if msg.get("role") == "user":
                                messages.append(msg.get("content", ""))
                    
-                    # Determine provider and model name based on selection
-                    model_string = custom_model_val if model_choice_val == "Custom model..." else model_choice_val
-                    provider, model_name, _ = get_provider_and_model(model_string, agent_loop)
+                    # Determine if this is a custom model selection and which type
+                    is_custom_openai_api = model_choice_val == "Custom model (OpenAI compatible API)"
+                    is_custom_ollama = model_choice_val == "Custom model (ollama)"
+                    is_custom_model_selected = is_custom_openai_api or is_custom_ollama
+                    
+                    # Determine provider and model name based on agent loop
+                    if agent_loop == "OPENAI":
+                        # For OPENAI loop, always use OPENAI provider with computer-use-preview
+                        provider = LLMProvider.OPENAI
+                        model_name = "computer-use-preview"
+                    elif agent_loop == "ANTHROPIC":
+                        # For ANTHROPIC loop, always use ANTHROPIC provider
+                        provider = LLMProvider.ANTHROPIC
+                        # Extract model name from the UI string
+                        if model_choice_val.startswith("Anthropic: Claude "):
+                            # Extract the model name based on the UI string
+                            model_parts = model_choice_val.replace("Anthropic: Claude ", "").split(" (")
+                            version = model_parts[0]  # e.g., "3.7 Sonnet"
+                            date = model_parts[1].replace(")", "") if len(model_parts) > 1 else ""  # e.g., "20250219"
+                            
+                            # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
+                            version = version.replace(".", "-").replace(" ", "-").lower()
+                            model_name = f"claude-{version}-{date}"
+                        else:
+                            # Use the model_choice_val directly if it doesn't match the expected format
+                            model_name = model_choice_val
+                    elif agent_loop == "UITARS":
+                        # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
+                        if model_choice_val == "Custom model (OpenAI compatible API)":
+                            provider = LLMProvider.OAICOMPAT
+                            model_name = custom_model_val
+                        else:
+                            provider = LLMProvider.MLXVLM
+                            model_name = model_choice_val
+                    elif agent_loop == "OMNI":
+                        # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
+                        if is_custom_openai_api:
+                            provider = LLMProvider.OAICOMPAT
+                            model_name = custom_model_val
+                        elif is_custom_ollama:
+                            provider = LLMProvider.OLLAMA
+                            model_name = custom_model_val
+                        elif model_choice_val.startswith("OMNI: OpenAI "):
+                            provider = LLMProvider.OPENAI
+                            # Extract model name from UI string (e.g., "OMNI: OpenAI GPT-4o" -> "gpt-4o")
+                            model_name = model_choice_val.replace("OMNI: OpenAI ", "").lower().replace(" ", "-")
+                        elif model_choice_val.startswith("OMNI: Claude "):
+                            provider = LLMProvider.ANTHROPIC
+                            # Extract model name from UI string (similar to ANTHROPIC loop case)
+                            model_parts = model_choice_val.replace("OMNI: Claude ", "").split(" (")
+                            version = model_parts[0]  # e.g., "3.7 Sonnet"
+                            date = model_parts[1].replace(")", "") if len(model_parts) > 1 else ""  # e.g., "20250219"
+                            
+                            # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
+                            version = version.replace(".", "-").replace(" ", "-").lower()
+                            model_name = f"claude-{version}-{date}"
+                        elif model_choice_val.startswith("OMNI: Ollama "):
+                            provider = LLMProvider.OLLAMA
+                            # Extract model name from UI string (e.g., "OMNI: Ollama llama3" -> "llama3")
+                            model_name = model_choice_val.replace("OMNI: Ollama ", "")
+                        else:
+                            # Fallback to get_provider_and_model for any other cases
+                            provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
+                    else:
+                        # Fallback for any other agent loop
+                        provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
                    
                    # Generate and return the code
                    return generate_python_code(