Merge pull request #179 from trycua/fix/computer/linux

[Computer] Fix hotkeys and computer initialization
2026-01-02 19:40:18 -06:00 · 2025-05-15 18:35:11 +02:00
parent bb35867733 a2852f8220
commit 216c8b075d
3 changed files with 113 additions and 74 deletions
--- a/libs/computer-server/computer_server/handlers/linux.py
+++ b/libs/computer-server/computer_server/handlers/linux.py
@@ -168,7 +168,7 @@ class LinuxAutomationHandler(BaseAutomationHandler):
        except Exception as e:
            return {"success": False, "error": str(e)}

-    async def hotkey(self, *keys: str) -> Dict[str, Any]:
+    async def hotkey(self, keys: List[str]) -> Dict[str, Any]:
        try:
            pyautogui.hotkey(*keys)
            return {"success": True}
--- a/libs/computer/computer/computer.py
+++ b/libs/computer/computer/computer.py
@@ -78,11 +78,11 @@ class Computer:
        self.provider_type = provider_type
        self.ephemeral = ephemeral

-        if ephemeral:
-            self.storage = "ephemeral"
-        else:
-            self.storage = storage
-            
+        # The default is currently to use non-ephemeral storage
+        if storage and ephemeral and storage != "ephemeral":
+            raise ValueError("Storage path and ephemeral flag cannot be used together")
+        self.storage = "ephemeral" if ephemeral else storage
+        
        # For Lumier provider, store the first shared directory path to use
        # for VM file sharing
        self.shared_path = None
@@ -279,12 +279,14 @@ class Computer:
                        raise RuntimeError(f"Failed to initialize VM provider: {e}")

                # Check if VM exists or create it
+                is_running = False
                try:
                    if self.config.vm_provider is None:
                        raise RuntimeError(f"VM provider not initialized for {self.config.name}")
                        
                    vm = await self.config.vm_provider.get_vm(self.config.name)
                    self.logger.verbose(f"Found existing VM: {self.config.name}")
+                    is_running = vm.get("status") == "running"
                except Exception as e:
                    self.logger.error(f"VM not found: {self.config.name}")
                    self.logger.error(f"Error: {e}")
@@ -292,63 +294,67 @@ class Computer:
                        f"VM {self.config.name} could not be found or created."
                    )

-                # Convert paths to dictionary format for shared directories
-                shared_dirs = []
-                for path in self.shared_directories:
-                    self.logger.verbose(f"Adding shared directory: {path}")
-                    path = os.path.abspath(os.path.expanduser(path))
-                    if os.path.exists(path):
-                        # Add path in format expected by Lume API
-                        shared_dirs.append({
-                            "hostPath": path,
-                            "readOnly": False
-                        })
-                    else:
-                        self.logger.warning(f"Shared directory does not exist: {path}")
+                # Start the VM if it's not running
+                if not is_running:
+                    self.logger.info(f"VM {self.config.name} is not running, starting it...")
+
+                    # Convert paths to dictionary format for shared directories
+                    shared_dirs = []
+                    for path in self.shared_directories:
+                        self.logger.verbose(f"Adding shared directory: {path}")
+                        path = os.path.abspath(os.path.expanduser(path))
+                        if os.path.exists(path):
+                            # Add path in format expected by Lume API
+                            shared_dirs.append({
+                                "hostPath": path,
+                                "readOnly": False
+                            })
+                        else:
+                            self.logger.warning(f"Shared directory does not exist: {path}")
+                            
+                    # Prepare run options to pass to the provider
+                    run_opts = {}
+
+                    # Add display information if available
+                    if self.config.display is not None:
+                        display_info = {
+                            "width": self.config.display.width,
+                            "height": self.config.display.height,
+                        }
                        
-                # Prepare run options to pass to the provider
-                run_opts = {}
-
-                # Add display information if available
-                if self.config.display is not None:
-                    display_info = {
-                        "width": self.config.display.width,
-                        "height": self.config.display.height,
-                    }
-                    
-                    # Check if scale_factor exists before adding it
-                    if hasattr(self.config.display, "scale_factor"):
-                        display_info["scale_factor"] = self.config.display.scale_factor
-                    
-                    run_opts["display"] = display_info
-
-                # Add shared directories if available
-                if self.shared_directories:
-                    run_opts["shared_directories"] = shared_dirs.copy()
-
-                # Run the VM with the provider
-                try:
-                    if self.config.vm_provider is None:
-                        raise RuntimeError(f"VM provider not initialized for {self.config.name}")
+                        # Check if scale_factor exists before adding it
+                        if hasattr(self.config.display, "scale_factor"):
+                            display_info["scale_factor"] = self.config.display.scale_factor
                        
-                    # Use the complete run_opts we prepared earlier
-                    # Handle ephemeral storage for run_vm method too
-                    storage_param = "ephemeral" if self.ephemeral else self.storage
-                    
-                    # Log the image being used
-                    self.logger.info(f"Running VM using image: {self.image}")
-                    
-                    # Call provider.run_vm with explicit image parameter
-                    response = await self.config.vm_provider.run_vm(
-                        image=self.image,
-                        name=self.config.name,
-                        run_opts=run_opts,
-                        storage=storage_param
-                    )
-                    self.logger.info(f"VM run response: {response if response else 'None'}")
-                except Exception as run_error:
-                    self.logger.error(f"Failed to run VM: {run_error}")
-                    raise RuntimeError(f"Failed to start VM: {run_error}")
+                        run_opts["display"] = display_info
+
+                    # Add shared directories if available
+                    if self.shared_directories:
+                        run_opts["shared_directories"] = shared_dirs.copy()
+
+                    # Run the VM with the provider
+                    try:
+                        if self.config.vm_provider is None:
+                            raise RuntimeError(f"VM provider not initialized for {self.config.name}")
+                            
+                        # Use the complete run_opts we prepared earlier
+                        # Handle ephemeral storage for run_vm method too
+                        storage_param = "ephemeral" if self.ephemeral else self.storage
+                        
+                        # Log the image being used
+                        self.logger.info(f"Running VM using image: {self.image}")
+                        
+                        # Call provider.run_vm with explicit image parameter
+                        response = await self.config.vm_provider.run_vm(
+                            image=self.image,
+                            name=self.config.name,
+                            run_opts=run_opts,
+                            storage=storage_param
+                        )
+                        self.logger.info(f"VM run response: {response if response else 'None'}")
+                    except Exception as run_error:
+                        self.logger.error(f"Failed to run VM: {run_error}")
+                        raise RuntimeError(f"Failed to start VM: {run_error}")

                # Wait for VM to be ready with a valid IP address
                self.logger.info("Waiting for VM to be ready with a valid IP address...")
@@ -406,6 +412,9 @@ class Computer:
                raise TimeoutError(
                    f"Could not connect to WebSocket interface at {ip_address}:8000/ws: {str(e)}"
                )
+                # self.logger.warning(
+                #     f"Could not connect to WebSocket interface at {ip_address}:8000/ws: {str(e)}, expect missing functionality"
+                # )

            # Create an event to keep the VM running in background if needed
            if not self.use_host_computer_server:
--- a/libs/computer/computer/ui/gradio/app.py
+++ b/libs/computer/computer/ui/gradio/app.py
@@ -17,7 +17,7 @@ import base64
 from datetime import datetime
 from PIL import Image
 from huggingface_hub import DatasetCard, DatasetCardData
-from computer import Computer
+from computer import Computer, VMProviderType
 from gradio.components import ChatMessage
 import pandas as pd
 from datasets import Dataset, Features, Sequence, concatenate_datasets
@@ -528,21 +528,44 @@ async def execute(name, action, arguments):
    
    return results

-async def handle_init_computer():
-    """Initialize the computer instance and tools"""
+async def handle_init_computer(os_choice: str):
+    """Initialize the computer instance and tools for macOS or Ubuntu"""
    global computer, tool_call_logs, tools
-    
-    computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
+
+    if os_choice == "Ubuntu":
+        computer = Computer(
+            image="ubuntu-noble-vanilla:latest",
+            os_type="linux",
+            provider_type=VMProviderType.LUME,
+            display="1024x768",
+            memory="8GB",
+            cpu="4"
+        )
+        os_type_str = "linux"
+        image_str = "ubuntu-noble-vanilla:latest"
+    else:
+        computer = Computer(
+            image="macos-sequoia-cua:latest",
+            os_type="macos",
+            provider_type=VMProviderType.LUME,
+            display="1024x768",
+            memory="8GB",
+            cpu="4"
+        )
+        os_type_str = "macos"
+        image_str = "macos-sequoia-cua:latest"
+
    await computer.run()
-    
+
    # Log computer initialization as a tool call
    result = await execute("computer", "initialize", {
-        "os": "macos", 
-        "display": "1024x768", 
-        "memory": "8GB", 
+        "os": os_type_str,
+        "image": image_str,
+        "display": "1024x768",
+        "memory": "8GB",
        "cpu": "4"
    })
-    
+
    return result["screenshot"], json.dumps(tool_call_logs, indent=2)

 async def handle_screenshot():
@@ -1004,8 +1027,15 @@ def create_gradio_ui():
                            run_setup_btn = gr.Button("⚙️ Run Task Setup")
                    # Setup status textbox
                    setup_status = gr.Textbox(label="Setup Status", value="")
-                    
-                start_btn = gr.Button("Initialize Computer")
+                
+                with gr.Group():
+                    os_choice = gr.Radio(
+                        label="OS",
+                        choices=["macOS", "Ubuntu"],
+                        value="macOS",
+                        interactive=False # disable until the ubuntu image is ready
+                    )
+                    start_btn = gr.Button("Initialize Computer")
                
                with gr.Group():
                    input_text = gr.Textbox(label="Type Text")
@@ -1169,7 +1199,7 @@ def create_gradio_ui():
        )
                
        img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
-        start_btn.click(handle_init_computer, outputs=[img, action_log])
+        start_btn.click(handle_init_computer, inputs=[os_choice], outputs=[img, action_log])
        wait_btn.click(handle_wait, outputs=[img, action_log])
        
        # DONE and FAIL buttons just do a placeholder action