Merge pull request #179 from trycua/fix/computer/linux

[Computer] Fix hotkeys and computer initialization
This commit is contained in:
f-trycua
2025-05-15 18:35:11 +02:00
committed by GitHub
3 changed files with 113 additions and 74 deletions

View File

@@ -168,7 +168,7 @@ class LinuxAutomationHandler(BaseAutomationHandler):
except Exception as e:
return {"success": False, "error": str(e)}
async def hotkey(self, *keys: str) -> Dict[str, Any]:
async def hotkey(self, keys: List[str]) -> Dict[str, Any]:
try:
pyautogui.hotkey(*keys)
return {"success": True}

View File

@@ -78,11 +78,11 @@ class Computer:
self.provider_type = provider_type
self.ephemeral = ephemeral
if ephemeral:
self.storage = "ephemeral"
else:
self.storage = storage
# The default is currently to use non-ephemeral storage
if storage and ephemeral and storage != "ephemeral":
raise ValueError("Storage path and ephemeral flag cannot be used together")
self.storage = "ephemeral" if ephemeral else storage
# For Lumier provider, store the first shared directory path to use
# for VM file sharing
self.shared_path = None
@@ -279,12 +279,14 @@ class Computer:
raise RuntimeError(f"Failed to initialize VM provider: {e}")
# Check if VM exists or create it
is_running = False
try:
if self.config.vm_provider is None:
raise RuntimeError(f"VM provider not initialized for {self.config.name}")
vm = await self.config.vm_provider.get_vm(self.config.name)
self.logger.verbose(f"Found existing VM: {self.config.name}")
is_running = vm.get("status") == "running"
except Exception as e:
self.logger.error(f"VM not found: {self.config.name}")
self.logger.error(f"Error: {e}")
@@ -292,63 +294,67 @@ class Computer:
f"VM {self.config.name} could not be found or created."
)
# Convert paths to dictionary format for shared directories
shared_dirs = []
for path in self.shared_directories:
self.logger.verbose(f"Adding shared directory: {path}")
path = os.path.abspath(os.path.expanduser(path))
if os.path.exists(path):
# Add path in format expected by Lume API
shared_dirs.append({
"hostPath": path,
"readOnly": False
})
else:
self.logger.warning(f"Shared directory does not exist: {path}")
# Start the VM if it's not running
if not is_running:
self.logger.info(f"VM {self.config.name} is not running, starting it...")
# Convert paths to dictionary format for shared directories
shared_dirs = []
for path in self.shared_directories:
self.logger.verbose(f"Adding shared directory: {path}")
path = os.path.abspath(os.path.expanduser(path))
if os.path.exists(path):
# Add path in format expected by Lume API
shared_dirs.append({
"hostPath": path,
"readOnly": False
})
else:
self.logger.warning(f"Shared directory does not exist: {path}")
# Prepare run options to pass to the provider
run_opts = {}
# Add display information if available
if self.config.display is not None:
display_info = {
"width": self.config.display.width,
"height": self.config.display.height,
}
# Prepare run options to pass to the provider
run_opts = {}
# Add display information if available
if self.config.display is not None:
display_info = {
"width": self.config.display.width,
"height": self.config.display.height,
}
# Check if scale_factor exists before adding it
if hasattr(self.config.display, "scale_factor"):
display_info["scale_factor"] = self.config.display.scale_factor
run_opts["display"] = display_info
# Add shared directories if available
if self.shared_directories:
run_opts["shared_directories"] = shared_dirs.copy()
# Run the VM with the provider
try:
if self.config.vm_provider is None:
raise RuntimeError(f"VM provider not initialized for {self.config.name}")
# Check if scale_factor exists before adding it
if hasattr(self.config.display, "scale_factor"):
display_info["scale_factor"] = self.config.display.scale_factor
# Use the complete run_opts we prepared earlier
# Handle ephemeral storage for run_vm method too
storage_param = "ephemeral" if self.ephemeral else self.storage
# Log the image being used
self.logger.info(f"Running VM using image: {self.image}")
# Call provider.run_vm with explicit image parameter
response = await self.config.vm_provider.run_vm(
image=self.image,
name=self.config.name,
run_opts=run_opts,
storage=storage_param
)
self.logger.info(f"VM run response: {response if response else 'None'}")
except Exception as run_error:
self.logger.error(f"Failed to run VM: {run_error}")
raise RuntimeError(f"Failed to start VM: {run_error}")
run_opts["display"] = display_info
# Add shared directories if available
if self.shared_directories:
run_opts["shared_directories"] = shared_dirs.copy()
# Run the VM with the provider
try:
if self.config.vm_provider is None:
raise RuntimeError(f"VM provider not initialized for {self.config.name}")
# Use the complete run_opts we prepared earlier
# Handle ephemeral storage for run_vm method too
storage_param = "ephemeral" if self.ephemeral else self.storage
# Log the image being used
self.logger.info(f"Running VM using image: {self.image}")
# Call provider.run_vm with explicit image parameter
response = await self.config.vm_provider.run_vm(
image=self.image,
name=self.config.name,
run_opts=run_opts,
storage=storage_param
)
self.logger.info(f"VM run response: {response if response else 'None'}")
except Exception as run_error:
self.logger.error(f"Failed to run VM: {run_error}")
raise RuntimeError(f"Failed to start VM: {run_error}")
# Wait for VM to be ready with a valid IP address
self.logger.info("Waiting for VM to be ready with a valid IP address...")
@@ -406,6 +412,9 @@ class Computer:
raise TimeoutError(
f"Could not connect to WebSocket interface at {ip_address}:8000/ws: {str(e)}"
)
# self.logger.warning(
# f"Could not connect to WebSocket interface at {ip_address}:8000/ws: {str(e)}, expect missing functionality"
# )
# Create an event to keep the VM running in background if needed
if not self.use_host_computer_server:

View File

@@ -17,7 +17,7 @@ import base64
from datetime import datetime
from PIL import Image
from huggingface_hub import DatasetCard, DatasetCardData
from computer import Computer
from computer import Computer, VMProviderType
from gradio.components import ChatMessage
import pandas as pd
from datasets import Dataset, Features, Sequence, concatenate_datasets
@@ -528,21 +528,44 @@ async def execute(name, action, arguments):
return results
async def handle_init_computer():
"""Initialize the computer instance and tools"""
async def handle_init_computer(os_choice: str):
"""Initialize the computer instance and tools for macOS or Ubuntu"""
global computer, tool_call_logs, tools
computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
if os_choice == "Ubuntu":
computer = Computer(
image="ubuntu-noble-vanilla:latest",
os_type="linux",
provider_type=VMProviderType.LUME,
display="1024x768",
memory="8GB",
cpu="4"
)
os_type_str = "linux"
image_str = "ubuntu-noble-vanilla:latest"
else:
computer = Computer(
image="macos-sequoia-cua:latest",
os_type="macos",
provider_type=VMProviderType.LUME,
display="1024x768",
memory="8GB",
cpu="4"
)
os_type_str = "macos"
image_str = "macos-sequoia-cua:latest"
await computer.run()
# Log computer initialization as a tool call
result = await execute("computer", "initialize", {
"os": "macos",
"display": "1024x768",
"memory": "8GB",
"os": os_type_str,
"image": image_str,
"display": "1024x768",
"memory": "8GB",
"cpu": "4"
})
return result["screenshot"], json.dumps(tool_call_logs, indent=2)
async def handle_screenshot():
@@ -1004,8 +1027,15 @@ def create_gradio_ui():
run_setup_btn = gr.Button("⚙️ Run Task Setup")
# Setup status textbox
setup_status = gr.Textbox(label="Setup Status", value="")
start_btn = gr.Button("Initialize Computer")
with gr.Group():
os_choice = gr.Radio(
label="OS",
choices=["macOS", "Ubuntu"],
value="macOS",
interactive=False # disable until the ubuntu image is ready
)
start_btn = gr.Button("Initialize Computer")
with gr.Group():
input_text = gr.Textbox(label="Type Text")
@@ -1169,7 +1199,7 @@ def create_gradio_ui():
)
img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
start_btn.click(handle_init_computer, outputs=[img, action_log])
start_btn.click(handle_init_computer, inputs=[os_choice], outputs=[img, action_log])
wait_btn.click(handle_wait, outputs=[img, action_log])
# DONE and FAIL buttons just do a placeholder action