mirror of
https://github.com/trycua/computer.git
synced 2026-01-02 03:20:22 -06:00
App-usage stability fixes
This commit is contained in:
@@ -36,11 +36,21 @@ class Diorama:
|
||||
cls._ensure_scheduler()
|
||||
return cls(args).computer
|
||||
|
||||
# Dictionary to store cursor positions for each unique app_list hash
|
||||
_cursor_positions = {}
|
||||
|
||||
def __init__(self, app_list):
|
||||
self.app_list = app_list
|
||||
self.interface = self.Interface(self)
|
||||
self.computer = DioramaComputer(self)
|
||||
self.focus_context = None
|
||||
|
||||
# Create a hash for this app_list to use as a key
|
||||
self.app_list_hash = hash(tuple(sorted(app_list)))
|
||||
|
||||
# Initialize cursor position for this app_list if it doesn't exist
|
||||
if self.app_list_hash not in Diorama._cursor_positions:
|
||||
Diorama._cursor_positions[self.app_list_hash] = (0, 0)
|
||||
|
||||
@classmethod
|
||||
def _ensure_scheduler(cls):
|
||||
@@ -67,10 +77,11 @@ class Diorama:
|
||||
frontmost_app, active_app_to_use, active_app_pid = get_frontmost_and_active_app(all_windows, running_apps, app_whitelist)
|
||||
focus_context = AppActivationContext(active_app_pid, active_app_to_use, logger)
|
||||
|
||||
app_list_hash = hash(tuple(sorted(app_whitelist)))
|
||||
|
||||
with focus_context:
|
||||
try:
|
||||
if action == "screenshot":
|
||||
app_whitelist = list(args["app_list"])
|
||||
logger.info(f"Taking screenshot for apps: {app_whitelist}")
|
||||
result, img = capture_all_apps(
|
||||
app_whitelist=app_whitelist,
|
||||
@@ -82,8 +93,15 @@ class Diorama:
|
||||
future.set_result((result, img))
|
||||
# Mouse actions
|
||||
elif action in ["left_click", "right_click", "double_click", "move_cursor", "drag_to"]:
|
||||
x = args.get("x")
|
||||
y = args.get("y")
|
||||
# Get last cursor position for this app_list hash
|
||||
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
|
||||
|
||||
x = args.get("x", last_pos[0])
|
||||
y = args.get("y", last_pos[1])
|
||||
|
||||
# Update the cursor position for this app_list hash
|
||||
Diorama._cursor_positions[app_list_hash] = (x, y)
|
||||
|
||||
duration = args.get("duration", 0.5)
|
||||
if action == "left_click":
|
||||
await automation_handler.left_click(x, y)
|
||||
@@ -98,6 +116,10 @@ class Diorama:
|
||||
if future:
|
||||
future.set_result(None)
|
||||
elif action in ["scroll_up", "scroll_down"]:
|
||||
# Move cursor to last known position for this app_list hash
|
||||
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
|
||||
await automation_handler.move_cursor(*last_pos)
|
||||
|
||||
clicks = args.get("clicks", 1)
|
||||
if action == "scroll_up":
|
||||
await automation_handler.scroll_up(clicks)
|
||||
|
||||
@@ -37,7 +37,7 @@ class DioramaComputerInterface:
|
||||
raise RuntimeError("Computer interface not initialized. Call run() first.")
|
||||
result = await iface.diorama_cmd(action, arguments)
|
||||
if not result.get("success"):
|
||||
raise RuntimeError(f"Diorama command failed: {result.get('error')}")
|
||||
raise RuntimeError(f"Diorama command failed: {result.get('error')}\n{result.get('trace')}")
|
||||
return result.get("result")
|
||||
|
||||
async def screenshot(self, as_bytes=True):
|
||||
|
||||
@@ -463,7 +463,7 @@ async def execute(name, action, arguments):
|
||||
elif action == "left_click":
|
||||
if "x" in arguments and "y" in arguments:
|
||||
await computer.interface.move_cursor(arguments["x"], arguments["y"])
|
||||
await computer.interface.left_click()
|
||||
await computer.interface.left_click(arguments["x"], arguments["y"])
|
||||
await asyncio.sleep(0.5)
|
||||
elif action == "right_click":
|
||||
if "x" in arguments and "y" in arguments:
|
||||
@@ -528,43 +528,75 @@ async def execute(name, action, arguments):
|
||||
|
||||
return results
|
||||
|
||||
async def handle_init_computer(os_choice: str):
|
||||
"""Initialize the computer instance and tools for macOS or Ubuntu"""
|
||||
async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
|
||||
"""Initialize the computer instance and tools for macOS or Ubuntu
|
||||
|
||||
Args:
|
||||
os_choice: The OS to use ("macOS" or "Ubuntu")
|
||||
app_list: Optional list of apps to focus on using the app-use experiment
|
||||
provider: The provider to use ("lume" or "self")
|
||||
"""
|
||||
global computer, tool_call_logs, tools
|
||||
|
||||
|
||||
# Check if we should enable app-use experiment
|
||||
use_app_experiment = app_list and len(app_list) > 0
|
||||
experiments = ["app-use"] if use_app_experiment else None
|
||||
|
||||
# Determine if we should use host computer server
|
||||
use_host_computer_server = provider == "self"
|
||||
|
||||
if os_choice == "Ubuntu":
|
||||
computer = Computer(
|
||||
image="ubuntu-noble-vanilla:latest",
|
||||
os_type="linux",
|
||||
provider_type=VMProviderType.LUME,
|
||||
display="1024x768",
|
||||
memory="8GB",
|
||||
cpu="4"
|
||||
)
|
||||
os_type_str = "linux"
|
||||
image_str = "ubuntu-noble-vanilla:latest"
|
||||
else:
|
||||
os_type_str = "macos"
|
||||
image_str = "macos-sequoia-cua:latest"
|
||||
|
||||
# Create computer instance with appropriate configuration
|
||||
if use_host_computer_server:
|
||||
computer = Computer(
|
||||
image="macos-sequoia-cua:latest",
|
||||
os_type="macos",
|
||||
os_type=os_type_str,
|
||||
use_host_computer_server=True,
|
||||
experiments=experiments
|
||||
)
|
||||
else:
|
||||
computer = Computer(
|
||||
image=image_str,
|
||||
os_type=os_type_str,
|
||||
provider_type=VMProviderType.LUME,
|
||||
display="1024x768",
|
||||
memory="8GB",
|
||||
cpu="4"
|
||||
cpu="4",
|
||||
experiments=experiments
|
||||
)
|
||||
os_type_str = "macos"
|
||||
image_str = "macos-sequoia-cua:latest"
|
||||
|
||||
await computer.run()
|
||||
|
||||
# If app list is provided, create desktop from apps
|
||||
if use_app_experiment:
|
||||
computer = computer.create_desktop_from_apps(app_list)
|
||||
|
||||
# Log computer initialization as a tool call
|
||||
result = await execute("computer", "initialize", {
|
||||
init_params = {
|
||||
"os": os_type_str,
|
||||
"image": image_str,
|
||||
"display": "1024x768",
|
||||
"memory": "8GB",
|
||||
"cpu": "4"
|
||||
})
|
||||
"provider": provider
|
||||
}
|
||||
|
||||
# Add VM-specific parameters if not using host computer server
|
||||
if not use_host_computer_server:
|
||||
init_params.update({
|
||||
"image": image_str,
|
||||
"display": "1024x768",
|
||||
"memory": "8GB",
|
||||
"cpu": "4"
|
||||
})
|
||||
|
||||
# Add app list to the log if provided
|
||||
if use_app_experiment:
|
||||
init_params["apps"] = app_list
|
||||
init_params["experiments"] = ["app-use"]
|
||||
|
||||
result = await execute("computer", "initialize", init_params)
|
||||
|
||||
return result["screenshot"], json.dumps(tool_call_logs, indent=2)
|
||||
|
||||
@@ -1029,12 +1061,31 @@ def create_gradio_ui():
|
||||
setup_status = gr.Textbox(label="Setup Status", value="")
|
||||
|
||||
with gr.Group():
|
||||
os_choice = gr.Radio(
|
||||
label="OS",
|
||||
choices=["macOS", "Ubuntu"],
|
||||
value="macOS",
|
||||
interactive=False # disable until the ubuntu image is ready
|
||||
)
|
||||
with gr.Accordion("Computer Configuration", open=False):
|
||||
with gr.Row():
|
||||
os_choice = gr.Radio(
|
||||
label="OS",
|
||||
choices=["macOS", "Ubuntu"],
|
||||
value="macOS",
|
||||
interactive=False # disable until the ubuntu image is ready
|
||||
)
|
||||
|
||||
# Provider selection radio
|
||||
provider_choice = gr.Radio(
|
||||
label="Provider",
|
||||
choices=["lume", "self"],
|
||||
value="lume",
|
||||
info="'lume' uses a VM, 'self' uses the host computer server"
|
||||
)
|
||||
|
||||
# App filtering dropdown for app-use experiment
|
||||
app_filter = gr.Dropdown(
|
||||
label="Filter by apps (App-Use)",
|
||||
multiselect=True,
|
||||
allow_custom_value=True,
|
||||
info="When apps are selected, the computer will focus on those apps using the app-use experiment"
|
||||
)
|
||||
|
||||
start_btn = gr.Button("Initialize Computer")
|
||||
|
||||
with gr.Group():
|
||||
@@ -1199,7 +1250,7 @@ def create_gradio_ui():
|
||||
)
|
||||
|
||||
img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
|
||||
start_btn.click(handle_init_computer, inputs=[os_choice], outputs=[img, action_log])
|
||||
start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice], outputs=[img, action_log])
|
||||
wait_btn.click(handle_wait, outputs=[img, action_log])
|
||||
|
||||
# DONE and FAIL buttons just do a placeholder action
|
||||
|
||||
Reference in New Issue
Block a user