NOTE(review): this patch was recovered from a copy in which all line breaks
had been collapsed. Records and hunk line counts below are restored from the
hunk headers. Relative nesting follows Python syntax; the absolute indentation
depth of the first hunk is an assumption - confirm against computer.py before
applying.
NOTE(review): the second hunk uses `Key` but no hunk here adds an import for
it - confirm diorama_computer.py already imports `Key`.
NOTE(review): the first hunk replaces `amount` presses of fn+down / fn+up with
scroll_down(amount) / scroll_up(amount); the unit expected by those calls is
not visible here - confirm it matches what callers pass.

diff --git a/libs/agent/agent/providers/anthropic/tools/computer.py b/libs/agent/agent/providers/anthropic/tools/computer.py
index ecf232bd..2bb944ea 100644
--- a/libs/agent/agent/providers/anthropic/tools/computer.py
+++ b/libs/agent/agent/providers/anthropic/tools/computer.py
@@ -478,17 +478,11 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                     if direction == "down":
                         # Scroll down (Page Down on macOS)
                         self.logger.info(f"Scrolling down, amount: {amount}")
-                        # Use fn+down for page down on macOS
-                        for _ in range(amount):
-                            await self.computer.interface.hotkey("fn", "down")
-                            await asyncio.sleep(0.1)
+                        await self.computer.interface.scroll_down(amount)
                     else:
                         # Scroll up (Page Up on macOS)
                         self.logger.info(f"Scrolling up, amount: {amount}")
-                        # Use fn+up for page up on macOS
-                        for _ in range(amount):
-                            await self.computer.interface.hotkey("fn", "up")
-                            await asyncio.sleep(0.1)
+                        await self.computer.interface.scroll_up(amount)
 
                     # Wait briefly for UI changes
                     await asyncio.sleep(0.5)
diff --git a/libs/computer/computer/diorama_computer.py b/libs/computer/computer/diorama_computer.py
index 5cad0006..dfb541b9 100644
--- a/libs/computer/computer/diorama_computer.py
+++ b/libs/computer/computer/diorama_computer.py
@@ -87,7 +87,17 @@ class DioramaComputerInterface:
         await self._send_cmd("press_key", {"key": key})
 
     async def hotkey(self, *keys):
-        await self._send_cmd("hotkey", {"keys": list(keys)})
+        actual_keys = []
+        for key in keys:
+            if isinstance(key, Key):
+                actual_keys.append(key.value)
+            elif isinstance(key, str):
+                # Try to convert to enum if it matches a known key
+                key_or_enum = Key.from_string(key)
+                actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
+            else:
+                raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
+        await self._send_cmd("hotkey", {"keys": actual_keys})
 
     async def to_screen_coordinates(self, x, y):
         return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})