diff --git a/libs/python/computer/computer/diorama_computer.py b/libs/python/computer/computer/diorama_computer.py
index 2eee77f0..da67c72c 100644
--- a/libs/python/computer/computer/diorama_computer.py
+++ b/libs/python/computer/computer/diorama_computer.py
@@ -6,16 +6,35 @@ class DioramaComputer:
     A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface.
     """
     def __init__(self, computer, apps):
+        """
+        Initialize the DioramaComputer with a computer instance and list of apps.
+
+        Args:
+            computer: The computer instance to proxy commands through
+            apps: List of applications available in the diorama environment
+        """
         self.computer = computer
         self.apps = apps
         self.interface = DioramaComputerInterface(computer, apps)
         self._initialized = False
 
     async def __aenter__(self):
+        """
+        Async context manager entry point.
+
+        Returns:
+            self: The DioramaComputer instance
+        """
         self._initialized = True
         return self
 
     async def run(self):
+        """
+        Initialize and run the DioramaComputer if not already initialized.
+
+        Returns:
+            self: The DioramaComputer instance
+        """
         if not self._initialized:
             await self.__aenter__()
         return self
@@ -25,11 +44,31 @@ class DioramaComputerInterface:
     Diorama Interface proxy that sends diorama_cmds via the Computer's interface.
     """
     def __init__(self, computer, apps):
+        """
+        Initialize the DioramaComputerInterface.
+
+        Args:
+            computer: The computer instance to send commands through
+            apps: List of applications available in the diorama environment
+        """
         self.computer = computer
         self.apps = apps
         self._scene_size = None
 
     async def _send_cmd(self, action, arguments=None):
+        """
+        Send a command to the diorama interface through the computer.
+
+        Args:
+            action (str): The action/command to execute
+            arguments (dict, optional): Additional arguments for the command
+
+        Returns:
+            The result from the diorama command execution
+
+        Raises:
+            RuntimeError: If the computer interface is not initialized or command fails
+        """
         arguments = arguments or {}
         arguments = {"app_list": self.apps, **arguments}
         # Use the computer's interface (must be initialized)
@@ -42,6 +81,15 @@ class DioramaComputerInterface:
         return result.get("result")
 
     async def screenshot(self, as_bytes=True):
+        """
+        Take a screenshot of the diorama scene.
+
+        Args:
+            as_bytes (bool): If True, return image as bytes; if False, return PIL Image object
+
+        Returns:
+            bytes or PIL.Image: Screenshot data in the requested format
+        """
         from PIL import Image
         import base64
         result = await self._send_cmd("screenshot")
@@ -53,41 +101,122 @@ class DioramaComputerInterface:
         return img_bytes if as_bytes else img
 
     async def get_screen_size(self):
+        """
+        Get the dimensions of the diorama scene.
+
+        Returns:
+            dict: Dictionary containing 'width' and 'height' keys with pixel dimensions
+        """
         if not self._scene_size:
             await self.screenshot(as_bytes=False)
         return {"width": self._scene_size[0], "height": self._scene_size[1]}
 
     async def move_cursor(self, x, y):
+        """
+        Move the cursor to the specified coordinates.
+
+        Args:
+            x (int): X coordinate to move cursor to
+            y (int): Y coordinate to move cursor to
+        """
         await self._send_cmd("move_cursor", {"x": x, "y": y})
 
     async def left_click(self, x=None, y=None):
+        """
+        Perform a left mouse click at the specified coordinates or current cursor position.
+
+        Args:
+            x (int, optional): X coordinate to click at. If None, clicks at current cursor position
+            y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
+        """
         await self._send_cmd("left_click", {"x": x, "y": y})
 
     async def right_click(self, x=None, y=None):
+        """
+        Perform a right mouse click at the specified coordinates or current cursor position.
+
+        Args:
+            x (int, optional): X coordinate to click at. If None, clicks at current cursor position
+            y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
+        """
         await self._send_cmd("right_click", {"x": x, "y": y})
 
     async def double_click(self, x=None, y=None):
+        """
+        Perform a double mouse click at the specified coordinates or current cursor position.
+
+        Args:
+            x (int, optional): X coordinate to double-click at. If None, clicks at current cursor position
+            y (int, optional): Y coordinate to double-click at. If None, clicks at current cursor position
+        """
         await self._send_cmd("double_click", {"x": x, "y": y})
 
     async def scroll_up(self, clicks=1):
+        """
+        Scroll up by the specified number of clicks.
+
+        Args:
+            clicks (int): Number of scroll clicks to perform upward. Defaults to 1
+        """
         await self._send_cmd("scroll_up", {"clicks": clicks})
 
     async def scroll_down(self, clicks=1):
+        """
+        Scroll down by the specified number of clicks.
+
+        Args:
+            clicks (int): Number of scroll clicks to perform downward. Defaults to 1
+        """
         await self._send_cmd("scroll_down", {"clicks": clicks})
 
     async def drag_to(self, x, y, duration=0.5):
+        """
+        Drag from the current cursor position to the specified coordinates.
+
+        Args:
+            x (int): X coordinate to drag to
+            y (int): Y coordinate to drag to
+            duration (float): Duration of the drag operation in seconds. Defaults to 0.5
+        """
         await self._send_cmd("drag_to", {"x": x, "y": y, "duration": duration})
 
     async def get_cursor_position(self):
+        """
+        Get the current cursor position.
+
+        Returns:
+            dict: Dictionary containing the current cursor coordinates
+        """
         return await self._send_cmd("get_cursor_position")
 
     async def type_text(self, text):
+        """
+        Type the specified text at the current cursor position.
+
+        Args:
+            text (str): The text to type
+        """
         await self._send_cmd("type_text", {"text": text})
 
     async def press_key(self, key):
+        """
+        Press a single key.
+
+        Args:
+            key: The key to press
+        """
         await self._send_cmd("press_key", {"key": key})
 
     async def hotkey(self, *keys):
+        """
+        Press multiple keys simultaneously as a hotkey combination.
+
+        Args:
+            *keys: Variable number of keys to press together. Can be Key enum instances or strings
+
+        Raises:
+            ValueError: If any key is not a Key enum or string type
+        """
         actual_keys = []
         for key in keys:
             if isinstance(key, Key):
@@ -101,4 +230,14 @@ class DioramaComputerInterface:
         await self._send_cmd("hotkey", {"keys": actual_keys})
 
     async def to_screen_coordinates(self, x, y):
+        """
+        Convert coordinates to screen coordinates.
+
+        Args:
+            x (int): X coordinate to convert
+            y (int): Y coordinate to convert
+
+        Returns:
+            dict: Dictionary containing the converted screen coordinates
+        """
         return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})