diff --git a/libs/python/computer-server/computer_server/handlers/windows.py b/libs/python/computer-server/computer_server/handlers/windows.py index 2d91ce53..eeb0d6bf 100644 --- a/libs/python/computer-server/computer_server/handlers/windows.py +++ b/libs/python/computer-server/computer_server/handlers/windows.py @@ -41,7 +41,14 @@ class WindowsAccessibilityHandler(BaseAccessibilityHandler): """Windows implementation of accessibility handler.""" async def get_accessibility_tree(self) -> Dict[str, Any]: - """Get the accessibility tree of the current window.""" + """Get the accessibility tree of the current window. + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + the accessibility tree or an error message. + Structure: {"success": bool, "tree": dict} or + {"success": bool, "error": str} + """ if not WINDOWS_API_AVAILABLE: return {"success": False, "error": "Windows API not available"} @@ -65,6 +72,15 @@ class WindowsAccessibilityHandler(BaseAccessibilityHandler): # Enumerate child windows def enum_child_proc(hwnd_child, children_list): + """Callback function to enumerate child windows and collect their information. + + Args: + hwnd_child: Handle to the child window being enumerated. + children_list: List to append child window information to. + + Returns: + bool: True to continue enumeration, False to stop. + """ try: child_text = win32gui.GetWindowText(hwnd_child) child_rect = win32gui.GetWindowRect(hwnd_child) @@ -93,7 +109,19 @@ class WindowsAccessibilityHandler(BaseAccessibilityHandler): async def find_element(self, role: Optional[str] = None, title: Optional[str] = None, value: Optional[str] = None) -> Dict[str, Any]: - """Find an element in the accessibility tree by criteria.""" + """Find an element in the accessibility tree by criteria. + + Args: + role (Optional[str]): The role or class name of the element to find. + title (Optional[str]): The title or text of the element to find. + value (Optional[str]): The value of the element (not used in Windows implementation). + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + the found element or an error message. + Structure: {"success": bool, "element": dict} or + {"success": bool, "error": str} + """ if not WINDOWS_API_AVAILABLE: return {"success": False, "error": "Windows API not available"} @@ -140,6 +168,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Mouse Actions async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + """Press and hold a mouse button at the specified coordinates. + + Args: + x (Optional[int]): The x-coordinate to move to before pressing. If None, uses current position. + y (Optional[int]): The y-coordinate to move to before pressing. If None, uses current position. + button (str): The mouse button to press ("left", "right", or "middle"). + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -152,6 +190,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> Dict[str, Any]: + """Release a mouse button at the specified coordinates. + + Args: + x (Optional[int]): The x-coordinate to move to before releasing. If None, uses current position. + y (Optional[int]): The y-coordinate to move to before releasing. If None, uses current position. + button (str): The mouse button to release ("left", "right", or "middle"). + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -164,6 +212,15 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def move_cursor(self, x: int, y: int) -> Dict[str, Any]: + """Move the mouse cursor to the specified coordinates. + + Args: + x (int): The x-coordinate to move to. + y (int): The y-coordinate to move to. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -174,6 +231,15 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + """Perform a left mouse click at the specified coordinates. + + Args: + x (Optional[int]): The x-coordinate to click at. If None, clicks at current position. + y (Optional[int]): The y-coordinate to click at. If None, clicks at current position. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -186,6 +252,15 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + """Perform a right mouse click at the specified coordinates. + + Args: + x (Optional[int]): The x-coordinate to click at. If None, clicks at current position. + y (Optional[int]): The y-coordinate to click at. If None, clicks at current position. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -198,6 +273,15 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + """Perform a double left mouse click at the specified coordinates. + + Args: + x (Optional[int]): The x-coordinate to double-click at. If None, clicks at current position. + y (Optional[int]): The y-coordinate to double-click at. If None, clicks at current position. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -210,6 +294,17 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + """Drag from the current position to the specified coordinates. + + Args: + x (int): The x-coordinate to drag to. + y (int): The y-coordinate to drag to. + button (str): The mouse button to use for dragging ("left", "right", or "middle"). + duration (float): The time in seconds to take for the drag operation. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -220,6 +315,16 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + """Drag the mouse through a series of coordinates. + + Args: + path (List[Tuple[int, int]]): A list of (x, y) coordinate tuples to drag through. + button (str): The mouse button to use for dragging ("left", "right", or "middle"). + duration (float): The total time in seconds for the entire drag operation. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -240,6 +345,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Keyboard Actions async def key_down(self, key: str) -> Dict[str, Any]: + """Press and hold a keyboard key. + + Args: + key (str): The key to press down (e.g., 'ctrl', 'shift', 'a'). + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -250,6 +363,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def key_up(self, key: str) -> Dict[str, Any]: + """Release a keyboard key. + + Args: + key (str): The key to release (e.g., 'ctrl', 'shift', 'a'). + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -260,6 +381,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def type_text(self, text: str) -> Dict[str, Any]: + """Type the specified text. + + Args: + text (str): The text to type. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -270,6 +399,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def press_key(self, key: str) -> Dict[str, Any]: + """Press and release a keyboard key. + + Args: + key (str): The key to press (e.g., 'enter', 'space', 'tab'). + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -280,6 +417,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def hotkey(self, keys: str) -> Dict[str, Any]: + """Press a combination of keys simultaneously. + + Args: + keys (str): The keys to press together (e.g., 'ctrl+c', 'alt+tab'). + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -291,6 +436,15 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Scrolling Actions async def scroll(self, x: int, y: int) -> Dict[str, Any]: + """Scroll vertically at the current cursor position. + + Args: + x (int): Horizontal scroll amount (not used in pyautogui implementation). + y (int): Vertical scroll amount. Positive values scroll up, negative values scroll down. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -302,6 +456,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: + """Scroll down by the specified number of clicks. + + Args: + clicks (int): The number of scroll clicks to perform downward. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -312,6 +474,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]: + """Scroll up by the specified number of clicks. + + Args: + clicks (int): The number of scroll clicks to perform upward. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -323,6 +493,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Screen Actions async def screenshot(self) -> Dict[str, Any]: + """Capture a screenshot of the entire screen. + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + base64-encoded image data or an error message. + Structure: {"success": bool, "image_data": str} or + {"success": bool, "error": str} + """ if not pyautogui: return {"success": False, "error": "pyautogui not available"} @@ -341,6 +519,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": f"Screenshot error: {str(e)}"} async def get_screen_size(self) -> Dict[str, Any]: + """Get the size of the screen in pixels. + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + screen size information or an error message. + Structure: {"success": bool, "size": {"width": int, "height": int}} or + {"success": bool, "error": str} + """ try: if pyautogui: size = pyautogui.size() @@ -356,6 +542,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def get_cursor_position(self) -> Dict[str, Any]: + """Get the current position of the mouse cursor. + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + cursor position or an error message. + Structure: {"success": bool, "position": {"x": int, "y": int}} or + {"success": bool, "error": str} + """ try: if pyautogui: pos = pyautogui.position() @@ -371,6 +565,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Clipboard Actions async def copy_to_clipboard(self) -> Dict[str, Any]: + """Get the current content of the clipboard. + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + clipboard content or an error message. + Structure: {"success": bool, "content": str} or + {"success": bool, "error": str} + """ try: import pyperclip content = pyperclip.paste() @@ -379,6 +581,14 @@ class WindowsAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def set_clipboard(self, text: str) -> Dict[str, Any]: + """Set the clipboard content to the specified text. + + Args: + text (str): The text to copy to the clipboard. + + Returns: + Dict[str, Any]: A dictionary with success status and optional error message. + """ try: import pyperclip pyperclip.copy(text) @@ -388,6 +598,17 @@ class WindowsAutomationHandler(BaseAutomationHandler): # Command Execution async def run_command(self, command: str) -> Dict[str, Any]: + """Execute a shell command asynchronously. + + Args: + command (str): The shell command to execute. + + Returns: + Dict[str, Any]: A dictionary containing the success status and either + command output or an error message. + Structure: {"success": bool, "stdout": str, "stderr": str, "return_code": int} or + {"success": bool, "error": str} + """ try: # Create subprocess process = await asyncio.create_subprocess_shell(