add get_keyboard_focus to computer_server

This commit is contained in:
Dillon DuPont
2025-05-11 17:35:49 -04:00
parent d4f623efdf
commit fffe51be1b
5 changed files with 82 additions and 0 deletions
@@ -16,6 +16,11 @@ class BaseAccessibilityHandler(ABC):
"""Find an element in the accessibility tree by criteria."""
pass
@abstractmethod
async def get_keyboard_focus(self) -> Dict[str, Any]:
    """Return information about the UI element that currently has keyboard focus.

    Concrete accessibility handlers must override this coroutine.

    Returns:
        A dictionary describing the focused element; the exact keys are
        chosen by the concrete handler.
    """
    ...
class BaseAutomationHandler(ABC):
"""Abstract base class for OS-specific automation handlers.
@@ -33,6 +33,8 @@ from ApplicationServices import (
AXValueGetValue, # type: ignore
kAXVisibleChildrenAttribute, # type: ignore
kAXRoleDescriptionAttribute, # type: ignore
kAXFocusedApplicationAttribute, # type: ignore
kAXFocusedUIElementAttribute, # type: ignore
)
import objc
import re
@@ -514,6 +516,68 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler):
except Exception as e:
return {"success": False, "error": str(e)}
async def get_keyboard_focus(self) -> Dict[str, Any]:
    """Describe the UI element that currently holds keyboard focus.

    Asks the system-wide accessibility object for the focused application,
    asks that application for its focused UI element, and then reads the
    element's position and size to derive its center point. The element's
    role, title and value are reported alongside the geometry.

    Returns:
        On success, a dictionary with ``success`` set to True and the keys
        ``position``, ``size``, ``center``, ``role``, ``title`` and
        ``value``. On failure (including any exception raised along the
        way), a dictionary with ``success`` set to False and an ``error``
        message.
    """

    def failure(message: str) -> Dict[str, Any]:
        # Every unsuccessful exit shares the same payload shape.
        return {"success": False, "error": message}

    try:
        system_wide = AXUIElementCreateSystemWide()

        # Step 1: which application owns keyboard focus?
        status, app_ref = AXUIElementCopyAttributeValue(
            system_wide, kAXFocusedApplicationAttribute, None
        )
        if not (status == kAXErrorSuccess and app_ref):
            return failure("Could not get focused application")

        # Step 2: which element inside that application is focused?
        status, focused = AXUIElementCopyAttributeValue(
            app_ref, kAXFocusedUIElementAttribute, None
        )
        if not (status == kAXErrorSuccess and focused):
            return failure("Could not get focused UI element")

        # Step 3: fetch the raw geometry attributes (position first, then size).
        raw_position = self.get_ax_attribute(focused, kAXPositionAttribute)
        if not raw_position:
            return failure("Could not get position of focused element")

        raw_size = self.get_ax_attribute(focused, kAXSizeAttribute)
        if not raw_size:
            return failure("Could not get size of focused element")

        # Step 4: convert the raw attribute values into point / size structures.
        origin = element_value(raw_position, kAXValueCGPointType)
        if not origin:
            return failure("Could not convert position to point")

        extent = element_value(raw_size, kAXValueCGSizeType)
        if not extent:
            return failure("Could not convert size to CGSize")

        # Step 5: the center is the origin shifted by half of each dimension.
        left = origin.x
        width = extent.width
        center_x = left + (width / 2)
        top = origin.y
        height = extent.height
        center_y = top + (height / 2)

        # Step 6: descriptive attributes of the focused element.
        role = self.get_ax_attribute(focused, kAXRoleAttribute)
        title = self.get_ax_attribute(focused, kAXTitleAttribute)
        value = self.get_ax_attribute(focused, kAXValueAttribute)

        return {
            "success": True,
            "position": {"x": left, "y": top},
            "size": {"width": width, "height": height},
            "center": {"x": center_x, "y": center_y},
            "role": role,
            "title": title,
            "value": value,
        }
    except Exception as exc:
        # Any accessibility failure is reported to the caller, never raised.
        return failure(str(exc))
class MacOSAutomationHandler(BaseAutomationHandler):
@@ -54,6 +54,7 @@ async def websocket_endpoint(websocket: WebSocket):
# Accessibility commands
"get_accessibility_tree": manager.accessibility_handler.get_accessibility_tree,
"find_element": manager.accessibility_handler.find_element,
"get_keyboard_focus": manager.accessibility_handler.get_keyboard_focus,
# Automation commands
"screenshot": manager.automation_handler.screenshot,
"left_click": manager.automation_handler.left_click,
+5
View File
@@ -173,6 +173,11 @@ class BaseComputerInterface(ABC):
async def get_accessibility_tree(self) -> Dict:
"""Get the accessibility tree of the current screen."""
pass
@abstractmethod
async def get_keyboard_focus(self) -> Dict:
    """Return information about the UI element that currently has keyboard focus.

    Concrete computer interfaces must override this coroutine.

    Returns:
        A dictionary describing the focused element.
    """
    ...
@abstractmethod
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
@@ -532,6 +532,13 @@ class MacOSComputerInterface(BaseComputerInterface):
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
return result
async def get_keyboard_focus(self) -> Dict[str, Any]:
    """Fetch the currently focused UI element via the ``get_keyboard_focus`` command.

    Returns:
        The response dictionary exactly as received, when it reports success.

    Raises:
        RuntimeError: If the response does not report success. The message
            is the response's ``error`` entry, or a generic fallback when
            that entry is absent.
    """
    response = await self._send_command("get_keyboard_focus")
    if response.get("success", False):
        return response
    raise RuntimeError(response.get("error", "Failed to get keyboard focus"))
async def get_active_window_bounds(self) -> Dict[str, int]:
"""Get the bounds of the currently active window."""