mirror of
https://github.com/trycua/computer.git
synced 2026-05-03 05:31:06 -05:00
add get_keyboard_focus to computer_server
This commit is contained in:
@@ -16,6 +16,11 @@ class BaseAccessibilityHandler(ABC):
|
||||
"""Find an element in the accessibility tree by criteria."""
|
||||
pass
|
||||
|
||||
@abstractmethod
async def get_keyboard_focus(self) -> Dict[str, Any]:
    """Get the currently focused UI element.

    Concrete accessibility handlers override this coroutine.

    Returns:
        A dictionary describing the element that has keyboard focus.
        NOTE(review): callers appear to expect a boolean ``"success"``
        entry (and an ``"error"`` message on failure) -- confirm against
        the concrete handlers.
    """
    pass
|
||||
|
||||
class BaseAutomationHandler(ABC):
|
||||
"""Abstract base class for OS-specific automation handlers.
|
||||
|
||||
|
||||
@@ -33,6 +33,8 @@ from ApplicationServices import (
|
||||
AXValueGetValue, # type: ignore
|
||||
kAXVisibleChildrenAttribute, # type: ignore
|
||||
kAXRoleDescriptionAttribute, # type: ignore
|
||||
kAXFocusedApplicationAttribute, # type: ignore
|
||||
kAXFocusedUIElementAttribute, # type: ignore
|
||||
)
|
||||
import objc
|
||||
import re
|
||||
@@ -514,6 +516,68 @@ class MacOSAccessibilityHandler(BaseAccessibilityHandler):
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def get_keyboard_focus(self) -> Dict[str, Any]:
    """Describe the UI element that currently has keyboard focus.

    Asks the system-wide accessibility object for the focused application,
    asks that application for its focused UI element, and then reads the
    element's position, size, role, title and value.

    Returns:
        On success, a dictionary with ``"success": True`` and:

        * ``"position"``: ``{"x", "y"}`` of the element,
        * ``"size"``: ``{"width", "height"}`` of the element,
        * ``"center"``: ``{"x", "y"}`` computed as position plus half
          the size,
        * ``"role"``, ``"title"``, ``"value"``: the corresponding
          accessibility attributes as returned by ``get_ax_attribute``.

        On failure, ``{"success": False, "error": <message>}``. Any
        exception raised during the lookup is reported this way instead
        of propagating to the caller.
    """
    try:
        # Create system-wide accessibility object
        system = AXUIElementCreateSystemWide()

        # Get focused application
        err, focused_app = AXUIElementCopyAttributeValue(
            system, kAXFocusedApplicationAttribute, None
        )
        if err != kAXErrorSuccess or not focused_app:
            return {"success": False, "error": "Could not get focused application"}

        # Get focused UI element
        err, focused_element = AXUIElementCopyAttributeValue(
            focused_app, kAXFocusedUIElementAttribute, None
        )
        if err != kAXErrorSuccess or not focused_element:
            return {"success": False, "error": "Could not get focused UI element"}

        # Get position of focused element
        position = self.get_ax_attribute(focused_element, kAXPositionAttribute)
        if not position:
            return {"success": False, "error": "Could not get position of focused element"}

        # Get size of focused element
        size = self.get_ax_attribute(focused_element, kAXSizeAttribute)
        if not size:
            return {"success": False, "error": "Could not get size of focused element"}

        # Convert position to point
        position_point = element_value(position, kAXValueCGPointType)
        if not position_point:
            return {"success": False, "error": "Could not convert position to point"}

        # Convert size to CGSize
        size_value = element_value(size, kAXValueCGSizeType)
        if not size_value:
            return {"success": False, "error": "Could not convert size to CGSize"}

        # Calculate center point of the element
        center_x = position_point.x + (size_value.width / 2)
        center_y = position_point.y + (size_value.height / 2)

        # Get additional information about the focused element.
        # NOTE(review): these are passed through exactly as get_ax_attribute
        # returns them; presumably they must be serializable for the reply
        # sent back to the client -- confirm for non-string values.
        role = self.get_ax_attribute(focused_element, kAXRoleAttribute)
        title = self.get_ax_attribute(focused_element, kAXTitleAttribute)
        value = self.get_ax_attribute(focused_element, kAXValueAttribute)

        return {
            "success": True,
            "position": {"x": position_point.x, "y": position_point.y},
            "size": {"width": size_value.width, "height": size_value.height},
            "center": {"x": center_x, "y": center_y},
            "role": role,
            "title": title,
            "value": value,
        }

    except Exception as e:
        # Report every failure as an error result rather than raising.
        return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
class MacOSAutomationHandler(BaseAutomationHandler):
|
||||
|
||||
@@ -54,6 +54,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
# Accessibility commands
|
||||
"get_accessibility_tree": manager.accessibility_handler.get_accessibility_tree,
|
||||
"find_element": manager.accessibility_handler.find_element,
|
||||
"get_keyboard_focus": manager.accessibility_handler.get_keyboard_focus,
|
||||
# Automation commands
|
||||
"screenshot": manager.automation_handler.screenshot,
|
||||
"left_click": manager.automation_handler.left_click,
|
||||
|
||||
@@ -173,6 +173,11 @@ class BaseComputerInterface(ABC):
|
||||
async def get_accessibility_tree(self) -> Dict:
|
||||
"""Get the accessibility tree of the current screen."""
|
||||
pass
|
||||
|
||||
@abstractmethod
async def get_keyboard_focus(self) -> Dict:
    """Get the currently focused UI element.

    Concrete computer interfaces override this coroutine.

    Returns:
        A dictionary describing the element that has keyboard focus.
        The exact keys are defined by the concrete implementation.
    """
    pass
|
||||
|
||||
@abstractmethod
|
||||
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
||||
|
||||
@@ -532,6 +532,13 @@ class MacOSComputerInterface(BaseComputerInterface):
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
|
||||
return result
|
||||
|
||||
async def get_keyboard_focus(self) -> Dict[str, Any]:
    """Get the currently focused UI element.

    Sends the ``get_keyboard_focus`` command and returns the reply when it
    reports success.

    Returns:
        The reply dictionary, unchanged, when its ``"success"`` entry is
        truthy.

    Raises:
        RuntimeError: If the reply has no truthy ``"success"`` entry. The
            message is the reply's ``"error"`` entry, or
            ``"Failed to get keyboard focus"`` when none is present.
    """
    result = await self._send_command("get_keyboard_focus")
    if not result.get("success", False):
        raise RuntimeError(result.get("error", "Failed to get keyboard focus"))
    return result
|
||||
|
||||
async def get_active_window_bounds(self) -> Dict[str, int]:
|
||||
"""Get the bounds of the currently active window."""
|
||||
|
||||
Reference in New Issue
Block a user