Added reference documentation for: libs/python/computer-server/computer_server/diorama/diorama.py

This commit is contained in:
Andrei Onel
2025-09-01 22:51:53 +03:00
parent a13b51265c
commit ba2063cc22

View File

@@ -20,6 +20,12 @@ logger = logging.getLogger(__name__)
automation_handler = MacOSAutomationHandler()
class Diorama:
"""Virtual desktop manager that provides automation capabilities for macOS applications.
Manages application windows and provides an interface for taking screenshots,
mouse interactions, keyboard input, and coordinate transformations between
screenshot space and screen space.
"""
_scheduler_queue = None
_scheduler_task = None
_loop = None
@@ -27,6 +33,14 @@ class Diorama:
@classmethod
def create_from_apps(cls, *args) -> DioramaComputer:
"""Create a DioramaComputer instance from a list of application names.
Args:
*args: Variable number of application names to include in the desktop
Returns:
DioramaComputer: A computer interface for the specified applications
"""
cls._ensure_scheduler()
return cls(args).computer
@@ -34,6 +48,11 @@ class Diorama:
_cursor_positions = {}
def __init__(self, app_list):
"""Initialize a Diorama instance for the specified applications.
Args:
app_list: List of application names to manage
"""
self.app_list = app_list
self.interface = self.Interface(self)
self.computer = DioramaComputer(self)
@@ -48,6 +67,10 @@ class Diorama:
@classmethod
def _ensure_scheduler(cls):
"""Ensure the async scheduler loop is running.
Creates and starts the scheduler task if it hasn't been started yet.
"""
if not cls._scheduler_started:
logger.info("Starting Diorama scheduler loop…")
cls._scheduler_queue = asyncio.Queue()
@@ -57,6 +80,11 @@ class Diorama:
@classmethod
async def _scheduler_loop(cls):
"""Main scheduler loop that processes automation commands.
Continuously processes commands from the scheduler queue, handling
screenshots, mouse actions, keyboard input, and scrolling operations.
"""
while True:
cmd = await cls._scheduler_queue.get()
action = cmd.get("action")
@@ -144,13 +172,33 @@ class Diorama:
future.set_exception(e)
class Interface():
"""Interface for interacting with the virtual desktop.
Provides methods for taking screenshots, mouse interactions, keyboard input,
and coordinate transformations between screenshot and screen coordinates.
"""
def __init__(self, diorama):
"""Initialize the interface with a reference to the parent Diorama instance.
Args:
diorama: The parent Diorama instance
"""
self._diorama = diorama
self._scene_hitboxes = []
self._scene_size = None
async def _send_cmd(self, action, arguments=None):
"""Send a command to the scheduler queue.
Args:
action (str): The action to perform
arguments (dict, optional): Arguments for the action
Returns:
The result of the command execution
"""
Diorama._ensure_scheduler()
loop = asyncio.get_event_loop()
future = loop.create_future()
@@ -167,6 +215,14 @@ class Diorama:
return None
async def screenshot(self, as_bytes: bool = True) -> Union[str, Image.Image]:
"""Take a screenshot of the managed applications.
Args:
as_bytes (bool): If True, return base64-encoded bytes; if False, return PIL Image
Returns:
Union[str, Image.Image]: Base64-encoded PNG bytes or PIL Image object
"""
import base64
result, img = await self._send_cmd("screenshot")
self._scene_hitboxes = result.get("hitboxes", [])
@@ -184,6 +240,12 @@ class Diorama:
return img
async def left_click(self, x, y):
"""Perform a left mouse click at the specified coordinates.
Args:
x (int): X coordinate in screenshot space (or None to use last position)
y (int): Y coordinate in screenshot space (or None to use last position)
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -195,6 +257,12 @@ class Diorama:
await self._send_cmd("left_click", {"x": sx, "y": sy})
async def right_click(self, x, y):
"""Perform a right mouse click at the specified coordinates.
Args:
x (int): X coordinate in screenshot space (or None to use last position)
y (int): Y coordinate in screenshot space (or None to use last position)
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -206,6 +274,12 @@ class Diorama:
await self._send_cmd("right_click", {"x": sx, "y": sy})
async def double_click(self, x, y):
"""Perform a double mouse click at the specified coordinates.
Args:
x (int): X coordinate in screenshot space (or None to use last position)
y (int): Y coordinate in screenshot space (or None to use last position)
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -217,6 +291,12 @@ class Diorama:
await self._send_cmd("double_click", {"x": sx, "y": sy})
async def move_cursor(self, x, y):
"""Move the mouse cursor to the specified coordinates.
Args:
x (int): X coordinate in screenshot space (or None to use last position)
y (int): Y coordinate in screenshot space (or None to use last position)
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -228,6 +308,13 @@ class Diorama:
await self._send_cmd("move_cursor", {"x": sx, "y": sy})
async def drag_to(self, x, y, duration=0.5):
"""Drag the mouse from current position to the specified coordinates.
Args:
x (int): X coordinate in screenshot space (or None to use last position)
y (int): Y coordinate in screenshot space (or None to use last position)
duration (float): Duration of the drag operation in seconds
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -239,18 +326,43 @@ class Diorama:
await self._send_cmd("drag_to", {"x": sx, "y": sy, "duration": duration})
async def get_cursor_position(self):
"""Get the current cursor position in screen coordinates.
Returns:
tuple: (x, y) coordinates of the cursor in screen space
"""
return await self._send_cmd("get_cursor_position")
async def type_text(self, text):
"""Type the specified text using the keyboard.
Args:
text (str): The text to type
"""
await self._send_cmd("type_text", {"text": text})
async def press_key(self, key):
"""Press a single key on the keyboard.
Args:
key (str): The key to press
"""
await self._send_cmd("press_key", {"key": key})
async def hotkey(self, keys):
"""Press a combination of keys simultaneously.
Args:
keys (list): List of keys to press together
"""
await self._send_cmd("hotkey", {"keys": list(keys)})
async def scroll_up(self, clicks: int = 1):
"""Scroll up at the current cursor position.
Args:
clicks (int): Number of scroll clicks to perform
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -259,6 +371,11 @@ class Diorama:
await self._send_cmd("scroll_up", {"clicks": clicks, "x": x, "y": y})
async def scroll_down(self, clicks: int = 1):
"""Scroll down at the current cursor position.
Args:
clicks (int): Number of scroll clicks to perform
"""
# Get last cursor position for this app_list hash
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -267,6 +384,11 @@ class Diorama:
await self._send_cmd("scroll_down", {"clicks": clicks, "x": x, "y": y})
async def get_screen_size(self) -> dict[str, int]:
"""Get the size of the screenshot area.
Returns:
dict[str, int]: Dictionary with 'width' and 'height' keys
"""
if not self._scene_size:
await self.screenshot()
return { "width": self._scene_size[0], "height": self._scene_size[1] }
@@ -348,6 +470,7 @@ import pyautogui
import time
async def main():
"""Main function demonstrating Diorama usage with multiple desktops and mouse tracking."""
desktop1 = Diorama.create_from_apps(["Discord", "Notes"])
desktop2 = Diorama.create_from_apps(["Terminal"])