Added experiment flag for now

This commit is contained in:
Dillon DuPont
2025-05-31 12:19:51 -04:00
parent feca80e793
commit 500465883d
6 changed files with 34 additions and 10 deletions

View File

@@ -97,6 +97,14 @@ class Diorama:
await automation_handler.drag_to(x, y, duration=duration)
if future:
future.set_result(None)
elif action in ["scroll_up", "scroll_down"]:
clicks = args.get("clicks", 1)
if action == "scroll_up":
await automation_handler.scroll_up(clicks)
else:
await automation_handler.scroll_down(clicks)
if future:
future.set_result(None)
# Keyboard actions
elif action == "type_text":
text = args.get("text")
@@ -198,6 +206,12 @@ class Diorama:
async def hotkey(self, keys):
    """Forward a ``hotkey`` command carrying ``keys`` as a list.

    ``keys`` may be any iterable; it is materialized into a list before
    being handed to ``self._send_cmd``.
    """
    key_list = list(keys)
    payload = {"keys": key_list}
    await self._send_cmd("hotkey", payload)
async def scroll_up(self, clicks: int = 1):
    """Forward a ``scroll_up`` command with the requested click count.

    Args:
        clicks: Value sent under the ``"clicks"`` key (defaults to 1).
    """
    payload = dict(clicks=clicks)
    await self._send_cmd("scroll_up", payload)
async def scroll_down(self, clicks: int = 1):
    """Forward a ``scroll_down`` command with the requested click count.

    Args:
        clicks: Value sent under the ``"clicks"`` key (defaults to 1).
    """
    payload = dict(clicks=clicks)
    await self._send_cmd("scroll_down", payload)
async def get_screen_size(self) -> dict[str, int]:
if not self._scene_size:
await self.screenshot()

View File

@@ -4,10 +4,11 @@ import platform
import inspect
from computer_server.diorama.diorama import Diorama
from computer_server.diorama.base import BaseDioramaHandler
from typing import Optional
class MacOSDioramaHandler(BaseDioramaHandler):
"""Handler for Diorama commands on macOS, using local diorama module."""
async def diorama_cmd(self, action: str, arguments: dict = None) -> dict:
async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
if platform.system().lower() != "darwin":
return {"success": False, "error": "Diorama is only supported on macOS."}
try:

View File

@@ -55,7 +55,7 @@ class HandlerFactory:
if os_type == 'darwin':
return MacOSAccessibilityHandler(), MacOSAutomationHandler(), MacOSDioramaHandler()
else:
elif os_type == 'linux':
return LinuxAccessibilityHandler(), LinuxAutomationHandler(), BaseDioramaHandler()
else:
raise NotImplementedError(f"OS '{os_type}' is not supported")

View File

@@ -31,6 +31,7 @@ class Computer:
Returns:
DioramaComputer: A proxy object with the Diorama interface, but using diorama_cmds.
"""
assert "app-use" in self.experiments, "App Usage is an experimental feature. Enable it by passing experiments=['app-use'] to Computer()"
from .diorama_computer import DioramaComputer
return DioramaComputer(self, apps)
@@ -52,7 +53,8 @@ class Computer:
host: str = os.environ.get("PYLUME_HOST", "localhost"),
storage: Optional[str] = None,
ephemeral: bool = False,
api_key: Optional[str] = None
api_key: Optional[str] = None,
experiments: Optional[List[str]] = None
):
"""Initialize a new Computer instance.
@@ -78,6 +80,8 @@ class Computer:
host: Host to use for VM provider connections (e.g. "localhost", "host.docker.internal")
storage: Optional path for persistent VM storage (Lumier provider)
ephemeral: Whether to use ephemeral storage
api_key: Optional API key for cloud providers
experiments: Optional list of experimental features to enable (e.g. ["app-use"])
"""
self.logger = Logger("cua.computer", verbosity)
@@ -93,6 +97,7 @@ class Computer:
self.ephemeral = ephemeral
self.api_key = api_key
self.experiments = experiments or []
# The default is currently to use non-ephemeral storage
if storage and ephemeral and storage != "ephemeral":

View File

@@ -26,7 +26,6 @@ class DioramaComputerInterface:
def __init__(self, computer, apps):
self.computer = computer
self.apps = apps
self._scene_hitboxes = []
self._scene_size = None
async def _send_cmd(self, action, arguments=None):
@@ -43,11 +42,10 @@ class DioramaComputerInterface:
async def screenshot(self, as_bytes=True):
from PIL import Image
import base64
result = await self._send_cmd("screenshot")
img_bytes = result.get("image_bytes")
hitboxes = result.get("hitboxes", [])
self._scene_hitboxes = hitboxes
# Assume server returns PNG bytes
# assume result is a b64 string of an image
img_bytes = base64.b64decode(result)
import io
img = Image.open(io.BytesIO(img_bytes))
self._scene_size = img.size
@@ -70,6 +68,12 @@ class DioramaComputerInterface:
async def double_click(self, x=None, y=None):
    """Forward a ``double_click`` command with the given coordinates.

    Both coordinates are always included in the payload, even when left
    at their ``None`` defaults.
    """
    payload = dict(x=x, y=y)
    await self._send_cmd("double_click", payload)
async def scroll_up(self, clicks=1):
    """Forward a ``scroll_up`` command with the requested click count.

    Args:
        clicks: Value sent under the ``"clicks"`` key (defaults to 1).
    """
    payload = dict(clicks=clicks)
    await self._send_cmd("scroll_up", payload)
async def scroll_down(self, clicks=1):
    """Forward a ``scroll_down`` command with the requested click count.

    Args:
        clicks: Value sent under the ``"clicks"`` key (defaults to 1).
    """
    payload = dict(clicks=clicks)
    await self._send_cmd("scroll_down", payload)
async def drag_to(self, x, y, duration=0.5):
    """Forward a ``drag_to`` command with a target point and duration.

    Args:
        x: Value sent under the ``"x"`` key.
        y: Value sent under the ``"y"`` key.
        duration: Value sent under the ``"duration"`` key (defaults to 0.5).
    """
    payload = dict(x=x, y=y, duration=duration)
    await self._send_cmd("drag_to", payload)

View File

@@ -346,7 +346,7 @@ class MacOSComputerInterface(BaseComputerInterface):
asyncio.create_task(self._ws.close())
self._ws = None
async def diorama_cmd(self, action: str, arguments: dict = None) -> dict:
async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
"""Send a diorama command to the server (macOS only)."""
return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})