mirror of
https://github.com/trycua/computer.git
synced 2025-12-31 18:40:04 -06:00
add global and local delay control (#308)
This commit is contained in:
@@ -24,6 +24,9 @@ class BaseComputerInterface(ABC):
|
||||
self.api_key = api_key
|
||||
self.vm_name = vm_name
|
||||
self.logger = Logger("cua.interface", LogLevel.NORMAL)
|
||||
|
||||
# Optional default delay time between commands (in seconds)
|
||||
self.delay: float = 0.0
|
||||
|
||||
@abstractmethod
|
||||
async def wait_for_ready(self, timeout: int = 60) -> None:
|
||||
@@ -52,37 +55,75 @@ class BaseComputerInterface(ABC):
|
||||
|
||||
# Mouse Actions
|
||||
@abstractmethod
|
||||
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
||||
"""Press and hold a mouse button."""
|
||||
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
|
||||
"""Press and hold a mouse button.
|
||||
|
||||
Args:
|
||||
x: X coordinate to press at. If None, uses current cursor position.
|
||||
y: Y coordinate to press at. If None, uses current cursor position.
|
||||
button: Mouse button to press ('left', 'middle', 'right').
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
||||
"""Release a mouse button."""
|
||||
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
|
||||
"""Release a mouse button.
|
||||
|
||||
Args:
|
||||
x: X coordinate to release at. If None, uses current cursor position.
|
||||
y: Y coordinate to release at. If None, uses current cursor position.
|
||||
button: Mouse button to release ('left', 'middle', 'right').
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
||||
"""Perform a left click."""
|
||||
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
||||
"""Perform a left mouse button click.
|
||||
|
||||
Args:
|
||||
x: X coordinate to click at. If None, uses current cursor position.
|
||||
y: Y coordinate to click at. If None, uses current cursor position.
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
||||
"""Perform a right click."""
|
||||
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
||||
"""Perform a right mouse button click.
|
||||
|
||||
Args:
|
||||
x: X coordinate to click at. If None, uses current cursor position.
|
||||
y: Y coordinate to click at. If None, uses current cursor position.
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
||||
"""Perform a double click."""
|
||||
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
||||
"""Perform a double left mouse button click.
|
||||
|
||||
Args:
|
||||
x: X coordinate to double-click at. If None, uses current cursor position.
|
||||
y: Y coordinate to double-click at. If None, uses current cursor position.
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def move_cursor(self, x: int, y: int) -> None:
|
||||
"""Move the cursor to specified position."""
|
||||
async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
||||
"""Move the cursor to the specified screen coordinates.
|
||||
|
||||
Args:
|
||||
x: X coordinate to move cursor to.
|
||||
y: Y coordinate to move cursor to.
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> None:
|
||||
async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
||||
"""Drag from current position to specified coordinates.
|
||||
|
||||
Args:
|
||||
@@ -90,60 +131,103 @@ class BaseComputerInterface(ABC):
|
||||
y: The y coordinate to drag to
|
||||
button: The mouse button to use ('left', 'middle', 'right')
|
||||
duration: How long the drag should take in seconds
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
|
||||
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
||||
"""Drag the cursor along a path of coordinates.
|
||||
|
||||
Args:
|
||||
path: List of (x, y) coordinate tuples defining the drag path
|
||||
button: The mouse button to use ('left', 'middle', 'right')
|
||||
duration: Total time in seconds that the drag operation should take
|
||||
delay: Optional delay in seconds after the action
|
||||
"""
|
||||
pass
|
||||
|
||||
# Keyboard Actions
|
||||
@abstractmethod
|
||||
async def key_down(self, key: str) -> None:
|
||||
"""Press and hold a key."""
|
||||
async def key_down(self, key: str, delay: Optional[float] = None) -> None:
|
||||
"""Press and hold a key.
|
||||
|
||||
Args:
|
||||
key: The key to press and hold (e.g., 'a', 'shift', 'ctrl').
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def key_up(self, key: str) -> None:
|
||||
"""Release a key."""
|
||||
async def key_up(self, key: str, delay: Optional[float] = None) -> None:
|
||||
"""Release a previously pressed key.
|
||||
|
||||
Args:
|
||||
key: The key to release (e.g., 'a', 'shift', 'ctrl').
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def type_text(self, text: str) -> None:
|
||||
"""Type the specified text."""
|
||||
async def type_text(self, text: str, delay: Optional[float] = None) -> None:
|
||||
"""Type the specified text string.
|
||||
|
||||
Args:
|
||||
text: The text string to type.
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def press_key(self, key: str) -> None:
|
||||
"""Press a single key."""
|
||||
async def press_key(self, key: str, delay: Optional[float] = None) -> None:
|
||||
"""Press and release a single key.
|
||||
|
||||
Args:
|
||||
key: The key to press (e.g., 'a', 'enter', 'escape').
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def hotkey(self, *keys: str) -> None:
|
||||
"""Press multiple keys simultaneously."""
|
||||
async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
|
||||
"""Press multiple keys simultaneously (keyboard shortcut).
|
||||
|
||||
Args:
|
||||
*keys: Variable number of keys to press together (e.g., 'ctrl', 'c').
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Scrolling Actions
|
||||
@abstractmethod
|
||||
async def scroll(self, x: int, y: int) -> None:
|
||||
"""Scroll the mouse wheel."""
|
||||
async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
||||
"""Scroll the mouse wheel by specified amounts.
|
||||
|
||||
Args:
|
||||
x: Horizontal scroll amount (positive = right, negative = left).
|
||||
y: Vertical scroll amount (positive = up, negative = down).
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def scroll_down(self, clicks: int = 1) -> None:
|
||||
"""Scroll down."""
|
||||
async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
||||
"""Scroll down by the specified number of clicks.
|
||||
|
||||
Args:
|
||||
clicks: Number of scroll clicks to perform downward.
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def scroll_up(self, clicks: int = 1) -> None:
|
||||
"""Scroll up."""
|
||||
async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
||||
"""Scroll up by the specified number of clicks.
|
||||
|
||||
Args:
|
||||
clicks: Number of scroll clicks to perform upward.
|
||||
delay: Optional delay in seconds after the action.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Screen Actions
|
||||
@@ -167,44 +251,89 @@ class BaseComputerInterface(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def get_cursor_position(self) -> Dict[str, int]:
|
||||
"""Get current cursor position."""
|
||||
"""Get the current cursor position on screen.
|
||||
|
||||
Returns:
|
||||
Dict with 'x' and 'y' keys containing cursor coordinates.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Clipboard Actions
|
||||
@abstractmethod
|
||||
async def copy_to_clipboard(self) -> str:
|
||||
"""Get clipboard content."""
|
||||
"""Get the current clipboard content.
|
||||
|
||||
Returns:
|
||||
The text content currently stored in the clipboard.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def set_clipboard(self, text: str) -> None:
|
||||
"""Set clipboard content."""
|
||||
"""Set the clipboard content to the specified text.
|
||||
|
||||
Args:
|
||||
text: The text to store in the clipboard.
|
||||
"""
|
||||
pass
|
||||
|
||||
# File System Actions
|
||||
@abstractmethod
|
||||
async def file_exists(self, path: str) -> bool:
|
||||
"""Check if file exists."""
|
||||
"""Check if a file exists at the specified path.
|
||||
|
||||
Args:
|
||||
path: The file path to check.
|
||||
|
||||
Returns:
|
||||
True if the file exists, False otherwise.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def directory_exists(self, path: str) -> bool:
|
||||
"""Check if directory exists."""
|
||||
"""Check if a directory exists at the specified path.
|
||||
|
||||
Args:
|
||||
path: The directory path to check.
|
||||
|
||||
Returns:
|
||||
True if the directory exists, False otherwise.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def list_dir(self, path: str) -> List[str]:
|
||||
"""List directory contents."""
|
||||
"""List the contents of a directory.
|
||||
|
||||
Args:
|
||||
path: The directory path to list.
|
||||
|
||||
Returns:
|
||||
List of file and directory names in the specified directory.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def read_text(self, path: str) -> str:
|
||||
"""Read file text contents."""
|
||||
"""Read the text contents of a file.
|
||||
|
||||
Args:
|
||||
path: The file path to read from.
|
||||
|
||||
Returns:
|
||||
The text content of the file.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def write_text(self, path: str, content: str) -> None:
|
||||
"""Write file text contents."""
|
||||
"""Write text content to a file.
|
||||
|
||||
Args:
|
||||
path: The file path to write to.
|
||||
content: The text content to write.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -220,27 +349,51 @@ class BaseComputerInterface(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def write_bytes(self, path: str, content: bytes) -> None:
|
||||
"""Write file binary contents."""
|
||||
"""Write binary content to a file.
|
||||
|
||||
Args:
|
||||
path: The file path to write to.
|
||||
content: The binary content to write.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def delete_file(self, path: str) -> None:
|
||||
"""Delete file."""
|
||||
"""Delete a file at the specified path.
|
||||
|
||||
Args:
|
||||
path: The file path to delete.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def create_dir(self, path: str) -> None:
|
||||
"""Create directory."""
|
||||
"""Create a directory at the specified path.
|
||||
|
||||
Args:
|
||||
path: The directory path to create.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def delete_dir(self, path: str) -> None:
|
||||
"""Delete directory."""
|
||||
"""Delete a directory at the specified path.
|
||||
|
||||
Args:
|
||||
path: The directory path to delete.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_file_size(self, path: str) -> int:
|
||||
"""Get the size of a file in bytes."""
|
||||
"""Get the size of a file in bytes.
|
||||
|
||||
Args:
|
||||
path: The file path to get the size of.
|
||||
|
||||
Returns:
|
||||
The size of the file in bytes.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -274,7 +427,11 @@ class BaseComputerInterface(ABC):
|
||||
# Accessibility Actions
|
||||
@abstractmethod
|
||||
async def get_accessibility_tree(self) -> Dict:
|
||||
"""Get the accessibility tree of the current screen."""
|
||||
"""Get the accessibility tree of the current screen.
|
||||
|
||||
Returns:
|
||||
Dict containing the hierarchical accessibility information of screen elements.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -32,6 +32,21 @@ class GenericComputerInterface(BaseComputerInterface):
|
||||
# Set logger name for the interface
|
||||
self.logger = Logger(logger_name, LogLevel.NORMAL)
|
||||
|
||||
# Optional default delay time between commands (in seconds)
|
||||
self.delay = 0.0
|
||||
|
||||
async def _handle_delay(self, delay: Optional[float] = None):
|
||||
"""Handle delay between commands using async sleep.
|
||||
|
||||
Args:
|
||||
delay: Optional delay in seconds. If None, uses self.delay.
|
||||
"""
|
||||
if delay is not None:
|
||||
if isinstance(delay, float) and delay > 0:
|
||||
await asyncio.sleep(delay)
|
||||
elif isinstance(self.delay, float) and self.delay > 0:
|
||||
await asyncio.sleep(self.delay)
|
||||
|
||||
@property
|
||||
def ws_uri(self) -> str:
|
||||
"""Get the WebSocket URI using the current IP address.
|
||||
@@ -44,42 +59,52 @@ class GenericComputerInterface(BaseComputerInterface):
|
||||
return f"{protocol}://{self.ip_address}:{port}/ws"
|
||||
|
||||
# Mouse actions
|
||||
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
|
||||
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None:
|
||||
await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
|
||||
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None:
|
||||
await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
||||
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("left_click", {"x": x, "y": y})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
||||
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("right_click", {"x": x, "y": y})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
||||
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("double_click", {"x": x, "y": y})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def move_cursor(self, x: int, y: int) -> None:
|
||||
async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("move_cursor", {"x": x, "y": y})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
||||
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
||||
await self._send_command(
|
||||
"drag_to", {"x": x, "y": y, "button": button, "duration": duration}
|
||||
)
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
||||
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
||||
await self._send_command(
|
||||
"drag", {"path": path, "button": button, "duration": duration}
|
||||
)
|
||||
await self._handle_delay(delay)
|
||||
|
||||
# Keyboard Actions
|
||||
async def key_down(self, key: "KeyType") -> None:
|
||||
async def key_down(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
||||
await self._send_command("key_down", {"key": key})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def key_up(self, key: "KeyType") -> None:
|
||||
async def key_up(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
||||
await self._send_command("key_up", {"key": key})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def type_text(self, text: str) -> None:
|
||||
async def type_text(self, text: str, delay: Optional[float] = None) -> None:
|
||||
# Temporary fix for https://github.com/trycua/cua/issues/165
|
||||
# Check if text contains Unicode characters
|
||||
if any(ord(char) > 127 for char in text):
|
||||
@@ -89,8 +114,9 @@ class GenericComputerInterface(BaseComputerInterface):
|
||||
else:
|
||||
# For ASCII text, use the regular typing method
|
||||
await self._send_command("type_text", {"text": text})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def press(self, key: "KeyType") -> None:
|
||||
async def press(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
||||
"""Press a single key.
|
||||
|
||||
Args:
|
||||
@@ -126,16 +152,17 @@ class GenericComputerInterface(BaseComputerInterface):
|
||||
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
||||
|
||||
await self._send_command("press_key", {"key": actual_key})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def press_key(self, key: "KeyType") -> None:
|
||||
async def press_key(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
||||
"""DEPRECATED: Use press() instead.
|
||||
|
||||
This method is kept for backward compatibility but will be removed in a future version.
|
||||
Please use the press() method instead.
|
||||
"""
|
||||
await self.press(key)
|
||||
await self.press(key, delay)
|
||||
|
||||
async def hotkey(self, *keys: "KeyType") -> None:
|
||||
async def hotkey(self, *keys: "KeyType", delay: Optional[float] = None) -> None:
|
||||
"""Press multiple keys simultaneously.
|
||||
|
||||
Args:
|
||||
@@ -169,16 +196,20 @@ class GenericComputerInterface(BaseComputerInterface):
|
||||
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
||||
|
||||
await self._send_command("hotkey", {"keys": actual_keys})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
# Scrolling Actions
|
||||
async def scroll(self, x: int, y: int) -> None:
|
||||
async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("scroll", {"x": x, "y": y})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def scroll_down(self, clicks: int = 1) -> None:
|
||||
async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("scroll_down", {"clicks": clicks})
|
||||
|
||||
async def scroll_up(self, clicks: int = 1) -> None:
|
||||
await self._handle_delay(delay)
|
||||
|
||||
async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
||||
await self._send_command("scroll_up", {"clicks": clicks})
|
||||
await self._handle_delay(delay)
|
||||
|
||||
# Screen actions
|
||||
async def screenshot(
|
||||
|
||||
Reference in New Issue
Block a user