pyautogui cleanup

This commit is contained in:
Adam
2025-12-28 21:25:49 -05:00
parent 76c0ca4302
commit 545c2180f8
12 changed files with 2931 additions and 2951 deletions

View File

@@ -21,11 +21,15 @@ from ..types import AgentCapability, AgentResponse, Messages, Tools
from .composed_grounded import ComposedGroundedConfig
def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
"""Extract coordinates from pyautogui.click(x=..., y=...) format."""
def extract_coordinates_from_click(text: str) -> Optional[Tuple[int, int]]:
"""Extract coordinates from click(x=..., y=...) or pyautogui.click(x=..., y=...) format.
This function supports parsing both generic click() and legacy pyautogui.click() formats
for backwards compatibility with models that may still output pyautogui format.
"""
try:
# Look for pyautogui.click(x=1443, y=343) pattern
pattern = r"pyautogui\.click\(x=(\d+),\s*y=(\d+)\)"
# Look for click(x=1443, y=343) or pyautogui.click(x=1443, y=343) pattern
pattern = r"(?:pyautogui\.)?click\(x=(\d+),\s*y=(\d+)\)"
match = re.search(pattern, text)
if match:
x, y = int(match.group(1)), int(match.group(2))
@@ -90,7 +94,7 @@ class OpenCUAConfig(ComposedGroundedConfig):
# Prepare system message
system_prompt = (
"You are a GUI agent. You are given a task and a screenshot of the screen. "
"You need to perform a series of pyautogui actions to complete the task."
"You need to perform a series of click actions to complete the task."
)
system_message = {"role": "system", "content": system_prompt}
@@ -120,8 +124,8 @@ class OpenCUAConfig(ComposedGroundedConfig):
output_text = response.choices[0].message.content
# print(output_text)
# Extract coordinates from pyautogui format
coordinates = extract_coordinates_from_pyautogui(output_text)
# Extract coordinates from click format (supports both click() and pyautogui.click() for backwards compatibility)
coordinates = extract_coordinates_from_click(output_text)
return coordinates

View File

@@ -432,12 +432,12 @@ def take_screenshot() -> Image.Image:
PIL Image of the screenshot
"""
try:
import pyautogui
from PIL import ImageGrab
screenshot = pyautogui.screenshot()
screenshot = ImageGrab.grab()
return screenshot
except ImportError:
print("pyautogui not installed. Please install it with: pip install pyautogui")
print("PIL/Pillow not installed. Please install it with: pip install pillow")
raise
except Exception as e:
print(f"Error taking screenshot: {e}")

View File

@@ -16,7 +16,7 @@
</h1>
</div>
**Computer Server** is the server component for the Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen).
**Computer Server** is the server component of the Computer-Use Interface (CUI) framework that powers Cua. It interacts with local macOS and Linux sandboxes, is automation-compatible, and plugs into any AI agent system (Cua, Langchain, CrewAI, AutoGen).
## Features

View File

@@ -22,7 +22,6 @@ from PIL import Image, ImageGrab
# Configure logger
logger = logging.getLogger(__name__)
# pyautogui removed in favor of pynput
from pynput.keyboard import Controller as KeyboardController
from pynput.keyboard import Key
@@ -81,7 +80,7 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler):
Returns:
Tuple[int, int]: The x and y coordinates of the cursor position.
Returns (0, 0) if pyautogui is not available.
Returns (0, 0) if cursor position cannot be determined.
"""
try:
# Use pynput mouse controller
@@ -98,7 +97,7 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler):
Returns:
Tuple[int, int]: The width and height of the screen in pixels.
Returns (1920, 1080) if pyautogui is not available.
Returns (1920, 1080) if screen size cannot be determined.
"""
try:
img = ImageGrab.grab()

View File

@@ -504,7 +504,7 @@ class WindowsAutomationHandler(BaseAutomationHandler):
"""Scroll vertically at the current cursor position.
Args:
x (int): Horizontal scroll amount (not used in pyautogui implementation).
x (int): Horizontal scroll amount.
y (int): Vertical scroll amount. Positive values scroll up, negative values scroll down.
Returns:

View File

@@ -16,7 +16,7 @@
</h1>
</div>
**cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments.
**cua-computer** is a Computer-Use Interface (CUI) framework that powers Cua. It interacts with local macOS and Linux sandboxes, is automation-compatible, and plugs into any AI agent system (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments.
### Get started with Computer

View File

@@ -31,7 +31,7 @@ FunctionKey = Literal["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10
class Key(Enum):
"""Keyboard keys that can be used with press_key.
These key names map to PyAutoGUI's expected key names.
These key names follow a consistent cross-platform keyboard key naming convention.
"""
# Navigation