mirror of
https://github.com/trycua/lume.git
synced 2026-01-05 20:09:56 -06:00
pyautogui cleanup
This commit is contained in:
@@ -21,11 +21,15 @@ from ..types import AgentCapability, AgentResponse, Messages, Tools
|
||||
from .composed_grounded import ComposedGroundedConfig
|
||||
|
||||
|
||||
def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
|
||||
"""Extract coordinates from pyautogui.click(x=..., y=...) format."""
|
||||
def extract_coordinates_from_click(text: str) -> Optional[Tuple[int, int]]:
|
||||
"""Extract coordinates from click(x=..., y=...) or pyautogui.click(x=..., y=...) format.
|
||||
|
||||
This function supports parsing both generic click() and legacy pyautogui.click() formats
|
||||
for backwards compatibility with models that may still output pyautogui format.
|
||||
"""
|
||||
try:
|
||||
# Look for pyautogui.click(x=1443, y=343) pattern
|
||||
pattern = r"pyautogui\.click\(x=(\d+),\s*y=(\d+)\)"
|
||||
# Look for click(x=1443, y=343) or pyautogui.click(x=1443, y=343) pattern
|
||||
pattern = r"(?:pyautogui\.)?click\(x=(\d+),\s*y=(\d+)\)"
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
x, y = int(match.group(1)), int(match.group(2))
|
||||
@@ -90,7 +94,7 @@ class OpenCUAConfig(ComposedGroundedConfig):
|
||||
# Prepare system message
|
||||
system_prompt = (
|
||||
"You are a GUI agent. You are given a task and a screenshot of the screen. "
|
||||
"You need to perform a series of pyautogui actions to complete the task."
|
||||
"You need to perform a series of click actions to complete the task."
|
||||
)
|
||||
|
||||
system_message = {"role": "system", "content": system_prompt}
|
||||
@@ -120,8 +124,8 @@ class OpenCUAConfig(ComposedGroundedConfig):
|
||||
output_text = response.choices[0].message.content
|
||||
# print(output_text)
|
||||
|
||||
# Extract coordinates from pyautogui format
|
||||
coordinates = extract_coordinates_from_pyautogui(output_text)
|
||||
# Extract coordinates from click format (supports both click() and pyautogui.click() for backwards compatibility)
|
||||
coordinates = extract_coordinates_from_click(output_text)
|
||||
|
||||
return coordinates
|
||||
|
||||
|
||||
@@ -432,12 +432,12 @@ def take_screenshot() -> Image.Image:
|
||||
PIL Image of the screenshot
|
||||
"""
|
||||
try:
|
||||
import pyautogui
|
||||
from PIL import ImageGrab
|
||||
|
||||
screenshot = pyautogui.screenshot()
|
||||
screenshot = ImageGrab.grab()
|
||||
return screenshot
|
||||
except ImportError:
|
||||
print("pyautogui not installed. Please install it with: pip install pyautogui")
|
||||
print("PIL/Pillow not installed. Please install it with: pip install pillow")
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error taking screenshot: {e}")
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
**Computer Server** is the server component for the Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen).
|
||||
**Computer Server** is the server component for the Computer-Use Interface (CUI) framework powering Cua. The framework is for interacting with local macOS and Linux sandboxes, is automation-compatible, and is pluggable with any AI agent system (Cua, LangChain, CrewAI, AutoGen).
|
||||
|
||||
## Features
|
||||
|
||||
|
||||
@@ -22,7 +22,6 @@ from PIL import Image, ImageGrab
|
||||
# Configure logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# pyautogui removed in favor of pynput
|
||||
|
||||
from pynput.keyboard import Controller as KeyboardController
|
||||
from pynput.keyboard import Key
|
||||
@@ -81,7 +80,7 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler):
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: The x and y coordinates of the cursor position.
|
||||
Returns (0, 0) if pyautogui is not available.
|
||||
Returns (0, 0) if cursor position cannot be determined.
|
||||
"""
|
||||
try:
|
||||
# Use pynput mouse controller
|
||||
@@ -98,7 +97,7 @@ class LinuxAccessibilityHandler(BaseAccessibilityHandler):
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: The width and height of the screen in pixels.
|
||||
Returns (1920, 1080) if pyautogui is not available.
|
||||
Returns (1920, 1080) if screen size cannot be determined.
|
||||
"""
|
||||
try:
|
||||
img = ImageGrab.grab()
|
||||
|
||||
@@ -504,7 +504,7 @@ class WindowsAutomationHandler(BaseAutomationHandler):
|
||||
"""Scroll vertically at the current cursor position.
|
||||
|
||||
Args:
|
||||
x (int): Horizontal scroll amount (not used in pyautogui implementation).
|
||||
x (int): Horizontal scroll amount.
|
||||
y (int): Vertical scroll amount. Positive values scroll up, negative values scroll down.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
**cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments.
|
||||
**cua-computer** is a Computer-Use Interface (CUI) framework powering Cua. It is for interacting with local macOS and Linux sandboxes, is automation-compatible, and is pluggable with any AI agent system (Cua, LangChain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments.
|
||||
|
||||
### Get started with Computer
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ FunctionKey = Literal["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10
|
||||
class Key(Enum):
|
||||
"""Keyboard keys that can be used with press_key.
|
||||
|
||||
These key names map to PyAutoGUI's expected key names.
|
||||
These key names follow a consistent cross-platform keyboard key naming convention.
|
||||
"""
|
||||
|
||||
# Navigation
|
||||
|
||||
Reference in New Issue
Block a user