mirror of
https://github.com/trycua/computer.git
synced 2026-01-22 05:20:23 -06:00
Merge pull request #116 from trycua/feature/computer/ui
[Computer] Add Gradio UI and fix interaction bugs
This commit is contained in:
@@ -107,7 +107,9 @@ If you want to use AI agents with virtualized environments:
|
||||
app.launch(share=False)
|
||||
```
|
||||
|
||||
7. For Developers only (contribute and use latest features):
|
||||
### Option 3: Build from Source (Nightly)
|
||||
If you want to contribute to the project or need the latest nightly features:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/trycua/cua.git
|
||||
|
||||
27
examples/computer_ui_examples.py
Normal file
27
examples/computer_ui_examples.py
Normal file
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple example script for the Computer Interface Gradio UI.
|
||||
|
||||
This script launches the advanced Gradio UI for the Computer Interface
|
||||
with full model selection and configuration options.
|
||||
It can be run directly from the command line.
|
||||
"""
|
||||
|
||||
|
||||
from utils import load_dotenv_files
|
||||
|
||||
load_dotenv_files()
|
||||
|
||||
# Import the create_gradio_ui function
|
||||
from computer.ui.gradio.app import create_gradio_ui
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Launching Computer Interface Gradio UI with advanced features...")
|
||||
app = create_gradio_ui()
|
||||
app.launch(share=False)
|
||||
|
||||
# Optional: Using the saved dataset
|
||||
# import datasets
|
||||
# from computer.ui.utils import convert_to_unsloth
|
||||
# ds = datasets.load_dataset("ddupont/highquality-cua-demonstrations")
|
||||
# ds = convert_to_unsloth(ds)
|
||||
@@ -162,8 +162,8 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
|
||||
y = kwargs.get("y")
|
||||
if x is None or y is None:
|
||||
raise ToolError("x and y coordinates are required for scroll action")
|
||||
scroll_x = kwargs.get("scroll_x", 0)
|
||||
scroll_y = kwargs.get("scroll_y", 0)
|
||||
scroll_x = kwargs.get("scroll_x", 0) // 20
|
||||
scroll_y = kwargs.get("scroll_y", 0) // 20
|
||||
return await self.handle_scroll(x, y, scroll_x, scroll_y)
|
||||
elif type == "screenshot":
|
||||
return await self.screenshot()
|
||||
|
||||
@@ -542,7 +542,7 @@ class MacOSAutomationHandler(BaseAutomationHandler):
|
||||
try:
|
||||
if x is not None and y is not None:
|
||||
pyautogui.moveTo(x, y)
|
||||
pyautogui.doubleClick()
|
||||
pyautogui.doubleClick(interval=0.1)
|
||||
return {"success": True}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
@@ -63,4 +63,34 @@ The `cua-computer` PyPi package pulls automatically the latest executable versio
|
||||
|
||||
Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
|
||||
|
||||
- [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
|
||||
- [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
|
||||
|
||||
## Using the Gradio Computer UI
|
||||
|
||||
The computer module includes a Gradio UI for creating and sharing demonstration data. The UI provides built-in integration with Hugging Face Datasets for sharing demonstrations and incorporating them into CUA ML pipelines.
|
||||
|
||||
```bash
|
||||
# Install with UI support
|
||||
pip install "cua-computer[ui]"
|
||||
```
|
||||
|
||||
|
||||
<details open>
|
||||
<summary>View demonstration video</summary>
|
||||
<video src="https://github.com/user-attachments/assets/7c683b58-f04d-4e8c-b63f-6ef36e9637d5" controls width="600"></video>
|
||||
</details>
|
||||
|
||||
> **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
|
||||
|
||||
|
||||
### Launch the UI
|
||||
|
||||
```python
|
||||
# launch_ui.py
|
||||
from computer.ui.gradio.app import create_gradio_ui
|
||||
|
||||
app = create_gradio_ui()
|
||||
app.launch(share=False)
|
||||
```
|
||||
|
||||
For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py).
|
||||
|
||||
@@ -377,17 +377,47 @@ class MacOSComputerInterface(BaseComputerInterface):
|
||||
"""
|
||||
await self.press(key)
|
||||
|
||||
async def hotkey(self, *keys: str) -> None:
|
||||
await self._send_command("hotkey", {"keys": list(keys)})
|
||||
async def hotkey(self, *keys: "KeyType") -> None:
|
||||
"""Press multiple keys simultaneously.
|
||||
|
||||
Args:
|
||||
*keys: Multiple keys to press simultaneously. Each key can be any of:
|
||||
- A Key enum value (recommended), e.g. Key.COMMAND
|
||||
- A direct key value string, e.g. 'command'
|
||||
- A single character string, e.g. 'a'
|
||||
|
||||
Examples:
|
||||
```python
|
||||
# Using enums (recommended)
|
||||
await interface.hotkey(Key.COMMAND, Key.C) # Copy
|
||||
await interface.hotkey(Key.COMMAND, Key.V) # Paste
|
||||
|
||||
# Using mixed formats
|
||||
await interface.hotkey(Key.COMMAND, 'a') # Select all
|
||||
```
|
||||
|
||||
Raises:
|
||||
ValueError: If any key is neither a Key enum value nor a string
|
||||
"""
|
||||
actual_keys = []
|
||||
for key in keys:
|
||||
if isinstance(key, Key):
|
||||
actual_keys.append(key.value)
|
||||
elif isinstance(key, str):
|
||||
# Try to convert to enum if it matches a known key
|
||||
key_or_enum = Key.from_string(key)
|
||||
actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
|
||||
else:
|
||||
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
||||
|
||||
await self._send_command("hotkey", {"keys": actual_keys})
|
||||
|
||||
# Scrolling Actions
|
||||
async def scroll_down(self, clicks: int = 1) -> None:
|
||||
for _ in range(clicks):
|
||||
await self.hotkey("pagedown")
|
||||
|
||||
await self._send_command("scroll_down", {"clicks": clicks})
|
||||
|
||||
async def scroll_up(self, clicks: int = 1) -> None:
|
||||
for _ in range(clicks):
|
||||
await self.hotkey("pageup")
|
||||
await self._send_command("scroll_up", {"clicks": clicks})
|
||||
|
||||
# Screen Actions
|
||||
async def screenshot(
|
||||
|
||||
1
libs/computer/computer/ui/__init__.py
Normal file
1
libs/computer/computer/ui/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""UI modules for the Computer Interface."""
|
||||
6
libs/computer/computer/ui/gradio/__init__.py
Normal file
6
libs/computer/computer/ui/gradio/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Gradio UI for Computer UI."""
|
||||
|
||||
import gradio as gr
|
||||
from typing import Optional
|
||||
|
||||
from .app import create_gradio_ui
|
||||
1510
libs/computer/computer/ui/gradio/app.py
Normal file
1510
libs/computer/computer/ui/gradio/app.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -21,6 +21,12 @@ dependencies = [
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[project.optional-dependencies]
|
||||
ui = [
|
||||
"gradio>=5.23.3,<6.0.0",
|
||||
"python-dotenv>=1.0.1,<2.0.0",
|
||||
]
|
||||
|
||||
[tool.pdm]
|
||||
distribution = true
|
||||
|
||||
|
||||
Reference in New Issue
Block a user