mirror of
https://github.com/trycua/computer.git
synced 2026-01-03 03:49:58 -06:00
add cua-bench-ui
This commit is contained in:
26
libs/python/bench-ui/README.md
Normal file
26
libs/python/bench-ui/README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# CUA Bench UI
|
||||
|
||||
Lightweight web UI window controller for CUA bench environments, built on pywebview.
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from bench_ui import launch_window, get_element_rect, execute_javascript
|
||||
|
||||
# Launch a window with inline HTML content
|
||||
pid = launch_window(html="<html><body><h1>Hello</h1></body></html>")
|
||||
|
||||
# Get element rect in screen space
|
||||
rect = get_element_rect(pid, "h1", space="screen")
|
||||
print(rect)
|
||||
|
||||
# Execute arbitrary JavaScript
|
||||
text = execute_javascript(pid, "document.querySelector('h1')?.textContent")
|
||||
print(text)
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install cua-bench-ui
|
||||
```
|
||||
3
libs/python/bench-ui/bench_ui/__init__.py
Normal file
3
libs/python/bench-ui/bench_ui/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .api import launch_window, get_element_rect, execute_javascript
|
||||
|
||||
__all__ = ["launch_window", "get_element_rect", "execute_javascript"]
|
||||
149
libs/python/bench-ui/bench_ui/api.py
Normal file
149
libs/python/bench-ui/bench_ui/api.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
from urllib import request
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
# Map child PID -> listening port
|
||||
_pid_to_port: Dict[int, int] = {}
|
||||
|
||||
|
||||
def _post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON object.

    Failures are reported as a dict carrying an ``"error"`` key instead of
    being raised, so callers can poll this function in a retry loop:

    * HTTP error status: the server's JSON body when it is a JSON object,
      otherwise ``{"error": "http_error", "status": <code>}``.
    * ``URLError``: ``{"error": "url_error", "reason": <text>}``.
    * Any other ``OSError`` (for example a read timeout):
      ``{"error": "connection_error", "reason": <text>}``.
    * A successful reply whose body is not a JSON object:
      ``{"error": "invalid_response"}``.
    """
    data = json.dumps(payload).encode("utf-8")
    req = request.Request(
        url,
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Read inside the try block: the timeout also applies to the body read.
        with request.urlopen(req, timeout=5) as resp:
            raw = resp.read()
    except HTTPError as e:
        # Error responses may still carry a structured JSON body; prefer it.
        try:
            parsed = json.loads((e.read() or b"").decode("utf-8", errors="ignore"))
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        return {"error": "http_error", "status": getattr(e, "code", None)}
    except URLError as e:
        return {"error": "url_error", "reason": str(e.reason)}
    except OSError as e:
        # Socket-level failures that urllib does not wrap in URLError.
        return {"error": "connection_error", "reason": str(e)}

    try:
        parsed = json.loads(raw.decode("utf-8"))
    except ValueError:
        # Covers both UnicodeDecodeError and json.JSONDecodeError.
        return {"error": "invalid_response"}
    if not isinstance(parsed, dict):
        return {"error": "invalid_response"}
    return parsed
|
||||
|
||||
|
||||
def launch_window(
    url: Optional[str] = None,
    *,
    html: Optional[str] = None,
    title: str = "Window",
    x: Optional[int] = None,
    y: Optional[int] = None,
    width: int = 600,
    height: int = 400,
    icon: Optional[str] = None,
    use_inner_size: bool = False,
    title_bar_style: str = "default",
) -> int:
    """Create a pywebview window in a child process and return its PID.

    Preferred input is a URL via the positional `url` parameter.
    To load inline HTML instead, pass `html=...`.

    Spawns `python -m bench_ui.child` with a JSON config passed via a temp file.
    The child prints a JSON line: {"pid": <pid>, "port": <port>}. The child's
    stderr is merged into stdout, so lines that are not that startup object
    (warnings, log output) are skipped instead of being parsed as the reply.
    We cache pid->port for subsequent control calls like get_element_rect.

    Raises:
        ValueError: if neither `url` nor `html` is given.
        RuntimeError: if the child closes its output without reporting a
            port; the message includes whatever the child printed.
    """
    if not url and not html:
        raise ValueError("launch_window requires either a url or html")

    config = {
        "url": url,
        "html": html,
        "title": title,
        "x": x,
        "y": y,
        "width": width,
        "height": height,
        "icon": icon,
        "use_inner_size": use_inner_size,
        "title_bar_style": title_bar_style,
    }

    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".json", encoding="utf-8"
    ) as f:
        json.dump(config, f)
        cfg_path = f.name

    try:
        # Launch child process
        proc = subprocess.Popen(
            [sys.executable, "-m", "bench_ui.child", cfg_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        try:
            if proc.stdout is None:
                raise RuntimeError("bench_ui.child was started without a stdout pipe")

            # Scan output until the startup object appears or the child
            # closes its output (which normally means it died).
            info = None
            noise = []
            for raw_line in iter(proc.stdout.readline, ""):
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    candidate = json.loads(line)
                except ValueError:
                    noise.append(line)
                    continue
                if isinstance(candidate, dict) and "port" in candidate:
                    info = candidate
                    break
                noise.append(line)

            if info is None:
                details = "\n".join(noise)
                raise RuntimeError(
                    "bench_ui.child exited before reporting its port"
                    + (f":\n{details}" if details else "")
                )

            pid = int(info["pid"]) if "pid" in info else proc.pid
            port = int(info["port"])  # required
        except Exception:
            # Do not leave a half-started child (and its pipe) behind.
            if proc.poll() is None:
                proc.kill()
            proc.wait()
            if proc.stdout is not None:
                proc.stdout.close()
            raise

        # NOTE(review): the child's stdout pipe stays open and is not read
        # after startup; a very chatty child could eventually block on it.
        _pid_to_port[pid] = port
        return pid
    finally:
        # Best-effort cleanup: the child has read the config before it
        # reports its port, so the file is no longer needed.
        try:
            os.unlink(cfg_path)
        except OSError:
            pass
|
||||
|
||||
|
||||
def get_element_rect(
    pid: int, selector: str, *, space: str = "window"
) -> Optional[Dict[str, Any]]:
    """Ask the child process to compute element client rect via injected JS.

    Returns a dict like {"x": float, "y": float, "width": float, "height": float}
    or None if not found.

    The request is retried (30 attempts, 0.1 s apart) while the child answers
    with an error such as "window_not_ready". A reply that contains a "rect"
    key is final, even when its value is null: that is how the child reports
    that the selector matched nothing.

    Raises:
        RuntimeError: if `pid` was not returned by launch_window in this
            process, or if no attempt produced a "rect" reply.
    """
    if pid not in _pid_to_port:
        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")
    port = _pid_to_port[pid]
    url = f"http://127.0.0.1:{port}/rect"
    last: Dict[str, Any] = {}
    for _ in range(30):
        resp = _post_json(url, {"selector": selector, "space": space})
        last = resp or {}
        if isinstance(last, dict) and "rect" in last:
            return last["rect"]
        # Anything else (an error dict or an unexpected reply) is treated as
        # transient: wait briefly and ask again.
        time.sleep(0.1)
    raise RuntimeError(f"Failed to get element rect: {last}")
|
||||
|
||||
|
||||
def execute_javascript(pid: int, javascript: str):
    """Run `javascript` inside the window owned by `pid` and return its value.

    The child is polled up to 30 times, 0.1 s apart; the first reply that
    carries a "result" key ends the polling and that value is returned.
    Error replies (for example while the window is still loading) and
    unexpected replies simply trigger another attempt.

    Raises:
        RuntimeError: if `pid` has no registered window, or if every attempt
            failed to yield a "result".
    """
    try:
        port = _pid_to_port[pid]
    except KeyError:
        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window") from None

    endpoint = f"http://127.0.0.1:{port}/eval"
    reply: Dict[str, Any] = {}
    attempts_left = 30
    while attempts_left > 0:
        attempts_left -= 1
        reply = _post_json(endpoint, {"javascript": javascript}) or {}
        if isinstance(reply, dict) and "result" in reply:
            return reply["result"]
        time.sleep(0.1)
    raise RuntimeError(f"Failed to execute JavaScript: {reply}")
|
||||
174
libs/python/bench-ui/bench_ui/child.py
Normal file
174
libs/python/bench-ui/bench_ui/child.py
Normal file
@@ -0,0 +1,174 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import webview
|
||||
from aiohttp import web
|
||||
|
||||
|
||||
def _get_free_port() -> int:
    """Return a TCP port number that the OS reports as free on 127.0.0.1.

    Binding to port 0 lets the OS choose the port. The probe socket is closed
    before returning, so the port is only known to have been free at the
    moment of the call.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("127.0.0.1", 0))
        address = probe.getsockname()
        return address[1]
    finally:
        probe.close()
|
||||
|
||||
|
||||
def _start_http_server(window: webview.Window, port: int, ready_event: threading.Event) -> None:
    """Serve the local control API for ``window`` on 127.0.0.1:``port``.

    Two POST endpoints are registered:

    * ``/rect`` - body ``{"selector": str, "space": "window" | "screen"}``;
      replies ``{"rect": {...}}``, or ``{"rect": null}`` when the selector
      matches nothing.
    * ``/eval`` - body ``{"javascript": str}`` (``"code"`` is accepted as an
      alias); replies ``{"result": <value>}``.

    Error replies are ``{"error": ...}`` with status 400 (bad request body),
    409 (``ready_event`` not set within 2 seconds) or 500 (JS evaluation
    raised). The server runs on its own event loop in a daemon thread, so this
    function returns immediately.
    """

    async def read_json_object(request: web.Request):
        """Return the request body as a dict, or None if it is not a JSON object."""
        try:
            data = await request.json()
        except Exception:
            return None
        return data if isinstance(data, dict) else None

    async def wait_until_ready() -> bool:
        """Give the page up to 2 seconds to finish loading."""
        if ready_event.is_set():
            return True
        # Event.wait blocks, so run it off the event loop thread.
        return await asyncio.to_thread(ready_event.wait, 2.0)

    async def evaluate(js: str):
        """Evaluate ``js`` in the window without blocking the event loop."""
        return await asyncio.to_thread(window.evaluate_js, js)

    def build_rect_script(selector: str, space: str) -> str:
        """Build the JS expression that measures the first match of ``selector``."""
        # json.dumps yields a valid JS string literal, so the selector cannot
        # break out of the script.
        selector_js = json.dumps(selector)
        prologue = (
            "(function(){"
            f"const s = {selector_js};"
            "const el = document.querySelector(s);"
            "if(!el){return null;}"
            "const r = el.getBoundingClientRect();"
        )
        if space == "screen":
            # Compute approximate screen coordinates using window metrics
            return (
                prologue
                + "const sx = (window.screenX ?? window.screenLeft ?? 0);"
                "const syRaw = (window.screenY ?? window.screenTop ?? 0);"
                "const frameH = (window.outerHeight - window.innerHeight) || 0;"
                "const sy = syRaw + frameH;"
                "return {x:sx + r.left, y:sy + r.top, width:r.width, height:r.height};"
                "})()"
            )
        return (
            prologue
            + "return {x:r.left,y:r.top,width:r.width,height:r.height};"
            "})()"
        )

    async def rect_handler(request: web.Request):
        data = await read_json_object(request)
        if data is None:
            return web.json_response({"error": "invalid_json"}, status=400)
        selector = data.get("selector")
        space = data.get("space", "window")
        if not isinstance(selector, str):
            return web.json_response({"error": "selector_required"}, status=400)

        # Ensure window content is loaded
        if not await wait_until_ready():
            return web.json_response({"error": "window_not_ready"}, status=409)

        try:
            result = await evaluate(build_rect_script(selector, space))
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
        return web.json_response({"rect": result})

    async def eval_handler(request: web.Request):
        data = await read_json_object(request)
        if data is None:
            return web.json_response({"error": "invalid_json"}, status=400)
        code = data.get("javascript") or data.get("code")
        if not isinstance(code, str):
            return web.json_response({"error": "javascript_required"}, status=400)

        if not await wait_until_ready():
            return web.json_response({"error": "window_not_ready"}, status=409)

        try:
            result = await evaluate(code)
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
        return web.json_response({"result": result})

    app = web.Application()
    app.router.add_post("/rect", rect_handler)
    app.router.add_post("/eval", eval_handler)

    loop = asyncio.new_event_loop()

    def run_loop():
        asyncio.set_event_loop(loop)
        runner = web.AppRunner(app)
        loop.run_until_complete(runner.setup())
        site = web.TCPSite(runner, "127.0.0.1", port)
        loop.run_until_complete(site.start())
        loop.run_forever()

    # Daemon thread: the server must not keep the process alive once the GUI
    # main thread exits.
    t = threading.Thread(target=run_loop, daemon=True)
    t.start()
|
||||
|
||||
|
||||
def main():
    """Entry point of the child process: open the window and serve control requests.

    Reads the JSON config file named by the first command-line argument,
    creates the pywebview window, starts the local HTTP control server, prints
    one JSON line ``{"pid": ..., "port": ...}`` for the parent process, and
    then runs the GUI loop until the window is closed.
    """
    if len(sys.argv) < 2:
        print("Usage: python -m bench_ui.child <config.json>", file=sys.stderr)
        sys.exit(2)

    cfg = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))

    # NOTE(review): the config may also carry "icon", "use_inner_size" and
    # "title_bar_style"; nothing in this function applies them to the window.
    title: str = cfg.get("title", "Window")
    width: int = int(cfg.get("width", 600))
    height: int = int(cfg.get("height", 400))

    # A URL takes precedence; otherwise inline HTML (possibly empty) is shown.
    page_url: Optional[str] = cfg.get("url")
    if page_url:
        content = {"url": page_url}
    else:
        content = {"html": cfg.get("html") or ""}

    window = webview.create_window(
        title,
        width=width,
        height=height,
        x=cfg.get("x"),
        y=cfg.get("y"),
        confirm_close=False,
        text_select=True,
        background_color="#FFFFFF",
        **content,
    )

    # Set once the page has loaded; the control server checks this before
    # evaluating JavaScript.
    page_loaded = threading.Event()

    def _mark_loaded():
        page_loaded.set()

    window.events.loaded += _mark_loaded  # type: ignore[attr-defined]

    # Control server first, then tell the parent where to reach it.
    control_port = _get_free_port()
    _start_http_server(window, control_port, page_loaded)
    print(json.dumps({"pid": os.getpid(), "port": control_port}), flush=True)

    # Start GUI (blocking)
    webview.start()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
BIN
libs/python/bench-ui/examples/output_overlay.png
Normal file
BIN
libs/python/bench-ui/examples/output_overlay.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 714 KiB |
62
libs/python/bench-ui/examples/simple_example.py
Normal file
62
libs/python/bench-ui/examples/simple_example.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
import time
|
||||
from bench_ui import launch_window, get_element_rect, execute_javascript
|
||||
from pathlib import Path
|
||||
|
||||
HTML = """
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Bench UI Example</title>
|
||||
<style>
|
||||
body { font-family: system-ui, sans-serif; margin: 24px; }
|
||||
#target { width: 220px; height: 120px; background: #4f46e5; color: white; display: flex; align-items: center; justify-content: center; border-radius: 8px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Bench UI Example</h1>
|
||||
<div id="target">Hello from pywebview</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
def main():
    """Launch the example window, locate ``#target`` on screen, and read its text.

    Steps: open a window showing ``HTML``, query the screen-space rectangle of
    ``#target``, save a full-screen screenshot with that rectangle outlined
    next to this script (best effort), then read the element's text via
    JavaScript.
    """
    # Launch a window with inline HTML content
    pid = launch_window(
        html=HTML,
        title="Bench UI Example",
        width=800,
        height=600,
    )
    print(f"Launched window with PID: {pid}")

    # Give the window a brief moment to render
    time.sleep(1.0)

    # Query the client rect of an element via CSS selector in SCREEN space
    rect = get_element_rect(pid, "#target", space="screen")
    print("Element rect (screen space):", rect)

    # Take a screenshot and overlay the bbox
    try:
        from PIL import ImageGrab, ImageDraw

        img = ImageGrab.grab()  # full screen
        draw = ImageDraw.Draw(img)
        x, y, w, h = rect["x"], rect["y"], rect["width"], rect["height"]
        box = (x, y, x + w, y + h)
        draw.rectangle(box, outline=(255, 0, 0), width=3)
        out_path = Path(__file__).parent / "output_overlay.png"
        img.save(out_path)
        print(f"Saved overlay screenshot to: {out_path}")
    except Exception as e:
        # The overlay is optional (Pillow may be missing, or screen capture
        # may be unavailable); report the problem and carry on.
        print(f"Failed to capture/annotate screenshot: {e}")

    # Execute arbitrary JavaScript. The selector must match the element
    # defined in HTML above, whose id is "target".
    text = execute_javascript(pid, "document.querySelector('#target')?.textContent")
    print("text:", text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
24
libs/python/bench-ui/pyproject.toml
Normal file
24
libs/python/bench-ui/pyproject.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[build-system]
|
||||
requires = ["pdm-backend"]
|
||||
build-backend = "pdm.backend"
|
||||
|
||||
[project]
|
||||
name = "cua-bench-ui"
|
||||
version = "0.1.0"
|
||||
description = "Lightweight webUI window launcher for CUA bench using pywebview"
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{ name = "TryCua", email = "gh@trycua.com" }
|
||||
]
|
||||
dependencies = [
|
||||
"pywebview>=5.3",
|
||||
"aiohttp>=3.9.0",
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
|
||||
[tool.pdm]
|
||||
distribution = true
|
||||
|
||||
[tool.pdm.build]
|
||||
includes = ["bench_ui/"]
|
||||
source-includes = ["README.md"]
|
||||
Reference in New Issue
Block a user