add cua-bench-ui

This commit is contained in:
Dillon DuPont
2025-11-01 19:44:35 -04:00
parent 30b6fdbef9
commit be4c7e45aa
7 changed files with 438 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
# CUA Bench UI
Lightweight web UI window controller for CUA Bench environments, built on pywebview.
## Usage
```python
from bench_ui import launch_window, get_element_rect, execute_javascript
# Launch a window with inline HTML content
pid = launch_window(html="<html><body><h1>Hello</h1></body></html>")
# Get element rect in screen space
rect = get_element_rect(pid, "h1", space="screen")
print(rect)
# Execute arbitrary JavaScript
text = execute_javascript(pid, "document.querySelector('h1')?.textContent")
print(text)
```
## Installation
```bash
pip install cua-bench-ui
```

View File

@@ -0,0 +1,3 @@
from .api import launch_window, get_element_rect, execute_javascript
__all__ = ["launch_window", "get_element_rect", "execute_javascript"]

View File

@@ -0,0 +1,149 @@
import json
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import Optional, Dict, Any
from urllib import request
from urllib.error import HTTPError, URLError
# Map child PID -> listening port
_pid_to_port: Dict[int, int] = {}
def _post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
data = json.dumps(payload).encode("utf-8")
req = request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST")
try:
with request.urlopen(req, timeout=5) as resp:
text = resp.read().decode("utf-8")
return json.loads(text)
except HTTPError as e:
try:
body = (e.read() or b"").decode("utf-8", errors="ignore")
return json.loads(body)
except Exception:
return {"error": "http_error", "status": getattr(e, 'code', None)}
except URLError as e:
return {"error": "url_error", "reason": str(e.reason)}
def _read_startup_info(proc: "subprocess.Popen[str]") -> Dict[str, Any]:
    """Return the JSON object the child prints to announce its control port.

    The child's stderr is merged into stdout, so warnings or log lines may
    come before the startup line. Any line that is not a JSON object with a
    "port" key is collected and skipped.

    Raises:
        RuntimeError: the stream ended before a startup line was seen; the
            message includes whatever the child printed.
    """
    if proc.stdout is None:
        raise RuntimeError("bench_ui child was started without a stdout pipe")
    skipped = []
    for raw in iter(proc.stdout.readline, ""):
        line = raw.strip()
        if not line:
            continue
        try:
            info = json.loads(line)
        except ValueError:
            skipped.append(line)
            continue
        if isinstance(info, dict) and "port" in info:
            return info
        skipped.append(line)
    details = "\n".join(skipped) or "<no output>"
    raise RuntimeError(f"bench_ui child exited before reporting its port:\n{details}")


def launch_window(
    url: Optional[str] = None,
    *,
    html: Optional[str] = None,
    title: str = "Window",
    x: Optional[int] = None,
    y: Optional[int] = None,
    width: int = 600,
    height: int = 400,
    icon: Optional[str] = None,
    use_inner_size: bool = False,
    title_bar_style: str = "default",
) -> int:
    """Create a pywebview window in a child process and return its PID.

    Preferred input is a URL via the positional `url` parameter.
    To load inline HTML instead, pass `html=...`.

    Spawns `python -m bench_ui.child` with a JSON config passed via a temp file.
    The child prints a single JSON line: {"pid": <pid>, "port": <port>}.
    We cache pid->port for subsequent control calls like get_element_rect.

    Note: this call blocks until the child prints its startup line or closes
    its output; there is no timeout on that wait.

    Raises:
        ValueError: neither `url` nor `html` was given.
        RuntimeError: the child exited without reporting a control port.
    """
    if not url and not html:
        raise ValueError("launch_window requires either a url or html")

    config = {
        "url": url,
        "html": html,
        "title": title,
        "x": x,
        "y": y,
        "width": width,
        "height": height,
        "icon": icon,
        "use_inner_size": use_inner_size,
        "title_bar_style": title_bar_style,
    }

    # delete=False: the file must outlive this `with` so the child can open it.
    # The child reads it as UTF-8, so write it with the same encoding.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".json", encoding="utf-8"
    ) as f:
        json.dump(config, f)
        cfg_path = f.name

    try:
        proc = subprocess.Popen(
            [sys.executable, "-m", "bench_ui.child", cfg_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        try:
            info = _read_startup_info(proc)
            port = int(info["port"])  # required
            pid = int(info["pid"]) if "pid" in info else proc.pid
        except Exception:
            # Do not leave a half-started child (or its pipe) behind.
            if proc.poll() is None:
                proc.kill()
            proc.wait()
            if proc.stdout is not None:
                proc.stdout.close()
            raise
        _pid_to_port[pid] = port
        return pid
    finally:
        # The child has read the config by the time it reports its port.
        try:
            os.unlink(cfg_path)
        except OSError:
            pass
def get_element_rect(pid: int, selector: str, *, space: str = "window"):
    """Ask the child process to compute an element's client rect via injected JS.

    Args:
        pid: PID returned by `launch_window`.
        selector: CSS selector; the first matching element is measured.
        space: "window" for viewport-relative coordinates, "screen" for the
            child's approximate screen coordinates.

    Returns:
        A dict like {"x": float, "y": float, "width": float, "height": float},
        or None if the window answered but no element matched the selector.

    Raises:
        RuntimeError: `pid` is unknown, or the window kept returning errors.

    The request is retried up to 30 times with a 0.1 s pause after each
    attempt, so an element that appears shortly after page load is still
    found. Time spent inside each request comes on top of those pauses.
    """
    if pid not in _pid_to_port:
        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")
    port = _pid_to_port[pid]
    url = f"http://127.0.0.1:{port}/rect"
    payload = {"selector": selector, "space": space}

    last: Dict[str, Any] = {}
    for _ in range(30):
        resp = _post_json(url, payload)
        last = resp or {}
        if isinstance(last, dict) and last.get("rect") is not None:
            return last["rect"]
        # Errors (window not ready, server still starting, ...) and
        # "no match yet" are all handled the same way: pause and ask again.
        time.sleep(0.1)

    # A final reply of {"rect": null} with no error means the page is up and
    # the selector simply matched nothing: the documented None result.
    if isinstance(last, dict) and "rect" in last and not last.get("error"):
        return None
    raise RuntimeError(f"Failed to get element rect: {last}")
def execute_javascript(pid: int, javascript: str):
    """Run `javascript` inside the window owned by `pid` and return its value.

    The snippet is posted to the child's /eval endpoint. Any reply without a
    "result" key (window still loading, server not reachable yet, evaluation
    error, ...) triggers another attempt, up to 30 attempts with a 0.1 s
    pause after each one.

    Raises:
        RuntimeError: `pid` was not produced by `launch_window`, or no
            attempt yielded a result (the last reply is in the message).
    """
    port = _pid_to_port.get(pid)
    if port is None:
        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")

    endpoint = f"http://127.0.0.1:{port}/eval"
    reply: Dict[str, Any] = {}
    attempts_left = 30
    while attempts_left > 0:
        attempts_left -= 1
        reply = _post_json(endpoint, {"javascript": javascript}) or {}
        # A present "result" key counts as success even when its value is None.
        if isinstance(reply, dict) and "result" in reply:
            return reply["result"]
        time.sleep(0.1)

    raise RuntimeError(f"Failed to execute JavaScript: {reply}")

View File

@@ -0,0 +1,174 @@
import asyncio
import json
import os
import random
import socket
import sys
import threading
from pathlib import Path
from typing import Optional
import webview
from aiohttp import web
def _get_free_port() -> int:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("127.0.0.1", 0))
return s.getsockname()[1]
def _start_http_server(window: webview.Window, port: int, ready_event: threading.Event):
    """Start the window-control HTTP API on 127.0.0.1:``port`` in a daemon thread.

    Endpoints (POST, JSON request body, JSON reply):

    * ``/rect``: ``{"selector": str, "space": "window" | "screen"}`` gives
      ``{"rect": {"x", "y", "width", "height"}}`` or ``{"rect": null}`` when
      nothing matches the selector.
    * ``/eval``: ``{"javascript": str}`` (``"code"`` is accepted as an alias)
      gives ``{"result": <value returned by evaluate_js>}``.

    Error replies are ``{"error": <text>}`` with status 400 (body is not a
    JSON object, or a required field is missing), 409 (``ready_event`` was
    still unset after a 2 s wait) or 500 (``evaluate_js`` raised).

    The function returns right after starting the thread; it does not wait
    for the listening socket to be bound.
    """

    def _error(code: str, status: int) -> web.Response:
        """Build a JSON error reply."""
        return web.json_response({"error": code}, status=status)

    async def _json_object(request: web.Request):
        """Return the request body as a dict, or None if it is not a JSON object."""
        try:
            data = await request.json()
        except Exception:
            return None
        # Arrays and scalars parse as valid JSON but have no .get(); reject
        # them here instead of failing later with AttributeError.
        return data if isinstance(data, dict) else None

    def _page_loaded() -> bool:
        """Give the page up to 2 s to finish loading; True once it has."""
        # Event.wait returns immediately when the event is already set.
        # NOTE(review): this blocks the server's event-loop thread while waiting.
        return ready_event.wait(timeout=2.0)

    async def rect_handler(request: web.Request):
        data = await _json_object(request)
        if data is None:
            return _error("invalid_json", 400)
        selector = data.get("selector")
        space = data.get("space", "window")
        if not isinstance(selector, str):
            return _error("selector_required", 400)
        if not _page_loaded():
            return _error("window_not_ready", 409)

        # json.dumps yields a quoted, escaped JS string literal for the selector.
        selector_js = json.dumps(selector)
        if space == "screen":
            # Approximate screen coordinates: window origin plus the height
            # difference between the outer and inner window.
            js = (
                "(function(){"
                f"const s = {selector_js};"
                "const el = document.querySelector(s);"
                "if(!el){return null;}"
                "const r = el.getBoundingClientRect();"
                "const sx = (window.screenX ?? window.screenLeft ?? 0);"
                "const syRaw = (window.screenY ?? window.screenTop ?? 0);"
                "const frameH = (window.outerHeight - window.innerHeight) || 0;"
                "const sy = syRaw + frameH;"
                "return {x:sx + r.left, y:sy + r.top, width:r.width, height:r.height};"
                "})()"
            )
        else:
            # Any other value of "space" is treated as viewport-relative.
            js = (
                "(function(){"
                f"const s = {selector_js};"
                "const el = document.querySelector(s);"
                "if(!el){return null;}"
                "const r = el.getBoundingClientRect();"
                "return {x:r.left,y:r.top,width:r.width,height:r.height};"
                "})()"
            )
        try:
            # Called from the server thread; the original author notes that
            # evaluate_js is thread-safe in pywebview.
            result = window.evaluate_js(js)
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
        return web.json_response({"rect": result})

    async def eval_handler(request: web.Request):
        data = await _json_object(request)
        if data is None:
            return _error("invalid_json", 400)
        code = data.get("javascript") or data.get("code")
        if not isinstance(code, str):
            return _error("javascript_required", 400)
        if not _page_loaded():
            return _error("window_not_ready", 409)
        try:
            result = window.evaluate_js(code)
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
        return web.json_response({"result": result})

    app = web.Application()
    app.router.add_post("/rect", rect_handler)
    app.router.add_post("/eval", eval_handler)

    # The server gets its own event loop, run on a background thread.
    loop = asyncio.new_event_loop()

    def run_loop():
        asyncio.set_event_loop(loop)
        runner = web.AppRunner(app)
        loop.run_until_complete(runner.setup())
        site = web.TCPSite(runner, "127.0.0.1", port)
        loop.run_until_complete(site.start())
        loop.run_forever()

    # Daemon thread: it must not keep the process alive on exit.
    t = threading.Thread(target=run_loop, daemon=True)
    t.start()
def main():
    """Entry point for ``python -m bench_ui.child <config.json>``.

    Reads the JSON config written by the parent, creates the pywebview
    window (from ``url`` when given, otherwise from ``html``), starts the
    control HTTP server on a free loopback port, prints one JSON line
    ``{"pid": ..., "port": ...}`` on stdout for the parent, then runs the GUI
    loop until it ends.

    The config keys ``icon``, ``use_inner_size`` and ``title_bar_style`` are
    accepted but not applied to the window by this function.

    Exits with status 2 and a usage message when no config path is given.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python -m bench_ui.child <config.json>", file=sys.stderr)
        sys.exit(2)

    cfg = json.loads(Path(argv[1]).read_text(encoding="utf-8"))

    url: Optional[str] = cfg.get("url")
    title: str = cfg.get("title", "Window")

    # Options shared by both content sources.
    window_options = {
        "width": int(cfg.get("width", 600)),
        "height": int(cfg.get("height", 400)),
        "x": cfg.get("x"),
        "y": cfg.get("y"),
        "confirm_close": False,
        "text_select": True,
        "background_color": "#FFFFFF",
    }
    # A non-empty URL wins; otherwise fall back to inline HTML (maybe empty).
    if url:
        content = {"url": url}
    else:
        content = {"html": cfg.get("html") or ""}
    window = webview.create_window(title, **content, **window_options)

    # Set once the page has loaded, so control requests know JS can run.
    page_loaded = threading.Event()

    def _mark_loaded():
        page_loaded.set()

    window.events.loaded += _mark_loaded  # type: ignore[attr-defined]

    # Control server comes up before the GUI loop takes over this thread.
    port = _get_free_port()
    _start_http_server(window, port, page_loaded)

    # Single startup line for the parent; flushed so it arrives immediately.
    print(json.dumps({"pid": os.getpid(), "port": port}), flush=True)

    # Blocks until the GUI loop ends.
    webview.start()


if __name__ == "__main__":
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 714 KiB

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import time
from bench_ui import launch_window, get_element_rect, execute_javascript
from pathlib import Path
# Inline page for the demo: a heading plus a fixed-size (220x120 px) box with
# id "target", which main() looks up by CSS selector.
HTML = """
<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<title>Bench UI Example</title>
<style>
body { font-family: system-ui, sans-serif; margin: 24px; }
#target { width: 220px; height: 120px; background: #4f46e5; color: white; display: flex; align-items: center; justify-content: center; border-radius: 8px; }
</style>
</head>
<body>
<h1>Bench UI Example</h1>
<div id="target">Hello from pywebview</div>
</body>
</html>
"""
def main():
    """Demo: open a window, locate ``#target`` on screen, and read its text.

    Steps:
      1. Launch a window showing the inline ``HTML`` page.
      2. Query the screen-space rect of ``#target``.
      3. Grab a full-screen screenshot, outline the rect in red, and save it
         as ``output_overlay.png`` next to this script. This step is
         best-effort: it needs Pillow and a screen that can be captured.
      4. Read the element's text via ``execute_javascript``.
    """
    # Launch a window with inline HTML content
    pid = launch_window(
        html=HTML,
        title="Bench UI Example",
        width=800,
        height=600,
    )
    print(f"Launched window with PID: {pid}")

    # Give the window a brief moment to render
    time.sleep(1.0)

    # Query the client rect of an element via CSS selector in SCREEN space
    rect = get_element_rect(pid, "#target", space="screen")
    print("Element rect (screen space):", rect)

    # Take a screenshot and overlay the bbox
    if rect is None:
        # Without a rect there is nothing to draw; say so directly instead
        # of failing inside the screenshot code.
        print("Element #target not found; skipping screenshot overlay")
    else:
        try:
            from PIL import ImageGrab, ImageDraw

            img = ImageGrab.grab()  # full screen
            draw = ImageDraw.Draw(img)
            x, y, w, h = rect["x"], rect["y"], rect["width"], rect["height"]
            box = (x, y, x + w, y + h)
            draw.rectangle(box, outline=(255, 0, 0), width=3)
            out_path = Path(__file__).parent / "output_overlay.png"
            img.save(out_path)
            print(f"Saved overlay screenshot to: {out_path}")
        except Exception as e:
            # The overlay is optional; report the problem and carry on.
            print(f"Failed to capture/annotate screenshot: {e}")

    # Execute arbitrary JavaScript. The selector must match the page's
    # element id ("target"), otherwise the optional chain yields no text.
    text = execute_javascript(pid, "document.querySelector('#target')?.textContent")
    print("text:", text)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,24 @@
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project]
name = "cua-bench-ui"
version = "0.1.0"
description = "Lightweight webUI window launcher for CUA bench using pywebview"
readme = "README.md"
authors = [
{ name = "TryCua", email = "gh@trycua.com" }
]
dependencies = [
"pywebview>=5.3",
"aiohttp>=3.9.0",
]
requires-python = ">=3.12"
[tool.pdm]
distribution = true
[tool.pdm.build]
includes = ["bench_ui/"]
source-includes = ["README.md"]