add cua-bench-ui

2026-01-03 03:49:58 -06:00 · 2025-11-01 19:44:35 -04:00
parent 30b6fdbef9
commit be4c7e45aa
7 changed files with 438 additions and 0 deletions
--- a/libs/python/bench-ui/README.md
+++ b/libs/python/bench-ui/README.md
@@ -0,0 +1,26 @@
+# CUA Bench UI
+
+Lightweight web UI window controller for CUA bench environments, built on pywebview.
+
+## Usage
+
+```python
+from bench_ui import launch_window, get_element_rect, execute_javascript
+
+# Launch a window with inline HTML content
+pid = launch_window(html="<html><body><h1>Hello</h1></body></html>")
+
+# Get element rect in screen space
+rect = get_element_rect(pid, "h1", space="screen")
+print(rect)
+
+# Execute arbitrary JavaScript
+text = execute_javascript(pid, "document.querySelector('h1')?.textContent")
+print(text)
+```
+
+## Installation
+
+```bash
+pip install cua-bench-ui
+```
--- a/libs/python/bench-ui/bench_ui/__init__.py
+++ b/libs/python/bench-ui/bench_ui/__init__.py
@@ -0,0 +1,3 @@
+from .api import launch_window, get_element_rect, execute_javascript
+
+__all__ = ["launch_window", "get_element_rect", "execute_javascript"]
--- a/libs/python/bench-ui/bench_ui/api.py
+++ b/libs/python/bench-ui/bench_ui/api.py
@@ -0,0 +1,149 @@
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import time
+from pathlib import Path
+from typing import Optional, Dict, Any
+from urllib import request
+from urllib.error import HTTPError, URLError
+
+# Map child PID -> listening port
+# Filled in by launch_window and looked up by get_element_rect /
+# execute_javascript to find the child's local HTTP control endpoint.
+_pid_to_port: Dict[int, int] = {}
+
+
+def _post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+    """POST *payload* as JSON to *url* and return the decoded JSON reply.
+
+    Transport and decoding problems are not raised; they are reported as a
+    dict with an "error" key so that callers polling in a loop can keep going:
+
+    - HTTP error status: the JSON body sent with the error if it parses,
+      otherwise {"error": "http_error", "status": <code>}.
+    - Connection failure: {"error": "url_error", "reason": <text>}.
+    - Other socket-level failure (for example a timeout while reading the
+      reply): {"error": "io_error", "reason": <text>}.
+    - A successful reply whose body is not UTF-8 encoded JSON:
+      {"error": "invalid_response"}.
+    """
+    data = json.dumps(payload).encode("utf-8")
+    req = request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST")
+    try:
+        with request.urlopen(req, timeout=5) as resp:
+            raw = resp.read()
+    except HTTPError as e:
+        # Error responses may still carry a JSON body describing the failure.
+        try:
+            body = (e.read() or b"").decode("utf-8", errors="ignore")
+            return json.loads(body)
+        except Exception:
+            return {"error": "http_error", "status": getattr(e, "code", None)}
+    except URLError as e:
+        return {"error": "url_error", "reason": str(e.reason)}
+    except OSError as e:
+        # Failures after the connection is established (read timeout, reset)
+        # surface as plain OSError rather than URLError.
+        return {"error": "io_error", "reason": str(e)}
+
+    try:
+        return json.loads(raw.decode("utf-8"))
+    except ValueError:
+        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
+        return {"error": "invalid_response"}
+
+
+def launch_window(
+    url: Optional[str] = None,
+    *,
+    html: Optional[str] = None,
+    title: str = "Window",
+    x: Optional[int] = None,
+    y: Optional[int] = None,
+    width: int = 600,
+    height: int = 400,
+    icon: Optional[str] = None,
+    use_inner_size: bool = False,
+    title_bar_style: str = "default",
+) -> int:
+    """Create a pywebview window in a child process and return its PID.
+
+    Preferred input is a URL via the positional `url` parameter.
+    To load inline HTML instead, pass `html=...`.
+
+    Spawns `python -m bench_ui.child` with a JSON config passed via a temp file.
+    The child prints a single JSON line: {"pid": <pid>, "port": <port>}.
+    We cache pid->port for subsequent control calls like get_element_rect.
+
+    The child's stderr is merged into its stdout, so lines printed before the
+    startup line (warnings, log output) are skipped instead of being parsed.
+
+    Raises:
+        ValueError: if neither `url` nor `html` is given.
+        RuntimeError: if the child closes its output without reporting a
+            port; the message includes whatever the child printed.
+    """
+    if not url and not html:
+        raise ValueError("launch_window requires either a url or html")
+
+    config = {
+        "url": url,
+        "html": html,
+        "title": title,
+        "x": x,
+        "y": y,
+        "width": width,
+        "height": height,
+        "icon": icon,
+        "use_inner_size": use_inner_size,
+        "title_bar_style": title_bar_style,
+    }
+
+    # delete=False: the file must outlive this `with` so the child can open it.
+    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as f:
+        json.dump(config, f)
+        cfg_path = f.name
+
+    try:
+        # Launch child process
+        proc = subprocess.Popen(
+            [sys.executable, "-m", "bench_ui.child", cfg_path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+        )
+        if proc.stdout is None:
+            raise RuntimeError("bench-ui child process was started without a stdout pipe")
+
+        # Scan the output for the startup line; anything else is kept only
+        # so it can be shown if the child never reports a port.
+        info = None
+        noise = []
+        for line in iter(proc.stdout.readline, ""):
+            try:
+                candidate = json.loads(line)
+            except ValueError:
+                candidate = None
+            if isinstance(candidate, dict) and "port" in candidate:
+                info = candidate
+                break
+            noise.append(line.rstrip())
+
+        if info is None:
+            # End of output without a startup line: the child failed to start.
+            proc.kill()
+            proc.wait()
+            detail = "\n".join(noise) or "<no output>"
+            raise RuntimeError(f"bench-ui child exited without reporting a port:\n{detail}")
+
+        pid = int(info["pid"]) if "pid" in info else proc.pid
+        port = int(info["port"])  # required
+        _pid_to_port[pid] = port
+        # NOTE(review): the child's output pipe stays open but is not read
+        # after this point; confirm that is acceptable for long-lived,
+        # chatty children.
+        return pid
+    finally:
+        # Best-effort cleanup of the temp config file.
+        try:
+            os.unlink(cfg_path)
+        except OSError:
+            pass
+
+
+def get_element_rect(pid: int, selector: str, *, space: str = "window"):
+    """Ask the child process for the client rect of the element matching *selector*.
+
+    The request is POSTed to the child's /rect endpoint together with
+    *space*. Up to 30 attempts are made with a 0.1 s pause after each one; an
+    attempt is retried whenever the reply carries no non-null "rect",
+    whether that is because of an error or because nothing matched.
+
+    Returns the "rect" value of the first successful reply, a dict like
+    {"x": float, "y": float, "width": float, "height": float}.
+
+    Raises RuntimeError if *pid* was not registered by launch_window, or if
+    no attempt produced a rect (this includes a selector that never matches).
+    """
+    port = _pid_to_port.get(pid)
+    if port is None:
+        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")
+
+    endpoint = f"http://127.0.0.1:{port}/rect"
+    last: Dict[str, Any] = {}
+    attempts_left = 30
+    while attempts_left > 0:
+        attempts_left -= 1
+        last = _post_json(endpoint, {"selector": selector, "space": space}) or {}
+        if isinstance(last, dict) and last.get("rect") is not None:
+            return last["rect"]
+        # Not ready, errored, or element missing: pause briefly, then retry.
+        time.sleep(0.1)
+    raise RuntimeError(f"Failed to get element rect: {last}")
+
+
+def execute_javascript(pid: int, javascript: str):
+    """Run *javascript* in the window owned by *pid* and return its result.
+
+    The code is POSTed to the child's /eval endpoint. Up to 30 attempts are
+    made with a 0.1 s pause after each one; the first reply containing a
+    "result" key ends the loop and that value is returned (it may be None).
+
+    Raises RuntimeError if *pid* was not registered by launch_window, or if
+    no attempt returned a result.
+
+    NOTE(review): every reply without a "result" triggers another attempt,
+    so a script that fails inside the page is sent again each time; confirm
+    this is acceptable for scripts with side effects.
+    """
+    port = _pid_to_port.get(pid)
+    if port is None:
+        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")
+
+    endpoint = f"http://127.0.0.1:{port}/eval"
+    last: Dict[str, Any] = {}
+    for _attempt in range(30):
+        last = _post_json(endpoint, {"javascript": javascript}) or {}
+        if isinstance(last, dict) and "result" in last:
+            return last["result"]
+        # Window still loading or the call errored: pause briefly, then retry.
+        time.sleep(0.1)
+    raise RuntimeError(f"Failed to execute JavaScript: {last}")
--- a/libs/python/bench-ui/bench_ui/child.py
+++ b/libs/python/bench-ui/bench_ui/child.py
@@ -0,0 +1,174 @@
+import asyncio
+import json
+import os
+import random
+import socket
+import sys
+import threading
+from pathlib import Path
+from typing import Optional
+
+import webview
+from aiohttp import web
+
+
+def _get_free_port() -> int:
+    """Return a TCP port number on 127.0.0.1 that was free when probed.
+
+    A throwaway socket is bound to port 0 so the OS picks the port; the
+    number is read back and the socket is closed before returning, so the
+    port is not held for the caller.
+    """
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        probe.bind(("127.0.0.1", 0))
+        _host, port = probe.getsockname()
+        return port
+    finally:
+        probe.close()
+
+
+def _start_http_server(window: webview.Window, port: int, ready_event: threading.Event):
+    """Serve the /rect and /eval control endpoints for *window* on 127.0.0.1:*port*.
+
+    The aiohttp application runs on its own event loop inside a daemon
+    thread, so this function returns right after starting that thread.
+
+    Endpoints (both POST, JSON object in, JSON out):
+
+    - /rect  {"selector": str, "space": "window" | "screen"} ->
+      {"rect": {...}}, or {"rect": null} when nothing matches. Any *space*
+      other than "screen" is treated as "window".
+    - /eval  {"javascript": str} (or "code") -> {"result": ...}
+
+    Error replies are {"error": ...} with status 400 (bad request body),
+    409 (page not loaded yet) or 500 (evaluation raised).
+    """
+
+    async def _json_object(request: web.Request):
+        """Return the request body as a dict, or None if it is not a JSON object."""
+        try:
+            data = await request.json()
+        except Exception:
+            return None
+        # Valid JSON that is not an object (e.g. a list) has no .get().
+        return data if isinstance(data, dict) else None
+
+    def _page_ready() -> bool:
+        """Give a page that is still loading up to 2 s to finish; report whether it did."""
+        # NOTE: this wait runs on the server's event-loop thread, so other
+        # requests are not served while it blocks.
+        return ready_event.is_set() or ready_event.wait(timeout=2.0)
+
+    def _run_js(js: str, result_key: str) -> web.Response:
+        """Evaluate *js* in the window and wrap the outcome in a JSON response."""
+        try:
+            # Evaluate JS on the target window; this call is thread-safe in pywebview
+            result = window.evaluate_js(js)
+        except Exception as e:
+            return web.json_response({"error": str(e)}, status=500)
+        return web.json_response({result_key: result})
+
+    async def rect_handler(request: web.Request):
+        data = await _json_object(request)
+        if data is None:
+            return web.json_response({"error": "invalid_json"}, status=400)
+        selector = data.get("selector")
+        if not isinstance(selector, str):
+            return web.json_response({"error": "selector_required"}, status=400)
+        if not _page_ready():
+            return web.json_response({"error": "window_not_ready"}, status=409)
+
+        if data.get("space", "window") == "screen":
+            # Compute approximate screen coordinates using window metrics
+            rect_expr = (
+                "const sx = (window.screenX ?? window.screenLeft ?? 0);"
+                "const syRaw = (window.screenY ?? window.screenTop ?? 0);"
+                "const frameH = (window.outerHeight - window.innerHeight) || 0;"
+                "const sy = syRaw + frameH;"
+                "return {x:sx + r.left, y:sy + r.top, width:r.width, height:r.height};"
+            )
+        else:
+            rect_expr = "return {x:r.left,y:r.top,width:r.width,height:r.height};"
+
+        js = "".join(
+            [
+                "(function(){",
+                # json.dumps safely embeds the selector as a JS string literal
+                f"const s = {json.dumps(selector)};",
+                "const el = document.querySelector(s);",
+                "if(!el){return null;}",
+                "const r = el.getBoundingClientRect();",
+                rect_expr,
+                "})()",
+            ]
+        )
+        return _run_js(js, "rect")
+
+    async def eval_handler(request: web.Request):
+        data = await _json_object(request)
+        if data is None:
+            return web.json_response({"error": "invalid_json"}, status=400)
+        code = data.get("javascript") or data.get("code")
+        if not isinstance(code, str):
+            return web.json_response({"error": "javascript_required"}, status=400)
+        if not _page_ready():
+            return web.json_response({"error": "window_not_ready"}, status=409)
+        return _run_js(code, "result")
+
+    app = web.Application()
+    app.router.add_post("/rect", rect_handler)
+    app.router.add_post("/eval", eval_handler)
+
+    loop = asyncio.new_event_loop()
+
+    def run_loop():
+        asyncio.set_event_loop(loop)
+        runner = web.AppRunner(app)
+        loop.run_until_complete(runner.setup())
+        site = web.TCPSite(runner, "127.0.0.1", port)
+        loop.run_until_complete(site.start())
+        loop.run_forever()
+
+    # Daemon thread: the server must not keep the process alive on its own.
+    t = threading.Thread(target=run_loop, daemon=True)
+    t.start()
+
+
+def main():
+    """Child-process entry point: open the window and serve control requests.
+
+    Reads the JSON config file named by the first command-line argument,
+    creates the pywebview window from it, starts the local HTTP control
+    server, prints {"pid": ..., "port": ...} on one line for the parent, and
+    then blocks in the GUI loop. Exits with status 2 if no config path is
+    given.
+    """
+    if len(sys.argv) < 2:
+        print("Usage: python -m bench_ui.child <config.json>", file=sys.stderr)
+        sys.exit(2)
+
+    cfg = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
+
+    # Exactly one content source is passed on: a non-empty url wins,
+    # otherwise the inline html (empty string when absent).
+    url: Optional[str] = cfg.get("url")
+    content = {"url": url} if url else {"html": cfg.get("html") or ""}
+
+    # NOTE: the "icon", "use_inner_size" and "title_bar_style" config keys
+    # are not applied to the window here.
+    window = webview.create_window(
+        cfg.get("title", "Window"),
+        width=int(cfg.get("width", 600)),
+        height=int(cfg.get("height", 400)),
+        x=cfg.get("x"),
+        y=cfg.get("y"),
+        confirm_close=False,
+        text_select=True,
+        background_color="#FFFFFF",
+        **content,
+    )
+
+    # Set once the page has loaded, so the control server knows JS can run.
+    page_loaded = threading.Event()
+
+    def _mark_loaded():
+        page_loaded.set()
+
+    window.events.loaded += _mark_loaded  # type: ignore[attr-defined]
+
+    # Control server must be up before the parent is told the port.
+    control_port = _get_free_port()
+    _start_http_server(window, control_port, page_loaded)
+
+    # Startup line read by the parent process.
+    print(json.dumps({"pid": os.getpid(), "port": control_port}), flush=True)
+
+    # Blocks until the GUI loop ends.
+    webview.start()
+
+
+if __name__ == "__main__":
+    main()
--- a/libs/python/bench-ui/examples/output_overlay.png
+++ b/libs/python/bench-ui/examples/output_overlay.png
--- a/libs/python/bench-ui/examples/simple_example.py
+++ b/libs/python/bench-ui/examples/simple_example.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+import time
+from bench_ui import launch_window, get_element_rect, execute_javascript
+from pathlib import Path
+
+# Inline page shown by the example: a heading plus a 220x120 px box with
+# id "target", which main() looks up with the "#target" selector.
+HTML = """
+<!doctype html>
+<html>
+  <head>
+    <meta charset="utf-8" />
+    <title>Bench UI Example</title>
+    <style>
+      body { font-family: system-ui, sans-serif; margin: 24px; }
+      #target { width: 220px; height: 120px; background: #4f46e5; color: white; display: flex; align-items: center; justify-content: center; border-radius: 8px; }
+    </style>
+  </head>
+  <body>
+    <h1>Bench UI Example</h1>
+    <div id="target">Hello from pywebview</div>
+  </body>
+</html>
+"""
+
+def main():
+    """Launch the example window, locate #target on screen, and read its text.
+
+    Steps: open the window with the inline HTML, fetch the screen-space rect
+    of the "#target" element, save a full-screen screenshot with that rect
+    outlined in red next to this script (best effort; failures are only
+    printed), and finally print the element's text content.
+    """
+    # Launch a window with inline HTML content
+    pid = launch_window(
+        html=HTML,
+        title="Bench UI Example",
+        width=800,
+        height=600,
+    )
+    print(f"Launched window with PID: {pid}")
+
+    # Give the window a brief moment to render
+    time.sleep(1.0)
+
+    # Query the client rect of an element via CSS selector in SCREEN space
+    rect = get_element_rect(pid, "#target", space="screen")
+    print("Element rect (screen space):", rect)
+
+    # Take a screenshot and overlay the bbox
+    try:
+        # Imported here so the rest of the example runs without Pillow.
+        from PIL import ImageGrab, ImageDraw
+
+        img = ImageGrab.grab()  # full screen
+        draw = ImageDraw.Draw(img)
+        x, y, w, h = rect["x"], rect["y"], rect["width"], rect["height"]
+        box = (x, y, x + w, y + h)
+        draw.rectangle(box, outline=(255, 0, 0), width=3)
+        out_path = Path(__file__).parent / "output_overlay.png"
+        img.save(out_path)
+        print(f"Saved overlay screenshot to: {out_path}")
+    except Exception as e:
+        print(f"Failed to capture/annotate screenshot: {e}")
+
+    # Execute arbitrary JavaScript
+    # The page's only element with an id is "target"; "#t" matched nothing.
+    text = execute_javascript(pid, "document.querySelector('#target')?.textContent")
+    print("text:", text)
+
+
+if __name__ == "__main__":
+    main()
--- a/libs/python/bench-ui/pyproject.toml
+++ b/libs/python/bench-ui/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+[project]
+name = "cua-bench-ui"
+version = "0.1.0"
+description = "Lightweight webUI window launcher for CUA bench using pywebview"
+readme = "README.md"
+authors = [
+    { name = "TryCua", email = "gh@trycua.com" }
+]
+dependencies = [
+    "pywebview>=5.3",
+    "aiohttp>=3.9.0",
+]
+requires-python = ">=3.12"
+
+[tool.pdm]
+distribution = true
+
+[tool.pdm.build]
+includes = ["bench_ui/"]
+source-includes = ["README.md"]