diff --git a/libs/python/bench-ui/README.md b/libs/python/bench-ui/README.md
new file mode 100644
index 00000000..e57cc800
--- /dev/null
+++ b/libs/python/bench-ui/README.md
@@ -0,0 +1,26 @@
+# CUA Bench UI
+
+Lightweight webUI window controller for CUA bench environments using pywebview
+
+## Usage
+
+```python
+from bench_ui import launch_window, get_element_rect, execute_javascript
+
+# Launch a window with inline HTML content
+pid = launch_window(html="<h1>Hello</h1>")
+
+# Get element rect in screen space
+rect = get_element_rect(pid, "h1", space="screen")
+print(rect)
+
+# Execute arbitrary JavaScript
+text = execute_javascript(pid, "document.querySelector('h1')?.textContent")
+print(text)
+```
+
+## Installation
+
+```bash
+pip install cua-bench-ui
+```
diff --git a/libs/python/bench-ui/bench_ui/__init__.py b/libs/python/bench-ui/bench_ui/__init__.py
new file mode 100644
index 00000000..3e730f77
--- /dev/null
+++ b/libs/python/bench-ui/bench_ui/__init__.py
@@ -0,0 +1,3 @@
+from .api import launch_window, get_element_rect, execute_javascript
+
+__all__ = ["launch_window", "get_element_rect", "execute_javascript"]
diff --git a/libs/python/bench-ui/bench_ui/api.py b/libs/python/bench-ui/bench_ui/api.py
new file mode 100644
index 00000000..aa04d8e5
--- /dev/null
+++ b/libs/python/bench-ui/bench_ui/api.py
@@ -0,0 +1,184 @@
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import time
+from pathlib import Path
+from typing import Optional, Dict, Any
+from urllib import request
+from urllib.error import HTTPError, URLError
+
+# Map child PID -> listening port
+_pid_to_port: Dict[int, int] = {}
+
+# Control calls poll the child up to 30 times, 0.1s apart (~3s total)
+_RETRY_ATTEMPTS = 30
+_RETRY_DELAY = 0.1
+
+
+def _post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+    """POST `payload` as JSON to `url` and return the decoded JSON reply.
+
+    Transport problems never raise: HTTP errors, refused connections,
+    timeouts and undecodable bodies all come back as a dict with an
+    "error" key, so callers can simply retry.
+    """
+    data = json.dumps(payload).encode("utf-8")
+    req = request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST")
+    try:
+        with request.urlopen(req, timeout=5) as resp:
+            text = resp.read().decode("utf-8")
+            return json.loads(text)
+    except HTTPError as e:
+        # The child reports its own failures as JSON bodies on 4xx/5xx
+        try:
+            body = (e.read() or b"").decode("utf-8", errors="ignore")
+            return json.loads(body)
+        except Exception:
+            return {"error": "http_error", "status": getattr(e, 'code', None)}
+    except URLError as e:
+        return {"error": "url_error", "reason": str(e.reason)}
+    except OSError as e:
+        # Read timeouts and connection resets are not wrapped in URLError
+        return {"error": "connection_error", "reason": str(e)}
+    except ValueError as e:
+        # Reply was not valid UTF-8 or not valid JSON
+        return {"error": "invalid_response", "reason": str(e)}
+
+
+def launch_window(
+    url: Optional[str] = None,
+    *,
+    html: Optional[str] = None,
+    title: str = "Window",
+    x: Optional[int] = None,
+    y: Optional[int] = None,
+    width: int = 600,
+    height: int = 400,
+    icon: Optional[str] = None,
+    use_inner_size: bool = False,
+    title_bar_style: str = "default",
+) -> int:
+    """Create a pywebview window in a child process and return its PID.
+
+    Preferred input is a URL via the positional `url` parameter.
+    To load inline HTML instead, pass `html=...`.
+
+    Spawns `python -m bench_ui.child` with a JSON config passed via a temp file.
+    The child prints a single JSON line: {"pid": <int>, "port": <int>}.
+    We cache pid->port for subsequent control calls like get_element_rect.
+
+    Raises ValueError if neither `url` nor `html` is given, and RuntimeError
+    if the child exits before reporting its control port.
+    """
+    if not url and not html:
+        raise ValueError("launch_window requires either a url or html")
+
+    config = {
+        "url": url,
+        "html": html,
+        "title": title,
+        "x": x,
+        "y": y,
+        "width": width,
+        "height": height,
+        "icon": icon,
+        "use_inner_size": use_inner_size,
+        "title_bar_style": title_bar_style,
+    }
+
+    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as f:
+        json.dump(config, f)
+        cfg_path = f.name
+
+    try:
+        # Launch child process
+        proc = subprocess.Popen(
+            [sys.executable, "-m", "bench_ui.child", cfg_path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+        )
+        # Explicit check instead of `assert`, which is stripped under `python -O`
+        if proc.stdout is None:
+            raise RuntimeError("bench-ui child process has no stdout pipe")
+        # stderr is merged into stdout, so warnings may precede the startup
+        # line: skip everything that is not the expected JSON object
+        info: Dict[str, Any] = {}
+        skipped = []
+        for line in iter(proc.stdout.readline, ""):
+            try:
+                parsed = json.loads(line)
+            except ValueError:
+                parsed = None
+            if isinstance(parsed, dict) and "port" in parsed:
+                info = parsed
+                break
+            skipped.append(line)
+        else:
+            # EOF without a startup line: the child died during startup
+            proc.wait()
+            output = "".join(skipped).strip()
+            raise RuntimeError(
+                f"bench-ui child exited with code {proc.returncode} "
+                f"before reporting its port: {output}"
+            )
+        pid = int(info["pid"]) if "pid" in info else proc.pid
+        port = int(info["port"])  # required
+        _pid_to_port[pid] = port
+        return pid
+    finally:
+        try:
+            os.unlink(cfg_path)
+        except OSError:
+            pass
+
+
+def get_element_rect(pid: int, selector: str, *, space: str = "window"):
+    """Ask the child process to compute element client rect via injected JS.
+
+    Returns a dict like {"x": float, "y": float, "width": float, "height": float} or None if not found.
+
+    Polls for ~3s while the window is still loading or the element has not
+    appeared yet. Raises RuntimeError for an unknown `pid` or if the child
+    keeps answering with an error.
+    """
+    if pid not in _pid_to_port:
+        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")
+    port = _pid_to_port[pid]
+    url = f"http://127.0.0.1:{port}/rect"
+    last: Dict[str, Any] = {}
+    for _ in range(_RETRY_ATTEMPTS):
+        resp = _post_json(url, {"selector": selector, "space": space})
+        last = resp or {}
+        rect = last.get("rect") if isinstance(last, dict) else None
+        if rect is not None:
+            return rect
+        # Transient error, or the selector matches nothing yet: retry
+        time.sleep(_RETRY_DELAY)
+    if isinstance(last, dict) and "rect" in last:
+        # The child answered successfully but the selector matched nothing
+        return None
+    raise RuntimeError(f"Failed to get element rect: {last}")
+
+
+def execute_javascript(pid: int, javascript: str):
+    """Execute arbitrary JavaScript in the window and return its result.
+
+    Retries briefly while the window is still becoming ready. Raises
+    RuntimeError for an unknown `pid` or if no result arrives within ~3s.
+    """
+    if pid not in _pid_to_port:
+        raise RuntimeError(f"Unknown pid {pid}; no registered bench-ui window")
+    port = _pid_to_port[pid]
+    url = f"http://127.0.0.1:{port}/eval"
+    last: Dict[str, Any] = {}
+    for _ in range(_RETRY_ATTEMPTS):
+        resp = _post_json(url, {"javascript": javascript})
+        last = resp or {}
+        if isinstance(last, dict) and "result" in last:
+            return last["result"]
+        # Every error is treated as transient while the window starts up
+        time.sleep(_RETRY_DELAY)
+    raise RuntimeError(f"Failed to execute JavaScript: {last}")
diff --git a/libs/python/bench-ui/bench_ui/child.py b/libs/python/bench-ui/bench_ui/child.py
new file mode 100644
index 00000000..1c19c12c
--- /dev/null
+++ b/libs/python/bench-ui/bench_ui/child.py
@@ -0,0 +1,190 @@
+import asyncio
+import json
+import os
+import random
+import socket
+import sys
+import threading
+from pathlib import Path
+from typing import Optional
+
+import webview
+from aiohttp import web
+
+
+def _get_free_port() -> int:
+    """Ask the OS for a currently free TCP port on 127.0.0.1.
+
+    The probe socket is closed before returning, so the port is only
+    reserved on a best-effort basis until the HTTP server binds it.
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("127.0.0.1", 0))
+        return s.getsockname()[1]
+
+
+def _start_http_server(window: webview.Window, port: int, ready_event: threading.Event):
+    """Serve the control API (POST /rect and POST /eval) on 127.0.0.1:`port`.
+
+    The aiohttp app runs on its own event loop in a daemon thread, so this
+    function returns immediately. Until `ready_event` is set, requests wait
+    up to 2s and are then answered with "window_not_ready".
+    """
+
+    async def read_json_object(request: web.Request):
+        """Return the request body as a dict, or None if it is not a JSON object."""
+        try:
+            data = await request.json()
+        except Exception:
+            return None
+        return data if isinstance(data, dict) else None
+
+    async def wait_until_ready() -> bool:
+        """Give the page up to 2s to finish loading; return whether it has."""
+        if ready_event.is_set():
+            return True
+        # Wait in a worker thread: blocking here would stall the event loop
+        return await asyncio.to_thread(ready_event.wait, 2.0)
+
+    async def rect_handler(request: web.Request):
+        data = await read_json_object(request)
+        if data is None:
+            return web.json_response({"error": "invalid_json"}, status=400)
+        selector = data.get("selector")
+        space = data.get("space", "window")
+        if not isinstance(selector, str):
+            return web.json_response({"error": "selector_required"}, status=400)
+
+        # Ensure window content is loaded
+        if not await wait_until_ready():
+            return web.json_response({"error": "window_not_ready"}, status=409)
+
+        # Safely embed selector into JS
+        selector_js = json.dumps(selector)
+        if space == "screen":
+            # Compute approximate screen coordinates using window metrics
+            js = (
+                "(function(){"
+                f"const s = {selector_js};"
+                "const el = document.querySelector(s);"
+                "if(!el){return null;}"
+                "const r = el.getBoundingClientRect();"
+                "const sx = (window.screenX ?? window.screenLeft ?? 0);"
+                "const syRaw = (window.screenY ?? window.screenTop ?? 0);"
+                "const frameH = (window.outerHeight - window.innerHeight) || 0;"
+                "const sy = syRaw + frameH;"
+                "return {x:sx + r.left, y:sy + r.top, width:r.width, height:r.height};"
+                "})()"
+            )
+        else:
+            js = (
+                "(function(){"
+                f"const s = {selector_js};"
+                "const el = document.querySelector(s);"
+                "if(!el){return null;}"
+                "const r = el.getBoundingClientRect();"
+                "return {x:r.left,y:r.top,width:r.width,height:r.height};"
+                "})()"
+            )
+        try:
+            # evaluate_js blocks until the page answers, so run it in a worker
+            # thread to keep the event loop responsive
+            result = await asyncio.to_thread(window.evaluate_js, js)
+        except Exception as e:
+            return web.json_response({"error": str(e)}, status=500)
+        return web.json_response({"rect": result})
+
+    async def eval_handler(request: web.Request):
+        data = await read_json_object(request)
+        if data is None:
+            return web.json_response({"error": "invalid_json"}, status=400)
+        code = data.get("javascript") or data.get("code")
+        if not isinstance(code, str):
+            return web.json_response({"error": "javascript_required"}, status=400)
+
+        if not await wait_until_ready():
+            return web.json_response({"error": "window_not_ready"}, status=409)
+
+        try:
+            result = await asyncio.to_thread(window.evaluate_js, code)
+        except Exception as e:
+            return web.json_response({"error": str(e)}, status=500)
+        return web.json_response({"result": result})
+
+    app = web.Application()
+    app.router.add_post("/rect", rect_handler)
+    app.router.add_post("/eval", eval_handler)
+
+    loop = asyncio.new_event_loop()
+
+    def run_loop():
+        asyncio.set_event_loop(loop)
+        runner = web.AppRunner(app)
+        loop.run_until_complete(runner.setup())
+        site = web.TCPSite(runner, "127.0.0.1", port)
+        loop.run_until_complete(site.start())
+        loop.run_forever()
+
+    t = threading.Thread(target=run_loop, daemon=True)
+    t.start()
+
+
+def main():
+    """Create the window described by a JSON config file and run the GUI.
+
+    Expects the config path as the first command-line argument and prints one
+    JSON line {"pid": <int>, "port": <int>} on stdout after the control
+    server thread has been started.
+    """
+    if len(sys.argv) < 2:
+        print("Usage: python -m bench_ui.child <config.json>", file=sys.stderr)
+        sys.exit(2)
+
+    cfg_path = Path(sys.argv[1])
+    cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
+
+    html: Optional[str] = cfg.get("html") or ""
+    url: Optional[str] = cfg.get("url")
+    title: str = cfg.get("title", "Window")
+    x: Optional[int] = cfg.get("x")
+    y: Optional[int] = cfg.get("y")
+    width: int = int(cfg.get("width", 600))
+    height: int = int(cfg.get("height", 400))
+    # NOTE: the next three options are parsed but not applied to the window yet
+    icon: Optional[str] = cfg.get("icon")
+    use_inner_size: bool = bool(cfg.get("use_inner_size", False))
+    title_bar_style: str = cfg.get("title_bar_style", "default")
+
+    # Create window; a URL takes precedence over inline HTML
+    content = {"url": url} if url else {"html": html}
+    window = webview.create_window(
+        title,
+        width=width,
+        height=height,
+        x=x,
+        y=y,
+        confirm_close=False,
+        text_select=True,
+        background_color="#FFFFFF",
+        **content,
+    )
+
+    # Track when the page is loaded so JS execution succeeds
+    window_ready = threading.Event()
+    def _on_loaded():
+        window_ready.set()
+    window.events.loaded += _on_loaded  # type: ignore[attr-defined]
+
+    # Start HTTP server for control
+    port = _get_free_port()
+    _start_http_server(window, port, window_ready)
+
+    # Print startup info for parent to read
+    print(json.dumps({"pid": os.getpid(), "port": port}), flush=True)
+
+    # Start GUI (blocking)
+    webview.start()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/libs/python/bench-ui/examples/output_overlay.png b/libs/python/bench-ui/examples/output_overlay.png
new file mode 100644
index 00000000..d7e3a493
Binary files /dev/null and b/libs/python/bench-ui/examples/output_overlay.png differ
diff --git a/libs/python/bench-ui/examples/simple_example.py b/libs/python/bench-ui/examples/simple_example.py
new file mode 100644
index 00000000..6a99abed
--- /dev/null
+++ b/libs/python/bench-ui/examples/simple_example.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+import time
+from bench_ui import launch_window, get_element_rect, execute_javascript
+from pathlib import Path
+
+HTML = """
+<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8" />
+  <title>Bench UI Example</title>
+</head>
+<body>
+  <h1 id="t">Bench UI Example</h1>
+  <div id="target">Hello from pywebview</div>
+</body>
+</html>
+"""
+
+def main():
+    """Launch a demo window, outline #target in a screenshot, and run a JS snippet."""
+    # Launch a window with inline HTML content
+    pid = launch_window(
+        html=HTML,
+        title="Bench UI Example",
+        width=800,
+        height=600,
+    )
+    print(f"Launched window with PID: {pid}")
+
+    # Give the window a brief moment to render
+    time.sleep(1.0)
+
+    # Query the client rect of an element via CSS selector in SCREEN space
+    rect = get_element_rect(pid, "#target", space="screen")
+    print("Element rect (screen space):", rect)
+
+    # Take a screenshot and overlay the bbox
+    try:
+        from PIL import ImageGrab, ImageDraw
+
+        img = ImageGrab.grab()  # full screen
+        draw = ImageDraw.Draw(img)
+        x, y, w, h = rect["x"], rect["y"], rect["width"], rect["height"]
+        box = (x, y, x + w, y + h)
+        draw.rectangle(box, outline=(255, 0, 0), width=3)
+        out_path = Path(__file__).parent / "output_overlay.png"
+        img.save(out_path)
+        print(f"Saved overlay screenshot to: {out_path}")
+    except Exception as e:
+        print(f"Failed to capture/annotate screenshot: {e}")
+
+    # Execute arbitrary JavaScript
+    text = execute_javascript(pid, "document.querySelector('#t')?.textContent")
+    print("text:", text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/libs/python/bench-ui/pyproject.toml b/libs/python/bench-ui/pyproject.toml
new file mode 100644
index 00000000..c37643a9
--- /dev/null
+++ b/libs/python/bench-ui/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+[project]
+name = "cua-bench-ui"
+version = "0.1.0"
+description = "Lightweight webUI window launcher for CUA bench using pywebview"
+readme = "README.md"
+authors = [
+    { name = "TryCua", email = "gh@trycua.com" }
+]
+dependencies = [
+    "pywebview>=5.3",
+    "aiohttp>=3.9.0",
+]
+requires-python = ">=3.12"
+
+[tool.pdm]
+distribution = true
+
+[tool.pdm.build]
+includes = ["bench_ui/"]
+source-includes = ["README.md"]