From 8dee77bf6897d446238045e68dea5e5b245a625a Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Wed, 27 Aug 2025 17:44:04 -0400
Subject: [PATCH] Update HUD integration docs; bump hud-python to >=0.4.12,<0.5.0

---
 .../docs/agent-sdk/integrations/hud.mdx       | 44 +++++++++----------
 libs/python/agent/pyproject.toml              |  4 +-
 2 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/docs/content/docs/agent-sdk/integrations/hud.mdx b/docs/content/docs/agent-sdk/integrations/hud.mdx
index b517121e..cee5f77f 100644
--- a/docs/content/docs/agent-sdk/integrations/hud.mdx
+++ b/docs/content/docs/agent-sdk/integrations/hud.mdx
@@ -10,37 +10,35 @@ The HUD integration allows you to use ComputerAgent with the [HUD benchmarking f
 ```bash
 pip install "cua-agent[hud]"
 ## or install hud-python directly
-# pip install hud-python==0.2.10
+# pip install "hud-python>=0.4.12,<0.5.0"
 ```
 
 ## Usage
 
 ```python
-from agent.integrations.hud import run_job
-from hud import load_taskset
-from hud.taskset import TaskSet
-import logging
+# Quick single-task smoke test (top-level `await` needs an async context, e.g. a notebook or asyncio.run)
+from agent.integrations.hud import run_single_task
 
-# Load taskset
-taskset = await load_taskset("OSWorld-Verified")
-taskset = TaskSet(tasks=taskset[:10]) # limit to 10 tasks instead of all 370
-
-# Run benchmark job
-job = await run_job(
-    model="openai/computer-use-preview",
-    # model="anthropic/claude-3-5-sonnet-20241022",
-    # model="huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5",
-    task_or_taskset=taskset,
-    job_name="test-computeragent-job",
-    max_concurrent_tasks=5,
-    # add any extra ComputerAgent kwargs:
-    verbosity=logging.INFO,  # Enable logging
-    # trajectory_dir=".."       # Save trajectories locally
+await run_single_task(
+    dataset="hud-evals/OSWorld-Verified-XLang",   # or another HUD dataset
+    model="openai/computer-use-preview+openai/gpt-5-nano",  # any supported model string
+    task_id=155,  # e.g., reopen last closed tab
 )
 
-# Get results OR view them at app.hud.so
-print(await job.get_analytics())
-print(f"View results at: https://app.hud.so/jobs/{job.id}")
+# Run a small split of OSWorld-Verified in parallel
+from agent.integrations.hud import run_full_dataset
+
+results = await run_full_dataset(
+    dataset="hud-evals/OSWorld-Verified-XLang",   # can also pass a Dataset or list[dict]
+    model="openai/computer-use-preview",
+    split="train[:3]",           # try a few tasks to start
+    max_concurrent=20,            # tune to your infra
+    max_steps=50                  # safety cap per task
+)
+
+# Environment variables required:
+# - HUD_API_KEY (HUD access)
+# - OPENAI_API_KEY or ANTHROPIC_API_KEY depending on your chosen model(s)
 ```
 
 **Available Benchmarks:**
diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml
index 4dd27062..4be2f6b6 100644
--- a/libs/python/agent/pyproject.toml
+++ b/libs/python/agent/pyproject.toml
@@ -55,7 +55,7 @@ cli = [
     "yaspin>=3.1.0",
 ]
 hud = [
-    "hud-python==0.2.10",
+    "hud-python>=0.4.12,<0.5.0",
 ]
 all = [
     # omni requirements
@@ -72,7 +72,7 @@ all = [
     # cli requirements
     "yaspin>=3.1.0",
     # hud requirements
-    "hud-python==0.2.10",
+    "hud-python>=0.4.12,<0.5.0",
 ]
 
 [tool.uv]