Reuse agent configuration for HUD evaluation

2026-02-17 20:10:07 -06:00 · 2025-09-12 14:49:54 -04:00
parent 48e42d2334
commit ea1caea73c
1 changed file with 10 additions and 8 deletions
--- a/notebooks/hud_hackathon.ipynb
+++ b/notebooks/hud_hackathon.ipynb
@@ -124,14 +124,16 @@
    "    verbosity=logging.INFO\n",
    ")\n",
    "\n",
+    "agent_config = {\n",
+    "    \"model\": \"openai/computer-use-preview\",\n",
+    "    \"tools\": [computer],\n",
+    "    \"trajectory_dir\": str(Path(\"trajectories\")),\n",
+    "    \"only_n_most_recent_images\": 3,\n",
+    "    \"verbosity\": logging.INFO\n",
+    "}\n",
+    "\n",
    "# Create agent\n",
-    "agent = ComputerAgent(\n",
-    "    model=\"openai/computer-use-preview\",\n",
-    "    tools=[computer],\n",
-    "    trajectory_dir=str(Path(\"trajectories\")),\n",
-    "    only_n_most_recent_images=3,\n",
-    "    verbosity=logging.INFO\n",
-    ")"
+    "agent = ComputerAgent(**agent_config)"
   ]
  },
  {
@@ -195,7 +197,7 @@
    "results = await run_full_dataset(\n",
    "    dataset=\"ddupont/OSWorld-Tiny-Public\",          # You can also pass a Dataset or a list[dict]\n",
    "    job_name=job_name,                   # Optional; defaults to a timestamp for custom datasets\n",
-    "    model=\"openai/computer-use-preview\", # Or any supported model string\n",
+    "    **agent_config,\n",
    "    max_concurrent=20,                   # Tune to your infra\n",
    "    max_steps=50,                        # Safety cap per task\n",
    "    #split=\"train[:5]\"                   # Limit to just 5 tasks\n",