From 4ec4bbc888c7fba91c17f3a13e92f6ca9207a92b Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 12 Sep 2025 21:41:09 -0400 Subject: [PATCH] Add link to HUD integration documentation --- notebooks/hud_hackathon.ipynb | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/notebooks/hud_hackathon.ipynb b/notebooks/hud_hackathon.ipynb index b0e6e84cb..7192b0004 100644 --- a/notebooks/hud_hackathon.ipynb +++ b/notebooks/hud_hackathon.ipynb @@ -239,16 +239,17 @@ "from pprint import pprint\n", "from agent.integrations.hud import run_full_dataset\n", "\n", - "# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n", "job_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n", "\n", + "# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n", + "# See the documentation here: https://docs.trycua.com/docs/agent-sdk/integrations/hud#running-a-full-dataset\n", "results = await run_full_dataset(\n", - " dataset=\"ddupont/OSWorld-Tiny-Public\", # You can also pass a Dataset or a list[dict]\n", - " job_name=job_name, # Optional; defaults to a timestamp for custom datasets\n", + " dataset=\"ddupont/OSWorld-Tiny-Public\",\n", + " job_name=job_name,\n", " **agent_config,\n", - " max_concurrent=20, # Tune to your infra\n", - " max_steps=50, # Safety cap per task\n", - " #split=\"train[:5]\" # Limit to just 5 tasks\n", + " max_concurrent=20,\n", + " max_steps=50,\n", + " #split=\"train[:5]\"\n", ")\n", "\n", "# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",