mirror of
https://github.com/trycua/computer.git
synced 2026-05-19 15:38:48 -05:00
Add link to HUD integration documentation
This commit is contained in:
@@ -239,16 +239,17 @@
|
||||
"from pprint import pprint\n",
|
||||
"from agent.integrations.hud import run_full_dataset\n",
|
||||
"\n",
|
||||
"# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n",
|
||||
"job_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n",
|
||||
"\n",
|
||||
"# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n",
|
||||
"# See the documentation here: https://docs.trycua.com/docs/agent-sdk/integrations/hud#running-a-full-dataset\n",
|
||||
"results = await run_full_dataset(\n",
|
||||
" dataset=\"ddupont/OSWorld-Tiny-Public\", # You can also pass a Dataset or a list[dict]\n",
|
||||
" job_name=job_name, # Optional; defaults to a timestamp for custom datasets\n",
|
||||
" dataset=\"ddupont/OSWorld-Tiny-Public\",\n",
|
||||
" job_name=job_name,\n",
|
||||
" **agent_config,\n",
|
||||
" max_concurrent=20, # Tune to your infra\n",
|
||||
" max_steps=50, # Safety cap per task\n",
|
||||
" #split=\"train[:5]\" # Limit to just 5 tasks\n",
|
||||
" max_concurrent=20,\n",
|
||||
" max_steps=50,\n",
|
||||
" #split=\"train[:5]\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",
|
||||
|
||||
Reference in New Issue
Block a user