mirror of
https://github.com/trycua/computer.git
synced 2026-01-03 03:49:58 -06:00
Reuse agent configuration for HUD evaluation
This commit is contained in:
@@ -124,14 +124,16 @@
|
||||
" verbosity=logging.INFO\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent_config = {\n",
|
||||
" \"model\": \"openai/computer-use-preview\",\n",
|
||||
" \"tools\": [computer],\n",
|
||||
" \"trajectory_dir\": str(Path(\"trajectories\")),\n",
|
||||
" \"only_n_most_recent_images\": 3,\n",
|
||||
" \"verbosity\": logging.INFO\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Create agent\n",
|
||||
"agent = ComputerAgent(\n",
|
||||
" model=\"openai/computer-use-preview\",\n",
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
")"
|
||||
"agent = ComputerAgent(**agent_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -195,7 +197,7 @@
|
||||
"results = await run_full_dataset(\n",
|
||||
" dataset=\"ddupont/OSWorld-Tiny-Public\", # You can also pass a Dataset or a list[dict]\n",
|
||||
" job_name=job_name, # Optional; defaults to a timestamp for custom datasets\n",
|
||||
" model=\"openai/computer-use-preview\", # Or any supported model string\n",
|
||||
" **agent_config,\n",
|
||||
" max_concurrent=20, # Tune to your infra\n",
|
||||
" max_steps=50, # Safety cap per task\n",
|
||||
" #split=\"train[:5]\" # Limit to just 5 tasks\n",
|
||||
|
||||
Reference in New Issue
Block a user