switch demo to gpt-5 in eval_osworld.ipynb

This commit is contained in:
bowman
2025-10-06 20:27:01 -07:00
parent 1fe41d57f4
commit b0da328323

View File

@@ -39,6 +39,7 @@
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
+"import os\n",
"\n",
"# Load environment variables from ../.env\n",
"load_dotenv(dotenv_path='../.env')\n",
@@ -47,6 +48,8 @@
"# - HUD_API_KEY (for HUD access)\n",
"# - ANTHROPIC_API_KEY (for Claude models)\n",
"# - OPENAI_API_KEY (for OpenAI models)\n",
+"assert os.getenv('HUD_API_KEY') is not None\n",
+"assert os.getenv('ANTHROPIC_API_KEY') is not None or os.getenv('OPENAI_API_KEY') is not None\n",
"\n",
"from pprint import pprint"
]
@@ -72,7 +75,7 @@
"# You can swap \"hud-evals/OSWorld-Verified\" -> \"hud-evals/SheetBench-V2\" to test SheetBench.\n",
"await run_single_task(\n",
" dataset=\"hud-evals/OSWorld-Verified\",\n",
-" model=\"openai/computer-use-preview+openai/gpt-5-nano\", # or any supported model string\n",
+" model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n",
" task_id=155 # open last tab task (easy)\n",
")"
]