mirror of
https://github.com/trycua/computer.git
synced 2026-01-05 12:59:58 -06:00
switch demo to gpt-5 in eval_osworld.ipynb
This commit is contained in:
@@ -39,6 +39,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Load environment variables from ../.env\n",
|
||||
"load_dotenv(dotenv_path='../.env')\n",
|
||||
@@ -47,6 +48,8 @@
|
||||
"# - HUD_API_KEY (for HUD access)\n",
|
||||
"# - ANTHROPIC_API_KEY (for Claude models)\n",
|
||||
"# - OPENAI_API_KEY (for OpenAI models)\n",
|
||||
"assert os.getenv('HUD_API_KEY') is not None\n",
|
||||
"assert os.getenv('ANTHROPIC_API_KEY') is not None or os.getenv('OPENAI_API_KEY') is not None\n",
|
||||
"\n",
|
||||
"from pprint import pprint"
|
||||
]
|
||||
@@ -72,7 +75,7 @@
|
||||
"# You can swap \"hud-evals/OSWorld-Verified\" -> \"hud-evals/SheetBench-V2\" to test SheetBench.\n",
|
||||
"await run_single_task(\n",
|
||||
" dataset=\"hud-evals/OSWorld-Verified\",\n",
|
||||
" model=\"openai/computer-use-preview+openai/gpt-5-nano\", # or any supported model string\n",
|
||||
" model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n",
|
||||
" task_id=155 # open last tab task (easy)\n",
|
||||
")"
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user