From b0da328323621298f74e5a0fc8673689cdd9b9e4 Mon Sep 17 00:00:00 2001 From: bowman Date: Mon, 6 Oct 2025 20:27:01 -0700 Subject: [PATCH] switch demo to gpt-5 in eval_osworld.ipynb --- notebooks/eval_osworld.ipynb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index 1bc58e48..27a56020 100644 --- a/notebooks/eval_osworld.ipynb +++ b/notebooks/eval_osworld.ipynb @@ -39,6 +39,7 @@ "outputs": [], "source": [ "from dotenv import load_dotenv\n", + "import os\n", "\n", "# Load environment variables from ../.env\n", "load_dotenv(dotenv_path='../.env')\n", @@ -47,6 +48,8 @@ "# - HUD_API_KEY (for HUD access)\n", "# - ANTHROPIC_API_KEY (for Claude models)\n", "# - OPENAI_API_KEY (for OpenAI models)\n", + "assert os.getenv('HUD_API_KEY'), 'HUD_API_KEY is missing or empty; set it in ../.env'\n", + "assert os.getenv('ANTHROPIC_API_KEY') or os.getenv('OPENAI_API_KEY'), 'Set ANTHROPIC_API_KEY or OPENAI_API_KEY in ../.env'\n", "\n", "from pprint import pprint" ] @@ -72,7 +75,7 @@ "# You can swap \"hud-evals/OSWorld-Verified\" -> \"hud-evals/SheetBench-V2\" to test SheetBench.\n", "await run_single_task(\n", " dataset=\"hud-evals/OSWorld-Verified\",\n", - " model=\"openai/computer-use-preview+openai/gpt-5-nano\", # or any supported model string\n", + " model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n", " task_id=155 # open last tab task (easy)\n", ")" ]