diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index 1bc58e48..27a56020 100644 --- a/notebooks/eval_osworld.ipynb +++ b/notebooks/eval_osworld.ipynb @@ -39,6 +39,7 @@ "outputs": [], "source": [ "from dotenv import load_dotenv\n", + "import os\n", "\n", "# Load environment variables from ../.env\n", "load_dotenv(dotenv_path='../.env')\n", @@ -47,6 +48,8 @@ "# - HUD_API_KEY (for HUD access)\n", "# - ANTHROPIC_API_KEY (for Claude models)\n", "# - OPENAI_API_KEY (for OpenAI models)\n", + "assert os.getenv('HUD_API_KEY'), 'HUD_API_KEY is missing or empty; set it in ../.env'\n", + "assert os.getenv('ANTHROPIC_API_KEY') or os.getenv('OPENAI_API_KEY'), 'Set ANTHROPIC_API_KEY or OPENAI_API_KEY in ../.env'\n", "\n", "from pprint import pprint" ] @@ -72,7 +75,7 @@ "# You can swap \"hud-evals/OSWorld-Verified\" -> \"hud-evals/SheetBench-V2\" to test SheetBench.\n", "await run_single_task(\n", " dataset=\"hud-evals/OSWorld-Verified\",\n", - " model=\"openai/computer-use-preview+openai/gpt-5-nano\", # or any supported model string\n", + " model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n", " task_id=155 # open last tab task (easy)\n", ")" ]