diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index aa99184f..d7ef4445 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -3,6 +3,9 @@ Shared utilities for ScreenSpot-Pro benchmarking and interactive testing. """ +import dotenv +dotenv.load_dotenv() + import asyncio import base64 import os @@ -85,9 +88,12 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: models = [ # === ComputerAgent model strings === + "openai/computer-use-preview", + "anthropic/claude-opus-4-20250514", # f"{local_provider}HelloKKMe/GTA1-7B", # f"{local_provider}HelloKKMe/GTA1-32B", - "openai/computer-use-preview+openai/gpt-4o-mini" + "openai/computer-use-preview+openai/gpt-4o-mini", + "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini", # === Reference model classes === # GTA1Model("HelloKKMe/GTA1-7B"),