From 8eb662bf4dee862ddaec2a7f71fdb7ff55b203eb Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 12:45:00 -0400 Subject: [PATCH] added base models to benchmark --- libs/python/agent/benchmarks/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index aa99184f..d7ef4445 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -3,6 +3,9 @@ Shared utilities for ScreenSpot-Pro benchmarking and interactive testing. """ +import dotenv +dotenv.load_dotenv() + import asyncio import base64 import os @@ -85,9 +88,12 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: models = [ # === ComputerAgent model strings === + "openai/computer-use-preview", + "anthropic/claude-opus-4-20250514", # f"{local_provider}HelloKKMe/GTA1-7B", # f"{local_provider}HelloKKMe/GTA1-32B", - "openai/computer-use-preview+openai/gpt-4o-mini" + "openai/computer-use-preview+openai/gpt-4o-mini", + "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini", # === Reference model classes === # GTA1Model("HelloKKMe/GTA1-7B"),