From 8eb662bf4dee862ddaec2a7f71fdb7ff55b203eb Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Tue, 5 Aug 2025 12:45:00 -0400
Subject: [PATCH] Add base models to benchmark and load .env via dotenv

---
 libs/python/agent/benchmarks/utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py
index aa99184f..d7ef4445 100644
--- a/libs/python/agent/benchmarks/utils.py
+++ b/libs/python/agent/benchmarks/utils.py
@@ -3,6 +3,9 @@
 Shared utilities for ScreenSpot-Pro benchmarking and interactive testing.
 """
 
+import dotenv
+dotenv.load_dotenv()
+
 import asyncio
 import base64
 import os
@@ -85,9 +88,12 @@ def get_available_models() -> List[Union[str, ModelProtocol]]:
 
     models = [
         # === ComputerAgent model strings ===
+        "openai/computer-use-preview",
+        "anthropic/claude-opus-4-20250514",
         # f"{local_provider}HelloKKMe/GTA1-7B",
         # f"{local_provider}HelloKKMe/GTA1-32B",
-        "openai/computer-use-preview+openai/gpt-4o-mini"
+        "openai/computer-use-preview+openai/gpt-4o-mini",
+        "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini",
         
         # === Reference model classes ===
         # GTA1Model("HelloKKMe/GTA1-7B"),