diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py
index 8a203e0e4..e27060ff0 100644
--- a/libs/python/agent/agent/integrations/hud/__init__.py
+++ b/libs/python/agent/agent/integrations/hud/__init__.py
@@ -11,6 +11,7 @@ Exports:
 import time
 from typing import Any, Optional
 
+from agent.computers import is_agent_computer
 from datasets import load_dataset, Dataset
 from hud.datasets import Task, run_dataset
 from hud import trace
@@ -55,6 +56,15 @@ async def run_single_task(
     sample_task = dataset[task_id]  # type: ignore[index]
     task_prompt = sample_task.get("prompt", f"Task {sample_task.get('id', 0)}")  # type: ignore[attr-defined]
 
+    # Filter out any existing Computer tools from `tools`;
+    # the eval framework will add its own Computer tool per task.
+    if tools:
+        tools = [
+            tool 
+            for tool in tools 
+            if not is_agent_computer(tool)
+        ]
+    
     with trace(name=task_prompt):
         task = Task(**sample_task)  # type: ignore[arg-type]
 
@@ -118,6 +128,15 @@ async def run_full_dataset(
         dataset_name = "custom"
         job_name = job_name or f"Evaluation {time.strftime('%H:%M %Y-%m-%d')}"
 
+    # Filter out any existing Computer tools from `tools`;
+    # the eval framework will add its own Computer tool per task.
+    if tools:
+        tools = [
+            tool 
+            for tool in tools 
+            if not is_agent_computer(tool)
+        ]
+    
     # Execute evaluation
     return await run_dataset(
         name=job_name,