From 6f5b7363f1794483446937c5e0372ccf69373073 Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Tue, 19 Aug 2025 12:27:09 -0400
Subject: [PATCH 1/3] Added ToolError and IllegalArgumentError

---
 libs/python/agent/agent/agent.py | 21 ++++++++++++++++++++-
 libs/python/agent/agent/types.py |  9 +++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py
index 7f30166f..81164e42 100644
--- a/libs/python/agent/agent/agent.py
+++ b/libs/python/agent/agent/agent.py
@@ -7,7 +7,12 @@ from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Calla
 
 from litellm.responses.utils import Usage
 
-from .types import Messages, AgentCapability
+from .types import (
+    Messages,
+    AgentCapability,
+    ToolError,
+    IllegalArgumentError
+)
 from .decorators import find_agent_config
 import json
 import litellm
@@ -30,6 +35,14 @@ from .computers import (
     make_computer_handler
 )
 
+def is_callable_with(f, *args, **kwargs):
+    """Check if function can be called with given arguments."""
+    try:
+        inspect.signature(f).bind(*args, **kwargs)
+        return True
+    except TypeError:
+        return False
+
 def get_json(obj: Any, max_depth: int = 10) -> Any:
     def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any:
         if seen is None:
@@ -439,6 +452,8 @@ class ComputerAgent:
             # Execute the computer action
             computer_method = getattr(computer, action_type, None)
             if computer_method:
+                if not is_callable_with(computer_method, **action_args):
+                    raise IllegalArgumentError(f"Invalid arguments for computer method {action_type}: {action_args}")
                 await computer_method(**action_args)
             else:
                 print(f"Unknown computer action: {action_type}")
@@ -493,6 +508,10 @@ class ComputerAgent:
         
             args = json.loads(item.get("arguments"))
 
+            # Validate arguments before execution
+            if not is_callable_with(function, **args):
+                raise IllegalArgumentError(f"Invalid arguments for function {item.get('name')}: {args}")
+
             # Execute function - use asyncio.to_thread for non-async functions
             if inspect.iscoroutinefunction(function):
                 result = await function(**args)
diff --git a/libs/python/agent/agent/types.py b/libs/python/agent/agent/types.py
index 23946c86..f47c9286 100644
--- a/libs/python/agent/agent/types.py
+++ b/libs/python/agent/agent/types.py
@@ -16,6 +16,15 @@ Tools = Optional[Iterable[ToolParam]]
 AgentResponse = ResponsesAPIResponse 
 AgentCapability = Literal["step", "click"]
 
+# Exception types
+class ToolError(RuntimeError):
+    """Base class for tool-related errors (for example IllegalArgumentError)."""
+    pass
+
+class IllegalArgumentError(ToolError):
+    """Raised when a function or computer method is given arguments its signature rejects."""
+    pass
+
 
 # Agent config registration
 class AgentConfigInfo(BaseModel):

From c9e3af8f17677910a33436120d467608222c7527 Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Tue, 19 Aug 2025 12:41:37 -0400
Subject: [PATCH 2/3] Implemented computer call and tool call errors

---
 libs/python/agent/agent/agent.py     | 190 ++++++++++++++-------------
 libs/python/agent/agent/responses.py |  47 +++++++
 2 files changed, 145 insertions(+), 92 deletions(-)

diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py
index 81164e42..0c00346d 100644
--- a/libs/python/agent/agent/agent.py
+++ b/libs/python/agent/agent/agent.py
@@ -13,6 +13,7 @@ from .types import (
     ToolError,
     IllegalArgumentError
 )
+from .responses import make_tool_error_item, replace_failed_computer_calls_with_function_calls
 from .decorators import find_agent_config
 import json
 import litellm
@@ -418,7 +419,8 @@ class ComputerAgent:
     
     async def _handle_item(self, item: Any, computer: Optional[AsyncComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
         """Handle each item; may cause a computer action + screenshot."""
-        if ignore_call_ids and item.get("call_id") and item.get("call_id") in ignore_call_ids:
+        call_id = item.get("call_id")
+        if ignore_call_ids and call_id and call_id in ignore_call_ids:
             return []
         
         item_type = item.get("type", None)
@@ -432,102 +434,105 @@ class ComputerAgent:
             #             print(content_item.get("text"))
             return []
         
-        if item_type == "computer_call":
-            await self._on_computer_call_start(item)
-            if not computer:
-                raise ValueError("Computer handler is required for computer calls")
+        try:
+            if item_type == "computer_call":
+                await self._on_computer_call_start(item)
+                if not computer:
+                    raise ValueError("Computer handler is required for computer calls")
 
-            # Perform computer actions
-            action = item.get("action")
-            action_type = action.get("type")
-            if action_type is None:
-                print(f"Action type cannot be `None`: action={action}, action_type={action_type}")
-                return []
+                # Perform computer actions
+                action = item.get("action")
+                action_type = action.get("type")
+                if action_type is None:
+                    print(f"Action type cannot be `None`: action={action}, action_type={action_type}")
+                    return []
+                
+                # Extract action arguments (all fields except 'type')
+                action_args = {k: v for k, v in action.items() if k != "type"}
+                
+                # print(f"{action_type}({action_args})")
+                
+                # Execute the computer action
+                computer_method = getattr(computer, action_type, None)
+                if computer_method:
+                    if not is_callable_with(computer_method, **action_args):
+                        raise IllegalArgumentError(f"Invalid arguments for computer method {action_type}: {action_args}")
+                    await computer_method(**action_args)
+                else:
+                    print(f"Unknown computer action: {action_type}")
+                    return []
+                
+                # Take screenshot after action
+                if self.screenshot_delay and self.screenshot_delay > 0:
+                    await asyncio.sleep(self.screenshot_delay)
+                screenshot_base64 = await computer.screenshot()
+                await self._on_screenshot(screenshot_base64, "screenshot_after")
+                
+                # Handle safety checks
+                pending_checks = item.get("pending_safety_checks", [])
+                acknowledged_checks = []
+                for check in pending_checks:
+                    check_message = check.get("message", str(check))
+                    acknowledged_checks.append(check)
+                    # TODO: implement a callback for safety checks
+                    # if acknowledge_safety_check_callback(check_message, allow_always=True):
+                    #     acknowledged_checks.append(check)
+                    # else:
+                    #     raise ValueError(f"Safety check failed: {check_message}")
+                
+                # Create call output
+                call_output = {
+                    "type": "computer_call_output",
+                    "call_id": item.get("call_id"),
+                    "acknowledged_safety_checks": acknowledged_checks,
+                    "output": {
+                        "type": "input_image",
+                        "image_url": f"data:image/png;base64,{screenshot_base64}",
+                    },
+                }
+                
+                # # Additional URL safety checks for browser environments
+                # if await computer.get_environment() == "browser":
+                #     current_url = await computer.get_current_url()
+                #     call_output["output"]["current_url"] = current_url
+                #     # TODO: implement a callback for URL safety checks
+                #     # check_blocklisted_url(current_url)
+                
+                result = [call_output]
+                await self._on_computer_call_end(item, result)
+                return result
             
-            # Extract action arguments (all fields except 'type')
-            action_args = {k: v for k, v in action.items() if k != "type"}
+            if item_type == "function_call":
+                await self._on_function_call_start(item)
+                # Perform function call
+                function = self._get_tool(item.get("name"))
+                if not function:
+                    raise ValueError(f"Function {item.get("name")} not found")
             
-            # print(f"{action_type}({action_args})")
-            
-            # Execute the computer action
-            computer_method = getattr(computer, action_type, None)
-            if computer_method:
-                if not is_callable_with(computer_method, **action_args):
-                    raise IllegalArgumentError(f"Invalid arguments for computer method {action_type}: {action_args}")
-                await computer_method(**action_args)
-            else:
-                print(f"Unknown computer action: {action_type}")
-                return []
-            
-            # Take screenshot after action
-            if self.screenshot_delay and self.screenshot_delay > 0:
-                await asyncio.sleep(self.screenshot_delay)
-            screenshot_base64 = await computer.screenshot()
-            await self._on_screenshot(screenshot_base64, "screenshot_after")
-            
-            # Handle safety checks
-            pending_checks = item.get("pending_safety_checks", [])
-            acknowledged_checks = []
-            for check in pending_checks:
-                check_message = check.get("message", str(check))
-                acknowledged_checks.append(check)
-                # TODO: implement a callback for safety checks
-                # if acknowledge_safety_check_callback(check_message, allow_always=True):
-                #     acknowledged_checks.append(check)
-                # else:
-                #     raise ValueError(f"Safety check failed: {check_message}")
-            
-            # Create call output
-            call_output = {
-                "type": "computer_call_output",
-                "call_id": item.get("call_id"),
-                "acknowledged_safety_checks": acknowledged_checks,
-                "output": {
-                    "type": "input_image",
-                    "image_url": f"data:image/png;base64,{screenshot_base64}",
-                },
-            }
-            
-            # # Additional URL safety checks for browser environments
-            # if await computer.get_environment() == "browser":
-            #     current_url = await computer.get_current_url()
-            #     call_output["output"]["current_url"] = current_url
-            #     # TODO: implement a callback for URL safety checks
-            #     # check_blocklisted_url(current_url)
-            
-            result = [call_output]
-            await self._on_computer_call_end(item, result)
-            return result
-        
-        if item_type == "function_call":
-            await self._on_function_call_start(item)
-            # Perform function call
-            function = self._get_tool(item.get("name"))
-            if not function:
-                raise ValueError(f"Function {item.get("name")} not found")
-        
-            args = json.loads(item.get("arguments"))
+                args = json.loads(item.get("arguments"))
 
-            # Validate arguments before execution
-            if not is_callable_with(function, **args):
-                raise IllegalArgumentError(f"Invalid arguments for function {item.get('name')}: {args}")
+                # Validate arguments before execution
+                if not is_callable_with(function, **args):
+                    raise IllegalArgumentError(f"Invalid arguments for function {item.get('name')}: {args}")
 
-            # Execute function - use asyncio.to_thread for non-async functions
-            if inspect.iscoroutinefunction(function):
-                result = await function(**args)
-            else:
-                result = await asyncio.to_thread(function, **args)
-        
-            # Create function call output
-            call_output = {
-                "type": "function_call_output",
-                "call_id": item.get("call_id"),
-                "output": str(result),
-            }
-        
-            result = [call_output]
-            await self._on_function_call_end(item, result)
-            return result
+                # Execute function - use asyncio.to_thread for non-async functions
+                if inspect.iscoroutinefunction(function):
+                    result = await function(**args)
+                else:
+                    result = await asyncio.to_thread(function, **args)
+            
+                # Create function call output
+                call_output = {
+                    "type": "function_call_output",
+                    "call_id": item.get("call_id"),
+                    "output": str(result),
+                }
+            
+                result = [call_output]
+                await self._on_function_call_end(item, result)
+                return result
+        except ToolError as e:
+            return [make_tool_error_item(repr(e), call_id)]
 
         return []
 
@@ -588,6 +593,7 @@ class ComputerAgent:
             # - PII anonymization
             # - Image retention policy
             combined_messages = old_items + new_items
+            combined_messages = replace_failed_computer_calls_with_function_calls(combined_messages)
             preprocessed_messages = await self._on_llm_start(combined_messages)
             
             loop_kwargs = {
diff --git a/libs/python/agent/agent/responses.py b/libs/python/agent/agent/responses.py
index fb034a70..34318bce 100644
--- a/libs/python/agent/agent/responses.py
+++ b/libs/python/agent/agent/responses.py
@@ -252,6 +252,53 @@ def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], err
         }
     ]
 
+def make_tool_error_item(error_message: str, call_id: Optional[str] = None) -> Dict[str, Any]:
+    """Build a function_call_output item whose output is the JSON-encoded error message."""
+    return {
+        "type": "function_call_output",
+        "call_id": call_id or random_id(),  # a fresh id is generated only when none is supplied
+        "output": json.dumps({"error": error_message}),
+    }
+
+def replace_failed_computer_calls_with_function_calls(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Rewrite failed computer_call items as function_call items.
+
+    A computer_call is treated as failed when some function_call_output item
+    carries the same call_id. The rewrite exists because computer_call_output
+    items do not support text output.
+
+    Args:
+        messages: List of message items to process; the list itself is not mutated.
+
+    Returns:
+        A new list in which every failed computer_call has been replaced by a
+        function_call named "computer" whose arguments are the JSON-encoded action.
+    """
+    # call_ids that already have a function_call_output (empty/None ids are skipped)
+    errored_ids = {
+        entry.get("call_id")
+        for entry in messages
+        if entry.get("type") == "function_call_output" and entry.get("call_id")
+    }
+
+    def _as_function_call(entry: Dict[str, Any]) -> Dict[str, Any]:
+        # Same id/call_id as the computer_call; the action dict becomes the arguments.
+        return {
+            "type": "function_call",
+            "id": entry.get("id", random_id()),
+            "call_id": entry.get("call_id"),
+            "name": "computer",
+            "arguments": json.dumps(entry.get("action", {})),
+        }
+
+    rewritten = []
+    for entry in messages:
+        failed = entry.get("type") == "computer_call" and entry.get("call_id") in errored_ids
+        rewritten.append(_as_function_call(entry) if failed else entry)
+
+    return rewritten
+
 # Conversion functions between element descriptions and coordinates
 def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]:
     """

From 07346d566d3775c1c3cf803c4df12481e88c4fb2 Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Tue, 19 Aug 2025 15:53:59 -0400
Subject: [PATCH 3/3] Improved error message

---
 libs/python/agent/agent/agent.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py
index 0c00346d..0428b797 100644
--- a/libs/python/agent/agent/agent.py
+++ b/libs/python/agent/agent/agent.py
@@ -36,13 +36,14 @@ from .computers import (
     make_computer_handler
 )
 
-def is_callable_with(f, *args, **kwargs):
-    """Check if function can be called with given arguments."""
-    try:
-        inspect.signature(f).bind(*args, **kwargs)
-        return True
-    except TypeError:
-        return False
+def assert_callable_with(f, *args, **kwargs):
+    """Return True if f accepts the given arguments, else raise IllegalArgumentError."""
+    sig = inspect.signature(f)  # resolved once; reused in the error message below
+    try:
+        sig.bind(*args, **kwargs)
+    except TypeError as e:
+        raise IllegalArgumentError(f"Expected {sig}, got args={args} kwargs={kwargs}") from e
+    return True
 
 def get_json(obj: Any, max_depth: int = 10) -> Any:
     def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any:
@@ -455,8 +456,7 @@ class ComputerAgent:
                 # Execute the computer action
                 computer_method = getattr(computer, action_type, None)
                 if computer_method:
-                    if not is_callable_with(computer_method, **action_args):
-                        raise IllegalArgumentError(f"Invalid arguments for computer method {action_type}: {action_args}")
+                    assert_callable_with(computer_method, **action_args)
                     await computer_method(**action_args)
                 else:
                     print(f"Unknown computer action: {action_type}")
@@ -512,8 +512,7 @@ class ComputerAgent:
                 args = json.loads(item.get("arguments"))
 
                 # Validate arguments before execution
-                if not is_callable_with(function, **args):
-                    raise IllegalArgumentError(f"Invalid arguments for function {item.get('name')}: {args}")
+                assert_callable_with(function, **args)
 
                 # Execute function - use asyncio.to_thread for non-async functions
                 if inspect.iscoroutinefunction(function):