Made coordinates optional for left_mouse_up/down

This commit is contained in:
Dillon DuPont
2025-08-07 16:47:34 -04:00
parent 1b406b197a
commit 4b0b07240c
3 changed files with 37 additions and 17 deletions

View File

@@ -3,7 +3,7 @@ Computer handler implementation for OpenAI computer-use-preview protocol.
"""
import base64
from typing import Dict, List, Any, Literal, Union
from typing import Dict, List, Any, Literal, Union, Optional
from .types import Computer
@@ -97,11 +97,11 @@ class OpenAIComputerHandler:
return ""
# ==== Anthropic Computer Action Space ====
async def left_mouse_down(self, x: int, y: int) -> None:
async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
"""Left mouse down at coordinates."""
await self.interface.mouse_down(x, y, button="left")
async def left_mouse_up(self, x: int, y: int) -> None:
async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
"""Left mouse up at coordinates."""
await self.interface.mouse_up(x, y, button="left")

View File

@@ -568,6 +568,26 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[
"action": "screenshot"
}
})
elif action_type == "left_mouse_down":
tool_use_content.append({
"type": "tool_use",
"id": call_id,
"name": "computer",
"input": {
"action": "left_mouse_down",
"coordinate": [action.get("x", None), action.get("y", None)]
}
})
elif action_type == "left_mouse_up":
tool_use_content.append({
"type": "tool_use",
"id": call_id,
"name": "computer",
"input": {
"action": "left_mouse_up",
"coordinate": [action.get("x", None), action.get("y", None)]
}
})
# Convert tool_use_content to OpenAI tool_calls format
openai_tool_calls = []
@@ -762,10 +782,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
# "y": coordinate[1] if len(coordinate) > 1 else 0
# }
# })
coordinate = tool_input.get("coordinate", [0, 0])
coordinate = tool_input.get("coordinate", [None, None])
responses_items.append(make_left_mouse_down_item(
x=coordinate[0] if len(coordinate) > 0 else 0,
y=coordinate[1] if len(coordinate) > 1 else 0,
x=coordinate[0] if len(coordinate) > 0 else None,
y=coordinate[1] if len(coordinate) > 1 else None,
call_id=call_id
))
elif action_type == "left_mouse_up":
@@ -780,10 +800,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
# "y": coordinate[1] if len(coordinate) > 1 else 0
# }
# })
coordinate = tool_input.get("coordinate", [0, 0])
coordinate = tool_input.get("coordinate", [None, None])
responses_items.append(make_left_mouse_up_item(
x=coordinate[0] if len(coordinate) > 0 else 0,
y=coordinate[1] if len(coordinate) > 1 else 0,
x=coordinate[0] if len(coordinate) > 0 else None,
y=coordinate[1] if len(coordinate) > 1 else None,
call_id=call_id
))
elif action_type == "hold_key":
@@ -1189,10 +1209,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
# "y": 280
# }
# }
coordinate = args.get("coordinate", [0, 0])
coordinate = args.get("coordinate", [None, None])
responses_items.append(make_left_mouse_down_item(
x=coordinate[0] if len(coordinate) > 0 else 0,
y=coordinate[1] if len(coordinate) > 1 else 0,
x=coordinate[0] if len(coordinate) > 0 else None,
y=coordinate[1] if len(coordinate) > 1 else None,
call_id=call_id
))
elif action_type == "left_mouse_up":
@@ -1220,10 +1240,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
# "y": 300
# }
# }
coordinate = args.get("coordinate", [0, 0])
coordinate = args.get("coordinate", [None, None])
responses_items.append(make_left_mouse_up_item(
x=coordinate[0] if len(coordinate) > 0 else 0,
y=coordinate[1] if len(coordinate) > 1 else 0,
x=coordinate[0] if len(coordinate) > 0 else None,
y=coordinate[1] if len(coordinate) > 1 else None,
call_id=call_id
))
elif action_type == "hold_key":

View File

@@ -207,7 +207,7 @@ def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallPar
)
# Extra anthropic computer calls
def make_left_mouse_down_item(x: int, y: int, call_id: Optional[str] = None) -> Dict[str, Any]:
def make_left_mouse_down_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]:
return {
"id": random_id(),
"call_id": call_id if call_id else random_id(),
@@ -221,7 +221,7 @@ def make_left_mouse_down_item(x: int, y: int, call_id: Optional[str] = None) ->
"type": "computer_call"
}
def make_left_mouse_up_item(x: int, y: int, call_id: Optional[str] = None) -> Dict[str, Any]:
def make_left_mouse_up_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]:
return {
"id": random_id(),
"call_id": call_id if call_id else random_id(),