mirror of
https://github.com/trycua/computer.git
synced 2026-01-03 03:49:58 -06:00
Merge pull request #333 from trycua/fix/passthrough-tool-errors
[Agent] Implement left_mouse_down, left_mouse_up, and tool errors
This commit is contained in:
@@ -94,14 +94,14 @@ def print_action(action_type: str, details: Dict[str, Any], total_cost: float):
|
||||
# Format action details
|
||||
args_str = ""
|
||||
if action_type == "click" and "x" in details and "y" in details:
|
||||
args_str = f"({details['x']}, {details['y']})"
|
||||
args_str = f"_{details['button']}({details['x']}, {details['y']})"
|
||||
elif action_type == "type" and "text" in details:
|
||||
text = details["text"]
|
||||
if len(text) > 50:
|
||||
text = text[:47] + "..."
|
||||
args_str = f'"{text}"'
|
||||
elif action_type == "key" and "key" in details:
|
||||
args_str = f"'{details['key']}'"
|
||||
args_str = f'("{text}")'
|
||||
elif action_type == "key" and "text" in details:
|
||||
args_str = f"('{details['text']}')"
|
||||
elif action_type == "scroll" and "x" in details and "y" in details:
|
||||
args_str = f"({details['x']}, {details['y']})"
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ Computer handler implementation for OpenAI computer-use-preview protocol.
|
||||
"""
|
||||
|
||||
import base64
|
||||
from typing import Dict, List, Any, Literal, Union
|
||||
from typing import Dict, List, Any, Literal, Union, Optional
|
||||
from .types import Computer
|
||||
|
||||
|
||||
@@ -14,11 +14,13 @@ class OpenAIComputerHandler:
|
||||
"""Initialize with a computer interface (from tool schema)."""
|
||||
self.interface = computer_interface
|
||||
|
||||
# ==== Computer-Use-Preview Action Space ====
|
||||
|
||||
async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
    """Get the current environment type.

    Returns:
        Always the literal ``"windows"``; no detection is performed.
    """
    # For now, return a default - this could be enhanced to detect actual environment
    return "windows"
|
||||
|
||||
|
||||
async def get_dimensions(self) -> tuple[int, int]:
|
||||
"""Get screen dimensions as (width, height)."""
|
||||
screen_size = await self.interface.get_screen_size()
|
||||
@@ -94,6 +96,14 @@ class OpenAIComputerHandler:
|
||||
# For now, return empty string
|
||||
return ""
|
||||
|
||||
# ==== Anthropic Computer Action Space ====
|
||||
async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
    """Left mouse down at coordinates.

    Delegates to ``self.interface.mouse_down`` with ``button="left"``.

    Args:
        x: Passed through unchanged as the first positional argument.
        y: Passed through unchanged as the second positional argument.

    NOTE(review): how the interface interprets ``None`` coordinates is not
    visible here - presumably "current cursor position"; confirm.
    """
    await self.interface.mouse_down(x, y, button="left")
|
||||
|
||||
async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
    """Left mouse up at coordinates.

    Delegates to ``self.interface.mouse_up`` with ``button="left"``.

    Args:
        x: Passed through unchanged as the first positional argument.
        y: Passed through unchanged as the second positional argument.

    NOTE(review): how the interface interprets ``None`` coordinates is not
    visible here - presumably "current cursor position"; confirm.
    """
    await self.interface.mouse_up(x, y, button="left")
|
||||
|
||||
def acknowledge_safety_check_callback(message: str, allow_always: bool = False) -> bool:
|
||||
"""Safety check callback for user acknowledgment."""
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -206,6 +206,51 @@ def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallPar
|
||||
type="computer_call"
|
||||
)
|
||||
|
||||
# Extra anthropic computer calls
|
||||
def make_left_mouse_down_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]:
    """Build a ``computer_call`` item whose action is ``left_mouse_down``.

    Args:
        x: Value stored under ``action["x"]`` (may be ``None``).
        y: Value stored under ``action["y"]`` (may be ``None``).
        call_id: Call identifier to embed; a falsy value (``None`` or ``""``)
            is replaced by a fresh ``random_id()``.

    Returns:
        A dict with a newly generated ``id``, the resolved ``call_id``, the
        action payload, an empty ``pending_safety_checks`` list, status
        ``"completed"`` and type ``"computer_call"``.
    """
    return {
        "id": random_id(),
        "call_id": call_id if call_id else random_id(),
        "action": {
            "type": "left_mouse_down",
            "x": x,
            "y": y,
        },
        "pending_safety_checks": [],
        "status": "completed",
        "type": "computer_call",
    }
|
||||
|
||||
def make_left_mouse_up_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]:
    """Build a ``computer_call`` item whose action is ``left_mouse_up``.

    Args:
        x: Value stored under ``action["x"]`` (may be ``None``).
        y: Value stored under ``action["y"]`` (may be ``None``).
        call_id: Call identifier to embed; a falsy value (``None`` or ``""``)
            is replaced by a fresh ``random_id()``.

    Returns:
        A dict with a newly generated ``id``, the resolved ``call_id``, the
        action payload, an empty ``pending_safety_checks`` list, status
        ``"completed"`` and type ``"computer_call"``.
    """
    return {
        "id": random_id(),
        "call_id": call_id if call_id else random_id(),
        "action": {
            "type": "left_mouse_up",
            "x": x,
            "y": y,
        },
        "pending_safety_checks": [],
        "status": "completed",
        "type": "computer_call",
    }
|
||||
|
||||
def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], error_message: str, call_id: Optional[str] = None) -> List[Dict[str, Any]]:
    """Build the pair of items that records a tool call which failed.

    Args:
        tool_name: Stored as the ``name`` of the ``function_call`` item.
        tool_kwargs: Serialized with ``json.dumps`` into ``arguments``.
        error_message: Embedded as ``{"error": error_message}`` (JSON-encoded)
            in the output item.
        call_id: Identifier shared by both items; a falsy value (``None`` or
            ``""``) is replaced by a fresh ``random_id()``.

    Returns:
        A two-element list: the ``function_call`` item followed by the
        ``function_call_output`` item carrying the same ``call_id``.
    """
    # Resolve once so the call and its output are linked by the same id.
    call_id = call_id if call_id else random_id()
    return [
        {
            "type": "function_call",
            "id": random_id(),
            "call_id": call_id,
            "name": tool_name,
            "arguments": json.dumps(tool_kwargs),
        },
        {
            "type": "function_call_output",
            "call_id": call_id,
            "output": json.dumps({"error": error_message}),
        },
    ]
|
||||
|
||||
# Conversion functions between element descriptions and coordinates
|
||||
def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]:
|
||||
|
||||
Reference in New Issue
Block a user