mirror of
https://github.com/trycua/computer.git
synced 2026-01-08 14:30:25 -06:00
208 lines
6.5 KiB
Python
208 lines
6.5 KiB
Python
"""
|
|
Functions for making various Responses API items from different types of responses.
|
|
Based on the OpenAI spec for Responses API items.
|
|
"""
|
|
|
|
import base64
|
|
import json
|
|
import uuid
|
|
from typing import List, Dict, Any, Literal, Union, Optional
|
|
|
|
from openai.types.responses.response_computer_tool_call_param import (
|
|
ResponseComputerToolCallParam,
|
|
ActionClick,
|
|
ActionDoubleClick,
|
|
ActionDrag,
|
|
ActionDragPath,
|
|
ActionKeypress,
|
|
ActionMove,
|
|
ActionScreenshot,
|
|
ActionScroll,
|
|
ActionType as ActionTypeAction,
|
|
ActionWait,
|
|
PendingSafetyCheck
|
|
)
|
|
|
|
from openai.types.responses.response_function_tool_call_param import ResponseFunctionToolCallParam
|
|
from openai.types.responses.response_output_text_param import ResponseOutputTextParam
|
|
from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary
|
|
from openai.types.responses.response_output_message_param import ResponseOutputMessageParam
|
|
from openai.types.responses.easy_input_message_param import EasyInputMessageParam
|
|
from openai.types.responses.response_input_image_param import ResponseInputImageParam
|
|
|
|
def random_id():
|
|
return str(uuid.uuid4())
|
|
|
|
# User message items
|
|
def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessageParam:
|
|
return EasyInputMessageParam(
|
|
content=[
|
|
ResponseInputImageParam(
|
|
type="input_image",
|
|
image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}"
|
|
)
|
|
],
|
|
role="user",
|
|
type="message"
|
|
)
|
|
|
|
# Text items
|
|
def make_reasoning_item(reasoning: str) -> ResponseReasoningItemParam:
|
|
return ResponseReasoningItemParam(
|
|
id=random_id(),
|
|
summary=[
|
|
Summary(text=reasoning, type="summary_text")
|
|
],
|
|
type="reasoning"
|
|
)
|
|
|
|
def make_output_text_item(content: str) -> ResponseOutputMessageParam:
|
|
return ResponseOutputMessageParam(
|
|
id=random_id(),
|
|
content=[
|
|
ResponseOutputTextParam(
|
|
text=content,
|
|
type="output_text",
|
|
annotations=[]
|
|
)
|
|
],
|
|
role="assistant",
|
|
status="completed",
|
|
type="message"
|
|
)
|
|
|
|
# Function call items
|
|
def make_function_call_item(function_name: str, arguments: Dict[str, Any], call_id: Optional[str] = None) -> ResponseFunctionToolCallParam:
|
|
return ResponseFunctionToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
name=function_name,
|
|
arguments=json.dumps(arguments),
|
|
status="completed",
|
|
type="function_call"
|
|
)
|
|
|
|
# Computer tool call items
|
|
def make_click_item(x: int, y: int, button: Literal["left", "right", "wheel", "back", "forward"] = "left", call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionClick(
|
|
button=button,
|
|
type="click",
|
|
x=x,
|
|
y=y
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_double_click_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionDoubleClick(
|
|
type="double_click",
|
|
x=x,
|
|
y=y
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_drag_item(path: List[Dict[str, int]], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
drag_path = [ActionDragPath(x=point["x"], y=point["y"]) for point in path]
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionDrag(
|
|
path=drag_path,
|
|
type="drag"
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_keypress_item(keys: List[str], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionKeypress(
|
|
keys=keys,
|
|
type="keypress"
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_move_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionMove(
|
|
type="move",
|
|
x=x,
|
|
y=y
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_screenshot_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionScreenshot(
|
|
type="screenshot"
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_scroll_item(x: int, y: int, scroll_x: int, scroll_y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionScroll(
|
|
scroll_x=scroll_x,
|
|
scroll_y=scroll_y,
|
|
type="scroll",
|
|
x=x,
|
|
y=y
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_type_item(text: str, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionTypeAction(
|
|
text=text,
|
|
type="type"
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|
|
|
|
def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
|
|
return ResponseComputerToolCallParam(
|
|
id=random_id(),
|
|
call_id=call_id if call_id else random_id(),
|
|
action=ActionWait(
|
|
type="wait"
|
|
),
|
|
pending_safety_checks=[],
|
|
status="completed",
|
|
type="computer_call"
|
|
)
|