mirror of
https://github.com/trycua/computer.git
synced 2026-01-02 03:20:22 -06:00
Added disable_response_storage
This commit is contained in:
@@ -29,6 +29,7 @@ class ComputerAgent:
|
||||
trajectory_dir: str = "trajectories",
|
||||
only_n_most_recent_images: Optional[int] = None,
|
||||
verbosity: int = logging.INFO,
|
||||
disable_response_storage: bool = False,
|
||||
):
|
||||
"""Initialize the ComputerAgent.
|
||||
|
||||
@@ -45,6 +46,7 @@ class ComputerAgent:
|
||||
trajectory_dir: Directory to save the trajectory.
|
||||
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
|
||||
verbosity: Logging level.
|
||||
disable_response_storage: Whether to disable response storage on the provider side. Enable this if your organization operates under a Zero Data Retention policy.
|
||||
"""
|
||||
# Basic agent configuration
|
||||
self.max_retries = max_retries
|
||||
@@ -55,6 +57,7 @@ class ComputerAgent:
|
||||
self._retry_count = 0
|
||||
self._initialized = False
|
||||
self._in_context = False
|
||||
self.disable_response_storage = disable_response_storage
|
||||
|
||||
# Set logging level
|
||||
logger.setLevel(verbosity)
|
||||
@@ -105,6 +108,7 @@ class ComputerAgent:
|
||||
trajectory_dir=trajectory_dir,
|
||||
only_n_most_recent_images=only_n_most_recent_images,
|
||||
provider_base_url=self.provider_base_url,
|
||||
disable_response_storage=disable_response_storage,
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(f"Failed to create loop: {str(e)}")
|
||||
|
||||
@@ -29,6 +29,7 @@ class BaseLoop(ABC):
|
||||
save_trajectory: bool = True,
|
||||
only_n_most_recent_images: Optional[int] = 2,
|
||||
callback_handlers: Optional[List[CallbackHandler]] = None,
|
||||
disable_response_storage: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize base agent loop.
|
||||
@@ -43,6 +44,7 @@ class BaseLoop(ABC):
|
||||
base_dir: Base directory for saving experiment data
|
||||
save_trajectory: Whether to save trajectory data
|
||||
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
|
||||
disable_response_storage: Whether to disable response storage on the provider side. Enable this if your organization operates under a Zero Data Retention policy.
|
||||
**kwargs: Additional provider-specific arguments
|
||||
"""
|
||||
self.computer = computer
|
||||
@@ -54,6 +56,7 @@ class BaseLoop(ABC):
|
||||
self.base_dir = base_dir
|
||||
self.save_trajectory = save_trajectory
|
||||
self.only_n_most_recent_images = only_n_most_recent_images
|
||||
self.disable_response_storage = disable_response_storage
|
||||
self._kwargs = kwargs
|
||||
|
||||
# Initialize message manager
|
||||
|
||||
@@ -30,6 +30,7 @@ class LoopFactory:
|
||||
only_n_most_recent_images: Optional[int] = None,
|
||||
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
|
||||
provider_base_url: Optional[str] = None,
|
||||
disable_response_storage: bool = False,
|
||||
) -> BaseLoop:
|
||||
"""Create and return an appropriate loop instance based on type."""
|
||||
if loop_type == AgentLoop.ANTHROPIC:
|
||||
@@ -49,6 +50,7 @@ class LoopFactory:
|
||||
save_trajectory=save_trajectory,
|
||||
base_dir=trajectory_dir,
|
||||
only_n_most_recent_images=only_n_most_recent_images,
|
||||
disable_response_storage=disable_response_storage,
|
||||
)
|
||||
elif loop_type == AgentLoop.OPENAI:
|
||||
# Lazy import OpenAILoop only when needed
|
||||
@@ -68,6 +70,7 @@ class LoopFactory:
|
||||
base_dir=trajectory_dir,
|
||||
only_n_most_recent_images=only_n_most_recent_images,
|
||||
acknowledge_safety_check_callback=acknowledge_safety_check_callback,
|
||||
disable_response_storage=disable_response_storage,
|
||||
)
|
||||
elif loop_type == AgentLoop.OMNI:
|
||||
# Lazy import OmniLoop and related classes only when needed
|
||||
@@ -97,6 +100,7 @@ class LoopFactory:
|
||||
only_n_most_recent_images=only_n_most_recent_images,
|
||||
parser=OmniParser(),
|
||||
provider_base_url=provider_base_url,
|
||||
disable_response_storage=disable_response_storage,
|
||||
)
|
||||
elif loop_type == AgentLoop.UITARS:
|
||||
# Lazy import UITARSLoop only when needed
|
||||
@@ -117,6 +121,7 @@ class LoopFactory:
|
||||
only_n_most_recent_images=only_n_most_recent_images,
|
||||
provider_base_url=provider_base_url,
|
||||
provider=provider,
|
||||
disable_response_storage=disable_response_storage,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported loop type: {loop_type}")
|
||||
|
||||
@@ -69,6 +69,44 @@ class StandardMessageManager:
|
||||
return self._apply_image_retention(self.messages)
|
||||
return self.messages
|
||||
|
||||
def add_openai_response(self, response: Dict[str, Any]) -> None:
    """Add OpenAI response output to message history.

    Extracts the output items from an OpenAI response and appends them as a
    single assistant message, so conversation state can be maintained
    manually instead of being chained through ``previous_response_id``.

    Handled output item types:
        * ``message``: every ``output_text`` part found in the item's
          ``content`` list becomes a ``{"type": "text", "text": ...}`` entry.
        * ``output_text``: a bare text item, converted the same way.
        * ``computer_call``: kept as-is for tool execution tracking.

    Items of any other type, and items that are not dictionaries, are
    skipped. If nothing usable is found, no message is added.

    Args:
        response: OpenAI API response containing output items
    """
    if not isinstance(response, dict) or "output" not in response:
        logger.warning("Invalid OpenAI response format for adding to message history")
        return

    output_items = response["output"]
    if not isinstance(output_items, list):
        logger.warning("OpenAI response output is not a list")
        return

    # Convert output items to assistant message content
    assistant_content: List[Dict[str, Any]] = []
    for item in output_items:
        if not isinstance(item, dict):
            continue

        item_type = item.get("type")
        if item_type == "output_text":
            assistant_content.append({"type": "text", "text": item.get("text", "")})
        elif item_type == "message":
            # Assistant text arrives wrapped in a "message" item whose
            # "content" list holds the actual "output_text" parts; without
            # unwrapping it the text would be lost from the history.
            parts = item.get("content")
            if not isinstance(parts, list):
                continue
            for part in parts:
                if isinstance(part, dict) and part.get("type") == "output_text":
                    assistant_content.append({"type": "text", "text": part.get("text", "")})
        elif item_type == "computer_call":
            # Keep computer calls as-is for tool execution tracking
            assistant_content.append(item)

    # Add as assistant message only if we have content
    if assistant_content:
        self.add_assistant_message(assistant_content)
|
||||
|
||||
def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Apply image retention policy to messages.
|
||||
|
||||
|
||||
@@ -15,11 +15,12 @@ logger = logging.getLogger(__name__)
|
||||
class OpenAIAPIHandler:
|
||||
"""Handler for OpenAI API interactions."""
|
||||
|
||||
def __init__(self, loop: "OpenAILoop"):
|
||||
def __init__(self, loop: "OpenAILoop", disable_response_storage: bool = False):
|
||||
"""Initialize the API handler.
|
||||
|
||||
Args:
|
||||
loop: OpenAI loop instance
|
||||
disable_response_storage: Whether to disable response storage
|
||||
"""
|
||||
self.loop = loop
|
||||
self.api_key = os.getenv("OPENAI_API_KEY")
|
||||
@@ -45,7 +46,7 @@ class OpenAIAPIHandler:
|
||||
display_width: str,
|
||||
display_height: str,
|
||||
previous_response_id: Optional[str] = None,
|
||||
os_type: str,
|
||||
os_type: str = "mac",
|
||||
) -> Dict[str, Any]:
|
||||
"""Send an initial request to the OpenAI API with a screenshot.
|
||||
|
||||
@@ -61,10 +62,7 @@ class OpenAIAPIHandler:
|
||||
# Convert from our internal OS types to the ones OpenAI expects
|
||||
if os_type == "macos":
|
||||
os_type = "mac"
|
||||
elif os_type == "linux":
|
||||
os_type = "ubuntu"
|
||||
|
||||
if os_type not in ["mac", "windows", "ubuntu", "browser"]:
|
||||
if os_type not in ["mac", "windows", "linux", "browser"]:
|
||||
raise ValueError(f"Invalid OS type: {os_type}")
|
||||
|
||||
# Convert display dimensions to integers
|
||||
@@ -143,7 +141,7 @@ class OpenAIAPIHandler:
|
||||
],
|
||||
"input": input_array,
|
||||
"reasoning": {
|
||||
"generate_summary": "concise",
|
||||
"summary": "concise",
|
||||
},
|
||||
"truncation": "auto",
|
||||
}
|
||||
@@ -207,10 +205,8 @@ class OpenAIAPIHandler:
|
||||
# Convert from our internal OS types to the ones OpenAI expects
|
||||
if os_type == "macos":
|
||||
os_type = "mac"
|
||||
elif os_type == "linux":
|
||||
os_type = "ubuntu"
|
||||
|
||||
if os_type not in ["mac", "windows", "ubuntu", "browser"]:
|
||||
if os_type not in ["mac", "windows", "linux", "browser"]:
|
||||
raise ValueError(f"Invalid OS type: {os_type}")
|
||||
|
||||
# Convert display dimensions to integers
|
||||
@@ -289,6 +285,9 @@ class OpenAIAPIHandler:
|
||||
},
|
||||
}
|
||||
],
|
||||
"reasoning": {
|
||||
"summary": "concise",
|
||||
},
|
||||
"truncation": "auto",
|
||||
}
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ class OpenAILoop(BaseLoop):
|
||||
retry_delay: float = 1.0,
|
||||
save_trajectory: bool = True,
|
||||
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
|
||||
disable_response_storage: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the OpenAI loop.
|
||||
@@ -54,6 +55,7 @@ class OpenAILoop(BaseLoop):
|
||||
retry_delay: Delay between retries in seconds
|
||||
save_trajectory: Whether to save trajectory data
|
||||
acknowledge_safety_check_callback: Optional callback for safety check acknowledgment
|
||||
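disable_response_storage: Whether to disable response storage on the provider side. Enable this if your organization operates under a Zero Data Retention policy. When enabled, the loop does not chain requests via previous_response_id and instead sends the full message history with every request.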
|
||||
**kwargs: Additional provider-specific arguments
|
||||
"""
|
||||
# Always use computer-use-preview model
|
||||
@@ -72,6 +74,7 @@ class OpenAILoop(BaseLoop):
|
||||
base_dir=base_dir,
|
||||
save_trajectory=save_trajectory,
|
||||
only_n_most_recent_images=only_n_most_recent_images,
|
||||
disable_response_storage=disable_response_storage,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -90,7 +93,7 @@ class OpenAILoop(BaseLoop):
|
||||
self.loop_task = None # Store the loop task for cancellation
|
||||
|
||||
# Initialize handlers
|
||||
self.api_handler = OpenAIAPIHandler(self)
|
||||
self.api_handler = OpenAIAPIHandler(self, self.disable_response_storage)
|
||||
self.response_handler = OpenAIResponseHandler(self)
|
||||
|
||||
# Initialize tool manager with callback
|
||||
@@ -275,24 +278,47 @@ class OpenAILoop(BaseLoop):
|
||||
|
||||
# Call API
|
||||
screen_size = await self.computer.interface.get_screen_size()
|
||||
response = await self.api_handler.send_initial_request(
|
||||
messages=self.message_manager.get_messages(), # Apply image retention policy
|
||||
display_width=str(screen_size["width"]),
|
||||
display_height=str(screen_size["height"]),
|
||||
previous_response_id=self.last_response_id,
|
||||
os_type=self.computer.os_type,
|
||||
)
|
||||
|
||||
# Store response ID for next request
|
||||
# OpenAI API response structure: the ID is in the response dictionary
|
||||
if isinstance(response, dict) and "id" in response:
|
||||
self.last_response_id = response["id"] # Update instance variable
|
||||
logger.info(f"Received response with ID: {self.last_response_id}")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Could not find response ID in OpenAI response: {type(response)}"
|
||||
|
||||
# Choose API call method based on disable_response_storage setting
|
||||
if self.disable_response_storage:
|
||||
# Manual conversation state management - always send full message history
|
||||
response = await self.api_handler.send_initial_request(
|
||||
messages=self.message_manager.get_messages(), # Apply image retention policy
|
||||
display_width=str(screen_size["width"]),
|
||||
display_height=str(screen_size["height"]),
|
||||
previous_response_id=None, # Don't use response chaining
|
||||
os_type=self.computer.os_type,
|
||||
)
|
||||
# Don't reset last_response_id to None - keep the previous value if available
|
||||
else:
|
||||
# Use OpenAI's response storage with previous_response_id
|
||||
response = await self.api_handler.send_initial_request(
|
||||
messages=self.message_manager.get_messages(), # Apply image retention policy
|
||||
display_width=str(screen_size["width"]),
|
||||
display_height=str(screen_size["height"]),
|
||||
previous_response_id=self.last_response_id,
|
||||
os_type=self.computer.os_type,
|
||||
)
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
print("========== send_initial_request ===========")
|
||||
pprint(response)
|
||||
print("===========================================")
|
||||
|
||||
if self.disable_response_storage:
|
||||
# Manual conversation state management - add response to message history
|
||||
self.message_manager.add_openai_response(response)
|
||||
else:
|
||||
# Store response ID for next request
|
||||
# OpenAI API response structure: the ID is in the response dictionary
|
||||
if isinstance(response, dict) and "id" in response:
|
||||
self.last_response_id = response["id"] # Update instance variable
|
||||
logger.info(f"Received response with ID: {self.last_response_id}")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Could not find response ID in OpenAI response: {type(response)}"
|
||||
)
|
||||
# Don't reset last_response_id to None - keep the previous value if available
|
||||
|
||||
|
||||
# Log standardized response for ease of parsing
|
||||
@@ -393,27 +419,54 @@ class OpenAILoop(BaseLoop):
|
||||
)
|
||||
self.message_manager.add_user_message([computer_call_output])
|
||||
|
||||
# For follow-up requests with previous_response_id, we only need to send
|
||||
# the computer_call_output, not the full message history
|
||||
# The API handler will extract this from the message history
|
||||
if isinstance(self.last_response_id, str):
|
||||
response = await self.api_handler.send_computer_call_request(
|
||||
# Choose API call method based on disable_response_storage setting
|
||||
if self.disable_response_storage:
|
||||
# Manual conversation state management - send full message history
|
||||
response = await self.api_handler.send_initial_request(
|
||||
messages=self.message_manager.get_messages(), # Apply image retention policy
|
||||
display_width=str(screen_size["width"]),
|
||||
display_height=str(screen_size["height"]),
|
||||
previous_response_id=self.last_response_id, # Use instance variable
|
||||
previous_response_id=None, # Don't use response chaining
|
||||
os_type=self.computer.os_type,
|
||||
)
|
||||
|
||||
# Store response ID for next request
|
||||
if isinstance(response, dict) and "id" in response:
|
||||
self.last_response_id = response["id"] # Update instance variable
|
||||
logger.info(f"Received response with ID: {self.last_response_id}")
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
print("========== send_initial_request (manual mode) ===========")
|
||||
pprint(response)
|
||||
print("========================================================")
|
||||
|
||||
# Add response to message history for manual state management
|
||||
self.message_manager.add_openai_response(response)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Could not find response ID in OpenAI response: {type(response)}"
|
||||
)
|
||||
# Keep using the previous response ID if we can't find a new one
|
||||
# Use OpenAI's response storage with previous_response_id
|
||||
# For follow-up requests with previous_response_id, we only need to send
|
||||
# the computer_call_output, not the full message history
|
||||
# The API handler will extract this from the message history
|
||||
if isinstance(self.last_response_id, str):
|
||||
response = await self.api_handler.send_computer_call_request(
|
||||
messages=self.message_manager.get_messages(), # Apply image retention policy
|
||||
display_width=str(screen_size["width"]),
|
||||
display_height=str(screen_size["height"]),
|
||||
previous_response_id=self.last_response_id, # Use instance variable
|
||||
os_type=self.computer.os_type,
|
||||
)
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
print("========== send_computer_call_request ===========")
|
||||
pprint(response)
|
||||
print("============================================")
|
||||
|
||||
# Store response ID for next request
|
||||
if isinstance(response, dict) and "id" in response:
|
||||
self.last_response_id = response["id"] # Update instance variable
|
||||
logger.info(f"Received response with ID: {self.last_response_id}")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Could not find response ID in OpenAI response: {type(response)}"
|
||||
)
|
||||
# Keep using the previous response ID if we can't find a new one
|
||||
|
||||
# Process the response
|
||||
# await self.response_handler.process_response(response, queue)
|
||||
@@ -455,20 +508,3 @@ class OpenAILoop(BaseLoop):
|
||||
}
|
||||
)
|
||||
await queue.put(None) # Signal that we're done
|
||||
|
||||
def get_last_response_id(self) -> Optional[str]:
|
||||
"""Get the last response ID.
|
||||
|
||||
Returns:
|
||||
The last response ID or None if no response has been received
|
||||
"""
|
||||
return self.last_response_id
|
||||
|
||||
def set_last_response_id(self, response_id: str) -> None:
|
||||
"""Set the last response ID.
|
||||
|
||||
Args:
|
||||
response_id: OpenAI response ID to set
|
||||
"""
|
||||
self.last_response_id = response_id
|
||||
logger.info(f"Manually set response ID to: {self.last_response_id}")
|
||||
|
||||
@@ -91,6 +91,9 @@ all = [
|
||||
"mlx-vlm>=0.1.27; sys_platform == 'darwin'"
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"]
|
||||
|
||||
[tool.pdm]
|
||||
distribution = true
|
||||
|
||||
|
||||
Reference in New Issue
Block a user