Added disable_response_storage

This commit is contained in:
Dillon DuPont
2025-07-21 12:07:54 -04:00
parent 96fd9cb98e
commit 9068ec32d8
8 changed files with 331 additions and 60 deletions

View File

@@ -29,6 +29,7 @@ class ComputerAgent:
trajectory_dir: str = "trajectories",
only_n_most_recent_images: Optional[int] = None,
verbosity: int = logging.INFO,
disable_response_storage: bool = False,
):
"""Initialize the ComputerAgent.
@@ -45,6 +46,7 @@ class ComputerAgent:
trajectory_dir: Directory to save the trajectory.
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
verbosity: Logging level.
disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
"""
# Basic agent configuration
self.max_retries = max_retries
@@ -55,6 +57,7 @@ class ComputerAgent:
self._retry_count = 0
self._initialized = False
self._in_context = False
self.disable_response_storage = disable_response_storage
# Set logging level
logger.setLevel(verbosity)
@@ -105,6 +108,7 @@ class ComputerAgent:
trajectory_dir=trajectory_dir,
only_n_most_recent_images=only_n_most_recent_images,
provider_base_url=self.provider_base_url,
disable_response_storage=disable_response_storage,
)
except ValueError as e:
logger.error(f"Failed to create loop: {str(e)}")

View File

@@ -29,6 +29,7 @@ class BaseLoop(ABC):
save_trajectory: bool = True,
only_n_most_recent_images: Optional[int] = 2,
callback_handlers: Optional[List[CallbackHandler]] = None,
disable_response_storage: bool = False,
**kwargs,
):
"""Initialize base agent loop.
@@ -43,6 +44,7 @@ class BaseLoop(ABC):
base_dir: Base directory for saving experiment data
save_trajectory: Whether to save trajectory data
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
**kwargs: Additional provider-specific arguments
"""
self.computer = computer
@@ -54,6 +56,7 @@ class BaseLoop(ABC):
self.base_dir = base_dir
self.save_trajectory = save_trajectory
self.only_n_most_recent_images = only_n_most_recent_images
self.disable_response_storage = disable_response_storage
self._kwargs = kwargs
# Initialize message manager

View File

@@ -30,6 +30,7 @@ class LoopFactory:
only_n_most_recent_images: Optional[int] = None,
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
provider_base_url: Optional[str] = None,
disable_response_storage: bool = False,
) -> BaseLoop:
"""Create and return an appropriate loop instance based on type."""
if loop_type == AgentLoop.ANTHROPIC:
@@ -49,6 +50,7 @@ class LoopFactory:
save_trajectory=save_trajectory,
base_dir=trajectory_dir,
only_n_most_recent_images=only_n_most_recent_images,
disable_response_storage=disable_response_storage,
)
elif loop_type == AgentLoop.OPENAI:
# Lazy import OpenAILoop only when needed
@@ -68,6 +70,7 @@ class LoopFactory:
base_dir=trajectory_dir,
only_n_most_recent_images=only_n_most_recent_images,
acknowledge_safety_check_callback=acknowledge_safety_check_callback,
disable_response_storage=disable_response_storage,
)
elif loop_type == AgentLoop.OMNI:
# Lazy import OmniLoop and related classes only when needed
@@ -97,6 +100,7 @@ class LoopFactory:
only_n_most_recent_images=only_n_most_recent_images,
parser=OmniParser(),
provider_base_url=provider_base_url,
disable_response_storage=disable_response_storage,
)
elif loop_type == AgentLoop.UITARS:
# Lazy import UITARSLoop only when needed
@@ -117,6 +121,7 @@ class LoopFactory:
only_n_most_recent_images=only_n_most_recent_images,
provider_base_url=provider_base_url,
provider=provider,
disable_response_storage=disable_response_storage,
)
else:
raise ValueError(f"Unsupported loop type: {loop_type}")

View File

@@ -69,6 +69,44 @@ class StandardMessageManager:
return self._apply_image_retention(self.messages)
return self.messages
def add_openai_response(self, response: Dict[str, Any]) -> None:
"""Add OpenAI response output to message history.
This method extracts the output items from an OpenAI response and adds them
as assistant messages to maintain conversation state manually.
Args:
response: OpenAI API response containing output items
"""
if not isinstance(response, dict) or "output" not in response:
logger.warning("Invalid OpenAI response format for adding to message history")
return
output_items = response.get("output", [])
if not isinstance(output_items, list):
logger.warning("OpenAI response output is not a list")
return
# Convert output items to assistant message content
assistant_content = []
for item in output_items:
if not isinstance(item, dict):
continue
item_type = item.get("type")
if item_type == "output_text":
assistant_content.append({
"type": "text",
"text": item.get("text", "")
})
elif item_type == "computer_call":
# Keep computer calls as-is for tool execution tracking
assistant_content.append(item)
# Add as assistant message if we have content
if assistant_content:
self.add_assistant_message(assistant_content)
def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Apply image retention policy to messages.

View File

@@ -15,11 +15,12 @@ logger = logging.getLogger(__name__)
class OpenAIAPIHandler:
"""Handler for OpenAI API interactions."""
def __init__(self, loop: "OpenAILoop"):
def __init__(self, loop: "OpenAILoop", disable_response_storage: bool = False):
"""Initialize the API handler.
Args:
loop: OpenAI loop instance
disable_response_storage: Whether to disable response storage
"""
self.loop = loop
self.api_key = os.getenv("OPENAI_API_KEY")
@@ -45,7 +46,7 @@ class OpenAIAPIHandler:
display_width: str,
display_height: str,
previous_response_id: Optional[str] = None,
os_type: str,
os_type: str = "mac",
) -> Dict[str, Any]:
"""Send an initial request to the OpenAI API with a screenshot.
@@ -61,10 +62,7 @@ class OpenAIAPIHandler:
# Convert from our internal OS types to the ones OpenAI expects
if os_type == "macos":
os_type = "mac"
elif os_type == "linux":
os_type = "ubuntu"
if os_type not in ["mac", "windows", "ubuntu", "browser"]:
if os_type not in ["mac", "windows", "linux", "browser"]:
raise ValueError(f"Invalid OS type: {os_type}")
# Convert display dimensions to integers
@@ -143,7 +141,7 @@ class OpenAIAPIHandler:
],
"input": input_array,
"reasoning": {
"generate_summary": "concise",
"summary": "concise",
},
"truncation": "auto",
}
@@ -207,10 +205,8 @@ class OpenAIAPIHandler:
# Convert from our internal OS types to the ones OpenAI expects
if os_type == "macos":
os_type = "mac"
elif os_type == "linux":
os_type = "ubuntu"
if os_type not in ["mac", "windows", "ubuntu", "browser"]:
if os_type not in ["mac", "windows", "linux", "browser"]:
raise ValueError(f"Invalid OS type: {os_type}")
# Convert display dimensions to integers
@@ -289,6 +285,9 @@ class OpenAIAPIHandler:
},
}
],
"reasoning": {
"summary": "concise",
},
"truncation": "auto",
}

View File

@@ -40,6 +40,7 @@ class OpenAILoop(BaseLoop):
retry_delay: float = 1.0,
save_trajectory: bool = True,
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
disable_response_storage: bool = False,
**kwargs,
):
"""Initialize the OpenAI loop.
@@ -54,6 +55,7 @@ class OpenAILoop(BaseLoop):
retry_delay: Delay between retries in seconds
save_trajectory: Whether to save trajectory data
acknowledge_safety_check_callback: Optional callback for safety check acknowledgment
disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
**kwargs: Additional provider-specific arguments
"""
# Always use computer-use-preview model
@@ -72,6 +74,7 @@ class OpenAILoop(BaseLoop):
base_dir=base_dir,
save_trajectory=save_trajectory,
only_n_most_recent_images=only_n_most_recent_images,
disable_response_storage=disable_response_storage,
**kwargs,
)
@@ -90,7 +93,7 @@ class OpenAILoop(BaseLoop):
self.loop_task = None # Store the loop task for cancellation
# Initialize handlers
self.api_handler = OpenAIAPIHandler(self)
self.api_handler = OpenAIAPIHandler(self, self.disable_response_storage)
self.response_handler = OpenAIResponseHandler(self)
# Initialize tool manager with callback
@@ -275,24 +278,47 @@ class OpenAILoop(BaseLoop):
# Call API
screen_size = await self.computer.interface.get_screen_size()
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id,
os_type=self.computer.os_type,
)
# Store response ID for next request
# OpenAI API response structure: the ID is in the response dictionary
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
# Choose API call method based on disable_response_storage setting
if self.disable_response_storage:
# Manual conversation state management - always send full message history
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=None, # Don't use response chaining
os_type=self.computer.os_type,
)
# Don't reset last_response_id to None - keep the previous value if available
else:
# Use OpenAI's response storage with previous_response_id
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id,
os_type=self.computer.os_type,
)
from pprint import pprint
print("========== send_initial_request ===========")
pprint(response)
print("===========================================")
if self.disable_response_storage:
# Manual conversation state management - add response to message history
self.message_manager.add_openai_response(response)
else:
# Store response ID for next request
# OpenAI API response structure: the ID is in the response dictionary
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
)
# Don't reset last_response_id to None - keep the previous value if available
# Log standardized response for ease of parsing
@@ -393,27 +419,54 @@ class OpenAILoop(BaseLoop):
)
self.message_manager.add_user_message([computer_call_output])
# For follow-up requests with previous_response_id, we only need to send
# the computer_call_output, not the full message history
# The API handler will extract this from the message history
if isinstance(self.last_response_id, str):
response = await self.api_handler.send_computer_call_request(
# Choose API call method based on disable_response_storage setting
if self.disable_response_storage:
# Manual conversation state management - send full message history
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id, # Use instance variable
previous_response_id=None, # Don't use response chaining
os_type=self.computer.os_type,
)
# Store response ID for next request
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
from pprint import pprint
print("========== send_initial_request (manual mode) ===========")
pprint(response)
print("========================================================")
# Add response to message history for manual state management
self.message_manager.add_openai_response(response)
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
)
# Keep using the previous response ID if we can't find a new one
# Use OpenAI's response storage with previous_response_id
# For follow-up requests with previous_response_id, we only need to send
# the computer_call_output, not the full message history
# The API handler will extract this from the message history
if isinstance(self.last_response_id, str):
response = await self.api_handler.send_computer_call_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id, # Use instance variable
os_type=self.computer.os_type,
)
from pprint import pprint
print("========== send_computer_call_request ===========")
pprint(response)
print("============================================")
# Store response ID for next request
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
)
# Keep using the previous response ID if we can't find a new one
# Process the response
# await self.response_handler.process_response(response, queue)
@@ -455,20 +508,3 @@ class OpenAILoop(BaseLoop):
}
)
await queue.put(None) # Signal that we're done
def get_last_response_id(self) -> Optional[str]:
    """Return the most recently stored OpenAI response ID.

    Returns:
        The last response ID or None if no response has been received
    """
    return self.last_response_id
def set_last_response_id(self, response_id: str) -> None:
    """Manually override the stored response ID used for request chaining.

    Args:
        response_id: OpenAI response ID to set
    """
    self.last_response_id = response_id
    # Log the override so trajectory debugging can trace manual chaining.
    logger.info(f"Manually set response ID to: {self.last_response_id}")

View File

@@ -91,6 +91,9 @@ all = [
"mlx-vlm>=0.1.27; sys_platform == 'darwin'"
]
[tool.uv]
constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"]
[tool.pdm]
distribution = true