Added disable_response_storage

2026-02-17 11:58:59 -06:00 · 2025-07-21 12:07:54 -04:00
parent 96fd9cb98e
commit 9068ec32d8
8 changed files with 331 additions and 60 deletions
--- a/libs/python/agent/agent/core/agent.py
+++ b/libs/python/agent/agent/core/agent.py
@@ -29,6 +29,7 @@ class ComputerAgent:
        trajectory_dir: str = "trajectories",
        only_n_most_recent_images: Optional[int] = None,
        verbosity: int = logging.INFO,
+        disable_response_storage: bool = False,
    ):
        """Initialize the ComputerAgent.

@@ -45,6 +46,7 @@ class ComputerAgent:
            trajectory_dir: Directory to save the trajectory.
            only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
            verbosity: Logging level.
+            disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
        """
        # Basic agent configuration
        self.max_retries = max_retries
@@ -55,6 +57,7 @@ class ComputerAgent:
        self._retry_count = 0
        self._initialized = False
        self._in_context = False
+        self.disable_response_storage = disable_response_storage

        # Set logging level
        logger.setLevel(verbosity)
@@ -105,6 +108,7 @@ class ComputerAgent:
                trajectory_dir=trajectory_dir,
                only_n_most_recent_images=only_n_most_recent_images,
                provider_base_url=self.provider_base_url,
+                disable_response_storage=disable_response_storage,
            )
        except ValueError as e:
            logger.error(f"Failed to create loop: {str(e)}")
--- a/libs/python/agent/agent/core/base.py
+++ b/libs/python/agent/agent/core/base.py
@@ -29,6 +29,7 @@ class BaseLoop(ABC):
        save_trajectory: bool = True,
        only_n_most_recent_images: Optional[int] = 2,
        callback_handlers: Optional[List[CallbackHandler]] = None,
+        disable_response_storage: bool = False,
        **kwargs,
    ):
        """Initialize base agent loop.
@@ -43,6 +44,7 @@ class BaseLoop(ABC):
            base_dir: Base directory for saving experiment data
            save_trajectory: Whether to save trajectory data
            only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
+            disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
            **kwargs: Additional provider-specific arguments
        """
        self.computer = computer
@@ -54,6 +56,7 @@ class BaseLoop(ABC):
        self.base_dir = base_dir
        self.save_trajectory = save_trajectory
        self.only_n_most_recent_images = only_n_most_recent_images
+        self.disable_response_storage = disable_response_storage
        self._kwargs = kwargs

        # Initialize message manager
--- a/libs/python/agent/agent/core/factory.py
+++ b/libs/python/agent/agent/core/factory.py
@@ -30,6 +30,7 @@ class LoopFactory:
        only_n_most_recent_images: Optional[int] = None,
        acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
        provider_base_url: Optional[str] = None,
+        disable_response_storage: bool = False,
    ) -> BaseLoop:
        """Create and return an appropriate loop instance based on type."""
        if loop_type == AgentLoop.ANTHROPIC:
@@ -49,6 +50,7 @@ class LoopFactory:
                save_trajectory=save_trajectory,
                base_dir=trajectory_dir,
                only_n_most_recent_images=only_n_most_recent_images,
+                disable_response_storage=disable_response_storage,
            )
        elif loop_type == AgentLoop.OPENAI:
            # Lazy import OpenAILoop only when needed
@@ -68,6 +70,7 @@ class LoopFactory:
                base_dir=trajectory_dir,
                only_n_most_recent_images=only_n_most_recent_images,
                acknowledge_safety_check_callback=acknowledge_safety_check_callback,
+                disable_response_storage=disable_response_storage,
            )
        elif loop_type == AgentLoop.OMNI:
            # Lazy import OmniLoop and related classes only when needed
@@ -97,6 +100,7 @@ class LoopFactory:
                only_n_most_recent_images=only_n_most_recent_images,
                parser=OmniParser(),
                provider_base_url=provider_base_url,
+                disable_response_storage=disable_response_storage,
            )
        elif loop_type == AgentLoop.UITARS:
            # Lazy import UITARSLoop only when needed
@@ -117,6 +121,7 @@ class LoopFactory:
                only_n_most_recent_images=only_n_most_recent_images,
                provider_base_url=provider_base_url,
                provider=provider,
+                disable_response_storage=disable_response_storage,
            )
        else:
            raise ValueError(f"Unsupported loop type: {loop_type}")
--- a/libs/python/agent/agent/core/messages.py
+++ b/libs/python/agent/agent/core/messages.py
@@ -69,6 +69,44 @@ class StandardMessageManager:
            return self._apply_image_retention(self.messages)
        return self.messages

+    def add_openai_response(self, response: Dict[str, Any]) -> None:
+        """Add OpenAI response output to message history.
+
+        Used for manual conversation state (no ``previous_response_id``).
+        Text is collected from ``message`` items (their ``output_text``
+        content parts) and from bare ``output_text`` items; ``computer_call``
+        items are kept unchanged. Other item types are skipped.
+
+        Args:
+            response: OpenAI API response dict with an ``output`` list.
+        """
+        if not isinstance(response, dict) or "output" not in response:
+            logger.warning("Invalid OpenAI response format for adding to message history")
+            return
+
+        output_items = response.get("output", [])
+        if not isinstance(output_items, list):
+            logger.warning("OpenAI response output is not a list")
+            return
+
+        assistant_content = []
+        for item in output_items:
+            if not isinstance(item, dict):
+                continue
+            item_type = item.get("type")
+            if item_type == "computer_call":
+                # Keep computer calls as-is for tool execution tracking
+                assistant_content.append(item)
+                continue
+            # Responses API wraps assistant text in "message" items.
+            parts = item.get("content") if item_type == "message" else [item]
+            for part in parts if isinstance(parts, list) else []:
+                if isinstance(part, dict) and part.get("type") == "output_text":
+                    assistant_content.append({"type": "text", "text": part.get("text", "")})
+
+        if assistant_content:
+            self.add_assistant_message(assistant_content)
+
    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply image retention policy to messages.

--- a/libs/python/agent/agent/providers/openai/api_handler.py
+++ b/libs/python/agent/agent/providers/openai/api_handler.py
@@ -15,11 +15,12 @@ logger = logging.getLogger(__name__)
 class OpenAIAPIHandler:
    """Handler for OpenAI API interactions."""

-    def __init__(self, loop: "OpenAILoop"):
+    def __init__(self, loop: "OpenAILoop", disable_response_storage: bool = False):
        """Initialize the API handler.

        Args:
            loop: OpenAI loop instance
+            disable_response_storage: Whether to disable response storage
        """
        self.loop = loop
        self.api_key = os.getenv("OPENAI_API_KEY")
@@ -45,7 +46,7 @@ class OpenAIAPIHandler:
        display_width: str,
        display_height: str,
        previous_response_id: Optional[str] = None,
-        os_type: str,
+        os_type: str = "mac",
    ) -> Dict[str, Any]:
        """Send an initial request to the OpenAI API with a screenshot.

@@ -61,10 +62,7 @@ class OpenAIAPIHandler:
        # Convert from our internal OS types to the ones OpenAI expects
        if os_type == "macos":
            os_type = "mac"
-        elif os_type == "linux":
-            os_type = "ubuntu"
-        
-        if os_type not in ["mac", "windows", "ubuntu", "browser"]:
+        if os_type not in ["mac", "windows", "linux", "browser"]:
            raise ValueError(f"Invalid OS type: {os_type}")

        # Convert display dimensions to integers
@@ -143,7 +141,7 @@ class OpenAIAPIHandler:
            ],
            "input": input_array,
            "reasoning": {
-                "generate_summary": "concise",
+                "summary": "concise",
            },
            "truncation": "auto",
        }
@@ -207,10 +205,8 @@ class OpenAIAPIHandler:
        # Convert from our internal OS types to the ones OpenAI expects
        if os_type == "macos":
            os_type = "mac"
-        elif os_type == "linux":
-            os_type = "ubuntu"
        
-        if os_type not in ["mac", "windows", "ubuntu", "browser"]:
+        if os_type not in ["mac", "windows", "linux", "browser"]:
            raise ValueError(f"Invalid OS type: {os_type}")

        # Convert display dimensions to integers
@@ -289,6 +285,9 @@ class OpenAIAPIHandler:
                    },
                }
            ],
+            "reasoning": {
+                "summary": "concise",
+            },
            "truncation": "auto",
        }

--- a/libs/python/agent/agent/providers/openai/loop.py
+++ b/libs/python/agent/agent/providers/openai/loop.py
@@ -40,6 +40,7 @@ class OpenAILoop(BaseLoop):
        retry_delay: float = 1.0,
        save_trajectory: bool = True,
        acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
+        disable_response_storage: bool = False,
        **kwargs,
    ):
        """Initialize the OpenAI loop.
@@ -54,6 +55,7 @@ class OpenAILoop(BaseLoop):
            retry_delay: Delay between retries in seconds
            save_trajectory: Whether to save trajectory data
            acknowledge_safety_check_callback: Optional callback for safety check acknowledgment
+            disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
            **kwargs: Additional provider-specific arguments
        """
        # Always use computer-use-preview model
@@ -72,6 +74,7 @@ class OpenAILoop(BaseLoop):
            base_dir=base_dir,
            save_trajectory=save_trajectory,
            only_n_most_recent_images=only_n_most_recent_images,
+            disable_response_storage=disable_response_storage,
            **kwargs,
        )

@@ -90,7 +93,7 @@ class OpenAILoop(BaseLoop):
        self.loop_task = None  # Store the loop task for cancellation

        # Initialize handlers
-        self.api_handler = OpenAIAPIHandler(self)
+        self.api_handler = OpenAIAPIHandler(self, self.disable_response_storage)
        self.response_handler = OpenAIResponseHandler(self)

        # Initialize tool manager with callback
@@ -275,24 +278,47 @@ class OpenAILoop(BaseLoop):

                # Call API
                screen_size = await self.computer.interface.get_screen_size()
-                response = await self.api_handler.send_initial_request(
-                    messages=self.message_manager.get_messages(), # Apply image retention policy
-                    display_width=str(screen_size["width"]),
-                    display_height=str(screen_size["height"]),
-                    previous_response_id=self.last_response_id,
-                    os_type=self.computer.os_type,
-                )
-
-                # Store response ID for next request
-                # OpenAI API response structure: the ID is in the response dictionary
-                if isinstance(response, dict) and "id" in response:
-                    self.last_response_id = response["id"]  # Update instance variable
-                    logger.info(f"Received response with ID: {self.last_response_id}")
-                else:
-                    logger.warning(
-                        f"Could not find response ID in OpenAI response: {type(response)}"
+                
+                # Choose API call method based on disable_response_storage setting
+                if self.disable_response_storage:
+                    # Manual conversation state management - always send full message history
+                    response = await self.api_handler.send_initial_request(
+                        messages=self.message_manager.get_messages(), # Apply image retention policy
+                        display_width=str(screen_size["width"]),
+                        display_height=str(screen_size["height"]),
+                        previous_response_id=None,  # Don't use response chaining
+                        os_type=self.computer.os_type,
                    )
-                    # Don't reset last_response_id to None - keep the previous value if available
+                else:
+                    # Use OpenAI's response storage with previous_response_id
+                    response = await self.api_handler.send_initial_request(
+                        messages=self.message_manager.get_messages(), # Apply image retention policy
+                        display_width=str(screen_size["width"]),
+                        display_height=str(screen_size["height"]),
+                        previous_response_id=self.last_response_id,
+                        os_type=self.computer.os_type,
+                    )
+                    
+                from pprint import pprint
+
+                print("========== send_initial_request ===========")
+                pprint(response)
+                print("===========================================")
+
+                if self.disable_response_storage:
+                    # Manual conversation state management - add response to message history
+                    self.message_manager.add_openai_response(response)
+                else:
+                    # Store response ID for next request
+                    # OpenAI API response structure: the ID is in the response dictionary
+                    if isinstance(response, dict) and "id" in response:
+                        self.last_response_id = response["id"]  # Update instance variable
+                        logger.info(f"Received response with ID: {self.last_response_id}")
+                    else:
+                        logger.warning(
+                            f"Could not find response ID in OpenAI response: {type(response)}"
+                        )
+                        # Don't reset last_response_id to None - keep the previous value if available


                # Log standardized response for ease of parsing
@@ -393,27 +419,54 @@ class OpenAILoop(BaseLoop):
                        )
                        self.message_manager.add_user_message([computer_call_output])

-                        # For follow-up requests with previous_response_id, we only need to send
-                        # the computer_call_output, not the full message history
-                        # The API handler will extract this from the message history
-                        if isinstance(self.last_response_id, str):
-                            response = await self.api_handler.send_computer_call_request(
+                        # Choose API call method based on disable_response_storage setting
+                        if self.disable_response_storage:
+                            # Manual conversation state management - send full message history
+                            response = await self.api_handler.send_initial_request(
                                messages=self.message_manager.get_messages(), # Apply image retention policy
                                display_width=str(screen_size["width"]),
                                display_height=str(screen_size["height"]),
-                                previous_response_id=self.last_response_id,  # Use instance variable
+                                previous_response_id=None,  # Don't use response chaining
                                os_type=self.computer.os_type,
                            )
-
-                        # Store response ID for next request
-                        if isinstance(response, dict) and "id" in response:
-                            self.last_response_id = response["id"]  # Update instance variable
-                            logger.info(f"Received response with ID: {self.last_response_id}")
+                            
+                            from pprint import pprint
+                            
+                            print("========== send_initial_request (manual mode) ===========")
+                            pprint(response)
+                            print("========================================================")
+                            
+                            # Add response to message history for manual state management
+                            self.message_manager.add_openai_response(response)
                        else:
-                            logger.warning(
-                                f"Could not find response ID in OpenAI response: {type(response)}"
-                            )
-                            # Keep using the previous response ID if we can't find a new one
+                            # Use OpenAI's response storage with previous_response_id
+                            # For follow-up requests with previous_response_id, we only need to send
+                            # the computer_call_output, not the full message history
+                            # The API handler will extract this from the message history
+                            if isinstance(self.last_response_id, str):
+                                response = await self.api_handler.send_computer_call_request(
+                                    messages=self.message_manager.get_messages(), # Apply image retention policy
+                                    display_width=str(screen_size["width"]),
+                                    display_height=str(screen_size["height"]),
+                                    previous_response_id=self.last_response_id,  # Use instance variable
+                                    os_type=self.computer.os_type,
+                                )
+
+                                from pprint import pprint
+                                
+                                print("========== send_computer_call_request ===========")
+                                pprint(response)
+                                print("============================================")
+
+                            # Store response ID for next request
+                            if isinstance(response, dict) and "id" in response:
+                                self.last_response_id = response["id"]  # Update instance variable
+                                logger.info(f"Received response with ID: {self.last_response_id}")
+                            else:
+                                logger.warning(
+                                    f"Could not find response ID in OpenAI response: {type(response)}"
+                                )
+                                # Keep using the previous response ID if we can't find a new one

                        # Process the response
                        # await self.response_handler.process_response(response, queue)
@@ -455,20 +508,3 @@ class OpenAILoop(BaseLoop):
                }
            )
            await queue.put(None)  # Signal that we're done
-
-    def get_last_response_id(self) -> Optional[str]:
-        """Get the last response ID.
-
-        Returns:
-            The last response ID or None if no response has been received
-        """
-        return self.last_response_id
-
-    def set_last_response_id(self, response_id: str) -> None:
-        """Set the last response ID.
-
-        Args:
-            response_id: OpenAI response ID to set
-        """
-        self.last_response_id = response_id
-        logger.info(f"Manually set response ID to: {self.last_response_id}")
--- a/libs/python/agent/pyproject.toml
+++ b/libs/python/agent/pyproject.toml
@@ -91,6 +91,9 @@ all = [
    "mlx-vlm>=0.1.27; sys_platform == 'darwin'"
 ]

+[tool.uv]
+constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"]
+
 [tool.pdm]
 distribution = true