Added disable_response_storage

This commit is contained in:
Dillon DuPont
2025-07-21 12:07:54 -04:00
parent 96fd9cb98e
commit 9068ec32d8
8 changed files with 331 additions and 60 deletions
+4
View File
@@ -29,6 +29,7 @@ class ComputerAgent:
trajectory_dir: str = "trajectories",
only_n_most_recent_images: Optional[int] = None,
verbosity: int = logging.INFO,
disable_response_storage: bool = False,
):
"""Initialize the ComputerAgent.
@@ -45,6 +46,7 @@ class ComputerAgent:
trajectory_dir: Directory to save the trajectory.
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
verbosity: Logging level.
disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
"""
# Basic agent configuration
self.max_retries = max_retries
@@ -55,6 +57,7 @@ class ComputerAgent:
self._retry_count = 0
self._initialized = False
self._in_context = False
self.disable_response_storage = disable_response_storage
# Set logging level
logger.setLevel(verbosity)
@@ -105,6 +108,7 @@ class ComputerAgent:
trajectory_dir=trajectory_dir,
only_n_most_recent_images=only_n_most_recent_images,
provider_base_url=self.provider_base_url,
disable_response_storage=disable_response_storage,
)
except ValueError as e:
logger.error(f"Failed to create loop: {str(e)}")
+3
View File
@@ -29,6 +29,7 @@ class BaseLoop(ABC):
save_trajectory: bool = True,
only_n_most_recent_images: Optional[int] = 2,
callback_handlers: Optional[List[CallbackHandler]] = None,
disable_response_storage: bool = False,
**kwargs,
):
"""Initialize base agent loop.
@@ -43,6 +44,7 @@ class BaseLoop(ABC):
base_dir: Base directory for saving experiment data
save_trajectory: Whether to save trajectory data
only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
**kwargs: Additional provider-specific arguments
"""
self.computer = computer
@@ -54,6 +56,7 @@ class BaseLoop(ABC):
self.base_dir = base_dir
self.save_trajectory = save_trajectory
self.only_n_most_recent_images = only_n_most_recent_images
self.disable_response_storage = disable_response_storage
self._kwargs = kwargs
# Initialize message manager
+5
View File
@@ -30,6 +30,7 @@ class LoopFactory:
only_n_most_recent_images: Optional[int] = None,
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
provider_base_url: Optional[str] = None,
disable_response_storage: bool = False,
) -> BaseLoop:
"""Create and return an appropriate loop instance based on type."""
if loop_type == AgentLoop.ANTHROPIC:
@@ -49,6 +50,7 @@ class LoopFactory:
save_trajectory=save_trajectory,
base_dir=trajectory_dir,
only_n_most_recent_images=only_n_most_recent_images,
disable_response_storage=disable_response_storage,
)
elif loop_type == AgentLoop.OPENAI:
# Lazy import OpenAILoop only when needed
@@ -68,6 +70,7 @@ class LoopFactory:
base_dir=trajectory_dir,
only_n_most_recent_images=only_n_most_recent_images,
acknowledge_safety_check_callback=acknowledge_safety_check_callback,
disable_response_storage=disable_response_storage,
)
elif loop_type == AgentLoop.OMNI:
# Lazy import OmniLoop and related classes only when needed
@@ -97,6 +100,7 @@ class LoopFactory:
only_n_most_recent_images=only_n_most_recent_images,
parser=OmniParser(),
provider_base_url=provider_base_url,
disable_response_storage=disable_response_storage,
)
elif loop_type == AgentLoop.UITARS:
# Lazy import UITARSLoop only when needed
@@ -117,6 +121,7 @@ class LoopFactory:
only_n_most_recent_images=only_n_most_recent_images,
provider_base_url=provider_base_url,
provider=provider,
disable_response_storage=disable_response_storage,
)
else:
raise ValueError(f"Unsupported loop type: {loop_type}")
+38
View File
@@ -69,6 +69,44 @@ class StandardMessageManager:
return self._apply_image_retention(self.messages)
return self.messages
def add_openai_response(self, response: Dict[str, Any]) -> None:
    """Record the output of an OpenAI response as an assistant message.

    Used when provider-side response storage is disabled and conversation
    state must be maintained client-side: the response's output items are
    converted into assistant-message content and appended to the history.

    Args:
        response: Raw OpenAI API response dict; its "output" list is consumed.
    """
    if not (isinstance(response, dict) and "output" in response):
        logger.warning("Invalid OpenAI response format for adding to message history")
        return

    output_items = response.get("output", [])
    if not isinstance(output_items, list):
        logger.warning("OpenAI response output is not a list")
        return

    # NOTE(review): only "output_text" and "computer_call" items are kept;
    # any other item types in the output list are silently dropped — confirm
    # that is intended for manual state management.
    content: List[Dict[str, Any]] = []
    for item in output_items:
        if not isinstance(item, dict):
            continue
        kind = item.get("type")
        if kind == "output_text":
            content.append({"type": "text", "text": item.get("text", "")})
        elif kind == "computer_call":
            # Preserve computer calls verbatim so tool execution can be tracked.
            content.append(item)

    # Only append a message when there is something to record.
    if content:
        self.add_assistant_message(content)
def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Apply image retention policy to messages.
@@ -15,11 +15,12 @@ logger = logging.getLogger(__name__)
class OpenAIAPIHandler:
"""Handler for OpenAI API interactions."""
def __init__(self, loop: "OpenAILoop"):
def __init__(self, loop: "OpenAILoop", disable_response_storage: bool = False):
"""Initialize the API handler.
Args:
loop: OpenAI loop instance
disable_response_storage: Whether to disable response storage
"""
self.loop = loop
self.api_key = os.getenv("OPENAI_API_KEY")
@@ -45,7 +46,7 @@ class OpenAIAPIHandler:
display_width: str,
display_height: str,
previous_response_id: Optional[str] = None,
os_type: str,
os_type: str = "mac",
) -> Dict[str, Any]:
"""Send an initial request to the OpenAI API with a screenshot.
@@ -61,10 +62,7 @@ class OpenAIAPIHandler:
# Convert from our internal OS types to the ones OpenAI expects
if os_type == "macos":
os_type = "mac"
elif os_type == "linux":
os_type = "ubuntu"
if os_type not in ["mac", "windows", "ubuntu", "browser"]:
if os_type not in ["mac", "windows", "linux", "browser"]:
raise ValueError(f"Invalid OS type: {os_type}")
# Convert display dimensions to integers
@@ -143,7 +141,7 @@ class OpenAIAPIHandler:
],
"input": input_array,
"reasoning": {
"generate_summary": "concise",
"summary": "concise",
},
"truncation": "auto",
}
@@ -207,10 +205,8 @@ class OpenAIAPIHandler:
# Convert from our internal OS types to the ones OpenAI expects
if os_type == "macos":
os_type = "mac"
elif os_type == "linux":
os_type = "ubuntu"
if os_type not in ["mac", "windows", "ubuntu", "browser"]:
if os_type not in ["mac", "windows", "linux", "browser"]:
raise ValueError(f"Invalid OS type: {os_type}")
# Convert display dimensions to integers
@@ -289,6 +285,9 @@ class OpenAIAPIHandler:
},
}
],
"reasoning": {
"summary": "concise",
},
"truncation": "auto",
}
@@ -40,6 +40,7 @@ class OpenAILoop(BaseLoop):
retry_delay: float = 1.0,
save_trajectory: bool = True,
acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
disable_response_storage: bool = False,
**kwargs,
):
"""Initialize the OpenAI loop.
@@ -54,6 +55,7 @@ class OpenAILoop(BaseLoop):
retry_delay: Delay between retries in seconds
save_trajectory: Whether to save trajectory data
acknowledge_safety_check_callback: Optional callback for safety check acknowledgment
disable_response_storage: Whether to disable response storage on the provider side. Turn this on if you are participating in a Zero Data Retention policy.
**kwargs: Additional provider-specific arguments
"""
# Always use computer-use-preview model
@@ -72,6 +74,7 @@ class OpenAILoop(BaseLoop):
base_dir=base_dir,
save_trajectory=save_trajectory,
only_n_most_recent_images=only_n_most_recent_images,
disable_response_storage=disable_response_storage,
**kwargs,
)
@@ -90,7 +93,7 @@ class OpenAILoop(BaseLoop):
self.loop_task = None # Store the loop task for cancellation
# Initialize handlers
self.api_handler = OpenAIAPIHandler(self)
self.api_handler = OpenAIAPIHandler(self, self.disable_response_storage)
self.response_handler = OpenAIResponseHandler(self)
# Initialize tool manager with callback
@@ -275,24 +278,47 @@ class OpenAILoop(BaseLoop):
# Call API
screen_size = await self.computer.interface.get_screen_size()
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id,
os_type=self.computer.os_type,
)
# Store response ID for next request
# OpenAI API response structure: the ID is in the response dictionary
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
# Choose API call method based on disable_response_storage setting
if self.disable_response_storage:
# Manual conversation state management - always send full message history
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=None, # Don't use response chaining
os_type=self.computer.os_type,
)
# Don't reset last_response_id to None - keep the previous value if available
else:
# Use OpenAI's response storage with previous_response_id
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id,
os_type=self.computer.os_type,
)
from pprint import pprint
print("========== send_initial_request ===========")
pprint(response)
print("===========================================")
if self.disable_response_storage:
# Manual conversation state management - add response to message history
self.message_manager.add_openai_response(response)
else:
# Store response ID for next request
# OpenAI API response structure: the ID is in the response dictionary
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
)
# Don't reset last_response_id to None - keep the previous value if available
# Log standardized response for ease of parsing
@@ -393,27 +419,54 @@ class OpenAILoop(BaseLoop):
)
self.message_manager.add_user_message([computer_call_output])
# For follow-up requests with previous_response_id, we only need to send
# the computer_call_output, not the full message history
# The API handler will extract this from the message history
if isinstance(self.last_response_id, str):
response = await self.api_handler.send_computer_call_request(
# Choose API call method based on disable_response_storage setting
if self.disable_response_storage:
# Manual conversation state management - send full message history
response = await self.api_handler.send_initial_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id, # Use instance variable
previous_response_id=None, # Don't use response chaining
os_type=self.computer.os_type,
)
# Store response ID for next request
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
from pprint import pprint
print("========== send_initial_request (manual mode) ===========")
pprint(response)
print("========================================================")
# Add response to message history for manual state management
self.message_manager.add_openai_response(response)
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
)
# Keep using the previous response ID if we can't find a new one
# Use OpenAI's response storage with previous_response_id
# For follow-up requests with previous_response_id, we only need to send
# the computer_call_output, not the full message history
# The API handler will extract this from the message history
if isinstance(self.last_response_id, str):
response = await self.api_handler.send_computer_call_request(
messages=self.message_manager.get_messages(), # Apply image retention policy
display_width=str(screen_size["width"]),
display_height=str(screen_size["height"]),
previous_response_id=self.last_response_id, # Use instance variable
os_type=self.computer.os_type,
)
from pprint import pprint
print("========== send_computer_call_request ===========")
pprint(response)
print("============================================")
# Store response ID for next request
if isinstance(response, dict) and "id" in response:
self.last_response_id = response["id"] # Update instance variable
logger.info(f"Received response with ID: {self.last_response_id}")
else:
logger.warning(
f"Could not find response ID in OpenAI response: {type(response)}"
)
# Keep using the previous response ID if we can't find a new one
# Process the response
# await self.response_handler.process_response(response, queue)
@@ -455,20 +508,3 @@ class OpenAILoop(BaseLoop):
}
)
await queue.put(None) # Signal that we're done
def get_last_response_id(self) -> Optional[str]:
"""Get the last response ID.
Returns:
The last response ID or None if no response has been received
"""
return self.last_response_id
def set_last_response_id(self, response_id: str) -> None:
"""Set the last response ID.
Args:
response_id: OpenAI response ID to set
"""
self.last_response_id = response_id
logger.info(f"Manually set response ID to: {self.last_response_id}")
+3
View File
@@ -91,6 +91,9 @@ all = [
"mlx-vlm>=0.1.27; sys_platform == 'darwin'"
]
[tool.uv]
constraint-dependencies = ["fastrtc>0.43.0", "mlx-audio>0.2.3"]
[tool.pdm]
distribution = true
+183
View File
@@ -0,0 +1,183 @@
#!/bin/bash
# Exit on error
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# --- Pretty-printing helpers -------------------------------------------------
# Each prints a colored "==> ..." banner; printf '%b' expands the escape
# sequences stored in the color variables exactly like `echo -e` does.

# Informational step banner (blue).
print_step() {
    printf '%b\n' "${BLUE}==> $1${NC}"
}

# Success banner (green).
print_success() {
    printf '%b\n' "${GREEN}==> Success: $1${NC}"
}

# Error banner (red), written to stderr.
print_error() {
    printf '%b\n' "${RED}==> Error: $1${NC}" >&2
}

# Warning banner (yellow).
print_warning() {
    printf '%b\n' "${YELLOW}==> Warning: $1${NC}"
}
# Succeed (status 0) and report the version when `uv` is on PATH; fail otherwise.
check_uv() {
    if ! command -v uv &> /dev/null; then
        return 1
    fi
    print_success "UV is already installed"
    uv --version
    return 0
}
# Install UV via the official installer script (Unix-like systems only);
# on Windows shells or unknown platforms, print manual instructions and exit.
install_uv() {
    print_step "UV not found. Installing UV..."

    # Detect OS
    if [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$OSTYPE" == "darwin"* ]]; then
        print_step "Installing UV for Unix-like system..."
        curl -LsSf https://astral.sh/uv/install.sh | sh

        # Make the freshly installed binary visible in this session.
        # Recent uv installers place the binary in ~/.local/bin; older
        # releases used ~/.cargo/bin — add both so the check below works
        # regardless of installer version.
        export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"

        # Check if installation was successful
        if command -v uv &> /dev/null; then
            print_success "UV installed successfully"
            uv --version
        else
            print_error "UV installation failed"
            print_step "Please restart your terminal and try again, or install manually:"
            echo "  curl -LsSf https://astral.sh/uv/install.sh | sh"
            exit 1
        fi
    elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then
        print_error "For Windows, please use PowerShell and run:"
        echo "  powershell -ExecutionPolicy ByPass -c \"irm https://astral.sh/uv/install.ps1 | iex\""
        exit 1
    else
        print_error "Unsupported operating system: $OSTYPE"
        print_step "Please install UV manually from: https://docs.astral.sh/uv/getting-started/installation/"
        exit 1
    fi
}
# Get the script's directory (resolves symlink-free absolute paths)
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "${SCRIPT_DIR}/.." && pwd )"

# Change to project root so all relative paths below are rooted there
cd "$PROJECT_ROOT"

# Check if UV is installed, install if not
if ! check_uv; then
    install_uv
fi

# Load environment variables from .env.local
# `set -a` auto-exports every variable assigned while sourcing, so child
# processes (uv, build backends) inherit them; `set +a` restores normal mode.
if [ -f .env.local ]; then
    print_step "Loading environment variables from .env.local..."
    set -a
    source .env.local
    set +a
    print_success "Environment variables loaded"
else
    print_error ".env.local file not found"
    exit 1
fi

# Clean up existing environments and cache
# `|| true` keeps `set -e` from aborting when a find/rm pass matches nothing.
print_step "Cleaning up existing environments..."
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true
find . -type d -name "dist" -exec rm -rf {} + 2>/dev/null || true
find . -type d -name ".venv" -exec rm -rf {} + 2>/dev/null || true
find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true
print_success "Environment cleanup complete"

# Install Python 3.12 using UV
print_step "Installing Python 3.12 using UV..."
uv python install 3.12
print_success "Python 3.12 installed"

# Create virtual environment using UV
print_step "Creating virtual environment with UV..."
uv venv .venv --python 3.12
print_success "Virtual environment created"

# Activate virtual environment (affects this script's subsequent uv/pip calls)
print_step "Activating virtual environment..."
source .venv/bin/activate
print_success "Virtual environment activated"
# Editable-install one local package with UV.
#   $1 = package directory, $2 = display name, $3 = optional extras list
# Fails (and, under `set -e`, aborts the script) if the directory has no
# pyproject.toml.
install_package() {
    local package_dir=$1
    local package_name=$2
    local extras=$3

    print_step "Installing ${package_name} with UV..."
    cd "$package_dir"

    # Guard: refuse directories that are not UV/PEP 621 projects.
    if [ ! -f "pyproject.toml" ]; then
        print_error "No pyproject.toml found in ${package_dir}"
        return 1
    fi

    if [ -n "$extras" ]; then
        uv pip install -e ".[${extras}]"
    else
        uv pip install -e .
    fi

    cd "$PROJECT_ROOT"
}
# Install packages in order of dependency
# (each later package depends on one or more of the earlier ones, so the
# order below must be preserved)
print_step "Installing packages in development mode with UV..."

# Install core first (base package with telemetry support)
install_package "libs/python/core" "core"

# Install pylume (base dependency)
install_package "libs/python/pylume" "pylume"

# Install computer with all its dependencies and extras
install_package "libs/python/computer" "computer" "all"

# Install omniparser
install_package "libs/python/som" "som"

# Install agent with all its dependencies and extras
install_package "libs/python/agent" "agent" "all"

# Install computer-server
install_package "libs/python/computer-server" "computer-server"

# Install mcp-server
install_package "libs/python/mcp-server" "mcp-server"

# Install development tools from root project
print_step "Installing development dependencies with UV..."
uv pip install -e ".[dev,test,docs]"

# Create a .env file for VS Code to use the virtual environment
# (the Python extension reads PYTHONPATH from .env so intra-repo imports
# resolve without installation)
print_step "Creating .env file for VS Code..."
echo "PYTHONPATH=${PROJECT_ROOT}/libs/python/core:${PROJECT_ROOT}/libs/python/computer:${PROJECT_ROOT}/libs/python/agent:${PROJECT_ROOT}/libs/python/som:${PROJECT_ROOT}/libs/python/pylume:${PROJECT_ROOT}/libs/python/computer-server:${PROJECT_ROOT}/libs/python/mcp-server" > .env

print_success "All packages installed successfully with UV!"
print_step "Your virtual environment is ready. To activate it:"
echo " source .venv/bin/activate"
print_step "UV provides fast dependency resolution and installation."
print_step "You can also use 'uv run' to run commands in the virtual environment without activation."