From 876d42af0a7703afa4658daf1b769a9d989d96a8 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 29 Jul 2025 18:16:22 -0400 Subject: [PATCH 01/76] Replaced agent loop func with agent config class --- libs/python/agent/agent/agent.py | 67 ++- libs/python/agent/agent/cli.py | 8 +- libs/python/agent/agent/decorators.py | 89 ++-- libs/python/agent/agent/loops/anthropic.py | 175 ++++---- libs/python/agent/agent/loops/omniparser.py | 199 +++++---- libs/python/agent/agent/loops/openai.py | 160 ++++--- libs/python/agent/agent/loops/uitars.py | 456 ++++++++++++-------- libs/python/agent/agent/types.py | 12 +- 8 files changed, 695 insertions(+), 471 deletions(-) diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 0b9f243a..f117fe8b 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -3,12 +3,12 @@ ComputerAgent - Main agent class that selects and runs agent loops """ import asyncio -from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set +from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set, Tuple from litellm.responses.utils import Usage -from .types import Messages, Computer -from .decorators import find_agent_loop +from .types import Messages, Computer, AgentCapability +from .decorators import find_agent_config from .computer_handler import OpenAIComputerHandler, acknowledge_safety_check_callback, check_blocklisted_url import json import litellm @@ -213,13 +213,14 @@ class ComputerAgent: # Find the appropriate agent loop if custom_loop: self.agent_loop = custom_loop - self.agent_loop_info = None + self.agent_config_info = None else: - loop_info = find_agent_loop(model) - if not loop_info: - raise ValueError(f"No agent loop found for model: {model}") - self.agent_loop = loop_info.func - self.agent_loop_info = loop_info + config_info = find_agent_config(model) + if not config_info: + raise ValueError(f"No agent config found for 
model: {model}") + # Instantiate the agent config class + self.agent_loop = config_info.agent_class() + self.agent_config_info = config_info self.tool_schemas = [] self.computer_handler = None @@ -511,6 +512,9 @@ class ComputerAgent: Returns: AsyncGenerator that yields response chunks """ + capabilities = self.get_capabilities() + if "step" not in capabilities: + raise ValueError(f"Agent loop {self.agent_loop.__name__} does not support step predictions") await self._initialize_computers() @@ -555,7 +559,7 @@ class ComputerAgent: } # Run agent loop iteration - result = await self.agent_loop( + result = await self.agent_loop.predict_step( **loop_kwargs, _on_api_start=self._on_api_start, _on_api_end=self._on_api_end, @@ -591,4 +595,45 @@ class ComputerAgent: ) } - await self._on_run_end(loop_kwargs, old_items, new_items) \ No newline at end of file + await self._on_run_end(loop_kwargs, old_items, new_items) + + async def predict_click( + self, + instruction: str, + image_b64: Optional[str] = None + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates based on image and instruction. + + Args: + instruction: Instruction for where to click + image_b64: Base64 encoded image (optional, will take screenshot if not provided) + + Returns: + None or tuple with (x, y) coordinates + """ + capabilities = self.get_capabilities() + if "click" not in capabilities: + raise ValueError(f"Agent loop {self.agent_loop.__name__} does not support click predictions") + if hasattr(self.agent_loop, 'predict_click'): + if not self.computer_handler: + raise ValueError("Computer tool is required for predict_click") + if not image_b64: + image_b64 = await self.computer_handler.screenshot() + return await self.agent_loop.predict_click( + model=self.model, + image_b64=image_b64, + instruction=instruction + ) + return None + + def get_capabilities(self) -> List[AgentCapability]: + """ + Get list of capabilities supported by the current agent config. 
+ + Returns: + List of capability strings (e.g., ["step", "click"]) + """ + if hasattr(self.agent_loop, 'get_capabilities'): + return self.agent_loop.get_capabilities() + return ["step"] # Default capability \ No newline at end of file diff --git a/libs/python/agent/agent/cli.py b/libs/python/agent/agent/cli.py index b5d97337..6d767fc3 100644 --- a/libs/python/agent/agent/cli.py +++ b/libs/python/agent/agent/cli.py @@ -260,7 +260,12 @@ Examples: help="Show total cost of the agent runs" ) - + parser.add_argument( + "-r", "--max-retries", + type=int, + default=3, + help="Maximum number of retries for the LLM API calls" + ) args = parser.parse_args() @@ -327,6 +332,7 @@ Examples: "model": args.model, "tools": [computer], "verbosity": 20 if args.verbose else 30, # DEBUG vs WARNING + "max_retries": args.max_retries } if args.images > 0: diff --git a/libs/python/agent/agent/decorators.py b/libs/python/agent/agent/decorators.py index 0b31c25a..7305b702 100644 --- a/libs/python/agent/agent/decorators.py +++ b/libs/python/agent/agent/decorators.py @@ -7,84 +7,51 @@ import inspect from typing import Dict, List, Any, Callable, Optional from functools import wraps -from .types import AgentLoopInfo +from .types import AgentConfigInfo +from .loops.base import AsyncAgentConfig # Global registry -_agent_loops: List[AgentLoopInfo] = [] +_agent_configs: List[AgentConfigInfo] = [] -def agent_loop(models: str, priority: int = 0): +def register_agent(models: str, priority: int = 0): """ - Decorator to register an agent loop function. + Decorator to register an AsyncAgentConfig class. 
Args: models: Regex pattern to match supported models - priority: Priority for loop selection (higher = more priority) + priority: Priority for agent selection (higher = more priority) """ - def decorator(func: Callable): - # Validate function signature - sig = inspect.signature(func) - required_params = {'messages', 'model'} - func_params = set(sig.parameters.keys()) + def decorator(agent_class: type): + # Validate that the class implements AsyncAgentConfig protocol + if not hasattr(agent_class, 'predict_step'): + raise ValueError(f"Agent class {agent_class.__name__} must implement predict_step method") + if not hasattr(agent_class, 'predict_click'): + raise ValueError(f"Agent class {agent_class.__name__} must implement predict_click method") + if not hasattr(agent_class, 'get_capabilities'): + raise ValueError(f"Agent class {agent_class.__name__} must implement get_capabilities method") - if not required_params.issubset(func_params): - missing = required_params - func_params - raise ValueError(f"Agent loop function must have parameters: {missing}") - - # Register the loop - loop_info = AgentLoopInfo( - func=func, + # Register the agent config + config_info = AgentConfigInfo( + agent_class=agent_class, models_regex=models, priority=priority ) - _agent_loops.append(loop_info) + _agent_configs.append(config_info) # Sort by priority (highest first) - _agent_loops.sort(key=lambda x: x.priority, reverse=True) + _agent_configs.sort(key=lambda x: x.priority, reverse=True) - @wraps(func) - async def wrapper(*args, **kwargs): - # Wrap the function in an asyncio.Queue for cancellation support - queue = asyncio.Queue() - task = None - - try: - # Create a task that can be cancelled - async def run_loop(): - try: - result = await func(*args, **kwargs) - await queue.put(('result', result)) - except Exception as e: - await queue.put(('error', e)) - - task = asyncio.create_task(run_loop()) - - # Wait for result or cancellation - event_type, data = await queue.get() - - if 
event_type == 'error': - raise data - return data - - except asyncio.CancelledError: - if task: - task.cancel() - try: - await task - except asyncio.CancelledError: - pass - raise - - return wrapper + return agent_class return decorator -def get_agent_loops() -> List[AgentLoopInfo]: - """Get all registered agent loops""" - return _agent_loops.copy() +def get_agent_configs() -> List[AgentConfigInfo]: + """Get all registered agent configs""" + return _agent_configs.copy() -def find_agent_loop(model: str) -> Optional[AgentLoopInfo]: - """Find the best matching agent loop for a model""" - for loop_info in _agent_loops: - if loop_info.matches_model(model): - return loop_info +def find_agent_config(model: str) -> Optional[AgentConfigInfo]: + """Find the best matching agent config for a model""" + for config_info in _agent_configs: + if config_info.matches_model(model): + return config_info return None diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py index 02ac1c29..91021ffc 100644 --- a/libs/python/agent/agent/loops/anthropic.py +++ b/libs/python/agent/agent/loops/anthropic.py @@ -4,12 +4,13 @@ Anthropic hosted tools agent loop implementation using liteLLM import asyncio import json -from typing import Dict, List, Any, AsyncGenerator, Union, Optional +from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple import litellm from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig -from ..decorators import agent_loop -from ..types import Messages, AgentResponse, Tools +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability +from ..loops.base import AsyncAgentConfig from ..responses import ( make_reasoning_item, make_output_text_item, @@ -1284,84 +1285,100 @@ def _merge_consecutive_text(content_list: List[Dict[str, Any]]) -> List[Dict[str return merged -@agent_loop(models=r".*claude-.*", priority=5) 
-async def anthropic_hosted_tools_loop( - messages: Messages, - model: str, - tools: Optional[List[Dict[str, Any]]] = None, - max_retries: Optional[int] = None, - stream: bool = False, - computer_handler=None, - use_prompt_caching: Optional[bool] = False, - _on_api_start=None, - _on_api_end=None, - _on_usage=None, - _on_screenshot=None, - **kwargs -) -> Union[AgentResponse, AsyncGenerator[Dict[str, Any], None]]: - """ - Anthropic hosted tools agent loop using liteLLM acompletion. +@register_agent(models=r".*claude-.*", priority=5) +class AnthropicHostedToolsConfig(AsyncAgentConfig): + """Anthropic hosted tools agent configuration implementing AsyncAgentConfig protocol.""" - Supports Anthropic's computer use models with hosted tools. - """ - tools = tools or [] - - # Get tool configuration for this model - tool_config = _get_tool_config_for_model(model) - - # Prepare tools for Anthropic API - anthropic_tools = _prepare_tools_for_anthropic(tools, model) - - # Convert responses_items messages to completion format - completion_messages = _convert_responses_items_to_completion_messages(messages) - if use_prompt_caching: - # First combine messages to reduce number of blocks - completion_messages = _combine_completion_messages(completion_messages) - # Then add cache control, anthropic requires explicit "cache_control" dicts - completion_messages = _add_cache_control(completion_messages) - - # Prepare API call kwargs - api_kwargs = { - "model": model, - "messages": completion_messages, - "tools": anthropic_tools if anthropic_tools else None, - "stream": stream, - "num_retries": max_retries, + async def predict_step( + self, + messages: Messages, + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, **kwargs - } - - # Add beta header for computer use - if 
anthropic_tools: - api_kwargs["headers"] = { - "anthropic-beta": tool_config["beta_flag"] + ) -> Dict[str, Any]: + """ + Anthropic hosted tools agent loop using liteLLM acompletion. + + Supports Anthropic's computer use models with hosted tools. + """ + tools = tools or [] + + # Get tool configuration for this model + tool_config = _get_tool_config_for_model(model) + + # Prepare tools for Anthropic API + anthropic_tools = _prepare_tools_for_anthropic(tools, model) + + # Convert responses_items messages to completion format + completion_messages = _convert_responses_items_to_completion_messages(messages) + if use_prompt_caching: + # First combine messages to reduce number of blocks + completion_messages = _combine_completion_messages(completion_messages) + # Then add cache control, anthropic requires explicit "cache_control" dicts + completion_messages = _add_cache_control(completion_messages) + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": completion_messages, + "tools": anthropic_tools if anthropic_tools else None, + "stream": stream, + "num_retries": max_retries, + **kwargs + } + + # Add beta header for computer use + if anthropic_tools: + api_kwargs["headers"] = { + "anthropic-beta": tool_config["beta_flag"] + } + + # Call API start hook + if _on_api_start: + await _on_api_start(api_kwargs) + + # Use liteLLM acompletion + response = await litellm.acompletion(**api_kwargs) + + # Call API end hook + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Convert response to responses_items format + responses_items = _convert_completion_to_responses_items(response) + + # Extract usage information + responses_usage = { + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(responses_usage) + + # Return in AsyncAgentConfig format + return { + "output": 
responses_items, + "usage": responses_usage } - # Call API start hook - if _on_api_start: - await _on_api_start(api_kwargs) + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs + ) -> Optional[Tuple[float, float]]: + """Anthropic hosted tools does not support click prediction.""" + return None - # Use liteLLM acompletion - response = await litellm.acompletion(**api_kwargs) - - # Call API end hook - if _on_api_end: - await _on_api_end(api_kwargs, response) - - # Convert response to responses_items format - responses_items = _convert_completion_to_responses_items(response) - - # Extract usage information - responses_usage = { - **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), - "response_cost": response._hidden_params.get("response_cost", 0.0), - } - if _on_usage: - await _on_usage(responses_usage) - - # Create agent response - agent_response = { - "output": responses_items, - "usage": responses_usage - } - - return agent_response + def get_capabilities(self) -> List[AgentCapability]: + """Return the capabilities supported by this agent.""" + return ["step"] diff --git a/libs/python/agent/agent/loops/omniparser.py b/libs/python/agent/agent/loops/omniparser.py index f0e7832a..e92ef660 100644 --- a/libs/python/agent/agent/loops/omniparser.py +++ b/libs/python/agent/agent/loops/omniparser.py @@ -9,8 +9,9 @@ import litellm import inspect import base64 -from ..decorators import agent_loop -from ..types import Messages, AgentResponse, Tools +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability +from ..loops.base import AsyncAgentConfig SOM_TOOL_SCHEMA = { "type": "function", @@ -246,94 +247,114 @@ async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[ return [item] -@agent_loop(models=r"omniparser\+.*|omni\+.*", priority=10) -async def omniparser_loop( - messages: Messages, - 
model: str, - tools: Optional[List[Dict[str, Any]]] = None, - max_retries: Optional[int] = None, - stream: bool = False, - computer_handler=None, - use_prompt_caching: Optional[bool] = False, - _on_api_start=None, - _on_api_end=None, - _on_usage=None, - _on_screenshot=None, - **kwargs -) -> Union[AgentResponse, AsyncGenerator[Dict[str, Any], None]]: - """ - OpenAI computer-use-preview agent loop using liteLLM responses. +@register_agent(models=r"omniparser\+.*|omni\+.*", priority=10) +class OmniparsrConfig(AsyncAgentConfig): + """Omniparser agent configuration implementing AsyncAgentConfig protocol.""" - Supports OpenAI's computer use preview models. - """ - if not OMNIPARSER_AVAILABLE: - raise ValueError("omniparser loop requires som to be installed. Install it with `pip install cua-som`.") - - tools = tools or [] - - llm_model = model.split('+')[-1] - - # Prepare tools for OpenAI API - openai_tools, id2xy = _prepare_tools_for_omniparser(tools) - - # Find last computer_call_output - last_computer_call_output = get_last_computer_call_output(messages) - if last_computer_call_output: - image_url = last_computer_call_output.get("output", {}).get("image_url", "") - image_data = image_url.split(",")[-1] - if image_data: - parser = get_parser() - result = parser.parse(image_data) - if _on_screenshot: - await _on_screenshot(result.annotated_image_base64, "annotated_image") - for element in result.elements: - id2xy[element.id] = ((element.bbox.x1 + element.bbox.x2) / 2, (element.bbox.y1 + element.bbox.y2) / 2) - - # handle computer calls -> function calls - new_messages = [] - for message in messages: - if not isinstance(message, dict): - message = message.__dict__ - new_messages += await replace_computer_call_with_function(message, id2xy) - messages = new_messages - - # Prepare API call kwargs - api_kwargs = { - "model": llm_model, - "input": messages, - "tools": openai_tools if openai_tools else None, - "stream": stream, - "reasoning": {"summary": "concise"}, - 
"truncation": "auto", - "num_retries": max_retries, + async def predict_step( + self, + messages: Messages, + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, **kwargs - } + ) -> Dict[str, Any]: + """ + OpenAI computer-use-preview agent loop using liteLLM responses. + + Supports OpenAI's computer use preview models. + """ + if not OMNIPARSER_AVAILABLE: + raise ValueError("omniparser loop requires som to be installed. Install it with `pip install cua-som`.") + + tools = tools or [] + + llm_model = model.split('+')[-1] + + # Prepare tools for OpenAI API + openai_tools, id2xy = _prepare_tools_for_omniparser(tools) + + # Find last computer_call_output + last_computer_call_output = get_last_computer_call_output(messages) + if last_computer_call_output: + image_url = last_computer_call_output.get("output", {}).get("image_url", "") + image_data = image_url.split(",")[-1] + if image_data: + parser = get_parser() + result = parser.parse(image_data) + if _on_screenshot: + await _on_screenshot(result.annotated_image_base64, "annotated_image") + for element in result.elements: + id2xy[element.id] = ((element.bbox.x1 + element.bbox.x2) / 2, (element.bbox.y1 + element.bbox.y2) / 2) + + # handle computer calls -> function calls + new_messages = [] + for message in messages: + if not isinstance(message, dict): + message = message.__dict__ + new_messages += await replace_computer_call_with_function(message, id2xy) + messages = new_messages + + # Prepare API call kwargs + api_kwargs = { + "model": llm_model, + "input": messages, + "tools": openai_tools if openai_tools else None, + "stream": stream, + "reasoning": {"summary": "concise"}, + "truncation": "auto", + "num_retries": max_retries, + **kwargs + } + + # Call API start hook + if _on_api_start: + await 
_on_api_start(api_kwargs) + + print(str(api_kwargs)[:1000]) + + # Use liteLLM responses + response = await litellm.aresponses(**api_kwargs) + + # Call API end hook + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Extract usage information + usage = { + **response.usage.model_dump(), + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(usage) + + # handle som function calls -> xy computer calls + new_output = [] + for i in range(len(response.output)): + new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) + + return { + "output": new_output, + "usage": usage + } - # Call API start hook - if _on_api_start: - await _on_api_start(api_kwargs) + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs + ) -> Optional[Tuple[float, float]]: + """Omniparser does not support click prediction.""" + return None - print(str(api_kwargs)[:1000]) - - # Use liteLLM responses - response = await litellm.aresponses(**api_kwargs) - - # Call API end hook - if _on_api_end: - await _on_api_end(api_kwargs, response) - - # Extract usage information - response.usage = { - **response.usage.model_dump(), - "response_cost": response._hidden_params.get("response_cost", 0.0), - } - if _on_usage: - await _on_usage(response.usage) - - # handle som function calls -> xy computer calls - new_output = [] - for i in range(len(response.output)): - new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) - response.output = new_output - - return response + def get_capabilities(self) -> List[AgentCapability]: + """Return the capabilities supported by this agent.""" + return ["step"] diff --git a/libs/python/agent/agent/loops/openai.py b/libs/python/agent/agent/loops/openai.py index 84b79d1f..13bcb1f1 100644 --- a/libs/python/agent/agent/loops/openai.py +++ b/libs/python/agent/agent/loops/openai.py @@ -4,11 +4,11 @@ 
OpenAI computer-use-preview agent loop implementation using liteLLM import asyncio import json -from typing import Dict, List, Any, AsyncGenerator, Union, Optional +from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple import litellm -from ..decorators import agent_loop -from ..types import Messages, AgentResponse, Tools +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability def _map_computer_tool_to_openai(computer_tool: Any) -> Dict[str, Any]: """Map a computer tool to OpenAI's computer-use-preview tool schema""" @@ -36,60 +36,116 @@ def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools: return openai_tools -@agent_loop(models=r".*computer-use-preview.*", priority=10) -async def openai_computer_use_loop( - messages: Messages, - model: str, - tools: Optional[List[Dict[str, Any]]] = None, - max_retries: Optional[int] = None, - stream: bool = False, - computer_handler=None, - use_prompt_caching: Optional[bool] = False, - _on_api_start=None, - _on_api_end=None, - _on_usage=None, - _on_screenshot=None, - **kwargs -) -> Union[AgentResponse, AsyncGenerator[Dict[str, Any], None]]: +@register_agent(models=r".*computer-use-preview.*", priority=10) +class OpenAIComputerUseConfig: """ - OpenAI computer-use-preview agent loop using liteLLM responses. + OpenAI computer-use-preview agent configuration using liteLLM responses. Supports OpenAI's computer use preview models. 
""" - tools = tools or [] - # Prepare tools for OpenAI API - openai_tools = _prepare_tools_for_openai(tools) - - # Prepare API call kwargs - api_kwargs = { - "model": model, - "input": messages, - "tools": openai_tools if openai_tools else None, - "stream": stream, - "reasoning": {"summary": "concise"}, - "truncation": "auto", - "num_retries": max_retries, + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, **kwargs - } - - # Call API start hook - if _on_api_start: - await _on_api_start(api_kwargs) - - # Use liteLLM responses - response = await litellm.aresponses(**api_kwargs) - - # Call API end hook - if _on_api_end: - await _on_api_end(api_kwargs, response) + ) -> Dict[str, Any]: + """ + Predict the next step based on input items. 
+ + Args: + messages: Input items following Responses format + model: Model name to use + tools: Optional list of tool schemas + max_retries: Maximum number of retries + stream: Whether to stream responses + computer_handler: Computer handler instance + _on_api_start: Callback for API start + _on_api_end: Callback for API end + _on_usage: Callback for usage tracking + _on_screenshot: Callback for screenshot events + **kwargs: Additional arguments + + Returns: + Dictionary with "output" (output items) and "usage" array + """ + tools = tools or [] + + # Prepare tools for OpenAI API + openai_tools = _prepare_tools_for_openai(tools) - # Extract usage information - response.usage = { - **response.usage.model_dump(), - "response_cost": response._hidden_params.get("response_cost", 0.0), - } - if _on_usage: - await _on_usage(response.usage) + # Prepare API call kwargs + api_kwargs = { + "model": model, + "input": messages, + "tools": openai_tools if openai_tools else None, + "stream": stream, + "reasoning": {"summary": "concise"}, + "truncation": "auto", + "num_retries": max_retries, + **kwargs + } + + # Call API start hook + if _on_api_start: + await _on_api_start(api_kwargs) + + # Use liteLLM responses + response = await litellm.aresponses(**api_kwargs) + + # Call API end hook + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Extract usage information + usage = { + **response.usage.model_dump(), + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(usage) + + # Return in the expected format + output_dict = response.model_dump() + output_dict["usage"] = usage + return output_dict - return response + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates based on image and instruction. + + Note: OpenAI computer-use-preview doesn't support direct click prediction, + so this returns None. 
+ + Args: + model: Model name to use + image_b64: Base64 encoded image + instruction: Instruction for where to click + + Returns: + None (not supported by OpenAI computer-use-preview) + """ + return None + + def get_capabilities(self) -> List[AgentCapability]: + """ + Get list of capabilities supported by this agent config. + + Returns: + List of capability strings + """ + return ["step"] diff --git a/libs/python/agent/agent/loops/uitars.py b/libs/python/agent/agent/loops/uitars.py index e82e005d..f5188288 100644 --- a/libs/python/agent/agent/loops/uitars.py +++ b/libs/python/agent/agent/loops/uitars.py @@ -9,7 +9,7 @@ import base64 import math import re import ast -from typing import Dict, List, Any, AsyncGenerator, Union, Optional +from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple from io import BytesIO from PIL import Image import litellm @@ -21,8 +21,8 @@ from openai.types.responses.response_input_param import ComputerCallOutput from openai.types.responses.response_output_message_param import ResponseOutputMessageParam from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary -from ..decorators import agent_loop -from ..types import Messages, AgentResponse, Tools +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability from ..responses import ( make_reasoning_item, make_output_text_item, @@ -501,188 +501,298 @@ def convert_uitars_messages_to_litellm(messages: Messages) -> List[Dict[str, Any return litellm_messages -@agent_loop(models=r"(?i).*ui-?tars.*", priority=10) -async def uitars_loop( - messages: Messages, - model: str, - tools: Optional[List[Dict[str, Any]]] = None, - max_retries: Optional[int] = None, - stream: bool = False, - computer_handler=None, - use_prompt_caching: Optional[bool] = False, - _on_api_start=None, - _on_api_end=None, - _on_usage=None, - _on_screenshot=None, - **kwargs -) -> Union[AgentResponse, 
AsyncGenerator[Dict[str, Any], None]]: +@register_agent(models=r"(?i).*ui-?tars.*", priority=10) +class UITARSConfig: """ - UITARS agent loop using liteLLM for ByteDance-Seed/UI-TARS-1.5-7B model. + UITARS agent configuration using liteLLM for ByteDance-Seed/UI-TARS-1.5-7B model. Supports UITARS vision-language models for computer control. """ - tools = tools or [] - # Create response items - response_items = [] - - # Find computer tool for screen dimensions - computer_tool = None - for tool_schema in tools: - if tool_schema["type"] == "computer": - computer_tool = tool_schema["computer"] - break - - # Get screen dimensions - screen_width, screen_height = 1024, 768 - if computer_tool: - try: - screen_width, screen_height = await computer_tool.get_dimensions() - except: - pass - - # Process messages to extract instruction and image - instruction = "" - image_data = None - - # Convert messages to list if string - if isinstance(messages, str): - messages = [{"role": "user", "content": messages}] - - # Extract instruction and latest screenshot - for message in reversed(messages): - if isinstance(message, dict): - content = message.get("content", "") + async def predict_step( + self, + messages: Messages, + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs + ) -> Dict[str, Any]: + """ + Predict the next step based on input messages. 
+ + Args: + messages: Input messages following Responses format + model: Model name to use + tools: Optional list of tool schemas + max_retries: Maximum number of retries + stream: Whether to stream responses + computer_handler: Computer handler instance + _on_api_start: Callback for API start + _on_api_end: Callback for API end + _on_usage: Callback for usage tracking + _on_screenshot: Callback for screenshot events + **kwargs: Additional arguments - # Handle different content formats - if isinstance(content, str): - if not instruction and message.get("role") == "user": - instruction = content - elif isinstance(content, list): - for item in content: - if isinstance(item, dict): - if item.get("type") == "text" and not instruction: - instruction = item.get("text", "") - elif item.get("type") == "image_url" and not image_data: - image_url = item.get("image_url", {}) - if isinstance(image_url, dict): - image_data = image_url.get("url", "") - else: - image_data = image_url + Returns: + Dictionary with "output" (output items) and "usage" array + """ + tools = tools or [] - # Also check for computer_call_output with screenshots - if message.get("type") == "computer_call_output" and not image_data: - output = message.get("output", {}) - if isinstance(output, dict) and output.get("type") == "input_image": - image_data = output.get("image_url", "") + # Create response items + response_items = [] - if instruction and image_data: - break - - if not instruction: - instruction = "Help me complete this task by analyzing the screen and taking appropriate actions." - - # Create prompt - user_prompt = UITARS_PROMPT_TEMPLATE.format( - instruction=instruction, - action_space=UITARS_ACTION_SPACE, - language="English" - ) - - # Convert conversation history to LiteLLM format - history_messages = convert_uitars_messages_to_litellm(messages) - - # Prepare messages for liteLLM - litellm_messages = [ - { - "role": "system", - "content": "You are a helpful assistant." 
- } - ] - - # Add current user instruction with screenshot - current_user_message = { - "role": "user", - "content": [ - {"type": "text", "text": user_prompt}, + # Find computer tool for screen dimensions + computer_tool = None + for tool_schema in tools: + if tool_schema["type"] == "computer": + computer_tool = tool_schema["computer"] + break + + # Get screen dimensions + screen_width, screen_height = 1024, 768 + if computer_tool: + try: + screen_width, screen_height = await computer_tool.get_dimensions() + except: + pass + + # Process messages to extract instruction and image + instruction = "" + image_data = None + + # Convert messages to list if string + if isinstance(messages, str): + messages = [{"role": "user", "content": messages}] + + # Extract instruction and latest screenshot + for message in reversed(messages): + if isinstance(message, dict): + content = message.get("content", "") + + # Handle different content formats + if isinstance(content, str): + if not instruction and message.get("role") == "user": + instruction = content + elif isinstance(content, list): + for item in content: + if isinstance(item, dict): + if item.get("type") == "text" and not instruction: + instruction = item.get("text", "") + elif item.get("type") == "image_url" and not image_data: + image_url = item.get("image_url", {}) + if isinstance(image_url, dict): + image_data = image_url.get("url", "") + else: + image_data = image_url + + # Also check for computer_call_output with screenshots + if message.get("type") == "computer_call_output" and not image_data: + output = message.get("output", {}) + if isinstance(output, dict) and output.get("type") == "input_image": + image_data = output.get("image_url", "") + + if instruction and image_data: + break + + if not instruction: + instruction = "Help me complete this task by analyzing the screen and taking appropriate actions." 
+ + # Create prompt + user_prompt = UITARS_PROMPT_TEMPLATE.format( + instruction=instruction, + action_space=UITARS_ACTION_SPACE, + language="English" + ) + + # Convert conversation history to LiteLLM format + history_messages = convert_uitars_messages_to_litellm(messages) + + # Prepare messages for liteLLM + litellm_messages = [ + { + "role": "system", + "content": "You are a helpful assistant." + } ] - } - litellm_messages.append(current_user_message) - - # Process image for UITARS - if not image_data: - # Take screenshot if none found in messages - if computer_handler: - image_data = await computer_handler.screenshot() - await _on_screenshot(image_data, "screenshot_before") - # Add screenshot to output items so it can be retained in history - response_items.append(make_input_image_item(image_data)) - else: - raise ValueError("No screenshot found in messages and no computer_handler provided") - processed_image, original_width, original_height = process_image_for_uitars(image_data) - encoded_image = pil_to_base64(processed_image) - - # Add conversation history - if history_messages: - litellm_messages.extend(history_messages) - else: - litellm_messages.append({ - "role": "user", + # Add current user instruction with screenshot + current_user_message = { + "role": "user", "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}} + {"type": "text", "text": user_prompt}, ] - }) + } + litellm_messages.append(current_user_message) + + # Process image for UITARS + if not image_data: + # Take screenshot if none found in messages + if computer_handler: + image_data = await computer_handler.screenshot() + await _on_screenshot(image_data, "screenshot_before") - # Prepare API call kwargs - api_kwargs = { - "model": model, - "messages": litellm_messages, - "max_tokens": kwargs.get("max_tokens", 500), - "temperature": kwargs.get("temperature", 0.0), - "do_sample": kwargs.get("temperature", 0.0) > 0.0, - "num_retries": max_retries, - 
**{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]} - } - - # Call API start hook - if _on_api_start: - await _on_api_start(api_kwargs) - - # Call liteLLM with UITARS model - response = await litellm.acompletion(**api_kwargs) - - # Call API end hook - if _on_api_end: - await _on_api_end(api_kwargs, response) - - # Extract response content - response_content = response.choices[0].message.content.strip() # type: ignore - - # Parse UITARS response - parsed_responses = parse_uitars_response(response_content, original_width, original_height) - - # Convert to computer actions - computer_actions = convert_to_computer_actions(parsed_responses, original_width, original_height) - - # Add computer actions to response items - thought = parsed_responses[0].get("thought", "") - if thought: - response_items.append(make_reasoning_item(thought)) - response_items.extend(computer_actions) - - # Extract usage information - response_usage = { - **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), - "response_cost": response._hidden_params.get("response_cost", 0.0), - } - if _on_usage: - await _on_usage(response_usage) + # Add screenshot to output items so it can be retained in history + response_items.append(make_input_image_item(image_data)) + else: + raise ValueError("No screenshot found in messages and no computer_handler provided") + processed_image, original_width, original_height = process_image_for_uitars(image_data) + encoded_image = pil_to_base64(processed_image) + + # Add conversation history + if history_messages: + litellm_messages.extend(history_messages) + else: + litellm_messages.append({ + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}} + ] + }) - # Create agent response - agent_response = { - "output": response_items, - "usage": response_usage - } + # Prepare API call kwargs + api_kwargs = { + "model": model, + 
"messages": litellm_messages, + "max_tokens": kwargs.get("max_tokens", 500), + "temperature": kwargs.get("temperature", 0.0), + "do_sample": kwargs.get("temperature", 0.0) > 0.0, + "num_retries": max_retries, + **{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]} + } + + # Call API start hook + if _on_api_start: + await _on_api_start(api_kwargs) + + # Call liteLLM with UITARS model + response = await litellm.acompletion(**api_kwargs) + + # Call API end hook + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Extract response content + response_content = response.choices[0].message.content.strip() # type: ignore + + # Parse UITARS response + parsed_responses = parse_uitars_response(response_content, original_width, original_height) + + # Convert to computer actions + computer_actions = convert_to_computer_actions(parsed_responses, original_width, original_height) + + # Add computer actions to response items + thought = parsed_responses[0].get("thought", "") + if thought: + response_items.append(make_reasoning_item(thought)) + response_items.extend(computer_actions) + + # Extract usage information + response_usage = { + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(response_usage) + + # Create agent response + agent_response = { + "output": response_items, + "usage": response_usage + } + + return agent_response - return agent_response \ No newline at end of file + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates based on image and instruction. + + UITARS supports click prediction through its action parsing. 
+ + Args: + model: Model name to use + image_b64: Base64 encoded image + instruction: Instruction for where to click + + Returns: + Tuple with (x, y) coordinates or None + """ + try: + # Create a simple click instruction for UITARS + user_prompt = UITARS_PROMPT_TEMPLATE.format( + instruction=f"Click on: {instruction}", + action_space=UITARS_ACTION_SPACE, + language="English" + ) + + # Prepare messages for liteLLM + litellm_messages = [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": [ + {"type": "text", "text": user_prompt}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}} + ] + } + ] + + # Call liteLLM with UITARS model + response = await litellm.acompletion( + model=model, + messages=litellm_messages, + max_tokens=100, + temperature=0.0 + ) + + # Extract response content + response_content = response.choices[0].message.content.strip() # type: ignore + + # Parse UITARS response to extract click coordinates + parsed_responses = parse_uitars_response(response_content, 1024, 768) # Default dimensions + + if parsed_responses and len(parsed_responses) > 0: + action_type = parsed_responses[0].get("action_type") + if action_type == "click": + action_inputs = parsed_responses[0].get("action_inputs", {}) + start_box = action_inputs.get("start_box") + if start_box: + # Parse coordinates from start_box + try: + coords = eval(start_box) # Parse the coordinate list + if len(coords) >= 2: + # Convert normalized coordinates back to pixel coordinates + x = int(coords[0] * 1024) + y = int(coords[1] * 768) + return (x, y) + except: + pass + + return None + + except Exception as e: + print(f"Error in UITARS predict_click: {e}") + return None + + def get_capabilities(self) -> List[AgentCapability]: + """ + Get list of capabilities supported by this agent config. 
+ + Returns: + List of capability strings + """ + return ["step", "click"] \ No newline at end of file diff --git a/libs/python/agent/agent/types.py b/libs/python/agent/agent/types.py index 2b07a6cf..881e1c20 100644 --- a/libs/python/agent/agent/types.py +++ b/libs/python/agent/agent/types.py @@ -14,16 +14,18 @@ Tools = Optional[Iterable[ToolParam]] # Agent output types AgentResponse = ResponsesAPIResponse +AgentCapability = Literal["step", "click"] -# Agent loop registration -class AgentLoopInfo(BaseModel): - """Information about a registered agent loop""" - func: Callable + +# Agent config registration +class AgentConfigInfo(BaseModel): + """Information about a registered agent config""" + agent_class: type models_regex: str priority: int = 0 def matches_model(self, model: str) -> bool: - """Check if this loop matches the given model""" + """Check if this agent config matches the given model""" return bool(re.match(self.models_regex, model)) # Computer tool interface From 3a67485e425f330435142a0c67108a9ceeb40e51 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 29 Jul 2025 18:53:46 -0400 Subject: [PATCH 02/76] Fix circular deps, add GTA1 models --- libs/python/agent/agent/__init__.py | 4 +- libs/python/agent/agent/agent.py | 4 +- libs/python/agent/agent/decorators.py | 7 +- libs/python/agent/agent/loops/base.py | 76 +++++++++++ libs/python/agent/agent/loops/gta1.py | 178 ++++++++++++++++++++++++++ 5 files changed, 259 insertions(+), 10 deletions(-) create mode 100644 libs/python/agent/agent/loops/base.py create mode 100644 libs/python/agent/agent/loops/gta1.py diff --git a/libs/python/agent/agent/__init__.py b/libs/python/agent/agent/__init__.py index 6797dab6..08d782d3 100644 --- a/libs/python/agent/agent/__init__.py +++ b/libs/python/agent/agent/__init__.py @@ -5,7 +5,7 @@ agent - Decorator-based Computer Use Agent with liteLLM integration import logging import sys -from .decorators import agent_loop +from .decorators import register_agent from .agent 
import ComputerAgent from .types import Messages, AgentResponse @@ -13,7 +13,7 @@ from .types import Messages, AgentResponse from . import loops __all__ = [ - "agent_loop", + "register_agent", "ComputerAgent", "Messages", "AgentResponse" diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index f117fe8b..efacea45 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -616,9 +616,9 @@ class ComputerAgent: if "click" not in capabilities: raise ValueError(f"Agent loop {self.agent_loop.__name__} does not support click predictions") if hasattr(self.agent_loop, 'predict_click'): - if not self.computer_handler: - raise ValueError("Computer tool is required for predict_click") if not image_b64: + if not self.computer_handler: + raise ValueError("Computer tool or image_b64 is required for predict_click") image_b64 = await self.computer_handler.screenshot() return await self.agent_loop.predict_click( model=self.model, diff --git a/libs/python/agent/agent/decorators.py b/libs/python/agent/agent/decorators.py index 7305b702..7fba0443 100644 --- a/libs/python/agent/agent/decorators.py +++ b/libs/python/agent/agent/decorators.py @@ -2,13 +2,8 @@ Decorators for agent - agent_loop decorator """ -import asyncio -import inspect -from typing import Dict, List, Any, Callable, Optional -from functools import wraps - +from typing import List, Optional from .types import AgentConfigInfo -from .loops.base import AsyncAgentConfig # Global registry _agent_configs: List[AgentConfigInfo] = [] diff --git a/libs/python/agent/agent/loops/base.py b/libs/python/agent/agent/loops/base.py new file mode 100644 index 00000000..887605b1 --- /dev/null +++ b/libs/python/agent/agent/loops/base.py @@ -0,0 +1,76 @@ +""" +Base protocol for async agent configurations +""" + +from typing import Protocol, List, Dict, Any, Optional, Tuple, Union +from abc import abstractmethod +from ..types import AgentCapability + +class AsyncAgentConfig(Protocol): + 
"""Protocol defining the interface for async agent configurations.""" + + @abstractmethod + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs + ) -> Dict[str, Any]: + """ + Predict the next step based on input items. + + Args: + messages: Input items following Responses format (message, function_call, computer_call) + model: Model name to use + tools: Optional list of tool schemas + max_retries: Maximum number of retries for failed API calls + stream: Whether to stream responses + computer_handler: Computer handler instance + _on_api_start: Callback for API start + _on_api_end: Callback for API end + _on_usage: Callback for usage tracking + _on_screenshot: Callback for screenshot events + **kwargs: Additional arguments + + Returns: + Dictionary with "output" (output items) and "usage" array + """ + ... + + @abstractmethod + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates based on image and instruction. + + Args: + model: Model name to use + image_b64: Base64 encoded image + instruction: Instruction for where to click + + Returns: + None or tuple with (x, y) coordinates + """ + ... + + @abstractmethod + def get_capabilities(self) -> List[AgentCapability]: + """ + Get list of capabilities supported by this agent config. + + Returns: + List of capability strings (e.g., ["step", "click"]) + """ + ... 
diff --git a/libs/python/agent/agent/loops/gta1.py b/libs/python/agent/agent/loops/gta1.py new file mode 100644 index 00000000..4d0d3349 --- /dev/null +++ b/libs/python/agent/agent/loops/gta1.py @@ -0,0 +1,178 @@ +""" +GTA1 agent loop implementation for click prediction using litellm.acompletion +""" + +import asyncio +import json +import re +import base64 +from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple +from io import BytesIO +from PIL import Image +import litellm + +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability +from ..loops.base import AsyncAgentConfig + +SYSTEM_PROMPT = ''' +You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. The image resolution is height {height} and width {width}. For elements with area, return the center point. + +Output the coordinate pair exactly: +(x,y) +''' + +def extract_coordinates(raw_string: str) -> Tuple[float, float]: + """Extract coordinates from model output.""" + try: + matches = re.findall(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string) + return tuple(map(float, matches[0])) # type: ignore + except: + return (0.0, 0.0) + +def smart_resize(height: int, width: int, factor: int = 28, min_pixels: int = 3136, max_pixels: int = 8847360) -> Tuple[int, int]: + """Smart resize function similar to qwen_vl_utils.""" + # Calculate the total pixels + total_pixels = height * width + + # If already within bounds, return original dimensions + if min_pixels <= total_pixels <= max_pixels: + # Round to nearest factor + new_height = (height // factor) * factor + new_width = (width // factor) * factor + return new_height, new_width + + # Calculate scaling factor + if total_pixels > max_pixels: + scale = (max_pixels / total_pixels) ** 0.5 + else: + scale = (min_pixels / total_pixels) ** 0.5 + + # Apply scaling + new_height = int(height * scale) + 
new_width = int(width * scale) + + # Round to nearest factor + new_height = (new_height // factor) * factor + new_width = (new_width // factor) * factor + + # Ensure minimum size + new_height = max(new_height, factor) + new_width = max(new_width, factor) + + return new_height, new_width + +@register_agent(models=r".*GTA1-.*", priority=10) +class GTA1Config(AsyncAgentConfig): + """GTA1 agent configuration implementing AsyncAgentConfig protocol for click prediction.""" + + async def predict_step( + self, + messages: Messages, + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs + ) -> Dict[str, Any]: + """ + GTA1 does not support step prediction - only click prediction. + """ + raise NotImplementedError("GTA1 agent only supports click prediction via predict_click method") + + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs + ) -> Optional[Tuple[float, float]]: + """ + Predict click coordinates using GTA1 model via litellm.acompletion. 
+ + Args: + model: The GTA1 model name + image_b64: Base64 encoded image + instruction: Instruction for where to click + + Returns: + Tuple of (x, y) coordinates or None if prediction fails + """ + try: + # Decode base64 image + image_data = base64.b64decode(image_b64) + image = Image.open(BytesIO(image_data)) + width, height = image.width, image.height + + # Smart resize the image (similar to qwen_vl_utils) + resized_height, resized_width = smart_resize( + height, width, + factor=28, # Default factor for Qwen models + min_pixels=3136, + max_pixels=4096 * 2160 + ) + resized_image = image.resize((resized_width, resized_height)) + scale_x, scale_y = width / resized_width, height / resized_height + + # Convert resized image back to base64 + buffered = BytesIO() + resized_image.save(buffered, format="PNG") + resized_image_b64 = base64.b64encode(buffered.getvalue()).decode() + + # Prepare system and user messages + system_message = { + "role": "system", + "content": SYSTEM_PROMPT.format(height=resized_height, width=resized_width) + } + + user_message = { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{resized_image_b64}" + } + }, + { + "type": "text", + "text": instruction + } + ] + } + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": [system_message, user_message], + "max_tokens": 32, + "temperature": 0.0, + **kwargs + } + + # Use liteLLM acompletion + response = await litellm.acompletion(**api_kwargs) + + # Extract response text + output_text = response.choices[0].message.content + + # Extract and rescale coordinates + pred_x, pred_y = extract_coordinates(output_text) + pred_x *= scale_x + pred_y *= scale_y + + return (pred_x, pred_y) + + except Exception as e: + print(f"GTA1 click prediction failed: {e}") + return None + + def get_capabilities(self) -> List[AgentCapability]: + """Return the capabilities supported by this agent.""" + return ["click"] From 
2076ec75966de2c4b2961be7d8585f3e816e91b0 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 29 Jul 2025 20:48:44 -0400 Subject: [PATCH 03/76] added GTA1 agent and click benchmarks (ss-pro, repl) --- .../adapters/huggingfacelocal_adapter.py | 6 +- libs/python/agent/agent/loops/__init__.py | 3 +- libs/python/agent/agent/loops/gta1.py | 4 +- libs/python/agent/agent/loops/omniparser.py | 2 + libs/python/agent/agent/loops/uitars.py | 81 +++-- libs/python/agent/benchmarks/.gitignore | 2 + libs/python/agent/benchmarks/interactive.py | 201 +++++++++++ .../agent/benchmarks/models/__init__.py | 4 + libs/python/agent/benchmarks/models/base.py | 36 ++ libs/python/agent/benchmarks/models/gta1.py | 162 +++++++++ libs/python/agent/benchmarks/ss-pro.py | 157 +++++++++ libs/python/agent/benchmarks/utils.py | 316 ++++++++++++++++++ 12 files changed, 939 insertions(+), 35 deletions(-) create mode 100644 libs/python/agent/benchmarks/.gitignore create mode 100644 libs/python/agent/benchmarks/interactive.py create mode 100644 libs/python/agent/benchmarks/models/__init__.py create mode 100644 libs/python/agent/benchmarks/models/base.py create mode 100644 libs/python/agent/benchmarks/models/gta1.py create mode 100644 libs/python/agent/benchmarks/ss-pro.py create mode 100644 libs/python/agent/benchmarks/utils.py diff --git a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py index f8706868..5692401d 100644 --- a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py +++ b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py @@ -48,7 +48,11 @@ class HuggingFaceLocalAdapter(CustomLLM): ) # Load processor - processor = AutoProcessor.from_pretrained(model_name) + processor = AutoProcessor.from_pretrained( + model_name, + min_pixels=3136, + max_pixels=4096 * 2160 + ) # Cache them self.models[model_name] = model diff --git a/libs/python/agent/agent/loops/__init__.py 
b/libs/python/agent/agent/loops/__init__.py index aa159411..91722e55 100644 --- a/libs/python/agent/agent/loops/__init__.py +++ b/libs/python/agent/agent/loops/__init__.py @@ -7,5 +7,6 @@ from . import anthropic from . import openai from . import uitars from . import omniparser +from . import gta1 -__all__ = ["anthropic", "openai", "uitars", "omniparser"] +__all__ = ["anthropic", "openai", "uitars", "omniparser", "gta1"] diff --git a/libs/python/agent/agent/loops/gta1.py b/libs/python/agent/agent/loops/gta1.py index 4d0d3349..fb272f30 100644 --- a/libs/python/agent/agent/loops/gta1.py +++ b/libs/python/agent/agent/loops/gta1.py @@ -1,5 +1,7 @@ """ GTA1 agent loop implementation for click prediction using litellm.acompletion +Paper: https://arxiv.org/pdf/2507.05791 +Code: https://github.com/Yan98/GTA1 """ import asyncio @@ -20,7 +22,7 @@ You are an expert UI element locator. Given a GUI image and a user's element des Output the coordinate pair exactly: (x,y) -''' +'''.strip() def extract_coordinates(raw_string: str) -> Tuple[float, float]: """Extract coordinates from model output.""" diff --git a/libs/python/agent/agent/loops/omniparser.py b/libs/python/agent/agent/loops/omniparser.py index e92ef660..aff73edf 100644 --- a/libs/python/agent/agent/loops/omniparser.py +++ b/libs/python/agent/agent/loops/omniparser.py @@ -1,5 +1,7 @@ """ OpenAI computer-use-preview agent loop implementation using liteLLM +Paper: https://arxiv.org/abs/2408.00203 +Code: https://github.com/microsoft/OmniParser """ import asyncio diff --git a/libs/python/agent/agent/loops/uitars.py b/libs/python/agent/agent/loops/uitars.py index f5188288..f715ef61 100644 --- a/libs/python/agent/agent/loops/uitars.py +++ b/libs/python/agent/agent/loops/uitars.py @@ -1,5 +1,7 @@ """ UITARS agent loop implementation using liteLLM for ByteDance-Seed/UI-TARS-1.5-7B +Paper: https://arxiv.org/abs/2501.12326 +Code: https://github.com/bytedance/UI-TARS """ import asyncio @@ -79,6 +81,18 @@ Action: ... 
{instruction} """ +GROUNDING_UITARS_PROMPT_TEMPLATE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. + +## Output Format + +Action: ... + + +## Action Space +click(point='<|box_start|>(x1,y1)<|box_end|>') + +## User Instruction +{instruction}""" def round_by_factor(number: float, factor: int) -> int: """Returns the closest integer to 'number' that is divisible by 'factor'.""" @@ -511,7 +525,7 @@ class UITARSConfig: async def predict_step( self, - messages: Messages, + messages: List[Dict[str, Any]], model: str, tools: Optional[List[Dict[str, Any]]] = None, max_retries: Optional[int] = None, @@ -729,13 +743,15 @@ class UITARSConfig: Tuple with (x, y) coordinates or None """ try: - # Create a simple click instruction for UITARS - user_prompt = UITARS_PROMPT_TEMPLATE.format( - instruction=f"Click on: {instruction}", - action_space=UITARS_ACTION_SPACE, - language="English" + # Create prompt using grounding template + user_prompt = GROUNDING_UITARS_PROMPT_TEMPLATE.format( + instruction=instruction ) + # Process image for UITARS + processed_image, original_width, original_height = process_image_for_uitars(image_b64) + encoded_image = pil_to_base64(processed_image) + # Prepare messages for liteLLM litellm_messages = [ { @@ -746,46 +762,47 @@ class UITARSConfig: "role": "user", "content": [ {"type": "text", "text": user_prompt}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}} + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}} ] } ] + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": litellm_messages, + "max_tokens": 100, + "temperature": 0.0, + "do_sample": False + } + # Call liteLLM with UITARS model - response = await litellm.acompletion( - model=model, - messages=litellm_messages, - max_tokens=100, - temperature=0.0 - ) + response = await litellm.acompletion(**api_kwargs) # 
Extract response content response_content = response.choices[0].message.content.strip() # type: ignore - # Parse UITARS response to extract click coordinates - parsed_responses = parse_uitars_response(response_content, 1024, 768) # Default dimensions + # Parse the response to extract click coordinates + # Look for click action with coordinates + click_pattern = r"click\(point='<\|box_start\|>\((\d+),(\d+)\)<\|box_end\|>'\)" + match = re.search(click_pattern, response_content) - if parsed_responses and len(parsed_responses) > 0: - action_type = parsed_responses[0].get("action_type") - if action_type == "click": - action_inputs = parsed_responses[0].get("action_inputs", {}) - start_box = action_inputs.get("start_box") - if start_box: - # Parse coordinates from start_box - try: - coords = eval(start_box) # Parse the coordinate list - if len(coords) >= 2: - # Convert normalized coordinates back to pixel coordinates - x = int(coords[0] * 1024) - y = int(coords[1] * 768) - return (x, y) - except: - pass + if match: + x, y = int(match.group(1)), int(match.group(2)) + # Scale coordinates back to original image dimensions + scale_x = original_width / processed_image.width + scale_y = original_height / processed_image.height + + scaled_x = int(x * scale_x) + scaled_y = int(y * scale_y) + + return (scaled_x, scaled_y) return None except Exception as e: - print(f"Error in UITARS predict_click: {e}") + # Log error and return None + print(f"Error in predict_click: {e}") return None def get_capabilities(self) -> List[AgentCapability]: diff --git a/libs/python/agent/benchmarks/.gitignore b/libs/python/agent/benchmarks/.gitignore new file mode 100644 index 00000000..b9f463f1 --- /dev/null +++ b/libs/python/agent/benchmarks/.gitignore @@ -0,0 +1,2 @@ +output/ +interactive_output/ diff --git a/libs/python/agent/benchmarks/interactive.py b/libs/python/agent/benchmarks/interactive.py new file mode 100644 index 00000000..6d0aba82 --- /dev/null +++ 
b/libs/python/agent/benchmarks/interactive.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +""" +Interactive Click Prediction Tool + +Takes screenshots and allows testing multiple models interactively. +Models are loaded/unloaded one at a time to avoid memory issues. +""" + +import asyncio +import os +from datetime import datetime +from typing import List, Dict, Any + +from utils import ( + ModelWrapper, + take_screenshot, + save_prediction_visualization, + get_available_models +) + + +async def predict_with_all_models(image, instruction: str, models) -> List[Dict[str, Any]]: + """ + Predict click coordinates with all models sequentially. + + Args: + image: PIL Image to analyze + instruction: Instruction text + models: List of model instances + + Returns: + List of prediction results + """ + predictions = [] + + for model in models: + model_wrapper = ModelWrapper(model) + print(f"\n🔄 Loading {model_wrapper.model_name}...") + + try: + # Load model + await model_wrapper.load_model() + + # Predict + coords = await model_wrapper.predict_click(image, instruction) + + predictions.append({ + 'model_name': model_wrapper.model_name, + 'coords': coords, + 'error': None + }) + + if coords: + print(f"✅ {model_wrapper.model_name}: ({coords[0]}, {coords[1]})") + else: + print(f"❌ {model_wrapper.model_name}: No prediction") + + except Exception as e: + print(f"❌ {model_wrapper.model_name}: ERROR - {str(e)}") + predictions.append({ + 'model_name': model_wrapper.model_name, + 'coords': None, + 'error': str(e) + }) + + finally: + # Always unload model to free memory + try: + await model_wrapper.unload_model() + print(f"🗑️ Unloaded {model_wrapper.model_name}") + except Exception as e: + print(f"⚠️ Error unloading {model_wrapper.model_name}: {e}") + + return predictions + + +def print_header(): + """Print the interactive tool header.""" + print("=" * 60) + print("🖱️ Interactive Click Prediction Tool") + print("=" * 60) + print("Commands:") + print(" • Type an instruction to test models 
on last screenshot") + print(" • 'screenshot' - Take a new screenshot") + print(" • 'models' - List available models") + print(" • 'quit' or 'exit' - Exit the tool") + print("=" * 60) + print("💡 Tip: Take a screenshot first, then send instructions to test models!") + + +def print_models(models): + """Print available models.""" + print("\n📋 Available Models:") + for i, model in enumerate(models, 1): + if isinstance(model, str): + print(f" {i}. {model}") + else: + print(f" {i}. models.{model.__class__.__name__}") + + +async def main(): + """ + Main interactive loop. + """ + print_header() + + # Get available models + models = get_available_models() + print_models(models) + + # Create output directory for visualizations + output_dir = "interactive_output" + os.makedirs(output_dir, exist_ok=True) + + session_count = 0 + last_screenshot = None + screenshot_timestamp = None + + while True: + try: + # Get user input + print(f"\n{'='*40}") + user_input = input("🎯 Enter instruction (or command): ").strip() + + if not user_input: + continue + + # Handle commands + if user_input.lower() in ['quit', 'exit', 'q']: + print("👋 Goodbye!") + break + + elif user_input.lower() == 'models': + print_models(models) + continue + + elif user_input.lower() == 'screenshot': + print("📸 Taking screenshot...") + try: + last_screenshot = take_screenshot() + screenshot_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + screenshot_path = os.path.join(output_dir, f"screenshot_{screenshot_timestamp}.png") + last_screenshot.save(screenshot_path) + print(f"✅ Screenshot captured and saved to: {screenshot_path}") + print(f"📝 Ready for instructions! Screenshot size: {last_screenshot.size}") + except Exception as e: + print(f"❌ Error taking screenshot: {e}") + continue + + # Handle instruction input + if last_screenshot is None: + print("⚠️ No screenshot available! 
Please take a screenshot first using 'screenshot' command.") + continue + + session_count += 1 + print(f"\n🎯 Session {session_count}: '{user_input}'") + print(f"📷 Using screenshot from: {screenshot_timestamp}") + + # Predict with all models using last screenshot + print(f"\n🤖 Testing {len(models)} models on screenshot...") + predictions = await predict_with_all_models(last_screenshot, user_input, models) + + # Display results summary + print(f"\n📊 Results Summary:") + print("-" * 50) + for pred in predictions: + if pred['coords']: + print(f"✅ {pred['model_name']}: ({pred['coords'][0]}, {pred['coords'][1]})") + elif pred['error']: + print(f"❌ {pred['model_name']}: ERROR - {pred['error']}") + else: + print(f"❌ {pred['model_name']}: No prediction") + + # Save visualization + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + vis_filename = f"session_{session_count:03d}_{timestamp}.png" + vis_path = os.path.join(output_dir, vis_filename) + + try: + save_prediction_visualization(last_screenshot, user_input, predictions, vis_path) + print(f"\n💾 Visualization saved to: {vis_path}") + except Exception as e: + print(f"⚠️ Error saving visualization: {e}") + + print(f"\n✨ Session {session_count} completed!") + + except KeyboardInterrupt: + print("\n\n👋 Interrupted by user. 
Goodbye!") + break + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + print("Continuing...") + + +if __name__ == "__main__": + try: + asyncio.run(main()) + except KeyboardInterrupt: + print("\n👋 Goodbye!") + except Exception as e: + print(f"❌ Fatal error: {e}") diff --git a/libs/python/agent/benchmarks/models/__init__.py b/libs/python/agent/benchmarks/models/__init__.py new file mode 100644 index 00000000..51033a7b --- /dev/null +++ b/libs/python/agent/benchmarks/models/__init__.py @@ -0,0 +1,4 @@ +from .base import ModelProtocol +from .gta1 import GTA1Model + +__all__ = ["ModelProtocol", "GTA1Model"] diff --git a/libs/python/agent/benchmarks/models/base.py b/libs/python/agent/benchmarks/models/base.py new file mode 100644 index 00000000..8ad100a3 --- /dev/null +++ b/libs/python/agent/benchmarks/models/base.py @@ -0,0 +1,36 @@ +""" +Base protocol for benchmark models. +""" + +from typing import Protocol, Optional, Tuple +from PIL import Image + + +class ModelProtocol(Protocol): + """Protocol for benchmark models that can predict click coordinates.""" + + @property + def model_name(self) -> str: + """Return the name of the model.""" + ... + + async def load_model(self) -> None: + """Load the model into memory.""" + ... + + async def unload_model(self) -> None: + """Unload the model from memory.""" + ... + + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates for the given image and instruction. + + Args: + image: PIL Image to analyze + instruction: Text instruction describing what to click + + Returns: + Tuple of (x, y) coordinates or None if prediction fails + """ + ... diff --git a/libs/python/agent/benchmarks/models/gta1.py b/libs/python/agent/benchmarks/models/gta1.py new file mode 100644 index 00000000..2bb4fe1d --- /dev/null +++ b/libs/python/agent/benchmarks/models/gta1.py @@ -0,0 +1,162 @@ +""" +GTA1 model implementation for benchmarking. 
+""" + +from typing import Optional, Tuple +from PIL import Image +import torch +import re +import gc +from qwen_vl_utils import process_vision_info, smart_resize +from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor + +from .base import ModelProtocol + + +class GTA1Model: + """Ground truth GTA1 model implementation.""" + + def __init__(self, model_path: str = "HelloKKMe/GTA1-7B"): + self.model_path = model_path + self.model = None + self.processor = None + self.max_new_tokens = 32 + + self.system_prompt = ''' +You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. The image resolution is height {height} and width {width}. For elements with area, return the center point. + +Output the coordinate pair exactly: +(x,y) +'''.strip() + + @property + def model_name(self) -> str: + """Return the name of the model.""" + return f"GTA1-{self.model_path.split('/')[-1]}" + + async def load_model(self) -> None: + """Load the model into memory.""" + if self.model is None: + print(f"Loading GTA1 model: {self.model_path}") + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + self.model_path, + torch_dtype=torch.bfloat16, + device_map="auto" + ) + self.processor = AutoProcessor.from_pretrained( + self.model_path, + min_pixels=3136, + max_pixels=4096 * 2160 + ) + print("GTA1 model loaded successfully") + + async def unload_model(self) -> None: + """Unload the model from memory.""" + if self.model is not None: + print("Unloading GTA1 model from GPU...") + del self.model + del self.processor + self.model = None + self.processor = None + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + print("GTA1 model unloaded") + + def _extract_coordinates(self, raw_string: str) -> Tuple[int, int]: + """Extract coordinates from model output.""" + try: + matches = re.findall(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string) + 
return tuple(map(int, map(float, matches[0]))) # type: ignore + except: + return (0, 0) + + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates for the given image and instruction. + + Args: + image: PIL Image to analyze + instruction: Text instruction describing what to click + + Returns: + Tuple of (x, y) coordinates or None if prediction fails + """ + if self.model is None or self.processor is None: + await self.load_model() + + assert self.processor is not None + assert self.model is not None + + try: + width, height = image.width, image.height + + # Resize image according to processor requirements + resized_height, resized_width = smart_resize( + image.height, + image.width, + factor=self.processor.image_processor.patch_size * self.processor.image_processor.merge_size, + min_pixels=self.processor.image_processor.min_pixels, + max_pixels=self.processor.image_processor.max_pixels, + ) + resized_image = image.resize((resized_width, resized_height)) + scale_x, scale_y = width / resized_width, height / resized_height + + # Prepare messages + system_message = { + "role": "system", + "content": self.system_prompt.format(height=resized_height, width=resized_width) + } + + user_message = { + "role": "user", + "content": [ + {"type": "image", "image": resized_image}, + {"type": "text", "text": instruction} + ] + } + + # Process inputs + image_inputs, video_inputs = process_vision_info([system_message, user_message]) + text = self.processor.apply_chat_template( + [system_message, user_message], + tokenize=False, + add_generation_prompt=True + ) + inputs = self.processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt" + ) + inputs = inputs.to(self.model.device) + + # Generate prediction + output_ids = self.model.generate( + **inputs, + max_new_tokens=self.max_new_tokens, + do_sample=False, + temperature=1.0, + use_cache=True + ) + 
generated_ids = [ + output_ids[len(input_ids):] + for input_ids, output_ids in zip(inputs.input_ids, output_ids) + ] + output_text = self.processor.batch_decode( + generated_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=True + )[0] + + # Extract and rescale coordinates + pred_x, pred_y = self._extract_coordinates(output_text) + pred_x = int(pred_x * scale_x) + pred_y = int(pred_y * scale_y) + + return (pred_x, pred_y) + + except Exception as e: + print(f"Error in GTA1 prediction: {e}") + return None diff --git a/libs/python/agent/benchmarks/ss-pro.py b/libs/python/agent/benchmarks/ss-pro.py new file mode 100644 index 00000000..57f2c971 --- /dev/null +++ b/libs/python/agent/benchmarks/ss-pro.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +""" +ScreenSpot-Pro Benchmark Script + +Evaluates models on the ScreenSpot-Pro dataset for click prediction accuracy. +Supports both ComputerAgent model strings and custom model classes. +""" + +import asyncio +from typing import Optional + +from datasets import load_dataset +from tqdm import tqdm + +from utils import ( + ModelWrapper, + is_click_in_bbox, + save_results_to_markdown, + save_visualizations, + get_available_models +) + + +async def evaluate_model(model_wrapper: ModelWrapper, dataset, max_samples: Optional[int] = None) -> dict: + """ + Evaluate a model on the ScreenSpot-Pro dataset. 
+ + Args: + model_wrapper: ModelWrapper instance + dataset: ScreenSpot-Pro dataset (list of samples) + max_samples: Maximum number of samples to evaluate (None for all) + + Returns: + Dictionary with evaluation results + """ + print(f"\nEvaluating model: {model_wrapper.model_name}") + + # Load model + await model_wrapper.load_model() + + total_samples = len(dataset) + if max_samples is not None: + total_samples = min(max_samples, total_samples) + + correct_predictions = 0 + failed_predictions = 0 + results = [] + + try: + for i in tqdm(range(total_samples), desc=f"Evaluating {model_wrapper.model_name}"): + sample = dataset[i] + + # Extract sample data + image = sample['image'] + instruction = sample['instruction'] + bbox = sample['bbox'] # [x1, y1, x2, y2] + sample_id = sample['id'] + + # Predict click coordinates + try: + click_coords = await model_wrapper.predict_click(image, instruction) + + # Check if prediction is correct + is_correct = is_click_in_bbox(click_coords, bbox) + + if is_correct: + correct_predictions += 1 + + results.append({ + 'id': sample_id, + 'instruction': instruction, + 'bbox': bbox, + 'predicted_coords': click_coords, + 'is_correct': is_correct, + 'failed': False + }) + + except Exception as e: + print(f"\nError predicting sample {sample_id}: {e}") + failed_predictions += 1 + results.append({ + 'id': sample_id, + 'instruction': instruction, + 'bbox': bbox, + 'predicted_coords': None, + 'is_correct': False, + 'failed': True, + 'error': str(e) + }) + + finally: + # Unload model + await model_wrapper.unload_model() + + # Calculate metrics + accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0 + failure_rate = failed_predictions / total_samples if total_samples > 0 else 0.0 + + return { + 'model_name': model_wrapper.model_name, + 'total_samples': total_samples, + 'correct_predictions': correct_predictions, + 'failed_predictions': failed_predictions, + 'accuracy': accuracy, + 'failure_rate': failure_rate, + 'results': 
results + } + + +async def main(): + """ + Main function to run the benchmark. + """ + # Load dataset + print("Loading ScreenSpot-Pro dataset...") + ds = load_dataset("lmms-lab/ScreenSpot-Pro") + dataset = ds['train'] # type: ignore + # Convert to list to support indexing + dataset_list = list(dataset) + print(f"Dataset loaded: {len(dataset_list)} samples") + + # Get available models + models = get_available_models() + + # Evaluation settings + max_samples = 5 # Set to None to evaluate on full dataset + + # Run evaluations + all_results = [] + + for model in models: + try: + model_wrapper = ModelWrapper(model) + result = await evaluate_model(model_wrapper, dataset_list, max_samples) + all_results.append(result) + + # Print summary + print(f"\n{result['model_name']} Results:") + print(f" Accuracy: {result['accuracy']*100:.2f}%") + print(f" Correct: {result['correct_predictions']}/{result['total_samples']}") + print(f" Failed: {result['failed_predictions']}") + + except Exception as e: + print(f"\nError evaluating model {model}: {e}") + continue + + # Save results + if all_results: + save_results_to_markdown(all_results) + save_visualizations(all_results, dataset_list) + print("\nBenchmark completed successfully!") + else: + print("\nNo successful evaluations completed.") + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py new file mode 100644 index 00000000..c1fc41cf --- /dev/null +++ b/libs/python/agent/benchmarks/utils.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +""" +Shared utilities for ScreenSpot-Pro benchmarking and interactive testing. 
+""" + +import asyncio +import base64 +import os +import sys +from datetime import datetime +from io import BytesIO +from typing import List, Union, Tuple, Optional + +from PIL import Image, ImageDraw +from tqdm import tqdm +import gc +import torch + +# Add parent directory to path for imports +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from agent.agent import ComputerAgent +from models import GTA1Model +from models.base import ModelProtocol + +def get_available_models() -> List[Union[str, ModelProtocol]]: + """ + Get list of available models for testing. + + Returns: + List of model strings and model classes + """ + local_provider = "huggingface-local/" # Options: huggingface-local/ or mlx/ + + models = [ + # === ComputerAgent model strings === + f"{local_provider}HelloKKMe/GTA1-7B", + # f"{local_provider}HelloKKMe/GTA1-32B", # Uncomment if you have this model + + # === Reference model classes === + GTA1Model("HelloKKMe/GTA1-7B"), + # GTA1Model("HelloKKMe/GTA1-32B"), # Uncomment if you have this model + ] + + return models + + +def is_click_in_bbox(click_coords: Optional[Tuple[int, int]], bbox: List[int]) -> bool: + """ + Check if click coordinates are within the bounding box. + + Args: + click_coords: (x, y) coordinates or None + bbox: [x1, y1, x2, y2] bounding box + + Returns: + True if click is within bbox, False otherwise + """ + if click_coords is None: + return False + + x, y = click_coords + x1, y1, x2, y2 = bbox + + return x1 <= x <= x2 and y1 <= y <= y2 + + +def image_to_base64(image: Image.Image) -> str: + """ + Convert PIL Image to base64 string. + + Args: + image: PIL Image + + Returns: + Base64 encoded image string + """ + buffered = BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode() + + +class ModelWrapper: + """ + Wrapper to provide unified interface for both ComputerAgent and custom models. 
+ """ + + def __init__(self, model: Union[str, ModelProtocol]): + self.model = model + self.is_computer_agent = isinstance(model, str) + self.agent: Optional[ComputerAgent] = None + + if self.is_computer_agent: + self.model_name = str(model) + else: + self.model_name = f"models.{model.__class__.__name__}" + + async def load_model(self) -> None: + """Load the model.""" + if self.is_computer_agent: + self.agent = ComputerAgent(model=str(self.model)) + else: + await self.model.load_model() # type: ignore + + async def unload_model(self) -> None: + """Unload the model.""" + if not self.is_computer_agent: + await self.model.unload_model() # type: ignore + else: + del self.agent + self.agent = None + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + """Predict click coordinates.""" + if self.is_computer_agent: + if self.agent is None: + await self.load_model() + + if self.agent is not None: + image_b64 = image_to_base64(image) + result = await self.agent.predict_click(instruction=instruction, image_b64=image_b64) + return result + return None + else: + return await self.model.predict_click(image, instruction) # type: ignore + + +def save_results_to_markdown(all_results: List[dict], output_file: str = "screenspot_pro_results.md") -> None: + """ + Save evaluation results to a markdown table. 
+ + Args: + all_results: List of evaluation results for each model + output_file: Output markdown file path + """ + with open(output_file, 'w', encoding='utf-8') as f: + f.write("# ScreenSpot-Pro Benchmark Results\n\n") + f.write(f"**Evaluation Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n") + + # Summary table + f.write("## Summary\n\n") + f.write("| Model | Total Samples | Correct | Failed | Accuracy | Failure Rate |\n") + f.write("|-------|---------------|---------|--------|----------|--------------|\n") + + for result in all_results: + model_name = result['model_name'] + total = result['total_samples'] + correct = result['correct_predictions'] + failed = result['failed_predictions'] + accuracy = result['accuracy'] * 100 + failure_rate = result['failure_rate'] * 100 + + f.write(f"| {model_name} | {total} | {correct} | {failed} | {accuracy:.2f}% | {failure_rate:.2f}% |\n") + + # Detailed results for each model + for result in all_results: + f.write(f"\n## {result['model_name']} - Detailed Results\n\n") + f.write("| Sample ID | Instruction | BBox | Predicted | Correct | Failed |\n") + f.write("|-----------|-------------|------|-----------|---------|--------|\n") + + for sample_result in result['results'][:10]: # Show first 10 samples + sample_id = sample_result['id'] + instruction = sample_result['instruction'][:50] + "..." 
if len(sample_result['instruction']) > 50 else sample_result['instruction'] + bbox = str(sample_result['bbox']) + predicted = str(sample_result['predicted_coords']) if sample_result['predicted_coords'] else "None" + correct = "PASS" if sample_result['is_correct'] else "FAIL" + failed = "YES" if sample_result['failed'] else "NO" + + f.write(f"| {sample_id} | {instruction} | {bbox} | {predicted} | {correct} | {failed} |\n") + + if len(result['results']) > 10: + f.write(f"\n*Showing first 10 of {len(result['results'])} samples*\n") + + print(f"\nResults saved to: {output_file}") + + +def save_visualizations(all_results: List[dict], dataset_list, output_dir: str = "output") -> None: + """ + Save visualizations of predicted coordinates vs bboxes to an output folder. + + Args: + all_results: List of evaluation results for each model + dataset_list: List of dataset samples + output_dir: Output directory path + """ + # Create output directory + os.makedirs(output_dir, exist_ok=True) + + for result in all_results: + model_name = result['model_name'].replace('/', '_').replace('.', '_') + model_dir = os.path.join(output_dir, model_name) + os.makedirs(model_dir, exist_ok=True) + + print(f"\nSaving visualizations for {result['model_name']}...") + + for i, sample_result in enumerate(tqdm(result['results'][:10], desc=f"Saving {model_name} visualizations")): + try: + # Find the original sample + sample_id = sample_result['id'] + sample = None + for s in dataset_list: + if s['id'] == sample_id: + sample = s + break + + if sample is None: + continue + + # Get image and data + image = sample['image'].copy() + bbox = sample_result['bbox'] # [x1, y1, x2, y2] + predicted_coords = sample_result['predicted_coords'] + is_correct = sample_result['is_correct'] + + # Draw on image + draw = ImageDraw.Draw(image) + + # Draw bounding box (ground truth) in green + x1, y1, x2, y2 = bbox + draw.rectangle([x1, y1, x2, y2], outline="green", width=3) + draw.text((x1, y1-20), "Ground Truth", 
fill="green") + + # Draw predicted click in red or blue + if predicted_coords is not None: + px, py = predicted_coords + color = "blue" if is_correct else "red" + # Draw crosshair + crosshair_size = 15 + draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=3) + draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=3) + draw.text((px+10, py-20), f"Predicted ({px},{py})", fill=color) + + # Add status text + status = "CORRECT" if is_correct else "INCORRECT" + status_color = "blue" if is_correct else "red" + draw.text((10, 10), f"Status: {status}", fill=status_color) + draw.text((10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black") + + # Save image + filename = f"sample_{i+1:02d}_{sample_id}_{status.lower()}.png" + filepath = os.path.join(model_dir, filename) + image.save(filepath) + + except Exception as e: + print(f"Error saving visualization for sample {sample_id}: {e}") + continue + + print(f"Visualizations saved to: {model_dir}") + + +def save_prediction_visualization(image: Image.Image, instruction: str, predictions: List[dict], + output_file: str = "interactive_prediction.png") -> None: + """ + Save visualization of multiple model predictions on a single image. 
+ + Args: + image: PIL Image to visualize + instruction: Instruction text + predictions: List of prediction dicts with keys: model_name, coords, error + output_file: Output file path + """ + # Create a copy of the image + vis_image = image.copy() + draw = ImageDraw.Draw(vis_image) + + # Colors for different models + colors = ["red", "blue", "orange", "purple", "brown", "pink", "gray", "olive"] + + # Draw predictions + for i, pred in enumerate(predictions): + color = colors[i % len(colors)] + model_name = pred['model_name'] + coords = pred.get('coords') + error = pred.get('error') + + if coords is not None: + px, py = coords + # Draw crosshair + crosshair_size = 20 + draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=4) + draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=4) + # Draw model name + draw.text((px+15, py+15), f"{model_name}: ({px},{py})", fill=color) + else: + # Draw error text + draw.text((10, 50 + i*20), f"{model_name}: ERROR - {error}", fill=color) + + # Add instruction at the top + draw.text((10, 10), f"Instruction: {instruction}", fill="black") + + # Save image + vis_image.save(output_file) + print(f"Prediction visualization saved to: {output_file}") + + +def take_screenshot() -> Image.Image: + """ + Take a screenshot of the current screen. + + Returns: + PIL Image of the screenshot + """ + try: + import pyautogui + screenshot = pyautogui.screenshot() + return screenshot + except ImportError: + print("pyautogui not installed. 
Please install it with: pip install pyautogui") + raise + except Exception as e: + print(f"Error taking screenshot: {e}") + raise + From ffc88e203138ac6ef62d73361c800362815bf7f9 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 30 Jul 2025 13:41:58 -0400 Subject: [PATCH 04/76] added agent benchmarks --- libs/python/agent/benchmarks/.gitignore | 1 + libs/python/agent/benchmarks/README.md | 177 +++++++++++++++++ libs/python/agent/benchmarks/models/gta1.py | 2 +- libs/python/agent/benchmarks/ss-pro.py | 128 +++++++------ libs/python/agent/benchmarks/ss-v2.py | 179 ++++++++++++++++++ libs/python/agent/benchmarks/utils.py | 200 +++++++++++++------- 6 files changed, 553 insertions(+), 134 deletions(-) create mode 100644 libs/python/agent/benchmarks/README.md create mode 100644 libs/python/agent/benchmarks/ss-v2.py diff --git a/libs/python/agent/benchmarks/.gitignore b/libs/python/agent/benchmarks/.gitignore index b9f463f1..a0aed392 100644 --- a/libs/python/agent/benchmarks/.gitignore +++ b/libs/python/agent/benchmarks/.gitignore @@ -1,2 +1,3 @@ output/ interactive_output/ +*_results.md \ No newline at end of file diff --git a/libs/python/agent/benchmarks/README.md b/libs/python/agent/benchmarks/README.md new file mode 100644 index 00000000..033f0157 --- /dev/null +++ b/libs/python/agent/benchmarks/README.md @@ -0,0 +1,177 @@ +# Computer Agent Benchmarks + +This directory contains benchmarks designed to test agent providers in the Computer Agent SDK against reference agent implementations. + +## Overview + +The benchmark system evaluates models on GUI grounding tasks, specifically click prediction accuracy. It supports both: +- **Computer Agent SDK providers** (using model strings like `"huggingface-local/HelloKKMe/GTA1-7B"`) +- **Reference agent implementations** (custom model classes implementing the `ModelProtocol`) + +## Available Benchmarks + +### 1. 
ScreenSpot-v2 (`ss-v2.py`) +- **Dataset**: ScreenSpot-v2 (click-only GUI grounding) +- **Format**: Standard resolution screenshots +- **Task**: Predict click coordinates given an instruction and image +- **Metrics**: Accuracy, Error Rate, Timing, VRAM usage + +### 2. ScreenSpot-Pro (`ss-pro.py`) +- **Dataset**: ScreenSpot-Pro (high-resolution click-only GUI grounding) +- **Format**: High-resolution screenshots +- **Task**: Predict click coordinates given an instruction and image +- **Metrics**: Accuracy, Error Rate, Timing, VRAM usage + +### 3. Interactive Testing (`interactive.py`) +- **Real-time testing**: Take screenshots and visualize model predictions +- **Commands**: + - Type instruction → screenshot + test all models + - `screenshot` → take screenshot without prediction + - `models` → list available models + - `quit`/`exit` → exit tool +- **Output**: Visual predictions with crosshairs for each model + +## Adding Reference Agent Implementations + +### 1. Implement the ModelProtocol + +Create a new file in `models/` directory implementing the `ModelProtocol`: + +```python +from models.base import ModelProtocol +from typing import Optional, Tuple +from PIL import Image + +class YourModelName(ModelProtocol): + def __init__(self, model_path: str): + self.model_path = model_path + self._model = None + + @property + def model_name(self) -> str: + return self.model_path + + async def load_model(self) -> None: + """Load the model into memory.""" + # Your model loading logic here + pass + + async def unload_model(self) -> None: + """Unload the model from memory.""" + # Your model cleanup logic here + pass + + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates for the given image and instruction. 
+ + Args: + image: PIL Image to analyze + instruction: Text instruction describing what to click + + Returns: + Tuple of (x, y) coordinates or None if prediction fails + """ + # Your prediction logic here + return (x, y) # Return predicted coordinates +``` + +### 2. Register Your Model + +Add your model to the `get_available_models()` function in `utils.py`: + +```python +def get_available_models() -> List[Union[str, ModelProtocol]]: + models = [ + # Computer Agent SDK providers + "huggingface-local/HelloKKMe/GTA1-7B", + + # Reference implementations + GTA1Model("HelloKKMe/GTA1-7B"), + YourModelName("path/to/your/model"), # Add your model here + ] + return models +``` + +## Running Benchmarks + +### 1. Configure Models +Edit `utils.py` to specify which models you want to test in `get_available_models()`. + +### 2. Set Sample Count +Edit the benchmark script to change the number of samples: +```python +max_samples = 50 # Set to None to evaluate on full dataset +``` + +### 3. Run Benchmark +```bash +# ScreenSpot-v2 benchmark +python ss-v2.py + +# ScreenSpot-Pro benchmark +python ss-pro.py + +# Interactive testing +python interactive.py +``` + +## Output + +### Console Output +``` +Model Results: + Accuracy: 85.50% + Correct: 171/200 + Errors: 5 + Error Rate: 2.50% + Avg Time: 1.23s + Time Range: 0.89s - 2.45s + VRAM Max: 4.5GB + VRAM Avg: 3.4GB +``` + +### Generated Files +- **Markdown Report**: `*_results.md` with detailed results tables +- **Visualizations**: `output/` directory with prediction visualizations +- **Interactive Output**: `interactive_output/` for interactive session results + +## Metrics Tracked + +- **Accuracy**: Percentage of clicks within bounding boxes +- **Error Rate**: Percentage of failed predictions +- **Timing**: Average, min, max prediction times +- **VRAM Usage**: Maximum and average GPU memory usage +- **Per-sample Results**: Detailed breakdown for debugging + +## Requirements + +- Python 3.8+ +- PyTorch (for VRAM tracking) +- PIL/Pillow 
(for image processing) +- datasets (for HuggingFace datasets) +- tqdm (for progress bars) +- Computer Agent SDK + +## Architecture + +The benchmark system is designed for: +- **Modularity**: Easy to add new models and benchmarks +- **Flexibility**: Works with any iterator of dicts with `image`, `bbox`, `instruction` keys +- **Performance**: VRAM tracking and timing analysis +- **Visualization**: Automatic generation of prediction visualizations +- **No Exception Handling**: Fails fast to surface real issues + +## Results Table + +| Model | Dataset | Accuracy | Error Rate | Avg Time | VRAM Max | VRAM Avg | +|-------|---------|----------|------------|----------|----------|----------| +| (coming soon) | | | | | | | + +## Contributing + +To add a new benchmark: +1. Create a new script following the pattern in `ss-v2.py` +2. Use the `evaluate_model()` function from utils +3. Ensure your dataset yields dicts with `image`, `bbox`, `instruction` keys +4. Update this README with benchmark details diff --git a/libs/python/agent/benchmarks/models/gta1.py b/libs/python/agent/benchmarks/models/gta1.py index 2bb4fe1d..a1dee599 100644 --- a/libs/python/agent/benchmarks/models/gta1.py +++ b/libs/python/agent/benchmarks/models/gta1.py @@ -117,7 +117,7 @@ Output the coordinate pair exactly: } # Process inputs - image_inputs, video_inputs = process_vision_info([system_message, user_message]) + image_inputs, video_inputs = process_vision_info([system_message, user_message]) # type: ignore text = self.processor.apply_chat_template( [system_message, user_message], tokenize=False, diff --git a/libs/python/agent/benchmarks/ss-pro.py b/libs/python/agent/benchmarks/ss-pro.py index 57f2c971..e1e54a1d 100644 --- a/libs/python/agent/benchmarks/ss-pro.py +++ b/libs/python/agent/benchmarks/ss-pro.py @@ -7,6 +7,7 @@ Supports both ComputerAgent model strings and custom model classes. 
""" import asyncio +import time from typing import Optional from datasets import load_dataset @@ -43,66 +44,67 @@ async def evaluate_model(model_wrapper: ModelWrapper, dataset, max_samples: Opti total_samples = min(max_samples, total_samples) correct_predictions = 0 - failed_predictions = 0 + error_predictions = 0 results = [] - try: - for i in tqdm(range(total_samples), desc=f"Evaluating {model_wrapper.model_name}"): - sample = dataset[i] - - # Extract sample data - image = sample['image'] - instruction = sample['instruction'] - bbox = sample['bbox'] # [x1, y1, x2, y2] - sample_id = sample['id'] - - # Predict click coordinates - try: - click_coords = await model_wrapper.predict_click(image, instruction) - - # Check if prediction is correct - is_correct = is_click_in_bbox(click_coords, bbox) - - if is_correct: - correct_predictions += 1 - - results.append({ - 'id': sample_id, - 'instruction': instruction, - 'bbox': bbox, - 'predicted_coords': click_coords, - 'is_correct': is_correct, - 'failed': False - }) - - except Exception as e: - print(f"\nError predicting sample {sample_id}: {e}") - failed_predictions += 1 - results.append({ - 'id': sample_id, - 'instruction': instruction, - 'bbox': bbox, - 'predicted_coords': None, - 'is_correct': False, - 'failed': True, - 'error': str(e) - }) + for i in tqdm(range(total_samples), desc=f"Evaluating {model_wrapper.model_name}"): + sample = dataset[i] + + # Extract sample data + image = sample['image'] + instruction = sample['instruction'] + bbox = sample['bbox'] # [x1, y1, x2, y2] + sample_id = sample['img_filename'] + + # Predict click coordinates with timing + start_time = time.time() + click_coords = await model_wrapper.predict_click(image, instruction) + prediction_time = time.time() - start_time + + # Check if prediction is correct + is_correct = is_click_in_bbox(click_coords, bbox) + + if is_correct: + correct_predictions += 1 + + results.append({ + 'id': sample_id, + 'instruction': instruction, + 'bbox': bbox, + 
'predicted_coords': click_coords, + 'is_correct': is_correct, + 'failed': False, + 'prediction_time': prediction_time + }) - finally: - # Unload model - await model_wrapper.unload_model() + # Unload model + await model_wrapper.unload_model() # Calculate metrics accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0 - failure_rate = failed_predictions / total_samples if total_samples > 0 else 0.0 + error_rate = error_predictions / total_samples if total_samples > 0 else 0.0 + + # Calculate timing statistics + successful_times = [r['prediction_time'] for r in results if not r['failed']] + avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0 + min_prediction_time = min(successful_times) if successful_times else 0.0 + max_prediction_time = max(successful_times) if successful_times else 0.0 + + # Get VRAM statistics + vram_stats = model_wrapper.get_vram_stats() return { 'model_name': model_wrapper.model_name, 'total_samples': total_samples, 'correct_predictions': correct_predictions, - 'failed_predictions': failed_predictions, + 'failed_predictions': error_predictions, 'accuracy': accuracy, - 'failure_rate': failure_rate, + 'failure_rate': error_rate, + 'avg_prediction_time': avg_prediction_time, + 'min_prediction_time': min_prediction_time, + 'max_prediction_time': max_prediction_time, + 'vram_max_mb': vram_stats['max_mb'], + 'vram_avg_mb': vram_stats['avg_mb'], 'results': results } @@ -123,26 +125,26 @@ async def main(): models = get_available_models() # Evaluation settings - max_samples = 5 # Set to None to evaluate on full dataset + max_samples = 300 # Set to None to evaluate on full dataset # Run evaluations all_results = [] for model in models: - try: - model_wrapper = ModelWrapper(model) - result = await evaluate_model(model_wrapper, dataset_list, max_samples) - all_results.append(result) - - # Print summary - print(f"\n{result['model_name']} Results:") - print(f" Accuracy: 
{result['accuracy']*100:.2f}%") - print(f" Correct: {result['correct_predictions']}/{result['total_samples']}") - print(f" Failed: {result['failed_predictions']}") - - except Exception as e: - print(f"\nError evaluating model {model}: {e}") - continue + model_wrapper = ModelWrapper(model) + result = await evaluate_model(model_wrapper, dataset_list, max_samples) + all_results.append(result) + + # Print summary + print(f"\n{result['model_name']} Results:") + print(f" Accuracy: {result['accuracy']*100:.2f}%") + print(f" Correct: {result['correct_predictions']}/{result['total_samples']}") + print(f" Errors: {result['failed_predictions']}") + print(f" Error Rate: {result['failure_rate']*100:.2f}%") + print(f" Avg Time: {result['avg_prediction_time']:.2f}s") + print(f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s") + print(f" VRAM Max: {result['vram_max_mb']:.1f}MB") + print(f" VRAM Avg: {result['vram_avg_mb']:.1f}MB") # Save results if all_results: diff --git a/libs/python/agent/benchmarks/ss-v2.py b/libs/python/agent/benchmarks/ss-v2.py new file mode 100644 index 00000000..919a1001 --- /dev/null +++ b/libs/python/agent/benchmarks/ss-v2.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +""" +ScreenSpot-Pro Benchmark Script + +Evaluates models on the ScreenSpot-Pro dataset for click prediction accuracy. +Supports both ComputerAgent model strings and custom model classes. +""" + +import asyncio +import time +from typing import Optional + +from datasets import load_dataset +from tqdm import tqdm + +from utils import ( + ModelWrapper, + is_click_in_bbox, + save_results_to_markdown, + save_visualizations, + get_available_models +) + + +async def evaluate_model(model_wrapper: ModelWrapper, samples, max_samples: Optional[int] = None) -> dict: + """ + Evaluate a model on any iterable of samples. 
+ + Args: + model_wrapper: ModelWrapper instance + samples: Iterable of dicts with keys: image, bbox, instruction + max_samples: Maximum number of samples to evaluate (None for all) + + Returns: + Dictionary with evaluation results + """ + print(f"\nEvaluating model: {model_wrapper.model_name}") + + # Load model + await model_wrapper.load_model() + + # Convert to list if needed and limit samples + if hasattr(samples, '__len__'): + total_samples = len(samples) + if max_samples is not None: + total_samples = min(max_samples, total_samples) + sample_list = list(samples)[:total_samples] + else: + # For iterators, take max_samples or all + sample_list = list(samples) + if max_samples is not None: + sample_list = sample_list[:max_samples] + total_samples = len(sample_list) + + correct_predictions = 0 + error_predictions = 0 + results = [] + + for i, sample in enumerate(tqdm(sample_list, desc=f"Evaluating {model_wrapper.model_name}")): + # Extract required data (only these 3 keys matter) + image = sample['image'] + instruction = sample['instruction'] + bbox = sample['bbox'] # [x1, y1, x2, y2] + + # Predict click coordinates with timing + start_time = time.time() + click_coords = await model_wrapper.predict_click(image, instruction) + prediction_time = time.time() - start_time + + # Check if prediction is correct + is_correct = is_click_in_bbox(click_coords, bbox) + + if is_correct: + correct_predictions += 1 + + results.append({ + 'sample_idx': i, + 'instruction': instruction, + 'bbox': bbox, + 'predicted_coords': click_coords, + 'is_correct': is_correct, + 'failed': False, + 'prediction_time': prediction_time + }) + + # Unload model + await model_wrapper.unload_model() + + # Calculate metrics + accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0 + error_rate = error_predictions / total_samples if total_samples > 0 else 0.0 + + # Calculate timing statistics + successful_times = [r['prediction_time'] for r in results if not r['failed']] + 
avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0 + min_prediction_time = min(successful_times) if successful_times else 0.0 + max_prediction_time = max(successful_times) if successful_times else 0.0 + + # Get VRAM statistics + vram_stats = model_wrapper.get_vram_stats() + + return { + 'model_name': model_wrapper.model_name, + 'total_samples': total_samples, + 'correct_predictions': correct_predictions, + 'failed_predictions': error_predictions, + 'accuracy': accuracy, + 'failure_rate': error_rate, + 'avg_prediction_time': avg_prediction_time, + 'min_prediction_time': min_prediction_time, + 'max_prediction_time': max_prediction_time, + 'vram_max_mb': vram_stats['max_mb'], + 'vram_avg_mb': vram_stats['avg_mb'], + 'results': results + } + + +async def main(): + """ + Main function to run the benchmark. + """ + # Load dataset + print("Loading ScreenSpot-v2 dataset...") + ds = load_dataset("lmms-lab/ScreenSpot-v2") + dataset = ds['train'] # type: ignore + # Convert to simple list of dicts with only required keys + samples = [] + for item in dataset: + # Convert dataset item to dict if needed + item_dict = dict(item) if hasattr(item, 'keys') else item + + # Convert ScreenSpot-v2 bbox format [x, y, w, h] to [x1, y1, x2, y2] + bbox_xywh = item_dict['bbox'] # type: ignore + x, y, w, h = bbox_xywh + bbox_xyxy = [x, y, x + w, y + h] + + samples.append({ + 'image': item_dict['image'], # type: ignore + 'instruction': item_dict['instruction'], # type: ignore + 'bbox': bbox_xyxy + }) + print(f"Dataset loaded: {len(samples)} samples") + + # Get available models + models = get_available_models() + + # Evaluation settings + max_samples = 500 # Set to None to evaluate on full dataset + + # Run evaluations + all_results = [] + + for model in models: + model_wrapper = ModelWrapper(model) + result = await evaluate_model(model_wrapper, samples, max_samples) + all_results.append(result) + + # Print summary + print(f"\n{result['model_name']} 
Results:") + print(f" Accuracy: {result['accuracy']*100:.2f}%") + print(f" Correct: {result['correct_predictions']}/{result['total_samples']}") + print(f" Errors: {result['failed_predictions']}") + print(f" Error Rate: {result['failure_rate']*100:.2f}%") + print(f" Avg Time: {result['avg_prediction_time']:.2f}s") + print(f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s") + print(f" VRAM Max: {result['vram_max_mb']:.1f}MB") + print(f" VRAM Avg: {result['vram_avg_mb']:.1f}MB") + + # Save results + if all_results: + save_results_to_markdown(all_results, "screenspot_v2_results.md", title="ScreenSpot-v2 Benchmark Results") + save_visualizations(all_results, samples) + print("\nBenchmark completed successfully!") + else: + print("\nNo successful evaluations completed.") + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index c1fc41cf..099499a5 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -22,6 +22,33 @@ from agent.agent import ComputerAgent from models import GTA1Model from models.base import ModelProtocol +def get_vram_usage() -> dict: + """ + Get current VRAM usage statistics. 
+ + Returns: + Dictionary with VRAM usage info (in MB) + """ + if torch.cuda.is_available(): + device = torch.cuda.current_device() + allocated = torch.cuda.memory_allocated(device) / 1024 / 1024 # Convert to MB + reserved = torch.cuda.memory_reserved(device) / 1024 / 1024 # Convert to MB + total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024 + return { + 'allocated_mb': allocated, + 'reserved_mb': reserved, + 'total_mb': total, + 'free_mb': total - reserved + } + else: + return { + 'allocated_mb': 0.0, + 'reserved_mb': 0.0, + 'total_mb': 0.0, + 'free_mb': 0.0 + } + + def get_available_models() -> List[Union[str, ModelProtocol]]: """ Get list of available models for testing. @@ -34,11 +61,11 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: models = [ # === ComputerAgent model strings === f"{local_provider}HelloKKMe/GTA1-7B", - # f"{local_provider}HelloKKMe/GTA1-32B", # Uncomment if you have this model + f"{local_provider}HelloKKMe/GTA1-32B", # === Reference model classes === GTA1Model("HelloKKMe/GTA1-7B"), - # GTA1Model("HelloKKMe/GTA1-32B"), # Uncomment if you have this model + GTA1Model("HelloKKMe/GTA1-32B"), ] return models @@ -88,11 +115,12 @@ class ModelWrapper: self.model = model self.is_computer_agent = isinstance(model, str) self.agent: Optional[ComputerAgent] = None + self.vram_usage_history: List[float] = [] # Track VRAM usage over time if self.is_computer_agent: self.model_name = str(model) else: - self.model_name = f"models.{model.__class__.__name__}" + self.model_name = f"{model.__class__.__name__}('{getattr(model, 'model_name', 'unknown')}')" async def load_model(self) -> None: """Load the model.""" @@ -100,6 +128,10 @@ class ModelWrapper: self.agent = ComputerAgent(model=str(self.model)) else: await self.model.load_model() # type: ignore + + # Record initial VRAM usage after loading + vram_info = get_vram_usage() + self.vram_usage_history.append(vram_info['allocated_mb']) async def unload_model(self) -> None: 
"""Unload the model.""" @@ -111,10 +143,28 @@ class ModelWrapper: gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() + + # Record VRAM usage after unloading + vram_info = get_vram_usage() + self.vram_usage_history.append(vram_info['allocated_mb']) + + def get_vram_stats(self) -> dict: + """Get VRAM usage statistics for this model.""" + if not self.vram_usage_history: + return {'max_mb': 0.0, 'avg_mb': 0.0} + + return { + 'max_mb': max(self.vram_usage_history), + 'avg_mb': sum(self.vram_usage_history) / len(self.vram_usage_history) + } async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: """Predict click coordinates.""" + # Record VRAM usage before prediction + vram_info = get_vram_usage() + self.vram_usage_history.append(vram_info['allocated_mb']) + if self.is_computer_agent: if self.agent is None: await self.load_model() @@ -122,13 +172,24 @@ class ModelWrapper: if self.agent is not None: image_b64 = image_to_base64(image) result = await self.agent.predict_click(instruction=instruction, image_b64=image_b64) + + # Record VRAM usage after prediction + vram_info = get_vram_usage() + self.vram_usage_history.append(vram_info['allocated_mb']) + return result return None else: - return await self.model.predict_click(image, instruction) # type: ignore + result = await self.model.predict_click(image, instruction) # type: ignore + + # Record VRAM usage after prediction + vram_info = get_vram_usage() + self.vram_usage_history.append(vram_info['allocated_mb']) + + return result -def save_results_to_markdown(all_results: List[dict], output_file: str = "screenspot_pro_results.md") -> None: +def save_results_to_markdown(all_results: List[dict],output_file: str = "screenspot_pro_results.md", title: str = "ScreenSpot-Pro Benchmark Results") -> None: """ Save evaluation results to a markdown table. 
@@ -137,39 +198,46 @@ def save_results_to_markdown(all_results: List[dict], output_file: str = "screen output_file: Output markdown file path """ with open(output_file, 'w', encoding='utf-8') as f: - f.write("# ScreenSpot-Pro Benchmark Results\n\n") + f.write(f"# {title}\n\n") f.write(f"**Evaluation Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n") # Summary table f.write("## Summary\n\n") - f.write("| Model | Total Samples | Correct | Failed | Accuracy | Failure Rate |\n") - f.write("|-------|---------------|---------|--------|----------|--------------|\n") + f.write("| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n") + f.write("|-------|---------------|---------|--------|----------|------------|--------------|----------------|---------------|---------------|\n") for result in all_results: model_name = result['model_name'] total = result['total_samples'] correct = result['correct_predictions'] - failed = result['failed_predictions'] + errors = result['failed_predictions'] accuracy = result['accuracy'] * 100 - failure_rate = result['failure_rate'] * 100 + error_rate = result['failure_rate'] * 100 + avg_time = result.get('avg_prediction_time', 0.0) + min_time = result.get('min_prediction_time', 0.0) + max_time = result.get('max_prediction_time', 0.0) + time_range = f"{min_time:.2f} - {max_time:.2f}" + vram_max = result.get('vram_max_mb', 0.0) / 1024 + vram_avg = result.get('vram_avg_mb', 0.0) / 1024 - f.write(f"| {model_name} | {total} | {correct} | {failed} | {accuracy:.2f}% | {failure_rate:.2f}% |\n") + f.write(f"| {model_name} | {total} | {correct} | {errors} | {accuracy:.2f}% | {error_rate:.2f}% | {avg_time:.2f} | {time_range} | {vram_max:.1f} | {vram_avg:.1f} |\n") # Detailed results for each model for result in all_results: f.write(f"\n## {result['model_name']} - Detailed Results\n\n") - f.write("| Sample ID | Instruction | BBox | Predicted | Correct | Failed |\n") 
- f.write("|-----------|-------------|------|-----------|---------|--------|\n") + f.write("| Sample Index | Instruction | BBox | Predicted | Correct | Error | Time (s) |\n") + f.write("|-----------|-------------|------|-----------|---------|-------|----------|\n") for sample_result in result['results'][:10]: # Show first 10 samples - sample_id = sample_result['id'] + sample_idx = sample_result['sample_idx'] instruction = sample_result['instruction'][:50] + "..." if len(sample_result['instruction']) > 50 else sample_result['instruction'] bbox = str(sample_result['bbox']) predicted = str(sample_result['predicted_coords']) if sample_result['predicted_coords'] else "None" correct = "PASS" if sample_result['is_correct'] else "FAIL" - failed = "YES" if sample_result['failed'] else "NO" + error = "YES" if sample_result['failed'] else "NO" + pred_time = sample_result.get('prediction_time', 0.0) - f.write(f"| {sample_id} | {instruction} | {bbox} | {predicted} | {correct} | {failed} |\n") + f.write(f"| {sample_idx} | {instruction} | {bbox} | {predicted} | {correct} | {error} | {pred_time:.2f} |\n") if len(result['results']) > 10: f.write(f"\n*Showing first 10 of {len(result['results'])} samples*\n") @@ -177,76 +245,68 @@ def save_results_to_markdown(all_results: List[dict], output_file: str = "screen print(f"\nResults saved to: {output_file}") -def save_visualizations(all_results: List[dict], dataset_list, output_dir: str = "output") -> None: +def save_visualizations(all_results: List[dict], samples, output_dir: str = "output") -> None: """ Save visualizations of predicted coordinates vs bboxes to an output folder. 
Args: all_results: List of evaluation results for each model - dataset_list: List of dataset samples + samples: List of sample dicts with image, bbox, instruction keys output_dir: Output directory path """ - # Create output directory os.makedirs(output_dir, exist_ok=True) for result in all_results: - model_name = result['model_name'].replace('/', '_').replace('.', '_') + model_name = result['model_name'].replace('/', '_').replace('\\', '_') model_dir = os.path.join(output_dir, model_name) os.makedirs(model_dir, exist_ok=True) - print(f"\nSaving visualizations for {result['model_name']}...") + print(f"Saving visualizations for {result['model_name']}...") + # Save first 10 samples for visualization for i, sample_result in enumerate(tqdm(result['results'][:10], desc=f"Saving {model_name} visualizations")): - try: - # Find the original sample - sample_id = sample_result['id'] - sample = None - for s in dataset_list: - if s['id'] == sample_id: - sample = s - break - - if sample is None: - continue - - # Get image and data - image = sample['image'].copy() - bbox = sample_result['bbox'] # [x1, y1, x2, y2] - predicted_coords = sample_result['predicted_coords'] - is_correct = sample_result['is_correct'] - - # Draw on image - draw = ImageDraw.Draw(image) - - # Draw bounding box (ground truth) in green - x1, y1, x2, y2 = bbox - draw.rectangle([x1, y1, x2, y2], outline="green", width=3) - draw.text((x1, y1-20), "Ground Truth", fill="green") - - # Draw predicted click in red or blue - if predicted_coords is not None: - px, py = predicted_coords - color = "blue" if is_correct else "red" - # Draw crosshair - crosshair_size = 15 - draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=3) - draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=3) - draw.text((px+10, py-20), f"Predicted ({px},{py})", fill=color) - - # Add status text - status = "CORRECT" if is_correct else "INCORRECT" - status_color = "blue" if is_correct else "red" 
- draw.text((10, 10), f"Status: {status}", fill=status_color) - draw.text((10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black") - - # Save image - filename = f"sample_{i+1:02d}_{sample_id}_{status.lower()}.png" - filepath = os.path.join(model_dir, filename) - image.save(filepath) - - except Exception as e: - print(f"Error saving visualization for sample {sample_id}: {e}") + # Get sample data using index + sample_idx = sample_result['sample_idx'] + + if sample_idx < len(samples): + sample = samples[sample_idx] + image = sample['image'].copy() # Make a copy to avoid modifying original + else: + print(f"Warning: Could not find sample at index {sample_idx}") continue + + bbox = sample_result['bbox'] + predicted_coords = sample_result['predicted_coords'] + is_correct = sample_result['is_correct'] + + # Draw on image + draw = ImageDraw.Draw(image) + + # Draw bounding box (ground truth) in green + x1, y1, x2, y2 = bbox + draw.rectangle([x1, y1, x2, y2], outline="green", width=3) + draw.text((x1, y1-20), "Ground Truth", fill="green") + + # Draw predicted click in red or blue + if predicted_coords is not None: + px, py = predicted_coords + color = "blue" if is_correct else "red" + # Draw crosshair + crosshair_size = 15 + draw.line([(px-crosshair_size, py), (px+crosshair_size, py)], fill=color, width=3) + draw.line([(px, py-crosshair_size), (px, py+crosshair_size)], fill=color, width=3) + draw.text((px+10, py-20), f"Predicted ({px},{py})", fill=color) + + # Add status text + status = "CORRECT" if is_correct else "INCORRECT" + status_color = "blue" if is_correct else "red" + draw.text((10, 10), f"Status: {status}", fill=status_color) + draw.text((10, 30), f"Instruction: {sample_result['instruction'][:50]}...", fill="black") + + # Save image + filename = f"sample_{i+1:02d}_idx{sample_idx}_{status.lower()}.png" + filepath = os.path.join(model_dir, filename) + image.save(filepath) print(f"Visualizations saved to: {model_dir}") From 
8aef7b8b1a99954bab6fc4b85b15073f06d325be Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 30 Jul 2025 16:12:51 -0400 Subject: [PATCH 05/76] updated metrics --- libs/python/agent/benchmarks/README.md | 40 ++++---------------------- libs/python/agent/benchmarks/ss-pro.py | 31 ++++++++++++++++++-- libs/python/agent/benchmarks/ss-v2.py | 35 +++++++++++++++++++--- libs/python/agent/benchmarks/utils.py | 35 ++++++++++++++++++---- 4 files changed, 96 insertions(+), 45 deletions(-) diff --git a/libs/python/agent/benchmarks/README.md b/libs/python/agent/benchmarks/README.md index 033f0157..225fc30b 100644 --- a/libs/python/agent/benchmarks/README.md +++ b/libs/python/agent/benchmarks/README.md @@ -98,19 +98,13 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: ### 1. Configure Models Edit `utils.py` to specify which models you want to test in `get_available_models()`. -### 2. Set Sample Count -Edit the benchmark script to change the number of samples: -```python -max_samples = 50 # Set to None to evaluate on full dataset -``` - -### 3. Run Benchmark +### 2. 
Run Benchmark ```bash # ScreenSpot-v2 benchmark -python ss-v2.py +python ss-v2.py --samples 50 # ScreenSpot-Pro benchmark -python ss-pro.py +python ss-pro.py --samples 50 # Interactive testing python interactive.py @@ -121,14 +115,9 @@ python interactive.py ### Console Output ``` Model Results: - Accuracy: 85.50% - Correct: 171/200 - Errors: 5 - Error Rate: 2.50% - Avg Time: 1.23s - Time Range: 0.89s - 2.45s - VRAM Max: 4.5GB - VRAM Avg: 3.4GB + Accuracy: 85.50% (171/200) + Avg Time: 1.23s (0.89s - 2.45s) + VRAM Usage: 4.5GB (max) / 3.4GB (avg) ``` ### Generated Files @@ -139,20 +128,10 @@ Model Results: ## Metrics Tracked - **Accuracy**: Percentage of clicks within bounding boxes -- **Error Rate**: Percentage of failed predictions - **Timing**: Average, min, max prediction times - **VRAM Usage**: Maximum and average GPU memory usage - **Per-sample Results**: Detailed breakdown for debugging -## Requirements - -- Python 3.8+ -- PyTorch (for VRAM tracking) -- PIL/Pillow (for image processing) -- datasets (for HuggingFace datasets) -- tqdm (for progress bars) -- Computer Agent SDK - ## Architecture The benchmark system is designed for: @@ -160,13 +139,6 @@ The benchmark system is designed for: - **Flexibility**: Works with any iterator of dicts with `image`, `bbox`, `instruction` keys - **Performance**: VRAM tracking and timing analysis - **Visualization**: Automatic generation of prediction visualizations -- **No Exception Handling**: Fails fast to surface real issues - -## Results Table - -| Model | Dataset | Accuracy | Error Rate | Avg Time | VRAM Max | VRAM Avg | -|-------|---------|----------|------------|----------|----------|----------| -| (coming soon) | | | | | | | ## Contributing diff --git a/libs/python/agent/benchmarks/ss-pro.py b/libs/python/agent/benchmarks/ss-pro.py index e1e54a1d..80e5e72f 100644 --- a/libs/python/agent/benchmarks/ss-pro.py +++ b/libs/python/agent/benchmarks/ss-pro.py @@ -6,7 +6,10 @@ Evaluates models on the ScreenSpot-Pro dataset for 
click prediction accuracy. Supports both ComputerAgent model strings and custom model classes. """ +import argparse import asyncio +import random +import statistics import time from typing import Optional @@ -18,7 +21,8 @@ from utils import ( is_click_in_bbox, save_results_to_markdown, save_visualizations, - get_available_models + get_available_models, + get_gpu_memory ) @@ -87,6 +91,7 @@ async def evaluate_model(model_wrapper: ModelWrapper, dataset, max_samples: Opti # Calculate timing statistics successful_times = [r['prediction_time'] for r in results if not r['failed']] avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0 + median_prediction_time = statistics.median(successful_times) if successful_times else 0.0 min_prediction_time = min(successful_times) if successful_times else 0.0 max_prediction_time = max(successful_times) if successful_times else 0.0 @@ -101,6 +106,7 @@ async def evaluate_model(model_wrapper: ModelWrapper, dataset, max_samples: Opti 'accuracy': accuracy, 'failure_rate': error_rate, 'avg_prediction_time': avg_prediction_time, + 'median_prediction_time': median_prediction_time, 'min_prediction_time': min_prediction_time, 'max_prediction_time': max_prediction_time, 'vram_max_mb': vram_stats['max_mb'], @@ -113,6 +119,17 @@ async def main(): """ Main function to run the benchmark. 
""" + # Parse command line arguments + parser = argparse.ArgumentParser(description='ScreenSpot-Pro Benchmark Script') + parser.add_argument('--samples', type=int, default=300, + help='Number of samples to evaluate (default: 300)') + parser.add_argument('--seed', type=int, default=42, + help='Random seed for shuffling (default: 42)') + args = parser.parse_args() + + # Set random seed + random.seed(args.seed) + # Load dataset print("Loading ScreenSpot-Pro dataset...") ds = load_dataset("lmms-lab/ScreenSpot-Pro") @@ -121,11 +138,15 @@ async def main(): dataset_list = list(dataset) print(f"Dataset loaded: {len(dataset_list)} samples") + # Shuffle dataset with seed + random.shuffle(dataset_list) + print(f"Dataset shuffled with seed {args.seed}") + # Get available models models = get_available_models() # Evaluation settings - max_samples = 300 # Set to None to evaluate on full dataset + max_samples = args.samples # Use command line argument # Run evaluations all_results = [] @@ -142,9 +163,15 @@ async def main(): print(f" Errors: {result['failed_predictions']}") print(f" Error Rate: {result['failure_rate']*100:.2f}%") print(f" Avg Time: {result['avg_prediction_time']:.2f}s") + print(f" Median Time: {result['median_prediction_time']:.2f}s") print(f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s") print(f" VRAM Max: {result['vram_max_mb']:.1f}MB") print(f" VRAM Avg: {result['vram_avg_mb']:.1f}MB") + + # Print GPU memory info + gpu_memory = get_gpu_memory() + if gpu_memory and gpu_memory[0] > 0: + print(f" GPU Free Memory: {gpu_memory[0]:.1f}MB") # Save results if all_results: diff --git a/libs/python/agent/benchmarks/ss-v2.py b/libs/python/agent/benchmarks/ss-v2.py index 919a1001..dab1d4b1 100644 --- a/libs/python/agent/benchmarks/ss-v2.py +++ b/libs/python/agent/benchmarks/ss-v2.py @@ -1,12 +1,15 @@ #!/usr/bin/env python3 """ -ScreenSpot-Pro Benchmark Script +ScreenSpot-v2 Benchmark Script -Evaluates models on the ScreenSpot-Pro 
dataset for click prediction accuracy. +Evaluates models on the ScreenSpot-v2 dataset for click prediction accuracy. Supports both ComputerAgent model strings and custom model classes. """ +import argparse import asyncio +import random +import statistics import time from typing import Optional @@ -18,7 +21,8 @@ from utils import ( is_click_in_bbox, save_results_to_markdown, save_visualizations, - get_available_models + get_available_models, + get_gpu_memory ) @@ -93,6 +97,7 @@ async def evaluate_model(model_wrapper: ModelWrapper, samples, max_samples: Opti # Calculate timing statistics successful_times = [r['prediction_time'] for r in results if not r['failed']] avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0 + median_prediction_time = statistics.median(successful_times) if successful_times else 0.0 min_prediction_time = min(successful_times) if successful_times else 0.0 max_prediction_time = max(successful_times) if successful_times else 0.0 @@ -107,6 +112,7 @@ async def evaluate_model(model_wrapper: ModelWrapper, samples, max_samples: Opti 'accuracy': accuracy, 'failure_rate': error_rate, 'avg_prediction_time': avg_prediction_time, + 'median_prediction_time': median_prediction_time, 'min_prediction_time': min_prediction_time, 'max_prediction_time': max_prediction_time, 'vram_max_mb': vram_stats['max_mb'], @@ -119,6 +125,17 @@ async def main(): """ Main function to run the benchmark. 
""" + # Parse command line arguments + parser = argparse.ArgumentParser(description='ScreenSpot-v2 Benchmark Script') + parser.add_argument('--samples', type=int, default=500, + help='Number of samples to evaluate (default: 500)') + parser.add_argument('--seed', type=int, default=42, + help='Random seed for shuffling (default: 42)') + args = parser.parse_args() + + # Set random seed + random.seed(args.seed) + # Load dataset print("Loading ScreenSpot-v2 dataset...") ds = load_dataset("lmms-lab/ScreenSpot-v2") @@ -141,11 +158,15 @@ async def main(): }) print(f"Dataset loaded: {len(samples)} samples") + # Shuffle samples with seed + random.shuffle(samples) + print(f"Samples shuffled with seed {args.seed}") + # Get available models models = get_available_models() # Evaluation settings - max_samples = 500 # Set to None to evaluate on full dataset + max_samples = args.samples # Use command line argument # Run evaluations all_results = [] @@ -162,9 +183,15 @@ async def main(): print(f" Errors: {result['failed_predictions']}") print(f" Error Rate: {result['failure_rate']*100:.2f}%") print(f" Avg Time: {result['avg_prediction_time']:.2f}s") + print(f" Median Time: {result['median_prediction_time']:.2f}s") print(f" Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s") print(f" VRAM Max: {result['vram_max_mb']:.1f}MB") print(f" VRAM Avg: {result['vram_avg_mb']:.1f}MB") + + # Print GPU memory info + gpu_memory = get_gpu_memory() + if gpu_memory and gpu_memory[0] > 0: + print(f" GPU Free Memory: {gpu_memory[0]:.1f}MB") # Save results if all_results: diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index 099499a5..7a3b70a3 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -7,6 +7,8 @@ import asyncio import base64 import os import sys +import subprocess as sp +import statistics from datetime import datetime from io import BytesIO from typing import List, 
Union, Tuple, Optional @@ -22,6 +24,28 @@ from agent.agent import ComputerAgent from models import GTA1Model from models.base import ModelProtocol +def get_gpu_memory() -> List[int]: + """ + Get GPU memory usage using nvidia-smi. + + Returns: + List of free memory values in MB for each GPU + """ + try: + command = "nvidia-smi --query-gpu=memory.free --format=csv" + memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:] + memory_free_values = [int(x.split()[0]) for i, x in enumerate(memory_free_info)] + return memory_free_values + except (sp.CalledProcessError, FileNotFoundError, IndexError): + # Fallback to torch if nvidia-smi is not available + if torch.cuda.is_available(): + device = torch.cuda.current_device() + total = torch.cuda.get_device_properties(device).total_memory / 1024 / 1024 + reserved = torch.cuda.memory_reserved(device) / 1024 / 1024 + return [int(total - reserved)] + return [0] + + def get_vram_usage() -> dict: """ Get current VRAM usage statistics. 
@@ -61,11 +85,11 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: models = [ # === ComputerAgent model strings === f"{local_provider}HelloKKMe/GTA1-7B", - f"{local_provider}HelloKKMe/GTA1-32B", + # f"{local_provider}HelloKKMe/GTA1-32B", # === Reference model classes === GTA1Model("HelloKKMe/GTA1-7B"), - GTA1Model("HelloKKMe/GTA1-32B"), + # GTA1Model("HelloKKMe/GTA1-32B"), ] return models @@ -203,8 +227,8 @@ def save_results_to_markdown(all_results: List[dict],output_file: str = "screens # Summary table f.write("## Summary\n\n") - f.write("| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n") - f.write("|-------|---------------|---------|--------|----------|------------|--------------|----------------|---------------|---------------|\n") + f.write("| Model | Total Samples | Correct | Errors | Accuracy | Error Rate | Avg Time (s) | Median Time (s) | Time Range (s) | VRAM Max (GB) | VRAM Avg (GB) |\n") + f.write("|-------|---------------|---------|--------|----------|------------|--------------|-----------------|----------------|---------------|---------------|\n") for result in all_results: model_name = result['model_name'] @@ -214,13 +238,14 @@ def save_results_to_markdown(all_results: List[dict],output_file: str = "screens accuracy = result['accuracy'] * 100 error_rate = result['failure_rate'] * 100 avg_time = result.get('avg_prediction_time', 0.0) + median_time = result.get('median_prediction_time', 0.0) min_time = result.get('min_prediction_time', 0.0) max_time = result.get('max_prediction_time', 0.0) time_range = f"{min_time:.2f} - {max_time:.2f}" vram_max = result.get('vram_max_mb', 0.0) / 1024 vram_avg = result.get('vram_avg_mb', 0.0) / 1024 - f.write(f"| {model_name} | {total} | {correct} | {errors} | {accuracy:.2f}% | {error_rate:.2f}% | {avg_time:.2f} | {time_range} | {vram_max:.1f} | {vram_avg:.1f} |\n") + f.write(f"| {model_name} | {total} | {correct} | 
{errors} | {accuracy:.2f}% | {error_rate:.2f}% | {avg_time:.2f} | {median_time:.2f} | {time_range} | {vram_max:.1f} | {vram_avg:.1f} |\n") # Detailed results for each model for result in all_results: From a98acf96e9e8951f1bf74079180ae91d30f617eb Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 30 Jul 2025 16:18:12 -0400 Subject: [PATCH 06/76] updated docs --- libs/python/agent/benchmarks/README.md | 68 +--------- libs/python/agent/benchmarks/contrib.md | 163 ++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 67 deletions(-) create mode 100644 libs/python/agent/benchmarks/contrib.md diff --git a/libs/python/agent/benchmarks/README.md b/libs/python/agent/benchmarks/README.md index 225fc30b..3df840c1 100644 --- a/libs/python/agent/benchmarks/README.md +++ b/libs/python/agent/benchmarks/README.md @@ -31,68 +31,6 @@ The benchmark system evaluates models on GUI grounding tasks, specifically click - `quit`/`exit` → exit tool - **Output**: Visual predictions with crosshairs for each model -## Adding Reference Agent Implementations - -### 1. Implement the ModelProtocol - -Create a new file in `models/` directory implementing the `ModelProtocol`: - -```python -from models.base import ModelProtocol -from typing import Optional, Tuple -from PIL import Image - -class YourModelName(ModelProtocol): - def __init__(self, model_path: str): - self.model_path = model_path - self._model = None - - @property - def model_name(self) -> str: - return self.model_path - - async def load_model(self) -> None: - """Load the model into memory.""" - # Your model loading logic here - pass - - async def unload_model(self) -> None: - """Unload the model from memory.""" - # Your model cleanup logic here - pass - - async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: - """ - Predict click coordinates for the given image and instruction. 
- - Args: - image: PIL Image to analyze - instruction: Text instruction describing what to click - - Returns: - Tuple of (x, y) coordinates or None if prediction fails - """ - # Your prediction logic here - return (x, y) # Return predicted coordinates -``` - -### 2. Register Your Model - -Add your model to the `get_available_models()` function in `utils.py`: - -```python -def get_available_models() -> List[Union[str, ModelProtocol]]: - models = [ - # Computer Agent SDK providers - "huggingface-local/HelloKKMe/GTA1-7B", - - # Reference implementations - GTA1Model("HelloKKMe/GTA1-7B"), - YourModelName("path/to/your/model"), # Add your model here - ] - return models -``` - ## Running Benchmarks ### 1. Configure Models @@ -142,8 +80,4 @@ The benchmark system is designed for: ## Contributing -To add a new benchmark: -1. Create a new script following the pattern in `ss-v2.py` -2. Use the `evaluate_model()` function from utils -3. Ensure your dataset yields dicts with `image`, `bbox`, `instruction` keys -4. Update this README with benchmark details +To add a new reference model, follow the instructions in [contrib.md](contrib.md). \ No newline at end of file diff --git a/libs/python/agent/benchmarks/contrib.md b/libs/python/agent/benchmarks/contrib.md new file mode 100644 index 00000000..0bef9077 --- /dev/null +++ b/libs/python/agent/benchmarks/contrib.md @@ -0,0 +1,163 @@ +# Contributing Reference Agent Implementations + +This guide explains how to add your own reference agent implementations to the benchmark system. + +## Adding Reference Agent Implementations + +### 1. 
Implement the ModelProtocol + +Create a new file in `models/` directory implementing the `ModelProtocol`: + +```python +from models.base import ModelProtocol +from typing import Optional, Tuple +from PIL import Image + +class YourModelName(ModelProtocol): + def __init__(self, model_path: str): + self.model_path = model_path + self._model = None + + @property + def model_name(self) -> str: + return self.model_path + + async def load_model(self) -> None: + """Load the model into memory.""" + # Your model loading logic here + pass + + async def unload_model(self) -> None: + """Unload the model from memory.""" + # Your model cleanup logic here + pass + + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates for the given image and instruction. + + Args: + image: PIL Image to analyze + instruction: Text instruction describing what to click + + Returns: + Tuple of (x, y) coordinates or None if prediction fails + """ + # Your prediction logic here + return (x, y) # Return predicted coordinates +``` + +### 2. Register Your Model + +Add your model to the `get_available_models()` function in `utils.py`: + +```python +def get_available_models() -> List[Union[str, ModelProtocol]]: + models = [ + # Computer Agent SDK providers + "huggingface-local/HelloKKMe/GTA1-7B", + + # Reference implementations + GTA1Model("HelloKKMe/GTA1-7B"), + YourModelName("path/to/your/model"), # Add your model here + ] + return models +``` + +### 3. Test Your Implementation + +Before submitting, test your model with the interactive tool: + +```bash +python interactive.py +``` + +This will help you verify that your model loads correctly and produces reasonable predictions. + +## Example: Adding a New Model + +Here's a complete example of adding a hypothetical "MyVisionModel": + +1. 
**Create `models/my_vision_model.py`:** +```python +import torch +from transformers import AutoModel, AutoProcessor +from models.base import ModelProtocol +from typing import Optional, Tuple +from PIL import Image + +class MyVisionModel(ModelProtocol): + def __init__(self, model_path: str): + self.model_path = model_path + self.model = None + self.processor = None + + @property + def model_name(self) -> str: + return f"MyVisionModel({self.model_path})" + + async def load_model(self) -> None: + """Load the model and processor.""" + self.processor = AutoProcessor.from_pretrained(self.model_path) + self.model = AutoModel.from_pretrained( + self.model_path, + torch_dtype=torch.float16, + device_map="auto" + ) + + async def unload_model(self) -> None: + """Clean up model resources.""" + del self.model + del self.processor + self.model = None + self.processor = None + torch.cuda.empty_cache() + + async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]: + """Predict click coordinates.""" + try: + # Preprocess inputs + inputs = self.processor( + text=instruction, + images=image, + return_tensors="pt" + ) + + # Run inference + with torch.no_grad(): + outputs = self.model(**inputs) + + # Extract coordinates (model-specific logic) + x, y = self._extract_coordinates(outputs) + return (int(x), int(y)) + + except Exception as e: + print(f"Prediction failed: {e}") + return None + + def _extract_coordinates(self, outputs): + """Extract x, y coordinates from model outputs.""" + # Your model-specific coordinate extraction logic + pass +``` + +2. **Update `models/__init__.py`:** +```python +from .gta1 import GTA1Model +from .my_vision_model import MyVisionModel + +__all__ = ["GTA1Model", "MyVisionModel"] +``` + +3. 
**Update `utils.py`:** +```python +from models import GTA1Model, MyVisionModel + +def get_available_models() -> List[Union[str, ModelProtocol]]: + models = [ + "huggingface-local/HelloKKMe/GTA1-7B", + GTA1Model("HelloKKMe/GTA1-7B"), + MyVisionModel("my-org/my-vision-model"), # Add here + ] + return models +``` From 5902be2917dd8b5e2f6206ffe8f141fb01e867d3 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 30 Jul 2025 16:19:37 -0400 Subject: [PATCH 07/76] updated docs --- libs/python/agent/benchmarks/README.md | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/libs/python/agent/benchmarks/README.md b/libs/python/agent/benchmarks/README.md index 3df840c1..03d1a789 100644 --- a/libs/python/agent/benchmarks/README.md +++ b/libs/python/agent/benchmarks/README.md @@ -25,8 +25,8 @@ The benchmark system evaluates models on GUI grounding tasks, specifically click ### 3. Interactive Testing (`interactive.py`) - **Real-time testing**: Take screenshots and visualize model predictions - **Commands**: - - Type instruction → screenshot + test all models - - `screenshot` → take screenshot without prediction + - Type instruction → test all models on last screenshot + - `screenshot` → take screenshot - `models` → list available models - `quit`/`exit` → exit tool - **Output**: Visual predictions with crosshairs for each model @@ -63,21 +63,6 @@ Model Results: - **Visualizations**: `output/` directory with prediction visualizations - **Interactive Output**: `interactive_output/` for interactive session results -## Metrics Tracked - -- **Accuracy**: Percentage of clicks within bounding boxes -- **Timing**: Average, min, max prediction times -- **VRAM Usage**: Maximum and average GPU memory usage -- **Per-sample Results**: Detailed breakdown for debugging - -## Architecture - -The benchmark system is designed for: -- **Modularity**: Easy to add new models and benchmarks -- **Flexibility**: Works with any iterator of dicts with `image`, `bbox`, 
`instruction` keys -- **Performance**: VRAM tracking and timing analysis -- **Visualization**: Automatic generation of prediction visualizations - ## Contributing To add a new reference model, follow the instructions in [contrib.md](contrib.md). \ No newline at end of file From 5bfadf8f9ada07926fdcfe7f10929ef66a092544 Mon Sep 17 00:00:00 2001 From: ddupont <3820588+ddupont808@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:46:32 -0400 Subject: [PATCH 08/76] Update pii_anonymization.py --- .../agent/callbacks/pii_anonymization.py | 179 +----------------- 1 file changed, 8 insertions(+), 171 deletions(-) diff --git a/libs/python/agent/agent/callbacks/pii_anonymization.py b/libs/python/agent/agent/callbacks/pii_anonymization.py index f5c31a61..68f4b2fc 100644 --- a/libs/python/agent/agent/callbacks/pii_anonymization.py +++ b/libs/python/agent/agent/callbacks/pii_anonymization.py @@ -9,10 +9,7 @@ import io import logging try: - from presidio_analyzer import AnalyzerEngine - from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine - from presidio_anonymizer.entities import RecognizerResult, OperatorConfig - from presidio_image_redactor import ImageRedactorEngine + # TODO: Add Presidio dependencies from PIL import Image PRESIDIO_AVAILABLE = True except ImportError: @@ -32,11 +29,7 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): def __init__( self, - anonymize_text: bool = True, - anonymize_images: bool = True, - entities_to_anonymize: Optional[List[str]] = None, - anonymization_operator: str = "replace", - image_redaction_color: Tuple[int, int, int] = (255, 192, 203) # Pink + # TODO: Any extra kwargs if needed ): """ Initialize the PII anonymization callback. @@ -51,23 +44,10 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): if not PRESIDIO_AVAILABLE: raise ImportError( "Presidio is not available. 
Install with: " - "pip install presidio-analyzer presidio-anonymizer presidio-image-redactor" + "pip install cua-agent[pii-anonymization]" ) - self.anonymize_text = anonymize_text - self.anonymize_images = anonymize_images - self.entities_to_anonymize = entities_to_anonymize - self.anonymization_operator = anonymization_operator - self.image_redaction_color = image_redaction_color - - # Initialize Presidio engines - self.analyzer = AnalyzerEngine() - self.anonymizer = AnonymizerEngine() - self.deanonymizer = DeanonymizeEngine() - self.image_redactor = ImageRedactorEngine() - - # Store anonymization mappings for deanonymization - self.anonymization_mappings: Dict[str, Any] = {} + # TODO: Implement __init__ async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ @@ -79,9 +59,6 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): Returns: List of messages with PII anonymized """ - if not self.anonymize_text and not self.anonymize_images: - return messages - anonymized_messages = [] for msg in messages: anonymized_msg = await self._anonymize_message(msg) @@ -99,9 +76,6 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): Returns: List of output with PII deanonymized for tool calls """ - if not self.anonymize_text: - return output - deanonymized_output = [] for item in output: # Only deanonymize tool calls and computer_call messages @@ -114,146 +88,9 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): return deanonymized_output async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]: - """Anonymize PII in a single message.""" - msg_copy = message.copy() - - # Anonymize text content - if self.anonymize_text: - msg_copy = await self._anonymize_text_content(msg_copy) - - # Redact images in computer_call_output - if self.anonymize_images and msg_copy.get("type") == "computer_call_output": - msg_copy = await self._redact_image_content(msg_copy) - - return msg_copy - - async def _anonymize_text_content(self, 
message: Dict[str, Any]) -> Dict[str, Any]: - """Anonymize text content in a message.""" - msg_copy = message.copy() - - # Handle content array - content = msg_copy.get("content", []) - if isinstance(content, str): - anonymized_text, _ = await self._anonymize_text(content) - msg_copy["content"] = anonymized_text - elif isinstance(content, list): - anonymized_content = [] - for item in content: - if isinstance(item, dict) and item.get("type") == "text": - text = item.get("text", "") - anonymized_text, _ = await self._anonymize_text(text) - item_copy = item.copy() - item_copy["text"] = anonymized_text - anonymized_content.append(item_copy) - else: - anonymized_content.append(item) - msg_copy["content"] = anonymized_content - - return msg_copy - - async def _redact_image_content(self, message: Dict[str, Any]) -> Dict[str, Any]: - """Redact PII from images in computer_call_output messages.""" - msg_copy = message.copy() - output = msg_copy.get("output", {}) - - if isinstance(output, dict) and "image_url" in output: - try: - # Extract base64 image data - image_url = output["image_url"] - if image_url.startswith("data:image/"): - # Parse data URL - header, data = image_url.split(",", 1) - image_data = base64.b64decode(data) - - # Load image with PIL - image = Image.open(io.BytesIO(image_data)) - - # Redact PII from image - redacted_image = self.image_redactor.redact(image, self.image_redaction_color) - - # Convert back to base64 - buffer = io.BytesIO() - redacted_image.save(buffer, format="PNG") - redacted_data = base64.b64encode(buffer.getvalue()).decode() - - # Update image URL - output_copy = output.copy() - output_copy["image_url"] = f"data:image/png;base64,{redacted_data}" - msg_copy["output"] = output_copy - - except Exception as e: - logger.warning(f"Failed to redact image: {e}") - - return msg_copy + # TODO: Implement _anonymize_message + return message async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Deanonymize PII in tool calls 
and computer outputs.""" - item_copy = item.copy() - - # Handle computer_call arguments - if item.get("type") == "computer_call": - args = item_copy.get("args", {}) - if isinstance(args, dict): - deanonymized_args = {} - for key, value in args.items(): - if isinstance(value, str): - deanonymized_value, _ = await self._deanonymize_text(value) - deanonymized_args[key] = deanonymized_value - else: - deanonymized_args[key] = value - item_copy["args"] = deanonymized_args - - return item_copy - - async def _anonymize_text(self, text: str) -> Tuple[str, List[RecognizerResult]]: - """Anonymize PII in text and return the anonymized text and results.""" - if not text.strip(): - return text, [] - - try: - # Analyze text for PII - analyzer_results = self.analyzer.analyze( - text=text, - entities=self.entities_to_anonymize, - language="en" - ) - - if not analyzer_results: - return text, [] - - # Anonymize the text - anonymized_result = self.anonymizer.anonymize( - text=text, - analyzer_results=analyzer_results, - operators={entity_type: OperatorConfig(self.anonymization_operator) - for entity_type in set(result.entity_type for result in analyzer_results)} - ) - - # Store mapping for deanonymization - mapping_key = str(hash(text)) - self.anonymization_mappings[mapping_key] = { - "original": text, - "anonymized": anonymized_result.text, - "results": analyzer_results - } - - return anonymized_result.text, analyzer_results - - except Exception as e: - logger.warning(f"Failed to anonymize text: {e}") - return text, [] - - async def _deanonymize_text(self, text: str) -> Tuple[str, bool]: - """Attempt to deanonymize text using stored mappings.""" - try: - # Look for matching anonymized text in mappings - for mapping_key, mapping in self.anonymization_mappings.items(): - if mapping["anonymized"] == text: - return mapping["original"], True - - # If no mapping found, return original text - return text, False - - except Exception as e: - logger.warning(f"Failed to deanonymize text: {e}") 
- return text, False + # TODO: Implement _deanonymize_item + return item From d5564977f0004e840b23362d6a2e6ee7ca0f0d8d Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 1 Aug 2025 15:49:19 -0400 Subject: [PATCH 09/76] working gta1 loop --- libs/python/agent/agent/agent.py | 34 +- .../agent/callbacks/pii_anonymization.py | 179 +------ libs/python/agent/agent/cli.py | 4 +- libs/python/agent/agent/computer_handler.py | 4 +- libs/python/agent/agent/loops/gta1.py | 504 +++++++++++++++--- libs/python/agent/agent/loops/model_types.csv | 6 + libs/python/agent/agent/loops/omniparser.py | 5 +- libs/python/agent/example.py | 3 +- libs/python/agent/pyproject.toml | 2 +- 9 files changed, 484 insertions(+), 257 deletions(-) create mode 100644 libs/python/agent/agent/loops/model_types.csv diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index efacea45..06f617c2 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -117,6 +117,13 @@ def sanitize_message(msg: Any) -> Any: return sanitized return msg +def get_output_call_ids(messages: List[Dict[str, Any]]) -> List[str]: + call_ids = [] + for message in messages: + if message.get("type") == "computer_call_output" or message.get("type") == "function_call_output": + call_ids.append(message.get("call_id")) + return call_ids + class ComputerAgent: """ Main agent class that automatically selects the appropriate agent loop @@ -207,6 +214,7 @@ class ComputerAgent: litellm.custom_provider_map = [ {"provider": "huggingface-local", "custom_handler": hf_adapter} ] + litellm.suppress_debug_info = True # == Initialize computer agent == @@ -390,8 +398,10 @@ class ComputerAgent: # AGENT OUTPUT PROCESSING # ============================================================================ - async def _handle_item(self, item: Any, computer: Optional[Computer] = None) -> List[Dict[str, Any]]: + async def _handle_item(self, item: Any, computer: Optional[Computer] = None, ignore_call_ids: 
Optional[List[str]] = None) -> List[Dict[str, Any]]: """Handle each item; may cause a computer action + screenshot.""" + if ignore_call_ids and item.get("call_id") and item.get("call_id") in ignore_call_ids: + return [] item_type = item.get("type", None) @@ -437,7 +447,7 @@ class ComputerAgent: acknowledged_checks = [] for check in pending_checks: check_message = check.get("message", str(check)) - if acknowledge_safety_check_callback(check_message): + if acknowledge_safety_check_callback(check_message, allow_always=True): # TODO: implement a callback for safety checks acknowledged_checks.append(check) else: raise ValueError(f"Safety check failed: {check_message}") @@ -512,9 +522,12 @@ class ComputerAgent: Returns: AsyncGenerator that yields response chunks """ + if not self.agent_config_info: + raise ValueError("Agent configuration not found") + capabilities = self.get_capabilities() if "step" not in capabilities: - raise ValueError(f"Agent loop {self.agent_loop.__name__} does not support step predictions") + raise ValueError(f"Agent loop {self.agent_config_info.agent_class.__name__} does not support step predictions") await self._initialize_computers() @@ -529,7 +542,7 @@ class ComputerAgent: "messages": messages, "stream": stream, "model": self.model, - "agent_loop": self.agent_loop.__name__, + "agent_loop": self.agent_config_info.agent_class.__name__, **merged_kwargs } await self._on_run_start(run_kwargs, old_items) @@ -580,9 +593,12 @@ class ComputerAgent: # Add agent response to new_items new_items += result.get("output") + # Get output call ids + output_call_ids = get_output_call_ids(result.get("output", [])) + # Handle computer actions for item in result.get("output"): - partial_items = await self._handle_item(item, self.computer_handler) + partial_items = await self._handle_item(item, self.computer_handler, ignore_call_ids=output_call_ids) new_items += partial_items # Yield partial response @@ -612,9 +628,12 @@ class ComputerAgent: Returns: None or tuple 
with (x, y) coordinates """ + if not self.agent_config_info: + raise ValueError("Agent configuration not found") + capabilities = self.get_capabilities() if "click" not in capabilities: - raise ValueError(f"Agent loop {self.agent_loop.__name__} does not support click predictions") + raise ValueError(f"Agent loop {self.agent_config_info.agent_class.__name__} does not support click predictions") if hasattr(self.agent_loop, 'predict_click'): if not image_b64: if not self.computer_handler: @@ -634,6 +653,9 @@ class ComputerAgent: Returns: List of capability strings (e.g., ["step", "click"]) """ + if not self.agent_config_info: + raise ValueError("Agent configuration not found") + if hasattr(self.agent_loop, 'get_capabilities'): return self.agent_loop.get_capabilities() return ["step"] # Default capability \ No newline at end of file diff --git a/libs/python/agent/agent/callbacks/pii_anonymization.py b/libs/python/agent/agent/callbacks/pii_anonymization.py index f5c31a61..592b7273 100644 --- a/libs/python/agent/agent/callbacks/pii_anonymization.py +++ b/libs/python/agent/agent/callbacks/pii_anonymization.py @@ -9,10 +9,7 @@ import io import logging try: - from presidio_analyzer import AnalyzerEngine - from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine - from presidio_anonymizer.entities import RecognizerResult, OperatorConfig - from presidio_image_redactor import ImageRedactorEngine + # TODO: Add Presidio dependencies from PIL import Image PRESIDIO_AVAILABLE = True except ImportError: @@ -32,11 +29,7 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): def __init__( self, - anonymize_text: bool = True, - anonymize_images: bool = True, - entities_to_anonymize: Optional[List[str]] = None, - anonymization_operator: str = "replace", - image_redaction_color: Tuple[int, int, int] = (255, 192, 203) # Pink + # TODO: Any extra kwargs if needed ): """ Initialize the PII anonymization callback. 
@@ -51,23 +44,10 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): if not PRESIDIO_AVAILABLE: raise ImportError( "Presidio is not available. Install with: " - "pip install presidio-analyzer presidio-anonymizer presidio-image-redactor" + "pip install cua-agent[pii-anonymization]" ) - self.anonymize_text = anonymize_text - self.anonymize_images = anonymize_images - self.entities_to_anonymize = entities_to_anonymize - self.anonymization_operator = anonymization_operator - self.image_redaction_color = image_redaction_color - - # Initialize Presidio engines - self.analyzer = AnalyzerEngine() - self.anonymizer = AnonymizerEngine() - self.deanonymizer = DeanonymizeEngine() - self.image_redactor = ImageRedactorEngine() - - # Store anonymization mappings for deanonymization - self.anonymization_mappings: Dict[str, Any] = {} + # TODO: Implement __init__ async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ @@ -79,9 +59,6 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): Returns: List of messages with PII anonymized """ - if not self.anonymize_text and not self.anonymize_images: - return messages - anonymized_messages = [] for msg in messages: anonymized_msg = await self._anonymize_message(msg) @@ -99,9 +76,6 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): Returns: List of output with PII deanonymized for tool calls """ - if not self.anonymize_text: - return output - deanonymized_output = [] for item in output: # Only deanonymize tool calls and computer_call messages @@ -114,146 +88,9 @@ class PIIAnonymizationCallback(AsyncCallbackHandler): return deanonymized_output async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]: - """Anonymize PII in a single message.""" - msg_copy = message.copy() - - # Anonymize text content - if self.anonymize_text: - msg_copy = await self._anonymize_text_content(msg_copy) - - # Redact images in computer_call_output - if self.anonymize_images and 
msg_copy.get("type") == "computer_call_output": - msg_copy = await self._redact_image_content(msg_copy) - - return msg_copy - - async def _anonymize_text_content(self, message: Dict[str, Any]) -> Dict[str, Any]: - """Anonymize text content in a message.""" - msg_copy = message.copy() - - # Handle content array - content = msg_copy.get("content", []) - if isinstance(content, str): - anonymized_text, _ = await self._anonymize_text(content) - msg_copy["content"] = anonymized_text - elif isinstance(content, list): - anonymized_content = [] - for item in content: - if isinstance(item, dict) and item.get("type") == "text": - text = item.get("text", "") - anonymized_text, _ = await self._anonymize_text(text) - item_copy = item.copy() - item_copy["text"] = anonymized_text - anonymized_content.append(item_copy) - else: - anonymized_content.append(item) - msg_copy["content"] = anonymized_content - - return msg_copy - - async def _redact_image_content(self, message: Dict[str, Any]) -> Dict[str, Any]: - """Redact PII from images in computer_call_output messages.""" - msg_copy = message.copy() - output = msg_copy.get("output", {}) - - if isinstance(output, dict) and "image_url" in output: - try: - # Extract base64 image data - image_url = output["image_url"] - if image_url.startswith("data:image/"): - # Parse data URL - header, data = image_url.split(",", 1) - image_data = base64.b64decode(data) - - # Load image with PIL - image = Image.open(io.BytesIO(image_data)) - - # Redact PII from image - redacted_image = self.image_redactor.redact(image, self.image_redaction_color) - - # Convert back to base64 - buffer = io.BytesIO() - redacted_image.save(buffer, format="PNG") - redacted_data = base64.b64encode(buffer.getvalue()).decode() - - # Update image URL - output_copy = output.copy() - output_copy["image_url"] = f"data:image/png;base64,{redacted_data}" - msg_copy["output"] = output_copy - - except Exception as e: - logger.warning(f"Failed to redact image: {e}") - - return msg_copy 
+ # TODO: Implement _anonymize_message + return message async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]: - """Deanonymize PII in tool calls and computer outputs.""" - item_copy = item.copy() - - # Handle computer_call arguments - if item.get("type") == "computer_call": - args = item_copy.get("args", {}) - if isinstance(args, dict): - deanonymized_args = {} - for key, value in args.items(): - if isinstance(value, str): - deanonymized_value, _ = await self._deanonymize_text(value) - deanonymized_args[key] = deanonymized_value - else: - deanonymized_args[key] = value - item_copy["args"] = deanonymized_args - - return item_copy - - async def _anonymize_text(self, text: str) -> Tuple[str, List[RecognizerResult]]: - """Anonymize PII in text and return the anonymized text and results.""" - if not text.strip(): - return text, [] - - try: - # Analyze text for PII - analyzer_results = self.analyzer.analyze( - text=text, - entities=self.entities_to_anonymize, - language="en" - ) - - if not analyzer_results: - return text, [] - - # Anonymize the text - anonymized_result = self.anonymizer.anonymize( - text=text, - analyzer_results=analyzer_results, - operators={entity_type: OperatorConfig(self.anonymization_operator) - for entity_type in set(result.entity_type for result in analyzer_results)} - ) - - # Store mapping for deanonymization - mapping_key = str(hash(text)) - self.anonymization_mappings[mapping_key] = { - "original": text, - "anonymized": anonymized_result.text, - "results": analyzer_results - } - - return anonymized_result.text, analyzer_results - - except Exception as e: - logger.warning(f"Failed to anonymize text: {e}") - return text, [] - - async def _deanonymize_text(self, text: str) -> Tuple[str, bool]: - """Attempt to deanonymize text using stored mappings.""" - try: - # Look for matching anonymized text in mappings - for mapping_key, mapping in self.anonymization_mappings.items(): - if mapping["anonymized"] == text: - return 
mapping["original"], True - - # If no mapping found, return original text - return text, False - - except Exception as e: - logger.warning(f"Failed to deanonymize text: {e}") - return text, False + # TODO: Implement _deanonymize_item + return item \ No newline at end of file diff --git a/libs/python/agent/agent/cli.py b/libs/python/agent/agent/cli.py index 6d767fc3..215c791c 100644 --- a/libs/python/agent/agent/cli.py +++ b/libs/python/agent/agent/cli.py @@ -120,7 +120,7 @@ async def ainput(prompt: str = ""): async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = "", show_usage: bool = True): """Main chat loop with the agent.""" - print_welcome(model, agent.agent_loop.__name__, container_name) + print_welcome(model, agent.agent_config_info.agent_class.__name__, container_name) history = [] @@ -130,7 +130,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str total_cost = 0 while True: - if history[-1].get("role") != "user": + if len(history) == 0 or history[-1].get("role") != "user": # Get user input with prompt print_colored("> ", end="") user_input = await ainput() diff --git a/libs/python/agent/agent/computer_handler.py b/libs/python/agent/agent/computer_handler.py index 4a9f0186..1cf3c4d2 100644 --- a/libs/python/agent/agent/computer_handler.py +++ b/libs/python/agent/agent/computer_handler.py @@ -93,8 +93,10 @@ class OpenAIComputerHandler: return "" -def acknowledge_safety_check_callback(message: str) -> bool: +def acknowledge_safety_check_callback(message: str, allow_always: bool = False) -> bool: """Safety check callback for user acknowledgment.""" + if allow_always: + return True response = input( f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? 
(y/n): " ).lower() diff --git a/libs/python/agent/agent/loops/gta1.py b/libs/python/agent/agent/loops/gta1.py index fb272f30..29472107 100644 --- a/libs/python/agent/agent/loops/gta1.py +++ b/libs/python/agent/agent/loops/gta1.py @@ -10,8 +10,10 @@ import re import base64 from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple from io import BytesIO +import uuid from PIL import Image import litellm +import math from ..decorators import register_agent from ..types import Messages, AgentResponse, Tools, AgentCapability @@ -24,6 +26,73 @@ Output the coordinate pair exactly: (x,y) '''.strip() +# Global dictionary to map coordinates to descriptions +xy2desc: Dict[Tuple[float, float], str] = {} + +GTA1_TOOL_SCHEMA = { + "type": "function", + "name": "computer", + "description": "Control a computer by taking screenshots and interacting with UI elements. This tool uses element descriptions to locate and interact with UI elements on the screen (e.g., 'red submit button', 'search text field', 'hamburger menu icon', 'close button in top right corner').", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": [ + "screenshot", + "click", + "double_click", + "drag", + "type", + "keypress", + "scroll", + "move", + "wait", + "get_current_url", + "get_dimensions", + "get_environment" + ], + "description": "The action to perform" + }, + "element_description": { + "type": "string", + "description": "Description of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)" + }, + "start_element_description": { + "type": "string", + "description": "Description of the element to start dragging from (required for drag action)" + }, + "end_element_description": { + "type": "string", + "description": "Description of the element to drag to (required for drag action)" + }, + "text": { + "type": "string", + "description": "The text to type (required for type action)" + }, + 
"keys": { + "type": "string", + "description": "Key combination to press (required for keypress action). Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')" + }, + "button": { + "type": "string", + "description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left", + }, + "scroll_x": { + "type": "integer", + "description": "Horizontal scroll amount for scroll action (positive for right, negative for left)", + }, + "scroll_y": { + "type": "integer", + "description": "Vertical scroll amount for scroll action (positive for down, negative for up)", + }, + }, + "required": [ + "action" + ] + } +} + def extract_coordinates(raw_string: str) -> Tuple[float, float]: """Extract coordinates from model output.""" try: @@ -32,6 +101,173 @@ def extract_coordinates(raw_string: str) -> Tuple[float, float]: except: return (0.0, 0.0) +def get_last_computer_call_output(messages: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Get the last computer_call_output message from a messages list. 
+ + Args: + messages: List of messages to search through + + Returns: + The last computer_call_output message dict, or None if not found + """ + for message in reversed(messages): + if isinstance(message, dict) and message.get("type") == "computer_call_output": + return message + return None + +def _prepare_tools_for_gta1(tool_schemas: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Prepare tools for GTA1 API format""" + gta1_tools = [] + + for schema in tool_schemas: + if schema["type"] == "computer": + gta1_tools.append(GTA1_TOOL_SCHEMA) + else: + gta1_tools.append(schema) + + return gta1_tools + +async def replace_function_with_computer_call_gta1(item: Dict[str, Any], agent_instance) -> List[Dict[str, Any]]: + """Convert function_call to computer_call format using GTA1 click prediction.""" + global xy2desc + item_type = item.get("type") + + async def _get_xy(element_description: Optional[str], last_image_b64: str) -> Union[Tuple[float, float], Tuple[None, None]]: + if element_description is None: + return (None, None) + # Use self.predict_click to get coordinates from description + coords = await agent_instance.predict_click( + model=agent_instance.current_model, + image_b64=last_image_b64, + instruction=element_description + ) + if coords: + # Store the mapping from coordinates to description + xy2desc[coords] = element_description + return coords + return (None, None) + + if item_type == "function_call": + fn_name = item.get("name") + fn_args = json.loads(item.get("arguments", "{}")) + + item_id = item.get("id") + call_id = item.get("call_id") + + if fn_name == "computer": + action = fn_args.get("action") + element_description = fn_args.get("element_description") + start_element_description = fn_args.get("start_element_description") + end_element_description = fn_args.get("end_element_description") + text = fn_args.get("text") + keys = fn_args.get("keys") + button = fn_args.get("button") + scroll_x = fn_args.get("scroll_x") + scroll_y = 
fn_args.get("scroll_y") + + # Get the last computer output image for click prediction + last_image_b64 = agent_instance.last_screenshot_b64 or "" + + x, y = await _get_xy(element_description, last_image_b64) + start_x, start_y = await _get_xy(start_element_description, last_image_b64) + end_x, end_y = await _get_xy(end_element_description, last_image_b64) + + action_args = { + "type": action, + "x": x, + "y": y, + "start_x": start_x, + "start_y": start_y, + "end_x": end_x, + "end_y": end_y, + "text": text, + "keys": keys, + "button": button, + "scroll_x": scroll_x, + "scroll_y": scroll_y + } + # Remove None values to keep the JSON clean + action_args = {k: v for k, v in action_args.items() if v is not None} + + return [{ + "type": "computer_call", + "action": action_args, + "id": item_id, + "call_id": call_id, + "status": "completed" + }] + + return [item] + +async def replace_computer_call_with_function_gta1(item: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Convert computer_call back to function_call format using descriptions. + Only READS from the global xy2desc dictionary. 
+ + Args: + item: The item to convert + """ + global xy2desc + item_type = item.get("type") + + def _get_element_description(x: Optional[float], y: Optional[float]) -> Optional[str]: + """Get element description from coordinates, return None if coordinates are None""" + if x is None or y is None: + return None + return xy2desc.get((x, y)) + + if item_type == "computer_call": + action_data = item.get("action", {}) + + # Extract coordinates and convert back to element descriptions + element_description = _get_element_description(action_data.get("x"), action_data.get("y")) + start_element_description = _get_element_description(action_data.get("start_x"), action_data.get("start_y")) + end_element_description = _get_element_description(action_data.get("end_x"), action_data.get("end_y")) + + # Build function arguments + fn_args = { + "action": action_data.get("type"), + "element_description": element_description, + "start_element_description": start_element_description, + "end_element_description": end_element_description, + "text": action_data.get("text"), + "keys": action_data.get("keys"), + "button": action_data.get("button"), + "scroll_x": action_data.get("scroll_x"), + "scroll_y": action_data.get("scroll_y") + } + + # Remove None values to keep the JSON clean + fn_args = {k: v for k, v in fn_args.items() if v is not None} + + return [{ + "type": "function_call", + "name": "computer", + "arguments": json.dumps(fn_args), + "id": item.get("id"), + "call_id": item.get("call_id"), + "status": "completed", + # Fall back to string representation + # "content": f"Used tool: {action_data.get('type')}({json.dumps(fn_args)})" + }] + + elif item_type == "computer_call_output": + # Simple conversion: computer_call_output -> function_call_output (text only), user message (image) + return [ + { + "type": "function_call_output", + "call_id": item.get("call_id"), + "output": "Tool executed successfully. 
See the current computer screenshot below, if nothing has changed yet then you may need to wait before trying again.", + "id": item.get("id"), + "status": "completed" + }, { + "role": "user", + "content": [item.get("output")] + } + ] + + return [item] + def smart_resize(height: int, width: int, factor: int = 28, min_pixels: int = 3136, max_pixels: int = 8847360) -> Tuple[int, int]: """Smart resize function similar to qwen_vl_utils.""" # Calculate the total pixels @@ -64,10 +300,14 @@ def smart_resize(height: int, width: int, factor: int = 28, min_pixels: int = 31 return new_height, new_width -@register_agent(models=r".*GTA1-.*", priority=10) +@register_agent(models=r".*GTA1.*", priority=10) class GTA1Config(AsyncAgentConfig): """GTA1 agent configuration implementing AsyncAgentConfig protocol for click prediction.""" + def __init__(self): + self.current_model = None + self.last_screenshot_b64 = None + async def predict_step( self, messages: Messages, @@ -84,9 +324,136 @@ class GTA1Config(AsyncAgentConfig): **kwargs ) -> Dict[str, Any]: """ - GTA1 does not support step prediction - only click prediction. + GTA1 agent loop implementation using liteLLM responses with element descriptions. + + Follows the 4-step process: + 1. Prepare tools + 2. Replace computer calls with function calls (using descriptions) + 3. API call + 4. 
Replace function calls with computer calls (using predict_click) """ - raise NotImplementedError("GTA1 agent only supports click prediction via predict_click method") + models = model.split("+") + if len(models) != 2: + raise ValueError("GTA1 model must be in the format + to be used in an agent loop") + + gta1_model, llm_model = models + self.current_model = gta1_model + + tools = tools or [] + + # Step 0: Prepare tools + gta1_tools = _prepare_tools_for_gta1(tools) + + # Get last computer_call_output for screenshot reference + # Convert messages to list of dicts first + message_list = [] + for message in messages: + if not isinstance(message, dict): + message_list.append(message.__dict__) + else: + message_list.append(message) + + last_computer_call_output = get_last_computer_call_output(message_list) + if last_computer_call_output: + image_url = last_computer_call_output.get("output", {}).get("image_url", "") + if image_url.startswith("data:image/"): + self.last_screenshot_b64 = image_url.split(",")[-1] + else: + self.last_screenshot_b64 = image_url + + # Step 1: If there's no screenshot, simulate the model calling the screenshot function + pre_output = [] + if not self.last_screenshot_b64 and computer_handler: + screenshot_base64 = await computer_handler.screenshot() + if _on_screenshot: + await _on_screenshot(screenshot_base64, "screenshot_initial") + + call_id = uuid.uuid4().hex + pre_output += [ + { + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Taking a screenshot to see the current computer screen." 
+ } + ] + }, + { + "action": { + "type": "screenshot" + }, + "call_id": call_id, + "status": "completed", + "type": "computer_call" + }, + { + "type": "computer_call_output", + "call_id": call_id, + "output": { + "type": "input_image", + "image_url": f"data:image/png;base64,{screenshot_base64}" + } + }, + ] + + # Update the last screenshot for future use + self.last_screenshot_b64 = screenshot_base64 + + message_list += pre_output + + # Step 2: Replace computer calls with function calls (using descriptions) + new_messages = [] + for message in message_list: + new_messages += await replace_computer_call_with_function_gta1(message) + messages = new_messages + + # Step 3: API call + api_kwargs = { + "model": llm_model, + "input": messages, + "tools": gta1_tools if gta1_tools else None, + "stream": stream, + "truncation": "auto", + "num_retries": max_retries, + **kwargs + } + + # Call API start hook + if _on_api_start: + await _on_api_start(api_kwargs) + + # Use liteLLM responses + response = await litellm.aresponses(**api_kwargs) + + # Call API end hook + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Extract usage information + usage = { + **response.usage.model_dump(), # type: ignore + "response_cost": response._hidden_params.get("response_cost", 0.0), # type: ignore + } + if _on_usage: + await _on_usage(usage) + + # Step 4: Replace function calls with computer calls (using predict_click) + new_output = [] + for i in range(len(response.output)): # type: ignore + output_item = response.output[i] # type: ignore + # Convert to dict if it has model_dump method, otherwise use as-is + if hasattr(output_item, 'model_dump'): + item_dict = output_item.model_dump() # type: ignore + else: + item_dict = output_item # type: ignore + new_output += await replace_function_with_computer_call_gta1(item_dict, self) # type: ignore + + return { + "output": pre_output + new_output, + "usage": usage + } async def predict_click( self, @@ -106,75 +473,70 @@ class 
GTA1Config(AsyncAgentConfig): Returns: Tuple of (x, y) coordinates or None if prediction fails """ - try: - # Decode base64 image - image_data = base64.b64decode(image_b64) - image = Image.open(BytesIO(image_data)) - width, height = image.width, image.height - - # Smart resize the image (similar to qwen_vl_utils) - resized_height, resized_width = smart_resize( - height, width, - factor=28, # Default factor for Qwen models - min_pixels=3136, - max_pixels=4096 * 2160 - ) - resized_image = image.resize((resized_width, resized_height)) - scale_x, scale_y = width / resized_width, height / resized_height - - # Convert resized image back to base64 - buffered = BytesIO() - resized_image.save(buffered, format="PNG") - resized_image_b64 = base64.b64encode(buffered.getvalue()).decode() - - # Prepare system and user messages - system_message = { - "role": "system", - "content": SYSTEM_PROMPT.format(height=resized_height, width=resized_width) - } - - user_message = { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{resized_image_b64}" - } - }, - { - "type": "text", - "text": instruction + # Decode base64 image + image_data = base64.b64decode(image_b64) + image = Image.open(BytesIO(image_data)) + width, height = image.width, image.height + + # Smart resize the image (similar to qwen_vl_utils) + resized_height, resized_width = smart_resize( + height, width, + factor=28, # Default factor for Qwen models + min_pixels=3136, + max_pixels=4096 * 2160 + ) + resized_image = image.resize((resized_width, resized_height)) + scale_x, scale_y = width / resized_width, height / resized_height + + # Convert resized image back to base64 + buffered = BytesIO() + resized_image.save(buffered, format="PNG") + resized_image_b64 = base64.b64encode(buffered.getvalue()).decode() + + # Prepare system and user messages + system_message = { + "role": "system", + "content": SYSTEM_PROMPT.format(height=resized_height, width=resized_width) + } + + 
user_message = { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{resized_image_b64}" } - ] - } - - # Prepare API call kwargs - api_kwargs = { - "model": model, - "messages": [system_message, user_message], - "max_tokens": 32, - "temperature": 0.0, - **kwargs - } - - # Use liteLLM acompletion - response = await litellm.acompletion(**api_kwargs) - - # Extract response text - output_text = response.choices[0].message.content - - # Extract and rescale coordinates - pred_x, pred_y = extract_coordinates(output_text) - pred_x *= scale_x - pred_y *= scale_y - - return (pred_x, pred_y) - - except Exception as e: - print(f"GTA1 click prediction failed: {e}") - return None + }, + { + "type": "text", + "text": instruction + } + ] + } + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": [system_message, user_message], + "max_tokens": 32, + "temperature": 0.0, + **kwargs + } + + # Use liteLLM acompletion + response = await litellm.acompletion(**api_kwargs) + + # Extract response text + output_text = response.choices[0].message.content + + # Extract and rescale coordinates + pred_x, pred_y = extract_coordinates(output_text) + pred_x *= scale_x + pred_y *= scale_y + + return (math.floor(pred_x), math.floor(pred_y)) def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" - return ["click"] + return ["click", "step"] diff --git a/libs/python/agent/agent/loops/model_types.csv b/libs/python/agent/agent/loops/model_types.csv new file mode 100644 index 00000000..e43d4fb1 --- /dev/null +++ b/libs/python/agent/agent/loops/model_types.csv @@ -0,0 +1,6 @@ +model,predict_step,predict_point +anthropic,✅,✅ +openai,✅,✅ +uitars,✅,✅ +omniparser,❌,✅ +gta1,❌,✅ \ No newline at end of file diff --git a/libs/python/agent/agent/loops/omniparser.py b/libs/python/agent/agent/loops/omniparser.py index aff73edf..201d7fe6 100644 --- 
a/libs/python/agent/agent/loops/omniparser.py +++ b/libs/python/agent/agent/loops/omniparser.py @@ -310,7 +310,6 @@ class OmniparsrConfig(AsyncAgentConfig): "input": messages, "tools": openai_tools if openai_tools else None, "stream": stream, - "reasoning": {"summary": "concise"}, "truncation": "auto", "num_retries": max_retries, **kwargs @@ -331,7 +330,7 @@ class OmniparsrConfig(AsyncAgentConfig): # Extract usage information usage = { - **response.usage.model_dump(), + **response.usage.model_dump(), # type: ignore "response_cost": response._hidden_params.get("response_cost", 0.0), } if _on_usage: @@ -339,7 +338,7 @@ class OmniparsrConfig(AsyncAgentConfig): # handle som function calls -> xy computer calls new_output = [] - for i in range(len(response.output)): + for i in range(len(response.output)): # type: ignore new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) return { diff --git a/libs/python/agent/example.py b/libs/python/agent/example.py index f686b790..485f484e 100644 --- a/libs/python/agent/example.py +++ b/libs/python/agent/example.py @@ -5,8 +5,7 @@ Example usage of the agent library with docstring-based tool definitions. 
import asyncio import logging -from agent import agent_loop, ComputerAgent -from agent.types import Messages +from agent import ComputerAgent from computer import Computer from computer.helpers import sandboxed diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index be10f729..232aaa48 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "cua-computer>=0.3.0,<0.5.0", "cua-core>=0.1.8,<0.2.0", "certifi>=2024.2.2", - "litellm>=1.74.8" + "litellm>=1.74.12" ] requires-python = ">=3.11" From b2df58a91ec7fe1b4de4b10ad3e132b93b84655b Mon Sep 17 00:00:00 2001 From: cgutierr Date: Sun, 3 Aug 2025 12:54:10 +0200 Subject: [PATCH 10/76] Fix quickstart script --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 87354dad..8d799d17 100644 --- a/README.md +++ b/README.md @@ -143,8 +143,9 @@ pip install "cua-computer[all]" "cua-agent[all]" ### Step 4: Use in Your Code ```python +import asyncio from computer import Computer -from agent import ComputerAgent, LLM +from agent import ComputerAgent async def main(): # Start a local macOS VM From 5b5f17af8e9a7465ceff533a561bd4fd0dd542c3 Mon Sep 17 00:00:00 2001 From: cgutierr Date: Sun, 3 Aug 2025 13:00:27 +0200 Subject: [PATCH 11/76] Use generic AutoModel not tied to specific arch --- libs/python/agent/agent/adapters/huggingfacelocal_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py index f8706868..22286059 100644 --- a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py +++ b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py @@ -8,7 +8,7 @@ from litellm import completion, acompletion # Try to import HuggingFace dependencies try: import torch - from transformers import Qwen2_5_VLForConditionalGeneration, 
AutoProcessor + from transformers import AutoModelForImageTextToText, AutoProcessor HF_AVAILABLE = True except ImportError: HF_AVAILABLE = False @@ -40,7 +40,7 @@ class HuggingFaceLocalAdapter(CustomLLM): """ if model_name not in self.models: # Load model - model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model = AutoModelForImageTextToText.from_pretrained( model_name, torch_dtype=torch.float16, device_map=self.device, From 978181045151fd8a973b109ae483afa37e73987a Mon Sep 17 00:00:00 2001 From: cgutierr Date: Sun, 3 Aug 2025 13:02:15 +0200 Subject: [PATCH 12/76] Move inputs to model's same device, previously hardcoded to CUDA --- libs/python/agent/agent/adapters/huggingfacelocal_adapter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py index 22286059..46be5e98 100644 --- a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py +++ b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py @@ -141,8 +141,7 @@ class HuggingFaceLocalAdapter(CustomLLM): ) # Move inputs to the same device as model - if torch.cuda.is_available() and self.device != "cpu": - inputs = inputs.to("cuda") + inputs = inputs.to(model.device) # Generate response with torch.no_grad(): From 77922c92fc999b011892227b3742599e4a602c39 Mon Sep 17 00:00:00 2001 From: cgutierr Date: Sun, 3 Aug 2025 13:04:29 +0200 Subject: [PATCH 13/76] Dummy (small) models can produce action_types=None, which will later produce a crash, avoid it --- libs/python/agent/agent/agent.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 0b9f243a..a36ef876 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -411,6 +411,9 @@ class ComputerAgent: # Perform computer actions action = item.get("action") action_type = action.get("type") + if action_type is 
None: + print(f"Action type cannot be `None`: action={action}, action_type={action_type}") + return [] # Extract action arguments (all fields except 'type') action_args = {k: v for k, v in action.items() if k != "type"} From f87b8eaea500c4db684f2a7673df4e75ae14fc19 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Mon, 4 Aug 2025 16:32:05 -0400 Subject: [PATCH 14/76] added grounding+planning composed loop --- libs/python/agent/agent/loops/__init__.py | 3 +- libs/python/agent/agent/loops/anthropic.py | 2 +- .../agent/agent/loops/composed_grounded.py | 318 ++++++++++++ libs/python/agent/agent/loops/gta1.py | 389 +------------- libs/python/agent/agent/loops/omniparser.py | 2 +- libs/python/agent/agent/loops/openai.py | 99 +++- libs/python/agent/agent/loops/uitars.py | 2 +- libs/python/agent/agent/responses.py | 478 +++++++++++++++++- .../agent/benchmarks/models/__init__.py | 3 +- libs/python/agent/benchmarks/utils.py | 8 +- 10 files changed, 904 insertions(+), 400 deletions(-) create mode 100644 libs/python/agent/agent/loops/composed_grounded.py diff --git a/libs/python/agent/agent/loops/__init__.py b/libs/python/agent/agent/loops/__init__.py index 91722e55..e3070a81 100644 --- a/libs/python/agent/agent/loops/__init__.py +++ b/libs/python/agent/agent/loops/__init__.py @@ -8,5 +8,6 @@ from . import openai from . import uitars from . import omniparser from . import gta1 +from . 
import composed_grounded -__all__ = ["anthropic", "openai", "uitars", "omniparser", "gta1"] +__all__ = ["anthropic", "openai", "uitars", "omniparser", "gta1", "composed_grounded"] diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py index 91021ffc..8371352c 100644 --- a/libs/python/agent/agent/loops/anthropic.py +++ b/libs/python/agent/agent/loops/anthropic.py @@ -1285,7 +1285,7 @@ def _merge_consecutive_text(content_list: List[Dict[str, Any]]) -> List[Dict[str return merged -@register_agent(models=r".*claude-.*", priority=5) +@register_agent(models=r".*claude-.*") class AnthropicHostedToolsConfig(AsyncAgentConfig): """Anthropic hosted tools agent configuration implementing AsyncAgentConfig protocol.""" diff --git a/libs/python/agent/agent/loops/composed_grounded.py b/libs/python/agent/agent/loops/composed_grounded.py new file mode 100644 index 00000000..31b29372 --- /dev/null +++ b/libs/python/agent/agent/loops/composed_grounded.py @@ -0,0 +1,318 @@ +""" +Composed-grounded agent loop implementation that combines grounding and thinking models. +Uses a two-stage approach: grounding model for element detection, thinking model for reasoning. +""" + +import uuid +import asyncio +import json +import base64 +from typing import Dict, List, Any, Optional, Tuple +from io import BytesIO +from PIL import Image +import litellm + +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability +from ..loops.base import AsyncAgentConfig +from ..responses import ( + convert_computer_calls_xy2desc, + convert_responses_items_to_completion_messages, + convert_completion_messages_to_responses_items, + convert_computer_calls_desc2xy, + get_all_element_descriptions +) +from ..agent import find_agent_config + +GROUNDED_COMPUTER_TOOL_SCHEMA = { + "type": "function", + "function": { + "name": "computer", + "description": "Control a computer by taking screenshots and interacting with UI elements. 
This tool uses element descriptions to locate and interact with UI elements on the screen (e.g., 'red submit button', 'search text field', 'hamburger menu icon', 'close button in top right corner').", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": [ + "screenshot", + "click", + "double_click", + "drag", + "type", + "keypress", + "scroll", + "move", + "wait", + "get_current_url", + "get_dimensions", + "get_environment" + ], + "description": "The action to perform" + }, + "element_description": { + "type": "string", + "description": "Description of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)" + }, + "start_element_description": { + "type": "string", + "description": "Description of the element to start dragging from (required for drag action)" + }, + "end_element_description": { + "type": "string", + "description": "Description of the element to drag to (required for drag action)" + }, + "text": { + "type": "string", + "description": "The text to type (required for type action)" + }, + "keys": { + "type": "string", + "description": "Key combination to press (required for keypress action). 
Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')" + }, + "button": { + "type": "string", + "description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left", + }, + "scroll_x": { + "type": "integer", + "description": "Horizontal scroll amount for scroll action (positive for right, negative for left)", + }, + "scroll_y": { + "type": "integer", + "description": "Vertical scroll amount for scroll action (positive for down, negative for up)", + }, + }, + "required": [ + "action" + ] + } + } +} + +def _prepare_tools_for_grounded(tool_schemas: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Prepare tools for grounded API format""" + grounded_tools = [] + + for schema in tool_schemas: + if schema["type"] == "computer": + grounded_tools.append(GROUNDED_COMPUTER_TOOL_SCHEMA) + else: + grounded_tools.append(schema) + + return grounded_tools + +def get_last_computer_call_image(messages: List[Dict[str, Any]]) -> Optional[str]: + """Get the last computer call output image from messages.""" + for message in reversed(messages): + if (isinstance(message, dict) and + message.get("type") == "computer_call_output" and + isinstance(message.get("output"), dict) and + message["output"].get("type") == "input_image"): + image_url = message["output"].get("image_url", "") + if image_url.startswith("data:image/png;base64,"): + return image_url.split(",", 1)[1] + return None + + +@register_agent(r".*\+.*", priority=10) +class ComposedGroundedConfig: + """ + Composed-grounded agent configuration that uses both grounding and thinking models. 
+ + The model parameter should be in format: "grounding_model+thinking_model" + e.g., "gpt-4o+claude-3-5-sonnet-20241022" + """ + + def __init__(self): + self.desc2xy: Dict[str, Tuple[float, float]] = {} + + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + use_prompt_caching: Optional[bool] = False, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs + ) -> Dict[str, Any]: + """ + Composed-grounded predict step implementation. + + Process: + 0. Store last computer call image, if none then take a screenshot + 1. Convert computer calls from xy to descriptions + 2. Convert responses items to completion messages + 3. Call thinking model with litellm.acompletion + 4. Convert completion messages to responses items + 5. Get all element descriptions and populate desc2xy mapping + 6. Convert computer calls from descriptions back to xy coordinates + 7. Return output and usage + """ + # Parse the composed model + if "+" not in model: + raise ValueError(f"Composed model must be in format 'grounding_model+thinking_model', got: {model}") + grounding_model, thinking_model = model.split("+", 1) + + pre_output_items = [] + + # Step 0: Store last computer call image, if none then take a screenshot + last_image_b64 = get_last_computer_call_image(messages) + if last_image_b64 is None: + # Take a screenshot + screenshot_b64 = await computer_handler.screenshot() # type: ignore + if screenshot_b64: + + call_id = uuid.uuid4().hex + pre_output_items += [ + { + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Taking a screenshot to see the current computer screen." 
+ } + ] + }, + { + "action": { + "type": "screenshot" + }, + "call_id": call_id, + "status": "completed", + "type": "computer_call" + }, + { + "type": "computer_call_output", + "call_id": call_id, + "output": { + "type": "input_image", + "image_url": f"data:image/png;base64,{screenshot_b64}" + } + }, + ] + last_image_b64 = screenshot_b64 + + # Call screenshot callback if provided + if _on_screenshot: + await _on_screenshot(screenshot_b64) + + tool_schemas = _prepare_tools_for_grounded(tools) # type: ignore + + # Step 1: Convert computer calls from xy to descriptions + input_messages = messages + pre_output_items + messages_with_descriptions = convert_computer_calls_xy2desc(input_messages, self.desc2xy) + + # Step 2: Convert responses items to completion messages + completion_messages = convert_responses_items_to_completion_messages( + messages_with_descriptions, + allow_images_in_tool_results=False + ) + + # Step 3: Call thinking model with litellm.acompletion + api_kwargs = { + "model": thinking_model, + "messages": completion_messages, + "tools": tool_schemas, + "max_retries": max_retries, + "stream": stream, + **kwargs + } + + if use_prompt_caching: + api_kwargs["use_prompt_caching"] = use_prompt_caching + + # Call API start hook + if _on_api_start: + await _on_api_start(api_kwargs) + + # Make the completion call + response = await litellm.acompletion(**api_kwargs) + + # Call API end hook + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Extract usage information + usage = { + **response.usage.model_dump(), # type: ignore + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(usage) + + # Step 4: Convert completion messages back to responses items format + response_dict = response.model_dump() # type: ignore + choice_messages = [choice["message"] for choice in response_dict["choices"]] + thinking_output_items = [] + + for choice_message in choice_messages: + 
thinking_output_items.extend(convert_completion_messages_to_responses_items([choice_message])) + + # Step 5: Get all element descriptions and populate desc2xy mapping + element_descriptions = get_all_element_descriptions(thinking_output_items) + + if element_descriptions and last_image_b64: + # Use grounding model to predict coordinates for each description + grounding_agent_conf = find_agent_config(grounding_model) + if grounding_agent_conf: + grounding_agent = grounding_agent_conf.agent_class() + + for desc in element_descriptions: + coords = await grounding_agent.predict_click( + model=grounding_model, + image_b64=last_image_b64, + instruction=desc + ) + if coords: + self.desc2xy[desc] = coords + + # Step 6: Convert computer calls from descriptions back to xy coordinates + final_output_items = convert_computer_calls_desc2xy(thinking_output_items, self.desc2xy) + + # Step 7: Return output and usage + return { + "output": pre_output_items + final_output_items, + "usage": usage + } + + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates using the grounding model. + + For composed models, uses only the grounding model part for click prediction. 
+ """ + # Parse the composed model to get grounding model + if "+" not in model: + raise ValueError(f"Composed model must be in format 'grounding_model+thinking_model', got: {model}") + grounding_model, thinking_model = model.split("+", 1) + + # Find and use the grounding agent + grounding_agent_conf = find_agent_config(grounding_model) + if grounding_agent_conf: + grounding_agent = grounding_agent_conf.agent_class() + return await grounding_agent.predict_click( + model=grounding_model, + image_b64=image_b64, + instruction=instruction, + **kwargs + ) + + return None + + def get_capabilities(self) -> List[AgentCapability]: + """Return the capabilities supported by this agent.""" + return ["click", "step"] diff --git a/libs/python/agent/agent/loops/gta1.py b/libs/python/agent/agent/loops/gta1.py index 29472107..bf4da044 100644 --- a/libs/python/agent/agent/loops/gta1.py +++ b/libs/python/agent/agent/loops/gta1.py @@ -26,73 +26,6 @@ Output the coordinate pair exactly: (x,y) '''.strip() -# Global dictionary to map coordinates to descriptions -xy2desc: Dict[Tuple[float, float], str] = {} - -GTA1_TOOL_SCHEMA = { - "type": "function", - "name": "computer", - "description": "Control a computer by taking screenshots and interacting with UI elements. 
This tool uses element descriptions to locate and interact with UI elements on the screen (e.g., 'red submit button', 'search text field', 'hamburger menu icon', 'close button in top right corner').", - "parameters": { - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": [ - "screenshot", - "click", - "double_click", - "drag", - "type", - "keypress", - "scroll", - "move", - "wait", - "get_current_url", - "get_dimensions", - "get_environment" - ], - "description": "The action to perform" - }, - "element_description": { - "type": "string", - "description": "Description of the element to interact with (required for click, double_click, move, scroll actions, and as start/end for drag)" - }, - "start_element_description": { - "type": "string", - "description": "Description of the element to start dragging from (required for drag action)" - }, - "end_element_description": { - "type": "string", - "description": "Description of the element to drag to (required for drag action)" - }, - "text": { - "type": "string", - "description": "The text to type (required for type action)" - }, - "keys": { - "type": "string", - "description": "Key combination to press (required for keypress action). 
Single key for individual key press, multiple keys for combinations (e.g., 'ctrl+c')" - }, - "button": { - "type": "string", - "description": "The mouse button to use for click action (left, right, wheel, back, forward) Default: left", - }, - "scroll_x": { - "type": "integer", - "description": "Horizontal scroll amount for scroll action (positive for right, negative for left)", - }, - "scroll_y": { - "type": "integer", - "description": "Vertical scroll amount for scroll action (positive for down, negative for up)", - }, - }, - "required": [ - "action" - ] - } -} - def extract_coordinates(raw_string: str) -> Tuple[float, float]: """Extract coordinates from model output.""" try: @@ -101,173 +34,6 @@ def extract_coordinates(raw_string: str) -> Tuple[float, float]: except: return (0.0, 0.0) -def get_last_computer_call_output(messages: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - """Get the last computer_call_output message from a messages list. - - Args: - messages: List of messages to search through - - Returns: - The last computer_call_output message dict, or None if not found - """ - for message in reversed(messages): - if isinstance(message, dict) and message.get("type") == "computer_call_output": - return message - return None - -def _prepare_tools_for_gta1(tool_schemas: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Prepare tools for GTA1 API format""" - gta1_tools = [] - - for schema in tool_schemas: - if schema["type"] == "computer": - gta1_tools.append(GTA1_TOOL_SCHEMA) - else: - gta1_tools.append(schema) - - return gta1_tools - -async def replace_function_with_computer_call_gta1(item: Dict[str, Any], agent_instance) -> List[Dict[str, Any]]: - """Convert function_call to computer_call format using GTA1 click prediction.""" - global xy2desc - item_type = item.get("type") - - async def _get_xy(element_description: Optional[str], last_image_b64: str) -> Union[Tuple[float, float], Tuple[None, None]]: - if element_description is None: - return (None, 
None) - # Use self.predict_click to get coordinates from description - coords = await agent_instance.predict_click( - model=agent_instance.current_model, - image_b64=last_image_b64, - instruction=element_description - ) - if coords: - # Store the mapping from coordinates to description - xy2desc[coords] = element_description - return coords - return (None, None) - - if item_type == "function_call": - fn_name = item.get("name") - fn_args = json.loads(item.get("arguments", "{}")) - - item_id = item.get("id") - call_id = item.get("call_id") - - if fn_name == "computer": - action = fn_args.get("action") - element_description = fn_args.get("element_description") - start_element_description = fn_args.get("start_element_description") - end_element_description = fn_args.get("end_element_description") - text = fn_args.get("text") - keys = fn_args.get("keys") - button = fn_args.get("button") - scroll_x = fn_args.get("scroll_x") - scroll_y = fn_args.get("scroll_y") - - # Get the last computer output image for click prediction - last_image_b64 = agent_instance.last_screenshot_b64 or "" - - x, y = await _get_xy(element_description, last_image_b64) - start_x, start_y = await _get_xy(start_element_description, last_image_b64) - end_x, end_y = await _get_xy(end_element_description, last_image_b64) - - action_args = { - "type": action, - "x": x, - "y": y, - "start_x": start_x, - "start_y": start_y, - "end_x": end_x, - "end_y": end_y, - "text": text, - "keys": keys, - "button": button, - "scroll_x": scroll_x, - "scroll_y": scroll_y - } - # Remove None values to keep the JSON clean - action_args = {k: v for k, v in action_args.items() if v is not None} - - return [{ - "type": "computer_call", - "action": action_args, - "id": item_id, - "call_id": call_id, - "status": "completed" - }] - - return [item] - -async def replace_computer_call_with_function_gta1(item: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Convert computer_call back to function_call format using descriptions. 
- Only READS from the global xy2desc dictionary. - - Args: - item: The item to convert - """ - global xy2desc - item_type = item.get("type") - - def _get_element_description(x: Optional[float], y: Optional[float]) -> Optional[str]: - """Get element description from coordinates, return None if coordinates are None""" - if x is None or y is None: - return None - return xy2desc.get((x, y)) - - if item_type == "computer_call": - action_data = item.get("action", {}) - - # Extract coordinates and convert back to element descriptions - element_description = _get_element_description(action_data.get("x"), action_data.get("y")) - start_element_description = _get_element_description(action_data.get("start_x"), action_data.get("start_y")) - end_element_description = _get_element_description(action_data.get("end_x"), action_data.get("end_y")) - - # Build function arguments - fn_args = { - "action": action_data.get("type"), - "element_description": element_description, - "start_element_description": start_element_description, - "end_element_description": end_element_description, - "text": action_data.get("text"), - "keys": action_data.get("keys"), - "button": action_data.get("button"), - "scroll_x": action_data.get("scroll_x"), - "scroll_y": action_data.get("scroll_y") - } - - # Remove None values to keep the JSON clean - fn_args = {k: v for k, v in fn_args.items() if v is not None} - - return [{ - "type": "function_call", - "name": "computer", - "arguments": json.dumps(fn_args), - "id": item.get("id"), - "call_id": item.get("call_id"), - "status": "completed", - # Fall back to string representation - # "content": f"Used tool: {action_data.get('type')}({json.dumps(fn_args)})" - }] - - elif item_type == "computer_call_output": - # Simple conversion: computer_call_output -> function_call_output (text only), user message (image) - return [ - { - "type": "function_call_output", - "call_id": item.get("call_id"), - "output": "Tool executed successfully. 
See the current computer screenshot below, if nothing has changed yet then you may need to wait before trying again.", - "id": item.get("id"), - "status": "completed" - }, { - "role": "user", - "content": [item.get("output")] - } - ] - - return [item] - def smart_resize(height: int, width: int, factor: int = 28, min_pixels: int = 3136, max_pixels: int = 8847360) -> Tuple[int, int]: """Smart resize function similar to qwen_vl_utils.""" # Calculate the total pixels @@ -300,7 +66,7 @@ def smart_resize(height: int, width: int, factor: int = 28, min_pixels: int = 31 return new_height, new_width -@register_agent(models=r".*GTA1.*", priority=10) +@register_agent(models=r".*GTA1.*") class GTA1Config(AsyncAgentConfig): """GTA1 agent configuration implementing AsyncAgentConfig protocol for click prediction.""" @@ -308,153 +74,6 @@ class GTA1Config(AsyncAgentConfig): self.current_model = None self.last_screenshot_b64 = None - async def predict_step( - self, - messages: Messages, - model: str, - tools: Optional[List[Dict[str, Any]]] = None, - max_retries: Optional[int] = None, - stream: bool = False, - computer_handler=None, - use_prompt_caching: Optional[bool] = False, - _on_api_start=None, - _on_api_end=None, - _on_usage=None, - _on_screenshot=None, - **kwargs - ) -> Dict[str, Any]: - """ - GTA1 agent loop implementation using liteLLM responses with element descriptions. - - Follows the 4-step process: - 1. Prepare tools - 2. Replace computer calls with function calls (using descriptions) - 3. API call - 4. 
Replace function calls with computer calls (using predict_click) - """ - models = model.split("+") - if len(models) != 2: - raise ValueError("GTA1 model must be in the format + to be used in an agent loop") - - gta1_model, llm_model = models - self.current_model = gta1_model - - tools = tools or [] - - # Step 0: Prepare tools - gta1_tools = _prepare_tools_for_gta1(tools) - - # Get last computer_call_output for screenshot reference - # Convert messages to list of dicts first - message_list = [] - for message in messages: - if not isinstance(message, dict): - message_list.append(message.__dict__) - else: - message_list.append(message) - - last_computer_call_output = get_last_computer_call_output(message_list) - if last_computer_call_output: - image_url = last_computer_call_output.get("output", {}).get("image_url", "") - if image_url.startswith("data:image/"): - self.last_screenshot_b64 = image_url.split(",")[-1] - else: - self.last_screenshot_b64 = image_url - - # Step 1: If there's no screenshot, simulate the model calling the screenshot function - pre_output = [] - if not self.last_screenshot_b64 and computer_handler: - screenshot_base64 = await computer_handler.screenshot() - if _on_screenshot: - await _on_screenshot(screenshot_base64, "screenshot_initial") - - call_id = uuid.uuid4().hex - pre_output += [ - { - "type": "message", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": "Taking a screenshot to see the current computer screen." 
- } - ] - }, - { - "action": { - "type": "screenshot" - }, - "call_id": call_id, - "status": "completed", - "type": "computer_call" - }, - { - "type": "computer_call_output", - "call_id": call_id, - "output": { - "type": "input_image", - "image_url": f"data:image/png;base64,{screenshot_base64}" - } - }, - ] - - # Update the last screenshot for future use - self.last_screenshot_b64 = screenshot_base64 - - message_list += pre_output - - # Step 2: Replace computer calls with function calls (using descriptions) - new_messages = [] - for message in message_list: - new_messages += await replace_computer_call_with_function_gta1(message) - messages = new_messages - - # Step 3: API call - api_kwargs = { - "model": llm_model, - "input": messages, - "tools": gta1_tools if gta1_tools else None, - "stream": stream, - "truncation": "auto", - "num_retries": max_retries, - **kwargs - } - - # Call API start hook - if _on_api_start: - await _on_api_start(api_kwargs) - - # Use liteLLM responses - response = await litellm.aresponses(**api_kwargs) - - # Call API end hook - if _on_api_end: - await _on_api_end(api_kwargs, response) - - # Extract usage information - usage = { - **response.usage.model_dump(), # type: ignore - "response_cost": response._hidden_params.get("response_cost", 0.0), # type: ignore - } - if _on_usage: - await _on_usage(usage) - - # Step 4: Replace function calls with computer calls (using predict_click) - new_output = [] - for i in range(len(response.output)): # type: ignore - output_item = response.output[i] # type: ignore - # Convert to dict if it has model_dump method, otherwise use as-is - if hasattr(output_item, 'model_dump'): - item_dict = output_item.model_dump() # type: ignore - else: - item_dict = output_item # type: ignore - new_output += await replace_function_with_computer_call_gta1(item_dict, self) # type: ignore - - return { - "output": pre_output + new_output, - "usage": usage - } - async def predict_click( self, model: str, @@ -528,10 +147,10 @@ 
class GTA1Config(AsyncAgentConfig): response = await litellm.acompletion(**api_kwargs) # Extract response text - output_text = response.choices[0].message.content + output_text = response.choices[0].message.content # type: ignore # Extract and rescale coordinates - pred_x, pred_y = extract_coordinates(output_text) + pred_x, pred_y = extract_coordinates(output_text) # type: ignore pred_x *= scale_x pred_y *= scale_y @@ -539,4 +158,4 @@ class GTA1Config(AsyncAgentConfig): def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" - return ["click", "step"] + return ["click"] diff --git a/libs/python/agent/agent/loops/omniparser.py b/libs/python/agent/agent/loops/omniparser.py index 201d7fe6..2cf2d2c7 100644 --- a/libs/python/agent/agent/loops/omniparser.py +++ b/libs/python/agent/agent/loops/omniparser.py @@ -249,7 +249,7 @@ async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[ return [item] -@register_agent(models=r"omniparser\+.*|omni\+.*", priority=10) +@register_agent(models=r"omniparser\+.*|omni\+.*") class OmniparsrConfig(AsyncAgentConfig): """Omniparser agent configuration implementing AsyncAgentConfig protocol.""" diff --git a/libs/python/agent/agent/loops/openai.py b/libs/python/agent/agent/loops/openai.py index 13bcb1f1..793c3ce6 100644 --- a/libs/python/agent/agent/loops/openai.py +++ b/libs/python/agent/agent/loops/openai.py @@ -3,9 +3,12 @@ OpenAI computer-use-preview agent loop implementation using liteLLM """ import asyncio +import base64 import json +from io import BytesIO from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple import litellm +from PIL import Image from ..decorators import register_agent from ..types import Messages, AgentResponse, Tools, AgentCapability @@ -36,7 +39,7 @@ def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools: return openai_tools -@register_agent(models=r".*computer-use-preview.*", priority=10) 
+@register_agent(models=r".*computer-use-preview.*") class OpenAIComputerUseConfig: """ OpenAI computer-use-preview agent configuration using liteLLM responses. @@ -128,8 +131,8 @@ class OpenAIComputerUseConfig: """ Predict click coordinates based on image and instruction. - Note: OpenAI computer-use-preview doesn't support direct click prediction, - so this returns None. + Uses OpenAI computer-use-preview with manually constructed input items + and a prompt that instructs the agent to only output clicks. Args: model: Model name to use @@ -137,8 +140,94 @@ class OpenAIComputerUseConfig: instruction: Instruction for where to click Returns: - None (not supported by OpenAI computer-use-preview) + Tuple of (x, y) coordinates or None if prediction fails """ + # TODO: implement this correctly + # Scale image to half size + try: + image_data = base64.b64decode(image_b64) + image = Image.open(BytesIO(image_data)) + + # Scale to half size + new_width = image.width // 2 + new_height = image.height // 2 + scaled_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) + + # Convert back to base64 + buffer = BytesIO() + scaled_image.save(buffer, format='PNG') + image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + except Exception: + # If scaling fails, use original image + pass + + # Manually construct input items with image and click instruction + input_items = [ + { + "role": "user", + "content": f"You are a UI grounding expert. Look at the image and {instruction}. Output ONLY a click action on the target element. No explanations, confirmations, or additional text." 
+ }, + { + "role": "user", + "content": [ + { + "type": "input_image", + "image_url": f"data:image/png;base64,{image_b64}" + } + ] + } + ] + + # Get image dimensions from base64 data + try: + image_data = base64.b64decode(image_b64) + image = Image.open(BytesIO(image_data)) + display_width, display_height = image.size + except Exception: + # Fallback to default dimensions if image parsing fails + display_width, display_height = 1024, 768 + + # Prepare computer tool for click actions + computer_tool = { + "type": "computer_use_preview", + "display_width": display_width, + "display_height": display_height, + "environment": "linux" + } + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "input": input_items, + "tools": [computer_tool], + "stream": False, + "reasoning": {"summary": "concise"}, + "truncation": "auto", + "max_tokens": 100 # Keep response short for click prediction + } + + # Use liteLLM responses + response = await litellm.aresponses(**api_kwargs) + + # Extract click coordinates from response output + output_dict = response.model_dump() + output_items = output_dict.get("output", []) + + # print(output_items) + + # Look for computer_call with click action + for item in output_items: + if (isinstance(item, dict) and + item.get("type") == "computer_call" and + isinstance(item.get("action"), dict)): + + action = item["action"] + if action.get("type") == "click": + x = action.get("x") + y = action.get("y") + if x is not None and y is not None: + return (int(x) * 2, int(y) * 2) + return None def get_capabilities(self) -> List[AgentCapability]: @@ -148,4 +237,4 @@ class OpenAIComputerUseConfig: Returns: List of capability strings """ - return ["step"] + return ["click", "step"] diff --git a/libs/python/agent/agent/loops/uitars.py b/libs/python/agent/agent/loops/uitars.py index f715ef61..2c1b41b9 100644 --- a/libs/python/agent/agent/loops/uitars.py +++ b/libs/python/agent/agent/loops/uitars.py @@ -515,7 +515,7 @@ def 
convert_uitars_messages_to_litellm(messages: Messages) -> List[Dict[str, Any return litellm_messages -@register_agent(models=r"(?i).*ui-?tars.*", priority=10) +@register_agent(models=r"(?i).*ui-?tars.*") class UITARSConfig: """ UITARS agent configuration using liteLLM for ByteDance-Seed/UI-TARS-1.5-7B model. diff --git a/libs/python/agent/agent/responses.py b/libs/python/agent/agent/responses.py index 2d7e85d0..52993750 100644 --- a/libs/python/agent/agent/responses.py +++ b/libs/python/agent/agent/responses.py @@ -40,7 +40,7 @@ def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessagePara ResponseInputImageParam( type="input_image", image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}" - ) + ) # type: ignore ], role="user", type="message" @@ -205,3 +205,479 @@ def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallPar status="completed", type="computer_call" ) + + +# Conversion functions between element descriptions and coordinates +def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]: + """ + Convert computer calls from element descriptions to x,y coordinates. 
+ + Args: + responses_items: List of response items containing computer calls with element_description + desc2xy: Dictionary mapping element descriptions to (x, y) coordinate tuples + + Returns: + List of response items with element_description replaced by x,y coordinates + """ + converted_items = [] + + for item in responses_items: + if item.get("type") == "computer_call" and "action" in item: + action = item["action"].copy() + + # Handle single element_description + if "element_description" in action: + desc = action["element_description"] + if desc in desc2xy: + x, y = desc2xy[desc] + action["x"] = x + action["y"] = y + del action["element_description"] + + # Handle start_element_description and end_element_description for drag operations + elif "start_element_description" in action and "end_element_description" in action: + start_desc = action["start_element_description"] + end_desc = action["end_element_description"] + + if start_desc in desc2xy and end_desc in desc2xy: + start_x, start_y = desc2xy[start_desc] + end_x, end_y = desc2xy[end_desc] + action["path"] = [{"x": start_x, "y": start_y}, {"x": end_x, "y": end_y}] + del action["start_element_description"] + del action["end_element_description"] + + converted_item = item.copy() + converted_item["action"] = action + converted_items.append(converted_item) + else: + converted_items.append(item) + + return converted_items + + +def convert_computer_calls_xy2desc(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]: + """ + Convert computer calls from x,y coordinates to element descriptions. 
+ + Args: + responses_items: List of response items containing computer calls with x,y coordinates + desc2xy: Dictionary mapping element descriptions to (x, y) coordinate tuples + + Returns: + List of response items with x,y coordinates replaced by element_description + """ + # Create reverse mapping from coordinates to descriptions + xy2desc = {coords: desc for desc, coords in desc2xy.items()} + + converted_items = [] + + for item in responses_items: + if item.get("type") == "computer_call" and "action" in item: + action = item["action"].copy() + + # Handle single x,y coordinates + if "x" in action and "y" in action: + coords = (action["x"], action["y"]) + if coords in xy2desc: + action["element_description"] = xy2desc[coords] + del action["x"] + del action["y"] + + # Handle path for drag operations + elif "path" in action and isinstance(action["path"], list) and len(action["path"]) == 2: + start_point = action["path"][0] + end_point = action["path"][1] + + if ("x" in start_point and "y" in start_point and + "x" in end_point and "y" in end_point): + + start_coords = (start_point["x"], start_point["y"]) + end_coords = (end_point["x"], end_point["y"]) + + if start_coords in xy2desc and end_coords in xy2desc: + action["start_element_description"] = xy2desc[start_coords] + action["end_element_description"] = xy2desc[end_coords] + del action["path"] + + converted_item = item.copy() + converted_item["action"] = action + converted_items.append(converted_item) + else: + converted_items.append(item) + + return converted_items + + +def get_all_element_descriptions(responses_items: List[Dict[str, Any]]) -> List[str]: + """ + Extract all element descriptions from computer calls in responses items. 
+ + Args: + responses_items: List of response items containing computer calls + + Returns: + List of unique element descriptions found in computer calls + """ + descriptions = set() + + for item in responses_items: + if item.get("type") == "computer_call" and "action" in item: + action = item["action"] + + # Handle single element_description + if "element_description" in action: + descriptions.add(action["element_description"]) + + # Handle start_element_description and end_element_description for drag operations + if "start_element_description" in action: + descriptions.add(action["start_element_description"]) + + if "end_element_description" in action: + descriptions.add(action["end_element_description"]) + + return list(descriptions) + + +# Conversion functions between responses_items and completion messages formats +def convert_responses_items_to_completion_messages(messages: List[Dict[str, Any]], allow_images_in_tool_results: bool = True) -> List[Dict[str, Any]]: + """Convert responses_items message format to liteLLM completion format. + + Args: + messages: List of responses_items format messages + allow_images_in_tool_results: If True, include images in tool role messages. + If False, send tool message + separate user message with image. 
+ """ + completion_messages = [] + + for message in messages: + msg_type = message.get("type") + role = message.get("role") + + # Handle user messages (both with and without explicit type) + if role == "user" or msg_type == "user": + content = message.get("content", "") + if isinstance(content, list): + # Handle list content (images, text blocks) + completion_content = [] + for item in content: + if item.get("type") == "input_image": + completion_content.append({ + "type": "image_url", + "image_url": { + "url": item.get("image_url") + } + }) + elif item.get("type") == "input_text": + completion_content.append({ + "type": "text", + "text": item.get("text") + }) + elif item.get("type") == "text": + completion_content.append({ + "type": "text", + "text": item.get("text") + }) + + completion_messages.append({ + "role": "user", + "content": completion_content + }) + elif isinstance(content, str): + # Handle string content + completion_messages.append({ + "role": "user", + "content": content + }) + + # Handle assistant messages + elif role == "assistant" or msg_type == "message": + content = message.get("content", []) + if isinstance(content, list): + text_parts = [] + for item in content: + if item.get("type") == "output_text": + text_parts.append(item.get("text", "")) + elif item.get("type") == "text": + text_parts.append(item.get("text", "")) + + if text_parts: + completion_messages.append({ + "role": "assistant", + "content": "\n".join(text_parts) + }) + + # Handle reasoning items (convert to assistant message) + elif msg_type == "reasoning": + summary = message.get("summary", []) + text_parts = [] + for item in summary: + if item.get("type") == "summary_text": + text_parts.append(item.get("text", "")) + + if text_parts: + completion_messages.append({ + "role": "assistant", + "content": "\n".join(text_parts) + }) + + # Handle function calls + elif msg_type == "function_call": + # Add tool call to last assistant message or create new one + if not completion_messages 
or completion_messages[-1]["role"] != "assistant": + completion_messages.append({ + "role": "assistant", + "content": "", + "tool_calls": [] + }) + + if "tool_calls" not in completion_messages[-1]: + completion_messages[-1]["tool_calls"] = [] + + completion_messages[-1]["tool_calls"].append({ + "id": message.get("call_id"), + "type": "function", + "function": { + "name": message.get("name"), + "arguments": message.get("arguments") + } + }) + + # Handle computer calls + elif msg_type == "computer_call": + # Add tool call to last assistant message or create new one + if not completion_messages or completion_messages[-1]["role"] != "assistant": + completion_messages.append({ + "role": "assistant", + "content": "", + "tool_calls": [] + }) + + if "tool_calls" not in completion_messages[-1]: + completion_messages[-1]["tool_calls"] = [] + + action = message.get("action", {}) + completion_messages[-1]["tool_calls"].append({ + "id": message.get("call_id"), + "type": "function", + "function": { + "name": "computer", + "arguments": json.dumps(action) + } + }) + + # Handle function/computer call outputs + elif msg_type in ["function_call_output", "computer_call_output"]: + output = message.get("output") + call_id = message.get("call_id") + + if isinstance(output, dict) and output.get("type") == "input_image": + if allow_images_in_tool_results: + # Handle image output as tool response (may not work with all APIs) + completion_messages.append({ + "role": "tool", + "tool_call_id": call_id, + "content": [{ + "type": "image_url", + "image_url": { + "url": output.get("image_url") + } + }] + }) + else: + # Send tool message + separate user message with image (OpenAI compatible) + completion_messages += [{ + "role": "tool", + "tool_call_id": call_id, + "content": "[Execution completed. 
See screenshot below]" + }, { + "role": "user", + "content": [{ + "type": "image_url", + "image_url": { + "url": output.get("image_url") + } + }] + }] + else: + # Handle text output as tool response + completion_messages.append({ + "role": "tool", + "tool_call_id": call_id, + "content": str(output) + }) + + return completion_messages + + +def convert_completion_messages_to_responses_items(completion_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert completion messages format to responses_items message format.""" + responses_items = [] + skip_next = False + + for i, message in enumerate(completion_messages): + if skip_next: + skip_next = False + continue + + role = message.get("role") + content = message.get("content") + tool_calls = message.get("tool_calls", []) + + # Handle assistant messages with text content + if role == "assistant" and content and isinstance(content, str): + responses_items.append({ + "type": "message", + "role": "assistant", + "content": [{ + "type": "output_text", + "text": content + }] + }) + + # Handle tool calls + if tool_calls: + for tool_call in tool_calls: + if tool_call.get("type") == "function": + function = tool_call.get("function", {}) + function_name = function.get("name") + + if function_name == "computer": + # Parse computer action + try: + action = json.loads(function.get("arguments", "{}")) + # Change key from "action" -> "type" + if action.get("action"): + action["type"] = action["action"] + del action["action"] + responses_items.append({ + "type": "computer_call", + "call_id": tool_call.get("id"), + "action": action, + "status": "completed" + }) + except json.JSONDecodeError: + # Fallback to function call format + responses_items.append({ + "type": "function_call", + "call_id": tool_call.get("id"), + "name": function_name, + "arguments": function.get("arguments", "{}"), + "status": "completed" + }) + else: + # Regular function call + responses_items.append({ + "type": "function_call", + "call_id": 
tool_call.get("id"), + "name": function_name, + "arguments": function.get("arguments", "{}"), + "status": "completed" + }) + + # Handle tool messages (function/computer call outputs) + elif role == "tool" and content: + tool_call_id = message.get("tool_call_id") + if isinstance(content, str): + # Check if this is the "[Execution completed. See screenshot below]" pattern + if content == "[Execution completed. See screenshot below]": + # Look ahead for the next user message with image + next_idx = i + 1 + if (next_idx < len(completion_messages) and + completion_messages[next_idx].get("role") == "user" and + isinstance(completion_messages[next_idx].get("content"), list)): + # Found the pattern - extract image from next message + next_content = completion_messages[next_idx]["content"] + for item in next_content: + if item.get("type") == "image_url": + responses_items.append({ + "type": "computer_call_output", + "call_id": tool_call_id, + "output": { + "type": "input_image", + "image_url": item.get("image_url", {}).get("url") + } + }) + # Skip the next user message since we processed it + skip_next = True + break + else: + # No matching user message, treat as regular text + responses_items.append({ + "type": "computer_call_output", + "call_id": tool_call_id, + "output": content + }) + else: + # Determine if this is a computer call or function call output + try: + # Try to parse as structured output + parsed_content = json.loads(content) + if parsed_content.get("type") == "input_image": + responses_items.append({ + "type": "computer_call_output", + "call_id": tool_call_id, + "output": parsed_content + }) + else: + responses_items.append({ + "type": "computer_call_output", + "call_id": tool_call_id, + "output": content + }) + except json.JSONDecodeError: + # Plain text output - could be function or computer call + responses_items.append({ + "type": "function_call_output", + "call_id": tool_call_id, + "output": content + }) + elif isinstance(content, list): + # Handle 
structured content (e.g., images) + for item in content: + if item.get("type") == "image_url": + responses_items.append({ + "type": "computer_call_output", + "call_id": tool_call_id, + "output": { + "type": "input_image", + "image_url": item.get("image_url", {}).get("url") + } + }) + elif item.get("type") == "text": + responses_items.append({ + "type": "function_call_output", + "call_id": tool_call_id, + "output": item.get("text") + }) + + # Handle actual user messages + elif role == "user" and content: + if isinstance(content, list): + # Handle structured user content (e.g., text + images) + user_content = [] + for item in content: + if item.get("type") == "image_url": + user_content.append({ + "type": "input_image", + "image_url": item.get("image_url", {}).get("url") + }) + elif item.get("type") == "text": + user_content.append({ + "type": "input_text", + "text": item.get("text") + }) + + if user_content: + responses_items.append({ + "role": "user", + "type": "message", + "content": user_content + }) + elif isinstance(content, str): + # Handle simple text user message + responses_items.append({ + "role": "user", + "content": content + }) + + return responses_items diff --git a/libs/python/agent/benchmarks/models/__init__.py b/libs/python/agent/benchmarks/models/__init__.py index 51033a7b..8af66c3d 100644 --- a/libs/python/agent/benchmarks/models/__init__.py +++ b/libs/python/agent/benchmarks/models/__init__.py @@ -1,4 +1,3 @@ from .base import ModelProtocol -from .gta1 import GTA1Model -__all__ = ["ModelProtocol", "GTA1Model"] +__all__ = ["ModelProtocol"] diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index 7a3b70a3..aa99184f 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -21,7 +21,6 @@ import torch # Add parent directory to path for imports sys.path.append(os.path.join(os.path.dirname(__file__), '..')) from agent.agent import ComputerAgent -from models import GTA1Model 
from models.base import ModelProtocol def get_gpu_memory() -> List[int]: @@ -82,13 +81,16 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: """ local_provider = "huggingface-local/" # Options: huggingface-local/ or mlx/ + # from models.gta1 import GTA1Model + models = [ # === ComputerAgent model strings === - f"{local_provider}HelloKKMe/GTA1-7B", + # f"{local_provider}HelloKKMe/GTA1-7B", # f"{local_provider}HelloKKMe/GTA1-32B", + "openai/computer-use-preview+openai/gpt-4o-mini" # === Reference model classes === - GTA1Model("HelloKKMe/GTA1-7B"), + # GTA1Model("HelloKKMe/GTA1-7B"), # GTA1Model("HelloKKMe/GTA1-32B"), ] From d27ee728b5cb8c321a34838318a7ff3cc44f0cd3 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Mon, 4 Aug 2025 16:57:42 -0400 Subject: [PATCH 15/76] Fix broken import after refactor in 5bfadf8f9ada07926fdcfe7f10929ef66a092544 --- examples/agent_ui_examples.py | 2 +- notebooks/agent_nb.ipynb | 2 +- scripts/playground.sh | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/agent_ui_examples.py b/examples/agent_ui_examples.py index d5a37119..97f54856 100644 --- a/examples/agent_ui_examples.py +++ b/examples/agent_ui_examples.py @@ -13,7 +13,7 @@ from utils import load_dotenv_files load_dotenv_files() # Import the create_gradio_ui function -from agent.ui.gradio.app import create_gradio_ui +from agent.ui.gradio.ui_components import create_gradio_ui if __name__ == "__main__": print("Launching Computer-Use Agent Gradio UI with advanced features...") diff --git a/notebooks/agent_nb.ipynb b/notebooks/agent_nb.ipynb index 4c39c204..61e7288a 100644 --- a/notebooks/agent_nb.ipynb +++ b/notebooks/agent_nb.ipynb @@ -379,7 +379,7 @@ "metadata": {}, "outputs": [], "source": [ - "from agent.ui.gradio.app import create_gradio_ui\n", + "from agent.ui.gradio.ui_components import create_gradio_ui\n", "\n", "app = create_gradio_ui()\n", "app.launch(share=False)" diff --git a/scripts/playground.sh b/scripts/playground.sh index 
39710e4c..0cde5a25 100755 --- a/scripts/playground.sh +++ b/scripts/playground.sh @@ -257,7 +257,7 @@ from pathlib import Path from dotenv import load_dotenv from computer import Computer from agent import ComputerAgent, LLM, AgentLoop, LLMProvider -from agent.ui.gradio.app import create_gradio_ui +from agent.ui.gradio.ui_components import create_gradio_ui # Load environment variables from .env.local load_dotenv(Path(__file__).parent / ".env.local") @@ -292,7 +292,7 @@ from pathlib import Path from dotenv import load_dotenv from computer import Computer from agent import ComputerAgent, LLM, AgentLoop, LLMProvider -from agent.ui.gradio.app import create_gradio_ui +from agent.ui.gradio.ui_components import create_gradio_ui # Load environment variables from .env.local load_dotenv(Path(__file__).parent / ".env.local") From 3e7bc0aa7941994bf0a87bf3303c9a427df3935f Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Mon, 4 Aug 2025 18:11:21 -0400 Subject: [PATCH 16/76] Add omniparser predict_click --- .../agent/agent/loops/composed_grounded.py | 2 +- libs/python/agent/agent/loops/omniparser.py | 88 +++++++++++++++++-- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/libs/python/agent/agent/loops/composed_grounded.py b/libs/python/agent/agent/loops/composed_grounded.py index 31b29372..1371ff3f 100644 --- a/libs/python/agent/agent/loops/composed_grounded.py +++ b/libs/python/agent/agent/loops/composed_grounded.py @@ -115,7 +115,7 @@ def get_last_computer_call_image(messages: List[Dict[str, Any]]) -> Optional[str return None -@register_agent(r".*\+.*", priority=10) +@register_agent(r".*\+.*", priority=1) class ComposedGroundedConfig: """ Composed-grounded agent configuration that uses both grounding and thinking models. 
diff --git a/libs/python/agent/agent/loops/omniparser.py b/libs/python/agent/agent/loops/omniparser.py index 2cf2d2c7..d85d07de 100644 --- a/libs/python/agent/agent/loops/omniparser.py +++ b/libs/python/agent/agent/loops/omniparser.py @@ -249,13 +249,13 @@ async def replace_computer_call_with_function(item: Dict[str, Any], xy2id: Dict[ return [item] -@register_agent(models=r"omniparser\+.*|omni\+.*") -class OmniparsrConfig(AsyncAgentConfig): +@register_agent(models=r"omniparser\+.*|omni\+.*", priority=2) +class OmniparserConfig(AsyncAgentConfig): """Omniparser agent configuration implementing AsyncAgentConfig protocol.""" async def predict_step( self, - messages: Messages, + messages: List[Dict[str, Any]], model: str, tools: Optional[List[Dict[str, Any]]] = None, max_retries: Optional[int] = None, @@ -284,7 +284,7 @@ class OmniparsrConfig(AsyncAgentConfig): openai_tools, id2xy = _prepare_tools_for_omniparser(tools) # Find last computer_call_output - last_computer_call_output = get_last_computer_call_output(messages) + last_computer_call_output = get_last_computer_call_output(messages) # type: ignore if last_computer_call_output: image_url = last_computer_call_output.get("output", {}).get("image_url", "") image_data = image_url.split(",")[-1] @@ -301,7 +301,7 @@ class OmniparsrConfig(AsyncAgentConfig): for message in messages: if not isinstance(message, dict): message = message.__dict__ - new_messages += await replace_computer_call_with_function(message, id2xy) + new_messages += await replace_computer_call_with_function(message, id2xy) # type: ignore messages = new_messages # Prepare API call kwargs @@ -331,7 +331,7 @@ class OmniparsrConfig(AsyncAgentConfig): # Extract usage information usage = { **response.usage.model_dump(), # type: ignore - "response_cost": response._hidden_params.get("response_cost", 0.0), + "response_cost": response._hidden_params.get("response_cost", 0.0), # type: ignore } if _on_usage: await _on_usage(usage) @@ -339,7 +339,7 @@ class 
OmniparsrConfig(AsyncAgentConfig): # handle som function calls -> xy computer calls new_output = [] for i in range(len(response.output)): # type: ignore - new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) + new_output += await replace_function_with_computer_call(response.output[i].model_dump(), id2xy) # type: ignore return { "output": new_output, @@ -353,7 +353,79 @@ class OmniparsrConfig(AsyncAgentConfig): instruction: str, **kwargs ) -> Optional[Tuple[float, float]]: - """Omniparser does not support click prediction.""" + """ + Predict click coordinates using OmniParser and LLM. + + Uses OmniParser to annotate the image with element IDs, then uses LLM + to identify the correct element ID based on the instruction. + """ + if not OMNIPARSER_AVAILABLE: + return None + + # Parse the image with OmniParser to get annotated image and elements + parser = get_parser() + result = parser.parse(image_b64) + + # Extract the LLM model from composed model string + llm_model = model.split('+')[-1] + + # Create system prompt for element ID prediction + SYSTEM_PROMPT = f''' +You are an expert UI element locator. Given a GUI image annotated with numerical IDs over each interactable element, along with a user's element description, provide the ID of the specified element. + +The image shows UI elements with numbered overlays. Each number corresponds to a clickable/interactable element. + +Output only the element ID as a single integer. 
+'''.strip() + + # Prepare messages for LLM + messages = [ + { + "role": "system", + "content": SYSTEM_PROMPT + }, + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{result.annotated_image_base64}" + } + }, + { + "type": "text", + "text": f"Find the element: {instruction}" + } + ] + } + ] + + # Call LLM to predict element ID + response = await litellm.acompletion( + model=llm_model, + messages=messages, + max_tokens=10, + temperature=0.1 + ) + + # Extract element ID from response + response_text = response.choices[0].message.content.strip() # type: ignore + + # Try to parse the element ID + try: + element_id = int(response_text) + + # Find the element with this ID and return its center coordinates + for element in result.elements: + if element.id == element_id: + center_x = (element.bbox.x1 + element.bbox.x2) / 2 + center_y = (element.bbox.y1 + element.bbox.y2) / 2 + return (center_x, center_y) + except ValueError: + # If we can't parse the ID, return None + pass + return None def get_capabilities(self) -> List[AgentCapability]: From 96e4b7586ab4e257adebd7f5d075056c29f40276 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 10:51:21 -0400 Subject: [PATCH 17/76] update mcp server to cua-agent==0.4.x --- libs/python/mcp-server/README.md | 31 ++++--- libs/python/mcp-server/mcp_server/server.py | 97 ++++++++++----------- libs/python/mcp-server/pyproject.toml | 4 +- 3 files changed, 64 insertions(+), 68 deletions(-) diff --git a/libs/python/mcp-server/README.md b/libs/python/mcp-server/README.md index 3f3c8bbb..a94da8a7 100644 --- a/libs/python/mcp-server/README.md +++ b/libs/python/mcp-server/README.md @@ -16,6 +16,21 @@ **cua-mcp-server** is a MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients. 
+ +## LiteLLM Integration + +This MCP server features comprehensive liteLLM integration, allowing you to use any supported LLM provider with a simple model string configuration. + +- **Unified Configuration**: Use a single `CUA_MODEL_NAME` environment variable with a model string +- **Automatic Provider Detection**: The agent automatically detects the provider and capabilities from the model string +- **Extensive Provider Support**: Works with Anthropic, OpenAI, local models, and any liteLLM-compatible provider + +### Model String Examples: +- **Anthropic**: `"anthropic/claude-3-5-sonnet-20241022"` +- **OpenAI**: `"openai/computer-use-preview"` +- **UI-TARS**: `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"` +- **Omni + Any LiteLLM**: `"omniparser+litellm/gpt-4o"`, `"omniparser+litellm/claude-3-haiku"`, `"omniparser+ollama_chat/gemma3"` + ### Get started with Agent ## Prerequisites @@ -65,10 +80,7 @@ You can then use the script in your MCP configuration like this: "command": "/bin/bash", "args": ["~/.cua/start_mcp_server.sh"], "env": { - "CUA_AGENT_LOOP": "OMNI", - "CUA_MODEL_PROVIDER": "ANTHROPIC", - "CUA_MODEL_NAME": "claude-3-7-sonnet-20250219", - "CUA_PROVIDER_API_KEY": "your-api-key" + "CUA_MODEL_NAME": "anthropic/claude-3-5-sonnet-20241022" } } } @@ -86,11 +98,7 @@ If you want to develop with the cua-mcp-server directly without installation, yo "command": "/bin/bash", "args": ["~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"], "env": { - "CUA_AGENT_LOOP": "UITARS", - "CUA_MODEL_PROVIDER": "OAICOMPAT", - "CUA_MODEL_NAME": "ByteDance-Seed/UI-TARS-1.5-7B", - "CUA_PROVIDER_BASE_URL": "https://****************.us-east-1.aws.endpoints.huggingface.cloud/v1", - "CUA_PROVIDER_API_KEY": "your-api-key" + "CUA_MODEL_NAME": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B" } } } @@ -142,10 +150,7 @@ The server is configured using environment variables (can be set in the Claude D | Variable | Description | Default | |----------|-------------|---------| -| 
`CUA_AGENT_LOOP` | Agent loop to use (OPENAI, ANTHROPIC, UITARS, OMNI) | OMNI | -| `CUA_MODEL_PROVIDER` | Model provider (ANTHROPIC, OPENAI, OLLAMA, OAICOMPAT) | ANTHROPIC | -| `CUA_MODEL_NAME` | Model name to use | None (provider default) | -| `CUA_PROVIDER_BASE_URL` | Base URL for provider API | None | +| `CUA_MODEL_NAME` | Model string (e.g., "anthropic/claude-3-5-sonnet-20241022", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-3-5-sonnet-20241022 | | `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 | ## Available Tools diff --git a/libs/python/mcp-server/mcp_server/server.py b/libs/python/mcp-server/mcp_server/server.py index 03971cb6..73996d5e 100644 --- a/libs/python/mcp-server/mcp_server/server.py +++ b/libs/python/mcp-server/mcp_server/server.py @@ -3,6 +3,7 @@ import base64 import logging import os import sys +from tabnanny import verbose import traceback from typing import Any, Dict, List, Optional, Union, Tuple @@ -28,7 +29,7 @@ except ImportError as e: try: from computer import Computer - from agent import ComputerAgent, LLMProvider, LLM, AgentLoop + from agent import ComputerAgent logger.debug("Successfully imported Computer and Agent modules") except ImportError as e: @@ -92,49 +93,27 @@ def serve() -> FastMCP: global_computer = Computer(verbosity=logging.INFO) await global_computer.run() - # Determine which loop to use - loop_str = os.getenv("CUA_AGENT_LOOP", "OMNI") - loop = getattr(AgentLoop, loop_str) + # Get model name - this now determines the loop and provider + model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022") + + logger.info(f"Using model: {model_name}") - # Determine provider - provider_str = os.getenv("CUA_MODEL_PROVIDER", "ANTHROPIC") - provider = getattr(LLMProvider, provider_str) - - # Get model name (if specified) - model_name = os.getenv("CUA_MODEL_NAME", None) - - # Get 
base URL for provider (if needed) - provider_base_url = os.getenv("CUA_PROVIDER_BASE_URL", None) - - # Get api key for provider (if needed) - api_key = os.getenv("CUA_PROVIDER_API_KEY", None) - - # Create agent with the specified configuration + # Create agent with the new v0.4.x API agent = ComputerAgent( - computer=global_computer, - loop=loop, - model=LLM( - provider=provider, - name=model_name, - provider_base_url=provider_base_url, - ), - api_key=api_key, - save_trajectory=False, + model=model_name, only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")), verbosity=logging.INFO, + tools=[global_computer] ) + # Create messages in the new v0.4.x format + messages = [{"role": "user", "content": task}] + # Collect all results full_result = "" - async for result in agent.run(task): - logger.info(f"Agent step complete: {result.get('id', 'unknown')}") - ctx.info(f"Agent step complete: {result.get('id', 'unknown')}") - - # Add response ID to output - full_result += f"\n[Response ID: {result.get('id', 'unknown')}]\n" - - if "content" in result: - full_result += f"Response: {result.get('content', '')}\n" + async for result in agent.run(messages): + logger.info(f"Agent processing step") + ctx.info(f"Agent processing step") # Process output if available outputs = result.get("output", []) @@ -145,25 +124,23 @@ def serve() -> FastMCP: content = output.get("content", []) for content_part in content: if content_part.get("text"): - full_result += f"\nMessage: {content_part.get('text', '')}\n" - elif output_type == "reasoning": - logger.debug(f"Reasoning: {output}") - - summary_content = output.get("summary", []) - if summary_content: - for summary_part in summary_content: - if summary_part.get("text"): - full_result += f"\nReasoning: {summary_part.get('text', '')}\n" + full_result += f"Message: {content_part.get('text', '')}\n" + elif output_type == "tool_use": + logger.debug(f"Tool use: {output}") + tool_name = output.get("name", "") + full_result += f"Tool: 
{tool_name}\n" + elif output_type == "tool_result": + logger.debug(f"Tool result: {output}") + result_content = output.get("content", "") + if isinstance(result_content, list): + for item in result_content: + if item.get("type") == "text": + full_result += f"Result: {item.get('text', '')}\n" else: - full_result += f"\nReasoning: {output.get('text', output.get('content', ''))}\n" - elif output_type == "computer_call": - logger.debug(f"Computer call: {output}") - action = output.get("action", "") - result_value = output.get("result", "") - full_result += f"\nComputer Action: {action}\nResult: {result_value}\n" + full_result += f"Result: {result_content}\n" # Add separator between steps - full_result += "\n" + "-" * 40 + "\n" + full_result += "\n" + "-" * 20 + "\n" logger.info(f"CUA task completed successfully") ctx.info(f"CUA task completed successfully") @@ -179,7 +156,21 @@ def serve() -> FastMCP: error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}" logger.error(error_msg) ctx.error(error_msg) - return f"Error during task execution: {str(e)}" + # Return tuple with error message and a screenshot if possible + try: + if global_computer is not None: + screenshot = await global_computer.interface.screenshot() + return ( + f"Error during task execution: {str(e)}", + Image(format="png", data=screenshot) + ) + except: + pass + # If we can't get a screenshot, return a placeholder + return ( + f"Error during task execution: {str(e)}", + Image(format="png", data=b"") + ) @server.tool() async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List: diff --git a/libs/python/mcp-server/pyproject.toml b/libs/python/mcp-server/pyproject.toml index ed2ad435..f80a1b6b 100644 --- a/libs/python/mcp-server/pyproject.toml +++ b/libs/python/mcp-server/pyproject.toml @@ -13,8 +13,8 @@ authors = [ ] dependencies = [ "mcp>=1.6.0,<2.0.0", - "cua-agent[all]>=0.3.0,<0.4.0", - "cua-computer>=0.3.0,<0.4.0", + "cua-agent[all]>=0.4.0,<0.5.0", + 
"cua-computer>=0.4.0,<0.5.0", ] [project.scripts] From 01371ed6cfadeab0488768fed7724298aa5b7cd7 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 10:55:06 -0400 Subject: [PATCH 18/76] bump version, add localhost to ui --- libs/python/agent/agent/ui/gradio/app.py | 21 ++++++++++++------- .../agent/agent/ui/gradio/ui_components.py | 19 ++++++++++++++++- libs/python/agent/pyproject.toml | 2 +- libs/python/computer/pyproject.toml | 2 +- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/libs/python/agent/agent/ui/gradio/app.py b/libs/python/agent/agent/ui/gradio/app.py index 13c0786f..be04d931 100644 --- a/libs/python/agent/agent/ui/gradio/app.py +++ b/libs/python/agent/agent/ui/gradio/app.py @@ -178,13 +178,20 @@ def create_computer_instance( """Create or get the global Computer instance.""" global global_computer if global_computer is None: - global_computer = Computer( - verbosity=verbosity, - os_type=os_type, - provider_type=provider_type, - name=name if name else "", - api_key=api_key - ) + if provider_type == "localhost": + global_computer = Computer( + verbosity=verbosity, + os_type=os_type, + use_host_computer_server=True + ) + else: + global_computer = Computer( + verbosity=verbosity, + os_type=os_type, + provider_type=provider_type, + name=name if name else "", + api_key=api_key + ) return global_computer diff --git a/libs/python/agent/agent/ui/gradio/ui_components.py b/libs/python/agent/agent/ui/gradio/ui_components.py index dfcceb4e..c601fb6c 100644 --- a/libs/python/agent/agent/ui/gradio/ui_components.py +++ b/libs/python/agent/agent/ui/gradio/ui_components.py @@ -211,7 +211,7 @@ if __name__ == "__main__": is_windows = platform.system().lower() == "windows" is_mac = platform.system().lower() == "darwin" - providers = ["cloud"] + providers = ["cloud", "localhost"] if is_mac: providers += ["lume"] if is_windows: @@ -403,6 +403,23 @@ if __name__ == "__main__": type="password", ) + # Provider visibility update function + def 
update_provider_visibility(provider): + """Update visibility of container name and API key based on selected provider.""" + is_localhost = provider == "localhost" + return [ + gr.update(visible=not is_localhost), # container_name + gr.update(visible=not is_localhost and not has_cua_key) # cua_cloud_api_key + ] + + # Connect provider change event + computer_provider.change( + fn=update_provider_visibility, + inputs=[computer_provider], + outputs=[container_name, cua_cloud_api_key], + queue=False + ) + # Connect UI update events for dropdown in [agent_loop, omni_model_choice, uitars_model_choice, openai_model_choice, anthropic_model_choice]: dropdown.change( diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index be10f729..1280999b 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "pydantic>=2.6.4", "rich>=13.7.1", "python-dotenv>=1.0.1", - "cua-computer>=0.3.0,<0.5.0", + "cua-computer>=0.4.0,<0.5.0", "cua-core>=0.1.8,<0.2.0", "certifi>=2024.2.2", "litellm>=1.74.8" diff --git a/libs/python/computer/pyproject.toml b/libs/python/computer/pyproject.toml index 2e564fa9..4a9b41bb 100644 --- a/libs/python/computer/pyproject.toml +++ b/libs/python/computer/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer" -version = "0.3.0" +version = "0.4.0" description = "Computer-Use Interface (CUI) framework powering Cua" readme = "README.md" authors = [ From 5e7b1e46b0d6df4825414b4c9595f2cdf93f4c3d Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 11:23:08 -0400 Subject: [PATCH 19/76] fixed resolution & OS --- libs/python/agent/agent/loops/openai.py | 29 +++++++++++++++++++------ 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/libs/python/agent/agent/loops/openai.py b/libs/python/agent/agent/loops/openai.py index 793c3ce6..b3690019 100644 --- a/libs/python/agent/agent/loops/openai.py +++ 
b/libs/python/agent/agent/loops/openai.py @@ -13,24 +13,39 @@ from PIL import Image from ..decorators import register_agent from ..types import Messages, AgentResponse, Tools, AgentCapability -def _map_computer_tool_to_openai(computer_tool: Any) -> Dict[str, Any]: +async def _map_computer_tool_to_openai(computer_handler: Any) -> Dict[str, Any]: """Map a computer tool to OpenAI's computer-use-preview tool schema""" + # Get dimensions from the computer handler + try: + width, height = await computer_handler.get_dimensions() + except Exception: + # Fallback to default dimensions if method fails + width, height = 1024, 768 + + # Get environment from the computer handler + try: + environment = await computer_handler.get_environment() + except Exception: + # Fallback to default environment if method fails + environment = "linux" + return { "type": "computer_use_preview", - "display_width": getattr(computer_tool, 'display_width', 1024), - "display_height": getattr(computer_tool, 'display_height', 768), - "environment": getattr(computer_tool, 'environment', "linux") # mac, windows, linux, browser + "display_width": width, + "display_height": height, + "environment": environment # mac, windows, linux, browser } -def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools: +async def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools: """Prepare tools for OpenAI API format""" openai_tools = [] for schema in tool_schemas: if schema["type"] == "computer": # Map computer tool to OpenAI format - openai_tools.append(_map_computer_tool_to_openai(schema["computer"])) + computer_tool = await _map_computer_tool_to_openai(schema["computer"]) + openai_tools.append(computer_tool) elif schema["type"] == "function": # Function tools use OpenAI-compatible schema directly (liteLLM expects this format) # Schema should be: {type, name, description, parameters} @@ -84,7 +99,7 @@ class OpenAIComputerUseConfig: tools = tools or [] # Prepare tools for OpenAI API 
- openai_tools = _prepare_tools_for_openai(tools) + openai_tools = await _prepare_tools_for_openai(tools) # Prepare API call kwargs api_kwargs = { From dc0e47917081cbc54ec7e568a8c370a908a4433d Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 11:25:29 -0400 Subject: [PATCH 20/76] changed comment --- libs/python/agent/agent/loops/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/python/agent/agent/loops/openai.py b/libs/python/agent/agent/loops/openai.py index b3690019..2979c4fb 100644 --- a/libs/python/agent/agent/loops/openai.py +++ b/libs/python/agent/agent/loops/openai.py @@ -157,7 +157,7 @@ class OpenAIComputerUseConfig: Returns: Tuple of (x, y) coordinates or None if prediction fails """ - # TODO: implement this correctly + # TODO: use computer tool to get dimensions + environment # Scale image to half size try: image_data = base64.b64decode(image_b64) From e1e6a7d6a721471e4110d633117b8791552dc5f5 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 12:32:42 -0400 Subject: [PATCH 21/76] Added anthropic click mode --- libs/python/agent/agent/loops/anthropic.py | 119 +++++++++++++++++++-- libs/python/agent/agent/loops/openai.py | 26 +---- 2 files changed, 112 insertions(+), 33 deletions(-) diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py index 8371352c..599d1dc4 100644 --- a/libs/python/agent/agent/loops/anthropic.py +++ b/libs/python/agent/agent/loops/anthropic.py @@ -65,21 +65,28 @@ def _get_tool_config_for_model(model: str) -> Dict[str, str]: "beta_flag": "computer-use-2024-10-22" } -def _map_computer_tool_to_anthropic(computer_tool: Any, tool_version: str) -> Dict[str, Any]: +async def _map_computer_tool_to_anthropic(computer_tool: Any, tool_version: str) -> Dict[str, Any]: """Map a computer tool to Anthropic's hosted tool schema.""" + # Get dimensions from the computer handler + try: + width, height = await computer_tool.get_dimensions() + 
except Exception: + # Fallback to default dimensions if method fails + width, height = 1024, 768 + return { "type": tool_version, "function": { "name": "computer", "parameters": { - "display_height_px": getattr(computer_tool, 'display_height', 768), - "display_width_px": getattr(computer_tool, 'display_width', 1024), - "display_number": getattr(computer_tool, 'display_number', 1), + "display_height_px": height, + "display_width_px": width, + "display_number": 1, }, }, } -def _prepare_tools_for_anthropic(tool_schemas: List[Dict[str, Any]], model: str) -> Tools: +async def _prepare_tools_for_anthropic(tool_schemas: List[Dict[str, Any]], model: str) -> Tools: """Prepare tools for Anthropic API format.""" tool_config = _get_tool_config_for_model(model) anthropic_tools = [] @@ -87,7 +94,7 @@ def _prepare_tools_for_anthropic(tool_schemas: List[Dict[str, Any]], model: str) for schema in tool_schemas: if schema["type"] == "computer": # Map computer tool to Anthropic format - anthropic_tools.append(_map_computer_tool_to_anthropic( + anthropic_tools.append(await _map_computer_tool_to_anthropic( schema["computer"], tool_config["tool_version"] )) @@ -1315,7 +1322,7 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): tool_config = _get_tool_config_for_model(model) # Prepare tools for Anthropic API - anthropic_tools = _prepare_tools_for_anthropic(tools, model) + anthropic_tools = await _prepare_tools_for_anthropic(tools, model) # Convert responses_items messages to completion format completion_messages = _convert_responses_items_to_completion_messages(messages) @@ -1375,10 +1382,102 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig): image_b64: str, instruction: str, **kwargs - ) -> Optional[Tuple[float, float]]: - """Anthropic hosted tools does not support click prediction.""" + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates based on image and instruction. 
+ + Uses Anthropic's computer use models with a custom prompt that instructs + the agent to only output clicks. + + Args: + model: Model name to use + image_b64: Base64 encoded image + instruction: Instruction for where to click + + Returns: + Tuple of (x, y) coordinates or None if prediction fails + """ + # Get image dimensions from base64 data + try: + import base64 + from PIL import Image + from io import BytesIO + + image_data = base64.b64decode(image_b64) + image = Image.open(BytesIO(image_data)) + display_width, display_height = image.size + except Exception: + # Fallback to default dimensions if image parsing fails + display_width, display_height = 1024, 768 + + # Get tool configuration for this model + tool_config = _get_tool_config_for_model(model) + + # Prepare computer tool for Anthropic format + computer_tool = { + "type": tool_config["tool_version"], + "function": { + "name": "computer", + "parameters": { + "display_height_px": display_height, + "display_width_px": display_width, + "display_number": 1, + }, + }, + } + + # Construct messages in OpenAI chat completion format for liteLLM + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": f"You are a UI grounding expert. Look at the image and {instruction}. Output ONLY a click action on the target element. No explanations, confirmations, or additional text." 
+ }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{image_b64}" + } + } + ] + } + ] + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": messages, + "tools": [computer_tool], + "stream": False, + "max_tokens": 100, # Keep response short for click prediction + "headers": { + "anthropic-beta": tool_config["beta_flag"] + } + } + + # Use liteLLM acompletion + response = await litellm.acompletion(**api_kwargs) + + # Convert response to responses_items format to extract click coordinates + responses_items = _convert_completion_to_responses_items(response) + + # Look for computer_call with click action + for item in responses_items: + if (isinstance(item, dict) and + item.get("type") == "computer_call" and + isinstance(item.get("action"), dict)): + + action = item["action"] + if action.get("type") == "click": + x = action.get("x") + y = action.get("y") + if x is not None and y is not None: + return (int(x), int(y)) + return None def get_capabilities(self) -> List[AgentCapability]: """Return the capabilities supported by this agent.""" - return ["step"] + return ["click", "step"] diff --git a/libs/python/agent/agent/loops/openai.py b/libs/python/agent/agent/loops/openai.py index 2979c4fb..bb6a13a6 100644 --- a/libs/python/agent/agent/loops/openai.py +++ b/libs/python/agent/agent/loops/openai.py @@ -158,24 +158,6 @@ class OpenAIComputerUseConfig: Tuple of (x, y) coordinates or None if prediction fails """ # TODO: use computer tool to get dimensions + environment - # Scale image to half size - try: - image_data = base64.b64decode(image_b64) - image = Image.open(BytesIO(image_data)) - - # Scale to half size - new_width = image.width // 2 - new_height = image.height // 2 - scaled_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) - - # Convert back to base64 - buffer = BytesIO() - scaled_image.save(buffer, format='PNG') - image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8') - except 
Exception: - # If scaling fails, use original image - pass - # Manually construct input items with image and click instruction input_items = [ { @@ -207,7 +189,7 @@ class OpenAIComputerUseConfig: "type": "computer_use_preview", "display_width": display_width, "display_height": display_height, - "environment": "linux" + "environment": "windows" } # Prepare API call kwargs @@ -226,9 +208,7 @@ class OpenAIComputerUseConfig: # Extract click coordinates from response output output_dict = response.model_dump() - output_items = output_dict.get("output", []) - - # print(output_items) + output_items = output_dict.get("output", []) # Look for computer_call with click action for item in output_items: @@ -241,7 +221,7 @@ class OpenAIComputerUseConfig: x = action.get("x") y = action.get("y") if x is not None and y is not None: - return (int(x) * 2, int(y) * 2) + return (int(x), int(y)) return None From 3cc36905ff45f7d5fa385028dd4979eb20d1b163 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 12:36:22 -0400 Subject: [PATCH 22/76] added missing abstract mthd --- libs/python/agent/agent/loops/gta1.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/libs/python/agent/agent/loops/gta1.py b/libs/python/agent/agent/loops/gta1.py index bf4da044..13678b48 100644 --- a/libs/python/agent/agent/loops/gta1.py +++ b/libs/python/agent/agent/loops/gta1.py @@ -74,6 +74,23 @@ class GTA1Config(AsyncAgentConfig): self.current_model = None self.last_screenshot_b64 = None + + async def predict_step( + self, + messages: List[Dict[str, Any]], + model: str, + tools: Optional[List[Dict[str, Any]]] = None, + max_retries: Optional[int] = None, + stream: bool = False, + computer_handler=None, + _on_api_start=None, + _on_api_end=None, + _on_usage=None, + _on_screenshot=None, + **kwargs + ) -> Dict[str, Any]: + raise NotImplementedError() + async def predict_click( self, model: str, From 8eb662bf4dee862ddaec2a7f71fdb7ff55b203eb Mon Sep 17 00:00:00 2001 From: Dillon DuPont 
Date: Tue, 5 Aug 2025 12:45:00 -0400 Subject: [PATCH 23/76] added base models to benchmark --- libs/python/agent/benchmarks/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libs/python/agent/benchmarks/utils.py b/libs/python/agent/benchmarks/utils.py index aa99184f..d7ef4445 100644 --- a/libs/python/agent/benchmarks/utils.py +++ b/libs/python/agent/benchmarks/utils.py @@ -3,6 +3,9 @@ Shared utilities for ScreenSpot-Pro benchmarking and interactive testing. """ +import dotenv +dotenv.load_dotenv() + import asyncio import base64 import os @@ -85,9 +88,12 @@ def get_available_models() -> List[Union[str, ModelProtocol]]: models = [ # === ComputerAgent model strings === + "openai/computer-use-preview", + "anthropic/claude-opus-4-20250514", # f"{local_provider}HelloKKMe/GTA1-7B", # f"{local_provider}HelloKKMe/GTA1-32B", - "openai/computer-use-preview+openai/gpt-4o-mini" + "openai/computer-use-preview+openai/gpt-4o-mini", + "anthropic/claude-opus-4-20250514+openai/gpt-4o-mini", # === Reference model classes === # GTA1Model("HelloKKMe/GTA1-7B"), From 74a25f2003ca6a6d42fdcfdc729a18e35e2bf860 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 12:54:23 -0400 Subject: [PATCH 24/76] fixed docstring --- libs/python/agent/agent/loops/composed_grounded.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/python/agent/agent/loops/composed_grounded.py b/libs/python/agent/agent/loops/composed_grounded.py index 1371ff3f..cf029d13 100644 --- a/libs/python/agent/agent/loops/composed_grounded.py +++ b/libs/python/agent/agent/loops/composed_grounded.py @@ -121,7 +121,7 @@ class ComposedGroundedConfig: Composed-grounded agent configuration that uses both grounding and thinking models. 
The model parameter should be in format: "grounding_model+thinking_model" - e.g., "gpt-4o+claude-3-5-sonnet-20241022" + e.g., "huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro" """ def __init__(self): From 5168b6f0825bf6c843faee3de58736da141a00eb Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 13:02:45 -0400 Subject: [PATCH 25/76] added docs for benchmarks and composed agents --- .../docs/agent-sdk/benchmarks/index.mdx | 28 +++++ .../docs/agent-sdk/benchmarks/interactive.mdx | 21 ++++ .../agent-sdk/benchmarks/introduction.mdx | 57 ++++++++++ .../docs/agent-sdk/benchmarks/meta.json | 8 ++ .../agent-sdk/benchmarks/screenspot-pro.mdx | 25 +++++ .../agent-sdk/benchmarks/screenspot-v2.mdx | 25 +++++ docs/content/docs/agent-sdk/meta.json | 3 +- .../docs/agent-sdk/supported-agents.mdx | 34 ------ .../supported-agents/composed-agents.mdx | 106 ++++++++++++++++++ .../supported-agents/computer-use-agents.mdx | 53 +++++++++ .../supported-agents/grounding-models.mdx | 69 ++++++++++++ .../docs/agent-sdk/supported-agents/meta.json | 9 ++ 12 files changed, 403 insertions(+), 35 deletions(-) create mode 100644 docs/content/docs/agent-sdk/benchmarks/index.mdx create mode 100644 docs/content/docs/agent-sdk/benchmarks/interactive.mdx create mode 100644 docs/content/docs/agent-sdk/benchmarks/introduction.mdx create mode 100644 docs/content/docs/agent-sdk/benchmarks/meta.json create mode 100644 docs/content/docs/agent-sdk/benchmarks/screenspot-pro.mdx create mode 100644 docs/content/docs/agent-sdk/benchmarks/screenspot-v2.mdx delete mode 100644 docs/content/docs/agent-sdk/supported-agents.mdx create mode 100644 docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx create mode 100644 docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx create mode 100644 docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx create mode 100644 docs/content/docs/agent-sdk/supported-agents/meta.json diff --git 
a/docs/content/docs/agent-sdk/benchmarks/index.mdx b/docs/content/docs/agent-sdk/benchmarks/index.mdx new file mode 100644 index 00000000..59e9b7ad --- /dev/null +++ b/docs/content/docs/agent-sdk/benchmarks/index.mdx @@ -0,0 +1,28 @@ +--- +title: Benchmarks +description: Computer Agent SDK benchmarks for agentic GUI tasks +--- + +The benchmark system evaluates models on GUI grounding tasks, specifically agent loop success rate and click prediction accuracy. It supports both: +- **Computer Agent SDK providers** (using model strings like `"huggingface-local/HelloKKMe/GTA1-7B"`) +- **Reference agent implementations** (custom model classes implementing the `ModelProtocol`) + +## Available Benchmarks + +- **[ScreenSpot-v2](./screenspot-v2)** - Standard resolution GUI grounding +- **[ScreenSpot-Pro](./screenspot-pro)** - High-resolution GUI grounding +- **[Interactive Testing](./interactive)** - Real-time testing and visualization + +## Quick Start + +```bash +# Clone the benchmark repository +git clone https://github.com/trycua/cua +cd cua/libs/python/agent/benchmarks + +# Install dependencies +pip install "cua-agent[all]" + +# Run a benchmark +python ss-v2.py +``` diff --git a/docs/content/docs/agent-sdk/benchmarks/interactive.mdx b/docs/content/docs/agent-sdk/benchmarks/interactive.mdx new file mode 100644 index 00000000..43170ca4 --- /dev/null +++ b/docs/content/docs/agent-sdk/benchmarks/interactive.mdx @@ -0,0 +1,21 @@ +--- +title: Interactive Tool +description: Real-time testing and visualization tool for GUI grounding models +--- + +This tool allows you to test multiple models interactively by providing natural language instructions. It automatically captures screenshots and tests all configured models sequentially, providing immediate feedback and visual results.
+ +## Usage + +```bash +# Start the interactive tool +cd libs/python/agent/benchmarks +python interactive.py +``` + +## Commands + +- **Type instruction**: Screenshot + test all models +- **`screenshot`**: Take screenshot without prediction +- **`models`**: List available models +- **`quit`/`exit`**: Exit the tool diff --git a/docs/content/docs/agent-sdk/benchmarks/introduction.mdx b/docs/content/docs/agent-sdk/benchmarks/introduction.mdx new file mode 100644 index 00000000..3f2251f8 --- /dev/null +++ b/docs/content/docs/agent-sdk/benchmarks/introduction.mdx @@ -0,0 +1,57 @@ +--- +title: Introduction +description: Overview of benchmarking in the c/ua agent framework +--- + +The c/ua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks. + +## Benchmark Types + +Computer-Agent benchmarks evaluate two key capabilities: +- **Plan Generation**: Breaking down complex tasks into a sequence of actions +- **Coordinate Generation**: Predicting precise click locations on GUI elements + +## Using State-of-the-Art Models + +Let's see how to use the SOTA vision-language models in the c/ua agent framework. + +### Plan Generation + Coordinate Generation + +**[OS-World](https://os-world.github.io/)** - Benchmark for complete computer-use agents + +This leaderboard tests models that can understand instructions and automatically perform the full sequence of actions needed to complete tasks. + +```python +# UI-TARS-1.5 is a SOTA unified plan generation + coordinate generation VLM +# This makes it suitable for agentic loops for computer-use +agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer]) +agent.run("Open Firefox and go to github.com") +# Success! 
🎉 +``` + +### Coordinate Generation Only + +**[GUI Agent Grounding Leaderboard](https://gui-agent.github.io/grounding-leaderboard/)** - Benchmark for click prediction accuracy + +This leaderboard tests models that specialize in finding exactly where to click on screen elements, but need to be told what specific action to take. + +```python +# GTA1-7B is a SOTA coordinate generation VLM +# It can only generate coordinates, not plan: +agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer]) +agent.predict_click("find the button to open the settings") # (27, 450) +# This will raise an error: +# agent.run("Open Firefox and go to github.com") +``` + +### Composed Agent + +The c/ua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any LiteLLM model can be used as the plan generation model. + +```python +# It can be paired with any LLM to form a composed agent: +# "gemini/gemini-1.5-pro" will be used as the plan generation LLM +agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro", tools=[computer]) +agent.run("Open Firefox and go to github.com") +# Success! 
🎉 +``` diff --git a/docs/content/docs/agent-sdk/benchmarks/meta.json b/docs/content/docs/agent-sdk/benchmarks/meta.json new file mode 100644 index 00000000..aa49a156 --- /dev/null +++ b/docs/content/docs/agent-sdk/benchmarks/meta.json @@ -0,0 +1,8 @@ +{ + "pages": [ + "introduction", + "screenspot-v2", + "screenspot-pro", + "interactive" + ] +} \ No newline at end of file diff --git a/docs/content/docs/agent-sdk/benchmarks/screenspot-pro.mdx b/docs/content/docs/agent-sdk/benchmarks/screenspot-pro.mdx new file mode 100644 index 00000000..402b919e --- /dev/null +++ b/docs/content/docs/agent-sdk/benchmarks/screenspot-pro.mdx @@ -0,0 +1,25 @@ +--- +title: ScreenSpot-Pro +description: High-resolution GUI grounding benchmark +--- + +ScreenSpot-Pro is a benchmark for evaluating click prediction accuracy on high-resolution GUI screenshots with complex layouts. + +## Usage + +```bash +# Run the benchmark +cd libs/python/agent/benchmarks +python ss-pro.py + +# Run with custom sample limit +python ss-pro.py --samples 50 +``` + +## Results + +| Model | Accuracy | Failure Rate | Samples | +|-------|----------|--------------|---------| +| Coming Soon | - | - | - | + +Results will be populated after running benchmarks with various models. diff --git a/docs/content/docs/agent-sdk/benchmarks/screenspot-v2.mdx b/docs/content/docs/agent-sdk/benchmarks/screenspot-v2.mdx new file mode 100644 index 00000000..6cfcf1c1 --- /dev/null +++ b/docs/content/docs/agent-sdk/benchmarks/screenspot-v2.mdx @@ -0,0 +1,25 @@ +--- +title: ScreenSpot-v2 +description: Standard resolution GUI grounding benchmark +--- + +ScreenSpot-v2 is a benchmark for evaluating click prediction accuracy on standard resolution GUI screenshots. 
+ +## Usage + +```bash +# Run the benchmark +cd libs/python/agent/benchmarks +python ss-v2.py + +# Run with custom sample limit +python ss-v2.py --samples 100 +``` + +## Results + +| Model | Accuracy | Failure Rate | Samples | +|-------|----------|--------------|---------| +| Coming Soon | - | - | - | + +Results will be populated after running benchmarks with various models. diff --git a/docs/content/docs/agent-sdk/meta.json b/docs/content/docs/agent-sdk/meta.json index 933452cb..fadc5a12 100644 --- a/docs/content/docs/agent-sdk/meta.json +++ b/docs/content/docs/agent-sdk/meta.json @@ -3,13 +3,14 @@ "description": "Build computer-using agents with the Agent SDK", "pages": [ "agent-loops", - "supported-agents", + "supported-agents", "chat-history", "callbacks", "sandboxed-tools", "local-models", "prompt-caching", "usage-tracking", + "benchmarks", "migration-guide" ] } diff --git a/docs/content/docs/agent-sdk/supported-agents.mdx b/docs/content/docs/agent-sdk/supported-agents.mdx deleted file mode 100644 index 61abf521..00000000 --- a/docs/content/docs/agent-sdk/supported-agents.mdx +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: Supported Agents ---- - -This page lists all supported agent loops and their compatible models/configurations in cua. - -All agent loops are compatible with any LLM provider supported by LiteLLM. - -See [Running Models Locally](./local-models) for how to use Hugging Face and MLX models on your own machine. 
- -## Anthropic CUAs - -- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514` -- Claude 3.7: `claude-3-7-sonnet-20250219` -- Claude 3.5: `claude-3-5-sonnet-20240620` - -## OpenAI CUA Preview - -- Computer-use-preview: `computer-use-preview` - -## UI-TARS 1.5 - -- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` -- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint) - -## Omniparser + LLMs - -- `omniparser+vertex_ai/gemini-pro` -- `omniparser+openai/gpt-4o` -- Any LiteLLM-compatible model combined with Omniparser - ---- - -For details on agent loop behavior and usage, see [Agent Loops](./agent-loops). diff --git a/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx new file mode 100644 index 00000000..50160fd8 --- /dev/null +++ b/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx @@ -0,0 +1,106 @@ +--- +title: Composed Agents +description: Combine grounding models with any LLM for computer-use capabilities +--- + +Composed agents combine the best of both worlds: specialized grounding models for precise click prediction and powerful LLMs for task planning and reasoning. + +Use the format `"grounding_model+thinking_model"` to create a composed agent with any vision-enabled LiteLLM-compatible model. + +## How Composed Agents Work + +1. **Planning Phase**: The thinking model (LLM) analyzes the task and decides what actions to take (e.g., `click("find the login button")`, `type("username")`) +2. **Grounding Phase**: The grounding model converts element descriptions to precise coordinates +3. 
**Execution**: Actions are performed using the predicted coordinates + +## Supported Grounding Models + +Any model that supports `predict_click()` can be used as the grounding component: + +- `omniparser` (OSS set-of-marks model) +- `huggingface-local/HelloKKMe/GTA1-7B` (OSS grounding model) +- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` (OSS unified model) +- `claude-3-5-sonnet-20241022` (Anthropic CUA) +- `openai/computer-use-preview` (OpenAI CUA) + +## Supported Thinking Models + +Any vision-enabled LiteLLM-compatible model can be used as the thinking component: + +- **Anthropic**: `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-3-opus-20240229` +- **OpenAI**: `openai/gpt-4o`, `openai/gpt-4-vision-preview` +- **Google**: `gemini/gemini-1.5-pro`, `vertex_ai/gemini-pro-vision` +- **Local models**: Any Hugging Face vision-language model + +## Usage Examples + +### GTA1 + Claude 3.5 Sonnet + +Combine state-of-the-art grounding with powerful reasoning: + +```python +agent = ComputerAgent( + "huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022", + tools=[computer] +) + +async for _ in agent.run("Open Firefox, navigate to github.com, and search for 'computer-use'"): + pass +# Success! 
🎉 +# - Claude 3.5 Sonnet plans the sequence of actions +# - GTA1-7B provides precise click coordinates for each UI element +``` + +### GTA1 + Gemini Pro + +Use Google's Gemini for planning with specialized grounding: + +```python +agent = ComputerAgent( + "huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro", + tools=[computer] +) + +async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): + pass +``` + +### UI-TARS + GPT-4o + +Combine two different vision models for enhanced capabilities: + +```python +agent = ComputerAgent( + "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B+openai/gpt-4o", + tools=[computer] +) + +async for _ in agent.run("Help me fill out this form with my personal information"): + pass +``` + +## Benefits of Composed Agents + +- **Specialized Grounding**: Use models optimized for click prediction accuracy +- **Flexible Planning**: Choose any LLM for task reasoning and planning +- **Cost Optimization**: Use smaller grounding models with larger planning models only when needed +- **Performance**: Leverage the strengths of different model architectures + +## Capabilities + +Composed agents support both capabilities: + +```python +agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022") + +# Full computer-use agent capabilities +async for _ in agent.run("Complete this online form"): + pass + +# Direct click prediction (uses grounding model only) +coords = agent.predict_click("find the submit button") +``` + +--- + +For more information on individual model capabilities, see [Computer-Use Agents](./computer-use-agents) and [Grounding Models](./grounding-models). 
diff --git a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx new file mode 100644 index 00000000..e22e63cc --- /dev/null +++ b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx @@ -0,0 +1,53 @@ +--- +title: Computer-Use Models +description: Models that support full computer-use agent capabilities with ComputerAgent.run() +--- + +These models support complete computer-use agent functionality through `ComputerAgent.run()`. They can understand natural language instructions and autonomously perform sequences of actions to complete tasks. + +All agent loops are compatible with any LLM provider supported by LiteLLM. + +See [Running Models Locally](../local-models) for how to use Hugging Face and MLX models on your own machine. + +## Anthropic CUAs + +Claude models with computer-use capabilities: + +- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514` +- Claude 3.7: `claude-3-7-sonnet-20250219` +- Claude 3.5: `claude-3-5-sonnet-20240620` + +```python +agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer]) +async for _ in agent.run("Open Firefox and navigate to github.com"): + pass +``` + +## OpenAI CUA Preview + +OpenAI's computer-use preview model: + +- Computer-use-preview: `computer-use-preview` + +```python +agent = ComputerAgent("openai/computer-use-preview", tools=[computer]) +async for _ in agent.run("Take a screenshot and describe what you see"): + pass +``` + +## UI-TARS 1.5 + +Unified vision-language model for computer-use: + +- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` +- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint) + +```python +agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer]) +async for _ in agent.run("Open the settings menu and change the theme to dark mode"): + pass +``` + +--- + +For details on agent loop behavior and usage, see [Agent 
Loops](../agent-loops). diff --git a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx new file mode 100644 index 00000000..14ff9c1e --- /dev/null +++ b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx @@ -0,0 +1,69 @@ +--- +title: Grounding Models +description: Models that support click prediction with ComputerAgent.predict_click() +--- + +These models specialize in UI element grounding and click prediction. They can identify precise coordinates for UI elements based on natural language descriptions, but cannot perform autonomous task planning. + +Use `ComputerAgent.predict_click()` to get coordinates for specific UI elements. + +## All Computer-Use Agents + +All models that support `ComputerAgent.run()` also support `ComputerAgent.predict_click()`: + +### Anthropic CUAs +- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514` +- Claude 3.7: `claude-3-7-sonnet-20250219` +- Claude 3.5: `claude-3-5-sonnet-20240620` + +### OpenAI CUA Preview +- Computer-use-preview: `computer-use-preview` + +### UI-TARS 1.5 +- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` +- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint) + +## Specialized Grounding Models + +These models are optimized specifically for click prediction and UI element grounding: + +### GTA1-7B + +State-of-the-art grounding model from the [GUI Agent Grounding Leaderboard](https://gui-agent.github.io/grounding-leaderboard/): + +- `huggingface-local/HelloKKMe/GTA1-7B` + +```python +agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer]) + +# Predict click coordinates for UI elements +coords = agent.predict_click("find the submit button") +print(f"Click coordinates: {coords}") # (450, 320) + +# Note: GTA1 cannot perform autonomous task planning +# This will raise an error: +# agent.run("Fill out the form and submit it") +``` + +## Usage Examples + +```python +# Using 
any grounding model for click prediction +agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer]) + +# Take a screenshot first +screenshot = agent.computer.screenshot() + +# Predict coordinates for specific elements +login_coords = agent.predict_click("find the login button") +search_coords = agent.predict_click("locate the search text field") +menu_coords = agent.predict_click("find the hamburger menu icon") + +print(f"Login button: {login_coords}") +print(f"Search field: {search_coords}") +print(f"Menu icon: {menu_coords}") +``` + +--- + +For information on combining grounding models with planning capabilities, see [Composed Agents](./composed-agents). diff --git a/docs/content/docs/agent-sdk/supported-agents/meta.json b/docs/content/docs/agent-sdk/supported-agents/meta.json new file mode 100644 index 00000000..092fd051 --- /dev/null +++ b/docs/content/docs/agent-sdk/supported-agents/meta.json @@ -0,0 +1,9 @@ +{ + "title": "Supported Agents", + "description": "Models and configurations supported by the Agent SDK", + "pages": [ + "computer-use-agents", + "grounding-models", + "composed-agents" + ] +} From a6a60b9fe04d92a355ae786e82cba485840bbdec Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 5 Aug 2025 13:24:29 -0400 Subject: [PATCH 26/76] added claude 4.1 to docs --- .../docs/agent-sdk/supported-agents/computer-use-agents.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx index e22e63cc..55b868b6 100644 --- a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx @@ -13,6 +13,7 @@ See [Running Models Locally](../local-models) for how to use Hugging Face and ML Claude models with computer-use capabilities: +- Claude 4.1: `claude-opus-4-1-20250805` - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514` - 
Claude 3.7: `claude-3-7-sonnet-20250219` - Claude 3.5: `claude-3-5-sonnet-20240620` From 3631caa88358ee1e6ec3b1acf0fd35a6254f7f62 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 6 Aug 2025 10:27:59 -0400 Subject: [PATCH 27/76] Changed keypress to cast strings to lists --- libs/python/agent/agent/computer_handler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/computer_handler.py b/libs/python/agent/agent/computer_handler.py index 1cf3c4d2..7425f59e 100644 --- a/libs/python/agent/agent/computer_handler.py +++ b/libs/python/agent/agent/computer_handler.py @@ -3,7 +3,7 @@ Computer handler implementation for OpenAI computer-use-preview protocol. """ import base64 -from typing import Dict, List, Any, Literal +from typing import Dict, List, Any, Literal, Union from .types import Computer @@ -61,8 +61,10 @@ class OpenAIComputerHandler: """Move cursor to coordinates.""" await self.interface.move_cursor(x, y) - async def keypress(self, keys: List[str]) -> None: + async def keypress(self, keys: Union[List[str], str]) -> None: """Press key combination.""" + if isinstance(keys, str): + keys = [keys] if len(keys) == 1: await self.interface.press_key(keys[0]) else: From 8e249174cdf4ebe9e2a99801a689f506647c1267 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 6 Aug 2025 10:53:51 -0400 Subject: [PATCH 28/76] Moved hotkey splitting into keypress handler --- libs/python/agent/agent/computer_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/python/agent/agent/computer_handler.py b/libs/python/agent/agent/computer_handler.py index 7425f59e..fb42295d 100644 --- a/libs/python/agent/agent/computer_handler.py +++ b/libs/python/agent/agent/computer_handler.py @@ -64,7 +64,7 @@ class OpenAIComputerHandler: async def keypress(self, keys: Union[List[str], str]) -> None: """Press key combination.""" if isinstance(keys, str): - keys = [keys] + keys = keys.replace("-", "+").split("+") if len(keys) 
== 1: await self.interface.press_key(keys[0]) else: From 760faf1b555e049773d7f54259b13ddd214120c3 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 6 Aug 2025 11:46:18 -0400 Subject: [PATCH 29/76] Added Claude 4.1 to docs --- .../docs/agent-sdk/supported-agents/grounding-models.mdx | 2 ++ docs/content/docs/quickstart-cli.mdx | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx index 14ff9c1e..bf13d5a0 100644 --- a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx @@ -12,6 +12,8 @@ Use `ComputerAgent.predict_click()` to get coordinates for specific UI elements. All models that support `ComputerAgent.run()` also support `ComputerAgent.predict_click()`: ### Anthropic CUAs + +- Claude 4.1: `claude-opus-4-1-20250805` - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514` - Claude 3.7: `claude-3-7-sonnet-20250219` - Claude 3.5: `claude-3-5-sonnet-20240620` diff --git a/docs/content/docs/quickstart-cli.mdx b/docs/content/docs/quickstart-cli.mdx index 84aa80ae..ac11c726 100644 --- a/docs/content/docs/quickstart-cli.mdx +++ b/docs/content/docs/quickstart-cli.mdx @@ -169,18 +169,20 @@ python -m agent.cli openai/computer-use-preview ```bash -uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022 uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-20250514 +uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-opus-4-1-20250805 uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-sonnet-4-20250514 +uv run --with "cua-agent[cli]" -m agent.cli anthropic/claude-3-5-sonnet-20241022 ``` ```bash -python -m agent.cli anthropic/claude-3-5-sonnet-20241022 +python -m agent.cli anthropic/claude-opus-4-1-20250805 python -m agent.cli anthropic/claude-opus-4-20250514 python -m 
agent.cli anthropic/claude-sonnet-4-20250514 +python -m agent.cli anthropic/claude-3-5-sonnet-20241022 ``` From 4eccf059e506b400be354ed39f070904bc253cda Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 6 Aug 2025 11:54:36 -0400 Subject: [PATCH 30/76] Added omniparser to grounding page --- .../supported-agents/grounding-models.mdx | 54 ++++++++++++------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx index bf13d5a0..61c9a70b 100644 --- a/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx @@ -29,12 +29,48 @@ All models that support `ComputerAgent.run()` also support `ComputerAgent.predic These models are optimized specifically for click prediction and UI element grounding: +### OmniParser + +OCR-focused set-of-marks model that requires an LLM for click prediction: + +- `omniparser` (requires combination with any LiteLLM vision model) + ### GTA1-7B State-of-the-art grounding model from the [GUI Agent Grounding Leaderboard](https://gui-agent.github.io/grounding-leaderboard/): - `huggingface-local/HelloKKMe/GTA1-7B` +## Usage Examples + +```python +# Using any grounding model for click prediction +agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer]) + +# Predict coordinates for specific elements +login_coords = agent.predict_click("find the login button") +search_coords = agent.predict_click("locate the search text field") +menu_coords = agent.predict_click("find the hamburger menu icon") + +print(f"Login button: {login_coords}") +print(f"Search field: {search_coords}") +print(f"Menu icon: {menu_coords}") +``` + +```python +# OmniParser is just for OCR, so it requires an LLM for predict_click +agent = ComputerAgent("omniparser+anthropic/claude-3-5-sonnet-20241022", tools=[computer]) + +# Predict click coordinates 
using composed agent +coords = agent.predict_click("find the submit button") +print(f"Click coordinates: {coords}") # (450, 320) + +# Note: Cannot use omniparser alone for click prediction +# This will raise an error: +# agent = ComputerAgent("omniparser", tools=[computer]) +# coords = agent.predict_click("find button") # Error! +``` + ```python agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer]) @@ -47,24 +83,6 @@ print(f"Click coordinates: {coords}") # (450, 320) # agent.run("Fill out the form and submit it") ``` -## Usage Examples - -```python -# Using any grounding model for click prediction -agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer]) - -# Take a screenshot first -screenshot = agent.computer.screenshot() - -# Predict coordinates for specific elements -login_coords = agent.predict_click("find the login button") -search_coords = agent.predict_click("locate the search text field") -menu_coords = agent.predict_click("find the hamburger menu icon") - -print(f"Login button: {login_coords}") -print(f"Search field: {search_coords}") -print(f"Menu icon: {menu_coords}") -``` --- From 0056ae44f56d7b0a5bcca02f37a6a38b2cf08809 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Wed, 6 Aug 2025 14:47:29 -0400 Subject: [PATCH 31/76] Disabled pyautogui FAILSAFE --- libs/python/computer-server/computer_server/handlers/linux.py | 1 + libs/python/computer-server/computer_server/handlers/macos.py | 1 + libs/python/computer-server/computer_server/handlers/windows.py | 1 + 3 files changed, 3 insertions(+) diff --git a/libs/python/computer-server/computer_server/handlers/linux.py b/libs/python/computer-server/computer_server/handlers/linux.py index 5429b1a2..34a63de5 100644 --- a/libs/python/computer-server/computer_server/handlers/linux.py +++ b/libs/python/computer-server/computer_server/handlers/linux.py @@ -23,6 +23,7 @@ logger = logging.getLogger(__name__) # This allows the server to run in headless environments try: import 
pyautogui + pyautogui.FAILSAFE = False logger.info("pyautogui successfully imported, GUI automation available") except Exception as e: diff --git a/libs/python/computer-server/computer_server/handlers/macos.py b/libs/python/computer-server/computer_server/handlers/macos.py index 0cba0ca3..ded73408 100644 --- a/libs/python/computer-server/computer_server/handlers/macos.py +++ b/libs/python/computer-server/computer_server/handlers/macos.py @@ -1,4 +1,5 @@ import pyautogui +pyautogui.FAILSAFE = False from pynput.mouse import Button, Controller as MouseController from pynput.keyboard import Key, Controller as KeyboardController import time diff --git a/libs/python/computer-server/computer_server/handlers/windows.py b/libs/python/computer-server/computer_server/handlers/windows.py index 485aff4a..2d91ce53 100644 --- a/libs/python/computer-server/computer_server/handlers/windows.py +++ b/libs/python/computer-server/computer_server/handlers/windows.py @@ -18,6 +18,7 @@ logger = logging.getLogger(__name__) # Try to import pyautogui try: import pyautogui + pyautogui.FAILSAFE = False logger.info("pyautogui successfully imported, GUI automation available") except Exception as e: logger.error(f"pyautogui import failed: {str(e)}. 
GUI operations will not work.") From 1b406b197a3905197c6d29acc0c3d1dfa33e2980 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 7 Aug 2025 16:39:40 -0400 Subject: [PATCH 32/76] Updated anthropic loop to include function_call and function_call_output, along with passing tool exceptions to the agent --- libs/python/agent/agent/cli.py | 8 +- libs/python/agent/agent/computer_handler.py | 12 +- libs/python/agent/agent/loops/anthropic.py | 1085 ++++++++++--------- libs/python/agent/agent/responses.py | 45 + 4 files changed, 641 insertions(+), 509 deletions(-) diff --git a/libs/python/agent/agent/cli.py b/libs/python/agent/agent/cli.py index 215c791c..4d17ca15 100644 --- a/libs/python/agent/agent/cli.py +++ b/libs/python/agent/agent/cli.py @@ -94,14 +94,14 @@ def print_action(action_type: str, details: Dict[str, Any], total_cost: float): # Format action details args_str = "" if action_type == "click" and "x" in details and "y" in details: - args_str = f"({details['x']}, {details['y']})" + args_str = f"_{details['button']}({details['x']}, {details['y']})" elif action_type == "type" and "text" in details: text = details["text"] if len(text) > 50: text = text[:47] + "..." 
- args_str = f'"{text}"' - elif action_type == "key" and "key" in details: - args_str = f"'{details['key']}'" + args_str = f'("{text}")' + elif action_type == "key" and "text" in details: + args_str = f"('{details['text']}')" elif action_type == "scroll" and "x" in details and "y" in details: args_str = f"({details['x']}, {details['y']})" diff --git a/libs/python/agent/agent/computer_handler.py b/libs/python/agent/agent/computer_handler.py index fb42295d..ae8a02e2 100644 --- a/libs/python/agent/agent/computer_handler.py +++ b/libs/python/agent/agent/computer_handler.py @@ -14,11 +14,13 @@ class OpenAIComputerHandler: """Initialize with a computer interface (from tool schema).""" self.interface = computer_interface + # ==== Computer-Use-Preview Action Space ==== + async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: """Get the current environment type.""" # For now, return a default - this could be enhanced to detect actual environment return "windows" - + async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" screen_size = await self.interface.get_screen_size() @@ -94,6 +96,14 @@ class OpenAIComputerHandler: # For now, return empty string return "" + # ==== Anthropic Computer Action Space ==== + async def left_mouse_down(self, x: int, y: int) -> None: + """Left mouse down at coordinates.""" + await self.interface.mouse_down(x, y, button="left") + + async def left_mouse_up(self, x: int, y: int) -> None: + """Left mouse up at coordinates.""" + await self.interface.mouse_up(x, y, button="left") def acknowledge_safety_check_callback(message: str, allow_always: bool = False) -> bool: """Safety check callback for user acknowledgment.""" diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py index 599d1dc4..8dcc5733 100644 --- a/libs/python/agent/agent/loops/anthropic.py +++ b/libs/python/agent/agent/loops/anthropic.py @@ -23,7 +23,10 @@ from ..responses 
import ( make_type_item, make_wait_item, make_input_image_item, - make_screenshot_item + make_screenshot_item, + make_failed_tool_call_items, + make_left_mouse_down_item, + make_left_mouse_up_item ) # Model version mapping to tool version and beta flag @@ -115,7 +118,8 @@ async def _prepare_tools_for_anthropic(tool_schemas: List[Dict[str, Any]], model def _convert_responses_items_to_completion_messages(messages: Messages) -> List[Dict[str, Any]]: """Convert responses_items message format to liteLLM completion format.""" completion_messages = [] - + call_id_to_fn_name = {} + for message in messages: msg_type = message.get("type") role = message.get("role") @@ -193,6 +197,43 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[ "content": reasoning_text }) + elif msg_type == "function_call": + fn_name = message.get("name") + fn_args = message.get("arguments", "{}") + call_id = message.get("call_id", "call_1") + call_id_to_fn_name[call_id] = fn_name + openai_tool_calls = [{ + "id": call_id, + "type": "function", + "function": { + "name": fn_name, + "arguments": fn_args + } + }] # If the last completion message is an assistant message, extend the tool_calls + if completion_messages and completion_messages[-1].get("role") == "assistant": + if "tool_calls" not in completion_messages[-1]: + completion_messages[-1]["tool_calls"] = [] + completion_messages[-1]["tool_calls"].extend(openai_tool_calls) + else: + # Create new assistant message with tool calls + completion_messages.append({ + "role": "assistant", + "content": None, + "tool_calls": openai_tool_calls + }) + + elif msg_type == "function_call_output": + call_id = message.get("call_id", "call_1") + fn_output = message.get("output", "") + fn_name = call_id_to_fn_name.get(call_id, "computer") + + completion_messages.append({ + "role": "function", + "name": fn_name, + "tool_call_id": call_id, + "content": str(fn_output) + }) + elif msg_type == "computer_call": # Computer call becomes tool 
use in assistant message action = message.get("action", {}) @@ -611,45 +652,350 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # Action reference: # https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/computer-use-tool#available-actions + try: + # Basic actions (all versions) + if action_type == "screenshot": + responses_items.append(make_screenshot_item(call_id=call_id)) + elif action_type in ["click", "left_click"]: + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append(make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) + elif action_type in ["type", "type_text"]: + responses_items.append(make_type_item( + text=tool_input.get("text", ""), + call_id=call_id + )) + elif action_type in ["key", "keypress", "hotkey"]: + responses_items.append(make_keypress_item( + keys=tool_input.get("text", "").replace("+", "-").split("-"), + call_id=call_id + )) + elif action_type in ["mouse_move", "move_cursor", "move"]: + # Mouse move - create a custom action item + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append( + make_move_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + ) + ) + + # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7 + elif action_type == "scroll": + coordinate = tool_input.get("coordinate", [0, 0]) + scroll_amount = tool_input.get("scroll_amount", 3) + scroll_x = scroll_amount if tool_input.get("scroll_direction", "down") == "right" else \ + -scroll_amount if tool_input.get("scroll_direction", "down") == "left" else 0 + scroll_y = scroll_amount if tool_input.get("scroll_direction", "down") == "down" else \ + -scroll_amount if tool_input.get("scroll_direction", "down") == "up" else 0 + responses_items.append(make_scroll_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + 
y=coordinate[1] if len(coordinate) > 1 else 0, + scroll_x=scroll_x, + scroll_y=scroll_y, + call_id=call_id + )) + elif action_type in ["left_click_drag", "drag"]: + start_coord = tool_input.get("start_coordinate", [0, 0]) + end_coord = tool_input.get("end_coordinate", [0, 0]) + responses_items.append(make_drag_item( + path=[ + { + "x": start_coord[0] if len(start_coord) > 0 else 0, + "y": start_coord[1] if len(start_coord) > 1 else 0 + }, + { + "x": end_coord[0] if len(end_coord) > 0 else 0, + "y": end_coord[1] if len(end_coord) > 1 else 0 + } + ], + call_id=call_id + )) + elif action_type == "right_click": + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append(make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + button="right", + call_id=call_id + )) + elif action_type == "middle_click": + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append(make_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + button="wheel", + call_id=call_id + )) + elif action_type == "double_click": + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append(make_double_click_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) + elif action_type == "triple_click": + # coordinate = tool_input.get("coordinate", [0, 0]) + # responses_items.append({ + # "type": "computer_call", + # "call_id": call_id, + # "action": { + # "type": "triple_click", + # "x": coordinate[0] if len(coordinate) > 0 else 0, + # "y": coordinate[1] if len(coordinate) > 1 else 0 + # } + # }) + raise NotImplementedError("triple_click") + elif action_type == "left_mouse_down": + # coordinate = tool_input.get("coordinate", [0, 0]) + # responses_items.append({ + # "type": "computer_call", + # "call_id": call_id, + # "action": { + # "type": "mouse_down", + # "button": 
"left", + # "x": coordinate[0] if len(coordinate) > 0 else 0, + # "y": coordinate[1] if len(coordinate) > 1 else 0 + # } + # }) + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append(make_left_mouse_down_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) + elif action_type == "left_mouse_up": + # coordinate = tool_input.get("coordinate", [0, 0]) + # responses_items.append({ + # "type": "computer_call", + # "call_id": call_id, + # "action": { + # "type": "mouse_up", + # "button": "left", + # "x": coordinate[0] if len(coordinate) > 0 else 0, + # "y": coordinate[1] if len(coordinate) > 1 else 0 + # } + # }) + coordinate = tool_input.get("coordinate", [0, 0]) + responses_items.append(make_left_mouse_up_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) + elif action_type == "hold_key": + # responses_items.append({ + # "type": "computer_call", + # "call_id": call_id, + # "action": { + # "type": "key_hold", + # "key": tool_input.get("key", "") + # } + # }) + raise NotImplementedError("hold_key") + elif action_type == "wait": + responses_items.append(make_wait_item( + call_id=call_id + )) + else: + raise ValueError(f"Unknown action type: {action_type}") + except Exception as e: + responses_items.extend(make_failed_tool_call_items( + tool_name="computer", + tool_kwargs=tool_input, + error_message=repr(e), + call_id=call_id + )) + + # Handle tool calls (alternative format) + if hasattr(message, 'tool_calls') and message.tool_calls: + for tool_call in message.tool_calls: + if tool_call.function.name == "computer": + try: + try: + args = json.loads(tool_call.function.arguments) + action_type = args.get("action") + call_id = tool_call.id + # Basic actions (all versions) if action_type == "screenshot": - responses_items.append(make_screenshot_item(call_id=call_id)) + # Input: + # { + # "function": { + # 
"name": "computer", + # "arguments": json.dumps({ + # "action": "screenshot" + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "screenshot" + # } + # } + responses_items.append(make_screenshot_item( + call_id=call_id + )) elif action_type in ["click", "left_click"]: - coordinate = tool_input.get("coordinate", [0, 0]) + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "click", + # "coordinate": [100, 200] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "click", + # "x": 100, + # "y": 200 + # } + # } + coordinate = args.get("coordinate", [0, 0]) responses_items.append(make_click_item( x=coordinate[0] if len(coordinate) > 0 else 0, y=coordinate[1] if len(coordinate) > 1 else 0, call_id=call_id )) elif action_type in ["type", "type_text"]: + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "type", + # "text": "Hello World" + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "type", + # "text": "Hello World" + # } + # } responses_items.append(make_type_item( - text=tool_input.get("text", ""), + text=args.get("text", ""), call_id=call_id )) elif action_type in ["key", "keypress", "hotkey"]: + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "key", + # "text": "ctrl+c" + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "keypress", + # "keys": ["ctrl", "c"] + # } + # } responses_items.append(make_keypress_item( - keys=tool_input.get("text", "").replace("+", 
"-").split("-"), + keys=args.get("text", "").replace("+", "-").split("-"), call_id=call_id )) elif action_type in ["mouse_move", "move_cursor", "move"]: - # Mouse move - create a custom action item - coordinate = tool_input.get("coordinate", [0, 0]) - responses_items.append( - make_move_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - ) - ) + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "mouse_move", + # "coordinate": [150, 250] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "mouse_move", + # "x": 150, + # "y": 250 + # } + # } + coordinate = args.get("coordinate", [0, 0]) + responses_items.append(make_move_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7 elif action_type == "scroll": - coordinate = tool_input.get("coordinate", [0, 0]) - scroll_amount = tool_input.get("scroll_amount", 3) - scroll_x = scroll_amount if tool_input.get("scroll_direction", "down") == "right" else \ - -scroll_amount if tool_input.get("scroll_direction", "down") == "left" else 0 - scroll_y = scroll_amount if tool_input.get("scroll_direction", "down") == "down" else \ - -scroll_amount if tool_input.get("scroll_direction", "down") == "up" else 0 + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "scroll", + # "coordinate": [300, 400], + # "scroll_direction": "down", + # "scroll_amount": 5 + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "scroll", + # "x": 300, + # "y": 400, + # "scroll_x": 0, + # "scroll_y": -5 + # } + # } + 
coordinate = args.get("coordinate", [0, 0]) + direction = args.get("scroll_direction", "down") + amount = args.get("scroll_amount", 3) + scroll_x = amount if direction == "left" else \ + -amount if direction == "right" else 0 + scroll_y = amount if direction == "up" else \ + -amount if direction == "down" else 0 responses_items.append(make_scroll_item( x=coordinate[0] if len(coordinate) > 0 else 0, y=coordinate[1] if len(coordinate) > 1 else 0, @@ -658,8 +1004,34 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] call_id=call_id )) elif action_type in ["left_click_drag", "drag"]: - start_coord = tool_input.get("start_coordinate", [0, 0]) - end_coord = tool_input.get("end_coordinate", [0, 0]) + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "left_click_drag", + # "start_coordinate": [100, 150], + # "end_coordinate": [200, 250] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "drag", + # "path": [ + # {"x": 100, "y": 150}, + # {"x": 200, "y": 250} + # ] + # } + # } + start_coord = args.get("start_coordinate", [0, 0]) + end_coord = args.get("end_coordinate", [0, 0]) responses_items.append(make_drag_item( path=[ { @@ -674,7 +1046,31 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] call_id=call_id )) elif action_type == "right_click": - coordinate = tool_input.get("coordinate", [0, 0]) + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "right_click", + # "coordinate": [120, 180] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "click", + # "x": 120, + # "y": 180, + # "button": "right" + # } + # } + coordinate = args.get("coordinate", [0, 0]) 
responses_items.append(make_click_item( x=coordinate[0] if len(coordinate) > 0 else 0, y=coordinate[1] if len(coordinate) > 1 else 0, @@ -682,7 +1078,31 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] call_id=call_id )) elif action_type == "middle_click": - coordinate = tool_input.get("coordinate", [0, 0]) + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "middle_click", + # "coordinate": [140, 220] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "click", + # "x": 140, + # "y": 220, + # "button": "wheel" + # } + # } + coordinate = args.get("coordinate", [0, 0]) responses_items.append(make_click_item( x=coordinate[0] if len(coordinate) > 0 else 0, y=coordinate[1] if len(coordinate) > 1 else 0, @@ -690,518 +1110,175 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] call_id=call_id )) elif action_type == "double_click": - coordinate = tool_input.get("coordinate", [0, 0]) + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "double_click", + # "coordinate": [160, 240] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "double_click", + # "x": 160, + # "y": 240 + # } + # } + coordinate = args.get("coordinate", [0, 0]) responses_items.append(make_double_click_item( x=coordinate[0] if len(coordinate) > 0 else 0, y=coordinate[1] if len(coordinate) > 1 else 0, call_id=call_id )) elif action_type == "triple_click": - # coordinate = tool_input.get("coordinate", [0, 0]) - # responses_items.append({ + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "triple_click", + # "coordinate": [180, 260] + # }) + # }, + # "id": "call_1", + # 
"type": "function" + # } + + # Output: + # { # "type": "computer_call", - # "call_id": call_id, + # "call_id": "call_1", # "action": { # "type": "triple_click", - # "x": coordinate[0] if len(coordinate) > 0 else 0, - # "y": coordinate[1] if len(coordinate) > 1 else 0 + # "x": 180, + # "y": 260 # } - # }) + # } raise NotImplementedError("triple_click") elif action_type == "left_mouse_down": - # coordinate = tool_input.get("coordinate", [0, 0]) - # responses_items.append({ + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "left_mouse_down", + # "coordinate": [200, 280] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { # "type": "computer_call", - # "call_id": call_id, + # "call_id": "call_1", # "action": { # "type": "mouse_down", # "button": "left", - # "x": coordinate[0] if len(coordinate) > 0 else 0, - # "y": coordinate[1] if len(coordinate) > 1 else 0 + # "x": 200, + # "y": 280 # } - # }) - raise NotImplementedError("left_mouse_down") + # } + coordinate = args.get("coordinate", [0, 0]) + responses_items.append(make_left_mouse_down_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) elif action_type == "left_mouse_up": - # coordinate = tool_input.get("coordinate", [0, 0]) - # responses_items.append({ + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "left_mouse_up", + # "coordinate": [220, 300] + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { # "type": "computer_call", - # "call_id": call_id, + # "call_id": "call_1", # "action": { # "type": "mouse_up", # "button": "left", - # "x": coordinate[0] if len(coordinate) > 0 else 0, - # "y": coordinate[1] if len(coordinate) > 1 else 0 + # "x": 220, + # "y": 300 # } - # }) - raise NotImplementedError("left_mouse_up") + # } + coordinate = args.get("coordinate", [0, 0]) + 
responses_items.append(make_left_mouse_up_item( + x=coordinate[0] if len(coordinate) > 0 else 0, + y=coordinate[1] if len(coordinate) > 1 else 0, + call_id=call_id + )) elif action_type == "hold_key": - # responses_items.append({ + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "hold_key", + # "key": "shift" + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { # "type": "computer_call", - # "call_id": call_id, + # "call_id": "call_1", # "action": { # "type": "key_hold", - # "key": tool_input.get("key", "") + # "key": "shift" # } - # }) + # } raise NotImplementedError("hold_key") elif action_type == "wait": + # Input: + # { + # "function": { + # "name": "computer", + # "arguments": json.dumps({ + # "action": "wait" + # }) + # }, + # "id": "call_1", + # "type": "function" + # } + + # Output: + # { + # "type": "computer_call", + # "call_id": "call_1", + # "action": { + # "type": "wait" + # } + # } responses_items.append(make_wait_item( call_id=call_id )) - else: - raise ValueError(f"Unknown action type: {action_type}") - - # Handle tool calls (alternative format) - if hasattr(message, 'tool_calls') and message.tool_calls: - for tool_call in message.tool_calls: - if tool_call.function.name == "computer": - try: - args = json.loads(tool_call.function.arguments) - action_type = args.get("action") - call_id = tool_call.id - - # Basic actions (all versions) - if action_type == "screenshot": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "screenshot" - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "screenshot" - # } - # } - responses_items.append(make_screenshot_item( - call_id=call_id - )) - elif action_type in ["click", "left_click"]: - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": 
json.dumps({ - # "action": "click", - # "coordinate": [100, 200] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "click", - # "x": 100, - # "y": 200 - # } - # } - coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) - elif action_type in ["type", "type_text"]: - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "type", - # "text": "Hello World" - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "type", - # "text": "Hello World" - # } - # } - responses_items.append(make_type_item( - text=args.get("text", ""), - call_id=call_id - )) - elif action_type in ["key", "keypress", "hotkey"]: - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "key", - # "text": "ctrl+c" - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "keypress", - # "keys": ["ctrl", "c"] - # } - # } - responses_items.append(make_keypress_item( - keys=args.get("text", "").replace("+", "-").split("-"), - call_id=call_id - )) - elif action_type in ["mouse_move", "move_cursor", "move"]: - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "mouse_move", - # "coordinate": [150, 250] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "mouse_move", - # "x": 150, - # "y": 250 - # } - # } - coordinate = args.get("coordinate", [0, 0]) - 
responses_items.append(make_move_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) - - # Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7 - elif action_type == "scroll": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "scroll", - # "coordinate": [300, 400], - # "scroll_direction": "down", - # "scroll_amount": 5 - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "scroll", - # "x": 300, - # "y": 400, - # "scroll_x": 0, - # "scroll_y": -5 - # } - # } - coordinate = args.get("coordinate", [0, 0]) - direction = args.get("scroll_direction", "down") - amount = args.get("scroll_amount", 3) - scroll_x = amount if direction == "left" else \ - -amount if direction == "right" else 0 - scroll_y = amount if direction == "up" else \ - -amount if direction == "down" else 0 - responses_items.append(make_scroll_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - scroll_x=scroll_x, - scroll_y=scroll_y, - call_id=call_id - )) - elif action_type in ["left_click_drag", "drag"]: - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "left_click_drag", - # "start_coordinate": [100, 150], - # "end_coordinate": [200, 250] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "drag", - # "path": [ - # {"x": 100, "y": 150}, - # {"x": 200, "y": 250} - # ] - # } - # } - start_coord = args.get("start_coordinate", [0, 0]) - end_coord = args.get("end_coordinate", [0, 0]) - responses_items.append(make_drag_item( - path=[ - { - "x": start_coord[0] if len(start_coord) > 0 else 0, - "y": start_coord[1] if 
len(start_coord) > 1 else 0 - }, - { - "x": end_coord[0] if len(end_coord) > 0 else 0, - "y": end_coord[1] if len(end_coord) > 1 else 0 - } - ], - call_id=call_id - )) - elif action_type == "right_click": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "right_click", - # "coordinate": [120, 180] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "click", - # "x": 120, - # "y": 180, - # "button": "right" - # } - # } - coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - button="right", - call_id=call_id - )) - elif action_type == "middle_click": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "middle_click", - # "coordinate": [140, 220] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "click", - # "x": 140, - # "y": 220, - # "button": "wheel" - # } - # } - coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_click_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - button="wheel", - call_id=call_id - )) - elif action_type == "double_click": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "double_click", - # "coordinate": [160, 240] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "double_click", - # "x": 160, - # "y": 240 - # } - # } - coordinate = args.get("coordinate", [0, 0]) - responses_items.append(make_double_click_item( - x=coordinate[0] if 
len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, - call_id=call_id - )) - elif action_type == "triple_click": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "triple_click", - # "coordinate": [180, 260] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "triple_click", - # "x": 180, - # "y": 260 - # } - # } - raise NotImplementedError("triple_click") - elif action_type == "left_mouse_down": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "left_mouse_down", - # "coordinate": [200, 280] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "mouse_down", - # "button": "left", - # "x": 200, - # "y": 280 - # } - # } - raise NotImplementedError("left_mouse_down") - elif action_type == "left_mouse_up": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "left_mouse_up", - # "coordinate": [220, 300] - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "mouse_up", - # "button": "left", - # "x": 220, - # "y": 300 - # } - # } - raise NotImplementedError("left_mouse_up") - elif action_type == "hold_key": - # Input: - # { - # "function": { - # "name": "computer", - # "arguments": json.dumps({ - # "action": "hold_key", - # "key": "shift" - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "key_hold", - # "key": "shift" - # } - # } - raise NotImplementedError("hold_key") - elif action_type == "wait": - # Input: - # { - # "function": { - # "name": 
"computer", - # "arguments": json.dumps({ - # "action": "wait" - # }) - # }, - # "id": "call_1", - # "type": "function" - # } - - # Output: - # { - # "type": "computer_call", - # "call_id": "call_1", - # "action": { - # "type": "wait" - # } - # } - responses_items.append(make_wait_item( + except Exception as e: + responses_items.extend(make_failed_tool_call_items( + tool_name="computer", + tool_kwargs=args, + error_message=repr(e), call_id=call_id )) except json.JSONDecodeError: diff --git a/libs/python/agent/agent/responses.py b/libs/python/agent/agent/responses.py index 52993750..925eca3b 100644 --- a/libs/python/agent/agent/responses.py +++ b/libs/python/agent/agent/responses.py @@ -206,6 +206,51 @@ def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallPar type="computer_call" ) +# Extra anthropic computer calls +def make_left_mouse_down_item(x: int, y: int, call_id: Optional[str] = None) -> Dict[str, Any]: + return { + "id": random_id(), + "call_id": call_id if call_id else random_id(), + "action": { + "type": "left_mouse_down", + "x": x, + "y": y + }, + "pending_safety_checks": [], + "status": "completed", + "type": "computer_call" + } + +def make_left_mouse_up_item(x: int, y: int, call_id: Optional[str] = None) -> Dict[str, Any]: + return { + "id": random_id(), + "call_id": call_id if call_id else random_id(), + "action": { + "type": "left_mouse_up", + "x": x, + "y": y + }, + "pending_safety_checks": [], + "status": "completed", + "type": "computer_call" + } + +def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], error_message: str, call_id: Optional[str] = None) -> List[Dict[str, Any]]: + call_id = call_id if call_id else random_id() + return [ + { + "type": "function_call", + "id": random_id(), + "call_id": call_id, + "name": tool_name, + "arguments": json.dumps(tool_kwargs), + }, + { + "type": "function_call_output", + "call_id": call_id, + "output": json.dumps({"error": error_message}), + } + ] # Conversion 
functions between element descriptions and coordinates def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]: From 4b0b07240ccb4657342f0a171c39265f63adbeae Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Thu, 7 Aug 2025 16:47:34 -0400 Subject: [PATCH 33/76] Made coordinates optional for left_mouse_up/down --- libs/python/agent/agent/computer_handler.py | 6 +-- libs/python/agent/agent/loops/anthropic.py | 44 +++++++++++++++------ libs/python/agent/agent/responses.py | 4 +- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/libs/python/agent/agent/computer_handler.py b/libs/python/agent/agent/computer_handler.py index ae8a02e2..53de49ed 100644 --- a/libs/python/agent/agent/computer_handler.py +++ b/libs/python/agent/agent/computer_handler.py @@ -3,7 +3,7 @@ Computer handler implementation for OpenAI computer-use-preview protocol. """ import base64 -from typing import Dict, List, Any, Literal, Union +from typing import Dict, List, Any, Literal, Union, Optional from .types import Computer @@ -97,11 +97,11 @@ class OpenAIComputerHandler: return "" # ==== Anthropic Computer Action Space ==== - async def left_mouse_down(self, x: int, y: int) -> None: + async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse down at coordinates.""" await self.interface.mouse_down(x, y, button="left") - async def left_mouse_up(self, x: int, y: int) -> None: + async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse up at coordinates.""" await self.interface.mouse_up(x, y, button="left") diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py index 8dcc5733..50fbd24e 100644 --- a/libs/python/agent/agent/loops/anthropic.py +++ b/libs/python/agent/agent/loops/anthropic.py @@ -568,6 +568,26 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> 
List[ "action": "screenshot" } }) + elif action_type == "left_mouse_down": + tool_use_content.append({ + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "left_mouse_down", + "coordinate": [action.get("x", None), action.get("y", None)] + } + }) + elif action_type == "left_mouse_up": + tool_use_content.append({ + "type": "tool_use", + "id": call_id, + "name": "computer", + "input": { + "action": "left_mouse_up", + "coordinate": [action.get("x", None), action.get("y", None)] + } + }) # Convert tool_use_content to OpenAI tool_calls format openai_tool_calls = [] @@ -762,10 +782,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "y": coordinate[1] if len(coordinate) > 1 else 0 # } # }) - coordinate = tool_input.get("coordinate", [0, 0]) + coordinate = tool_input.get("coordinate", [None, None]) responses_items.append(make_left_mouse_down_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, call_id=call_id )) elif action_type == "left_mouse_up": @@ -780,10 +800,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "y": coordinate[1] if len(coordinate) > 1 else 0 # } # }) - coordinate = tool_input.get("coordinate", [0, 0]) + coordinate = tool_input.get("coordinate", [None, None]) responses_items.append(make_left_mouse_up_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, call_id=call_id )) elif action_type == "hold_key": @@ -1189,10 +1209,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "y": 280 # } # } - coordinate = args.get("coordinate", [0, 0]) + coordinate = args.get("coordinate", [None, None]) 
responses_items.append(make_left_mouse_down_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, call_id=call_id )) elif action_type == "left_mouse_up": @@ -1220,10 +1240,10 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any] # "y": 300 # } # } - coordinate = args.get("coordinate", [0, 0]) + coordinate = args.get("coordinate", [None, None]) responses_items.append(make_left_mouse_up_item( - x=coordinate[0] if len(coordinate) > 0 else 0, - y=coordinate[1] if len(coordinate) > 1 else 0, + x=coordinate[0] if len(coordinate) > 0 else None, + y=coordinate[1] if len(coordinate) > 1 else None, call_id=call_id )) elif action_type == "hold_key": diff --git a/libs/python/agent/agent/responses.py b/libs/python/agent/agent/responses.py index 925eca3b..fb034a70 100644 --- a/libs/python/agent/agent/responses.py +++ b/libs/python/agent/agent/responses.py @@ -207,7 +207,7 @@ def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallPar ) # Extra anthropic computer calls -def make_left_mouse_down_item(x: int, y: int, call_id: Optional[str] = None) -> Dict[str, Any]: +def make_left_mouse_down_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]: return { "id": random_id(), "call_id": call_id if call_id else random_id(), @@ -221,7 +221,7 @@ def make_left_mouse_down_item(x: int, y: int, call_id: Optional[str] = None) -> "type": "computer_call" } -def make_left_mouse_up_item(x: int, y: int, call_id: Optional[str] = None) -> Dict[str, Any]: +def make_left_mouse_up_item(x: Optional[int] = None, y: Optional[int] = None, call_id: Optional[str] = None) -> Dict[str, Any]: return { "id": random_id(), "call_id": call_id if call_id else random_id(), From d47ef3f1b66233c6e2781f486af5c048cf2b714e Mon Sep 17 00:00:00 2001 From: Dillon DuPont 
Date: Fri, 8 Aug 2025 10:22:10 -0400 Subject: [PATCH 34/76] Added protocol for custom computer handlers --- libs/python/agent/agent/agent.py | 26 ++++--- libs/python/agent/agent/computers/__init__.py | 30 ++++++++ libs/python/agent/agent/computers/base.py | 68 +++++++++++++++++++ .../{computer_handler.py => computers/cua.py} | 10 +-- libs/python/agent/agent/types.py | 52 -------------- 5 files changed, 119 insertions(+), 67 deletions(-) create mode 100644 libs/python/agent/agent/computers/__init__.py create mode 100644 libs/python/agent/agent/computers/base.py rename libs/python/agent/agent/{computer_handler.py => computers/cua.py} (95%) diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 79a4b9a6..48bd7b54 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -7,9 +7,8 @@ from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Calla from litellm.responses.utils import Usage -from .types import Messages, Computer, AgentCapability +from .types import Messages, AgentCapability from .decorators import find_agent_config -from .computer_handler import OpenAIComputerHandler, acknowledge_safety_check_callback, check_blocklisted_url import json import litellm import litellm.utils @@ -22,9 +21,13 @@ from .callbacks import ( BudgetManagerCallback, TelemetryCallback, ) +from .computers import ( + ComputerHandler, + make_computer_handler +) def get_json(obj: Any, max_depth: int = 10) -> Any: - def custom_serializer(o: Any, depth: int = 0, seen: Set[int] = None) -> Any: + def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any: if seen is None: seen = set() @@ -247,7 +250,7 @@ class ComputerAgent: computer_handler = None for schema in self.tool_schemas: if schema["type"] == "computer": - computer_handler = OpenAIComputerHandler(schema["computer"].interface) + computer_handler = make_computer_handler(schema["computer"]) break self.computer_handler = 
computer_handler @@ -398,7 +401,7 @@ class ComputerAgent: # AGENT OUTPUT PROCESSING # ============================================================================ - async def _handle_item(self, item: Any, computer: Optional[Computer] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]: + async def _handle_item(self, item: Any, computer: Optional[ComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]: """Handle each item; may cause a computer action + screenshot.""" if ignore_call_ids and item.get("call_id") and item.get("call_id") in ignore_call_ids: return [] @@ -450,10 +453,12 @@ class ComputerAgent: acknowledged_checks = [] for check in pending_checks: check_message = check.get("message", str(check)) - if acknowledge_safety_check_callback(check_message, allow_always=True): # TODO: implement a callback for safety checks - acknowledged_checks.append(check) - else: - raise ValueError(f"Safety check failed: {check_message}") + acknowledged_checks.append(check) + # TODO: implement a callback for safety checks + # if acknowledge_safety_check_callback(check_message, allow_always=True): + # acknowledged_checks.append(check) + # else: + # raise ValueError(f"Safety check failed: {check_message}") # Create call output call_output = { @@ -470,7 +475,8 @@ class ComputerAgent: if await computer.get_environment() == "browser": current_url = await computer.get_current_url() call_output["output"]["current_url"] = current_url - check_blocklisted_url(current_url) + # TODO: implement a callback for URL safety checks + # check_blocklisted_url(current_url) result = [call_output] await self._on_computer_call_end(item, result) diff --git a/libs/python/agent/agent/computers/__init__.py b/libs/python/agent/agent/computers/__init__.py new file mode 100644 index 00000000..e2c4a07a --- /dev/null +++ b/libs/python/agent/agent/computers/__init__.py @@ -0,0 +1,30 @@ +""" +Computer handler factory and interface definitions. 
+ +This module provides a factory function to create computer handlers from different +computer interface types, supporting both the ComputerHandler protocol and the +Computer library interface. +""" + +from .base import ComputerHandler +from .cua import cuaComputerHandler +from computer import Computer + +def make_computer_handler(computer): + """ + Create a computer handler from a computer interface. + + Args: + computer: Either a ComputerHandler instance or a Computer instance + + Returns: + ComputerHandler: A computer handler instance + + Raises: + ValueError: If the computer type is not supported + """ + if isinstance(computer, ComputerHandler): + return computer + if isinstance(computer, Computer): + return cuaComputerHandler(computer) + raise ValueError(f"Unsupported computer type: {type(computer)}") \ No newline at end of file diff --git a/libs/python/agent/agent/computers/base.py b/libs/python/agent/agent/computers/base.py new file mode 100644 index 00000000..161d9fb8 --- /dev/null +++ b/libs/python/agent/agent/computers/base.py @@ -0,0 +1,68 @@ +""" +Base computer interface protocol for agent interactions. +""" + +from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_checkable + + +@runtime_checkable +class ComputerHandler(Protocol): + """Protocol defining the interface for computer interactions.""" + + # ==== Computer-Use-Preview Action Space ==== + + async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: + """Get the current environment type.""" + ... + + async def get_dimensions(self) -> tuple[int, int]: + """Get screen dimensions as (width, height).""" + ... + + async def screenshot(self) -> str: + """Take a screenshot and return as base64 string.""" + ... + + async def click(self, x: int, y: int, button: str = "left") -> None: + """Click at coordinates with specified button.""" + ... + + async def double_click(self, x: int, y: int) -> None: + """Double click at coordinates.""" + ... 
+ + async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + """Scroll at coordinates with specified scroll amounts.""" + ... + + async def type(self, text: str) -> None: + """Type text.""" + ... + + async def wait(self, ms: int = 1000) -> None: + """Wait for specified milliseconds.""" + ... + + async def move(self, x: int, y: int) -> None: + """Move cursor to coordinates.""" + ... + + async def keypress(self, keys: Union[List[str], str]) -> None: + """Press key combination.""" + ... + + async def drag(self, path: List[Dict[str, int]]) -> None: + """Drag along specified path.""" + ... + + async def get_current_url(self) -> str: + """Get current URL (for browser environments).""" + ... + + async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse down at coordinates.""" + ... + + async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse up at coordinates.""" + ... diff --git a/libs/python/agent/agent/computer_handler.py b/libs/python/agent/agent/computers/cua.py similarity index 95% rename from libs/python/agent/agent/computer_handler.py rename to libs/python/agent/agent/computers/cua.py index 53de49ed..30663116 100644 --- a/libs/python/agent/agent/computer_handler.py +++ b/libs/python/agent/agent/computers/cua.py @@ -4,15 +4,15 @@ Computer handler implementation for OpenAI computer-use-preview protocol. 
import base64 from typing import Dict, List, Any, Literal, Union, Optional -from .types import Computer +from .base import ComputerHandler +from computer import Computer - -class OpenAIComputerHandler: +class cuaComputerHandler(ComputerHandler): """Computer handler that implements the Computer protocol using the computer interface.""" - def __init__(self, computer_interface): + def __init__(self, cua_computer: Computer): """Initialize with a computer interface (from tool schema).""" - self.interface = computer_interface + self.interface = cua_computer.interface # ==== Computer-Use-Preview Action Space ==== diff --git a/libs/python/agent/agent/types.py b/libs/python/agent/agent/types.py index 881e1c20..c56a9e5c 100644 --- a/libs/python/agent/agent/types.py +++ b/libs/python/agent/agent/types.py @@ -27,55 +27,3 @@ class AgentConfigInfo(BaseModel): def matches_model(self, model: str) -> bool: """Check if this agent config matches the given model""" return bool(re.match(self.models_regex, model)) - -# Computer tool interface -class Computer(Protocol): - """Protocol defining the interface for computer interactions.""" - - async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: - """Get the current environment type.""" - ... - - async def get_dimensions(self) -> tuple[int, int]: - """Get screen dimensions as (width, height).""" - ... - - async def screenshot(self) -> str: - """Take a screenshot and return as base64 string.""" - ... - - async def click(self, x: int, y: int, button: str = "left") -> None: - """Click at coordinates with specified button.""" - ... - - async def double_click(self, x: int, y: int) -> None: - """Double click at coordinates.""" - ... - - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - """Scroll at coordinates with specified scroll amounts.""" - ... - - async def type(self, text: str) -> None: - """Type text.""" - ... 
- - async def wait(self, ms: int = 1000) -> None: - """Wait for specified milliseconds.""" - ... - - async def move(self, x: int, y: int) -> None: - """Move cursor to coordinates.""" - ... - - async def keypress(self, keys: List[str]) -> None: - """Press key combination.""" - ... - - async def drag(self, path: List[Dict[str, int]]) -> None: - """Drag along specified path.""" - ... - - async def get_current_url(self) -> str: - """Get current URL (for browser environments).""" - ... From 73cd489ac355c9488fb72cf5912bcaf51bc7358b Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 10:53:26 -0400 Subject: [PATCH 35/76] Added dict-based custom computers --- libs/python/agent/agent/agent.py | 9 +- libs/python/agent/agent/computers/__init__.py | 21 +- libs/python/agent/agent/computers/cua.py | 41 ++-- libs/python/agent/agent/computers/custom.py | 185 ++++++++++++++++++ libs/python/agent/agent/types.py | 2 +- 5 files changed, 227 insertions(+), 31 deletions(-) create mode 100644 libs/python/agent/agent/computers/custom.py diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 48bd7b54..536fb341 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -23,6 +23,7 @@ from .callbacks import ( ) from .computers import ( ComputerHandler, + is_agent_computer, make_computer_handler ) @@ -239,10 +240,6 @@ class ComputerAgent: async def _initialize_computers(self): """Initialize computer objects""" if not self.tool_schemas: - for tool in self.tools: - if hasattr(tool, '_initialized') and not tool._initialized: - await tool.run() - # Process tools and create tool schemas self.tool_schemas = self._process_tools() @@ -250,7 +247,7 @@ class ComputerAgent: computer_handler = None for schema in self.tool_schemas: if schema["type"] == "computer": - computer_handler = make_computer_handler(schema["computer"]) + computer_handler = await make_computer_handler(schema["computer"]) break self.computer_handler = 
computer_handler @@ -266,7 +263,7 @@ class ComputerAgent: for tool in self.tools: # Check if it's a computer object (has interface attribute) - if hasattr(tool, 'interface'): + if is_agent_computer(tool): # This is a computer tool - will be handled by agent loop schemas.append({ "type": "computer", diff --git a/libs/python/agent/agent/computers/__init__.py b/libs/python/agent/agent/computers/__init__.py index e2c4a07a..9b60308a 100644 --- a/libs/python/agent/agent/computers/__init__.py +++ b/libs/python/agent/agent/computers/__init__.py @@ -8,14 +8,21 @@ Computer library interface. from .base import ComputerHandler from .cua import cuaComputerHandler -from computer import Computer +from .custom import CustomComputerHandler +from computer import Computer as cuaComputer -def make_computer_handler(computer): +def is_agent_computer(computer): + """Check if the given computer is a ComputerHandler or CUA Computer.""" + return isinstance(computer, ComputerHandler) or \ + isinstance(computer, cuaComputer) or \ + (isinstance(computer, dict)) #and "screenshot" in computer) + +async def make_computer_handler(computer): """ Create a computer handler from a computer interface. 
Args: - computer: Either a ComputerHandler instance or a Computer instance + computer: Either a ComputerHandler instance, Computer instance, or dict of functions Returns: ComputerHandler: A computer handler instance @@ -25,6 +32,10 @@ def make_computer_handler(computer): """ if isinstance(computer, ComputerHandler): return computer - if isinstance(computer, Computer): - return cuaComputerHandler(computer) + if isinstance(computer, cuaComputer): + computer_handler = cuaComputerHandler(computer) + await computer_handler._initialize() + return computer_handler + if isinstance(computer, dict): + return CustomComputerHandler(computer) raise ValueError(f"Unsupported computer type: {type(computer)}") \ No newline at end of file diff --git a/libs/python/agent/agent/computers/cua.py b/libs/python/agent/agent/computers/cua.py index 30663116..34a984df 100644 --- a/libs/python/agent/agent/computers/cua.py +++ b/libs/python/agent/agent/computers/cua.py @@ -12,27 +12,36 @@ class cuaComputerHandler(ComputerHandler): def __init__(self, cua_computer: Computer): """Initialize with a computer interface (from tool schema).""" - self.interface = cua_computer.interface + self.cua_computer = cua_computer + self.interface = None + + async def _initialize(self): + if hasattr(self.cua_computer, '_initialized') and not self.cua_computer._initialized: + await self.cua_computer.run() + self.interface = self.cua_computer.interface # ==== Computer-Use-Preview Action Space ==== async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: """Get the current environment type.""" - # For now, return a default - this could be enhanced to detect actual environment - return "windows" + # TODO: detect actual environment + return "linux" async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" + assert self.interface is not None screen_size = await self.interface.get_screen_size() return screen_size["width"], screen_size["height"] async def 
screenshot(self) -> str: """Take a screenshot and return as base64 string.""" + assert self.interface is not None screenshot_bytes = await self.interface.screenshot() return base64.b64encode(screenshot_bytes).decode('utf-8') async def click(self, x: int, y: int, button: str = "left") -> None: """Click at coordinates with specified button.""" + assert self.interface is not None if button == "left": await self.interface.left_click(x, y) elif button == "right": @@ -43,28 +52,34 @@ class cuaComputerHandler(ComputerHandler): async def double_click(self, x: int, y: int) -> None: """Double click at coordinates.""" + assert self.interface is not None await self.interface.double_click(x, y) async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: """Scroll at coordinates with specified scroll amounts.""" + assert self.interface is not None await self.interface.move_cursor(x, y) await self.interface.scroll(scroll_x, scroll_y) async def type(self, text: str) -> None: """Type text.""" + assert self.interface is not None await self.interface.type_text(text) async def wait(self, ms: int = 1000) -> None: """Wait for specified milliseconds.""" + assert self.interface is not None import asyncio await asyncio.sleep(ms / 1000.0) async def move(self, x: int, y: int) -> None: """Move cursor to coordinates.""" + assert self.interface is not None await self.interface.move_cursor(x, y) async def keypress(self, keys: Union[List[str], str]) -> None: """Press key combination.""" + assert self.interface is not None if isinstance(keys, str): keys = keys.replace("-", "+").split("+") if len(keys) == 1: @@ -75,6 +90,7 @@ class cuaComputerHandler(ComputerHandler): async def drag(self, path: List[Dict[str, int]]) -> None: """Drag along specified path.""" + assert self.interface is not None if not path: return @@ -99,23 +115,10 @@ class cuaComputerHandler(ComputerHandler): # ==== Anthropic Computer Action Space ==== async def left_mouse_down(self, x: Optional[int] = None, y: 
Optional[int] = None) -> None: """Left mouse down at coordinates.""" + assert self.interface is not None await self.interface.mouse_down(x, y, button="left") async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse up at coordinates.""" - await self.interface.mouse_up(x, y, button="left") - -def acknowledge_safety_check_callback(message: str, allow_always: bool = False) -> bool: - """Safety check callback for user acknowledgment.""" - if allow_always: - return True - response = input( - f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): " - ).lower() - return response.strip() == "y" - - -def check_blocklisted_url(url: str) -> None: - """Check if URL is blocklisted (placeholder implementation).""" - # This would contain actual URL checking logic - pass + assert self.interface is not None + await self.interface.mouse_up(x, y, button="left") \ No newline at end of file diff --git a/libs/python/agent/agent/computers/custom.py b/libs/python/agent/agent/computers/custom.py new file mode 100644 index 00000000..19079bad --- /dev/null +++ b/libs/python/agent/agent/computers/custom.py @@ -0,0 +1,185 @@ +""" +Custom computer handler implementation that accepts a dictionary of functions. +""" + +import base64 +from typing import Dict, List, Any, Literal, Union, Optional, Callable +from PIL import Image +import io +from .base import ComputerHandler + + +class CustomComputerHandler(ComputerHandler): + """Computer handler that implements the Computer protocol using a dictionary of custom functions.""" + + def __init__(self, functions: Dict[str, Callable]): + """ + Initialize with a dictionary of functions. + + Args: + functions: Dictionary where keys are method names and values are callable functions. + Only 'screenshot' is required, all others are optional. + + Raises: + ValueError: If required 'screenshot' function is not provided. 
+ """ + if 'screenshot' not in functions: + raise ValueError("'screenshot' function is required in functions dictionary") + + self.functions = functions + self._last_screenshot_size: Optional[tuple[int, int]] = None + + async def _get_value(self, attribute: str): + """ + Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys. + + Args: + attribute: The attribute name to look for + + Returns: + The value from the functions dict, called if callable, returned directly if not + """ + # Check for 'get_{attribute}' first + get_key = f"get_{attribute}" + if get_key in self.functions: + value = self.functions[get_key] + return await value() if callable(value) else value + + # Check for '{attribute}' + if attribute in self.functions: + value = self.functions[attribute] + return await value() if callable(value) else value + + return None + + def _to_b64_str(self, img: Union[bytes, Image.Image, str]) -> str: + """ + Convert image to base64 string. + + Args: + img: Image as bytes, PIL Image, or base64 string + + Returns: + str: Base64 encoded image string + """ + if isinstance(img, str): + # Already a base64 string + return img + elif isinstance(img, bytes): + # Raw bytes + return base64.b64encode(img).decode('utf-8') + elif isinstance(img, Image.Image): + # PIL Image + buffer = io.BytesIO() + img.save(buffer, format='PNG') + return base64.b64encode(buffer.getvalue()).decode('utf-8') + else: + raise ValueError(f"Unsupported image type: {type(img)}") + + # ==== Computer-Use-Preview Action Space ==== + + async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: + """Get the current environment type.""" + result = await self._get_value('environment') + return result if result is not None else "linux" + + async def get_dimensions(self) -> tuple[int, int]: + """Get screen dimensions as (width, height).""" + result = await self._get_value('dimensions') + if result is not None: + return result + + # Fallback: use last screenshot 
size if available + if not self._last_screenshot_size: + await self.screenshot() + assert self._last_screenshot_size is not None, "Failed to get screenshot size" + + return self._last_screenshot_size + + async def screenshot(self) -> str: + """Take a screenshot and return as base64 string.""" + result = await self.functions['screenshot']() + b64_str = self._to_b64_str(result) + + # Try to extract dimensions for fallback use + try: + if isinstance(result, Image.Image): + self._last_screenshot_size = result.size + elif isinstance(result, bytes): + # Try to decode bytes to get dimensions + img = Image.open(io.BytesIO(result)) + self._last_screenshot_size = img.size + except Exception: + # If we can't get dimensions, that's okay + pass + + return b64_str + + async def click(self, x: int, y: int, button: str = "left") -> None: + """Click at coordinates with specified button.""" + if 'click' in self.functions: + await self.functions['click'](x, y, button) + # No-op if not implemented + + async def double_click(self, x: int, y: int) -> None: + """Double click at coordinates.""" + if 'double_click' in self.functions: + await self.functions['double_click'](x, y) + # No-op if not implemented + + async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + """Scroll at coordinates with specified scroll amounts.""" + if 'scroll' in self.functions: + await self.functions['scroll'](x, y, scroll_x, scroll_y) + # No-op if not implemented + + async def type(self, text: str) -> None: + """Type text.""" + if 'type' in self.functions: + await self.functions['type'](text) + # No-op if not implemented + + async def wait(self, ms: int = 1000) -> None: + """Wait for specified milliseconds.""" + if 'wait' in self.functions: + await self.functions['wait'](ms) + else: + # Default implementation + import asyncio + await asyncio.sleep(ms / 1000.0) + + async def move(self, x: int, y: int) -> None: + """Move cursor to coordinates.""" + if 'move' in self.functions: + await 
self.functions['move'](x, y) + # No-op if not implemented + + async def keypress(self, keys: Union[List[str], str]) -> None: + """Press key combination.""" + if 'keypress' in self.functions: + await self.functions['keypress'](keys) + # No-op if not implemented + + async def drag(self, path: List[Dict[str, int]]) -> None: + """Drag along specified path.""" + if 'drag' in self.functions: + await self.functions['drag'](path) + # No-op if not implemented + + async def get_current_url(self) -> str: + """Get current URL (for browser environments).""" + if 'get_current_url' in self.functions: + return await self.functions['get_current_url']() + return "" # Default fallback + + async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse down at coordinates.""" + if 'left_mouse_down' in self.functions: + await self.functions['left_mouse_down'](x, y) + # No-op if not implemented + + async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse up at coordinates.""" + if 'left_mouse_up' in self.functions: + await self.functions['left_mouse_up'](x, y) + # No-op if not implemented diff --git a/libs/python/agent/agent/types.py b/libs/python/agent/agent/types.py index c56a9e5c..23946c86 100644 --- a/libs/python/agent/agent/types.py +++ b/libs/python/agent/agent/types.py @@ -9,7 +9,7 @@ from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam from collections.abc import Iterable # Agent input types -Messages = str | ResponseInputParam +Messages = str | ResponseInputParam | List[Dict[str, Any]] Tools = Optional[Iterable[ToolParam]] # Agent output types From feec92a1d6087a217f92ec763eb900afadbe9df1 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 11:06:43 -0400 Subject: [PATCH 36/76] added support for sync methods --- libs/python/agent/agent/computers/custom.py | 62 ++++++++++++++------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git 
a/libs/python/agent/agent/computers/custom.py b/libs/python/agent/agent/computers/custom.py index 19079bad..7ee027fd 100644 --- a/libs/python/agent/agent/computers/custom.py +++ b/libs/python/agent/agent/computers/custom.py @@ -29,6 +29,29 @@ class CustomComputerHandler(ComputerHandler): self.functions = functions self._last_screenshot_size: Optional[tuple[int, int]] = None + async def _call_function(self, func, *args, **kwargs): + """ + Call a function, handling both async and sync functions. + + Args: + func: The function to call + *args: Positional arguments to pass to the function + **kwargs: Keyword arguments to pass to the function + + Returns: + The result of the function call + """ + import asyncio + import inspect + + if callable(func): + if inspect.iscoroutinefunction(func): + return await func(*args, **kwargs) + else: + return func(*args, **kwargs) + else: + return func + async def _get_value(self, attribute: str): """ Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys. 
@@ -42,13 +65,11 @@ class CustomComputerHandler(ComputerHandler): # Check for 'get_{attribute}' first get_key = f"get_{attribute}" if get_key in self.functions: - value = self.functions[get_key] - return await value() if callable(value) else value + return await self._call_function(self.functions[get_key]) # Check for '{attribute}' if attribute in self.functions: - value = self.functions[attribute] - return await value() if callable(value) else value + return await self._call_function(self.functions[attribute]) return None @@ -81,13 +102,16 @@ class CustomComputerHandler(ComputerHandler): async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: """Get the current environment type.""" result = await self._get_value('environment') - return result if result is not None else "linux" + if result is None: + return "linux" + assert result in ["windows", "mac", "linux", "browser"] + return result # type: ignore async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" result = await self._get_value('dimensions') if result is not None: - return result + return result # type: ignore # Fallback: use last screenshot size if available if not self._last_screenshot_size: @@ -98,8 +122,8 @@ class CustomComputerHandler(ComputerHandler): async def screenshot(self) -> str: """Take a screenshot and return as base64 string.""" - result = await self.functions['screenshot']() - b64_str = self._to_b64_str(result) + result = await self._call_function(self.functions['screenshot']) + b64_str = self._to_b64_str(result) # type: ignore # Try to extract dimensions for fallback use try: @@ -118,31 +142,31 @@ class CustomComputerHandler(ComputerHandler): async def click(self, x: int, y: int, button: str = "left") -> None: """Click at coordinates with specified button.""" if 'click' in self.functions: - await self.functions['click'](x, y, button) + await self._call_function(self.functions['click'], x, y, button) # No-op if not 
implemented async def double_click(self, x: int, y: int) -> None: """Double click at coordinates.""" if 'double_click' in self.functions: - await self.functions['double_click'](x, y) + await self._call_function(self.functions['double_click'], x, y) # No-op if not implemented async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: """Scroll at coordinates with specified scroll amounts.""" if 'scroll' in self.functions: - await self.functions['scroll'](x, y, scroll_x, scroll_y) + await self._call_function(self.functions['scroll'], x, y, scroll_x, scroll_y) # No-op if not implemented async def type(self, text: str) -> None: """Type text.""" if 'type' in self.functions: - await self.functions['type'](text) + await self._call_function(self.functions['type'], text) # No-op if not implemented async def wait(self, ms: int = 1000) -> None: """Wait for specified milliseconds.""" if 'wait' in self.functions: - await self.functions['wait'](ms) + await self._call_function(self.functions['wait'], ms) else: # Default implementation import asyncio @@ -151,35 +175,35 @@ class CustomComputerHandler(ComputerHandler): async def move(self, x: int, y: int) -> None: """Move cursor to coordinates.""" if 'move' in self.functions: - await self.functions['move'](x, y) + await self._call_function(self.functions['move'], x, y) # No-op if not implemented async def keypress(self, keys: Union[List[str], str]) -> None: """Press key combination.""" if 'keypress' in self.functions: - await self.functions['keypress'](keys) + await self._call_function(self.functions['keypress'], keys) # No-op if not implemented async def drag(self, path: List[Dict[str, int]]) -> None: """Drag along specified path.""" if 'drag' in self.functions: - await self.functions['drag'](path) + await self._call_function(self.functions['drag'], path) # No-op if not implemented async def get_current_url(self) -> str: """Get current URL (for browser environments).""" if 'get_current_url' in self.functions: - 
return await self.functions['get_current_url']() + return await self._get_value('current_url') # type: ignore return "" # Default fallback async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse down at coordinates.""" if 'left_mouse_down' in self.functions: - await self.functions['left_mouse_down'](x, y) + await self._call_function(self.functions['left_mouse_down'], x, y) # No-op if not implemented async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse up at coordinates.""" if 'left_mouse_up' in self.functions: - await self.functions['left_mouse_up'](x, y) + await self._call_function(self.functions['left_mouse_up'], x, y) # No-op if not implemented From f78d026060e8753de3d1c486d6cde9ba9313c84a Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 11:09:44 -0400 Subject: [PATCH 37/76] renamed ComputerHandler to AsyncComputerHandler --- libs/python/agent/agent/agent.py | 4 ++-- libs/python/agent/agent/computers/__init__.py | 6 +++--- libs/python/agent/agent/computers/base.py | 2 +- libs/python/agent/agent/computers/cua.py | 4 ++-- libs/python/agent/agent/computers/custom.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 536fb341..e4746878 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -22,7 +22,7 @@ from .callbacks import ( TelemetryCallback, ) from .computers import ( - ComputerHandler, + AsyncComputerHandler, is_agent_computer, make_computer_handler ) @@ -398,7 +398,7 @@ class ComputerAgent: # AGENT OUTPUT PROCESSING # ============================================================================ - async def _handle_item(self, item: Any, computer: Optional[ComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]: + async def _handle_item(self, item: Any, computer: 
Optional[AsyncComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]: """Handle each item; may cause a computer action + screenshot.""" if ignore_call_ids and item.get("call_id") and item.get("call_id") in ignore_call_ids: return [] diff --git a/libs/python/agent/agent/computers/__init__.py b/libs/python/agent/agent/computers/__init__.py index 9b60308a..7c7194b6 100644 --- a/libs/python/agent/agent/computers/__init__.py +++ b/libs/python/agent/agent/computers/__init__.py @@ -6,14 +6,14 @@ computer interface types, supporting both the ComputerHandler protocol and the Computer library interface. """ -from .base import ComputerHandler +from .base import AsyncComputerHandler from .cua import cuaComputerHandler from .custom import CustomComputerHandler from computer import Computer as cuaComputer def is_agent_computer(computer): """Check if the given computer is a ComputerHandler or CUA Computer.""" - return isinstance(computer, ComputerHandler) or \ + return isinstance(computer, AsyncComputerHandler) or \ isinstance(computer, cuaComputer) or \ (isinstance(computer, dict)) #and "screenshot" in computer) @@ -30,7 +30,7 @@ async def make_computer_handler(computer): Raises: ValueError: If the computer type is not supported """ - if isinstance(computer, ComputerHandler): + if isinstance(computer, AsyncComputerHandler): return computer if isinstance(computer, cuaComputer): computer_handler = cuaComputerHandler(computer) diff --git a/libs/python/agent/agent/computers/base.py b/libs/python/agent/agent/computers/base.py index 161d9fb8..82d54057 100644 --- a/libs/python/agent/agent/computers/base.py +++ b/libs/python/agent/agent/computers/base.py @@ -6,7 +6,7 @@ from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_ @runtime_checkable -class ComputerHandler(Protocol): +class AsyncComputerHandler(Protocol): """Protocol defining the interface for computer interactions.""" # ==== Computer-Use-Preview Action Space ==== 
diff --git a/libs/python/agent/agent/computers/cua.py b/libs/python/agent/agent/computers/cua.py index 34a984df..f935be5b 100644 --- a/libs/python/agent/agent/computers/cua.py +++ b/libs/python/agent/agent/computers/cua.py @@ -4,10 +4,10 @@ Computer handler implementation for OpenAI computer-use-preview protocol. import base64 from typing import Dict, List, Any, Literal, Union, Optional -from .base import ComputerHandler +from .base import AsyncComputerHandler from computer import Computer -class cuaComputerHandler(ComputerHandler): +class cuaComputerHandler(AsyncComputerHandler): """Computer handler that implements the Computer protocol using the computer interface.""" def __init__(self, cua_computer: Computer): diff --git a/libs/python/agent/agent/computers/custom.py b/libs/python/agent/agent/computers/custom.py index 7ee027fd..b5f801b6 100644 --- a/libs/python/agent/agent/computers/custom.py +++ b/libs/python/agent/agent/computers/custom.py @@ -6,10 +6,10 @@ import base64 from typing import Dict, List, Any, Literal, Union, Optional, Callable from PIL import Image import io -from .base import ComputerHandler +from .base import AsyncComputerHandler -class CustomComputerHandler(ComputerHandler): +class CustomComputerHandler(AsyncComputerHandler): """Computer handler that implements the Computer protocol using a dictionary of custom functions.""" def __init__(self, functions: Dict[str, Callable]): From 6ebf06e2faadb5b883fecacaef821c84e4f2204c Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 11:14:30 -0400 Subject: [PATCH 38/76] Documented custom computers --- .../agent-sdk/custom-computer-handlers.mdx | 130 ++++++++++++++++++ docs/content/docs/agent-sdk/meta.json | 1 + 2 files changed, 131 insertions(+) create mode 100644 docs/content/docs/agent-sdk/custom-computer-handlers.mdx diff --git a/docs/content/docs/agent-sdk/custom-computer-handlers.mdx b/docs/content/docs/agent-sdk/custom-computer-handlers.mdx new file mode 100644 index 
00000000..a5b05960 --- /dev/null +++ b/docs/content/docs/agent-sdk/custom-computer-handlers.mdx @@ -0,0 +1,130 @@ +--- +title: Custom Computers +slug: custom-computer-handlers +--- + +The Agent SDK supports defining custom computer handlers using a simple dictionary interface. This enables integration with custom automation backends, testing frameworks, or specialized computer control systems. + +## Example: Defining a Custom Computer Handler + +```python +import asyncio +from PIL import Image + +# Define your custom computer functions +async def take_screenshot(): + """Your custom screenshot implementation""" + # Return PIL Image, bytes, or base64 string + return Image.new('RGB', (1920, 1080), color='white') + +# Create dict-based computer handler - only 'screenshot' is required +custom_computer = { + 'screenshot': take_screenshot, # required + + # everything below is optional + 'environment': 'linux', # linux, mac, windows, browser + 'dimensions': (1920, 1080), # (width, height) + 'click': lambda x, y, button: print(f"Clicking at ({x}, {y}) with {button} button"), +} +``` + +You can then use this as a tool for your agent: + +```python +from agent import ComputerAgent + +agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20240620", + tools=[custom_computer], +) + +# Agent will automatically convert dict to agent.computers.CustomComputerHandler +await agent.run("Take a screenshot and click at coordinates 100, 200") +``` + +## Class-Based Implementation + +For more complex implementations, you can create a custom class by inheriting from `AsyncComputerHandler`: + +```python +from agent.computers import AsyncComputerHandler +from PIL import Image +from typing import Literal, List, Dict, Union, Optional + +class MyCustomComputer(AsyncComputerHandler): + """Custom computer handler implementation.""" + + def __init__(self): + # Initialize your custom computer interface here + pass + + # ==== Computer-Use-Preview Action Space ==== + + async def 
get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: + """Get the current environment type.""" + ... + + async def get_dimensions(self) -> tuple[int, int]: + """Get screen dimensions as (width, height).""" + ... + + async def screenshot(self) -> str: + """Take a screenshot and return as base64 string.""" + ... + + async def click(self, x: int, y: int, button: str = "left") -> None: + """Click at coordinates with specified button.""" + ... + + async def double_click(self, x: int, y: int) -> None: + """Double click at coordinates.""" + ... + + async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + """Scroll at coordinates with specified scroll amounts.""" + ... + + async def type(self, text: str) -> None: + """Type text.""" + ... + + async def wait(self, ms: int = 1000) -> None: + """Wait for specified milliseconds.""" + ... + + async def move(self, x: int, y: int) -> None: + """Move cursor to coordinates.""" + ... + + async def keypress(self, keys: Union[List[str], str]) -> None: + """Press key combination.""" + ... + + async def drag(self, path: List[Dict[str, int]]) -> None: + """Drag along specified path.""" + ... + + async def get_current_url(self) -> str: + """Get current URL (for browser environments).""" + ... + + # ==== Anthropic Action Space ==== + + async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse down at coordinates.""" + ... + + async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse up at coordinates.""" + ... 
+ +# Use with agent +custom_computer = MyCustomComputer() + +agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20240620", + tools=[custom_computer], +) + +await agent.run("Take a screenshot and click at coordinates 100, 200") +``` \ No newline at end of file diff --git a/docs/content/docs/agent-sdk/meta.json b/docs/content/docs/agent-sdk/meta.json index fadc5a12..4907fe13 100644 --- a/docs/content/docs/agent-sdk/meta.json +++ b/docs/content/docs/agent-sdk/meta.json @@ -7,6 +7,7 @@ "chat-history", "callbacks", "sandboxed-tools", + "custom-computer-handlers", "local-models", "prompt-caching", "usage-tracking", From b9f307a149370fd3cdabffd8b2f24c3c69248756 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 12:17:35 -0400 Subject: [PATCH 39/76] Added HUD integration --- .../docs/agent-sdk/benchmarks/meta.json | 3 +- .../agent-sdk/benchmarks/osworld-verified.mdx | 89 +++++++++++++++++++ .../docs/agent-sdk/integrations/hud.mdx | 43 +++++++++ .../docs/agent-sdk/integrations/meta.json | 4 + docs/content/docs/agent-sdk/meta.json | 3 +- 5 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx create mode 100644 docs/content/docs/agent-sdk/integrations/hud.mdx create mode 100644 docs/content/docs/agent-sdk/integrations/meta.json diff --git a/docs/content/docs/agent-sdk/benchmarks/meta.json b/docs/content/docs/agent-sdk/benchmarks/meta.json index aa49a156..3573a892 100644 --- a/docs/content/docs/agent-sdk/benchmarks/meta.json +++ b/docs/content/docs/agent-sdk/benchmarks/meta.json @@ -3,6 +3,7 @@ "introduction", "screenspot-v2", "screenspot-pro", - "interactive" + "interactive", + "osworld-verified" ] } \ No newline at end of file diff --git a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx new file mode 100644 index 00000000..16e1ee2c --- /dev/null +++ 
b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx @@ -0,0 +1,89 @@ +--- +title: OSWorld-Verified +description: Benchmark ComputerAgent on OSWorld tasks using HUD +--- + +OSWorld-Verified is a curated subset of OSWorld tasks that can be run using the HUD framework. Use ComputerAgent with HUD to benchmark on these tasks. + +## Setup + +```bash +pip install hud-python==0.2.10 +``` + +Set environment variables: +```bash +export HUD_API_KEY="your_hud_key" +export ANTHROPIC_API_KEY="your_anthropic_key" # For Claude +export OPENAI_API_KEY="your_openai_key" # For OpenAI +``` + +## Quick Start + +```python +import asyncio +from hud import gym, load_taskset +from agent.integrations.hud import ComputerAgent + +async def run_osworld(): + # Load taskset + taskset = await load_taskset("OSWorld-Verified") + test = taskset[144] # Example task + + # Create environment (~2.5 min startup) + env = await gym.make(test) + + # Create agent + agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", # any ComputerAgent model string + environment="linux", + max_iterations=8 + ) + + # Run benchmark + obs, _ = await env.reset() + for i in range(agent.max_iterations): + action, done = await agent.predict(obs) + obs, reward, terminated, info = await env.step(action) + if done or terminated: + break + + # Evaluate results + result = await env.evaluate() + await env.close() + + return result + +# Run benchmark +result = asyncio.run(run_osworld()) +print(f"Success: {result.get('success', False)}") +``` + +## Parallel Execution + +Run all tasks in parallel using `run_job`: + +```python +from hud import run_job, load_taskset +from agent.integrations.hud import ComputerAgent +import logging + +logging.basicConfig(level=logging.INFO) + +# Load full taskset +taskset = await load_taskset("OSWorld-Verified") + +# Run parallel job +job = await run_job( + ComputerAgent, + taskset, + "osworld-computeragent", + max_steps_per_task=8, + max_concurrent_tasks=20, + auto_reply_question=True, +
agent_kwargs={"model": "anthropic/claude-3-5-sonnet-20241022"} +) + +# Get analytics +analytics = await job.get_analytics() +``` diff --git a/docs/content/docs/agent-sdk/integrations/hud.mdx b/docs/content/docs/agent-sdk/integrations/hud.mdx new file mode 100644 index 00000000..786e45b5 --- /dev/null +++ b/docs/content/docs/agent-sdk/integrations/hud.mdx @@ -0,0 +1,43 @@ +--- +title: HUD Evals +description: Use ComputerAgent with HUD for benchmarking and evaluation +--- + +The HUD integration allows you to use ComputerAgent with the [HUD benchmarking framework](https://www.hud.so/), providing the same interface as existing HUD agents while leveraging ComputerAgent's capabilities. + +## Installation + +```bash +pip install "cua-agent[hud]" +## or install hud-python directly +# pip install hud-python==0.2.10 +``` + +## Usage + +```python +from agent.integrations.hud import ComputerAgent + +# Create agent with any ComputerAgent model +agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", # or any model string + environment="linux" +) + +# Use exactly like other HUD agents +action, done = await agent.predict(observation) +``` + +## Environment Variables + +Set these environment variables: + +- `HUD_API_KEY` - Your HUD API key +- `ANTHROPIC_API_KEY` - For Claude models +- `OPENAI_API_KEY` - For OpenAI models + +## Example Benchmarks + +1. [OSWorld-Verified](/agent-sdk/benchmarks/osworld-verified) - Benchmark on OSWorld tasks with parallel execution + +See the [HUD docs](https://docs.hud.so/environment-creation) for more eval environments. 
\ No newline at end of file diff --git a/docs/content/docs/agent-sdk/integrations/meta.json b/docs/content/docs/agent-sdk/integrations/meta.json new file mode 100644 index 00000000..7b7ebb81 --- /dev/null +++ b/docs/content/docs/agent-sdk/integrations/meta.json @@ -0,0 +1,4 @@ +{ + "title": "Integrations", + "pages": ["hud"] +} diff --git a/docs/content/docs/agent-sdk/meta.json b/docs/content/docs/agent-sdk/meta.json index 4907fe13..5db33148 100644 --- a/docs/content/docs/agent-sdk/meta.json +++ b/docs/content/docs/agent-sdk/meta.json @@ -12,6 +12,7 @@ "prompt-caching", "usage-tracking", "benchmarks", - "migration-guide" + "migration-guide", + "integrations" ] } From 5c46ca0c9a30e374272c585a5db00813bf275b7f Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 12:40:50 -0400 Subject: [PATCH 40/76] removed max_its --- .../content/docs/agent-sdk/benchmarks/osworld-verified.mdx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx index 16e1ee2c..5284b11b 100644 --- a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx +++ b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx @@ -36,13 +36,12 @@ async def run_osworld(): # Create agent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", # any ComputerAgent model string - environment="linux", - max_iterations=8 + environment="linux" ) # Run benchmark obs, _ = await env.reset() - for i in range(agent.max_iterations): + for i in range(100): action, done = await agent.predict(obs) obs, reward, terminated, info = await env.step(action) if done or terminated: @@ -78,7 +77,7 @@ job = await run_job( ComputerAgent, taskset, "osworld-computeragent", - max_steps_per_task=8, + max_steps_per_task=100, max_concurrent_tasks=20, auto_reply_question=True, agent_kwargs={"model": "anthropic/claude-3-5-sonnet-20241022"} From 
1882fb68e557a67f4d242955cf688c6ab1016c50 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 12:47:07 -0400 Subject: [PATCH 41/76] Added HUD integration --- libs/python/agent/agent/computers/base.py | 2 + .../agent/agent/integrations/hud/__init__.py | 7 + .../agent/agent/integrations/hud/adapter.py | 121 +++++++++ .../agent/agent/integrations/hud/agent.py | 231 ++++++++++++++++++ .../integrations/hud/computer_handler.py | 187 ++++++++++++++ 5 files changed, 548 insertions(+) create mode 100644 libs/python/agent/agent/integrations/hud/__init__.py create mode 100644 libs/python/agent/agent/integrations/hud/adapter.py create mode 100644 libs/python/agent/agent/integrations/hud/agent.py create mode 100644 libs/python/agent/agent/integrations/hud/computer_handler.py diff --git a/libs/python/agent/agent/computers/base.py b/libs/python/agent/agent/computers/base.py index 82d54057..7fbcb0f7 100644 --- a/libs/python/agent/agent/computers/base.py +++ b/libs/python/agent/agent/computers/base.py @@ -59,6 +59,8 @@ class AsyncComputerHandler(Protocol): """Get current URL (for browser environments).""" ... + # ==== Anthropic Action Space ==== + async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: """Left mouse down at coordinates.""" ... 
diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py new file mode 100644 index 00000000..6459048d --- /dev/null +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -0,0 +1,7 @@ +"""HUD integration for ComputerAgent.""" + +from .agent import ComputerAgent +from .adapter import ComputerAgentAdapter +from .computer_handler import HUDComputerHandler + +__all__ = ["ComputerAgent", "ComputerAgentAdapter", "HUDComputerHandler"] diff --git a/libs/python/agent/agent/integrations/hud/adapter.py b/libs/python/agent/agent/integrations/hud/adapter.py new file mode 100644 index 00000000..77c8dc7d --- /dev/null +++ b/libs/python/agent/agent/integrations/hud/adapter.py @@ -0,0 +1,121 @@ +"""HUD Adapter for ComputerAgent integration.""" + +from __future__ import annotations + +from typing import Any, ClassVar + +from hud.adapters.common import CLA, Adapter +from hud.adapters.common.types import ( + CLAButton, + CLAKey, + ClickAction, + CustomAction, + DragAction, + MoveAction, + Point, + PressAction, + ResponseAction, + ScreenshotFetch, + ScrollAction, + TypeAction, + WaitAction, +) + + +class ComputerAgentAdapter(Adapter): + """Adapter for ComputerAgent to work with HUD.""" + + KEY_MAP: ClassVar[dict[str, CLAKey]] = { + "return": "enter", + "arrowup": "up", + "arrowdown": "down", + "arrowleft": "left", + "arrowright": "right", + "cmd": "ctrl", + "super": "win", + "meta": "win", + } + + BUTTON_MAP: ClassVar[dict[str, CLAButton]] = { + "wheel": "middle", + "middle": "middle", + } + + def __init__(self) -> None: + super().__init__() + # ComputerAgent default dimensions (can be overridden) + self.agent_width = 1024 + self.agent_height = 768 + + def _map_key(self, key: str) -> CLAKey: + """Map a key to its standardized form.""" + return self.KEY_MAP.get(key.lower(), key.lower()) # type: ignore + + def convert(self, data: Any) -> CLA: + """Convert a ComputerAgent action to a HUD action.""" + try: + 
action_type = data.get("type") + + if action_type == "click": + x, y = data.get("x", 0), data.get("y", 0) + button = data.get("button", "left") + button = self.BUTTON_MAP.get(button, button) + if button is None: + button = "left" + converted_action = ClickAction(point=Point(x=x, y=y), button=button) + + elif action_type == "double_click": + x, y = data.get("x", 0), data.get("y", 0) + converted_action = ClickAction(point=Point(x=x, y=y), button="left", pattern=[100]) + + elif action_type == "scroll": + x, y = int(data.get("x", 0)), int(data.get("y", 0)) + scroll_x = int(data.get("scroll_x", 0)) + scroll_y = int(data.get("scroll_y", 0)) + converted_action = ScrollAction( + point=Point(x=x, y=y), scroll=Point(x=scroll_x, y=scroll_y) + ) + + elif action_type == "type": + text = data.get("text", "") + converted_action = TypeAction(text=text, enter_after=False) + + elif action_type == "wait": + ms = data.get("ms", 1000) + converted_action = WaitAction(time=ms) + + elif action_type == "move": + x, y = data.get("x", 0), data.get("y", 0) + converted_action = MoveAction(point=Point(x=x, y=y)) + + elif action_type == "keypress": + keys = data.get("keys", []) + if isinstance(keys, str): + keys = [keys] + converted_action = PressAction(keys=[self._map_key(k) for k in keys]) + + elif action_type == "drag": + path = data.get("path", []) + points = [Point(x=p.get("x", 0), y=p.get("y", 0)) for p in path] + converted_action = DragAction(path=points) + + elif action_type == "screenshot": + converted_action = ScreenshotFetch() + + elif action_type == "response": + converted_action = ResponseAction(text=data.get("text", "")) + + elif action_type == "custom": + converted_action = CustomAction(action=data.get("action", "")) + + else: + raise ValueError(f"Unsupported action type: {action_type}") + + # Add reasoning and logs if available + converted_action.reasoning = data.get("reasoning", "") + converted_action.logs = data.get("logs", "") + + return converted_action + + except Exception 
as e: + raise ValueError(f"Invalid action: {data}. Error: {e!s}") from e diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py new file mode 100644 index 00000000..6f246c20 --- /dev/null +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -0,0 +1,231 @@ +"""HUD ComputerAgent wrapper for OSWorld benchmarking.""" + +import logging +from typing import Any, Literal, Optional, Union, List, Dict +import asyncio + +from agent import ComputerAgent as BaseComputerAgent +from hud.adapters import Adapter +from hud.agent.base import Agent +from hud.utils.common import Observation +from hud.adapters.common.types import LogType +from hud.types import Gym + +from .adapter import ComputerAgentAdapter +from .computer_handler import HUDComputerHandler + +logger = logging.getLogger(__name__) + + +class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): + """ + A ComputerAgent wrapper for HUD integration. + + This agent wraps the base ComputerAgent to work with HUD environments, + providing the same interface as OperatorAgent but using ComputerAgent internally. + """ + + transfer_gyms: dict[Gym, Gym] = {"qa": "hud-browser"} + + def __init__( + self, + model: str = "anthropic/claude-3-5-sonnet-20241022", + environment: Literal["windows", "mac", "linux", "browser"] = "browser", + adapter: Optional[Adapter] = None, + name: Optional[str] = None, + **kwargs: Any, + ): + """ + Initialize the ComputerAgent for HUD. 
+ + Args: + model: The model string for ComputerAgent (e.g., "anthropic/claude-3-5-sonnet-20241022") + environment: The environment type (windows, mac, linux, browser) + adapter: The adapter to use for preprocessing and postprocessing + name: The name of the agent + **kwargs: Additional arguments passed to ComputerAgent + """ + # Create adapter if not provided + adapter = adapter or ComputerAgentAdapter() + + if name is None: + name = f"computeragent-{model.split('/')[-1]}" + + # Initialize the base Agent class without client (we'll create it later) + super().__init__(client=None, adapter=adapter, name=name) + + self.model = model + self.environment = environment + self.kwargs = kwargs + + # Default dimensions + self.width = 1024 + self.height = 768 + + # Update dimensions if adapter is provided + if self.adapter: + self.width = self.adapter.agent_width + self.height = self.adapter.agent_height + + # Create HUD computer handler + self.hud_computer = HUDComputerHandler( + environment=environment, + dimensions=(self.width, self.height) + ) + + # Initialize ComputerAgent with HUD computer handler + self.computer_agent = BaseComputerAgent( + model=model, + tools=[self.hud_computer], + **kwargs + ) + + # Set the client to the computer_agent for compatibility + self.client = self.computer_agent + + # State tracking + self.conversation_history: List[Dict[str, Any]] = [] + self.initial_prompt: Optional[str] = None + + # System prompt for computer use tasks + self.base_system_prompt = """ + You are an autonomous computer-using agent. Follow these guidelines: + + 1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary. + 2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism. + 3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task. + 4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly. 
+ 5. Only stop when the task is fully complete or if you encounter an error that prevents completion. + 6. Trust that the user wants you to complete the entire task they've requested. + + Remember: You have been given permission to complete the requested task autonomously. + """ + + async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]: + """ + Fetch a response from ComputerAgent based on the observation. + + Args: + observation: The preprocessed observation + + Returns: + tuple[list[dict[str, Any]], bool]: A tuple containing the list of raw actions and a + boolean indicating if the agent believes the task is complete. + """ + try: + # Update the computer handler with the current screenshot + if observation.screenshot: + self.hud_computer.update_screenshot(observation.screenshot) + + # Set up action callback to capture actions + captured_actions = [] + action_done = False + + async def action_callback(action: Dict[str, Any]) -> None: + """Callback to capture actions from ComputerAgent.""" + nonlocal captured_actions, action_done + captured_actions.append(action) + + # Set the action callback + self.hud_computer.set_action_callback(action_callback) + + # Prepare the message for ComputerAgent + if not self.conversation_history: + # First interaction - use the observation text as initial prompt + if observation.text: + self.initial_prompt = observation.text + message = f"{self.base_system_prompt}\n\nTask: {observation.text}" + else: + message = f"{self.base_system_prompt}\n\nPlease analyze the current screen and determine what action to take." + + self.conversation_history.append({"role": "user", "content": message}) + else: + # Subsequent interactions - add context about the current state + message = "Continue with the task based on the current screen state."
+ self.conversation_history.append({"role": "user", "content": message}) + + # Run ComputerAgent + try: + # ComputerAgent.run returns an async generator + async for result in self.computer_agent.run(self.conversation_history, stream=False): + # Update conversation history with the output + self.conversation_history += result["output"] + + # Check if we captured any actions + if captured_actions: + # Extract reasoning from the conversation history + reasoning = "" + # Look for the latest reasoning message + for msg in reversed(self.conversation_history): + if msg.get("type") == "reasoning" and msg.get("summary"): + reasoning = " ".join([s.get("text", "") for s in msg["summary"] if s.get("type") == "summary_text"]) + break + elif msg.get("type") == "message" and msg.get("role") == "assistant": + content = msg.get("content", []) + if isinstance(content, list): + reasoning = " ".join([c.get("text", "") for c in content if c.get("type") == "output_text"]) + break + + # Add reasoning and logs to each action + for action in captured_actions: + action["reasoning"] = reasoning + action["logs"] = {"conversation_length": len(self.conversation_history)} + + # Check if task is done by looking for assistant message indicating completion + done = False + for msg in reversed(self.conversation_history): + if msg.get("type") == "message" and msg.get("role") == "assistant": + content = msg.get("content", []) + for c in content: + if c.get("type") == "output_text" and "task completed" in c.get("text", "").lower(): + done = True + break + break + + return captured_actions, done + else: + # No actions captured, task is likely complete + response_text = "Task completed." 
+ for msg in reversed(self.conversation_history): + if msg.get("type") == "message" and msg.get("role") == "assistant": + content = msg.get("content", []) + for c in content: + if c.get("type") == "output_text": + response_text = c.get("text", response_text) + break + break + + response_action = { + "type": "response", + "text": response_text, + "reasoning": response_text, + "logs": {"conversation_length": len(self.conversation_history)} + } + + # Check if this indicates task completion or failure + done = True + if "task is infeasible" in response_text.lower(): + response_action = {"type": "custom", "action": "FAIL"} + + return [response_action], done + + except Exception as e: + logger.error(f"Error running ComputerAgent: {e}") + # Return an error response + error_action = { + "type": "response", + "text": f"Error occurred: {str(e)}", + "reasoning": f"ComputerAgent encountered an error: {str(e)}", + "logs": {"error": str(e)} + } + return [error_action], True + + except Exception as e: + logger.error(f"Error in fetch_response: {e}") + error_action = { + "type": "response", + "text": f"Error in agent processing: {str(e)}", + "reasoning": f"Agent processing error: {str(e)}", + "logs": {"error": str(e)} + } + return [error_action], True diff --git a/libs/python/agent/agent/integrations/hud/computer_handler.py b/libs/python/agent/agent/integrations/hud/computer_handler.py new file mode 100644 index 00000000..5bf86666 --- /dev/null +++ b/libs/python/agent/agent/integrations/hud/computer_handler.py @@ -0,0 +1,187 @@ +"""HUD Computer Handler for ComputerAgent integration.""" + +import base64 +from io import BytesIO +from typing import Literal, Optional, Any, Dict, Callable +from PIL import Image + +from agent.computers import AsyncComputerHandler + + +class HUDComputerHandler(AsyncComputerHandler): + """Computer handler that interfaces with HUD environment.""" + + def __init__( + self, + environment: Literal["windows", "mac", "linux", "browser"] = "browser", + 
dimensions: tuple[int, int] = (1024, 768), + screenshot_callback: Optional[Callable] = None, + action_callback: Optional[Callable] = None, + ): + """ + Initialize HUD computer handler. + + Args: + environment: The environment type for HUD + dimensions: Screen dimensions as (width, height) + screenshot_callback: Optional callback to get screenshots from HUD environment + action_callback: Optional callback to execute actions in HUD environment + """ + super().__init__() + self._environment = environment + self._dimensions = dimensions + self._screenshot_callback = screenshot_callback + self._action_callback = action_callback + + # Store the last screenshot for reuse + self._last_screenshot: Optional[str] = None + + def set_screenshot_callback(self, callback: Callable) -> None: + """Set the screenshot callback.""" + self._screenshot_callback = callback + + def set_action_callback(self, callback: Callable) -> None: + """Set the action callback.""" + self._action_callback = callback + + def update_screenshot(self, screenshot: str) -> None: + """Update the stored screenshot (base64 string).""" + self._last_screenshot = screenshot + + async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: + """Get the current environment type.""" + return self._environment # type: ignore + + async def get_dimensions(self) -> tuple[int, int]: + """Get screen dimensions as (width, height).""" + return self._dimensions + + async def screenshot(self) -> str: + """Take a screenshot and return as base64 string.""" + if self._screenshot_callback: + screenshot = await self._screenshot_callback() + if isinstance(screenshot, str): + self._last_screenshot = screenshot + return screenshot + elif isinstance(screenshot, Image.Image): + # Convert PIL Image to base64 + buffer = BytesIO() + screenshot.save(buffer, format="PNG") + screenshot_b64 = base64.b64encode(buffer.getvalue()).decode() + self._last_screenshot = screenshot_b64 + return screenshot_b64 + elif 
isinstance(screenshot, bytes): + screenshot_b64 = base64.b64encode(screenshot).decode() + self._last_screenshot = screenshot_b64 + return screenshot_b64 + + # Return last screenshot if available, otherwise create a blank one + if self._last_screenshot: + return self._last_screenshot + + # Create a blank screenshot as fallback + blank_image = Image.new('RGB', self._dimensions, color='white') + buffer = BytesIO() + blank_image.save(buffer, format="PNG") + screenshot_b64 = base64.b64encode(buffer.getvalue()).decode() + self._last_screenshot = screenshot_b64 + return screenshot_b64 + + async def click(self, x: int, y: int, button: str = "left") -> None: + """Click at coordinates with specified button.""" + if self._action_callback: + await self._action_callback({ + "type": "click", + "x": x, + "y": y, + "button": button + }) + + async def double_click(self, x: int, y: int) -> None: + """Double click at coordinates.""" + if self._action_callback: + await self._action_callback({ + "type": "double_click", + "x": x, + "y": y + }) + + async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + """Scroll at coordinates with specified scroll amounts.""" + if self._action_callback: + await self._action_callback({ + "type": "scroll", + "x": x, + "y": y, + "scroll_x": scroll_x, + "scroll_y": scroll_y + }) + + async def type(self, text: str) -> None: + """Type text.""" + if self._action_callback: + await self._action_callback({ + "type": "type", + "text": text + }) + + async def wait(self, ms: int = 1000) -> None: + """Wait for specified milliseconds.""" + if self._action_callback: + await self._action_callback({ + "type": "wait", + "ms": ms + }) + + async def move(self, x: int, y: int) -> None: + """Move cursor to coordinates.""" + if self._action_callback: + await self._action_callback({ + "type": "move", + "x": x, + "y": y + }) + + async def keypress(self, keys: list[str] | str) -> None: + """Press key combination.""" + if isinstance(keys, str): + keys = 
[keys] + if self._action_callback: + await self._action_callback({ + "type": "keypress", + "keys": keys + }) + + async def drag(self, path: list[dict[str, int]]) -> None: + """Drag along a path of points.""" + if self._action_callback: + await self._action_callback({ + "type": "drag", + "path": path + }) + + async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse down at coordinates.""" + if self._action_callback: + await self._action_callback({ + "type": "left_mouse_down", + "x": x, + "y": y + }) + + async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None: + """Left mouse up at coordinates.""" + if self._action_callback: + await self._action_callback({ + "type": "left_mouse_up", + "x": x, + "y": y + }) + + async def get_current_url(self) -> str: + """Get the current URL.""" + if self._action_callback: + return await self._action_callback({ + "type": "get_current_url" + }) + return "" \ No newline at end of file From f819c578b7ad590d912e385f1118cdd000b115d6 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 13:14:56 -0400 Subject: [PATCH 42/76] Add example notebook --- notebooks/eval_osworld.ipynb | 320 +++++++++++++++++++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 notebooks/eval_osworld.ipynb diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb new file mode 100644 index 00000000..3d89464d --- /dev/null +++ b/notebooks/eval_osworld.ipynb @@ -0,0 +1,320 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ComputerAgent HUD Integration for OSWorld\n", + "\n", + "This notebook demonstrates how to use the ComputerAgent with HUD for OSWorld benchmarking.\n", + "The ComputerAgent integration provides the same interface as OperatorAgent but works with both Claude and OpenAI models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Install dependencies if needed\n", + "# !uv venv \n", + "# !source .venv/bin/activate\n", + "# !uv sync" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Required environment variables:\n", + "# - HUD_API_KEY (for HUD access)\n", + "# - ANTHROPIC_API_KEY (for Claude models)\n", + "# - OPENAI_API_KEY (for OpenAI models)\n", + "\n", + "from hud import gym, load_taskset\n", + "from pprint import pprint\n", + "import asyncio" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/dillondupont/cua-clean/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "# Import the HUD-integrated ComputerAgent\n", + "from agent.integrations.hud import ComputerAgent" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tasks in OSWorld: 367\n", + "Task prompt: Make the background color of slide 2 same as the color of its title.\n" + ] + } + ], + "source": [ + "# Load OSWorld taskset\n", + "taskset = await load_taskset(\"OSWorld-Verified\")\n", + "print(f\"Total tasks in OSWorld: {len(taskset)}\")\n", + "\n", + "# Select a test task\n", + "test = taskset[144]\n", + "print(f\"Task prompt: {test.prompt}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[ERROR] 2025-08-08 12:42:12,634 | hud.exceptions | HTTP error from HUD SDK: Request failed: Environment 
is in error state, cannot invoke functions | URL: https://orchestration.hud.so/hud-gym/api/v2/environments/525ea26c-096d-41bc-b968-54c62a7f1b9d/invoke | Status: 400 | Response: {\"detail\":\"Environment is in error state, cannot invoke functions\"}\n" + ] + }, + { + "ename": "GymMakeException", + "evalue": "Failed to create environment | Data: {'gym_name': 'OSWorld-Ubuntu', 'environment_prompt': None, 'exception': 'Request failed: Environment is in error state, cannot invoke functions | Status: 400 | Response Text: {\"detail\":\"Environment is in error state, cannot invoke functions\"} | Response JSON: {\\'detail\\': \\'Environment is in error state, cannot invoke functions\\'} | Headers: {\\'content-length\\': \\'67\\', \\'content-type\\': \\'application/json\\', \\'date\\': \\'Fri, 08 Aug 2025 16:42:11 GMT\\', \\'server\\': \\'railway-edge\\', \\'x-railway-edge\\': \\'railway/us-east4\\', \\'x-railway-request-id\\': \\'cH9FJpMKQIGTcIome6l53A\\'}'}", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mHudRequestError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/gym.py:135\u001b[39m, in \u001b[36mmake\u001b[39m\u001b[34m(env_src, job, job_id, metadata)\u001b[39m\n\u001b[32m 134\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m task:\n\u001b[32m--> \u001b[39m\u001b[32m135\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m environment._setup()\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m environment\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/env/environment.py:84\u001b[39m, in \u001b[36mEnvironment._setup\u001b[39m\u001b[34m(self, config)\u001b[39m\n\u001b[32m 83\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m.client, RemoteClient):\n\u001b[32m---> 
\u001b[39m\u001b[32m84\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m.get_urls()\n\u001b[32m 86\u001b[39m setup_request = SetupRequest()\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/env/environment.py:221\u001b[39m, in \u001b[36mEnvironment.get_urls\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Get URLs for the environment.\u001b[39;00m\n\u001b[32m 217\u001b[39m \n\u001b[32m 218\u001b[39m \u001b[33;03mReturns:\u001b[39;00m\n\u001b[32m 219\u001b[39m \u001b[33;03m dict: Dictionary of URLs for accessing the environment\u001b[39;00m\n\u001b[32m 220\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m221\u001b[39m data, _, _ = \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m.client.invoke(FunctionConfig(function=\u001b[33m\"\u001b[39m\u001b[33mget_urls\u001b[39m\u001b[33m\"\u001b[39m, args=[]))\n\u001b[32m 223\u001b[39m \u001b[38;5;28mself\u001b[39m.url = data.get(\u001b[33m\"\u001b[39m\u001b[33murl\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/env/remote_client.py:184\u001b[39m, in \u001b[36mRemoteClient.invoke\u001b[39m\u001b[34m(self, config)\u001b[39m\n\u001b[32m 181\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 182\u001b[39m \u001b[33;03mInvoke a function in the environment.\u001b[39;00m\n\u001b[32m 183\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m184\u001b[39m data = \u001b[38;5;28;01mawait\u001b[39;00m make_request(\n\u001b[32m 185\u001b[39m method=\u001b[33m\"\u001b[39m\u001b[33mPOST\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 186\u001b[39m 
url=\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msettings.base_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/v2/environments/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.env_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/invoke\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 187\u001b[39m json=config.model_dump(),\n\u001b[32m 188\u001b[39m api_key=settings.api_key,\n\u001b[32m 189\u001b[39m )\n\u001b[32m 191\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m data[\u001b[33m\"\u001b[39m\u001b[33mresult\u001b[39m\u001b[33m\"\u001b[39m], b64decode(data[\u001b[33m\"\u001b[39m\u001b[33mstdout\u001b[39m\u001b[33m\"\u001b[39m]), b64decode(data[\u001b[33m\"\u001b[39m\u001b[33mstderr\u001b[39m\u001b[33m\"\u001b[39m])\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/server/requests.py:135\u001b[39m, in \u001b[36mmake_request\u001b[39m\u001b[34m(method, url, json, api_key, max_retries, retry_delay, client)\u001b[39m\n\u001b[32m 134\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m httpx.HTTPStatusError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m135\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m HudRequestError.from_httpx_error(e) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m httpx.RequestError \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[31mHudRequestError\u001b[39m: Request failed: Environment is in error state, cannot invoke functions | Status: 400 | Response Text: {\"detail\":\"Environment is in error state, cannot invoke functions\"} | Response JSON: {'detail': 'Environment is in error state, cannot invoke functions'} | Headers: {'content-length': '67', 'content-type': 'application/json', 'date': 'Fri, 08 Aug 2025 16:42:11 GMT', 'server': 'railway-edge', 'x-railway-edge': 'railway/us-east4', 'x-railway-request-id': 
'cH9FJpMKQIGTcIome6l53A'}", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mGymMakeException\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Create environment (takes ~2.5 minutes to start)\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m env = \u001b[38;5;28;01mawait\u001b[39;00m gym.make(test)\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mEnvironment ready!\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/gym.py:139\u001b[39m, in \u001b[36mmake\u001b[39m\u001b[34m(env_src, job, job_id, metadata)\u001b[39m\n\u001b[32m 137\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 138\u001b[39m build_data[\u001b[33m\"\u001b[39m\u001b[33mexception\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mstr\u001b[39m(e)\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GymMakeException(\u001b[33m\"\u001b[39m\u001b[33mFailed to create environment\u001b[39m\u001b[33m\"\u001b[39m, build_data) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n", + "\u001b[31mGymMakeException\u001b[39m: Failed to create environment | Data: {'gym_name': 'OSWorld-Ubuntu', 'environment_prompt': None, 'exception': 'Request failed: Environment is in error state, cannot invoke functions | Status: 400 | Response Text: {\"detail\":\"Environment is in error state, cannot invoke functions\"} | Response JSON: {\\'detail\\': \\'Environment is in error state, cannot invoke functions\\'} | Headers: {\\'content-length\\': \\'67\\', \\'content-type\\': \\'application/json\\', \\'date\\': \\'Fri, 08 Aug 2025 16:42:11 GMT\\', \\'server\\': 
\\'railway-edge\\', \\'x-railway-edge\\': \\'railway/us-east4\\', \\'x-railway-request-id\\': \\'cH9FJpMKQIGTcIome6l53A\\'}'}" + ] + } + ], + "source": [ + "# Create environment (takes ~2.5 minutes to start)\n", + "env = await gym.make(test)\n", + "print(\"Environment ready!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test with Claude Model\n", + "\n", + "The ComputerAgent can use Claude models just like the original ClaudeAgent:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create ComputerAgent with Claude\n", + "claude_agent = ComputerAgent(\n", + " model=\"anthropic/claude-3-5-sonnet-20241022\",\n", + " environment=\"linux\", # OSWorld typically uses Linux\n", + ")\n", + "\n", + "print(f\"Created Claude agent: {claude_agent.name}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initial observation\n", + "obs, _ = await env.reset()\n", + "print(\"Initial observation complete\")\n", + "\n", + "# Agent loop with Claude\n", + "for i in range(8):\n", + " print(f\"========= Step {i + 1} ==========\")\n", + " \n", + " try:\n", + " action, done = await claude_agent.predict(obs)\n", + " print(f\"Agent's action: {action}\")\n", + "\n", + " obs, reward, terminated, info = await env.step(action)\n", + "\n", + " if done or terminated:\n", + " print(f\"Task completed after {i + 1} steps\")\n", + " break\n", + " \n", + " except Exception as e:\n", + " print(f\"Error in step {i + 1}: {e}\")\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test with OpenAI Model\n", + "\n", + "The same ComputerAgent can also use OpenAI models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reset environment for OpenAI test\n", + "await env.reset()\n", + "\n", + "# Create ComputerAgent with OpenAI\n", + 
"openai_agent = ComputerAgent(\n", + " model=\"openai/computer-use-preview\",\n", + " environment=\"linux\",\n", + ")\n", + "\n", + "print(f\"Created OpenAI agent: {openai_agent.name}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initial observation\n", + "obs, _ = await env.reset()\n", + "print(\"Initial observation complete\")\n", + "\n", + "# Agent loop with OpenAI\n", + "for i in range(8):\n", + " print(f\"========= Step {i + 1} ==========\")\n", + " \n", + " try:\n", + " action, done = await openai_agent.predict(obs)\n", + " print(f\"Agent's action: {action}\")\n", + "\n", + " obs, reward, terminated, info = await env.step(action)\n", + "\n", + " if done or terminated:\n", + " print(f\"Task completed after {i + 1} steps\")\n", + " break\n", + " \n", + " except Exception as e:\n", + " print(f\"Error in step {i + 1}: {e}\")\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Evaluate environment state\n", + "result = await env.evaluate()\n", + "print(\"=== Final Evaluation ===\")\n", + "pprint(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Clean up\n", + "await env.close()\n", + "print(\"Environment closed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparison with Original Agents\n", + "\n", + "The ComputerAgent provides the same interface as ClaudeAgent and OperatorAgent:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compare with original HUD agents\n", + "from hud.agent import ClaudeAgent, OperatorAgent\n", + "\n", + "# Original agents\n", + "original_claude = ClaudeAgent()\n", + "original_operator = 
OperatorAgent(environment=\"linux\")\n", + "\n", + "# ComputerAgent versions\n", + "computer_claude = ComputerAgent(model=\"anthropic/claude-3-5-sonnet-20241022\", environment=\"linux\")\n", + "computer_openai = ComputerAgent(model=\"openai/computer-use-preview\", environment=\"linux\")\n", + "\n", + "print(\"Original agents:\")\n", + "print(f\" ClaudeAgent: {original_claude.name}\")\n", + "print(f\" OperatorAgent: {original_operator.name}\")\n", + "print(\"\\nComputerAgent versions:\")\n", + "print(f\" ComputerAgent (Claude): {computer_claude.name}\")\n", + "print(f\" ComputerAgent (OpenAI): {computer_openai.name}\")\n", + "\n", + "print(\"\\nAll agents have the same interface and can be used interchangeably!\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 8f15c21df96f9576f0979504db412ea514a2926b Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:15:56 -0400 Subject: [PATCH 43/76] added run_job --- .../agent-sdk/benchmarks/osworld-verified.mdx | 33 ++--- .../docs/agent-sdk/integrations/hud.mdx | 36 ++--- .../agent/agent/integrations/hud/__init__.py | 34 ++++- .../agent/agent/integrations/hud/agent.py | 131 +++++++++++------- 4 files changed, 144 insertions(+), 90 deletions(-) diff --git a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx index 5284b11b..1bfc79f2 100644 --- a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx +++ b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx @@ -63,26 +63,23 @@ print(f"Success: {result.get('success', False)}") Run all tasks in parallel 
using `run_job`: ```python -from hud import run_job -from agent.integrations.hud import ComputerAgent -import logging +from agent.integrations.hud import run_job +from hud import load_taskset -logging.basicConfig(level=logging.INFO) +# Load taskset +taskset = await load_taskset("SheetBench-V2") -# Load full taskset -taskset = await load_taskset("OSWorld-Verified") - -# Run parallel job +# Run benchmark job job = await run_job( - ComputerAgent, - taskset, - "osworld-computeragent", - max_steps_per_task=100, - max_concurrent_tasks=20, - auto_reply_question=True, - agent_kwargs={"model": "anthropic/claude-3-5-sonnet-20241022"} + model="anthropic/claude-3-5-sonnet-20241022", + task_or_taskset=taskset, + job_name="test-computeragent-job", + # Any extra ComputerAgent kwargs: + # verbosity=logging.INFO, # Enable logging + # trajectory_dir=".." # Save trajectories locally ) -# Get analytics -analytics = await job.get_analytics() -``` +# Get results OR view them at app.hud.so +print(await job.get_analytics()) +print(f"View results at: https://app.hud.so/jobs/{job.id}") +``` \ No newline at end of file diff --git a/docs/content/docs/agent-sdk/integrations/hud.mdx b/docs/content/docs/agent-sdk/integrations/hud.mdx index 786e45b5..114c4c92 100644 --- a/docs/content/docs/agent-sdk/integrations/hud.mdx +++ b/docs/content/docs/agent-sdk/integrations/hud.mdx @@ -16,28 +16,28 @@ pip install "cua-agent[hud]" ## Usage ```python -from agent.integrations.hud import ComputerAgent +from agent.integrations.hud import run_job +from hud import load_taskset -# Create agent with any ComputerAgent model -agent = ComputerAgent( - model="anthropic/claude-3-5-sonnet-20241022", # or any model string - environment="linux" +# Load taskset +taskset = await load_taskset("OSWorld-Verified") + +# Run benchmark job +job = await run_job( + model="anthropic/claude-3-5-sonnet-20241022", + task_or_taskset=taskset, + job_name="test-computeragent-job", + # Any extra ComputerAgent kwargs: + # 
verbosity=logging.INFO, # Enable logging + # trajectory_dir=".." # Save trajectories locally ) -# Use exactly like other HUD agents -action, done = await agent.predict(observation) +# Get results OR view them at app.hud.so +print(await job.get_analytics()) +print(f"View results at: https://app.hud.so/jobs/{job.id}") ``` -## Environment Variables - -Set these environment variables: - -- `HUD_API_KEY` - Your HUD API key -- `ANTHROPIC_API_KEY` - For Claude models -- `OPENAI_API_KEY` - For OpenAI models - -## Example Benchmarks - -1. [OSWorld-Verified](/agent-sdk/benchmarks/osworld-verified) - Benchmark on OSWorld tasks with parallel execution +**Available Benchmarks:** +1. [OSWorld-Verified](/agent-sdk/benchmarks/osworld-verified) - Benchmark on OSWorld tasks See the [HUD docs](https://docs.hud.so/environment-creation) for more eval environments. \ No newline at end of file diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py index 6459048d..993a3a76 100644 --- a/libs/python/agent/agent/integrations/hud/__init__.py +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -1,7 +1,39 @@ """HUD integration for ComputerAgent.""" +from typing import Any, Optional, Dict +from hud import run_job as hud_run_job + from .agent import ComputerAgent from .adapter import ComputerAgentAdapter from .computer_handler import HUDComputerHandler -__all__ = ["ComputerAgent", "ComputerAgentAdapter", "HUDComputerHandler"] + +async def run_job( + model: str, + task_or_taskset: Any, + job_name: str, + job_kwargs: Optional[Dict[str, Any]] = None, + **agent_kwargs: Any +) -> Any: + """ + Run a job using ComputerAgent with the specified model. 
+ + Args: + model: Model string for ComputerAgent (e.g., "anthropic/claude-3-5-sonnet-20241022") + task_or_taskset: Task or TaskSet to run + job_name: Name for the job + **agent_kwargs: Additional kwargs to pass to ComputerAgent + + Returns: + Job instance from HUD + """ + return await hud_run_job( + agent_cls=ComputerAgent, + agent_kwargs={"model": model, **agent_kwargs}, + task_or_taskset=task_or_taskset, + job_name=job_name, + **job_kwargs or {} + ) + + +__all__ = ["ComputerAgent", "ComputerAgentAdapter", "HUDComputerHandler", "run_job"] \ No newline at end of file diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 6f246c20..9156cf4a 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -16,6 +16,19 @@ from .computer_handler import HUDComputerHandler logger = logging.getLogger(__name__) +BASE_SYSTEM_PROMPT = """ +You are an autonomous computer-using agent. Follow these guidelines: + +1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary. +2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism. +3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task. +4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly. +5. Only stop when the task is fully complete or if you encounter an error that prevents completion. +6. Trust that the user wants you to complete the entire task they've requested. +7. You must say "Task completed" when the task is complete. + +Remember: You have been given permission to complete the requested task autonomously. 
+""".strip() class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): """ @@ -88,25 +101,16 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): self.initial_prompt: Optional[str] = None # System prompt for computer use tasks - self.base_system_prompt = """ - You are an autonomous computer-using agent. Follow these guidelines: - - 1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary. - 2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism. - 3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task. - 4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly. - 5. Only stop when the task is fully complete or if you encounter an error that prevents completion. - 6. Trust that the user wants you to complete the entire task they've requested. - - Remember: You have been given permission to complete the requested task autonomously. - """ + self.base_system_prompt = BASE_SYSTEM_PROMPT async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]: """ Fetch a response from ComputerAgent based on the observation. Args: - observation: The preprocessed observation + observation: The preprocessed observation, attributes: + screenshot: Base64 encoded PNG string of the screen + text: Text observation, if available Returns: tuple[list[dict[str, Any]], bool, list[LogType] | None]: A tuple containing the list of raw actions, @@ -140,9 +144,39 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): self.conversation_history.append({"role": "user", "content": message}) else: - # Subsequent interactions - add context about the current state - message = "Continue with the task based on the current screen state." 
- self.conversation_history.append({"role": "user", "content": message}) + # Subsequent interactions - check if last action was computer_call + # If so, add computer_call_output with screenshot instead of user message + last_computer_calls = [] + for msg in reversed(self.conversation_history): + if msg.get("type") == "computer_call" and msg.get("status") == "completed": + call_id = msg.get("call_id") + if call_id: + # Check if this call_id already has a computer_call_output + has_output = any( + m.get("type") == "computer_call_output" and m.get("call_id") == call_id + for m in self.conversation_history + ) + if not has_output: + last_computer_calls.append(call_id) + elif msg.get("role") == "user": + # Stop at the last user message + break + + if last_computer_calls and observation.screenshot: + # Add computer_call_output for each unresponded computer_call + for call_id in reversed(last_computer_calls): # Maintain order + self.conversation_history.append({ + "type": "computer_call_output", + "call_id": call_id, + "output": { + "type": "input_image", + "image_url": f"data:image/png;base64,{observation.screenshot}" + } + }) + else: + # No computer_call found, add regular user message + message = "Continue with the task based on the current screen state." 
+ self.conversation_history.append({"role": "user", "content": message}) # Run ComputerAgent try: @@ -150,7 +184,8 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): async for result in self.computer_agent.run(self.conversation_history, stream=False): # Update conversation history with the output self.conversation_history += result["output"] - + break + # Check if we captured any actions if captured_actions: # Extract reasoning from the conversation history @@ -171,44 +206,34 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): action["reasoning"] = reasoning action["logs"] = {"conversation_length": len(self.conversation_history)} - # Check if task is done by looking for assistant message indicating completion - done = False - for msg in reversed(self.conversation_history): - if msg.get("type") == "message" and msg.get("role") == "assistant": - content = msg.get("content", []) - for c in content: - if c.get("type") == "output_text" and "task completed" in c.get("text", "").lower(): - done = True - break - break + return captured_actions, False - return captured_actions, done - else: - # No actions captured, task is likely complete - response_text = "Task completed." 
- for msg in reversed(self.conversation_history): - if msg.get("type") == "message" and msg.get("role") == "assistant": - content = msg.get("content", []) - for c in content: - if c.get("type") == "output_text": - response_text = c.get("text", response_text) - break - break - - response_action = { - "type": "response", - "text": response_text, - "reasoning": response_text, - "logs": {"conversation_length": len(self.conversation_history)} - } - - # Check if this indicates task completion or failure + # Check if the last message is "Task completed" + response_text = "" + for msg in reversed(self.conversation_history): + if msg.get("type") == "message" and msg.get("role") == "assistant": + content = msg.get("content", []) + for c in content: + if c.get("type") == "output_text": + response_text = c.get("text", response_text) + break + break + + done = "task completed" in response_text.lower() + + response_action = { + "type": "response", + "text": response_text, + "reasoning": response_text, + "logs": {"conversation_length": len(self.conversation_history)} + } + + # Check if this indicates task completion or failure + if "task is infeasible" in response_text.lower(): + response_action = {"type": "custom", "action": "FAIL"} done = True - if "task is infeasible" in response_text.lower(): - response_action = {"type": "custom", "action": "FAIL"} - - return [response_action], done - + + return [response_action], done except Exception as e: logger.error(f"Error running ComputerAgent: {e}") # Return an error response From ae128a2ae4525fefdd6bdcc4ee3054a64ee9a8aa Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:21:48 -0400 Subject: [PATCH 44/76] added run_job's kwargs --- .../agent/agent/integrations/hud/__init__.py | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py index 993a3a76..9c33bc90 100644 --- 
a/libs/python/agent/agent/integrations/hud/__init__.py +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -1,5 +1,6 @@ """HUD integration for ComputerAgent.""" +import logging from typing import Any, Optional, Dict from hud import run_job as hud_run_job @@ -12,7 +13,17 @@ async def run_job( model: str, task_or_taskset: Any, job_name: str, - job_kwargs: Optional[Dict[str, Any]] = None, + # Job kwargs + auto_reply_question: bool = False, + adapter_cls: Any = None, + adapter_kwargs: Optional[Dict[str, Any]] = None, + max_steps_per_task: int = 20, + run_parallel: bool = True, + job_metadata: Optional[Dict[str, Any]] = None, + show_progress: bool = True, + max_concurrent_env_creations: Optional[int] = 30, # Limits gym.make calls + max_concurrent_agent_predictions: Optional[int] = None, # No limit on LLM calls + max_concurrent_tasks: Optional[int] = 30, # Limits overall task concurrency **agent_kwargs: Any ) -> Any: """ @@ -22,17 +33,43 @@ async def run_job( model: Model string for ComputerAgent (e.g., "anthropic/claude-3-5-sonnet-20241022") task_or_taskset: Task or TaskSet to run job_name: Name for the job + auto_reply_question: Whether to auto-reply to questions + adapter_cls: Custom adapter class (defaults to ComputerAgentAdapter) + adapter_kwargs: Additional kwargs for the adapter + max_steps_per_task: Maximum steps per task + run_parallel: Whether to run tasks in parallel + job_metadata: Additional metadata for the job + show_progress: Whether to show progress + max_concurrent_env_creations: Max concurrent environment creations + max_concurrent_agent_predictions: Max concurrent agent predictions + max_concurrent_tasks: Max concurrent tasks **agent_kwargs: Additional kwargs to pass to ComputerAgent Returns: Job instance from HUD """ + # combine verbose and verbosity kwargs + if "verbose" in agent_kwargs: + agent_kwargs["verbosity"] = logging.INFO + del agent_kwargs["verbose"] + verbose = True if agent_kwargs.get("verbosity", logging.WARNING) > logging.INFO 
else False + # run job return await hud_run_job( agent_cls=ComputerAgent, agent_kwargs={"model": model, **agent_kwargs}, task_or_taskset=task_or_taskset, job_name=job_name, - **job_kwargs or {} + auto_reply_question=auto_reply_question, + adapter_cls=adapter_cls, + adapter_kwargs=adapter_kwargs, + max_steps_per_task=max_steps_per_task, + run_parallel=run_parallel, + job_metadata=job_metadata, + show_progress=show_progress, + verbose=verbose, + max_concurrent_env_creations=max_concurrent_env_creations, + max_concurrent_agent_predictions=max_concurrent_agent_predictions, + max_concurrent_tasks=max_concurrent_tasks ) From 5495529462585eff08676d8bdc505855bb6275fc Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:26:44 -0400 Subject: [PATCH 45/76] limited tasks in notebook --- .../agent-sdk/benchmarks/osworld-verified.mdx | 11 +- .../docs/agent-sdk/integrations/hud.mdx | 1 + notebooks/eval_osworld.ipynb | 257 +++++++++++------- 3 files changed, 162 insertions(+), 107 deletions(-) diff --git a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx index 1bfc79f2..6a1022ba 100644 --- a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx +++ b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx @@ -65,17 +65,20 @@ Run all tasks in parallel using `run_job`: ```python from agent.integrations.hud import run_job from hud import load_taskset +import logging # Load taskset -taskset = await load_taskset("SheetBench-V2") +taskset = await load_taskset("OSWorld-Verified") +taskset = taskset[:10] # limit to 10 tasks instead of all 370 # Run benchmark job job = await run_job( - model="anthropic/claude-3-5-sonnet-20241022", + model="openai/computer-use-preview", task_or_taskset=taskset, job_name="test-computeragent-job", - # Any extra ComputerAgent kwargs: - # verbosity=logging.INFO, # Enable logging + max_concurrent_tasks=5, + # add any extra ComputerAgent kwargs: + 
verbosity=logging.INFO, # Enable logging # trajectory_dir=".." # Save trajectories locally ) diff --git a/docs/content/docs/agent-sdk/integrations/hud.mdx b/docs/content/docs/agent-sdk/integrations/hud.mdx index 114c4c92..2975913c 100644 --- a/docs/content/docs/agent-sdk/integrations/hud.mdx +++ b/docs/content/docs/agent-sdk/integrations/hud.mdx @@ -21,6 +21,7 @@ from hud import load_taskset # Load taskset taskset = await load_taskset("OSWorld-Verified") +taskset = taskset[:10] # limit to 10 tasks instead of all 370 # Run benchmark job job = await run_job( diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index 3d89464d..0d58f58e 100644 --- a/notebooks/eval_osworld.ipynb +++ b/notebooks/eval_osworld.ipynb @@ -27,6 +27,16 @@ "execution_count": 1, "metadata": {}, "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], "source": [ "# Required environment variables:\n", "# - HUD_API_KEY (for HUD access)\n", @@ -40,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -59,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -67,7 +77,7 @@ "output_type": "stream", "text": [ "Total tasks in OSWorld: 367\n", - "Task prompt: Make the background color of slide 2 same as the color of its title.\n" + "Task prompt: Can you make my computer bring back the last tab I shut down?\n" ] } ], @@ -77,7 +87,7 @@ "print(f\"Total tasks in OSWorld: {len(taskset)}\")\n", "\n", "# Select a test task\n", - "test = taskset[144]\n", + "test = taskset[148]\n", "print(f\"Task prompt: {test.prompt}\")" ] }, @@ -85,32 +95,47 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total tasks in SheetBench: 50\n", + "Task prompt: Given the Input data, 
determine the ticker with the greatest correlation between volume and next day price change.\n", + "- in ANSWER tab put the Ticker in A1 and the correlation in B1\n", + " - use CORREL to determine correlation\n", + "- be sure to first sort the date by ticker z to a and then date ascending before calculating nextdaypricechange %\n", + "Correlation should be rounded to 2 decimal points\n" + ] + } + ], + "source": [ + "# Load SheetBench taskset\n", + "taskset = await load_taskset(\"SheetBench-V2\")\n", + "print(f\"Total tasks in SheetBench: {len(taskset)}\")\n", + "\n", + "# Select a test task\n", + "test = taskset[0]\n", + "print(f\"Task prompt: {test.prompt}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[ERROR] 2025-08-08 12:42:12,634 | hud.exceptions | HTTP error from HUD SDK: Request failed: Environment is in error state, cannot invoke functions | URL: https://orchestration.hud.so/hud-gym/api/v2/environments/525ea26c-096d-41bc-b968-54c62a7f1b9d/invoke | Status: 400 | Response: {\"detail\":\"Environment is in error state, cannot invoke functions\"}\n" + "[INFO] 2025-08-08 15:16:46,133 | hud.environment | View the live trace at https://app.hud.so/trace/662fd59f-5a8d-4205-9b88-32c00d0feab0\n" ] }, { - "ename": "GymMakeException", - "evalue": "Failed to create environment | Data: {'gym_name': 'OSWorld-Ubuntu', 'environment_prompt': None, 'exception': 'Request failed: Environment is in error state, cannot invoke functions | Status: 400 | Response Text: {\"detail\":\"Environment is in error state, cannot invoke functions\"} | Response JSON: {\\'detail\\': \\'Environment is in error state, cannot invoke functions\\'} | Headers: {\\'content-length\\': \\'67\\', \\'content-type\\': \\'application/json\\', \\'date\\': \\'Fri, 08 Aug 2025 16:42:11 GMT\\', \\'server\\': \\'railway-edge\\', \\'x-railway-edge\\': \\'railway/us-east4\\', \\'x-railway-request-id\\': 
\\'cH9FJpMKQIGTcIome6l53A\\'}'}", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mHudRequestError\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/gym.py:135\u001b[39m, in \u001b[36mmake\u001b[39m\u001b[34m(env_src, job, job_id, metadata)\u001b[39m\n\u001b[32m 134\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m task:\n\u001b[32m--> \u001b[39m\u001b[32m135\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m environment._setup()\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m environment\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/env/environment.py:84\u001b[39m, in \u001b[36mEnvironment._setup\u001b[39m\u001b[34m(self, config)\u001b[39m\n\u001b[32m 83\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m.client, RemoteClient):\n\u001b[32m---> \u001b[39m\u001b[32m84\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m.get_urls()\n\u001b[32m 86\u001b[39m setup_request = SetupRequest()\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/env/environment.py:221\u001b[39m, in \u001b[36mEnvironment.get_urls\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Get URLs for the environment.\u001b[39;00m\n\u001b[32m 217\u001b[39m \n\u001b[32m 218\u001b[39m \u001b[33;03mReturns:\u001b[39;00m\n\u001b[32m 219\u001b[39m \u001b[33;03m dict: Dictionary of URLs for accessing the environment\u001b[39;00m\n\u001b[32m 220\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m221\u001b[39m data, _, _ = \u001b[38;5;28;01mawait\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m.client.invoke(FunctionConfig(function=\u001b[33m\"\u001b[39m\u001b[33mget_urls\u001b[39m\u001b[33m\"\u001b[39m, args=[]))\n\u001b[32m 223\u001b[39m \u001b[38;5;28mself\u001b[39m.url = data.get(\u001b[33m\"\u001b[39m\u001b[33murl\u001b[39m\u001b[33m\"\u001b[39m)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/env/remote_client.py:184\u001b[39m, in \u001b[36mRemoteClient.invoke\u001b[39m\u001b[34m(self, config)\u001b[39m\n\u001b[32m 181\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 182\u001b[39m \u001b[33;03mInvoke a function in the environment.\u001b[39;00m\n\u001b[32m 183\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m184\u001b[39m data = \u001b[38;5;28;01mawait\u001b[39;00m make_request(\n\u001b[32m 185\u001b[39m method=\u001b[33m\"\u001b[39m\u001b[33mPOST\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 186\u001b[39m url=\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msettings.base_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/v2/environments/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.env_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/invoke\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 187\u001b[39m json=config.model_dump(),\n\u001b[32m 188\u001b[39m api_key=settings.api_key,\n\u001b[32m 189\u001b[39m )\n\u001b[32m 191\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m data[\u001b[33m\"\u001b[39m\u001b[33mresult\u001b[39m\u001b[33m\"\u001b[39m], b64decode(data[\u001b[33m\"\u001b[39m\u001b[33mstdout\u001b[39m\u001b[33m\"\u001b[39m]), b64decode(data[\u001b[33m\"\u001b[39m\u001b[33mstderr\u001b[39m\u001b[33m\"\u001b[39m])\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/server/requests.py:135\u001b[39m, in \u001b[36mmake_request\u001b[39m\u001b[34m(method, url, json, api_key, max_retries, retry_delay, client)\u001b[39m\n\u001b[32m 
134\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m httpx.HTTPStatusError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m135\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m HudRequestError.from_httpx_error(e) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m httpx.RequestError \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[31mHudRequestError\u001b[39m: Request failed: Environment is in error state, cannot invoke functions | Status: 400 | Response Text: {\"detail\":\"Environment is in error state, cannot invoke functions\"} | Response JSON: {'detail': 'Environment is in error state, cannot invoke functions'} | Headers: {'content-length': '67', 'content-type': 'application/json', 'date': 'Fri, 08 Aug 2025 16:42:11 GMT', 'server': 'railway-edge', 'x-railway-edge': 'railway/us-east4', 'x-railway-request-id': 'cH9FJpMKQIGTcIome6l53A'}", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[31mGymMakeException\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Create environment (takes ~2.5 minutes to start)\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m env = \u001b[38;5;28;01mawait\u001b[39;00m gym.make(test)\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mEnvironment ready!\u001b[39m\u001b[33m\"\u001b[39m)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/cua-clean/.venv/lib/python3.12/site-packages/hud/gym.py:139\u001b[39m, in \u001b[36mmake\u001b[39m\u001b[34m(env_src, job, job_id, metadata)\u001b[39m\n\u001b[32m 137\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 138\u001b[39m 
build_data[\u001b[33m\"\u001b[39m\u001b[33mexception\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mstr\u001b[39m(e)\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GymMakeException(\u001b[33m\"\u001b[39m\u001b[33mFailed to create environment\u001b[39m\u001b[33m\"\u001b[39m, build_data) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n", - "\u001b[31mGymMakeException\u001b[39m: Failed to create environment | Data: {'gym_name': 'OSWorld-Ubuntu', 'environment_prompt': None, 'exception': 'Request failed: Environment is in error state, cannot invoke functions | Status: 400 | Response Text: {\"detail\":\"Environment is in error state, cannot invoke functions\"} | Response JSON: {\\'detail\\': \\'Environment is in error state, cannot invoke functions\\'} | Headers: {\\'content-length\\': \\'67\\', \\'content-type\\': \\'application/json\\', \\'date\\': \\'Fri, 08 Aug 2025 16:42:11 GMT\\', \\'server\\': \\'railway-edge\\', \\'x-railway-edge\\': \\'railway/us-east4\\', \\'x-railway-request-id\\': \\'cH9FJpMKQIGTcIome6l53A\\'}'}" + "name": "stdout", + "output_type": "stream", + "text": [ + "Environment ready!\n" ] } ], @@ -120,6 +145,45 @@ "print(\"Environment ready!\")" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'\\n
\\n
\\n \\n
\\n
\\n '" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await env.stream() # vnc" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -131,14 +195,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created Claude agent: computeragent-claude-3-5-sonnet-20241022\n" + ] + } + ], "source": [ + "import logging\n", "# Create ComputerAgent with Claude\n", "claude_agent = ComputerAgent(\n", " model=\"anthropic/claude-3-5-sonnet-20241022\",\n", " environment=\"linux\", # OSWorld typically uses Linux\n", + " verbosity=logging.INFO,\n", ")\n", "\n", "print(f\"Created Claude agent: {claude_agent.name}\")" @@ -146,9 +220,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial observation complete\n", + "========= Step 1 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 15:17:04,030 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [ResponseAction(type='response', reasoning='I\\'ll help you complete this task step by step, but I notice that I don\\'t have any input data or access to Excel through the available functions. The only function I have access to is the \"computer\" function which allows for basic desktop interaction.\\n\\nTo properly assist you, I would need:\\n1. The actual input data you want to analyze\\n2. 
Access to Excel or another spreadsheet tool to perform the calculations\\n\\nCould you please provide the input data and confirm if there\\'s a specific way to access Excel or the data file on this system?\\n\\nOnce provided, I can help calculate correlations between volume and next day price changes, sort the data as specified, and format the results according to your requirements.', logs={'conversation_length': 2}, text='I\\'ll help you complete this task step by step, but I notice that I don\\'t have any input data or access to Excel through the available functions. The only function I have access to is the \"computer\" function which allows for basic desktop interaction.\\n\\nTo properly assist you, I would need:\\n1. The actual input data you want to analyze\\n2. Access to Excel or another spreadsheet tool to perform the calculations\\n\\nCould you please provide the input data and confirm if there\\'s a specific way to access Excel or the data file on this system?\\n\\nOnce provided, I can help calculate correlations between volume and next day price changes, sort the data as specified, and format the results according to your requirements.')]\n", + "Task completed after 1 steps\n" + ] + } + ], "source": [ "# Initial observation\n", "obs, _ = await env.reset()\n", @@ -173,62 +271,6 @@ " break" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test with OpenAI Model\n", - "\n", - "The same ComputerAgent can also use OpenAI models:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Reset environment for OpenAI test\n", - "await env.reset()\n", - "\n", - "# Create ComputerAgent with OpenAI\n", - "openai_agent = ComputerAgent(\n", - " model=\"openai/computer-use-preview\",\n", - " environment=\"linux\",\n", - ")\n", - "\n", - "print(f\"Created OpenAI agent: {openai_agent.name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": 
[], - "source": [ - "# Initial observation\n", - "obs, _ = await env.reset()\n", - "print(\"Initial observation complete\")\n", - "\n", - "# Agent loop with OpenAI\n", - "for i in range(8):\n", - " print(f\"========= Step {i + 1} ==========\")\n", - " \n", - " try:\n", - " action, done = await openai_agent.predict(obs)\n", - " print(f\"Agent's action: {action}\")\n", - "\n", - " obs, reward, terminated, info = await env.step(action)\n", - "\n", - " if done or terminated:\n", - " print(f\"Task completed after {i + 1} steps\")\n", - " break\n", - " \n", - " except Exception as e:\n", - " print(f\"Error in step {i + 1}: {e}\")\n", - " break" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -250,9 +292,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Environment closed\n" + ] + } + ], "source": [ "# Clean up\n", "await env.close()\n", @@ -263,9 +313,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Comparison with Original Agents\n", - "\n", - "The ComputerAgent provides the same interface as ClaudeAgent and OperatorAgent:" + "## Run OSWorld-Verified in parallel" ] }, { @@ -274,25 +322,28 @@ "metadata": {}, "outputs": [], "source": [ - "# Compare with original HUD agents\n", - "from hud.agent import ClaudeAgent, OperatorAgent\n", + "from agent.integrations.hud import run_job\n", + "from hud import load_taskset\n", + "import logging\n", "\n", - "# Original agents\n", - "original_claude = ClaudeAgent()\n", - "original_operator = OperatorAgent(environment=\"linux\")\n", + "# Load taskset\n", + "taskset = await load_taskset(\"OSWorld-Verified\")\n", + "taskset = taskset[:10] # limit to 10 tasks instead of all 370\n", "\n", - "# ComputerAgent versions\n", - "computer_claude = ComputerAgent(model=\"anthropic/claude-3-5-sonnet-20241022\", environment=\"linux\")\n", - "computer_openai = 
ComputerAgent(model=\"openai/computer-use-preview\", environment=\"linux\")\n", + "# Run benchmark job\n", + "job = await run_job(\n", + " model=\"openai/computer-use-preview\",\n", + " task_or_taskset=taskset,\n", + " job_name=\"test-computeragent-job\",\n", + " max_concurrent_tasks=5,\n", + " # add any extra ComputerAgent kwargs:\n", + " verbosity=logging.INFO, # Enable logging\n", + " # trajectory_dir=\"..\" # Save trajectories locally\n", + ")\n", "\n", - "print(\"Original agents:\")\n", - "print(f\" ClaudeAgent: {original_claude.name}\")\n", - "print(f\" OperatorAgent: {original_operator.name}\")\n", - "print(\"\\nComputerAgent versions:\")\n", - "print(f\" ComputerAgent (Claude): {computer_claude.name}\")\n", - "print(f\" ComputerAgent (OpenAI): {computer_openai.name}\")\n", - "\n", - "print(\"\\nAll agents have the same interface and can be used interchangeably!\")" + "# Get results OR view them at app.hud.so\n", + "print(await job.get_analytics())\n", + "print(f\"View results at: https://app.hud.so/jobs/{job.id}\")" ] } ], From 9685833428dd0add2f35d9cf792a91f3f687dd6d Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:32:39 -0400 Subject: [PATCH 46/76] updated docs --- .../docs/agent-sdk/benchmarks/osworld-verified.mdx | 3 ++- docs/content/docs/agent-sdk/integrations/hud.mdx | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx index 6a1022ba..8d82b205 100644 --- a/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx +++ b/docs/content/docs/agent-sdk/benchmarks/osworld-verified.mdx @@ -65,11 +65,12 @@ Run all tasks in parallel using `run_job`: ```python from agent.integrations.hud import run_job from hud import load_taskset +from hud.taskset import TaskSet import logging # Load taskset taskset = await load_taskset("OSWorld-Verified") -taskset = taskset[:10] # limit to 10 tasks instead of 
all 370 +taskset = TaskSet(tasks=taskset[:10]) # limit to 10 tasks instead of all 370 # Run benchmark job job = await run_job( diff --git a/docs/content/docs/agent-sdk/integrations/hud.mdx b/docs/content/docs/agent-sdk/integrations/hud.mdx index 2975913c..cebd36be 100644 --- a/docs/content/docs/agent-sdk/integrations/hud.mdx +++ b/docs/content/docs/agent-sdk/integrations/hud.mdx @@ -18,18 +18,21 @@ pip install "cua-agent[hud]" ```python from agent.integrations.hud import run_job from hud import load_taskset +from hud.taskset import TaskSet +import logging # Load taskset taskset = await load_taskset("OSWorld-Verified") -taskset = taskset[:10] # limit to 10 tasks instead of all 370 +taskset = TaskSet(tasks=taskset[:10]) # limit to 10 tasks instead of all 370 # Run benchmark job job = await run_job( - model="anthropic/claude-3-5-sonnet-20241022", + model="openai/computer-use-preview", task_or_taskset=taskset, job_name="test-computeragent-job", - # Any extra ComputerAgent kwargs: - # verbosity=logging.INFO, # Enable logging + max_concurrent_tasks=5, + # add any extra ComputerAgent kwargs: + verbosity=logging.INFO, # Enable logging # trajectory_dir=".." 
# Save trajectories locally ) From f45f6b84e94f69abc59b98fdd8ce014698bd366e Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:36:01 -0400 Subject: [PATCH 47/76] added gpt-5 + gpta1 examples --- .../docs/agent-sdk/integrations/hud.mdx | 2 ++ .../supported-agents/composed-agents.mdx | 30 +++++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/docs/content/docs/agent-sdk/integrations/hud.mdx b/docs/content/docs/agent-sdk/integrations/hud.mdx index cebd36be..b517121e 100644 --- a/docs/content/docs/agent-sdk/integrations/hud.mdx +++ b/docs/content/docs/agent-sdk/integrations/hud.mdx @@ -28,6 +28,8 @@ taskset = TaskSet(tasks=taskset[:10]) # limit to 10 tasks instead of all 370 # Run benchmark job job = await run_job( model="openai/computer-use-preview", + # model="anthropic/claude-3-5-sonnet-20241022", + # model="huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5", task_or_taskset=taskset, job_name="test-computeragent-job", max_concurrent_tasks=5, diff --git a/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx index 50160fd8..8040d2e5 100644 --- a/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx @@ -28,12 +28,26 @@ Any model that supports `predict_click()` can be used as the grounding component Any vision-enabled LiteLLM-compatible model can be used as the thinking component: - **Anthropic**: `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-3-opus-20240229` -- **OpenAI**: `openai/gpt-4o`, `openai/gpt-4-vision-preview` +- **OpenAI**: `openai/gpt-5`, `openai/gpt-o3`, `openai/gpt-4o` - **Google**: `gemini/gemini-1.5-pro`, `vertex_ai/gemini-pro-vision` - **Local models**: Any Hugging Face vision-language model ## Usage Examples +### GTA1 + GPT-5 + +Use Google's Gemini for planning with specialized grounding: + +```python +agent = ComputerAgent( + 
"huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5", + tools=[computer] +) + +async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): + pass +``` + ### GTA1 + Claude 3.5 Sonnet Combine state-of-the-art grounding with powerful reasoning: @@ -51,20 +65,6 @@ async for _ in agent.run("Open Firefox, navigate to github.com, and search for ' # - GTA1-7B provides precise click coordinates for each UI element ``` -### GTA1 + Gemini Pro - -Use Google's Gemini for planning with specialized grounding: - -```python -agent = ComputerAgent( - "huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro", - tools=[computer] -) - -async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): - pass -``` - ### UI-TARS + GPT-4o Combine two different vision models for enhanced capabilities: From fa888361321efcc9fea2ae493da44ae140ba7449 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:45:53 -0400 Subject: [PATCH 48/76] fixed missing screenshots in agent --- .../agent/agent/integrations/hud/agent.py | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 9156cf4a..8ade909c 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -142,7 +142,20 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): else: message = f"{self.base_system_prompt}\n\nPlease analyze the current screen and determine what action to take." 
- self.conversation_history.append({"role": "user", "content": message}) + input_content = [ + {"type": "input_text", "text": message} + ] + + # Add screenshot if present + if observation.screenshot: + input_content.append( + { + "type": "input_image", + "image_url": f"data:image/png;base64,{observation.screenshot}", + } + ) + + self.conversation_history.append({"role": "user", "content": input_content}) else: # Subsequent interactions - check if last action was computer_call # If so, add computer_call_output with screenshot instead of user message @@ -176,7 +189,20 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): else: # No computer_call found, add regular user message message = "Continue with the task based on the current screen state." - self.conversation_history.append({"role": "user", "content": message}) + input_content = [ + {"type": "input_text", "text": message} + ] + + # Add screenshot if present + if observation.screenshot: + input_content.append( + { + "type": "input_image", + "image_url": f"data:image/png;base64,{observation.screenshot}", + } + ) + + self.conversation_history.append({"role": "user", "content": input_content}) # Run ComputerAgent try: From 6d42c5d9392b829d8316d355b1dc2d043f4a39e2 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 18:50:12 -0400 Subject: [PATCH 49/76] ensure screenshot / output call exists --- libs/python/agent/agent/integrations/hud/agent.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 8ade909c..7740e9c8 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -175,7 +175,8 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): # Stop at the last user message break - if last_computer_calls and observation.screenshot: + if last_computer_calls: + screenshot_b64 = await 
self.hud_computer.screenshot() # Add computer_call_output for each unresponded computer_call for call_id in reversed(last_computer_calls): # Maintain order self.conversation_history.append({ @@ -183,7 +184,7 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): "call_id": call_id, "output": { "type": "input_image", - "image_url": f"data:image/png;base64,{observation.screenshot}" + "image_url": f"data:image/png;base64,{screenshot_b64}" } }) else: From b23cac9e8bfcfa0a6de148f3400db6ec5ad7ddb0 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:01:08 -0400 Subject: [PATCH 50/76] improved trajectory saving with run_job --- .../agent/agent/callbacks/trajectory_saver.py | 61 +++++++++++-------- .../agent/agent/integrations/hud/__init__.py | 11 ++++ 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/libs/python/agent/agent/callbacks/trajectory_saver.py b/libs/python/agent/agent/callbacks/trajectory_saver.py index b59563d5..805b535d 100644 --- a/libs/python/agent/agent/callbacks/trajectory_saver.py +++ b/libs/python/agent/agent/callbacks/trajectory_saver.py @@ -51,12 +51,14 @@ class TrajectorySaverCallback(AsyncCallbackHandler): within the trajectory gets its own folder with screenshots and responses. """ - def __init__(self, trajectory_dir: str): + def __init__(self, trajectory_dir: str, reset_on_run: bool = True): """ Initialize trajectory saver. Args: trajectory_dir: Base directory to save trajectories + reset_on_run: If True, reset trajectory_id/turn/artifact on each run. + If False, continue using existing trajectory_id if set. 
""" self.trajectory_dir = Path(trajectory_dir) self.trajectory_id: Optional[str] = None @@ -64,6 +66,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler): self.current_artifact: int = 0 self.model: Optional[str] = None self.total_usage: Dict[str, Any] = {} + self.reset_on_run = reset_on_run # Ensure trajectory directory exists self.trajectory_dir.mkdir(parents=True, exist_ok=True) @@ -113,32 +116,38 @@ class TrajectorySaverCallback(AsyncCallbackHandler): async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: """Initialize trajectory tracking for a new run.""" model = kwargs.get("model", "unknown") - model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16] - if "+" in model: - model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short + + # Only reset trajectory state if reset_on_run is True or no trajectory exists + if self.reset_on_run or not self.trajectory_id: + model_name_short = model.split("+")[-1].split("/")[-1].lower()[:16] + if "+" in model: + model_name_short = model.split("+")[0].lower()[:4] + "_" + model_name_short - # id format: yyyy-mm-dd_model_hhmmss_uuid[:4] - now = datetime.now() - self.trajectory_id = f"{now.strftime('%Y-%m-%d')}_{model_name_short}_{now.strftime('%H%M%S')}_{str(uuid.uuid4())[:4]}" - self.current_turn = 0 - self.current_artifact = 0 - self.model = model - self.total_usage = {} - - # Create trajectory directory - trajectory_path = self.trajectory_dir / self.trajectory_id - trajectory_path.mkdir(parents=True, exist_ok=True) - - # Save trajectory metadata - metadata = { - "trajectory_id": self.trajectory_id, - "created_at": str(uuid.uuid1().time), - "status": "running", - "kwargs": kwargs, - } - - with open(trajectory_path / "metadata.json", "w") as f: - json.dump(metadata, f, indent=2) + # id format: yyyy-mm-dd_model_hhmmss_uuid[:4] + now = datetime.now() + self.trajectory_id = 
f"{now.strftime('%Y-%m-%d')}_{model_name_short}_{now.strftime('%H%M%S')}_{str(uuid.uuid4())[:4]}" + self.current_turn = 0 + self.current_artifact = 0 + self.model = model + self.total_usage = {} + + # Create trajectory directory + trajectory_path = self.trajectory_dir / self.trajectory_id + trajectory_path.mkdir(parents=True, exist_ok=True) + + # Save trajectory metadata + metadata = { + "trajectory_id": self.trajectory_id, + "created_at": str(uuid.uuid1().time), + "status": "running", + "kwargs": kwargs, + } + + with open(trajectory_path / "metadata.json", "w") as f: + json.dump(metadata, f, indent=2) + else: + # Continue with existing trajectory - just update model if needed + self.model = model @override async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py index 9c33bc90..982c36c8 100644 --- a/libs/python/agent/agent/integrations/hud/__init__.py +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -7,6 +7,7 @@ from hud import run_job as hud_run_job from .agent import ComputerAgent from .adapter import ComputerAgentAdapter from .computer_handler import HUDComputerHandler +from ..callbacks.trajectory_saver import TrajectorySaverCallback async def run_job( @@ -48,11 +49,21 @@ async def run_job( Returns: Job instance from HUD """ + # Handle trajectory_dir by adding TrajectorySaverCallback + trajectory_dir = agent_kwargs.pop("trajectory_dir", None) + callbacks = agent_kwargs.get("callbacks", []) + + if trajectory_dir: + trajectory_callback = TrajectorySaverCallback(trajectory_dir, reset_on_run=False) + callbacks = callbacks + [trajectory_callback] + agent_kwargs["callbacks"] = callbacks + # combine verbose and verbosity kwargs if "verbose" in agent_kwargs: agent_kwargs["verbosity"] = logging.INFO del agent_kwargs["verbose"] verbose = True if 
agent_kwargs.get("verbosity", logging.WARNING) > logging.INFO else False + # run job return await hud_run_job( agent_cls=ComputerAgent, From 30684e0e43991e1888ef8a401ae1d33fdf4c447e Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:04:05 -0400 Subject: [PATCH 51/76] changed default env --- libs/python/agent/agent/integrations/hud/agent.py | 6 ++++-- .../python/agent/agent/integrations/hud/computer_handler.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 7740e9c8..74075377 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -20,7 +20,7 @@ BASE_SYSTEM_PROMPT = """ You are an autonomous computer-using agent. Follow these guidelines: 1. Be decisive and complete tasks without asking for confirmation unless absolutely necessary. -2. If you need user confirmation for safety-critical actions, use the formal safety check mechanism. +2. Use the computer tools to complete the task and do not stop until the task is complete. 3. Do NOT ask questions like "Should I proceed?" or "Would you like me to continue?" - just proceed with the task. 4. When you find what you're looking for (e.g., a file to upload), proceed with the action directly. 5. Only stop when the task is fully complete or if you encounter an error that prevents completion. 
@@ -43,7 +43,7 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): def __init__( self, model: str = "anthropic/claude-3-5-sonnet-20241022", - environment: Literal["windows", "mac", "linux", "browser"] = "browser", + environment: Literal["windows", "mac", "linux", "browser"] = "linux", adapter: Optional[Adapter] = None, name: Optional[str] = None, **kwargs: Any, @@ -176,6 +176,8 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): break if last_computer_calls: + if not observation.screenshot: + print("No screenshot found, taking screenshot") screenshot_b64 = await self.hud_computer.screenshot() # Add computer_call_output for each unresponded computer_call for call_id in reversed(last_computer_calls): # Maintain order diff --git a/libs/python/agent/agent/integrations/hud/computer_handler.py b/libs/python/agent/agent/integrations/hud/computer_handler.py index 5bf86666..9fcc8245 100644 --- a/libs/python/agent/agent/integrations/hud/computer_handler.py +++ b/libs/python/agent/agent/integrations/hud/computer_handler.py @@ -13,7 +13,7 @@ class HUDComputerHandler(AsyncComputerHandler): def __init__( self, - environment: Literal["windows", "mac", "linux", "browser"] = "browser", + environment: Literal["windows", "mac", "linux", "browser"] = "linux", dimensions: tuple[int, int] = (1024, 768), screenshot_callback: Optional[Callable] = None, action_callback: Optional[Callable] = None, From c2011f95829d5810910279be9ada347d5f92fa41 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:06:58 -0400 Subject: [PATCH 52/76] Fixed broken import --- libs/python/agent/agent/integrations/hud/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py index 982c36c8..81f82925 100644 --- a/libs/python/agent/agent/integrations/hud/__init__.py +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -7,7 +7,7 @@ from hud 
import run_job as hud_run_job from .agent import ComputerAgent from .adapter import ComputerAgentAdapter from .computer_handler import HUDComputerHandler -from ..callbacks.trajectory_saver import TrajectorySaverCallback +from agent.callbacks.trajectory_saver import TrajectorySaverCallback async def run_job( From 3b31531cf8692e70ec94cbb00c9da1a97eadad5a Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:10:03 -0400 Subject: [PATCH 53/76] fixed agent loop --- libs/python/agent/agent/integrations/hud/agent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 74075377..58095417 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -211,9 +211,11 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): try: # ComputerAgent.run returns an async generator async for result in self.computer_agent.run(self.conversation_history, stream=False): - # Update conversation history with the output + # if the result has computer_call_output, immediately exit + if result.get("output", [])[-1].get("type") == "computer_call_output": + break + # otherwise add agent output to conversation history self.conversation_history += result["output"] - break # Check if we captured any actions if captured_actions: From 570da7f60dba0ab43bd560f8c8c6def2fadd3234 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:13:46 -0400 Subject: [PATCH 54/76] remove url check --- libs/python/agent/agent/agent.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index e4746878..14bd92aa 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -468,12 +468,12 @@ class ComputerAgent: }, } - # Additional URL safety checks for browser environments - if 
await computer.get_environment() == "browser": - current_url = await computer.get_current_url() - call_output["output"]["current_url"] = current_url - # TODO: implement a callback for URL safety checks - # check_blocklisted_url(current_url) + # # Additional URL safety checks for browser environments + # if await computer.get_environment() == "browser": + # current_url = await computer.get_current_url() + # call_output["output"]["current_url"] = current_url + # # TODO: implement a callback for URL safety checks + # # check_blocklisted_url(current_url) result = [call_output] await self._on_computer_call_end(item, result) From db1d8a4c5dbc4aae5ceb290fe349e6f99fe67133 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:20:17 -0400 Subject: [PATCH 55/76] moved trajectory dir fix --- libs/python/agent/agent/integrations/hud/__init__.py | 10 ---------- libs/python/agent/agent/integrations/hud/agent.py | 12 +++++++++++- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/__init__.py b/libs/python/agent/agent/integrations/hud/__init__.py index 81f82925..787613de 100644 --- a/libs/python/agent/agent/integrations/hud/__init__.py +++ b/libs/python/agent/agent/integrations/hud/__init__.py @@ -7,7 +7,6 @@ from hud import run_job as hud_run_job from .agent import ComputerAgent from .adapter import ComputerAgentAdapter from .computer_handler import HUDComputerHandler -from agent.callbacks.trajectory_saver import TrajectorySaverCallback async def run_job( @@ -49,15 +48,6 @@ async def run_job( Returns: Job instance from HUD """ - # Handle trajectory_dir by adding TrajectorySaverCallback - trajectory_dir = agent_kwargs.pop("trajectory_dir", None) - callbacks = agent_kwargs.get("callbacks", []) - - if trajectory_dir: - trajectory_callback = TrajectorySaverCallback(trajectory_dir, reset_on_run=False) - callbacks = callbacks + [trajectory_callback] - agent_kwargs["callbacks"] = callbacks - # combine verbose and 
verbosity kwargs if "verbose" in agent_kwargs: agent_kwargs["verbosity"] = logging.INFO diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 58095417..cb810b36 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -86,6 +86,16 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): dimensions=(self.width, self.height) ) + # Handle trajectory_dir by adding TrajectorySaverCallback + trajectory_dir = kwargs.pop("trajectory_dir", None) + callbacks = kwargs.get("callbacks", []) + + if trajectory_dir: + from agent.callbacks.trajectory_saver import TrajectorySaverCallback + trajectory_callback = TrajectorySaverCallback(trajectory_dir, reset_on_run=False) + callbacks = callbacks + [trajectory_callback] + kwargs["callbacks"] = callbacks + # Initialize ComputerAgent with HUD computer handler self.computer_agent = BaseComputerAgent( model=model, @@ -212,7 +222,7 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): # ComputerAgent.run returns an async generator async for result in self.computer_agent.run(self.conversation_history, stream=False): # if the result has computer_call_output, immediately exit - if result.get("output", [])[-1].get("type") == "computer_call_output": + if result.get("output", []) and result.get("output", [])[-1].get("type") == "computer_call_output": break # otherwise add agent output to conversation history self.conversation_history += result["output"] From cc73f4504e76e05db0b86fe9bc37f0601c55deed Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:29:16 -0400 Subject: [PATCH 56/76] add optional dep --- libs/python/agent/pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index 75f8159a..b4580fbd 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -47,6 
+47,9 @@ ui = [ cli = [ "yaspin>=3.1.0", ] +hud = [ + "hud-python==0.2.10", +] all = [ # omni requirements "ultralytics>=8.0.0", @@ -59,6 +62,8 @@ all = [ "python-dotenv>=1.0.1", # cli requirements "yaspin>=3.1.0", + # hud requirements + "hud-python==0.2.10", ] [tool.uv] From 3e734c93b0500ec29f544af47065bdf46930ede3 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:44:27 -0400 Subject: [PATCH 57/76] fixed trailing reasoning --- .../agent/agent/integrations/hud/agent.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index cb810b36..602a3a0c 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -217,6 +217,35 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): self.conversation_history.append({"role": "user", "content": input_content}) + # If the last message is a reasoning message, change it to output_text + if (self.conversation_history and + self.conversation_history[-1].get("type") == "reasoning" and + self.conversation_history[-1].get("summary")): + + reasoning_msg = self.conversation_history[-1] + summary_texts = [] + + # Extract all summary_text entries + for summary_item in reasoning_msg["summary"]: + if summary_item.get("type") == "summary_text": + summary_texts.append(summary_item.get("text", "")) + + # Convert to message format with output_text + if summary_texts: + converted_message = { + "type": "message", + "role": "assistant", + "content": [ + { + "text": " ".join(summary_texts), + "type": "output_text" + } + ] + } + + # Replace the reasoning message with the converted message + self.conversation_history[-1] = converted_message + # Run ComputerAgent try: # ComputerAgent.run returns an async generator From 8bbcbec54bd5dc971731d0d5281ee24757019f59 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Fri, 8 Aug 2025 19:46:19 -0400 
Subject: [PATCH 58/76] updated notebook --- notebooks/eval_osworld.ipynb | 931 +++++++++++++++++++++++++++++++++-- 1 file changed, 895 insertions(+), 36 deletions(-) diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index 0d58f58e..a287022c 100644 --- a/notebooks/eval_osworld.ipynb +++ b/notebooks/eval_osworld.ipynb @@ -50,18 +50,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dillondupont/cua-clean/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "# Import the HUD-integrated ComputerAgent\n", "from agent.integrations.hud import ComputerAgent" @@ -93,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -121,14 +112,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[INFO] 2025-08-08 15:16:46,133 | hud.environment | View the live trace at https://app.hud.so/trace/662fd59f-5a8d-4205-9b88-32c00d0feab0\n" + "[INFO] 2025-08-08 19:08:17,078 | hud.environment | View the live trace at https://app.hud.so/trace/ca88c178-cf40-499b-8ad3-d5d60348d9fe\n" ] }, { @@ -147,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -156,7 +147,7 @@ "\n", "
\n", "
\n", - " \n", "
\n", "
\n", @@ -172,10 +163,10 @@ { "data": { "text/plain": [ - "'\\n
\\n
\\n \\n
\\n
\\n '" + "'\\n
\\n
\\n \\n
\\n
\\n '" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -188,21 +179,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Test with Claude Model\n", + "## Test with any supported CUA model\n", "\n", - "The ComputerAgent can use Claude models just like the original ClaudeAgent:" + "The ComputerAgent integration can use Claude, OpenAI, UI-TARS, or composed models just like the original ComputerAgent:" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Created Claude agent: computeragent-claude-3-5-sonnet-20241022\n" + "Created agent: computeragent-computer-use-preview\n" ] } ], @@ -210,17 +201,20 @@ "import logging\n", "# Create ComputerAgent with Claude\n", "claude_agent = ComputerAgent(\n", - " model=\"anthropic/claude-3-5-sonnet-20241022\",\n", - " environment=\"linux\", # OSWorld typically uses Linux\n", + " # model=\"anthropic/claude-3-5-sonnet-20241022\",\n", + " model=\"openai/computer-use-preview\",\n", + " # environment=\"linux\", # OSWorld typically uses Linux\n", + " environment=\"browser\", # SheetBench uses the browser\n", + " trajectory_dir=\"trajectories\",\n", " verbosity=logging.INFO,\n", ")\n", "\n", - "print(f\"Created Claude agent: {claude_agent.name}\")" + "print(f\"Created agent: {claude_agent.name}\")" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -235,15 +229,127 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-08-08 15:17:04,030 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n" + "2025-08-08 19:14:10,479 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "2025-08-08 19:14:18,867 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 55, 'y': 149})\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Agent's 
action: [ResponseAction(type='response', reasoning='I\\'ll help you complete this task step by step, but I notice that I don\\'t have any input data or access to Excel through the available functions. The only function I have access to is the \"computer\" function which allows for basic desktop interaction.\\n\\nTo properly assist you, I would need:\\n1. The actual input data you want to analyze\\n2. Access to Excel or another spreadsheet tool to perform the calculations\\n\\nCould you please provide the input data and confirm if there\\'s a specific way to access Excel or the data file on this system?\\n\\nOnce provided, I can help calculate correlations between volume and next day price changes, sort the data as specified, and format the results according to your requirements.', logs={'conversation_length': 2}, text='I\\'ll help you complete this task step by step, but I notice that I don\\'t have any input data or access to Excel through the available functions. The only function I have access to is the \"computer\" function which allows for basic desktop interaction.\\n\\nTo properly assist you, I would need:\\n1. The actual input data you want to analyze\\n2. 
Access to Excel or another spreadsheet tool to perform the calculations\\n\\nCould you please provide the input data and confirm if there\\'s a specific way to access Excel or the data file on this system?\\n\\nOnce provided, I can help calculate correlations between volume and next day price changes, sort the data as specified, and format the results according to your requirements.')]\n", - "Task completed after 1 steps\n" + "Agent's action: [ClickAction(type='click', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 3}, point=Point(x=77, y=174), button='left', pattern=None, hold_keys=None)]\n", + "========= Step 2 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:14:24,566 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n", + "2025-08-08 19:14:30,430 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [PressAction(type='press', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 5}, keys=['ctrl', 'a'])]\n", + "========= Step 3 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:14:36,137 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "2025-08-08 19:14:42,483 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 73, 'y': 151})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [ClickAction(type='click', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 7}, point=Point(x=102, y=176), button='left', pattern=None, hold_keys=None)]\n", + "========= Step 4 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:14:48,687 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + 
"2025-08-08 19:14:59,516 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [PressAction(type='press', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 9}, keys=['ctrl', 'a'])]\n", + "========= Step 5 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:15:05,229 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "2025-08-08 19:15:15,153 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 55, 'y': 147}, {'x': 319, 'y': 713}]})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [DragAction(type='drag', reasoning='Highlighting data for sorting preparation', logs={'conversation_length': 12}, path=[Point(x=77, y=172), Point(x=448, y=835)], pattern=None, hold_keys=None)]\n", + "========= Step 6 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:15:21,362 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n", + "2025-08-08 19:15:33,774 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 229, 'y': 41})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [ClickAction(type='click', reasoning='Opening sort options for data', logs={'conversation_length': 15}, point=Point(x=322, y=48), button='left', pattern=None, hold_keys=None)]\n", + "========= Step 7 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:15:39,973 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "2025-08-08 19:15:52,928 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 430, 'y': 96})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [ClickAction(type='click', 
reasoning='Choosing \"Sort range\" for sorting', logs={'conversation_length': 18}, point=Point(x=604, y=112), button='left', pattern=None, hold_keys=None)]\n", + "========= Step 8 ==========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 19:15:59,611 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n", + "2025-08-08 19:16:17,003 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 530, 'y': 172})\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent's action: [ClickAction(type='click', reasoning='Accessing advanced sorting options now', logs={'conversation_length': 21}, point=Point(x=745, y=201), button='left', pattern=None, hold_keys=None)]\n" ] } ], @@ -280,9 +386,353 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Final Evaluation ===\n", + "{'error': None,\n", + " 'gold_file_url': 'https://gahludmjcsmszgyufydt.supabase.co//storage/v1/object/public/sheetbench/615426c8-9df7-4ffa-92e9-200134a84da9/gold_solution_2.xlsx?',\n", + " 'logs': 'INFO: Starting evaluation with evaluator: sheets_cell_values\\n'\n", + " \"INFO: Evaluator args: [{'A1': 'ABC', 'B1': '-0.08'}]\\n\"\n", + " 'INFO: Partial rewarding: False\\n'\n", + " 'INFO: Starting sheets_cell_values evaluation for environment: '\n", + " 'af7a34a0-43b0-44d2-82d0-2b66ed16f1ea\\n'\n", + " \"INFO: Raw args received: [{'A1': 'ABC', 'B1': '-0.08'}] (type: \"\n", + " \")\\n\"\n", + " 'INFO: Partial rewarding enabled: False\\n'\n", + " 'INFO: === Google Sheets Cell Value Verification ===\\n'\n", + " 'INFO: Current page URL: '\n", + " 'https://docs.google.com/spreadsheets/d/1h-Ec3rW9sAME2sTn8qxIvFxO6qXtdURPacEFL5DJnqw/edit?gid=700326861#gid=700326861\\n'\n", + " 'INFO: ✅ Confirmed on Google Sheets page\\n'\n", + " 'INFO: Processing args parameter...\\n'\n", + " 
'INFO: Args is a list with 1 items, extracting first item\\n'\n", + " \"INFO: Extracted: {'A1': 'ABC', 'B1': '-0.08'} (type: )\\n\"\n", + " 'INFO: Cell checks to perform: 2 cells\\n'\n", + " \"INFO: A1 -> expected: 'ABC'\\n\"\n", + " \"INFO: B1 -> expected: '-0.08'\\n\"\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " \"sheets_cell_values: Checking cells: {'A1': 'ABC', 'B1': '-0.08'}\\n\"\n", + " 'INFO: === ANSWER Sheet Navigation ===\\n'\n", + " 'INFO: Attempt 1/3: Attempting to find and navigate to ANSWER sheet '\n", + " 'tab...\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Attempt 1/3: Attempting to navigate to ANSWER '\n", + " 'sheet\\n'\n", + " 'INFO: Searching for ANSWER tab with selector: '\n", + " 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n", + " 'INFO: ANSWER tab search result (attempt 1): Found\\n'\n", + " 'INFO: ✅ Found ANSWER sheet tab on attempt 1, clicking on it...\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Found ANSWER sheet tab on attempt 1, clicking on '\n", + " 'it\\n'\n", + " 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 1: '\n", + " 'Locator.click: Timeout 30000ms exceeded.\\n'\n", + " 'Call log:\\n'\n", + " ' - waiting for '\n", + " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n", + " ' - - locator resolved to ANSWER\\n'\n", + " ' - - attempting click action\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 20ms\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 100ms\\n'\n", + " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 500ms\\n'\n", + " '\\n'\n", + " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 1: '\n", + " 'Locator.click: Timeout 30000ms exceeded.\\n'\n", + " 'Call log:\\n'\n", + " ' - waiting for '\n", + " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n", + " ' - - locator resolved to ANSWER\\n'\n", + " ' - - attempting click action\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 20ms\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 100ms\\n'\n", + " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 500ms\\n'\n", + " '\\n'\n", + " 'INFO: Waiting 500ms before retry 2...\\n'\n", + " 'INFO: Attempt 2/3: Attempting to find and navigate to ANSWER sheet '\n", + " 'tab...\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Attempt 2/3: Attempting to navigate to ANSWER '\n", + " 'sheet\\n'\n", + " 'INFO: Searching for ANSWER tab with selector: '\n", + " 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n", + " 'INFO: ANSWER tab search result (attempt 2): Found\\n'\n", + " 'INFO: ✅ Found ANSWER sheet tab on attempt 2, clicking on it...\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Found ANSWER sheet tab on attempt 2, clicking on '\n", + " 'it\\n'\n", + " 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 2: '\n", + " 'Locator.click: Timeout 30000ms exceeded.\\n'\n", + " 'Call log:\\n'\n", + " ' - waiting for '\n", + " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n", + " ' - - locator resolved to ANSWER\\n'\n", + " ' - - attempting click action\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 20ms\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 100ms\\n'\n", + " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 500ms\\n'\n", + " '\\n'\n", + " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 2: '\n", + " 'Locator.click: Timeout 30000ms exceeded.\\n'\n", + " 'Call log:\\n'\n", + " ' - waiting for '\n", + " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n", + " ' - - locator resolved to ANSWER\\n'\n", + " ' - - attempting click action\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 20ms\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 100ms\\n'\n", + " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 500ms\\n'\n", + " '\\n'\n", + " 'INFO: Waiting 500ms before retry 3...\\n'\n", + " 'INFO: Attempt 3/3: Attempting to find and navigate to ANSWER sheet '\n", + " 'tab...\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Attempt 3/3: Attempting to navigate to ANSWER '\n", + " 'sheet\\n'\n", + " 'INFO: Searching for ANSWER tab with selector: '\n", + " 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n", + " 'INFO: ANSWER tab search result (attempt 3): Found\\n'\n", + " 'INFO: ✅ Found ANSWER sheet tab on attempt 3, clicking on it...\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Found ANSWER sheet tab on attempt 3, clicking on '\n", + " 'it\\n'\n", + " 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 3: '\n", + " 'Locator.click: Timeout 30000ms exceeded.\\n'\n", + " 'Call log:\\n'\n", + " ' - waiting for '\n", + " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n", + " ' - - locator resolved to ANSWER\\n'\n", + " ' - - attempting click action\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 20ms\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 100ms\\n'\n", + " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 500ms\\n'\n", + " '\\n'\n", + " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 3: '\n", + " 'Locator.click: Timeout 30000ms exceeded.\\n'\n", + " 'Call log:\\n'\n", + " ' - waiting for '\n", + " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n", + " ' - - locator resolved to ANSWER\\n'\n", + " ' - - attempting click action\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 20ms\\n'\n", + " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 100ms\\n'\n", + " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n", + " ' - - element is visible, enabled and stable\\n'\n", + " ' - - scrolling into view if needed\\n'\n", + " ' - - done scrolling\\n'\n", + " ' - -
'\n", + " 'intercepts pointer events\\n'\n", + " ' - - retrying click action\\n'\n", + " ' - - waiting 500ms\\n'\n", + " '\\n'\n", + " 'WARNING: ⚠️ Failed to navigate to ANSWER sheet after 3 attempts, '\n", + " 'proceeding with current sheet\\n'\n", + " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Failed to navigate to ANSWER sheet after 3 '\n", + " 'attempts, proceeding with current sheet\\n'\n", + " 'INFO: === File Content Extraction ===\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Granted read-write permissions\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Extracting page contents\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Selecting content\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Successfully extracted 157940 characters from '\n", + " 'file\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Found 5003 rows in content\\n'\n", + " 'INFO: Content extracted: 157940 characters\\n'\n", + " 'INFO: === Cell Content Parsing ===\\n'\n", + " 'INFO: Split file content into 5003 rows\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Found 5003 rows in content\\n'\n", + " 'INFO: First few rows of content:\\n'\n", + " \"INFO: Row 1: 'TradeDate | Ticker | ClosePrice | Volume | | '\\n\"\n", + " \"INFO: Row 2: '2023-01-02 | ABC | 476.87 | 2225355 | | '\\n\"\n", + " \"INFO: Row 3: '2023-01-02 | DEF | 322.21 | 3778582 | | '\\n\"\n", + " 'INFO: ... 
and 5000 more rows\\n'\n", + " 'INFO: === Cell Reference Parsing ===\\n'\n", + " \"INFO: Processing cell reference: 'A1' -> expected: 'ABC'\\n\"\n", + " \"INFO: Parsed 'A1' -> row=1 (0-indexed: 0), col=A (0-indexed: 0)\\n\"\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Parsed cell A1 as row=0, col=0\\n'\n", + " 'INFO: Row 1 exists in content\\n'\n", + " \"INFO: Row 1 has 6 columns: ['Col1', 'Col2', 'Col3', 'Col4', \"\n", + " \"'Col5', 'Col6']\\n\"\n", + " \"INFO: ✅ Found value for A1: 'TradeDate'\\n\"\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " \"sheets_cell_values: Found value for A1: 'TradeDate'\\n\"\n", + " \"INFO: Processing cell reference: 'B1' -> expected: '-0.08'\\n\"\n", + " \"INFO: Parsed 'B1' -> row=1 (0-indexed: 0), col=B (0-indexed: 1)\\n\"\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Parsed cell B1 as row=0, col=1\\n'\n", + " 'INFO: Row 1 exists in content\\n'\n", + " \"INFO: Row 1 has 6 columns: ['Col1', 'Col2', 'Col3', 'Col4', \"\n", + " \"'Col5', 'Col6']\\n\"\n", + " \"INFO: ✅ Found value for B1: 'Ticker'\\n\"\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " \"sheets_cell_values: Found value for B1: 'Ticker'\\n\"\n", + " 'INFO: === Cell Value Comparison ===\\n'\n", + " 'INFO: Comparing cell A1:\\n'\n", + " \"INFO: Expected: 'ABC' (type: )\\n\"\n", + " \"INFO: Actual: 'TradeDate' (type: )\\n\"\n", + " \"INFO: ❌ VALUE MISMATCH: 'TradeDate' != 'ABC'\\n\"\n", + " 'INFO: Comparing cell B1:\\n'\n", + " \"INFO: Expected: '-0.08' (type: )\\n\"\n", + " \"INFO: Actual: 'Ticker' (type: )\\n\"\n", + " \"INFO: ❌ VALUE MISMATCH: 'Ticker' != '-0.08'\\n\"\n", + " 'INFO: === Final Results ===\\n'\n", + " 'INFO: Cell comparison summary:\\n'\n", + " 'INFO: Total cells checked: 2\\n'\n", + " 'INFO: Matches: 0\\n'\n", + " 'INFO: Mismatches: 2\\n'\n", + " \"INFO: Failed cells: ['A1:', 'B1:']\\n\"\n", + " 'INFO: ❌ NOT all cells match 
expected values\\n'\n", + " 'INFO: Mismatches: [\"Cell A1: expected \\'ABC\\', got \\'TradeDate\\'\", '\n", + " '\"Cell B1: expected \\'-0.08\\', got \\'Ticker\\'\"]\\n'\n", + " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n", + " 'sheets_cell_values: Mismatches found: [\"Cell A1: expected \\'ABC\\', '\n", + " 'got \\'TradeDate\\'\", \"Cell B1: expected \\'-0.08\\', got \\'Ticker\\'\"]\\n'\n", + " 'INFO: Final reward: 0.0\\n'\n", + " 'INFO: === Sheets Cell Values Evaluation Complete ===\\n'\n", + " 'INFO: Evaluation completed. Final reward: 0.0\\n',\n", + " 'reward': 0.0}\n" + ] + } + ], "source": [ "# Evaluate environment state\n", "result = await env.evaluate()\n", @@ -292,7 +742,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -318,17 +768,426 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%|----------------------------------------| 0/200 [1:24 Date: Mon, 11 Aug 2025 10:31:49 -0400 Subject: [PATCH 59/76] Limited pytorch inference to 1 thread --- .../agent/agent/adapters/huggingfacelocal_adapter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py index e8281114..11f03c0f 100644 --- a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py +++ b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py @@ -1,5 +1,6 @@ import asyncio import warnings +from concurrent.futures import ThreadPoolExecutor from typing import Iterator, AsyncIterator, Dict, List, Any, Optional from litellm.types.utils import GenericStreamingChunk, ModelResponse from litellm.llms.custom_llm import CustomLLM @@ -28,6 +29,7 @@ class HuggingFaceLocalAdapter(CustomLLM): self.device = device self.models = {} # Cache for 
loaded models self.processors = {} # Cache for loaded processors + self._executor = ThreadPoolExecutor(max_workers=1) # Single thread pool def _load_model_and_processor(self, model_name: str): """Load model and processor if not already cached. @@ -51,7 +53,8 @@ class HuggingFaceLocalAdapter(CustomLLM): processor = AutoProcessor.from_pretrained( model_name, min_pixels=3136, - max_pixels=4096 * 2160 + max_pixels=4096 * 2160, + device_map=self.device ) # Cache them @@ -185,7 +188,8 @@ class HuggingFaceLocalAdapter(CustomLLM): ModelResponse with generated text """ # Run _generate in thread pool to avoid blocking - generated_text = await asyncio.to_thread(self._generate, **kwargs) + loop = asyncio.get_event_loop() + generated_text = await loop.run_in_executor(self._executor, self._generate, **kwargs) return await acompletion( model=f"huggingface-local/{kwargs['model']}", @@ -218,7 +222,8 @@ class HuggingFaceLocalAdapter(CustomLLM): AsyncIterator of GenericStreamingChunk """ # Run _generate in thread pool to avoid blocking - generated_text = await asyncio.to_thread(self._generate, **kwargs) + loop = asyncio.get_event_loop() + generated_text = await loop.run_in_executor(self._executor, self._generate, **kwargs) generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", From d172ca85639fb79bee0710e223fd82e4933fdbde Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Mon, 11 Aug 2025 10:54:21 -0400 Subject: [PATCH 60/76] fixed improper kwargs --- .../agent/agent/adapters/huggingfacelocal_adapter.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py index 11f03c0f..46d72db3 100644 --- a/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py +++ b/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py @@ -1,4 +1,5 @@ import asyncio +import functools import warnings from concurrent.futures import 
ThreadPoolExecutor from typing import Iterator, AsyncIterator, Dict, List, Any, Optional @@ -189,7 +190,10 @@ class HuggingFaceLocalAdapter(CustomLLM): """ # Run _generate in thread pool to avoid blocking loop = asyncio.get_event_loop() - generated_text = await loop.run_in_executor(self._executor, self._generate, **kwargs) + generated_text = await loop.run_in_executor( + self._executor, + functools.partial(self._generate, **kwargs) + ) return await acompletion( model=f"huggingface-local/{kwargs['model']}", @@ -223,7 +227,10 @@ class HuggingFaceLocalAdapter(CustomLLM): """ # Run _generate in thread pool to avoid blocking loop = asyncio.get_event_loop() - generated_text = await loop.run_in_executor(self._executor, self._generate, **kwargs) + generated_text = await loop.run_in_executor( + self._executor, + functools.partial(self._generate, **kwargs) + ) generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", From 1ffb7d1a9a783049a5ca67cd791a005dc91db502 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Mon, 11 Aug 2025 11:48:26 -0400 Subject: [PATCH 61/76] changed reasoning search --- .../agent/agent/integrations/hud/agent.py | 49 ++++++++++++++++--- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 602a3a0c..43fc7367 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -248,20 +248,47 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): # Run ComputerAgent try: + new_items = [] + # ComputerAgent.run returns an async generator - async for result in self.computer_agent.run(self.conversation_history, stream=False): - # if the result has computer_call_output, immediately exit - if result.get("output", []) and result.get("output", [])[-1].get("type") == "computer_call_output": - break - # otherwise add agent output to conversation history - 
self.conversation_history += result["output"] + try: + async for result in self.computer_agent.run(self.conversation_history, stream=False): + # if the result has computer_call_output, immediately exit + if result.get("output", []) and result.get("output", [])[-1].get("type") == "computer_call_output": + break + # otherwise add agent output to conversation history + new_items += result["output"] + self.conversation_history += result["output"] + except Exception as e: + # if the last message is reasoning, change it to output_text + if new_items and new_items[-1].get("type") == "reasoning": + new_items[-1] = { + "type": "message", + "role": "assistant", + "content": [ + { + "text": new_items[-1].get("summary", [{}])[0].get("text", ""), + "type": "output_text" + } + ] + } + # add error message to conversation history + new_items.append({ + "type": "user", + "content": [ + { + "type": "input_text", + "text": f"Error during previous attempted action: {repr(e)}" + } + ] + }) # Check if we captured any actions if captured_actions: # Extract reasoning from the conversation history reasoning = "" # Look for the latest reasoning message - for msg in reversed(self.conversation_history): + for msg in reversed(new_items): if msg.get("type") == "reasoning" and msg.get("summary"): reasoning = " ".join([s.get("text", "") for s in msg["summary"] if s.get("type") == "summary_text"]) break @@ -271,6 +298,9 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): reasoning = " ".join([c.get("text", "") for c in content if c.get("type") == "output_text"]) break + # update conversation history + self.conversation_history += new_items + # Add reasoning and logs to each action for action in captured_actions: action["reasoning"] = reasoning @@ -280,7 +310,7 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): # Check if the last message is "Task completed" response_text = "" - for msg in reversed(self.conversation_history): + for msg in reversed(new_items): if 
msg.get("type") == "message" and msg.get("role") == "assistant": content = msg.get("content", []) for c in content: @@ -291,6 +321,9 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): done = "task completed" in response_text.lower() + # update conversation history + self.conversation_history += new_items + response_action = { "type": "response", "text": response_text, From d8b1792cc57e28071d932237e6608405b71192b0 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Mon, 11 Aug 2025 15:24:56 -0400 Subject: [PATCH 62/76] Added human/ provider to liteLLM --- .../agent/agent/adapters/human_adapter.py | 348 ++++++++++ libs/python/agent/agent/agent.py | 9 +- .../python/agent/agent/human_tool/__init__.py | 29 + .../python/agent/agent/human_tool/__main__.py | 38 ++ libs/python/agent/agent/human_tool/server.py | 234 +++++++ libs/python/agent/agent/human_tool/ui.py | 630 ++++++++++++++++++ 6 files changed, 1286 insertions(+), 2 deletions(-) create mode 100644 libs/python/agent/agent/adapters/human_adapter.py create mode 100644 libs/python/agent/agent/human_tool/__init__.py create mode 100644 libs/python/agent/agent/human_tool/__main__.py create mode 100644 libs/python/agent/agent/human_tool/server.py create mode 100644 libs/python/agent/agent/human_tool/ui.py diff --git a/libs/python/agent/agent/adapters/human_adapter.py b/libs/python/agent/agent/adapters/human_adapter.py new file mode 100644 index 00000000..0cd4fe02 --- /dev/null +++ b/libs/python/agent/agent/adapters/human_adapter.py @@ -0,0 +1,348 @@ +import os +import asyncio +import requests +from typing import List, Dict, Any, Iterator, AsyncIterator +from litellm.types.utils import GenericStreamingChunk, ModelResponse +from litellm.llms.custom_llm import CustomLLM +from litellm import completion, acompletion + + +class HumanAdapter(CustomLLM): + """Human Adapter for human-in-the-loop completions. 
+ + This adapter sends completion requests to a human completion server + where humans can review and respond to AI requests. + """ + + def __init__(self, base_url: str | None = None, timeout: float = 300.0, **kwargs): + """Initialize the human adapter. + + Args: + base_url: Base URL for the human completion server. + Defaults to HUMAN_BASE_URL environment variable or http://localhost:8002 + timeout: Timeout in seconds for waiting for human response + **kwargs: Additional arguments + """ + super().__init__() + self.base_url = base_url or os.getenv('HUMAN_BASE_URL', 'http://localhost:8002') + self.timeout = timeout + + # Ensure base_url doesn't end with slash + self.base_url = self.base_url.rstrip('/') + + def _queue_completion(self, messages: List[Dict[str, Any]], model: str) -> str: + """Queue a completion request and return the call ID. + + Args: + messages: Messages in OpenAI format + model: Model name + + Returns: + Call ID for tracking the request + + Raises: + Exception: If queueing fails + """ + try: + response = requests.post( + f"{self.base_url}/queue", + json={"messages": messages, "model": model}, + timeout=10 + ) + response.raise_for_status() + return response.json()["id"] + except requests.RequestException as e: + raise Exception(f"Failed to queue completion request: {e}") + + def _wait_for_completion(self, call_id: str) -> Dict[str, Any]: + """Wait for human to complete the call. 
+ + Args: + call_id: ID of the queued completion call + + Returns: + Dict containing response and/or tool_calls + + Raises: + TimeoutError: If timeout is exceeded + Exception: If completion fails + """ + import time + + start_time = time.time() + + while True: + try: + # Check status + status_response = requests.get(f"{self.base_url}/status/{call_id}") + status_response.raise_for_status() + status_data = status_response.json() + + if status_data["status"] == "completed": + result = {} + if "response" in status_data and status_data["response"]: + result["response"] = status_data["response"] + if "tool_calls" in status_data and status_data["tool_calls"]: + result["tool_calls"] = status_data["tool_calls"] + return result + elif status_data["status"] == "failed": + error_msg = status_data.get("error", "Unknown error") + raise Exception(f"Completion failed: {error_msg}") + + # Check timeout + if time.time() - start_time > self.timeout: + raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds") + + # Wait before checking again + time.sleep(1.0) + + except requests.RequestException as e: + if time.time() - start_time > self.timeout: + raise TimeoutError(f"Timeout waiting for human response: {e}") + # Continue trying if we haven't timed out + time.sleep(1.0) + + async def _async_wait_for_completion(self, call_id: str) -> Dict[str, Any]: + """Async version of wait_for_completion. 
+ + Args: + call_id: ID of the queued completion call + + Returns: + Dict containing response and/or tool_calls + + Raises: + TimeoutError: If timeout is exceeded + Exception: If completion fails + """ + import aiohttp + import time + + start_time = time.time() + + async with aiohttp.ClientSession() as session: + while True: + try: + # Check status + async with session.get(f"{self.base_url}/status/{call_id}") as response: + response.raise_for_status() + status_data = await response.json() + + if status_data["status"] == "completed": + result = {} + if "response" in status_data and status_data["response"]: + result["response"] = status_data["response"] + if "tool_calls" in status_data and status_data["tool_calls"]: + result["tool_calls"] = status_data["tool_calls"] + return result + elif status_data["status"] == "failed": + error_msg = status_data.get("error", "Unknown error") + raise Exception(f"Completion failed: {error_msg}") + + # Check timeout + if time.time() - start_time > self.timeout: + raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds") + + # Wait before checking again + await asyncio.sleep(1.0) + + except Exception as e: + if time.time() - start_time > self.timeout: + raise TimeoutError(f"Timeout waiting for human response: {e}") + # Continue trying if we haven't timed out + await asyncio.sleep(1.0) + + def _generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]: + """Generate a human response for the given messages. + + Args: + messages: Messages in OpenAI format + model: Model name + + Returns: + Dict containing response and/or tool_calls + """ + # Queue the completion request + call_id = self._queue_completion(messages, model) + + # Wait for human response + response = self._wait_for_completion(call_id) + + return response + + async def _async_generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]: + """Async version of _generate_response. 
+ + Args: + messages: Messages in OpenAI format + model: Model name + + Returns: + Dict containing response and/or tool_calls + """ + # Queue the completion request (sync operation) + call_id = self._queue_completion(messages, model) + + # Wait for human response (async) + response = await self._async_wait_for_completion(call_id) + + return response + + def completion(self, *args, **kwargs) -> ModelResponse: + """Synchronous completion method. + + Returns: + ModelResponse with human-generated text or tool calls + """ + messages = kwargs.get('messages', []) + model = kwargs.get('model', 'human') + + # Generate human response + human_response_data = self._generate_response(messages, model) + + # Create ModelResponse with proper structure + from litellm.types.utils import ModelResponse, Choices, Message + import uuid + import time + + # Create message content based on response type + if "tool_calls" in human_response_data and human_response_data["tool_calls"]: + # Tool calls response + message = Message( + role="assistant", + content=human_response_data.get("response", ""), + tool_calls=human_response_data["tool_calls"] + ) + else: + # Text response + message = Message( + role="assistant", + content=human_response_data.get("response", "") + ) + + choice = Choices( + finish_reason="stop", + index=0, + message=message + ) + + result = ModelResponse( + id=f"human-{uuid.uuid4()}", + choices=[choice], + created=int(time.time()), + model=f"human/{model}", + object="chat.completion" + ) + + return result + + async def acompletion(self, *args, **kwargs) -> ModelResponse: + """Asynchronous completion method. 
+ + Returns: + ModelResponse with human-generated text or tool calls + """ + messages = kwargs.get('messages', []) + model = kwargs.get('model', 'human') + + # Generate human response + human_response_data = await self._async_generate_response(messages, model) + + # Create ModelResponse with proper structure + from litellm.types.utils import ModelResponse, Choices, Message + import uuid + import time + + # Create message content based on response type + if "tool_calls" in human_response_data and human_response_data["tool_calls"]: + # Tool calls response + message = Message( + role="assistant", + content=human_response_data.get("response", ""), + tool_calls=human_response_data["tool_calls"] + ) + else: + # Text response + message = Message( + role="assistant", + content=human_response_data.get("response", "") + ) + + choice = Choices( + finish_reason="stop", + index=0, + message=message + ) + + result = ModelResponse( + id=f"human-{uuid.uuid4()}", + choices=[choice], + created=int(time.time()), + model=f"human/{model}", + object="chat.completion" + ) + + return result + + def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: + """Synchronous streaming method. 
+ + Yields: + Streaming chunks with human-generated text or tool calls + """ + messages = kwargs.get('messages', []) + model = kwargs.get('model', 'human') + + # Generate human response + human_response_data = self._generate_response(messages, model) + + import time + + # Handle tool calls vs text response + if "tool_calls" in human_response_data and human_response_data["tool_calls"]: + # Stream tool calls as a single chunk + generic_chunk: GenericStreamingChunk = { + "finish_reason": "tool_calls", + "index": 0, + "is_finished": True, + "text": human_response_data.get("response", ""), + "tool_use": human_response_data["tool_calls"], + "usage": {"completion_tokens": 1, "prompt_tokens": 0, "total_tokens": 1}, + } + yield generic_chunk + else: + # Stream text response + response_text = human_response_data.get("response", "") + generic_chunk: GenericStreamingChunk = { + "finish_reason": "stop", + "index": 0, + "is_finished": True, + "text": response_text, + "tool_use": None, + "usage": {"completion_tokens": len(response_text.split()), "prompt_tokens": 0, "total_tokens": len(response_text.split())}, + } + yield generic_chunk + + async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: + """Asynchronous streaming method. 
+ + Yields: + Streaming chunks with human-generated text or tool calls + """ + messages = kwargs.get('messages', []) + model = kwargs.get('model', 'human') + + # Generate human response + human_response = await self._async_generate_response(messages, model) + + # Return as single streaming chunk + generic_streaming_chunk: GenericStreamingChunk = { + "finish_reason": "stop", + "index": 0, + "is_finished": True, + "text": human_response, + "tool_use": None, + "usage": {"completion_tokens": len(human_response.split()), "prompt_tokens": 0, "total_tokens": len(human_response.split())}, + } + + yield generic_streaming_chunk \ No newline at end of file diff --git a/libs/python/agent/agent/agent.py b/libs/python/agent/agent/agent.py index 14bd92aa..7f30166f 100644 --- a/libs/python/agent/agent/agent.py +++ b/libs/python/agent/agent/agent.py @@ -13,7 +13,10 @@ import json import litellm import litellm.utils import inspect -from .adapters import HuggingFaceLocalAdapter +from .adapters import ( + HuggingFaceLocalAdapter, + HumanAdapter, +) from .callbacks import ( ImageRetentionCallback, LoggingCallback, @@ -215,8 +218,10 @@ class ComputerAgent: hf_adapter = HuggingFaceLocalAdapter( device="auto" ) + human_adapter = HumanAdapter() litellm.custom_provider_map = [ - {"provider": "huggingface-local", "custom_handler": hf_adapter} + {"provider": "huggingface-local", "custom_handler": hf_adapter}, + {"provider": "human", "custom_handler": human_adapter} ] litellm.suppress_debug_info = True diff --git a/libs/python/agent/agent/human_tool/__init__.py b/libs/python/agent/agent/human_tool/__init__.py new file mode 100644 index 00000000..f57fb305 --- /dev/null +++ b/libs/python/agent/agent/human_tool/__init__.py @@ -0,0 +1,29 @@ +""" +Human-in-the-Loop Completion Tool + +This package provides a human-in-the-loop completion system that allows +AI agents to request human assistance for complex decisions or responses. 
+ +Components: +- server.py: FastAPI server with completion queue management +- ui.py: Gradio UI for human interaction +- __main__.py: Combined server and UI application + +Usage: + # Run the server and UI + python -m agent.human_tool + + # Or run components separately + python -m agent.human_tool.server # API server only + python -m agent.human_tool.ui # UI only +""" + +from .server import CompletionQueue, completion_queue +from .ui import HumanCompletionUI, create_ui + +__all__ = [ + "CompletionQueue", + "completion_queue", + "HumanCompletionUI", + "create_ui" +] diff --git a/libs/python/agent/agent/human_tool/__main__.py b/libs/python/agent/agent/human_tool/__main__.py new file mode 100644 index 00000000..e1ceed50 --- /dev/null +++ b/libs/python/agent/agent/human_tool/__main__.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +""" +Human-in-the-Loop Completion Server and UI + +This module combines the FastAPI server for handling completion requests +with a Gradio UI for human interaction. +""" + +import gradio as gr +from fastapi import FastAPI +from .server import app as fastapi_app +from .ui import create_ui + +# Create the Gradio demo +gradio_demo = create_ui() + +# Mount Gradio on FastAPI +CUSTOM_PATH = "/gradio" +app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH) + +# Add a redirect from root to Gradio UI +@fastapi_app.get("/") +async def redirect_to_ui(): + """Redirect root to Gradio UI.""" + return { + "message": "Human Completion Server is running", + "ui_url": "/gradio", + "api_docs": "/docs" + } + +if __name__ == "__main__": + import uvicorn + print("🚀 Starting Human-in-the-Loop Completion Server...") + print("📊 API Server: http://localhost:8002") + print("🎨 Gradio UI: http://localhost:8002/gradio") + print("📚 API Docs: http://localhost:8002/docs") + + uvicorn.run(app, host="0.0.0.0", port=8002) diff --git a/libs/python/agent/agent/human_tool/server.py b/libs/python/agent/agent/human_tool/server.py new file mode 100644 index 
00000000..c5d08cfe --- /dev/null +++ b/libs/python/agent/agent/human_tool/server.py @@ -0,0 +1,234 @@ +import asyncio +import uuid +from datetime import datetime +from typing import Dict, List, Any, Optional +from dataclasses import dataclass, asdict +from enum import Enum + +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + + +class CompletionStatus(str, Enum): + PENDING = "pending" + COMPLETED = "completed" + FAILED = "failed" + + +@dataclass +class CompletionCall: + id: str + messages: List[Dict[str, Any]] + model: str + status: CompletionStatus + created_at: datetime + completed_at: Optional[datetime] = None + response: Optional[str] = None + tool_calls: Optional[List[Dict[str, Any]]] = None + error: Optional[str] = None + + +class ToolCall(BaseModel): + id: str + type: str = "function" + function: Dict[str, Any] + + +class CompletionRequest(BaseModel): + messages: List[Dict[str, Any]] + model: str + + +class CompletionResponse(BaseModel): + response: Optional[str] = None + tool_calls: Optional[List[Dict[str, Any]]] = None + + +class CompletionQueue: + def __init__(self): + self._queue: Dict[str, CompletionCall] = {} + self._pending_order: List[str] = [] + self._lock = asyncio.Lock() + + async def add_completion(self, messages: List[Dict[str, Any]], model: str) -> str: + """Add a completion call to the queue.""" + async with self._lock: + call_id = str(uuid.uuid4()) + completion_call = CompletionCall( + id=call_id, + messages=messages, + model=model, + status=CompletionStatus.PENDING, + created_at=datetime.now() + ) + self._queue[call_id] = completion_call + self._pending_order.append(call_id) + return call_id + + async def get_pending_calls(self) -> List[Dict[str, Any]]: + """Get all pending completion calls.""" + async with self._lock: + pending_calls = [] + for call_id in self._pending_order: + if call_id in self._queue and self._queue[call_id].status == CompletionStatus.PENDING: + call = self._queue[call_id] + 
pending_calls.append({ + "id": call.id, + "model": call.model, + "created_at": call.created_at.isoformat(), + "messages": call.messages + }) + return pending_calls + + async def get_call_status(self, call_id: str) -> Optional[Dict[str, Any]]: + """Get the status of a specific completion call.""" + async with self._lock: + if call_id not in self._queue: + return None + + call = self._queue[call_id] + result = { + "id": call.id, + "status": call.status.value, + "created_at": call.created_at.isoformat(), + "model": call.model, + "messages": call.messages + } + + if call.completed_at: + result["completed_at"] = call.completed_at.isoformat() + if call.response: + result["response"] = call.response + if call.tool_calls: + result["tool_calls"] = call.tool_calls + if call.error: + result["error"] = call.error + + return result + + async def complete_call(self, call_id: str, response: Optional[str] = None, tool_calls: Optional[List[Dict[str, Any]]] = None) -> bool: + """Mark a completion call as completed with a response or tool calls.""" + async with self._lock: + if call_id not in self._queue: + return False + + call = self._queue[call_id] + if call.status != CompletionStatus.PENDING: + return False + + call.status = CompletionStatus.COMPLETED + call.completed_at = datetime.now() + call.response = response + call.tool_calls = tool_calls + + # Remove from pending order + if call_id in self._pending_order: + self._pending_order.remove(call_id) + + return True + + async def fail_call(self, call_id: str, error: str) -> bool: + """Mark a completion call as failed with an error.""" + async with self._lock: + if call_id not in self._queue: + return False + + call = self._queue[call_id] + if call.status != CompletionStatus.PENDING: + return False + + call.status = CompletionStatus.FAILED + call.completed_at = datetime.now() + call.error = error + + # Remove from pending order + if call_id in self._pending_order: + self._pending_order.remove(call_id) + + return True + + async def 
wait_for_completion(self, call_id: str, timeout: float = 300.0) -> Optional[str]: + """Wait for a completion call to be completed and return the response.""" + start_time = asyncio.get_event_loop().time() + + while True: + status = await self.get_call_status(call_id) + if not status: + return None + + if status["status"] == CompletionStatus.COMPLETED.value: + return status.get("response") + elif status["status"] == CompletionStatus.FAILED.value: + raise Exception(f"Completion failed: {status.get('error', 'Unknown error')}") + + # Check timeout + if asyncio.get_event_loop().time() - start_time > timeout: + await self.fail_call(call_id, "Timeout waiting for human response") + raise TimeoutError("Timeout waiting for human response") + + # Wait a bit before checking again + await asyncio.sleep(0.5) + + +# Global queue instance +completion_queue = CompletionQueue() + +# FastAPI app +app = FastAPI(title="Human Completion Server", version="1.0.0") + + +@app.post("/queue", response_model=Dict[str, str]) +async def queue_completion(request: CompletionRequest): + """Add a completion request to the queue.""" + call_id = await completion_queue.add_completion(request.messages, request.model) + return {"id": call_id, "status": "queued"} + + +@app.get("/pending") +async def list_pending(): + """List all pending completion calls.""" + pending_calls = await completion_queue.get_pending_calls() + return {"pending_calls": pending_calls} + + +@app.get("/status/{call_id}") +async def get_status(call_id: str): + """Get the status of a specific completion call.""" + status = await completion_queue.get_call_status(call_id) + if not status: + raise HTTPException(status_code=404, detail="Completion call not found") + return status + + +@app.post("/complete/{call_id}") +async def complete_call(call_id: str, response: CompletionResponse): + """Complete a call with a human response.""" + success = await completion_queue.complete_call( + call_id, + response=response.response, + 
tool_calls=response.tool_calls + ) + if success: + return {"status": "success", "message": "Call completed"} + else: + raise HTTPException(status_code=404, detail="Call not found or already completed") + + +@app.post("/fail/{call_id}") +async def fail_call(call_id: str, error: Dict[str, str]): + """Mark a call as failed.""" + success = await completion_queue.fail_call(call_id, error.get("error", "Unknown error")) + if not success: + raise HTTPException(status_code=404, detail="Completion call not found or already completed") + return {"status": "failed"} + + +@app.get("/") +async def root(): + """Root endpoint.""" + return {"message": "Human Completion Server is running"} + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8002) diff --git a/libs/python/agent/agent/human_tool/ui.py b/libs/python/agent/agent/human_tool/ui.py new file mode 100644 index 00000000..f4a9fb4f --- /dev/null +++ b/libs/python/agent/agent/human_tool/ui.py @@ -0,0 +1,630 @@ +import gradio as gr +import json +import time +from typing import List, Dict, Any, Optional +from datetime import datetime +import requests +from .server import completion_queue +import base64 +import io +from PIL import Image + +class HumanCompletionUI: + def __init__(self, server_url: str = "http://localhost:8002"): + self.server_url = server_url + self.current_call_id: Optional[str] = None + self.refresh_interval = 2.0 # seconds + self.last_image = None # Store the last image for display + + def format_messages_for_chatbot(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format messages for display in gr.Chatbot with type='messages'.""" + formatted = [] + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + tool_calls = msg.get("tool_calls", []) + + # Handle different content formats + if isinstance(content, list): + # Multi-modal content - can include text and images + formatted_content = [] + for item in content: + if 
item.get("type") == "text": + text = item.get("text", "") + if text.strip(): # Only add non-empty text + formatted_content.append(text) + elif item.get("type") == "image_url": + image_url = item.get("image_url", {}).get("url", "") + if image_url: + # Check if it's a base64 image or URL + if image_url.startswith("data:image"): + # For base64 images, decode and create gr.Image + try: + header, data = image_url.split(",", 1) + image_data = base64.b64decode(data) + image = Image.open(io.BytesIO(image_data)) + formatted_content.append(gr.Image(value=image)) + except Exception as e: + print(f"Error loading image: {e}") + formatted_content.append(f"[Image loading error: {e}]") + else: + # For URL images, create gr.Image with URL + formatted_content.append(gr.Image(value=image_url)) + + # Determine final content format + if len(formatted_content) == 1: + content = formatted_content[0] + elif len(formatted_content) > 1: + content = formatted_content + else: + content = "[Empty content]" + + # Ensure role is valid for Gradio Chatbot + if role not in ["user", "assistant"]: + role = "assistant" if role == "system" else "user" + + # Invert roles for better display in human UI context + # (what the AI says becomes "user", what human should respond becomes "assistant") + if role == "user": + role = "assistant" + else: + role = "user" + + # Add the main message if it has content + if content and str(content).strip(): + formatted.append({"role": role, "content": content}) + + # Handle tool calls - create separate messages for each tool call + if tool_calls: + for tool_call in tool_calls: + function_name = tool_call.get("function", {}).get("name", "unknown") + arguments_str = tool_call.get("function", {}).get("arguments", "{}") + + try: + # Parse arguments to format them nicely + arguments = json.loads(arguments_str) + formatted_args = json.dumps(arguments, indent=2) + except json.JSONDecodeError: + # If parsing fails, use the raw string + formatted_args = arguments_str + + # Create 
def get_pending_calls(self) -> List[Dict[str, Any]]:
    """Fetch the calls that are currently waiting for a human answer.

    Sends ``GET {server_url}/pending`` with a 5 second timeout and returns the
    ``pending_calls`` entry of the JSON body (empty list when the key is
    missing).

    Returns:
        The pending call dicts, or ``[]`` when the server answers with a
        non-200 status or anything at all goes wrong. Errors are printed, not
        raised, so that UI polling never crashes on a flaky server.
    """
    try:
        reply = requests.get(f"{self.server_url}/pending", timeout=5)
        if reply.status_code != 200:
            return []
        return reply.json().get("pending_calls", [])
    except Exception as exc:  # best-effort polling: report and carry on
        print(f"Error fetching pending calls: {exc}")
        return []
def get_last_image_from_messages(self, messages: List[Dict[str, Any]]) -> Optional[Any]:
    """Return the most recent image found in ``messages``.

    Messages are scanned newest-first, and the content items of each message
    are scanned last-to-first, so the first hit is the latest image of the
    conversation.

    Args:
        messages: Chat messages. Only messages whose ``content`` is a list are
            inspected; items that are not dicts (e.g. plain strings) are
            skipped instead of raising.

    Returns:
        A decoded ``PIL`` image for ``data:image`` URIs, the URL string for
        any other non-empty image URL, or ``None`` when no usable image exists.
        A data URI that fails to decode is reported and skipped, and the
        search continues with older items.
    """
    for msg in reversed(messages):
        content = msg.get("content", "")
        if not isinstance(content, list):
            continue

        for item in reversed(content):
            # Content lists may mix dict parts with bare strings.
            if not isinstance(item, dict) or item.get("type") != "image_url":
                continue

            image_ref = item.get("image_url") or {}
            image_url = image_ref.get("url", "") if isinstance(image_ref, dict) else ""
            if not image_url:
                continue

            if not image_url.startswith("data:image"):
                # Remote image: hand the URL straight to the image component.
                return image_url

            try:
                _, encoded = image_url.split(",", 1)
                return Image.open(io.BytesIO(base64.b64decode(encoded)))
            except Exception as e:
                # Best-effort: a broken screenshot must not hide older ones.
                print(f"Error loading image: {e}")

    return None
def on_call_selected(self, selected_choice):
    """Show the conversation for the call picked in the dropdown.

    Args:
        selected_choice: ``"latest"`` (meaning the oldest pending call), a
            full call id, or a legacy label that embeds the first eight
            characters of a call id.

    Returns:
        A 3-tuple of Gradio updates for (screenshot image, chatbot messages,
        submit button). When nothing can be selected the image and chatbot
        are cleared and the submit button is disabled.

    Side effects:
        On success stores the chosen id in ``self.current_call_id`` and the
        latest screenshot in ``self.last_image``.
    """
    empty_state = (
        gr.update(value=None),  # no image
        gr.update(value=[]),  # empty chatbot
        gr.update(interactive=False),
    )

    if not selected_choice:
        return empty_state

    pending_calls = self.get_pending_calls()
    if not pending_calls:
        return empty_state

    if selected_choice == "latest":
        # "latest" deliberately maps to the oldest pending call (FIFO).
        selected_call = min(pending_calls, key=lambda c: c.get("created_at", ""))
    else:
        # refresh_pending_calls registers choices as (label, call_id), so the
        # value is normally the full id: match it exactly first. This avoids
        # picking the wrong call when two ids share their first 8 characters.
        selected_call = next(
            (c for c in pending_calls if c["id"] == selected_choice), None
        )
        if selected_call is None:
            # Backward-compatible fallback for label-style choices.
            selected_call = next(
                (c for c in pending_calls if c["id"][:8] in selected_choice), None
            )

    if selected_call is None:
        return empty_state

    messages = selected_call.get("messages", [])
    conversation = self.format_messages_for_chatbot(messages)
    self.current_call_id = selected_call["id"]
    # Get the last image from messages
    self.last_image = self.get_last_image_from_messages(messages)

    return (
        gr.update(value=self.last_image),
        gr.update(value=conversation),
        gr.update(interactive=True),
    )
def submit_action(self, action_type: str, **kwargs) -> str:
    """Answer the current call with a single ``computer`` tool call.

    Args:
        action_type: Value stored under the ``"type"`` key of the action.
        **kwargs: Extra action fields (coordinates, text, keys, ...). They are
            merged after ``"type"``, so a ``type`` keyword would override it.

    Returns:
        A human-readable status line for the UI.
    """
    target_call = self.current_call_id
    if not target_call:
        return "❌ No call selected"

    import uuid

    # The action itself travels as the JSON-encoded function arguments.
    payload = {"type": action_type}
    payload.update(kwargs)
    encoded_arguments = json.dumps(payload)

    tool_call = {
        "id": f"call_{uuid.uuid4().hex[:24]}",
        "type": "function",
        "function": {"name": "computer", "arguments": encoded_arguments},
    }

    if self.complete_call_with_tool_calls(target_call, [tool_call]):
        return f"✅ {action_type.capitalize()} action submitted as tool call"
    return f"❌ Failed to submit {action_type} action"
def create_ui():
    """Create the Gradio interface.

    Builds the human-in-the-loop screen: screenshot with click-to-act
    controls and the conversation on the left; call selection, free-text
    response, explicit action forms and a status line on the right.

    Returns:
        The ``gr.Blocks`` app, not yet queued or launched.
    """
    ui_handler = HumanCompletionUI()

    pointer_actions = ["click", "double_click", "move", "left_mouse_up", "left_mouse_down"]
    mouse_buttons = ["left", "right", "wheel", "back", "forward"]

    with gr.Blocks(title="Human-in-the-Loop Agent Tool") as demo:
        gr.Markdown("# 🤖 Human-in-the-Loop Agent Tool")
        gr.Markdown("Review AI conversation requests and provide human responses.")

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Group():
                    screenshot_image = gr.Image(
                        label="Screenshot",
                        interactive=False,
                        height=600
                    )

                    # Action type selection for image clicks
                    with gr.Row():
                        action_type_radio = gr.Radio(
                            label="Action Type",
                            choices=pointer_actions,
                            value="click",
                            scale=2
                        )
                        action_button_radio = gr.Radio(
                            label="Button (for click only)",
                            choices=mouse_buttons,
                            value="left",
                            visible=True,
                            scale=1
                        )

                conversation_chatbot = gr.Chatbot(
                    label="Messages",
                    type="messages",
                    height=500,
                    show_copy_button=True
                )

            with gr.Column(scale=1):
                with gr.Group():
                    call_dropdown = gr.Dropdown(
                        label="Select a pending call",
                        choices=["latest"],
                        interactive=True,
                        value="latest"
                    )
                    refresh_btn = gr.Button("🔄 Refresh", variant="secondary")

                with gr.Group():
                    response_text = gr.Textbox(
                        label="Response",
                        lines=3,
                        placeholder="Enter your response here..."
                    )
                    submit_btn = gr.Button("📤 Submit Response", variant="primary", interactive=False)

                # Action Accordions
                with gr.Accordion("🖱️ Click Actions", open=False):
                    with gr.Group():
                        with gr.Row():
                            # precision=0 makes Gradio deliver ints, so the
                            # action payload carries whole pixel coordinates.
                            click_x = gr.Number(label="X", value=0, minimum=0, precision=0)
                            click_y = gr.Number(label="Y", value=0, minimum=0, precision=0)
                        with gr.Row():
                            click_action_type = gr.Dropdown(
                                label="Action Type",
                                choices=pointer_actions,
                                value="click"
                            )
                            click_button = gr.Dropdown(
                                label="Button (for click only)",
                                choices=mouse_buttons,
                                value="left"
                            )
                        click_submit_btn = gr.Button("Submit Action")

                with gr.Accordion("📝 Type Action", open=False):
                    with gr.Group():
                        type_text = gr.Textbox(
                            label="Text to Type",
                            placeholder="Enter text to type..."
                        )
                        type_submit_btn = gr.Button("Submit Type")

                with gr.Accordion("⌨️ Keypress Action", open=False):
                    with gr.Group():
                        keypress_text = gr.Textbox(
                            label="Keys",
                            placeholder="e.g., ctrl+c, alt+tab"
                        )
                        keypress_submit_btn = gr.Button("Submit Keypress")

                with gr.Accordion("🎯 Description Action", open=False):
                    with gr.Group():
                        description_text = gr.Textbox(
                            label="Element Description",
                            placeholder="e.g., 'Privacy and security option in left sidebar'"
                        )
                        with gr.Row():
                            description_action_type = gr.Dropdown(
                                label="Action Type",
                                choices=pointer_actions,
                                value="click"
                            )
                            description_button = gr.Radio(
                                label="Button (for click only)",
                                choices=mouse_buttons,
                                value="left"
                            )
                        description_submit_btn = gr.Button("Submit Description Action")

                status_display = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Ready to receive calls..."
                )

        # Components rewritten whenever the pending-call list is re-read.
        refresh_outputs = [call_dropdown, screenshot_image, conversation_chatbot, submit_btn]

        # Event handlers
        refresh_btn.click(
            fn=ui_handler.refresh_pending_calls,
            outputs=refresh_outputs
        )

        call_dropdown.change(
            fn=ui_handler.on_call_selected,
            inputs=[call_dropdown],
            outputs=[screenshot_image, conversation_chatbot, submit_btn]
        )

        def handle_image_click(action_type, button, evt: gr.SelectData):
            # The radios arrive as event inputs: reading `component.value`
            # inside a handler only yields the construction-time default, not
            # what the user picked.
            if evt.index is None:
                return "No coordinates selected"
            x, y = evt.index
            return ui_handler.submit_click_action(
                x, y, action_type or "click", button or "left"
            )

        # Waiting for the next call happens once, in the chained step, so the
        # status text is shown immediately and the wait is not doubled.
        screenshot_image.select(
            fn=handle_image_click,
            inputs=[action_type_radio, action_button_radio],
            outputs=[status_display]
        ).then(
            fn=ui_handler.wait_for_pending_calls,
            outputs=refresh_outputs
        )

        # Response submission
        submit_btn.click(
            fn=ui_handler.submit_response,
            inputs=[response_text],
            outputs=[response_text, status_display]
        ).then(
            fn=ui_handler.refresh_pending_calls,
            outputs=refresh_outputs
        )

        # Toggle button radio visibility based on action type
        def toggle_button_visibility(action_type):
            return gr.update(visible=(action_type == "click"))

        action_type_radio.change(
            fn=toggle_button_visibility,
            inputs=[action_type_radio],
            outputs=[action_button_radio]
        )

        # Action accordion handlers
        click_submit_btn.click(
            fn=ui_handler.submit_click_action,
            inputs=[click_x, click_y, click_action_type, click_button],
            outputs=[status_display]
        ).then(
            fn=ui_handler.wait_for_pending_calls,
            outputs=refresh_outputs
        )

        type_submit_btn.click(
            fn=ui_handler.submit_type_action,
            inputs=[type_text],
            outputs=[status_display]
        ).then(
            fn=ui_handler.wait_for_pending_calls,
            outputs=refresh_outputs
        )

        keypress_submit_btn.click(
            fn=ui_handler.submit_hotkey_action,
            inputs=[keypress_text],
            outputs=[status_display]
        ).then(
            fn=ui_handler.wait_for_pending_calls,
            outputs=refresh_outputs
        )

        def handle_description_submit(description, action_type, button):
            if not description:
                return "Please enter a description"
            return ui_handler.submit_description_click(description, action_type, button)

        description_submit_btn.click(
            fn=handle_description_submit,
            inputs=[description_text, description_action_type, description_button],
            outputs=[status_display]
        ).then(
            fn=ui_handler.wait_for_pending_calls,
            outputs=refresh_outputs
        )

        # Load initial data
        demo.load(
            fn=ui_handler.refresh_pending_calls,
            outputs=refresh_outputs
        )

    return demo
Subject: [PATCH 64/76] Add GLM-4.5V CUA model config --- libs/python/agent/README.md | 1 + libs/python/agent/agent/loops/__init__.py | 3 +- libs/python/agent/agent/loops/glm45v.py | 748 ++++++++++++++++++++++ libs/python/agent/pyproject.toml | 9 + 4 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 libs/python/agent/agent/loops/glm45v.py diff --git a/libs/python/agent/README.md b/libs/python/agent/README.md index 0c5595e1..f34692db 100644 --- a/libs/python/agent/README.md +++ b/libs/python/agent/README.md @@ -37,6 +37,7 @@ pip install "cua-agent[omni]" # Omniparser + any LLM support pip install "cua-agent[uitars]" # UI-TARS pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support +pip install "cua-agent[glm45v-hf]" # GLM-4.5V + Huggingface support pip install "cua-agent[ui]" # Gradio UI support ``` diff --git a/libs/python/agent/agent/loops/__init__.py b/libs/python/agent/agent/loops/__init__.py index e3070a81..45f70e20 100644 --- a/libs/python/agent/agent/loops/__init__.py +++ b/libs/python/agent/agent/loops/__init__.py @@ -9,5 +9,6 @@ from . import uitars from . import omniparser from . import gta1 from . import composed_grounded +from . import glm45v -__all__ = ["anthropic", "openai", "uitars", "omniparser", "gta1", "composed_grounded"] +__all__ = ["anthropic", "openai", "uitars", "omniparser", "gta1", "composed_grounded", "glm45v"] diff --git a/libs/python/agent/agent/loops/glm45v.py b/libs/python/agent/agent/loops/glm45v.py new file mode 100644 index 00000000..e7230044 --- /dev/null +++ b/libs/python/agent/agent/loops/glm45v.py @@ -0,0 +1,748 @@ +""" +GLM-4.5V agent loop implementation using liteLLM for GLM-4.5V model. +Supports vision-language models for computer control with bounding box parsing. 
+""" + +import asyncio +import json +import base64 +import re +from typing import Dict, List, Any, Optional, Tuple +from io import BytesIO +from PIL import Image +import litellm +from litellm.types.utils import ModelResponse +from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig + +from ..decorators import register_agent +from ..types import Messages, AgentResponse, Tools, AgentCapability +from ..loops.base import AsyncAgentConfig +from ..responses import ( + convert_responses_items_to_completion_messages, + convert_completion_messages_to_responses_items, + make_reasoning_item, + make_output_text_item, + make_click_item, + make_double_click_item, + make_drag_item, + make_keypress_item, + make_scroll_item, + make_type_item, + make_wait_item, + make_input_image_item +) + +# GLM-4.5V specific constants +GLM_ACTION_SPACE = """ +### {left,right,middle}_click + +Call rule: `{left,right,middle}_click(start_box='[x,y]', element_info='')` +{ + 'name': ['left_click', 'right_click', 'middle_click'], + 'description': 'Perform a left/right/middle mouse click at the specified coordinates on the screen.', + 'parameters': { + 'type': 'object', + 'properties': { + 'start_box': { + 'type': 'array', + 'items': { + 'type': 'integer' + }, + 'description': 'Coordinates [x,y] where to perform the click, normalized to 0-999 range.' + }, + 'element_info': { + 'type': 'string', + 'description': 'Optional text description of the UI element being clicked.' + } + }, + 'required': ['start_box'] + } +} + +### hover + +Call rule: `hover(start_box='[x,y]', element_info='')` +{ + 'name': 'hover', + 'description': 'Move the mouse pointer to the specified coordinates without performing any click action.', + 'parameters': { + 'type': 'object', + 'properties': { + 'start_box': { + 'type': 'array', + 'items': { + 'type': 'integer' + }, + 'description': 'Coordinates [x,y] where to move the mouse pointer, normalized to 0-999 range.' 
+ }, + 'element_info': { + 'type': 'string', + 'description': 'Optional text description of the UI element being hovered over.' + } + }, + 'required': ['start_box'] + } +} + +### left_double_click + +Call rule: `left_double_click(start_box='[x,y]', element_info='')` +{ + 'name': 'left_double_click', + 'description': 'Perform a left mouse double-click at the specified coordinates on the screen.', + 'parameters': { + 'type': 'object', + 'properties': { + 'start_box': { + 'type': 'array', + 'items': { + 'type': 'integer' + }, + 'description': 'Coordinates [x,y] where to perform the double-click, normalized to 0-999 range.' + }, + 'element_info': { + 'type': 'string', + 'description': 'Optional text description of the UI element being double-clicked.' + } + }, + 'required': ['start_box'] + } +} + +### left_drag + +Call rule: `left_drag(start_box='[x1,y1]', end_box='[x2,y2]', element_info='')` +{ + 'name': 'left_drag', + 'description': 'Drag the mouse from starting coordinates to ending coordinates while holding the left mouse button.', + 'parameters': { + 'type': 'object', + 'properties': { + 'start_box': { + 'type': 'array', + 'items': { + 'type': 'integer' + }, + 'description': 'Starting coordinates [x1,y1] for the drag operation, normalized to 0-999 range.' + }, + 'end_box': { + 'type': 'array', + 'items': { + 'type': 'integer' + }, + 'description': 'Ending coordinates [x2,y2] for the drag operation, normalized to 0-999 range.' + }, + 'element_info': { + 'type': 'string', + 'description': 'Optional text description of the UI element being dragged.' + } + }, + 'required': ['start_box', 'end_box'] + } +} + +### key + +Call rule: `key(keys='')` +{ + 'name': 'key', + 'description': 'Simulate pressing a single key or combination of keys on the keyboard.', + 'parameters': { + 'type': 'object', + 'properties': { + 'keys': { + 'type': 'string', + 'description': 'The key or key combination to press. Use '+' to separate keys in combinations (e.g., 'ctrl+c', 'alt+tab').' 
+ } + }, + 'required': ['keys'] + } +} + +### type + +Call rule: `type(content='')` +{ + 'name': 'type', + 'description': 'Type text content into the currently focused text input field. This action only performs typing and does not handle field activation or clearing.', + 'parameters': { + 'type': 'object', + 'properties': { + 'content': { + 'type': 'string', + 'description': 'The text content to be typed into the active text field.' + } + }, + 'required': ['content'] + } +} + +### scroll + +Call rule: `scroll(start_box='[x,y]', direction='', step=5, element_info='')` +{ + 'name': 'scroll', + 'description': 'Scroll an element at the specified coordinates in the specified direction by a given number of wheel steps.', + 'parameters': { + 'type': 'object', + 'properties': { + 'start_box': { + 'type': 'array', + 'items': { + 'type': 'integer' + }, + 'description': 'Coordinates [x,y] of the element or area to scroll, normalized to 0-999 range.' + }, + 'direction': { + 'type': 'string', + 'enum': ['down', 'up'], + 'description': 'The direction to scroll: 'down' or 'up'.' + }, + 'step': { + 'type': 'integer', + 'default': 5, + 'description': 'Number of wheel steps to scroll, default is 5.' + }, + 'element_info': { + 'type': 'string', + 'description': 'Optional text description of the UI element being scrolled.' 
+ } + }, + 'required': ['start_box', 'direction'] + } +} + +### WAIT + +Call rule: `WAIT()` +{ + 'name': 'WAIT', + 'description': 'Wait for 5 seconds before proceeding to the next action.', + 'parameters': { + 'type': 'object', + 'properties': {}, + 'required': [] + } +} + +### DONE + +Call rule: `DONE()` +{ + 'name': 'DONE', + 'description': 'Indicate that the current task has been completed successfully and no further actions are needed.', + 'parameters': { + 'type': 'object', + 'properties': {}, + 'required': [] + } +} + +### FAIL + +Call rule: `FAIL()` +{ + 'name': 'FAIL', + 'description': 'Indicate that the current task cannot be completed or is impossible to accomplish.', + 'parameters': { + 'type': 'object', + 'properties': {}, + 'required': [] + } +}""" + +GLM_PROMPT_TEMPLATE = """You are a GUI Agent, and your primary task is to respond accurately to user requests or questions. In addition to directly answering the user's queries, you can also use tools or perform GUI operations directly until you fulfill the user's request or provide a correct answer. You should carefully read and understand the images and questions provided by the user, and engage in thinking and reflection when appropriate. The coordinates involved are all represented in thousandths (0-999). + +# Task: +{task} + +# Task Platform +Desktop + +# Action Space +{action_space} + +# Output Format +Plain text explanation with action(param='...') +Memory: +[{{"key": "value"}}, ...] + +# Some Additional Notes +- You should put the key information you *have to remember* in a separated memory part and I'll give it to you in the next round. The content in this part should be a dict list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty list. +- My computer's password is "password", feel free to use it when you need sudo rights. 
def parse_glm_response(response: str) -> Dict[str, Any]:
    """Split a raw GLM-4.5V reply into action, explanation and memory.

    The model wraps the action call in ``<|begin_of_box|>`` and
    ``<|end_of_box|>`` and ends the reply with a ``Memory:`` section. The
    coordinates inside the action are left untouched here; the prompt
    template asks the model for thousandths (0-999).

    Args:
        response: Full text returned by the model.

    Returns:
        A dict with:
          * ``"action"``: the boxed call, stripped. The box may span several
            lines. If no box is present, the first ``name(...)``-looking
            substring is used, or ``None`` when there is none.
          * ``"action_text"``: everything before the first ``Memory:`` marker
            (stripped) with the box tokens removed, or the whole response when
            there is no marker.
          * ``"memory"``: the stripped text after the first ``Memory:`` marker,
            or ``"[]"`` when there is no marker.
    """
    # DOTALL: a boxed action such as type(content='...') may contain newlines;
    # without it the match fails and the looser fallback truncates the call at
    # the first ")".
    boxed = re.search(r"<\|begin_of_box\|>(.*?)<\|end_of_box\|>", response, re.DOTALL)
    if boxed:
        action = boxed.group(1).strip()
    else:
        # Fallback: look for function call patterns
        call_like = re.search(r"\w+\([^)]*\)", response)
        action = call_like.group(0) if call_like else None

    before, marker, after = response.partition("Memory:")
    if marker:
        memory = after.strip()
        action_text = before.strip()
    else:
        memory = "[]"
        action_text = response

    # Clean up action text by removing special tokens
    if action_text:
        action_text = action_text.replace("<|begin_of_box|>", "").replace("<|end_of_box|>", "")

    return {
        "action": action,
        "action_text": action_text,
        "memory": memory
    }
def convert_glm_completion_to_responses_items(response: ModelResponse, image_width: int, image_height: int) -> List[Dict[str, Any]]:
    """
    Convert GLM-4.5V completion response to responses items format.

    The reply is parsed with ``parse_glm_response``; the result is, in order,
    an optional reasoning item, an optional text message (explanation without
    the action call or memory section) and at most one computer call.

    Args:
        response: LiteLLM ModelResponse from GLM-4.5V
        image_width: Original image width for coordinate scaling
        image_height: Original image height for coordinate scaling

    Returns:
        List of response items in the proper format. ``DONE()``, ``FAIL()``
        and unrecognised or malformed actions produce no computer call.
    """
    import uuid

    response_items: List[Dict[str, Any]] = []

    if not response.choices or not response.choices[0].message:
        return response_items

    message = response.choices[0].message
    content = message.content or ""
    reasoning_content = getattr(message, 'reasoning_content', None)

    # Add reasoning item if present
    if reasoning_content:
        response_items.append(model_dump(make_reasoning_item(reasoning_content)))

    # Parse the content to extract action and text
    parsed_response = parse_glm_response(content)
    action = parsed_response.get("action", "")
    action_text = parsed_response.get("action_text", "")

    # Add message item with text content (excluding action and memory)
    if action_text:
        clean_text = action_text
        if action and action in clean_text:
            clean_text = clean_text.replace(action, "").strip()

        # Remove memory section
        clean_text = re.sub(r"Memory:\s*\[.*?\]\s*$", "", clean_text, flags=re.DOTALL).strip()

        if clean_text:
            response_items.append(model_dump(make_output_text_item(clean_text)))

    if not action:
        return response_items

    call_id = f"call_{uuid.uuid4().hex[:8]}"

    def find_box(param: str):
        """Locate ``param='[x,y]'`` (quotes optional) inside the action."""
        return re.search(param + r"='?\[(\d+),\s*(\d+)\]'?", action)

    def to_pixels(box) -> Tuple[int, int]:
        """Scale a 0-999 coordinate pair to pixels, clamped to the image."""
        x, y = int(box.group(1)), int(box.group(2))
        # Without the clamp, 999 maps to `width`/`height`, one past the edge.
        px = min(int((x / 999.0) * image_width), max(image_width - 1, 0))
        py = min(int((y / 999.0) * image_height), max(image_height - 1, 0))
        return px, py

    def emit(item) -> None:
        """Append ``item`` as a completed computer call sharing ``call_id``."""
        computer_call = model_dump(item)
        computer_call["call_id"] = call_id
        computer_call["status"] = "completed"
        response_items.append(computer_call)

    start_box = find_box("start_box")

    # Parse different action types and create appropriate computer calls
    if action.startswith("left_click"):
        if start_box:
            emit(make_click_item(*to_pixels(start_box)))

    elif action.startswith("right_click"):
        if start_box:
            emit(make_click_item(*to_pixels(start_box), button="right"))

    elif action.startswith("middle_click"):
        # middle_click is advertised in GLM_ACTION_SPACE but used to be dropped.
        # NOTE(review): "wheel" is assumed to be the middle-button name that
        # make_click_item accepts - confirm against its signature.
        if start_box:
            emit(make_click_item(*to_pixels(start_box), button="wheel"))

    elif action.startswith("left_double_click"):
        if start_box:
            emit(make_double_click_item(*to_pixels(start_box)))

    elif action.startswith("left_drag"):
        end_box = find_box("end_box")
        if start_box and end_box:
            x1, y1 = to_pixels(start_box)
            x2, y2 = to_pixels(end_box)
            # Create path for drag operation
            emit(make_drag_item([{"x": x1, "y": y1}, {"x": x2, "y": y2}]))

    elif action.startswith("key"):
        key_match = re.search(r"keys='([^']+)'", action)
        if key_match:
            # "ctrl+c" -> ["ctrl", "c"]; a single key yields a one-item list.
            emit(make_keypress_item(key_match.group(1).split('+')))

    elif action.startswith("type"):
        # Prefer everything up to the quote that closes the call, so text
        # containing apostrophes is not cut short; fall back to the simple
        # quote-to-quote form otherwise.
        content_match = (
            re.search(r"content='(.*)'\s*\)\s*$", action, re.DOTALL)
            or re.search(r"content='([^']*)'", action)
        )
        if content_match:
            emit(make_type_item(content_match.group(1)))

    elif action.startswith("scroll"):
        direction_match = re.search(r"direction='([^']+)'", action)
        if start_box and direction_match:
            # Honour the optional wheel step; the action space defaults to 5.
            step_match = re.search(r"step=(\d+)", action)
            amount = int(step_match.group(1)) if step_match else 5
            # Convert direction to scroll amounts
            deltas = {
                "up": (0, -amount),
                "down": (0, amount),
                "left": (-amount, 0),
                "right": (amount, 0),
            }
            scroll_x, scroll_y = deltas.get(direction_match.group(1), (0, 0))
            actual_x, actual_y = to_pixels(start_box)
            emit(make_scroll_item(actual_x, actual_y, scroll_x, scroll_y))

    elif action == "WAIT()":
        emit(make_wait_item())

    # NOTE(review): hover(...) is in the action space but has no mapping here;
    # no move-item helper is imported by this module, so it still yields no
    # computer call.

    return response_items
+ + Args: + messages: Input messages following Responses format + model: Model name to use + tools: Optional list of tool schemas + max_retries: Maximum number of retries for API calls + stream: Whether to stream the response + computer_handler: Computer handler for taking screenshots + use_prompt_caching: Whether to use prompt caching + _on_api_start: Callback for API start + _on_api_end: Callback for API end + _on_usage: Callback for usage tracking + _on_screenshot: Callback for screenshot events + + Returns: + Dict with "output" and "usage" keys + """ + # Convert responses items to completion messages + completion_messages = convert_responses_items_to_completion_messages( + messages, + allow_images_in_tool_results=True + ) + + # Get the last image for processing + last_image_b64 = get_last_image_from_messages(messages) + if not last_image_b64 and computer_handler: + # Take a screenshot if no image available + screenshot_b64 = await computer_handler.screenshot() + if screenshot_b64: + last_image_b64 = screenshot_b64 + if _on_screenshot: + await _on_screenshot(screenshot_b64) + + if not last_image_b64: + raise ValueError("No image available for GLM-4.5V processing") + + # Get the user instruction from the last user message + user_instruction = "" + for message in reversed(completion_messages): + if message.get("role") == "user": + content = message.get("content", "") + if isinstance(content, str): + user_instruction = content + elif isinstance(content, list): + for item in content: + if item.get("type") == "text": + user_instruction = item.get("text", "") + break + break + + # Construct prompt using GLM template + prompt = GLM_PROMPT_TEMPLATE.format( + task=user_instruction, + action_space=GLM_ACTION_SPACE + ) + + # Prepare messages for liteLLM + litellm_messages = [ + { + "role": "system", + "content": "You are a helpful GUI agent assistant." 
+ }, + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{last_image_b64}"}} + ] + } + ] + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": litellm_messages, + # "max_tokens": 2048, + # "temperature": 0.001, + # "extra_body": { + # "skip_special_tokens": False, + # } + } + + # Add API callbacks + if _on_api_start: + await _on_api_start(api_kwargs) + + # Call liteLLM + response = await litellm.acompletion(**api_kwargs) + + if _on_api_end: + await _on_api_end(api_kwargs, response) + + # Get image dimensions for coordinate scaling + image_width, image_height = 1920, 1080 # Default dimensions + + # Try to get actual dimensions from the image + try: + image_data = base64.b64decode(last_image_b64) + image = Image.open(BytesIO(image_data)) + image_width, image_height = image.size + except Exception: + pass # Use default dimensions + + # Convert GLM completion response to responses items + response_items = convert_glm_completion_to_responses_items(response, image_width, image_height) + + # Extract usage information + response_usage = { + **LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(response.usage).model_dump(), + "response_cost": response._hidden_params.get("response_cost", 0.0), + } + if _on_usage: + await _on_usage(response_usage) + + # Create agent response + agent_response = { + "output": response_items, + "usage": response_usage + } + + return agent_response + + async def predict_click( + self, + model: str, + image_b64: str, + instruction: str, + **kwargs + ) -> Optional[Tuple[int, int]]: + """ + Predict click coordinates using GLM-4.5V model. + + Args: + model: Model name to use + image_b64: Base64 encoded image + instruction: Instruction for where to click + + Returns: + Tuple with (x, y) coordinates or None + """ + try: + # Create a simple click instruction prompt + click_prompt = f"""You are a GUI agent. 
Look at the screenshot and identify where to click for: {instruction} + +Respond with a single click action in this format: +left_click(start_box='[x,y]') + +Where x,y are coordinates normalized to 0-999 range.""" + + # Prepare messages for liteLLM + litellm_messages = [ + { + "role": "system", + "content": "You are a helpful GUI agent assistant." + }, + { + "role": "user", + "content": [ + {"type": "text", "text": click_prompt}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}} + ] + } + ] + + # Prepare API call kwargs + api_kwargs = { + "model": model, + "messages": litellm_messages, + "max_tokens": 100, + "temperature": 0.001, + "extra_body": { + "skip_special_tokens": False, + } + } + + # Call liteLLM + response = await litellm.acompletion(**api_kwargs) + + # Extract response content + response_content = response.choices[0].message.content.strip() + + # Parse response for click coordinates + # Look for coordinates in the response, handling special tokens + coord_pattern = r"<\|begin_of_box\|>.*?left_click\(start_box='?\[(\d+),(\d+)\]'?\).*?<\|end_of_box\|>" + match = re.search(coord_pattern, response_content) + + if not match: + # Fallback: look for coordinates without special tokens + coord_pattern = r"left_click\(start_box='?\[(\d+),(\d+)\]'?\)" + match = re.search(coord_pattern, response_content) + + if match: + x, y = int(match.group(1)), int(match.group(2)) + + # Get actual image dimensions for scaling + try: + image_data = base64.b64decode(image_b64) + image = Image.open(BytesIO(image_data)) + image_width, image_height = image.size + except Exception: + # Use default dimensions + image_width, image_height = 1920, 1080 + + # Convert from 0-999 normalized coordinates to actual pixel coordinates + actual_x = int((x / 999.0) * image_width) + actual_y = int((y / 999.0) * image_height) + + return (actual_x, actual_y) + + return None + + except Exception as e: + # Log error and return None + print(f"Error in predict_click: {e}") 
+ return None + + def get_capabilities(self) -> List[AgentCapability]: + """ + Get list of capabilities supported by this agent config. + + Returns: + List of capability strings + """ + return ["step", "click"] diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index 75f8159a..98c8147e 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -38,6 +38,8 @@ uitars-mlx = [ "mlx-vlm>=0.1.27; sys_platform == 'darwin'" ] uitars-hf = [ + "accelerate", + "torch", "transformers>=4.54.0" ] ui = [ @@ -47,12 +49,19 @@ ui = [ cli = [ "yaspin>=3.1.0", ] +glm45v-hf = [ + "accelerate", + "torch", + "transformers-v4.55.0-GLM-4.5V-preview" +] all = [ # omni requirements "ultralytics>=8.0.0", "cua-som>=0.1.0,<0.2.0", # uitars requirements "mlx-vlm>=0.1.27; sys_platform == 'darwin'", + "accelerate", + "torch", "transformers>=4.54.0", # ui requirements "gradio>=5.23.3", From ef79a6c37e4c71d59f52478950b17fb6f74ea3f8 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 10:23:20 -0400 Subject: [PATCH 65/76] Added correct glm-45v prompt --- libs/python/agent/agent/loops/glm45v.py | 282 ++++++++++++++++++------ 1 file changed, 218 insertions(+), 64 deletions(-) diff --git a/libs/python/agent/agent/loops/glm45v.py b/libs/python/agent/agent/loops/glm45v.py index e7230044..adc87026 100644 --- a/libs/python/agent/agent/loops/glm45v.py +++ b/libs/python/agent/agent/loops/glm45v.py @@ -251,29 +251,6 @@ Call rule: `FAIL()` } }""" -GLM_PROMPT_TEMPLATE = """You are a GUI Agent, and your primary task is to respond accurately to user requests or questions. In addition to directly answering the user's queries, you can also use tools or perform GUI operations directly until you fulfill the user's request or provide a correct answer. You should carefully read and understand the images and questions provided by the user, and engage in thinking and reflection when appropriate. 
The coordinates involved are all represented in thousandths (0-999). - -# Task: -{task} - -# Task Platform -Desktop - -# Action Space -{action_space} - -# Output Format -Plain text explanation with action(param='...') -Memory: -[{{"key": "value"}}, ...] - -# Some Additional Notes -- You should put the key information you *have to remember* in a separated memory part and I'll give it to you in the next round. The content in this part should be a dict list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty list. -- My computer's password is "password", feel free to use it when you need sudo rights. - -Current Screenshot: -""" - def encode_image_to_base64(image_path: str) -> str: """Encode image file to base64 string with data URI.""" with open(image_path, "rb") as image_file: @@ -321,23 +298,202 @@ def parse_glm_response(response: str) -> Dict[str, Any]: def get_last_image_from_messages(messages: Messages) -> Optional[str]: """Extract the last image from messages for processing.""" for message in reversed(messages): - if message.get("type") == "computer_call_output": - output = message.get("output", {}) - if output.get("type") == "input_image": - image_url = output.get("image_url", "") - if image_url.startswith("data:image/"): - # Extract base64 part - return image_url.split(",", 1)[1] - elif message.get("role") == "user": - content = message.get("content", []) - if isinstance(content, list): - for item in reversed(content): - if item.get("type") == "image_url": - image_url = item.get("image_url", {}).get("url", "") - if image_url.startswith("data:image/"): - return image_url.split(",", 1)[1] + if isinstance(message, dict): + if message.get("type") == "computer_call_output": + output = message.get("output", {}) + if isinstance(output, dict) and output.get("type") == "input_image": + image_url = output.get("image_url", "") + if isinstance(image_url, str) and 
image_url.startswith("data:image/"): + # Extract base64 part + return image_url.split(",", 1)[1] + elif message.get("role") == "user": + content = message.get("content", []) + if isinstance(content, list): + for item in reversed(content): + if isinstance(item, dict) and item.get("type") == "image_url": + image_url_obj = item.get("image_url", {}) + if isinstance(image_url_obj, dict): + image_url = image_url_obj.get("url", "") + if isinstance(image_url, str) and image_url.startswith("data:image/"): + return image_url.split(",", 1)[1] return None +def convert_responses_items_to_glm45v_pc_prompt(messages: Messages, task: str, memory: str = "") -> List[Dict[str, Any]]: + """Convert responses items to GLM-4.5V PC prompt format with historical actions. + + Args: + messages: List of message items from the conversation + task: The task description + memory: Current memory state + + Returns: + List of content items for the prompt (text and image_url items) + """ + action_space = GLM_ACTION_SPACE + + # Template head + head_text = f"""You are a GUI Agent, and your primary task is to respond accurately to user requests or questions. In addition to directly answering the user's queries, you can also use tools or perform GUI operations directly until you fulfill the user's request or provide a correct answer. You should carefully read and understand the images and questions provided by the user, and engage in thinking and reflection when appropriate. The coordinates involved are all represented in thousandths (0-999). + +# Task: +{task} + +# Task Platform +Ubuntu + +# Action Space +{action_space} + +# Historical Actions and Current Memory +History:""" + + # Template tail + tail_text = f""" +Memory: +{memory} +# Output Format +Plain text explanation with action(param='...') +Memory: +[{{"key": "value"}}, ...] + +# Some Additional Notes +- I'll give you the most recent 4 history screenshots(shrunked to 50%*50%) along with the historical action steps. 
+- You should put the key information you *have to remember* in a seperated memory part and I'll give it to you in the next round. The content in this part should be a dict list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty list. +- My computer's password is "password", feel free to use it when you need sudo rights. +- For the thunderbird account "anonym-x2024@outlook.com", the password is "gTCI";=@y7|QJ0nDa_kN3Sb&>". + +Current Screenshot: +""" + + # Build history from messages + history = [] + history_images = [] + + # Group messages into steps + current_step = [] + step_num = 0 + + for message in messages: + msg_type = message.get("type") + + if msg_type == "reasoning": + current_step.append(message) + elif msg_type == "message" and message.get("role") == "assistant": + current_step.append(message) + elif msg_type == "computer_call": + current_step.append(message) + elif msg_type == "computer_call_output": + current_step.append(message) + # End of step - process it + if current_step: + step_num += 1 + + # Extract bot thought from message content + bot_thought = "" + for item in current_step: + if item.get("type") == "message" and item.get("role") == "assistant": + content = item.get("content", []) + for content_item in content: + if content_item.get("type") == "output_text": + bot_thought = content_item.get("text", "") + break + break + + # Extract action from computer_call + action_text = "" + for item in current_step: + if item.get("type") == "computer_call": + action = item.get("action", {}) + action_type = action.get("type", "") + + if action_type == "click": + x, y = action.get("x", 0), action.get("y", 0) + # Convert to 0-999 range (assuming screen dimensions) + # For now, use direct coordinates - this may need adjustment + action_text = f"left_click(start_box='[{x},{y}]')" + elif action_type == "double_click": + x, y = action.get("x", 0), 
action.get("y", 0) + action_text = f"left_double_click(start_box='[{x},{y}]')" + elif action_type == "right_click": + x, y = action.get("x", 0), action.get("y", 0) + action_text = f"right_click(start_box='[{x},{y}]')" + elif action_type == "drag": + # Handle drag with path + path = action.get("path", []) + if len(path) >= 2: + start = path[0] + end = path[-1] + action_text = f"left_drag(start_box='[{start.get('x', 0)},{start.get('y', 0)}]', end_box='[{end.get('x', 0)},{end.get('y', 0)}]')" + elif action_type == "keypress": + key = action.get("key", "") + action_text = f"key(keys='{key}')" + elif action_type == "type": + text = action.get("text", "") + action_text = f"type(content='{text}')" + elif action_type == "scroll": + x, y = action.get("x", 0), action.get("y", 0) + direction = action.get("direction", "down") + action_text = f"scroll(start_box='[{x},{y}]', direction='{direction}')" + elif action_type == "wait": + action_text = "WAIT()" + break + + # Extract screenshot from computer_call_output + screenshot_url = None + for item in current_step: + if item.get("type") == "computer_call_output": + output = item.get("output", {}) + if output.get("type") == "input_image": + screenshot_url = output.get("image_url", "") + break + + # Store step info + step_info = { + "step_num": step_num, + "bot_thought": bot_thought, + "action_text": action_text, + "screenshot_url": screenshot_url + } + history.append(step_info) + + # Store screenshot for last 4 steps + if screenshot_url: + history_images.append(screenshot_url) + + current_step = [] + + # Build content array with head, history, and tail + content = [] + current_text = head_text + + total_history_steps = len(history) + history_image_count = min(4, len(history_images)) # Last 4 images + + for step_idx, step_info in enumerate(history): + step_num = step_info["step_num"] + bot_thought = step_info["bot_thought"] + action_text = step_info["action_text"] + + if step_idx < total_history_steps - history_image_count: + # For 
steps beyond the last 4, use text placeholder + current_text += f"\nstep {step_num}: Screenshot:(Omitted in context.) Thought: {bot_thought}\nAction: {action_text}" + else: + # For the last 4 steps, insert images + current_text += f"\nstep {step_num}: Screenshot:" + content.append({"type": "text", "text": current_text}) + + # Add image + img_idx = step_idx - (total_history_steps - history_image_count) + if img_idx < len(history_images): + content.append({"type": "image_url", "image_url": {"url": history_images[img_idx]}}) + + current_text = f" Thought: {bot_thought}\nAction: {action_text}" + + # Add tail + current_text += tail_text + content.append({"type": "text", "text": current_text}) + + return content + def model_dump(obj) -> Dict[str, Any]: if isinstance(obj, dict): return {k: model_dump(v) for k, v in obj.items()} @@ -539,11 +695,19 @@ class Glm4vConfig(AsyncAgentConfig): Returns: Dict with "output" and "usage" keys """ - # Convert responses items to completion messages - completion_messages = convert_responses_items_to_completion_messages( - messages, - allow_images_in_tool_results=True - ) + # Get the user instruction from the last user message + user_instruction = "" + for message in reversed(messages): + if isinstance(message, dict) and message.get("role") == "user": + content = message.get("content", "") + if isinstance(content, str): + user_instruction = content + elif isinstance(content, list): + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + user_instruction = item.get("text", "") + break + break # Get the last image for processing last_image_b64 = get_last_image_from_messages(messages) @@ -558,26 +722,19 @@ class Glm4vConfig(AsyncAgentConfig): if not last_image_b64: raise ValueError("No image available for GLM-4.5V processing") - # Get the user instruction from the last user message - user_instruction = "" - for message in reversed(completion_messages): - if message.get("role") == "user": - content = 
message.get("content", "") - if isinstance(content, str): - user_instruction = content - elif isinstance(content, list): - for item in content: - if item.get("type") == "text": - user_instruction = item.get("text", "") - break - break - - # Construct prompt using GLM template - prompt = GLM_PROMPT_TEMPLATE.format( + # Convert responses items to GLM-4.5V PC prompt format with historical actions + prompt_content = convert_responses_items_to_glm45v_pc_prompt( + messages=messages, task=user_instruction, - action_space=GLM_ACTION_SPACE + memory="[]" # Initialize with empty memory for now ) + # Add the current screenshot to the end + prompt_content.append({ + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{last_image_b64}"} + }) + # Prepare messages for liteLLM litellm_messages = [ { @@ -586,10 +743,7 @@ class Glm4vConfig(AsyncAgentConfig): }, { "role": "user", - "content": [ - {"type": "text", "text": prompt}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{last_image_b64}"}} - ] + "content": prompt_content } ] From 3866279b37645036675f02510323b76a3c3f20d8 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 10:47:20 -0400 Subject: [PATCH 66/76] Docs rewrite... again --- README.md | 400 ++++++++++++++---------------------------------------- 1 file changed, 104 insertions(+), 296 deletions(-) diff --git a/README.md b/README.md index 8d799d17..b2410388 100644 --- a/README.md +++ b/README.md @@ -16,224 +16,138 @@ **cua** ("koo-ah") is Docker for [Computer-Use Agents](https://www.oneusefulthing.org/p/when-you-give-a-claude-a-mouse) - it enables AI agents to control full operating systems in virtual containers and deploy them locally or to the cloud.
- +
-
-Check out more demos of the Computer-Use Agent in action - -
-MCP Server: Work with Claude Desktop and Tableau -
-
- -
-
+With the Computer SDK, you can: +- automate Windows, Linux, and macOS VMs with a consistent, pyautogui-like API +- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using the [c/ua cloud](https://www.trycua.com/) -
-AI-Gradio: Multi-app workflow with browser, VS Code and terminal -
-
- -
-
- -
-Notebook: Fix GitHub issue in Cursor -
-
- -
-
-

- -# 🚀 Quick Start with a Computer-Use Agent UI - -**Need to automate desktop tasks? Launch the Computer-Use Agent UI with a single command.** - -### Option 1: Fully-managed install with Docker (recommended) - -*Docker-based guided install for quick use* - -**macOS/Linux/Windows (via WSL):** - -```bash -# Requires Docker -/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/scripts/playground-docker.sh)" -``` - -This script will guide you through setup using Docker containers and launch the Computer-Use Agent UI. - ---- - -### Option 2: [Dev Container](./.devcontainer/README.md) - -*Best for contributors and development* - -This repository includes a [Dev Container](./.devcontainer/README.md) configuration that simplifies setup to a few steps: - -1. **Install the Dev Containers extension ([VS Code](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [WindSurf](https://docs.windsurf.com/windsurf/advanced#dev-containers-beta))** -2. **Open the repository in the Dev Container:** - - Press `Ctrl+Shift+P` (or `⌘+Shift+P` on macOS) - - Select `Dev Containers: Clone Repository in Container Volume...` and paste the repository URL: `https://github.com/trycua/cua.git` (if not cloned) or `Dev Containers: Open Folder in Container...` (if git cloned). - > **Note**: On WindSurf, the post install hook might not run automatically. If so, run `/bin/bash .devcontainer/post-install.sh` manually. -3. **Open the VS Code workspace:** Once the post-install.sh is done running, open the `.vscode/py.code-workspace` workspace and press ![Open Workspace](https://github.com/user-attachments/assets/923bdd43-8c8f-4060-8d78-75bfa302b48c) -. -4. **Run the Agent UI example:** Click ![Run Agent UI](https://github.com/user-attachments/assets/7a61ef34-4b22-4dab-9864-f86bf83e290b) - to start the Gradio UI. If prompted to install **debugpy (Python Debugger)** to enable remote debugging, select 'Yes' to proceed. -5. 
**Access the Gradio UI:** The Gradio UI will be available at `http://localhost:7860` and will automatically forward to your host machine. - ---- - -### Option 3: PyPI - -*Direct Python package installation* - -```bash -# conda create -yn cua python==3.12 - -pip install -U "cua-computer[all]" "cua-agent[all]" -python -m agent.ui # Start the agent UI -``` - -Or check out the [Usage Guide](#-usage-guide) to learn how to use our Python SDK in your own code. - ---- - -## Supported [Agent Loops](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - -- [UITARS-1.5](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Run locally on Apple Silicon with MLX, or use cloud providers -- [OpenAI CUA](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Use OpenAI's Computer-Use Preview model -- [Anthropic CUA](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Use Anthropic's Computer-Use capabilities -- [OmniParser-v2.0](https://github.com/trycua/cua/blob/main/libs/python/agent/README.md#agent-loops) - Control UI with [Set-of-Marks prompting](https://som-gpt4v.github.io/) using any vision model - -## 🖥️ Compatibility - -For detailed compatibility information including host OS support, VM emulation capabilities, and model provider compatibility, see the [Compatibility Matrix](./COMPATIBILITY.md). 
+With the Agent SDK, you can: +- run computer-use models with a [consistent output](https://docs.trycua.com/docs/agent-sdk/chat-history#message-array-structure) +- run composed agents using UI grounding models and any liteLLM compatible vision model +- quickly evaluate new UI agent models and UI grounding models + - `anthropic/claude-opus-4-1-20250805` + - `openai/computer-use-preview` + - `openrouter/z-ai/glm-4.5v` + - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` + - `omniparser+any LLM` + - `huggingface-local/HelloKKMe/GTA1-7B+any LLM` (using [Composed Agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents))
+ +# Quick Start + +- [Get started with a Computer-Use Agent UI](https://docs.trycua.com/docs/quickstart-ui) +- [Get started with the Computer-Use Agent CLI](https://docs.trycua.com/docs/quickstart-cli) +- [Get Started with the Python SDKs](https://docs.trycua.com/docs/quickstart-devs) +
-# 🐍 Usage Guide - -Follow these steps to use Cua in your own Python code. See [Developer Guide](./docs/Developer-Guide.md) for building from source. - -### Step 1: Install Lume CLI +# Usage ([Docs](https://docs.trycua.com/docs)) ```bash -/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" +pip install cua-agent[all] ``` - -Lume CLI manages high-performance macOS/Linux VMs with near-native speed on Apple Silicon. - -### Step 2: Pull the macOS CUA Image - -```bash -lume pull macos-sequoia-cua:latest -``` - -The macOS CUA image contains the default Mac apps and the Computer Server for easy automation. - -### Step 3: Install Python SDK - -```bash -pip install "cua-computer[all]" "cua-agent[all]" -``` - -### Step 4: Use in Your Code - ```python -import asyncio -from computer import Computer from agent import ComputerAgent -async def main(): - # Start a local macOS VM - computer = Computer(os_type="macos") - await computer.run() +agent = ComputerAgent( + model="anthropic/claude-3-5-sonnet-20241022", + tools=[computer], + max_trajectory_budget=5.0 +) - # Or with Cua Cloud Container - computer = Computer( - os_type="linux", - api_key="your_cua_api_key_here", - name="your_container_name_here" - ) +messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] - # Example: Direct control of a macOS VM with Computer - computer.interface.delay = 0.1 # Wait 0.1 seconds between kb/m actions - await computer.interface.left_click(100, 200) - await computer.interface.type_text("Hello, world!") - screenshot_bytes = await computer.interface.screenshot() - - # Example: Create and run an agent locally using mlx-community/UI-TARS-1.5-7B-6bit - agent = ComputerAgent( - model="mlx/mlx-community/UI-TARS-1.5-7B-6bit", - tools=[computer], - ) - async for result in agent.run("Find the trycua/cua repository on GitHub and follow the quick start guide"): - print(result) - -if __name__ == "__main__": - 
asyncio.run(main()) +async for result in agent.run(messages): + for item in result["output"]: + if item["type"] == "message": + print(item["content"][0]["text"]) ``` -For ready-to-use examples, check out our [Notebooks](./notebooks/) collection. - -### Lume CLI Reference - -```bash -# Install Lume CLI and background service -curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash - -# List all VMs -lume ls - -# Pull a VM image -lume pull macos-sequoia-cua:latest - -# Create a new VM -lume create my-vm --os macos --cpu 4 --memory 8GB --disk-size 50GB - -# Run a VM (creates and starts if it doesn't exist) -lume run macos-sequoia-cua:latest - -# Stop a VM -lume stop macos-sequoia-cua_latest - -# Delete a VM -lume delete macos-sequoia-cua_latest +### Output format (OpenAI Agent Responses Format): +```json +{ + "output": [ + # user input + { + "role": "user", + "content": "go to trycua on gh" + }, + # first agent turn adds the model output to the history + { + "summary": [ + { + "text": "Searching Firefox for Trycua GitHub", + "type": "summary_text" + } + ], + "type": "reasoning" + }, + { + "action": { + "text": "Trycua GitHub", + "type": "type" + }, + "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", + "status": "completed", + "type": "computer_call" + }, + # second agent turn adds the computer output to the history + { + "type": "computer_call_output", + "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq", + "output": { + "type": "input_image", + "image_url": "data:image/png;base64,..." + } + }, + # final agent turn adds the agent output text to the history + { + "type": "message", + "role": "assistant", + "content": [ + { + "text": "Success! 
The Trycua GitHub page has been opened.", + "type": "output_text" + } + ] + } + ], + "usage": { + "prompt_tokens": 150, + "completion_tokens": 75, + "total_tokens": 225, + "response_cost": 0.01, + } +} ``` -### Lumier CLI Reference +# Computer ([Docs](https://docs.trycua.com/docs/computer-sdk/computers)) +```python +from computer import Computer -For advanced container-like virtualization, check out [Lumier](./libs/lumier/README.md) - a Docker interface for macOS and Linux VMs. +async with Computer( + os_type="linux", + provider_type="cloud", + name="your-container-name", + api_key="your-api-key" +) as computer: + # Take screenshot + screenshot = await computer.interface.screenshot() -```bash -# Install Lume CLI and background service -curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash - -# Run macOS in a Docker container -docker run -it --rm \ - --name lumier-vm \ - -p 8006:8006 \ - -v $(pwd)/storage:/storage \ - -v $(pwd)/shared:/shared \ - -e VM_NAME=lumier-vm \ - -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ - -e CPU_CORES=4 \ - -e RAM_SIZE=8192 \ - -e HOST_STORAGE_PATH=$(pwd)/storage \ - -e HOST_SHARED_PATH=$(pwd)/shared \ - trycua/lumier:latest + # Click and type + await computer.interface.left_click(100, 100) + await computer.interface.type("Hello!") ``` -## Resources +# Resources - [How to use the MCP Server with Claude Desktop or other MCP clients](./libs/python/mcp-server/README.md) - One of the easiest ways to get started with Cua - [How to use OpenAI Computer-Use, Anthropic, OmniParser, or UI-TARS for your Computer-Use Agent](./libs/python/agent/README.md) - [How to use Lume CLI for managing desktops](./libs/lume/README.md) - [Training Computer-Use Models: Collecting Human Trajectories with Cua (Part 1)](https://www.trycua.com/blog/training-computer-use-models-trajectories-1) -- [Build Your Own Operator on macOS (Part 1)](https://www.trycua.com/blog/build-your-own-operator-on-macos-1) ## Modules @@ 
-250,112 +164,6 @@ docker run -it --rm \ | [**Core (Python)**](./libs/python/core/README.md) | Python Core utilities | `pip install cua-core` | | [**Core (Typescript)**](./libs/typescript/core/README.md) | Typescript Core utilities | `npm install @trycua/core` | -## Computer Interface Reference - -For complete examples, see [computer_examples.py](./examples/computer_examples.py) or [computer_nb.ipynb](./notebooks/computer_nb.ipynb) - -```python -# Shell Actions -result = await computer.interface.run_command(cmd) # Run shell command -# result.stdout, result.stderr, result.returncode - -# Mouse Actions -await computer.interface.left_click(x, y) # Left click at coordinates -await computer.interface.right_click(x, y) # Right click at coordinates -await computer.interface.double_click(x, y) # Double click at coordinates -await computer.interface.move_cursor(x, y) # Move cursor to coordinates -await computer.interface.drag_to(x, y, duration) # Drag to coordinates -await computer.interface.get_cursor_position() # Get current cursor position -await computer.interface.mouse_down(x, y, button="left") # Press and hold a mouse button -await computer.interface.mouse_up(x, y, button="left") # Release a mouse button - -# Keyboard Actions -await computer.interface.type_text("Hello") # Type text -await computer.interface.press_key("enter") # Press a single key -await computer.interface.hotkey("command", "c") # Press key combination -await computer.interface.key_down("command") # Press and hold a key -await computer.interface.key_up("command") # Release a key - -# Scrolling Actions -await computer.interface.scroll(x, y) # Scroll the mouse wheel -await computer.interface.scroll_down(clicks) # Scroll down -await computer.interface.scroll_up(clicks) # Scroll up - -# Screen Actions -await computer.interface.screenshot() # Take a screenshot -await computer.interface.get_screen_size() # Get screen dimensions - -# Clipboard Actions -await computer.interface.set_clipboard(text) # Set 
clipboard content -await computer.interface.copy_to_clipboard() # Get clipboard content - -# File System Operations -await computer.interface.file_exists(path) # Check if file exists -await computer.interface.directory_exists(path) # Check if directory exists -await computer.interface.read_text(path, encoding="utf-8") # Read file content -await computer.interface.write_text(path, content, encoding="utf-8") # Write file content -await computer.interface.read_bytes(path) # Read file content as bytes -await computer.interface.write_bytes(path, content) # Write file content as bytes -await computer.interface.delete_file(path) # Delete file -await computer.interface.create_dir(path) # Create directory -await computer.interface.delete_dir(path) # Delete directory -await computer.interface.list_dir(path) # List directory contents - -# Accessibility -await computer.interface.get_accessibility_tree() # Get accessibility tree - -# Delay Configuration -# Set default delay between all actions (in seconds) -computer.interface.delay = 0.5 # 500ms delay between actions - -# Or specify delay for individual actions -await computer.interface.left_click(x, y, delay=1.0) # 1 second delay after click -await computer.interface.type_text("Hello", delay=0.2) # 200ms delay after typing -await computer.interface.press_key("enter", delay=0.5) # 500ms delay after key press - -# Python Virtual Environment Operations -await computer.venv_install("demo_venv", ["requests", "macos-pyxa"]) # Install packages in a virtual environment -await computer.venv_cmd("demo_venv", "python -c 'import requests; print(requests.get(`https://httpbin.org/ip`).json())'") # Run a shell command in a virtual environment -await computer.venv_exec("demo_venv", python_function_or_code, *args, **kwargs) # Run a Python function in a virtual environment and return the result / raise an exception - -# Example: Use sandboxed functions to execute code in a Cua Container -from computer.helpers import sandboxed - 
-@sandboxed("demo_venv") -def greet_and_print(name): - """Get the HTML of the current Safari tab""" - import PyXA - safari = PyXA.Application("Safari") - html = safari.current_document.source() - print(f"Hello from inside the container, {name}!") - return {"greeted": name, "safari_html": html} - -# When a @sandboxed function is called, it will execute in the container -result = await greet_and_print("Cua") -# Result: {"greeted": "Cua", "safari_html": "..."} -# stdout and stderr are also captured and printed / raised -print("Result from sandboxed function:", result) -``` - -## ComputerAgent Reference - -For complete examples, see [agent_examples.py](./examples/agent_examples.py) or [agent_nb.ipynb](./notebooks/agent_nb.ipynb) - -```python -# Import necessary components -from agent import ComputerAgent - -# UI-TARS-1.5 agent for local execution with MLX -ComputerAgent(model="mlx/mlx-community/UI-TARS-1.5-7B-6bit") -# OpenAI Computer-Use agent using OPENAI_API_KEY -ComputerAgent(model="computer-use-preview") -# Anthropic Claude agent using ANTHROPIC_API_KEY -ComputerAgent(model="anthropic/claude-3-5-sonnet-20240620") - -# OmniParser loop for UI control using Set-of-Marks (SOM) prompting and any vision LLM -ComputerAgent(model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M") -``` - ## Community Join our [Discord community](https://discord.com/invite/mVnXXpdE85) to discuss ideas, get assistance, or share your demos! 
From 3cf818aa1ba09674725f4b88318bfed34f05345e Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 10:50:49 -0400 Subject: [PATCH 67/76] Added feature link --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b2410388..b5bd5f41 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,8 @@ With the Computer SDK, you can: With the Agent SDK, you can: - run computer-use models with a [consistent output](https://docs.trycua.com/docs/agent-sdk/chat-history#message-array-structure) -- run composed agents using UI grounding models and any liteLLM compatible vision model +- run composed agents using UI grounding models and any LLM +- use any LiteLLM provider (`openai/`, `openrouter/`, etc.) or our included local providers (`huggingface-local/`, `mlx/`) - quickly evaluate new UI agent models and UI grounding models - `anthropic/claude-opus-4-1-20250805` - `openai/computer-use-preview` @@ -34,6 +35,8 @@ With the Agent SDK, you can: - `omniparser+any LLM` - `huggingface-local/HelloKKMe/GTA1-7B+any LLM` (using [Composed Agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)) +Missing a model? [Raise a feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)! +
# Quick Start From 85cdaceca4e0566805615c02718c7529df1945ea Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 10:53:50 -0400 Subject: [PATCH 68/76] Updated docs --- docs/content/docs/agent-sdk/agent-loops.mdx | 9 +-------- .../supported-agents/computer-use-agents.mdx | 13 +++++++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx b/docs/content/docs/agent-sdk/agent-loops.mdx index bc26cf26..0be4e009 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -29,11 +29,4 @@ async for result in agent.run(prompt): print("Agent:", result["output"][-1]["content"][0]["text"]) ``` -We currently support 4 computer-using agent loops: - -- Anthropic CUAs -- OpenAI CUA Preview -- UI-TARS 1.5 -- Omniparser + LLMs - -For a full list of supported models and configurations, see the [Supported Agents](./supported-agents) page. +For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page. diff --git a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx index 55b868b6..7aeab043 100644 --- a/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx +++ b/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx @@ -49,6 +49,19 @@ async for _ in agent.run("Open the settings menu and change the theme to dark mo pass ``` +## GLM-4.5V + +Zhipu AI's GLM-4.5V vision-language model with computer-use capabilities: + +- `openrouter/z-ai/glm-4.5v` +- `huggingface-local/zai-org/GLM-4.5V` + +```python +agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer]) +async for _ in agent.run("Click on the search bar and type 'hello world'"): + pass +``` + --- For details on agent loop behavior and usage, see [Agent Loops](../agent-loops). 
From a42fd9f964d9fe9a8b4f0f960d74bea03bf01622 Mon Sep 17 00:00:00 2001 From: ddupont <3820588+ddupont808@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:27:00 -0400 Subject: [PATCH 69/76] Link to computer SDK reference --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5bd5f41..0b05550b 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ With the Computer SDK, you can: -- automate Windows, Linux, and macOS VMs with a consistent, pyautogui-like API +- automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions) - create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using the [c/ua cloud](https://www.trycua.com/) With the Agent SDK, you can: @@ -221,4 +221,4 @@ Thank you to all our supporters! - \ No newline at end of file + From 609859c46805c0827cb5c1d100a0ecb61d4bc4a7 Mon Sep 17 00:00:00 2001 From: ddupont <3820588+ddupont808@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:28:19 -0400 Subject: [PATCH 70/76] Add cua-computer pip install --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 0b05550b..fcac4a75 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,10 @@ async for result in agent.run(messages): ``` # Computer ([Docs](https://docs.trycua.com/docs/computer-sdk/computers)) + +```bash +pip install "cua-computer[all]" +``` ```python from computer import Computer From a60cf26bb8ba153823d44c226a9daaca7473930c Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 12:00:35 -0400 Subject: [PATCH 71/76] Added last run --- notebooks/eval_osworld.ipynb | 109598 +++++++++++++++++++++++++++++++- 1 file changed, 109209 insertions(+), 389 deletions(-) diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb index a287022c..7b00795a 100644 --- a/notebooks/eval_osworld.ipynb +++
b/notebooks/eval_osworld.ipynb @@ -768,189 +768,262 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 0%|----------------------------------------| 0/200 [1:24 output.csv\\nlibreoffice --calc output.csv\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'cd ~/Desktop\\nlibreoffice --headless --convert-to csv file1.xlsx\\nlibreoffice --headless --convert-to csv file2.ods\\ncat file1.csv file2.csv > output.csv\\nlibreoffice --calc output.csv\\n'})\n", + "2025-08-11 15:32:49,711 - agent.ComputerAgent - INFO - Computer: click({'x': 694, 'y': 248})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 694, 'y': 248})\n", + " 2%|----------------------------------------| 155/7340 [6:28<300:27, 23.9 steps/min]2025-08-11 15:32:50,329 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:32:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:32:51,007 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:32:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 157/7340 [6:30<297:33, 24.1 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:32:51,672 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:32:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:32:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:32:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 2%|----------------------------------------| 157/7340 [6:32<299:06, 24.0 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:32:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:32:54,187 - agent.ComputerAgent - INFO - Computer: click({'x': 463, 'y': 136})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 463, 'y': 136})\n", + " 2%|----------------------------------------| 157/7340 [6:33<299:58, 23.9 steps/min]\u001b[92m15:32:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:32:54,845 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 103, 'y': 380})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 103, 'y': 
380})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/edaeedb6-9993-4b6f-b226-19e2768a5736/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 158/7340 [6:34<298:46, 24.0 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7ac3560-cea1-4b97-a59c-4b3038bec6c7/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:32:55,978 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m15:32:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 159/7340 [6:35<297:36, 24.1 steps/min]2025-08-11 15:32:56,643 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:32:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:32:57,305 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m15:32:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 159/7340 [6:36<298:28, 24.1 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 159/7340 [6:37<299:14, 24.0 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/0b1cfd32-0cbc-48e7-890d-9ec0ac043035/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:32:59,978 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:33:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 159/7340 [6:39<300:32, 23.9 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:01,288 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:02 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 159/7340 [6:41<302:27, 23.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:03,240 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:33:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:33:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:03,930 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 390, 'y': 345})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 390, 'y': 345})\n", + " 2%|----------------------------------------| 159/7340 [6:43<303:26, 23.7 steps/min]\u001b[92m15:33:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:04,616 - agent.ComputerAgent - INFO - Computer: click({'x': 101, 'y': 295})\n", + 
"INFO:agent.ComputerAgent:Computer: click({'x': 101, 'y': 295})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:06,000 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "2025-08-11 15:33:06,671 - agent.ComputerAgent - INFO - Computer: click({'x': 219, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 219, 'y': 53})\n", + " 2%|----------------------------------------| 160/7340 [6:45<303:33, 23.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:08,650 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:33:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:33:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 2%|----------------------------------------| 162/7340 [6:48<301:44, 23.8 steps/min]\u001b[92m15:33:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:09,987 - agent.ComputerAgent - INFO - Computer: click({'x': 812, 'y': 189})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 812, 'y': 189})\n", + "\u001b[92m15:33:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:11,307 - agent.ComputerAgent - INFO - Computer: click({'x': 102, 'y': 238})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 102, 'y': 238})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:11 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:12,659 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 2%|----------------------------------------| 162/7340 [6:52<304:40, 23.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:14,031 - agent.ComputerAgent - INFO - Computer: click({'x': 309, 'y': 116})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 309, 'y': 116})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:15,326 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "2025-08-11 15:33:15,962 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m15:33:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 164/7340 [6:55<303:17, 23.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:17,260 - agent.ComputerAgent - INFO - Computer: click({'x': 652, 'y': 139})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 652, 'y': 139})\n", + "2025-08-11 15:33:17,929 - agent.ComputerAgent - INFO - Computer: click({'x': 212, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 212, 'y': 53})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:33:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:20,602 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+alt+t'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+alt+t'})\n", + " 2%|----------------------------------------| 166/7340 [6:59<302:22, 23.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:21,282 - agent.ComputerAgent - INFO - Computer: click({'x': 371, 'y': 624})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 371, 'y': 624})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:22,607 - agent.ComputerAgent - INFO - Computer: type({'text': '100'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '100'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8d197f4f-b7b0-4196-9681-135d7bc3a45b/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP 
Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e9d83ed4-d6d0-46f7-982b-98433769e30b/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:23,270 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + " 2%|----------------------------------------| 168/7340 [7:02<300:36, 23.9 steps/min]\u001b[92m15:33:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:33:23,919 - agent.ComputerAgent - INFO - Computer: click({'x': 414, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 414, 'y': 75})\n", + "2025-08-11 15:33:24,594 - agent.ComputerAgent - INFO - Computer: double_click({'x': 473, 'y': 93})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 473, 'y': 93})\n", + " 2%|----------------------------------------| 170/7340 [7:03<297:55, 24.1 steps/min]2025-08-11 15:33:25,220 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m15:33:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ae9871c0-5cb9-4c5b-9c02-c899819f9f81/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:25,919 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:33:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
2%|----------------------------------------| 172/7340 [7:05<295:18, 24.3 steps/min]2025-08-11 15:33:26,562 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m15:33:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:27,860 - agent.ComputerAgent - INFO - Computer: type({'text': 'edited_colorful.png'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'edited_colorful.png'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e1e61614-8290-4d90-9feb-594d2a7199e8/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 172/7340 [7:07<297:07, 24.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:33:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:29,689 - agent.ComputerAgent - INFO - Computer: click({'x': 693, 'y': 130})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 693, 
'y': 130})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 173/7340 [7:08<296:07, 24.2 steps/min]2025-08-11 15:33:30,343 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m15:33:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cfefeec4-603f-4657-b0fe-7a641734693c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/982f8f16-b578-409f-8388-d8d5ee68ccee/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:31,382 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:33:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2d349f43-6c63-4144-9bd3-bbd16183b16d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/69393c41-bcaa-4752-9a82-e3b105fae459/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 174/7340 [7:10<295:35, 24.2 steps/min]2025-08-11 15:33:32,020 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m15:33:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:32,699 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:33:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 174/7340 [7:11<296:28, 24.2 steps/min]2025-08-11 15:33:33,362 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:33:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/reset \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:34,058 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m15:33:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5a854981-aa94-433f-9381-2964f1117035/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/edaeedb6-9993-4b6f-b226-19e2768a5736/invoke \"HTTP/1.1 200 OK\"\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7ac3560-cea1-4b97-a59c-4b3038bec6c7/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 174/7340 [7:13<297:25, 24.1 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0b1cfd32-0cbc-48e7-890d-9ec0ac043035/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:34,700 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:33:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:33:35,380 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:33:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 174/7340 [7:14<298:20, 24.0 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:36,059 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:33:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + 
"\u001b[92m15:33:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/49f1eefe-9bc4-430c-a6c8-83675960a057/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 200 OK\"\n", + " 2%|----------------------------------------| 174/7340 [7:16<299:43, 23.9 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:38,043 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m15:33:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:33:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:39,360 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 285})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 285})\n", + "\u001b[92m15:33:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 2%|----------------------------------------| 174/7340 [7:18<301:01, 23.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:40,000 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:33:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:33:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:40,660 - agent.ComputerAgent - INFO - Computer: click({'x': 20, 'y': 139})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 20, 'y': 139})\n", + "\u001b[92m15:33:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:42,006 - agent.ComputerAgent - INFO - Computer: type({'text': 'clear cookies on exit'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'clear cookies on exit'})\n", + " 2%|----------------------------------------| 175/7340 [7:21<301:04, 23.8 steps/min]2025-08-11 15:33:42,645 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:33:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:33:43,335 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 46, 'y': 166}, {'x': 386, 'y': 356}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 46, 'y': 166}, {'x': 386, 'y': 356}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 2%|----------------------------------------| 177/7340 [7:23<298:59, 24.0 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:33:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:33:44 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:45,327 - agent.ComputerAgent - INFO - Computer: double_click({'x': 244, 'y': 155})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 244, 'y': 155})\n", + "\u001b[92m15:33:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:46,591 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 15:33:46,592 - agent.ComputerAgent - INFO - Agent: Opening the desktop and launching GIMP to convert the image to SVG.\n", + "INFO:agent.ComputerAgent:Agent: Opening the desktop and launching GIMP to convert the image to SVG.\n", + "2025-08-11 15:33:46,593 - agent.ComputerAgent - INFO - Computer: screenshot({})\n", + "INFO:agent.ComputerAgent:Computer: screenshot({})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 2%|----------------------------------------| 178/7340 [7:26<299:25, 23.9 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:47,979 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -525, 'scroll_x': 0, 'x': 126, 'y': 
419})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -525, 'scroll_x': 0, 'x': 126, 'y': 419})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:48,644 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:33:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 2%|----------------------------------------| 180/7340 [7:27<296:56, 24.1 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:33:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:49,323 - agent.ComputerAgent - INFO - Computer: click({'x': 249, 'y': 81})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 249, 'y': 81})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:51,332 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus editor'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'focus editor'})\n", + " 2%|----------------------------------------| 181/7340 [7:30<296:59, 24.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:33:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:33:52,512 - agent.ComputerAgent - INFO - Computer: click({'x': 416, 'y': 74})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 416, 'y': 74})\n", + " 2%|----------------------------------------| 183/7340 [7:31<294:26, 24.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 15:33:53,159 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 7 messages\n", + "\u001b[92m15:33:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3d9da005-d40d-4335-86ec-275c2ec5665b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:33:53,831 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:33:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:33:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 184/7340 [7:33<294:07, 24.3 
steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:33:55,575 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m15:33:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:33:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8d197f4f-b7b0-4196-9681-135d7bc3a45b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/69393c41-bcaa-4752-9a82-e3b105fae459/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 184/7340 [7:34<294:47, 24.3 steps/min]2025-08-11 15:33:56,223 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 336, 'y': 493})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 336, 'y': 493})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/982f8f16-b578-409f-8388-d8d5ee68ccee/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:56,861 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:33:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cfefeec4-603f-4657-b0fe-7a641734693c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e9d83ed4-d6d0-46f7-982b-98433769e30b/invoke \"HTTP/1.1 200 OK\"\n", + "ERROR:asyncio:Unclosed client session\n", + "client_session: \n", + " 3%|█---------------------------------------| 184/7340 [7:36<295:51, 24.2 steps/min]2025-08-11 15:33:58,012 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m15:33:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/49f1eefe-9bc4-430c-a6c8-83675960a057/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:33:58,652 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m15:33:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 185/7340 [7:37<295:08, 24.2 steps/min]2025-08-11 15:33:59,334 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:33:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", 
+ "2025-08-11 15:33:59,993 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:34:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 185/7340 [7:39<296:01, 24.2 steps/min]2025-08-11 15:34:01,015 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:34:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 185/7340 [7:40<297:07, 24.1 steps/min]\u001b[92m15:34:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7ac3560-cea1-4b97-a59c-4b3038bec6c7/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:02,373 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:34:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:34:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:03,423 - agent.ComputerAgent - INFO - Computer: click({'x': 692, 'y': 624})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 692, 'y': 624})\n", + " 3%|█---------------------------------------| 185/7340 [7:42<298:12, 24.0 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 186/7340 [7:44<297:45, 24.0 steps/min]\u001b[92m15:34:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5a854981-aa94-433f-9381-2964f1117035/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:34:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:07,220 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'meta'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'meta'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:08,609 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 550, 'y': 627})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 550, 'y': 627})\n", + "\u001b[92m15:34:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 186/7340 
[7:49<300:46, 23.8 steps/min]\u001b[92m15:34:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:10,587 - agent.ComputerAgent - INFO - Computer: click({'x': 515, 'y': 457})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 515, 'y': 457})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:11,253 - agent.ComputerAgent - INFO - Computer: click({'x': 905, 'y': 50})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 905, 'y': 50})\n", + "\u001b[92m15:34:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:11,905 - agent.ComputerAgent - INFO - Computer: click({'x': 476, 'y': 169})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 476, 'y': 169})\n", + " 
3%|█---------------------------------------| 188/7340 [7:51<298:42, 23.9 steps/min]2025-08-11 15:34:12,560 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': -659, 'x': 18, 'y': 13})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_x': 0, 'scroll_y': -659, 'x': 18, 'y': 13})\n", + "\u001b[92m15:34:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:13,202 - agent.ComputerAgent - INFO - Computer: click({'x': 19, 'y': 44})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 19, 'y': 44})\n", + " 3%|█---------------------------------------| 191/7340 [7:52<294:43, 24.3 steps/min]2025-08-11 15:34:13,860 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:34:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 502 Bad Gateway\"\n", + "2025-08-11 15:34:14,514 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m15:34:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 193/7340 [7:53<292:23, 24.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - 
\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 193/7340 [7:55<293:37, 24.3 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:34:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:17,714 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -575, 'scroll_x': 0, 'x': 90, 'y': 194})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -575, 'scroll_x': 0, 'x': 90, 'y': 194})\n", + " 3%|█---------------------------------------| 193/7340 [7:56<294:20, 24.3 steps/min]\u001b[92m15:34:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:18,367 - agent.ComputerAgent - INFO - Computer: click({'x': 120, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 120, 'y': 53})\n", + "\u001b[92m15:34:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/982f8f16-b578-409f-8388-d8d5ee68ccee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f1593044-fc61-4fc8-b29d-87e37914d5c2/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:34:19,040 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 430})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 430})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0b1cfd32-0cbc-48e7-890d-9ec0ac043035/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 194/7340 [7:58<293:36, 24.3 steps/min]2025-08-11 15:34:19,683 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m15:34:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:34:20,361 - agent.ComputerAgent - INFO - LLM processing started with 1 
messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m15:34:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2d349f43-6c63-4144-9bd3-bbd16183b16d/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 196/7340 [7:59<291:21, 24.5 steps/min]2025-08-11 15:34:21,003 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m15:34:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/edaeedb6-9993-4b6f-b226-19e2768a5736/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:34:21,660 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:34:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 
3%|█---------------------------------------| 196/7340 [8:01<292:33, 24.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:23,387 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:34:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:34:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 196/7340 [8:02<293:16, 24.4 steps/min]2025-08-11 15:34:24,065 - agent.ComputerAgent - INFO - Computer: click({'x': 414, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 414, 'y': 75})\n", + "2025-08-11 15:34:24,731 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:34:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:34:25,787 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:34:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions 
\"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8d197f4f-b7b0-4196-9681-135d7bc3a45b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 197/7340 [8:06<294:10, 24.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:28,860 - agent.ComputerAgent - INFO - Computer: type({'text': ' active editor group'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': ' active editor group'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cfefeec4-603f-4657-b0fe-7a641734693c/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 197/7340 [8:08<294:56, 24.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:29,500 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m15:34:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:34:30,194 - agent.ComputerAgent - INFO - Computer: click({'x': 625, 'y': 427})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 625, 'y': 427})\n", + "2025-08-11 15:34:30,876 - agent.ComputerAgent - INFO - Computer: click({'x': 904, 'y': 558})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 904, 'y': 558})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 198/7340 [8:11<295:24, 24.2 steps/min]\u001b[92m15:34:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:32,811 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m15:34:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:34,147 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "\u001b[92m15:34:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 200/7340 [8:13<293:33, 24.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:34,769 - agent.ComputerAgent - INFO - Computer: click({'x': 183, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 183, 'y': 53})\n", + "2025-08-11 15:34:35,412 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 15:34:35,413 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 428})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 428})\n", + "2025-08-11 15:34:36,077 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:34:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 200/7340 [8:16<295:30, 24.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:38,113 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:34:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:34:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/e1e61614-8290-4d90-9feb-594d2a7199e8/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:40,185 - agent.ComputerAgent - INFO - LLM processing started with 9 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 9 messages\n", + "\u001b[92m15:34:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:34:40,912 - agent.ComputerAgent - INFO - Computer: move({'x': 230, 'y': 128})\n", + "INFO:agent.ComputerAgent:Computer: move({'x': 230, 'y': 128})\n", + "\u001b[92m15:34:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 202/7340 [8:20<294:32, 24.2 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:41,633 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 361, 'y': 549})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 361, 'y': 549})\n", + "\u001b[92m15:34:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "2025-08-11 15:34:42,290 - agent.ComputerAgent - INFO - Computer: click({'x': 93, 'y': 184})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 93, 'y': 184})\n", + "\u001b[92m15:34:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 203/7340 [8:21<293:50, 24.3 steps/min]2025-08-11 15:34:42,946 - agent.ComputerAgent - INFO - Computer: click({'x': 17, 'y': 382})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 17, 'y': 382})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7ac3560-cea1-4b97-a59c-4b3038bec6c7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/49f1eefe-9bc4-430c-a6c8-83675960a057/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:34:43,638 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:34:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 205/7340 [8:23<292:06, 24.4 steps/min]\u001b[92m15:34:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:44,985 - agent.ComputerAgent - INFO - LLM processing started 
with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:34:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:34:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:45,658 - agent.ComputerAgent - INFO - Computer: click({'x': 332, 'y': 92})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 332, 'y': 92})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3d9da005-d40d-4335-86ec-275c2ec5665b/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 206/7340 [8:24<291:23, 24.5 steps/min]2025-08-11 15:34:46,281 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m15:34:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e9d83ed4-d6d0-46f7-982b-98433769e30b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:34:47,314 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:34:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e1e61614-8290-4d90-9feb-594d2a7199e8/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 207/7340 [8:26<290:55, 24.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/982f8f16-b578-409f-8388-d8d5ee68ccee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ae9871c0-5cb9-4c5b-9c02-c899819f9f81/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:34:47,977 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m15:34:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:34:48,632 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m15:34:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 207/7340 [8:28<292:01, 24.4 steps/min]2025-08-11 15:34:49,913 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m15:34:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:34:50,584 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:34:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:51,263 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m15:34:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: 
POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 207/7340 [8:31<293:58, 24.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:34:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:53,944 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 148, 'y': 105})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 148, 'y': 105})\n", + "\u001b[92m15:34:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0b1cfd32-0cbc-48e7-890d-9ec0ac043035/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 207/7340 [8:33<295:06, 24.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:55,256 - agent.ComputerAgent - INFO - Computer: click({'x': 18, 'y': 477})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 18, 'y': 477})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:34:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:34:57,289 - agent.ComputerAgent - INFO - Computer: type({'text': '100'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '100'})\n", + "2025-08-11 15:34:57,983 - agent.ComputerAgent - INFO - Computer: click({'x': 462, 'y': 133})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 462, 'y': 133})\n", + "\u001b[92m15:34:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:34:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + " 3%|█---------------------------------------| 208/7340 [8:37<295:33, 24.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:34:58,660 - agent.ComputerAgent - INFO - Computer: click({'x': 308, 'y': 116})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 308, 'y': 116})\n", + "2025-08-11 15:34:59,285 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 15:34:59,286 - agent.ComputerAgent - INFO - Computer: click({'x': 387, 'y': 158})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 387, 'y': 158})\n", + "\u001b[92m15:34:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:35:00,674 - agent.ComputerAgent - INFO - Computer: click({'x': 640, 'y': 436, 'button': 'left'})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 640, 'y': 436, 'button': 'left'})\n", + " 3%|█---------------------------------------| 211/7340 [8:39<292:45, 24.4 steps/min]2025-08-11 15:35:01,337 - agent.ComputerAgent - INFO - Computer: click({'x': 420, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 420, 'y': 101})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:35:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 214/7340 [8:41<289:39, 24.6 steps/min]\u001b[92m15:35:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:03,280 - agent.ComputerAgent - INFO - Computer: double_click({'x': 213, 'y': 117})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 213, 'y': 117})\n", + "\u001b[92m15:35:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:03,948 - agent.ComputerAgent - INFO - Computer: click({'x': 416, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 416, 'y': 75})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 215/7340 [8:43<289:17, 24.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:05,212 - agent.ComputerAgent - 
INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:35:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:35:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:05,863 - agent.ComputerAgent - INFO - Computer: click({'x': 610, 'y': 60})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 610, 'y': 60})\n", + " 3%|█---------------------------------------| 217/7340 [8:45<287:15, 24.8 steps/min]2025-08-11 15:35:06,527 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m15:35:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:07,204 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m15:35:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 218/7340 [8:47<287:00, 24.8 steps/min]\u001b[92m15:35:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8d197f4f-b7b0-4196-9681-135d7bc3a45b/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:09,599 - agent.ComputerAgent - INFO - Computer: click({'x': 385, 'y': 35})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 385, 'y': 35})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/69393c41-bcaa-4752-9a82-e3b105fae459/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 218/7340 [8:49<288:18, 24.7 
steps/min]2025-08-11 15:35:10,889 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 123})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 123})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:35:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/edaeedb6-9993-4b6f-b226-19e2768a5736/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5a854981-aa94-433f-9381-2964f1117035/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e1e61614-8290-4d90-9feb-594d2a7199e8/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7ac3560-cea1-4b97-a59c-4b3038bec6c7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e9d83ed4-d6d0-46f7-982b-98433769e30b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2d349f43-6c63-4144-9bd3-bbd16183b16d/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 219/7340 [8:50<287:40, 24.8 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:12,208 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -496, 'scroll_x': 0, 'x': 90, 'y': 219})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -496, 'scroll_x': 0, 'x': 90, 'y': 219})\n", + "\u001b[92m15:35:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:12,847 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:35:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:13,519 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 141})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 141})\n", + "2025-08-11 15:35:14,161 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m15:35:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:14,807 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + 
"\u001b[92m15:35:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 220/7340 [8:54<288:03, 24.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:35:15,833 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m15:35:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 222/7340 [8:55<285:57, 24.9 steps/min]2025-08-11 15:35:16,495 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m15:35:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:17,131 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m15:35:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 222/7340 [8:56<286:37, 24.8 steps/min]2025-08-11 15:35:17,814 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:35:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - 
\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:18,873 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:35:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:19,539 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:35:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 222/7340 [8:58<287:54, 24.7 steps/min]2025-08-11 15:35:20,204 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m15:35:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:20,855 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 11 messages\n", + "\u001b[92m15:35:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 222/7340 [9:00<288:36, 24.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ae9871c0-5cb9-4c5b-9c02-c899819f9f81/invoke \"HTTP/1.1 200 
OK\"\n", + "2025-08-11 15:35:22,016 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m15:35:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f1593044-fc61-4fc8-b29d-87e37914d5c2/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 222/7340 [9:01<289:13, 24.6 steps/min]2025-08-11 15:35:22,704 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m15:35:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3d9da005-d40d-4335-86ec-275c2ec5665b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:23,375 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:35:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 222/7340 [9:02<289:57, 24.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cfefeec4-603f-4657-b0fe-7a641734693c/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 222/7340 [9:03<290:29, 24.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 
15:35:25,703 - agent.ComputerAgent - INFO - Computer: type({'text': 'delete browsing data on exit'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'delete browsing data on exit'})\n", + " 3%|█---------------------------------------| 222/7340 [9:04<291:11, 24.4 steps/min]2025-08-11 15:35:26,361 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:35:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 223/7340 [9:07<291:14, 24.4 steps/min]\u001b[92m15:35:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"\u001b[92m15:35:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:29,669 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 585, 'y': 449})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 585, 'y': 449})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 3%|█---------------------------------------| 223/7340 [9:09<292:18, 24.3 steps/min]\u001b[92m15:35:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:30,981 - agent.ComputerAgent - INFO - Computer: click({'x': 586, 'y': 134})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 586, 'y': 134})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:32,967 - agent.ComputerAgent - INFO - Computer: double_click({'x': 244, 'y': 154})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 244, 'y': 154})\n", + "\u001b[92m15:35:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:34,310 - agent.ComputerAgent - INFO - Agent: I enhanced the color vibrancy of your photo and exported it as edited_colorful.png to your Desktop.\n", + "\n", + 
"Task completed\n", + "INFO:agent.ComputerAgent:Agent: I enhanced the color vibrancy of your photo and exported it as edited_colorful.png to your Desktop.\n", + "\n", + "Task completed\n", + "2025-08-11 15:35:34,935 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 288\n", + " - prompt_tokens: 10800\n", + " - total_tokens: 11088\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 256\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0164\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 288\n", + " - prompt_tokens: 10800\n", + " - total_tokens: 11088\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 256\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0164\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/69393c41-bcaa-4752-9a82-e3b105fae459/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 225/7340 [9:14<292:25, 24.3 steps/min]2025-08-11 15:35:36,296 - agent.ComputerAgent - INFO - Computer: click({'x': 332, 'y': 105})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 332, 'y': 105})\n", + "2025-08-11 15:35:36,947 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 650, 'x': 261, 'y': 230})\n", + 
"INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 650, 'x': 261, 'y': 230})\n", + "\u001b[92m15:35:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:35:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:38,290 - agent.ComputerAgent - INFO - Computer: click({'x': 955, 'y': 130})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 955, 'y': 130})\n", + "2025-08-11 15:35:38,926 - agent.ComputerAgent - INFO - Computer: click({'x': 414, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 414, 'y': 75})\n", + "\u001b[92m15:35:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 228/7340 [9:18<290:09, 24.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:39,569 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:35:39 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:40,246 - agent.ComputerAgent - INFO - Computer: click({'x': 16, 'y': 478})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 16, 'y': 478})\n", + "\u001b[92m15:35:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 231/7340 [9:20<287:30, 24.7 steps/min]2025-08-11 15:35:41,893 - agent.ComputerAgent - INFO - Computer: click({'x': 183, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 183, 'y': 53})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 3%|█---------------------------------------| 232/7340 [9:21<286:44, 24.8 steps/min]\u001b[92m15:35:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:43,077 - agent.ComputerAgent - INFO - Computer: click({'x': 506, 'y': 190})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 506, 'y': 190})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 233/7340 [9:22<286:10, 24.8 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 15:35:44,347 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 11 messages\n", + "\u001b[92m15:35:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:35:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:45,032 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 547, 'scroll_x': 0, 'x': 125, 'y': 629})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 547, 'scroll_x': 0, 'x': 125, 'y': 629})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 15:35:46,313 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + " 
3%|█---------------------------------------| 234/7340 [9:25<286:13, 24.8 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8d197f4f-b7b0-4196-9681-135d7bc3a45b/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:47,613 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m15:35:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:35:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0b1cfd32-0cbc-48e7-890d-9ec0ac043035/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e9d83ed4-d6d0-46f7-982b-98433769e30b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/49f1eefe-9bc4-430c-a6c8-83675960a057/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/e7ac3560-cea1-4b97-a59c-4b3038bec6c7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a6ee00b-4e8c-4a3f-bac1-9baec4d920a2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e1e61614-8290-4d90-9feb-594d2a7199e8/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 236/7340 [9:26<284:25, 25.0 steps/min]2025-08-11 15:35:48,312 - agent.ComputerAgent - INFO - Computer: click({'x': 877, 'y': 537})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 877, 'y': 537})\n", + "2025-08-11 15:35:49,338 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m15:35:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:50,012 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m15:35:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 236/7340 [9:29<285:36, 24.9 steps/min]2025-08-11 15:35:50,692 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m15:35:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:51,345 - agent.ComputerAgent - INFO - LLM 
processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m15:35:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:35:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 237/7340 [9:31<285:21, 24.9 steps/min]2025-08-11 15:35:52,714 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m15:35:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:53,753 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 13 messages\n", + "\u001b[92m15:35:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:54,426 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 13 messages\n", + "\u001b[92m15:35:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/982f8f16-b578-409f-8388-d8d5ee68ccee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77f486b6-dc2a-4a1d-bf54-fc05f9a8c3d7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 3%|█---------------------------------------| 237/7340 [9:34<286:52, 24.8 steps/min]\u001b[92m15:35:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:55,767 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m15:35:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 15:35:56,466 - agent.ComputerAgent - INFO - Computer: click({'x': 501, 'y': 55})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 501, 'y': 55})\n", + "\u001b[92m15:35:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:35:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cfefeec4-603f-4657-b0fe-7a641734693c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:35:58,194 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m15:35:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 3%|█---------------------------------------| 238/7340 [9:37<287:10, 24.7 steps/min]2025-08-11 15:35:58,837 - agent.ComputerAgent - INFO - Computer: click({'x': 347, 'y': 186})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 347, 'y': 186})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:35:59,467 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m15:35:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m15:35:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 239/7340 [9:38<286:34, 24.8 steps/min]2025-08-11 15:36:00,161 - agent.ComputerAgent - INFO - Computer: click({'x': 309, 'y': 116})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 309, 'y': 116})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:36:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 240/7340 [9:40<285:59, 24.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:36:01,470 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m15:36:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:36:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:36:02,165 - agent.ComputerAgent - INFO - LLM processing started with 15 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 15 messages\n", + "\u001b[92m15:36:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "\u001b[92m15:36:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 3%|█---------------------------------------| 241/7340 [9:41<285:27, 24.9 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:36:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:36:03,352 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 633, 'y': 320}, {'x': 422, 'y': 393}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 633, 'y': 320}, {'x': 422, 'y': 393}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 3%|█---------------------------------------| 249/7340 [9:42<276:30, 25.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/835128b8-2a29-46f4-853f-4d70bb46a9d6/close \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 250/7340 [9:43<275:49, 25.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:36:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:36:05 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/edaeedb6-9993-4b6f-b226-19e2768a5736/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a74f1790-a107-43c9-8389-0a50a5192c5f/invoke \"HTTP/1.1 200 OK\"\n", + " 3%|█---------------------------------------| 250/7340 [9:45<276:35, 25.6 steps/min]INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. 
You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 output.csv\n", + "- libreoffice --calc output.csv\n", + "\n", + "This converts both spreadsheets to CSV, concatenates their single columns into one file named output.csv, and opens it in LibreOffice Calc from the terminal.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: Done via Terminal.\n", + "\n", + "Commands executed:\n", + "- cd ~/Desktop\n", + "- libreoffice --headless --convert-to csv file1.xlsx\n", + "- libreoffice --headless --convert-to csv file2.ods\n", + "- cat file1.csv file2.csv > output.csv\n", + "- libreoffice --calc output.csv\n", + "\n", + "This converts both spreadsheets to CSV, concatenates their single columns into one file named output.csv, and opens it in LibreOffice Calc from the terminal.\n", + "\n", + "Task completed.\n", + "2025-08-11 15:39:44,642 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 679\n", + " - prompt_tokens: 8809\n", + " - total_tokens: 9488\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 576\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0178\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 679\n", + " - prompt_tokens: 8809\n", + " - total_tokens: 9488\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 576\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0178\n", + " 5%|█---------------------------------------| 359/7340 [13:23<260:31, 26.8 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:39:45 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:39:45,831 - agent.ComputerAgent - INFO - Computer: click({'x': 111, 'y': 213})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 111, 'y': 213})\n", + " 5%|█---------------------------------------| 359/7340 [13:25<260:54, 26.8 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:39:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:39:46,456 - agent.ComputerAgent - INFO - Computer: click({'x': 148, 'y': 739})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 148, 'y': 739})\n", + "\u001b[92m15:39:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:39:47,105 - agent.ComputerAgent - INFO - Computer: click({'x': 984, 'y': 68})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 984, 'y': 68})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:39:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:39:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 5%|█---------------------------------------| 360/7340 [13:26<260:46, 26.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:39:48,477 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 607, 'scroll_x': 0, 'x': 91, 'y': 464})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 607, 'scroll_x': 0, 'x': 91, 'y': 464})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m15:39:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m15:39:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/invoke \"HTTP/1.1 200 OK\"\n", + " 5%|█---------------------------------------| 362/7340 [13:28<259:43, 26.9 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 15:39:49,752 - agent.ComputerAgent - INFO - Computer: 
click({'x': 219, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 219, 'y': 53})\n", + "\u001b[92m15:39:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 15:39:50,434 - agent.ComputerAgent - INFO - Computer: click({'x': 416, 'y': 74})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 416, 'y': 74})\n", + " 5%|██--------------------------------------| 374/7340 [13:30<251:38, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0a91cea7-3ffe-41c2-9405-1151904aee0c/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:39:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c8c54705-3689-4d05-b8e1-7a57903f3a21/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/63010886-f715-4208-aef0-b98c456e7e98/invoke \"HTTP/1.1 200 OK\"\n", + " 5%|██--------------------------------------| 374/7340 [13:32<252:04, 27.6 steps/min]INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. 
You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Preferences > Input / Codecs > Record directory or filename and saved the setting.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: I have changed VLC’s recordings folder to Desktop via Tools > Preferences > Input / Codecs > Record directory or filename and saved the setting.\n", + "\n", + "Task completed.\n", + "2025-08-11 15:53:48,419 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 232\n", + " - prompt_tokens: 8729\n", + " - total_tokens: 8961\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 192\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0132\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 232\n", + " - prompt_tokens: 8729\n", + " - total_tokens: 8961\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 192\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0132\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8a8f1594-3659-4132-9059-6fa366033df0/invoke \"HTTP/1.1 200 OK\"\n", + " 11%|████------------------------------------| 777/7340 [27:30<232:18, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:53:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m15:53:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 11%|████------------------------------------| 779/7340 [27:31<231:49, 28.3 steps/min]INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Open File (or drag-and-drop the files into VLC).\n", + "\n", + "If you want me to, I can play the show in the browser via Google TV, or play any DRM‑free local media in VLC.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: I can’t complete this as requested. Playing a Google Play Movies & TV purchase directly in VLC would require bypassing Google’s DRM (Widevine), which I won’t help with. 
Additionally, Stranger Things is a Netflix original and is typically not sold on Google Play, so it may not even be available for purchase there.\n", + "\n", + "Legal ways to watch:\n", + "- In a browser: Go to play.google.com/tv (or tv.google.com), sign in to your Google account, search for the title in your Library, and play it in the browser.\n", + "- On devices: Use the Google TV app on Android/iOS/Android TV or YouTube app where your Google Play purchases appear, and watch or cast to a TV.\n", + "- If you have DRM‑free video files you own, you can play them in VLC via Media > Open File (or drag-and-drop the files into VLC).\n", + "\n", + "If you want me to, I can play the show in the browser via Google TV, or play any DRM‑free local media in VLC.\n", + "\n", + "Task completed.\n", + "2025-08-11 16:05:33,041 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 1371\n", + " - prompt_tokens: 8756\n", + " - total_tokens: 10127\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1152\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 2560\n", + " - response_cost: $0.0218\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 1371\n", + " - prompt_tokens: 8756\n", + " - total_tokens: 10127\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1152\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 2560\n", + " - response_cost: $0.0218\n", + " 16%|██████----------------------------------| 1143/7340 [39:14<212:46, 29.1 steps/min]2025-08-11 16:05:33,721 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:05:33 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:05:34,369 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:05:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 16%|██████----------------------------------| 1144/7340 [39:16<212:41, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:05:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:05:36,085 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:05:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:05:37,424 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+='})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+='})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 16%|██████----------------------------------| 1144/7340 [39:19<213:01, 29.1 steps/min]\u001b[92m16:05:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:05:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:05:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:05:39,411 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:05:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:05:40,051 - agent.ComputerAgent - INFO - Computer: click({'x': 247, 'y': 103})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 247, 'y': 103})\n", + "\u001b[92m16:05:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 16%|██████----------------------------------| 1144/7340 [39:21<213:11, 29.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:05:40,712 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing 
started with 22 messages\n", + "\u001b[92m16:05:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:05:41,401 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:05:41,402 - agent.ComputerAgent - INFO - Computer: move({'x': 512, 'y': 32})\n", + "INFO:agent.ComputerAgent:Computer: move({'x': 512, 'y': 32})\n", + "\u001b[92m16:05:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:05:42,101 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:05:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 16%|██████----------------------------------| 1145/7340 [39:23<213:09, 29.1 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:05:42,775 - agent.ComputerAgent - INFO - Computer: click({'x': 87, 'y': 165})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 87, 'y': 165})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/932fb6ee-8e77-41ca-8220-27e0c8783ced/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:05:43,825 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:05:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9618024b-01b2-4c48-8a72-2ec16bffcf41/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 16%|██████----------------------------------| 1146/7340 [39:26<213:09, 29.1 steps/min]\u001b[92m16:05:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:05:45,138 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:05:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:05:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:05:45,820 - agent.ComputerAgent - INFO - Computer: click({'x': 709, 'y': 305})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 709, 'y': 305})\n", + " 16%|██████----------------------------------| 1147/7340 [39:27<213:03, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9618024b-01b2-4c48-8a72-2ec16bffcf41/invoke \"HTTP/1.1 200 OK\"\n", + " 16%|██████----------------------------------| 1157/7340 [39:28<210:57, 29.3 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/9618024b-01b2-4c48-8a72-2ec16bffcf41/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8bb6b36b-e7fb-4e80-916a-501fa7ad17f9/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:05:48,172 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:05:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:05:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c3f7029e-7bbd-43fb-bea4-c66cc9ae685d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 16%|██████----------------------------------| 1157/7340 [39:30<211:08, 29.3 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:05:49,481 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:05:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7955abad-b178-4311-85d5-7f1dedbecbcc/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:05:50,142 - agent.ComputerAgent - INFO - LLM processing 
started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:05:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ffbf23fa-9bd6-4b26-befa-cb45d31fc4fa/invoke \"HTTP/1.1 200 OK\"\n", + " 16%|██████----------------------------------| 1157/7340 [39:31<211:15, 29.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00/dev/null || true'\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"bash -lc 'cd ~/Desktop && ls -1 *.png 2>/dev/null || true'\"})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:10:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1308/7340 [44:13<203:56, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:10:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:10:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/d8b3a739-de56-40fe-896f-831373c8ecee/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:10:32,953 - agent.ComputerAgent - INFO - Computer: click({'x': 512, 'y': 384})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 512, 'y': 384})\n", + "\u001b[92m16:10:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 18%|███████---------------------------------| 1310/7340 [44:15<203:42, 29.6 steps/min]\u001b[92m16:10:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:10:34,242 - agent.ComputerAgent - INFO - Computer: double_click({'x': 984, 'y': 658})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 984, 'y': 658})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:10:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 
for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:10:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:10:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:10:36,222 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 284})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 284})\n", + "\u001b[92m16:10:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1311/7340 [44:17<203:43, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:10:36,912 - agent.ComputerAgent - INFO - Computer: click({'x': 205, 'y': 735})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 205, 'y': 735})\n", + "\u001b[92m16:10:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:10:38,184 - agent.ComputerAgent - INFO - Computer: type({'text': 'sudo find . 
-type f -exec chmod 644 {} +'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'sudo find . -type f -exec chmod 644 {} +'})\n", + "2025-08-11 16:10:38,849 - agent.ComputerAgent - INFO - Computer: click({'x': 100, 'y': 390})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 100, 'y': 390})\n", + "\u001b[92m16:10:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1313/7340 [44:21<203:36, 29.6 steps/min]\u001b[92m16:10:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:10:40,187 - agent.ComputerAgent - INFO - Computer: click({'x': 359, 'y': 258})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 359, 'y': 258})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:10:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:10:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1316/7340 [44:22<203:08, 29.7 steps/min]Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:10:41,570 - agent.ComputerAgent - INFO - Computer: click({'x': 131, 'y': 91, 'button': 'left'})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 131, 'y': 91, 'button': 'left'})\n", + "2025-08-11 16:10:42,221 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:10:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:10:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1317/7340 [44:24<203:03, 29.7 steps/min]2025-08-11 16:10:42,902 - agent.ComputerAgent - INFO - Computer: click({'x': 910, 'y': 233})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 910, 'y': 233})\n", + " 18%|███████---------------------------------| 1318/7340 [44:25<202:56, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:10:45,238 - agent.ComputerAgent - INFO - Computer: type({'text': 'https://upload.wikimedia.org/wikipedia/en/thumb/1/1e/The_University_of_Hong_Kong_crest.svg/1200px-The_University_of_Hong_Kong_crest.svg.png'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'https://upload.wikimedia.org/wikipedia/en/thumb/1/1e/The_University_of_Hong_Kong_crest.svg/1200px-The_University_of_Hong_Kong_crest.svg.png'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a23ddde7-5509-407d-af64-ea09807c1af1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/fa4f593f-4977-4dc4-9238-0a67602a0900/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fafe8f9a-bc46-42ad-b3ca-7190a64ab552/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1319/7340 [44:26<202:54, 29.7 steps/min]2025-08-11 16:10:45,918 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:10:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ca85c226-0c49-4084-b2bc-86bd540c8bce/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/79295f2f-2987-488c-b4b7-c968f71c7597/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b2656d0e-a6f4-4ecb-a099-cfe8471c4998/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:10:46,633 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:10:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1320/7340 [44:28<202:49, 29.7 steps/min]2025-08-11 16:10:47,821 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:10:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", 
+ "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3b3e7fbd-8c02-45a6-bb3d-83c056398d3f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d8b3a739-de56-40fe-896f-831373c8ecee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/80299c20-3bcf-48b1-a471-299a1eda0a00/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1320/7340 [44:29<202:55, 29.7 steps/min]2025-08-11 16:10:48,618 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m16:10:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89cdf329-a61d-4d69-9c6c-5d0ea35677b6/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:10:49,365 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:10:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c3f7029e-7bbd-43fb-bea4-c66cc9ae685d/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1320/7340 [44:31<203:01, 29.7 steps/min]2025-08-11 16:10:50,151 - 
agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:10:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:10:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:10:51,510 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:10:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1320/7340 [44:33<203:11, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ffbf23fa-9bd6-4b26-befa-cb45d31fc4fa/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:10:52,195 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:10:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:10:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/932fb6ee-8e77-41ca-8220-27e0c8783ced/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:10:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:10:52,855 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:10:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:10:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1320/7340 [44:34<203:17, 29.6 steps/min]2025-08-11 16:10:53,549 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:10:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:10:54,253 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 420, 'y': 162}, {'x': 170, 'y': 133}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 420, 'y': 162}, {'x': 170, 'y': 133}]})\n", + " 18%|███████---------------------------------| 1320/7340 [44:36<203:24, 29.6 
steps/min]2025-08-11 16:10:55,291 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m16:10:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1321/7340 [44:37<203:17, 29.6 steps/min]2025-08-11 16:10:55,940 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:10:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:10:56,604 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:10:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1321/7340 [44:38<203:23, 29.6 steps/min]2025-08-11 16:10:57,291 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:10:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1321/7340 [44:40<203:32, 29.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:10:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:10:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:00,151 - agent.ComputerAgent - INFO - Computer: click({'x': 122, 'y': 219})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 122, 'y': 219})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1321/7340 [44:42<203:43, 29.5 steps/min]\u001b[92m16:11:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:11:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:02,396 - agent.ComputerAgent - INFO - Computer: click({'x': 16, 'y': 429})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 16, 'y': 429})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e8a299f4-d946-4970-b9a4-2503717de8ce/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:03,765 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + " 
18%|███████---------------------------------| 1322/7340 [44:45<203:44, 29.5 steps/min]2025-08-11 16:11:04,441 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:11:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:05,798 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cc2e38be-6768-4928-bfe5-d7f31cb68b24/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1324/7340 [44:48<203:37, 29.5 steps/min]\u001b[92m16:11:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:07,755 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing 
started with 36 messages\n", + "\u001b[92m16:11:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:09,113 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "2025-08-11 16:11:09,745 - agent.ComputerAgent - INFO - Computer: click({'x': 434, 'y': 418})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 434, 'y': 418})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1325/7340 [44:52<203:44, 29.5 steps/min]\u001b[92m16:11:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:12,504 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "2025-08-11 16:11:13,143 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:11:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:11:13,809 - agent.ComputerAgent - INFO - Computer: click({'x': 248, 'y': 291})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 248, 'y': 291})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ca85c226-0c49-4084-b2bc-86bd540c8bce/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:11:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fafe8f9a-bc46-42ad-b3ca-7190a64ab552/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/39724bde-60dd-471d-ba25-1ac9b1405c76/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1326/7340 [44:56<203:48, 29.5 steps/min]\u001b[92m16:11:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:15,137 - agent.ComputerAgent - INFO - Computer: click({'x': 293, 'y': 185})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 293, 'y': 185})\n", + "\u001b[92m16:11:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:15,821 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:11:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:11:16,500 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:11:16,501 - agent.ComputerAgent - INFO - Computer: click({'x': 650, 'y': 362})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 650, 'y': 362})\n", + "\u001b[92m16:11:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1328/7340 [44:58<203:35, 29.5 steps/min]2025-08-11 16:11:17,188 - agent.ComputerAgent - INFO - Computer: double_click({'x': 247, 'y': 153})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 247, 'y': 153})\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1330/7340 [44:59<203:19, 29.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:18,491 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:11:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:19,525 - agent.ComputerAgent - INFO - Computer: click({'x': 867, 'y': 233})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 867, 'y': 233})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1331/7340 [45:01<203:18, 29.6 steps/min]\u001b[92m16:11:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:21,243 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + 
"\u001b[92m16:11:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1332/7340 [45:02<203:11, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:21,948 - agent.ComputerAgent - INFO - Computer: click({'x': 573, 'y': 249})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 573, 'y': 249})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1332/7340 [45:04<203:18, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/932fb6ee-8e77-41ca-8220-27e0c8783ced/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d8b3a739-de56-40fe-896f-831373c8ecee/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:23,815 - agent.ComputerAgent - INFO - Computer: click({'x': 254, 'y': 736})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 254, 'y': 736})\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89cdf329-a61d-4d69-9c6c-5d0ea35677b6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3b3e7fbd-8c02-45a6-bb3d-83c056398d3f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/80299c20-3bcf-48b1-a471-299a1eda0a00/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b2656d0e-a6f4-4ecb-a099-cfe8471c4998/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1333/7340 [45:05<203:12, 29.6 steps/min]2025-08-11 16:11:24,492 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:11:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:11:25,193 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:11:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1334/7340 [45:06<203:07, 29.6 steps/min]2025-08-11 16:11:26,204 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:11:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
18%|███████---------------------------------| 1334/7340 [45:08<203:12, 29.6 steps/min]2025-08-11 16:11:27,268 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:11:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c3f7029e-7bbd-43fb-bea4-c66cc9ae685d/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1334/7340 [45:09<203:16, 29.5 steps/min]2025-08-11 16:11:27,895 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:11:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:29,210 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a23ddde7-5509-407d-af64-ea09807c1af1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1334/7340 [45:11<203:29, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:30,853 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:11:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:31,535 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 685, 'x': 633, 'y': 405})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_x': 0, 'scroll_y': 685, 'x': 633, 'y': 405})\n", + " 18%|███████---------------------------------| 1334/7340 [45:13<203:35, 29.5 steps/min]2025-08-11 16:11:32,197 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:11:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1335/7340 [45:14<203:31, 29.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:33,951 - agent.ComputerAgent - INFO - LLM processing started 
with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:11:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:35,279 - agent.ComputerAgent - INFO - Computer: type({'text': \"bash -lc 'cd ~/Desktop && ls -1 *.png'\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"bash -lc 'cd ~/Desktop && ls -1 *.png'\"})\n", + "2025-08-11 16:11:35,955 - agent.ComputerAgent - INFO - Computer: double_click({'x': 49, 'y': 431})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 49, 'y': 431})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ffbf23fa-9bd6-4b26-befa-cb45d31fc4fa/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1335/7340 [45:18<203:47, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:37,317 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m16:11:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:37,999 - agent.ComputerAgent - INFO - Computer: click({'x': 122, 'y': 176})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 122, 'y': 176})\n", + " 18%|███████---------------------------------| 1337/7340 [45:19<203:31, 29.5 steps/min]2025-08-11 16:11:38,642 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:11:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1338/7340 [45:21<203:26, 29.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:11:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:40,487 - agent.ComputerAgent - INFO - Computer: click({'x': 397, 'y': 624})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 397, 'y': 624})\n", + " 
18%|███████---------------------------------| 1339/7340 [45:23<203:24, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/79295f2f-2987-488c-b4b7-c968f71c7597/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:42,633 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:11:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ca85c226-0c49-4084-b2bc-86bd540c8bce/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:11:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1339/7340 [45:25<203:35, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"\u001b[92m16:11:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:45,987 - agent.ComputerAgent - INFO - Computer: type({'text': 'find . -type f -perm -not -0644 -ls | head -n 20'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'find . -type f -perm -not -0644 -ls | head -n 20'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cc2e38be-6768-4928-bfe5-d7f31cb68b24/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:46,670 - agent.ComputerAgent - INFO - Computer: click({'x': 249, 'y': 339})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 249, 'y': 339})\n", + "\u001b[92m16:11:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1339/7340 [45:28<203:47, 29.4 steps/min]\u001b[92m16:11:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:47,338 - agent.ComputerAgent - INFO - Computer: click({'x': 982, 'y': 741})\n", + "INFO:agent.ComputerAgent:Computer: 
click({'x': 982, 'y': 741})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d8b3a739-de56-40fe-896f-831373c8ecee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:48,711 - agent.ComputerAgent - INFO - Computer: click({'x': 1000, 'y': 739})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1000, 'y': 739})\n", + " 18%|███████---------------------------------| 1341/7340 [45:30<203:34, 29.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:49,375 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:11:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:50,053 - agent.ComputerAgent - INFO - Computer: click({'x': 553, 'y': 275})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 553, 'y': 275})\n", + " 18%|███████---------------------------------| 1343/7340 [45:31<203:18, 29.5 steps/min]2025-08-11 16:11:50,708 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:11:50 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1344/7340 [45:33<203:14, 29.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:52,081 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:11:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:52,743 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:11:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:11:54,178 - agent.ComputerAgent - INFO - Computer: click({'x': 49, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 49, 'y': 53})\n", + " 18%|███████---------------------------------| 1344/7340 [45:35<203:25, 29.5 steps/min]INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:55,482 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+shift+v'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+shift+v'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/80299c20-3bcf-48b1-a471-299a1eda0a00/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3b3e7fbd-8c02-45a6-bb3d-83c056398d3f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:11:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/932fb6ee-8e77-41ca-8220-27e0c8783ced/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1345/7340 [45:37<203:23, 29.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:11:56,832 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:11:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:11:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:11:57,492 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:11:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:11:58,155 - agent.ComputerAgent - INFO - Computer: click({'x': 496, 'y': 256})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 496, 'y': 256})\n", + " 18%|███████---------------------------------| 1345/7340 [45:39<203:32, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:11:59,512 - agent.ComputerAgent - INFO - Computer: click({'x': 188, 'y': 54})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 188, 'y': 54})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89cdf329-a61d-4d69-9c6c-5d0ea35677b6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:00,870 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+f'})\n", + " 18%|███████---------------------------------| 1346/7340 [45:42<203:33, 29.4 steps/min]2025-08-11 16:12:01,532 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:12:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:02,223 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:12:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e8a299f4-d946-4970-b9a4-2503717de8ce/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1347/7340 [45:44<203:28, 29.5 steps/min]2025-08-11 16:12:02,913 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:12:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:03,573 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:12:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1347/7340 [45:45<203:34, 29.4 steps/min]2025-08-11 16:12:04,252 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:12:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1347/7340 [45:46<203:38, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 18%|███████---------------------------------| 1347/7340 [45:47<203:43, 29.4 steps/min]\u001b[92m16:12:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:06,650 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 677, 'x': 633, 'y': 362})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_x': 0, 'scroll_y': 677, 'x': 633, 'y': 362})\n", + " 18%|███████---------------------------------| 1347/7340 [45:48<203:47, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b2656d0e-a6f4-4ecb-a099-cfe8471c4998/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1348/7340 [45:49<203:41, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa4f593f-4977-4dc4-9238-0a67602a0900/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:12:08 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:12:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c3f7029e-7bbd-43fb-bea4-c66cc9ae685d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:09,759 - agent.ComputerAgent - INFO - Computer: click({'x': 296, 'y': 736})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 296, 'y': 736})\n", + " 18%|███████---------------------------------| 1348/7340 [45:51<203:50, 29.4 steps/min]\u001b[92m16:12:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:10,408 - agent.ComputerAgent - INFO - Computer: click({'x': 234, 'y': 97})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 234, 'y': 97})\n", + "\u001b[92m16:12:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:11,099 - agent.ComputerAgent - INFO - Computer: click({'x': 332, 'y': 162})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 332, 'y': 
162})\n", + " 18%|███████---------------------------------| 1349/7340 [45:52<203:45, 29.4 steps/min]2025-08-11 16:12:11,772 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m16:12:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:12,423 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:12:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1351/7340 [45:54<203:29, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:13,589 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:12:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1351/7340 [45:56<203:37, 29.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:12:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:12:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1351/7340 [45:58<203:46, 29.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:12:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:16,944 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:12:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:17,601 - agent.ComputerAgent - INFO - Computer: click({'x': 684, 'y': 41})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 684, 'y': 41})\n", + "\u001b[92m16:12:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:12:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1351/7340 [46:00<203:55, 29.4 steps/min]\u001b[92m16:12:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:18,978 - agent.ComputerAgent - INFO - Computer: click({'x': 122, 'y': 213})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 122, 'y': 213})\n", + "2025-08-11 16:12:19,629 - agent.ComputerAgent - INFO - Computer: click({'x': 244, 'y': 149})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 244, 'y': 149})\n", + "2025-08-11 16:12:20,317 - agent.ComputerAgent - INFO - Computer: click({'x': 397, 'y': 564})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 397, 'y': 564})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/80299c20-3bcf-48b1-a471-299a1eda0a00/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fafe8f9a-bc46-42ad-b3ca-7190a64ab552/invoke \"HTTP/1.1 200 OK\"\n", + " 18%|███████---------------------------------| 1352/7340 [46:02<203:53, 29.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:20,972 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:12:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:12:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ffbf23fa-9bd6-4b26-befa-cb45d31fc4fa/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:21,645 - agent.ComputerAgent - INFO - Computer: click({'x': 249, 'y': 339})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 249, 'y': 339})\n", + "2025-08-11 16:12:22,272 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:12:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 18%|███████---------------------------------| 1355/7340 [46:04<203:28, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + 
"\u001b[92m16:12:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 18%|███████---------------------------------| 1356/7340 [46:05<203:22, 29.4 steps/min]2025-08-11 16:12:24,123 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:12:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:12:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:24,852 - agent.ComputerAgent - INFO - Computer: click({'x': 623, 'y': 359})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 623, 'y': 359})\n", + " 18%|███████---------------------------------| 1356/7340 [46:06<203:28, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:26,220 - agent.ComputerAgent - INFO - Computer: type({'text': 'source=~/Desktop/dir1; target=~/Desktop/dir3; if [ -d \"$source\" ] && [ -d \"$target\" ]; then rsync -a -f\"+ */\" -f\"- *\" \"$source\" \"$target\"; echo \"Copied directory hierarchy.\"; else echo \"Source or target directory not found\"; fi'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'source=~/Desktop/dir1; target=~/Desktop/dir3; if [ -d \"$source\" ] && [ -d \"$target\" ]; then rsync -a -f\"+ */\" -f\"- *\" \"$source\" \"$target\"; echo \"Copied directory hierarchy.\"; else 
echo \"Source or target directory not found\"; fi'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 18%|███████---------------------------------| 1357/7340 [46:08<203:27, 29.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:12:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:28,062 - agent.ComputerAgent - INFO - Computer: click({'x': 72, 'y': 90})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 72, 'y': 90})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:30,106 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/932fb6ee-8e77-41ca-8220-27e0c8783ced/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cc2e38be-6768-4928-bfe5-d7f31cb68b24/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 
1358/7340 [46:11<203:29, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89cdf329-a61d-4d69-9c6c-5d0ea35677b6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d8b3a739-de56-40fe-896f-831373c8ecee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:31,450 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 676, 'scroll_x': 0})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 676, 'scroll_x': 0})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ca85c226-0c49-4084-b2bc-86bd540c8bce/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:12:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 19%|███████---------------------------------| 1361/7340 [46:13<203:05, 29.4 steps/min]2025-08-11 16:12:32,793 - agent.ComputerAgent - INFO - Computer: click({'x': 553, 'y': 280})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 553, 'y': 280})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:12:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - 
\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:34,134 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:12:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a23ddde7-5509-407d-af64-ea09807c1af1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:12:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/39724bde-60dd-471d-ba25-1ac9b1405c76/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 19%|███████---------------------------------| 1361/7340 [46:15<203:14, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:35,495 - agent.ComputerAgent - INFO - Computer: type({'text': \"bash -lc 'cd ~/Desktop && ls -1'}\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"bash -lc 'cd ~/Desktop && ls -1'}\"})\n", + "2025-08-11 16:12:36,154 - agent.ComputerAgent - INFO - Computer: click({'x': 263, 'y': 318})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 263, 'y': 318})\n", + "2025-08-11 16:12:36,816 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + 
"\u001b[92m16:12:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:12:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 19%|███████---------------------------------| 1362/7340 [46:18<203:15, 29.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:37,525 - agent.ComputerAgent - INFO - Computer: click({'x': 426, 'y': 257})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 426, 'y': 257})\n", + "2025-08-11 16:12:38,193 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:12:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1364/7340 [46:20<203:02, 29.4 steps/min]\u001b[92m16:12:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 16:12:39,517 - 
agent.ComputerAgent - INFO - LLM processing started with 31 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 31 messages\n", + "\u001b[92m16:12:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:12:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:40,192 - agent.ComputerAgent - INFO - Computer: click({'x': 526, 'y': 232})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 526, 'y': 232})\n", + "2025-08-11 16:12:40,843 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:12:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1365/7340 [46:23<203:03, 29.4 steps/min]\u001b[92m16:12:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:42,204 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:12:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:12:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:43,551 - agent.ComputerAgent - INFO - Computer: click({'x': 835, 'y': 36})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 835, 'y': 36})\n", + "2025-08-11 16:12:44,241 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:12:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:44,933 - agent.ComputerAgent - INFO - Computer: click({'x': 433, 'y': 635})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 433, 'y': 635})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b2656d0e-a6f4-4ecb-a099-cfe8471c4998/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3b3e7fbd-8c02-45a6-bb3d-83c056398d3f/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1367/7340 [46:26<202:56, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e8a299f4-d946-4970-b9a4-2503717de8ce/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/79295f2f-2987-488c-b4b7-c968f71c7597/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 
16:12:45,580 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:12:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:46,215 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:12:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 19%|███████---------------------------------| 1369/7340 [46:28<202:40, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:46,933 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:12:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:47,614 - agent.ComputerAgent - INFO - LLM processing started with 33 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 33 messages\n", + "\u001b[92m16:12:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/80299c20-3bcf-48b1-a471-299a1eda0a00/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1369/7340 [46:29<202:46, 
29.4 steps/min]2025-08-11 16:12:48,293 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:12:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:48,973 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:12:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 19%|███████---------------------------------| 1369/7340 [46:30<202:52, 29.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:12:50,174 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:12:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fafe8f9a-bc46-42ad-b3ca-7190a64ab552/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1369/7340 [46:31<202:57, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c3f7029e-7bbd-43fb-bea4-c66cc9ae685d/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:51,233 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:12:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89cdf329-a61d-4d69-9c6c-5d0ea35677b6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:52,561 - agent.ComputerAgent - INFO - Computer: type({'text': 'file1'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'file1'})\n", + " 19%|███████---------------------------------| 1369/7340 [46:34<203:07, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:12:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa4f593f-4977-4dc4-9238-0a67602a0900/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:12:53,913 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:12:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:12:55,675 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+,'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+,'})\n", + " 19%|███████---------------------------------| 1371/7340 [46:37<202:59, 29.4 
steps/min]\u001b[92m16:12:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:12:56,375 - agent.ComputerAgent - INFO - Computer: click({'x': 304, 'y': 735})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 304, 'y': 735})\n", + "2025-08-11 16:12:57,004 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:12:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 19%|███████---------------------------------| 1371/7340 [46:38<203:05, 29.4 steps/min]2025-08-11 16:12:57,682 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m16:12:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 19%|███████---------------------------------| 1372/7340 [46:39<202:58, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:13:00,048 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+v'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+v'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1372/7340 [46:41<203:07, 29.4 steps/min]2025-08-11 16:13:00,705 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 35 messages\n", + "\u001b[92m16:13:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:13:01,364 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:13:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 19%|███████---------------------------------| 1372/7340 [46:45<203:22, 29.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ca85c226-0c49-4084-b2bc-86bd540c8bce/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa4f593f-4977-4dc4-9238-0a67602a0900/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:13:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 19%|███████---------------------------------| 1374/7340 [46:46<203:05, 29.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:13:05,255 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:13:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:13:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa4f593f-4977-4dc4-9238-0a67602a0900/close \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:13:06,551 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "2025-08-11 16:13:07,203 - agent.ComputerAgent - INFO - Computer: click({'x': 232, 'y': 97})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 232, 'y': 97})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ffbf23fa-9bd6-4b26-befa-cb45d31fc4fa/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1376/7340 [46:49<202:59, 29.4 steps/min]2025-08-11 16:13:08,546 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m16:13:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:13:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0cad7a26-2224-4401-9a66-57daca76d380/invoke \"HTTP/1.1 200 OK\"\n", + " 19%|███████---------------------------------| 1376/7340 [46:51<203:04, 29.4 steps/min]2025-08-11 16:13:09,882 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 37 messages\n", + "\u001b[92m16:13:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 19%|███████---------------------------------| 1376/7340 [46:52<203:08, 29.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:13:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. 
You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': ''})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4e9c5c3-fa17-4f05-8383-03a3cb3c1fba/invoke \"HTTP/1.1 200 OK\"\n", + " 23%|█████████-------------------------------| 1664/7340 [58:10<198:24, 28.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4813e5e3-be12-40e2-9cc0-d5be0ad320cf/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:24:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:24:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c062c21a-1b89-4117-86d3-d763f8af4cbd/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a0a74ba-160b-41ee-a6d2-6dc61c143d94/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:24:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:24:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 23%|█████████-------------------------------| 1684/7340 [58:12<195:28, 28.9 steps/min]2025-08-11 16:24:31,005 - agent.ComputerAgent - INFO - Computer: click({'x': 369, 'y': 564})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 369, 'y': 564})\n", + "\u001b[92m16:24:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:24:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:24:31,655 - agent.ComputerAgent - INFO - Computer: click({'x': 842, 'y': 571})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 842, 'y': 571})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:24:32,328 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:24:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:24:32,999 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:24:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:24:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:24:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 23%|█████████-------------------------------| 1684/7340 [58:14<195:37, 28.9 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:24:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:24:34,996 - agent.ComputerAgent - INFO - Computer: type({'text': '3'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '3'})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:24:35,673 - agent.ComputerAgent - INFO - Computer: click({'button': 'right', 'x': 987, 'y': 658})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'right', 'x': 987, 'y': 
658})\n", + "2025-08-11 16:24:36,338 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:24:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 23%|█████████-------------------------------| 1686/7340 [58:18<195:30, 28.9 steps/min]\u001b[92m16:24:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:24:37,031 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 280, 'y': 375}, {'x': 802, 'y': 446}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 280, 'y': 375}, {'x': 802, 'y': 446}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:24:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:24:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 23%|█████████-------------------------------| 1688/7340 [58:19<195:17, 28.9 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:24:38,396 - agent.ComputerAgent - INFO - Computer: click({'x': 60, 'y': 35})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 60, 'y': 35})\n", + 
"\u001b[92m16:24:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e8a299f4-d946-4970-b9a4-2503717de8ce/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7bc07116-76e3-42fb-a0e3-a2273a5caa64/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d8b3a739-de56-40fe-896f-831373c8ecee/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:24:39,048 - agent.ComputerAgent - INFO - Computer: click({'x': 478, 'y': 256})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 478, 'y': 256})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4e9c5c3-fa17-4f05-8383-03a3cb3c1fba/close \"HTTP/1.1 200 OK\"\n", + " 23%|█████████-------------------------------| 1693/7340 [58:22<194:41, 29.0 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3ad517be-7b27-424d-b632-3ba6ff1a1e71/invoke \"HTTP/1.1 200 OK\"\n", + " 23%|█████████-------------------------------| 1695/7340 [58:23<194:26, 29.0 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d8b3a739-de56-40fe-896f-831373c8ecee/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/3980166d-0a7d-4a58-a915-07dbe8b607bb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b6b06a1a-197c-499e-a884-cc6bce509fa3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 23%|█████████-------------------------------| 1695/7340 [58:25<194:33, 29.0 steps/min]\u001b[92m16:24:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9dca7e41-642b-4cca-8758-834cef0e844c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5da0c259-034b-4ba2-9e95-9d4ae99c7475/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. 
You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + " 26%|██████████------------------------------| 1938/7340 [66:27<185:15, 29.2 steps/min]\u001b[92m16:32:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8956c64b-871b-43e2-84de-047c8ce2a839/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.48s/it]\n", + "2025-08-11 16:32:46,573 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:32:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 26%|██████████------------------------------| 1939/7340 [66:29<185:13, 29.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7117b51-399c-45d8-88a1-c54a00b2bc38/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:32:48,835 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:32:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 26%|██████████------------------------------| 1939/7340 [66:32<185:19, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:32:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.38s/it]29.1 steps/min]\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:32:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:32:53,100 - agent.ComputerAgent - INFO - Computer: type({'text': '\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08LARS Resources (Backup)'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08\\x08LARS Resources (Backup)'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:32:54,407 
- agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+l'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+l'})\n", + " 26%|██████████------------------------------| 1939/7340 [66:36<185:31, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:32:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:32:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:32:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:32:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:32:55,743 - agent.ComputerAgent - INFO - Computer: click({'x': 1009, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1009, 'y': 101})\n", + "2025-08-11 16:32:56,380 - agent.ComputerAgent - INFO - Computer: click({'x': 46, 'y': 527})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 46, 'y': 527})\n", + "2025-08-11 16:32:57,051 - agent.ComputerAgent - INFO - Computer: click({'x': 72, 'y': 244})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 72, 'y': 244})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:32:57 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:32:58,377 - agent.ComputerAgent - INFO - Computer: screenshot({})\n", + "INFO:agent.ComputerAgent:Computer: screenshot({})\n", + "2025-08-11 16:32:59,058 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m16:32:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:32:59,707 - agent.ComputerAgent - INFO - Computer: click({'x': 693, 'y': 698})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 693, 'y': 698})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:32:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 26%|██████████------------------------------| 1940/7340 [66:43<185:43, 29.1 steps/min]\u001b[92m16:33:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:02,296 - agent.ComputerAgent - INFO - Computer: click({'x': 70, 'y': 77})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 70, 'y': 77})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 26%|██████████------------------------------| 1945/7340 [66:44<185:07, 29.1 steps/min]\u001b[92m16:33:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:03,527 - agent.ComputerAgent - INFO - Computer: click({'x': 635, 'y': 468})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 635, 'y': 468})\n", + "\u001b[92m16:33:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:04,208 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -628, 'scroll_x': 0, 'x': 526, 'y': 463})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -628, 'scroll_x': 0, 'x': 526, 'y': 463})\n", + "\u001b[92m16:33:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4adb2bbf-d6e6-4d15-9e9a-c199cf02d5d6/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1946/7340 [66:45<185:03, 29.1 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 16:33:04,854 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:33:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:05,505 - agent.ComputerAgent - INFO - Computer: click({'x': 969, 'y': 169})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 969, 'y': 169})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 27%|██████████------------------------------| 1948/7340 [66:47<184:53, 29.2 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:33:06 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:07,437 - agent.ComputerAgent - INFO - Computer: click({'x': 87, 'y': 181})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 87, 'y': 181})\n", + "\u001b[92m16:33:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3980166d-0a7d-4a58-a915-07dbe8b607bb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f83df7e3-6ab0-404e-9745-09768e42b6fb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6d8a38cc-c8f6-484c-9a6d-e6c404b2c7f9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4813e5e3-be12-40e2-9cc0-d5be0ad320cf/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1949/7340 [66:49<184:49, 29.2 steps/min]2025-08-11 16:33:08,125 - agent.ComputerAgent - INFO - Computer: click({'x': 76, 'y': 321})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 76, 'y': 321})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a3ea8855-19d9-4e10-8208-fd9e060997e3/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:33:08,772 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:33:08 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:09,435 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:33:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:10,071 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:33:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 27%|██████████------------------------------| 1950/7340 [66:51<184:49, 29.2 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:33:11,418 - agent.ComputerAgent - INFO - Computer: type({'text': 'orig=$(find . -path ./fails -prune -o -type f -name \"*failed.ipynb\" -print | wc -l); copied=$(find ./fails -type f -name \"*failed.ipynb\" -print | wc -l); echo \"orig=$orig copied=$copied\"'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'orig=$(find . 
-path ./fails -prune -o -type f -name \"*failed.ipynb\" -print | wc -l); copied=$(find ./fails -type f -name \"*failed.ipynb\" -print | wc -l); echo \"orig=$orig copied=$copied\"'})\n", + "2025-08-11 16:33:12,446 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:33:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ea9e43cc-3d54-4c89-bb53-a189a3ae9a25/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d0000302-258b-4660-9baa-e149c2ad83fd/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b2ca79e3-4425-4cd4-a9dd-42e2431eb008/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1951/7340 [66:54<184:48, 29.2 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:33:14,195 - agent.ComputerAgent - INFO - Computer: type({'text': 'sudo apt-get update -y && sudo apt-get install -y steghide binwalk exiftool ffmpeg\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'sudo apt-get update -y && sudo apt-get install -y steghide binwalk exiftool ffmpeg\\n'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e7117b51-399c-45d8-88a1-c54a00b2bc38/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1952/7340 [66:55<184:44, 29.2 steps/min]2025-08-11 16:33:14,816 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:33:14 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:15,505 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:33:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:16,172 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/6b741091-faa0-4d97-9592-0dc410b6cc53/reset \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1953/7340 [66:57<184:42, 29.2 steps/min]2025-08-11 16:33:16,865 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 27%|██████████------------------------------| 1953/7340 [66:58<184:45, 29.2 steps/min]2025-08-11 16:33:17,496 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:33:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + " 27%|██████████------------------------------| 1953/7340 [67:01<184:53, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6b741091-faa0-4d97-9592-0dc410b6cc53/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/81398d20-3c85-489b-9abc-2af244ec1feb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a0a74ba-160b-41ee-a6d2-6dc61c143d94/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1953/7340 [67:03<184:58, 29.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:22,384 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:33:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:33:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8956c64b-871b-43e2-84de-047c8ce2a839/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "\u001b[92m16:33:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:23,684 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 95})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 95})\n", + " 27%|██████████------------------------------| 1953/7340 [67:05<185:03, 29.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:24,329 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:33:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/488d7653-4f2d-4576-85c7-d87dc7a875ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:33:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:33:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:33:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:27,033 - agent.ComputerAgent - INFO - Computer: click({'x': 592, 'y': 568})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 592, 'y': 568})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:33:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 27%|██████████------------------------------| 1954/7340 [67:09<185:06, 29.1 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:33:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:28,365 - agent.ComputerAgent - INFO - Computer: click({'x': 664, 'y': 213})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 664, 'y': 213})\n", + "\u001b[92m16:33:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider 
= huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:29,040 - agent.ComputerAgent - INFO - Computer: click({'x': 489, 'y': 427})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 489, 'y': 427})\n", + "\u001b[92m16:33:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:33:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 27%|██████████------------------------------| 1955/7340 [67:10<185:02, 29.1 steps/min]2025-08-11 16:33:29,694 - agent.ComputerAgent - INFO - Computer: click({'x': 83, 'y': 139})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 83, 'y': 139})\n", + "2025-08-11 16:33:30,372 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -658, 'scroll_x': 0, 'x': 526, 'y': 432})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -658, 'scroll_x': 0, 'x': 526, 'y': 432})\n", + " 27%|██████████------------------------------| 1957/7340 [67:12<184:50, 29.1 steps/min]2025-08-11 16:33:31,077 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:33:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:31,780 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m16:33:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:33:33,106 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + " 27%|██████████------------------------------| 1959/7340 [67:14<184:42, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:34,429 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:33:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 27%|██████████------------------------------| 1959/7340 [67:16<184:46, 29.1 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:35,086 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:33:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:33:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6d8a38cc-c8f6-484c-9a6d-e6c404b2c7f9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f83df7e3-6ab0-404e-9745-09768e42b6fb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4adb2bbf-d6e6-4d15-9e9a-c199cf02d5d6/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:33:35,779 - agent.ComputerAgent - INFO - Computer: click({'x': 86, 'y': 73})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 86, 'y': 73})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4813e5e3-be12-40e2-9cc0-d5be0ad320cf/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/485267e4-f348-45f0-a08d-1d1f28a01f1d/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1959/7340 [67:17<184:50, 29.1 steps/min]2025-08-11 16:33:36,470 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:33:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:33:37,105 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m16:33:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 
27%|██████████------------------------------| 1960/7340 [67:19<184:48, 29.1 steps/min]\u001b[92m16:33:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:38,439 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:33:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:33:39,107 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:33:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 27%|██████████------------------------------| 1960/7340 [67:20<184:51, 29.1 steps/min]\u001b[92m16:33:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:33:39,749 - agent.ComputerAgent - INFO - Computer: click({'x': 715, 'y': 627})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 715, 'y': 627})\n", + " 27%|██████████------------------------------| 1960/7340 [67:21<184:54, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/485267e4-f348-45f0-a08d-1d1f28a01f1d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/e7117b51-399c-45d8-88a1-c54a00b2bc38/invoke \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1962/7340 [67:22<184:41, 29.1 steps/min]2025-08-11 16:33:41,945 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:33:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/485267e4-f348-45f0-a08d-1d1f28a01f1d/close \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 27%|██████████------------------------------| 1962/7340 [67:24<184:45, 29.1 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 27%|██████████------------------------------| 1962/7340 [67:25<184:48, 29.1 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:33:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ea9e43cc-3d54-4c89-bb53-a189a3ae9a25/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00\\n2
\\n3
'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '1
\\n2
\\n3
'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2c254802-788e-4b4b-98dc-68cd2c6bcce4/invoke \"HTTP/1.1 200 OK\"\n", + " 30%|███████████-----------------------------| 2167/7340 [73:25<175:17, 29.5 steps/min]2025-08-11 16:39:45,389 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:39:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 30%|███████████-----------------------------| 2168/7340 [73:27<175:13, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8956c64b-871b-43e2-84de-047c8ce2a839/invoke \"HTTP/1.1 200 OK\"\n", + " 30%|███████████-----------------------------| 2168/7340 [73:28<175:16, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:39:48,220 - agent.ComputerAgent - INFO - Computer: type({'text': 'Manchester, GB'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Manchester, GB'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8956c64b-871b-43e2-84de-047c8ce2a839/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 30%|███████████-----------------------------| 2168/7340 [73:30<175:22, 29.5 steps/min]\u001b[92m16:39:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 30%|███████████-----------------------------| 2169/7340 
[73:31<175:17, 29.5 steps/min]\u001b[92m16:39:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:39:50,735 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:39:50,735 - agent.ComputerAgent - INFO - Computer: click({'x': 256, 'y': 173})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 256, 'y': 173})\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/71840850-9565-4ed2-8fa2-e4f2ba6ec6a9/invoke \"HTTP/1.1 200 OK\"\n", + " 30%|███████████-----------------------------| 2169/7340 [73:32<175:20, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:39:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. 
You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00> ~/.vimrc && echo Done\\''})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'bash -lc \\'printf \"\\\\n\\\\\" Enable absolute line numbers by default\\\\nset number\\\\n\" >> ~/.vimrc && echo Done\\''})\n", + " 31%|████████████----------------------------| 2276/7340 [77:50<173:11, 29.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0803e2c2-9de2-40ff-93da-cb49f156cbba/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7e0b0038-3a97-4d93-8c5c-154cc0b95af9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/74442f45-62aa-40d1-9499-ea3e8e0a4c18/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:10,219 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m16:44:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a8ddfa4f-6f9b-4ad8-b763-1881394c9926/invoke \"HTTP/1.1 200 OK\"\n", + " 31%|████████████----------------------------| 2277/7340 [77:52<173:08, 29.2 steps/min]2025-08-11 16:44:10,891 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM 
processing started with 16 messages\n", + "\u001b[92m16:44:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:11,562 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:44:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 31%|████████████----------------------------| 2277/7340 [77:53<173:11, 29.2 steps/min]2025-08-11 16:44:12,212 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:44:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:14,267 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/475030e1-7ae3-4ced-91fb-2221d956a2ad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting 
`pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "\u001b[92m16:44:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:16,173 - agent.ComputerAgent - INFO - Agent: The heading is now center-aligned and the document has been saved. Task completed.\n", + "INFO:agent.ComputerAgent:Agent: The heading is now center-aligned and the document has been saved. Task completed.\n", + "2025-08-11 16:44:16,851 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 218\n", + " - prompt_tokens: 4658\n", + " - total_tokens: 4876\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 192\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0080\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 218\n", + " - prompt_tokens: 4658\n", + " - total_tokens: 4876\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 192\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0080\n", + " 31%|████████████----------------------------| 2279/7340 [77:58<173:09, 29.2 steps/min]2025-08-11 16:44:17,521 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:44:17 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:18,174 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:44:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:44:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:19,530 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:44:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 31%|████████████----------------------------| 2279/7340 [78:01<173:17, 29.2 steps/min]\u001b[92m16:44:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", 
+ "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:20,843 - agent.ComputerAgent - INFO - Computer: click({'x': 1008, 'y': 193})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1008, 'y': 193})\n", + "\u001b[92m16:44:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:44:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:22,141 - agent.ComputerAgent - INFO - Computer: click({'x': 776, 'y': 643})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 776, 'y': 643})\n", + "2025-08-11 16:44:22,791 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + " 31%|████████████----------------------------| 2279/7340 [78:04<173:22, 29.2 steps/min]\u001b[92m16:44:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:44:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:44:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:44:23,485 - agent.ComputerAgent - INFO - Computer: click({'x': 977, 'y': 16})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 977, 'y': 16})\n", + "2025-08-11 16:44:24,132 - agent.ComputerAgent - INFO - Computer: click({'x': 111, 'y': 162})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 111, 'y': 162})\n", + "\u001b[92m16:44:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 31%|████████████----------------------------| 2281/7340 [78:05<173:12, 29.2 steps/min]2025-08-11 16:44:24,809 - agent.ComputerAgent - INFO - Computer: click({'x': 625, 'y': 248})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 625, 'y': 248})\n", + " 31%|████████████----------------------------| 2283/7340 [78:06<173:01, 29.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ed3b76a0-7eeb-473c-8da1-310545515f47/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:26,462 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 39 messages\n", + "\u001b[92m16:44:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5180ec6f-26a5-4ab4-8ca3-87f128083da1/invoke \"HTTP/1.1 200 OK\"\n", + " 31%|████████████----------------------------| 2284/7340 [78:08<172:58, 29.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP 
Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:28,788 - agent.ComputerAgent - INFO - Computer: type({'text': 'none.png'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'none.png'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 31%|████████████----------------------------| 2284/7340 [78:11<173:06, 29.2 steps/min]\u001b[92m16:44:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:44:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ea9e43cc-3d54-4c89-bb53-a189a3ae9a25/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ff8b808f-c3a6-4979-8f9a-c6a25905116c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a46ee6f6-d167-47c4-ad83-e16b88450253/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a39ee9df-d3ba-456a-95cf-3a11a826583b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:32,685 - agent.ComputerAgent - INFO - Computer: type({'text': 'pkill -f soffice.bin\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'pkill -f soffice.bin\\n'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:33,322 - agent.ComputerAgent - INFO - Computer: click({'x': 982, 'y': 632})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 982, 'y': 632})\n", + "\u001b[92m16:44:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:44:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:44:34,677 - agent.ComputerAgent - INFO - Agent: All .jpg files from the photos directory (recursively) have been copied into cpjpg on the Desktop. Verified: 4 .jpg files found and 4 copied.\n", + "\n", + "Task completed\n", + "INFO:agent.ComputerAgent:Agent: All .jpg files from the photos directory (recursively) have been copied into cpjpg on the Desktop. 
Verified: 4 .jpg files found and 4 copied.\n", + "\n", + "Task completed\n", + "2025-08-11 16:44:35,306 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 494\n", + " - prompt_tokens: 8268\n", + " - total_tokens: 8762\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0153\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 494\n", + " - prompt_tokens: 8268\n", + " - total_tokens: 8762\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0153\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 31%|████████████----------------------------| 2287/7340 [78:17<172:59, 29.2 steps/min]2025-08-11 16:44:36,616 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 666, 'scroll_x': 0, 'x': 336, 'y': 152})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 666, 'scroll_x': 0, 'x': 336, 'y': 152})\n", + "2025-08-11 16:44:37,280 - agent.ComputerAgent - INFO - Computer: click({'x': 520, 'y': 437})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 520, 'y': 437})\n", + "\u001b[92m16:44:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:44:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:44:37,941 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m16:44:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:38,620 - agent.ComputerAgent - INFO - Computer: click({'x': 514, 'y': 304})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 514, 'y': 304})\n", + "2025-08-11 16:44:39,286 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -200, 'scroll_x': 0, 'x': 589, 'y': 128})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -200, 'scroll_x': 0, 'x': 589, 'y': 128})\n", + " 31%|████████████----------------------------| 2289/7340 [78:21<172:53, 29.2 steps/min]\u001b[92m16:44:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:39,956 - agent.ComputerAgent - INFO - Computer: click({'x': 351, 'y': 153})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 351, 'y': 153})\n", + "2025-08-11 16:44:40,591 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + 
"\u001b[92m16:44:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:41,646 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:44:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 31%|████████████----------------------------| 2293/7340 [78:23<172:32, 29.3 steps/min]2025-08-11 16:44:42,285 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:44:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:42,973 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:44:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:44,693 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + " 31%|████████████----------------------------| 2294/7340 [78:26<172:32, 29.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ed3b76a0-7eeb-473c-8da1-310545515f47/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/fd628f34-1346-4947-bfa4-cf698adb3472/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:45,371 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 41 messages\n", + "\u001b[92m16:44:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5180ec6f-26a5-4ab4-8ca3-87f128083da1/invoke \"HTTP/1.1 200 OK\"\n", + " 31%|████████████----------------------------| 2311/7340 [78:27<170:43, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5180ec6f-26a5-4ab4-8ca3-87f128083da1/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fd628f34-1346-4947-bfa4-cf698adb3472/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2c254802-788e-4b4b-98dc-68cd2c6bcce4/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/74442f45-62aa-40d1-9499-ea3e8e0a4c18/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d4054e85-5304-43a3-b6d7-128e302780cb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7e0b0038-3a97-4d93-8c5c-154cc0b95af9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0803e2c2-9de2-40ff-93da-cb49f156cbba/invoke 
\"HTTP/1.1 200 OK\"\n", + " 31%|████████████----------------------------| 2311/7340 [78:30<170:49, 29.4 steps/min]2025-08-11 16:44:49,145 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:44:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:44:49,827 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:44:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:50,495 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:44:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fd628f34-1346-4947-bfa4-cf698adb3472/close \"HTTP/1.1 200 OK\"\n", + " 32%|████████████----------------------------| 2321/7340 [78:32<169:49, 29.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a8ddfa4f-6f9b-4ad8-b763-1881394c9926/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/475030e1-7ae3-4ced-91fb-2221d956a2ad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 32%|████████████----------------------------| 2322/7340 [78:33<169:45, 29.6 steps/min]2025-08-11 16:44:53,297 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m16:44:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:44:53,963 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:44:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:44:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 32%|████████████----------------------------| 2322/7340 [78:36<169:52, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:44:55,334 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:44:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 
200 OK\"\n", + "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 ~/vim_test.txt && vim ~/vim_test.txt\\''})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'bash -lc \\'printf \"line a\\\\nline b\\\\nline c\\\\n\" > ~/vim_test.txt && vim ~/vim_test.txt\\''})\n", + " 33%|█████████████---------------------------| 2397/7340 [80:43<166:27, 29.7 steps/min]\u001b[92m16:47:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:02,258 - agent.ComputerAgent - INFO - Computer: click({'x': 855, 'y': 476})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 855, 'y': 476})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2399/7340 [80:44<166:18, 29.7 steps/min]\u001b[92m16:47:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:03,544 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 604, 'scroll_x': 0, 'x': 307, 'y': 666})\n", + 
"INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 604, 'scroll_x': 0, 'x': 307, 'y': 666})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 33%|█████████████---------------------------| 2401/7340 [80:46<166:09, 29.7 steps/min]\u001b[92m16:47:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:47:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:05,490 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:47:05,491 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 465, 'y': 294})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 465, 'y': 294})\n", + " 33%|█████████████---------------------------| 2402/7340 [80:47<166:05, 29.7 steps/min]\u001b[92m16:47:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:06,659 - agent.ComputerAgent - INFO - Computer: click({'x': 637, 'y': 471})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 637, 'y': 471})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:47:07,327 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 31 messages\n", + "\u001b[92m16:47:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2403/7340 [80:49<166:02, 29.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:47:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:08,385 - agent.ComputerAgent - INFO - Computer: click({'x': 111, 'y': 270})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 111, 'y': 270})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/475030e1-7ae3-4ced-91fb-2221d956a2ad/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2404/7340 [80:50<165:58, 29.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"2025-08-11 16:47:09,003 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m16:47:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6fcb07bb-6857-4888-82a0-1fd0dbf2d722/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:47:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:09,698 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 530, 'scroll_x': 0, 'x': 574, 'y': 736})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 530, 'scroll_x': 0, 'x': 574, 'y': 736})\n", + "2025-08-11 16:47:10,366 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:47:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 33%|█████████████---------------------------| 2406/7340 [80:52<165:50, 29.8 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 
16:47:11,392 - agent.ComputerAgent - INFO - Computer: click({'x': 1008, 'y': 164})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1008, 'y': 164})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d4054e85-5304-43a3-b6d7-128e302780cb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/81b23870-39ed-4649-9729-1d4809f713ec/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2407/7340 [80:53<165:46, 29.8 steps/min]2025-08-11 16:47:12,011 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:47:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:12,685 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m16:47:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2408/7340 [80:55<165:44, 29.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:14,028 - agent.ComputerAgent - INFO - LLM processing 
started with 33 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 33 messages\n", + "\u001b[92m16:47:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7e0b0038-3a97-4d93-8c5c-154cc0b95af9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1473c3f2-39e1-4aff-8d55-0e23dc25a055/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:14,693 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:47:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2408/7340 [80:57<165:48, 29.7 steps/min]\u001b[92m16:47:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:47:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a46ee6f6-d167-47c4-ad83-e16b88450253/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for 
open-end generation.\n", + "2025-08-11 16:47:16,409 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 75})\n", + "2025-08-11 16:47:17,070 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:47:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:18,388 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "\u001b[92m16:47:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2409/7340 [81:00<165:48, 29.7 steps/min]2025-08-11 16:47:19,055 - agent.ComputerAgent - INFO - Computer: click({'x': 458, 'y': 275})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 458, 'y': 275})\n", + "2025-08-11 16:47:19,739 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:47:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + " 33%|█████████████---------------------------| 2410/7340 [81:01<165:44, 29.7 steps/min]2025-08-11 16:47:20,387 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:47:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:21,068 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m16:47:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2411/7340 [81:02<165:41, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ff8b808f-c3a6-4979-8f9a-c6a25905116c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:22,362 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 35 messages\n", + "\u001b[92m16:47:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:47:23,717 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + " 33%|█████████████---------------------------| 2411/7340 [81:05<165:46, 29.7 steps/min]\u001b[92m16:47:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:24,730 - agent.ComputerAgent - INFO - Computer: double_click({'x': 331, 'y': 111})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 331, 'y': 111})\n", + " 33%|█████████████---------------------------| 2412/7340 [81:06<165:42, 29.7 steps/min]2025-08-11 16:47:25,410 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:47:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2c254802-788e-4b4b-98dc-68cd2c6bcce4/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2413/7340 [81:07<165:38, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0803e2c2-9de2-40ff-93da-cb49f156cbba/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:47:26,610 - agent.ComputerAgent - INFO - LLM processing started 
with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:47:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:27,269 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:47:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2413/7340 [81:09<165:43, 29.7 steps/min]\u001b[92m16:47:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "\u001b[92m16:47:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:29,307 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 
messages\n", + "\u001b[92m16:47:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2414/7340 [81:11<165:39, 29.7 steps/min]\u001b[92m16:47:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:30,494 - agent.ComputerAgent - INFO - Computer: click({'x': 946, 'y': 738})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 946, 'y': 738})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/475030e1-7ae3-4ced-91fb-2221d956a2ad/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2414/7340 [81:12<165:42, 29.7 steps/min]2025-08-11 16:47:31,166 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m16:47:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:31,814 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 37 messages\n", + "\u001b[92m16:47:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"\u001b[92m16:47:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6fcb07bb-6857-4888-82a0-1fd0dbf2d722/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2415/7340 [81:13<165:39, 29.7 steps/min]2025-08-11 16:47:32,487 - agent.ComputerAgent - INFO - Computer: click({'x': 351, 'y': 294})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 351, 'y': 294})\n", + "2025-08-11 16:47:33,177 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:47:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2415/7340 [81:14<165:41, 29.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2416/7340 [81:15<165:37, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2416/7340 [81:16<165:39, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2416/7340 [81:17<165:41, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 
OK\"\n", + "\u001b[92m16:47:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2417/7340 [81:19<165:37, 29.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:38,035 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 39 messages\n", + "\u001b[92m16:47:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c3518cd0-0df6-44e9-8393-0c62002bc984/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/81b23870-39ed-4649-9729-1d4809f713ec/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:47:38,680 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:47:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
33%|█████████████---------------------------| 2417/7340 [81:20<165:40, 29.7 steps/min]2025-08-11 16:47:39,336 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:47:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:47:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:47:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:41,078 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:47:41,079 - agent.ComputerAgent - INFO - Computer: click({'x': 92, 'y': 359})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 92, 'y': 359})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2417/7340 [81:23<165:46, 29.7 steps/min]\u001b[92m16:47:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 
33%|█████████████---------------------------| 2418/7340 [81:24<165:42, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:47:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:47:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:44,087 - agent.ComputerAgent - INFO - Computer: click({'x': 982, 'y': 760})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 982, 'y': 760})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 33%|█████████████---------------------------| 2419/7340 [81:25<165:39, 29.7 steps/min]2025-08-11 16:47:44,730 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:47:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:47:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 
OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:47:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:46,038 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 654, 'scroll_x': 0, 'x': 283, 'y': 664})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 654, 'scroll_x': 0, 'x': 283, 'y': 664})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 33%|█████████████---------------------------| 2420/7340 [81:28<165:38, 29.7 steps/min]\u001b[92m16:47:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:47:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 
16:47:47,983 - agent.ComputerAgent - INFO - Computer: click({'x': 585, 'y': 355})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 585, 'y': 355})\n", + "\u001b[92m16:47:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2421/7340 [81:29<165:34, 29.7 steps/min]2025-08-11 16:47:48,672 - agent.ComputerAgent - INFO - Computer: click({'x': 962, 'y': 234})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 962, 'y': 234})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:49,341 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:47:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:47:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2422/7340 [81:31<165:31, 29.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:50,011 - agent.ComputerAgent - INFO - Computer: click({'x': 392, 'y': 275})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 392, 'y': 275})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:47:51,361 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:47:51,362 - 
agent.ComputerAgent - INFO - Computer: keypress({'keys': 'super'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'super'})\n", + " 33%|█████████████---------------------------| 2423/7340 [81:33<165:29, 29.7 steps/min]\u001b[92m16:47:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:52,058 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 41 messages\n", + "\u001b[92m16:47:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:52,742 - agent.ComputerAgent - INFO - Computer: click({'x': 196, 'y': 237})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 196, 'y': 237})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2425/7340 [81:35<165:21, 29.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2426/7340 [81:36<165:17, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7e0b0038-3a97-4d93-8c5c-154cc0b95af9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:47:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:47:55,312 - agent.ComputerAgent - INFO - Computer: click({'x': 351, 'y': 294})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 351, 'y': 294})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a39ee9df-d3ba-456a-95cf-3a11a826583b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "2025-08-11 16:47:55,965 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m16:47:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:47:57,620 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d4054e85-5304-43a3-b6d7-128e302780cb/invoke 
\"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2427/7340 [81:39<165:17, 29.7 steps/min]\u001b[92m16:47:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:47:58,279 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m16:47:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:47:58,928 - agent.ComputerAgent - INFO - Computer: click({'x': 316, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 316, 'y': 101})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0803e2c2-9de2-40ff-93da-cb49f156cbba/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2429/7340 [81:40<165:08, 29.7 steps/min]2025-08-11 16:47:59,585 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:47:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:48:00,942 - agent.ComputerAgent - INFO - Computer: type({'text': ':q'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': ':q'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:48:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:48:02,299 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m16:48:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6fcb07bb-6857-4888-82a0-1fd0dbf2d722/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2430/7340 [81:44<165:09, 29.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:48:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2431/7340 [81:45<165:04, 29.7 steps/min]2025-08-11 16:48:03,692 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:48:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 
200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:48:04,331 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 43 messages\n", + "\u001b[92m16:48:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2431/7340 [81:46<165:07, 29.7 steps/min]\u001b[92m16:48:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:48:04,993 - agent.ComputerAgent - INFO - Computer: click({'x': 458, 'y': 422})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 458, 'y': 422})\n", + "2025-08-11 16:48:05,658 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:48:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d4054e85-5304-43a3-b6d7-128e302780cb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:48:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2431/7340 [81:47<165:09, 29.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:48:07,064 - agent.ComputerAgent - INFO - Computer: click({'x': 474, 'y': 332})\n", + 
"INFO:agent.ComputerAgent:Computer: click({'x': 474, 'y': 332})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/81b23870-39ed-4649-9729-1d4809f713ec/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2436/7340 [81:48<164:42, 29.8 steps/min]2025-08-11 16:48:07,712 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:48:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/475030e1-7ae3-4ced-91fb-2221d956a2ad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:48:08,382 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:48:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a46ee6f6-d167-47c4-ad83-e16b88450253/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d4054e85-5304-43a3-b6d7-128e302780cb/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 33%|█████████████---------------------------| 2438/7340 [81:50<164:32, 29.8 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:48:09,668 - agent.ComputerAgent - INFO - LLM processing started with 26 
messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m16:48:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2438/7340 [81:51<164:35, 29.8 steps/min]2025-08-11 16:48:10,320 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m16:48:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2438/7340 [81:52<164:37, 29.8 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/1473c3f2-39e1-4aff-8d55-0e23dc25a055/reset \"HTTP/1.1 200 OK\"\n", + " 33%|█████████████---------------------------| 2438/7340 [81:53<164:39, 29.8 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ff8b808f-c3a6-4979-8f9a-c6a25905116c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:48:12,541 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:48:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:48:13,223 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:48:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2438/7340 [81:54<164:42, 29.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1473c3f2-39e1-4aff-8d55-0e23dc25a055/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:48:14,391 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m16:48:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 33%|█████████████---------------------------| 2438/7340 [81:56<164:44, 29.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:48:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 33%|█████████████---------------------------| 2440/7340 [81:57<164:35, 29.8 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3e4ea7d7-21a2-4b07-abd4-a3e280e44e0b/close \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:005'})\n", + 
"INFO:agent.ComputerAgent:Computer: type({'text': 'WEEKDAY(B3;2)>5'})\n", + " 34%|█████████████---------------------------| 2477/7340 [83:44<164:25, 29.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/422a20c8-b318-46e4-9f06-d599c9ed261c/reset \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:50:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 34%|█████████████---------------------------| 2479/7340 [83:46<164:15, 29.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:05,091 - agent.ComputerAgent - INFO - Computer: click({'x': 351, 'y': 294})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 351, 'y': 294})\n", + "\u001b[92m16:50:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:50:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:05,754 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m16:50:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 34%|█████████████---------------------------| 2479/7340 [83:48<164:19, 29.6 steps/min]\u001b[92m16:50:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:50:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:07,118 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 471, 'y': 328}, {'x': 351, 'y': 709}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 471, 'y': 328}, {'x': 351, 'y': 709}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:07 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:50:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/422a20c8-b318-46e4-9f06-d599c9ed261c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:08,416 - agent.ComputerAgent - INFO - Computer: click({'x': 268, 'y': 188})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 268, 'y': 188})\n", + "2025-08-11 16:50:09,045 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m16:50:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2480/7340 [83:50<164:18, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:09,730 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:50:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:50:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/0803e2c2-9de2-40ff-93da-cb49f156cbba/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:10,446 - agent.ComputerAgent - INFO - Computer: double_click({'x': 618, 'y': 483})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 618, 'y': 483})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2482/7340 [83:52<164:09, 29.6 steps/min]2025-08-11 16:50:11,451 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m16:50:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f73836c4-d8e3-425b-a750-f2319c89164e/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2483/7340 [83:53<164:05, 29.6 steps/min]2025-08-11 16:50:12,123 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:50:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:13,563 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:50:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
34%|█████████████---------------------------| 2483/7340 [83:55<164:09, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/514e0362-c0b3-4216-989f-d260ec405efb/reset \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2483/7340 [83:56<164:11, 29.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/81b23870-39ed-4649-9729-1d4809f713ec/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:15,740 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:50:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2483/7340 [83:57<164:13, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:17,970 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:50:17,970 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'win+e'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'win+e'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/514e0362-c0b3-4216-989f-d260ec405efb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a39ee9df-d3ba-456a-95cf-3a11a826583b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1473c3f2-39e1-4aff-8d55-0e23dc25a055/invoke \"HTTP/1.1 200 OK\"\n", 
+ "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2483/7340 [84:00<164:19, 29.6 steps/min]\u001b[92m16:50:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:19,965 - agent.ComputerAgent - INFO - Computer: type({'text': 'Orchis theme gnome-look'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Orchis theme gnome-look'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:21,285 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:50:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:50:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 34%|█████████████---------------------------| 2483/7340 [84:03<164:25, 29.5 steps/min]\u001b[92m16:50:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:22,607 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:50:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:23,283 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 626, 'scroll_x': 0, 'x': 588, 'y': 446})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 626, 'scroll_x': 0, 'x': 588, 'y': 446})\n", + "\u001b[92m16:50:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/daac505f-9423-4b29-b11c-9b23c5c9e3ee/reset \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:23,909 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + 
"INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:50:23,909 - agent.ComputerAgent - INFO - Computer: double_click({'x': 989, 'y': 713})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 989, 'y': 713})\n", + " 34%|█████████████---------------------------| 2484/7340 [84:05<164:23, 29.5 steps/min]2025-08-11 16:50:24,578 - agent.ComputerAgent - INFO - Computer: click({'x': 412, 'y': 128})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 412, 'y': 128})\n", + "2025-08-11 16:50:25,256 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:50:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2486/7340 [84:07<164:14, 29.6 steps/min]2025-08-11 16:50:25,924 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m16:50:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:26,594 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m16:50:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2487/7340 [84:09<164:12, 29.6 steps/min]\u001b[92m16:50:27 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:50:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:28,422 - agent.ComputerAgent - INFO - Computer: double_click({'x': 960, 'y': 713})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 960, 'y': 713})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:29,722 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'win'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'win'})\n", + " 34%|█████████████---------------------------| 2487/7340 [84:11<164:17, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/daac505f-9423-4b29-b11c-9b23c5c9e3ee/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:30,365 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m16:50:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fed9747f-6005-4d29-b83e-afc7934c0ff5/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2489/7340 [84:13<164:09, 29.6 steps/min]\u001b[92m16:50:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2b43eb21-4025-495a-8c66-358bfcac034b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:50:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:33,742 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'esc'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'esc'})\n", + "\u001b[92m16:50:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:50:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 34%|█████████████---------------------------| 2489/7340 [84:16<164:14, 29.5 steps/min]\u001b[92m16:50:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:35,048 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:50:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:35,696 - agent.ComputerAgent - INFO - Computer: move({'x': 887, 'y': 167})\n", + "INFO:agent.ComputerAgent:Computer: move({'x': 887, 'y': 167})\n", + "2025-08-11 16:50:36,379 - agent.ComputerAgent - INFO - Computer: click({'x': 260, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 260, 'y': 101})\n", + "\u001b[92m16:50:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:37,041 - 
agent.ComputerAgent - INFO - Computer: click({'x': 537, 'y': 304})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 537, 'y': 304})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:38,373 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'right'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'right'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f73836c4-d8e3-425b-a750-f2319c89164e/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/422a20c8-b318-46e4-9f06-d599c9ed261c/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2490/7340 [84:20<164:16, 29.5 steps/min]\u001b[92m16:50:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:39,696 - agent.ComputerAgent - INFO - Computer: type({'text': 'Mumbai'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Mumbai'})\n", + "2025-08-11 16:50:40,392 - agent.ComputerAgent - INFO - Computer: click({'x': 746, 'y': 651})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 746, 'y': 651})\n", + " 34%|█████████████---------------------------| 2494/7340 [84:22<163:56, 29.6 steps/min]2025-08-11 16:50:41,039 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:50:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: 
POST https://orchestration.hud.so/hud-gym/api/v2/environments/a46ee6f6-d167-47c4-ad83-e16b88450253/invoke \"HTTP/1.1 502 Bad Gateway\"\n", + "2025-08-11 16:50:41,701 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:50:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:42,355 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:50:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2496/7340 [84:24<163:47, 29.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:43,043 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:50:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2496/7340 [84:25<163:49, 29.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:50:45,382 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+home'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+home'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0803e2c2-9de2-40ff-93da-cb49f156cbba/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/1473c3f2-39e1-4aff-8d55-0e23dc25a055/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ff8b808f-c3a6-4979-8f9a-c6a25905116c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/81b23870-39ed-4649-9729-1d4809f713ec/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2496/7340 [84:27<163:53, 29.6 steps/min]2025-08-11 16:50:46,040 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:50:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:46,708 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m16:50:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:47,380 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m16:50:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2496/7340 [84:29<163:57, 29.5 steps/min]2025-08-11 16:50:48,016 - agent.ComputerAgent - INFO - LLM processing 
started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:50:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:49,339 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:50:49,340 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a46ee6f6-d167-47c4-ad83-e16b88450253/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6195bb79-4eff-4d3b-8b67-f28a4e6a73fa/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2496/7340 [84:31<164:01, 29.5 steps/min]2025-08-11 16:50:49,998 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:50:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:51,356 - agent.ComputerAgent - INFO 
- LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:50:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 34%|█████████████---------------------------| 2496/7340 [84:34<164:08, 29.5 steps/min]\u001b[92m16:50:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m16:50:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:54,063 
- agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 398, 'y': 89})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 398, 'y': 89})\n", + "\u001b[92m16:50:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:50:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:54,739 - agent.ComputerAgent - INFO - Computer: double_click({'x': 960, 'y': 713})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 960, 'y': 713})\n", + " 34%|█████████████---------------------------| 2496/7340 [84:36<164:11, 29.5 steps/min]2025-08-11 16:50:55,391 - agent.ComputerAgent - INFO - Computer: click({'x': 793, 'y': 41})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 793, 'y': 41})\n", + "\u001b[92m16:50:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:56,056 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m16:50:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:50:56,694 - agent.ComputerAgent - INFO - Computer: click({'x': 17, 'y': 427})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 17, 
'y': 427})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 34%|█████████████---------------------------| 2498/7340 [84:39<164:06, 29.5 steps/min]\u001b[92m16:50:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:50:58,732 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m16:50:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:50:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:50:59,814 - agent.ComputerAgent - INFO - Computer: click({'x': 318, 'y': 237})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 318, 'y': 237})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:50:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:51:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:51:02,000 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'right'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'right'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2500/7340 [84:43<164:02, 29.5 steps/min]2025-08-11 16:51:02,679 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 16:51:02,680 - agent.ComputerAgent - INFO - Computer: click({'x': 95, 'y': 185})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 95, 'y': 185})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:51:04,082 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+home'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+home'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:51:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "\u001b[92m16:51:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 34%|█████████████---------------------------| 2502/7340 [84:46<163:55, 29.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:51:05,471 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:51:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m16:51:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m16:51:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 16:51:06,153 - agent.ComputerAgent - INFO - Computer: double_click({'x': 615, 'y': 483})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 615, 'y': 483})\n", + "\u001b[92m16:51:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 34%|█████████████---------------------------| 2503/7340 [84:47<163:52, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 16:51:06,824 - agent.ComputerAgent - INFO 
- Computer: drag({'path': [{'x': 483, 'y': 328}, {'x': 411, 'y': 711}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 483, 'y': 328}, {'x': 411, 'y': 711}]})\n", + " 34%|█████████████---------------------------| 2504/7340 [84:48<163:48, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/029e3386-8f59-44a0-9dba-b14a4d9825ef/close \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2505/7340 [84:49<163:44, 29.5 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2505/7340 [84:50<163:46, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e5b48f23-7687-494c-b68c-ebdfc70d085f/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2505/7340 [84:51<163:48, 29.5 steps/min]2025-08-11 16:51:11,251 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m16:51:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2b43eb21-4025-495a-8c66-358bfcac034b/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2505/7340 [84:53<163:50, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/daac505f-9423-4b29-b11c-9b23c5c9e3ee/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f73836c4-d8e3-425b-a750-f2319c89164e/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/81b23870-39ed-4649-9729-1d4809f713ec/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/422a20c8-b318-46e4-9f06-d599c9ed261c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1473c3f2-39e1-4aff-8d55-0e23dc25a055/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:51:11,946 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m16:51:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:51:12,584 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:51:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e2ffab0a-c998-4bbf-906b-d3aad0586220/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a39ee9df-d3ba-456a-95cf-3a11a826583b/invoke \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2505/7340 [84:54<163:52, 29.5 steps/min]2025-08-11 16:51:13,334 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m16:51:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"2025-08-11 16:51:13,968 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m16:51:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2505/7340 [84:55<163:55, 29.5 steps/min]2025-08-11 16:51:14,650 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m16:51:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:51:15,336 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m16:51:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2505/7340 [84:57<163:58, 29.5 steps/min]2025-08-11 16:51:15,998 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m16:51:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 16:51:16,677 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m16:51:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + " 34%|█████████████---------------------------| 2505/7340 [84:58<164:00, 29.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 34%|█████████████---------------------------| 2505/7340 [84:59<164:02, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/c3518cd0-0df6-44e9-8393-0c62002bc984/reset \"HTTP/1.1 200 OK\"\n", + " 34%|█████████████---------------------------| 2505/7340 [85:00<164:04, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:51:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 16:51:20,733 - agent.ComputerAgent - INFO - Computer: type({'text': 'Stockholm'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Stockholm'})\n", + " 34%|█████████████---------------------------| 2505/7340 [85:02<164:08, 29.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m16:51:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Slide Pane to toggle it back on.\n", + "- You can resize it by dragging its right edge. 
If you lose the right-side Properties panel, use View > Sidebar.\n", + "\n", + "Task completed\n", + "INFO:agent.ComputerAgent:Agent: I restored the Slides panel on the left in your LibreOffice Impress.\n", + "\n", + "If it disappears again:\n", + "- Go to the top menu: View > Slide Pane to toggle it back on.\n", + "- You can resize it by dragging its right edge. If you lose the right-side Properties panel, use View > Sidebar.\n", + "\n", + "Task completed\n", + "2025-08-11 17:05:27,005 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 712\n", + " - prompt_tokens: 7355\n", + " - total_tokens: 8067\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 640\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0163\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 712\n", + " - prompt_tokens: 7355\n", + " - total_tokens: 8067\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 640\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0163\n", + " 38%|███████████████-------------------------| 2809/7340 [99:08<159:55, 28.3 steps/min]\u001b[92m17:05:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f141a0f-f4b0-4f99-b4c4-5217b268c96b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:05:27 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/025be48d-d757-4973-8c17-e42b8f6814b0/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d2c8050e-87aa-487a-9555-884be2298ade/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:05:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:05:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:05:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:05:30,349 - agent.ComputerAgent - INFO - Computer: click({'x': 87, 'y': 274})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 87, 'y': 274})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b6fc8c3-534a-4e7d-9a9b-4c6bad0e0619/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Appearance > Mode = Light\n", + "\n", + "This forces Chrome to use a light theme regardless of your device theme.\n", + "\n", + "If any websites still appear dark, it could be due to:\n", + "- A dark-mode extension (e.g., Dark Reader). Disable it under Extensions.\n", + "- The experimental flag “Auto Dark Mode for Web Contents” being enabled. Visit chrome://flags and ensure it’s Disabled.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: I’ve turned off Dark Mode in Google Chrome by setting the browser’s appearance to Light:\n", + "- Settings > Appearance > Mode = Light\n", + "\n", + "This forces Chrome to use a light theme regardless of your device theme.\n", + "\n", + "If any websites still appear dark, it could be due to:\n", + "- A dark-mode extension (e.g., Dark Reader). Disable it under Extensions.\n", + "- The experimental flag “Auto Dark Mode for Web Contents” being enabled. 
Visit chrome://flags and ensure it’s Disabled.\n", + "\n", + "Task completed.\n", + "2025-08-11 17:10:01,595 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 876\n", + " - prompt_tokens: 8770\n", + " - total_tokens: 9646\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 768\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0197\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 876\n", + " - prompt_tokens: 8770\n", + " - total_tokens: 9646\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 768\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0197\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:10:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:10:02,937 - agent.ComputerAgent - INFO - Computer: click({'x': 400, 'y': 77})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 400, 'y': 77})\n", + "2025-08-11 17:10:03,606 - agent.ComputerAgent - INFO - Computer: double_click({'x': 476, 'y': 392})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 476, 'y': 392})\n", + "\u001b[92m17:10:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end 
generation.\n", + "2025-08-11 17:10:04,225 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + " 40%|████████████████------------------------| 2945/7340 [103:45<154:51, 28.4 steps/min]\u001b[92m17:10:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:10:04,897 - agent.ComputerAgent - INFO - Computer: click({'x': 986, 'y': 578})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 986, 'y': 578})\n", + "\u001b[92m17:10:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:10:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 40%|████████████████------------------------| 2948/7340 [103:46<154:37, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:10:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:10:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for 
open-end generation.\n", + "2025-08-11 17:10:06,706 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 143, 'y': 188}, {'x': 147, 'y': 229}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 143, 'y': 188}, {'x': 147, 'y': 229}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:10:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:10:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f141a0f-f4b0-4f99-b4c4-5217b268c96b/invoke \"HTTP/1.1 200 OK\"\n", + " 40%|████████████████------------------------| 2949/7340 [103:49<154:35, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:10:08,028 - agent.ComputerAgent - INFO - Computer: double_click({'x': 730, 'y': 648})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 730, 'y': 648})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 40%|████████████████------------------------| 2950/7340 [103:50<154:31, 28.4 steps/min]\u001b[92m17:10:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:10:09,210 - agent.ComputerAgent - INFO - Computer: click({'x': 982, 'y': 167})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 982, 'y': 167})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6f141a0f-f4b0-4f99-b4c4-5217b268c96b/invoke \"HTTP/1.1 200 OK\"\n", + " 40%|████████████████------------------------| 2960/7340 [103:51<153:40, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:10:10,343 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:10:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b6fc8c3-534a-4e7d-9a9b-4c6bad0e0619/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f141a0f-f4b0-4f99-b4c4-5217b268c96b/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa0172ad-f4a9-4f1a-9e06-2d510775dbd0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1f48e361-2592-41ee-8818-d6e9174fe800/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:10:11,005 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM 
processing started with 8 messages\n", + "\u001b[92m17:10:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/797f1798-0199-4d66-a503-1c5a8d488911/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:10:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/88706cb5-896e-4bf5-8b52-5df252945e00/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 40%|████████████████------------------------| 2961/7340 [103:54<153:39, 28.5 steps/min]2025-08-11 17:10:12,983 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:10:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:10:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ecc77db9-1735-4a1a-ab51-39c4517489fb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6b818afb-eeae-43f9-b1ff-7f82844997e2/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Options > LibreOffice Impress > General: unchecked “Enable Presenter Console,” then applied and saved.\n", + "\n", + "How to choose which single monitor shows the slideshow (do this once per machine/setup):\n", + "- Slide Show > Slide Show Settings…\n", + "- Under Multiple displays, set “Presentation display” to the monitor you want (Display 1 or Display 2) and keep Presentation mode as “Full screen.”\n", + "- Click OK.\n", + "\n", + "Result:\n", + "- When you start the slideshow, it will fill only the selected monitor.\n", + "- The second monitor will remain free for other work, and the “Current/Next slide” presenter view will no longer appear.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: I configured LibreOffice Impress to use only one monitor for the slideshow and to disable the Presenter Console.\n", + "\n", + "What I changed:\n", + "- Tools > Options > LibreOffice Impress > General: unchecked “Enable Presenter Console,” then applied and saved.\n", + "\n", + "How to choose which single monitor shows the slideshow (do this once per machine/setup):\n", + "- Slide Show > Slide Show Settings…\n", + "- Under Multiple displays, set “Presentation display” to the monitor you want (Display 1 or Display 2) and keep Presentation mode as “Full screen.”\n", + "- Click OK.\n", + "\n", + "Result:\n", + "- When you start the slideshow, it will fill only the selected monitor.\n", + "- The second monitor will remain free for other work, and the “Current/Next slide” presenter view will no longer appear.\n", + "\n", + "Task completed.\n", + "2025-08-11 17:13:46,540 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 1315\n", + " - prompt_tokens: 13574\n", + " - total_tokens: 14889\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - 
audio_tokens: 0\n", + " - reasoning_tokens: 1152\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 9472\n", + " - response_cost: $0.0195\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 1315\n", + " - prompt_tokens: 13574\n", + " - total_tokens: 14889\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1152\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 9472\n", + " - response_cost: $0.0195\n", + " 42%|████████████████------------------------| 3054/7340 [107:28<150:49, 28.4 steps/min]2025-08-11 17:13:47,229 - agent.ComputerAgent - INFO - Computer: click({'x': 969, 'y': 218})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 969, 'y': 218})\n", + "2025-08-11 17:13:47,886 - agent.ComputerAgent - INFO - Computer: double_click({'x': 205, 'y': 214})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 205, 'y': 214})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:13:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:13:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting 
`pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:13:50,419 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+alt+t'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+alt+t'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3055/7340 [107:32<150:50, 28.4 steps/min]\u001b[92m17:13:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:13:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:13:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:13:51,700 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:13:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:13:52,387 - agent.ComputerAgent - INFO - Computer: click({'x': 954, 'y': 232})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 954, 'y': 232})\n", + "2025-08-11 17:13:53,035 - agent.ComputerAgent - INFO - Computer: click({'x': 589, 'y': 143})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 589, 'y': 
143})\n", + "\u001b[92m17:13:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3057/7340 [107:34<150:43, 28.4 steps/min]2025-08-11 17:13:53,673 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 660, 'scroll_x': 0, 'x': 658, 'y': 467})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 660, 'scroll_x': 0, 'x': 658, 'y': 467})\n", + " 42%|████████████████------------------------| 3059/7340 [107:35<150:34, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1f48e361-2592-41ee-8818-d6e9174fe800/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b6fc8c3-534a-4e7d-9a9b-4c6bad0e0619/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:13:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3060/7340 [107:37<150:31, 28.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:13:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:13:56,551 - agent.ComputerAgent - INFO - Computer: click({'x': 660, 'y': 104})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 660, 'y': 104})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/1f48e361-2592-41ee-8818-d6e9174fe800/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3060/7340 [107:38<150:33, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1f48e361-2592-41ee-8818-d6e9174fe800/close \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3063/7340 [107:39<150:19, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/025be48d-d757-4973-8c17-e42b8f6814b0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:13:58,853 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:13:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3063/7340 [107:40<150:21, 28.4 
steps/min]2025-08-11 17:13:59,510 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m17:13:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:00,189 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:14:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:00,831 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:14:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa0172ad-f4a9-4f1a-9e06-2d510775dbd0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3063/7340 [107:42<150:23, 28.4 steps/min]2025-08-11 17:14:01,477 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m17:14:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:02,169 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m17:14:02 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0190121-650c-4779-b26d-2480f313dc84/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3063/7340 [107:43<150:25, 28.4 steps/min]2025-08-11 17:14:02,841 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:14:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:03,496 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:14:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3063/7340 [107:45<150:27, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:14:06,224 - agent.ComputerAgent - INFO - Computer: type({'text': 'conda create -n hf python=3.11 --override-channels -c conda-forge -y\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'conda create -n hf python=3.11 --override-channels -c conda-forge -y\\n'})\n", + " 42%|████████████████------------------------| 3064/7340 [107:53<150:33, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 50%|█████ | 2/4 [00:03<00:03, 1.58s/it]2025-08-11 17:14:12,505 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:14:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3064/7340 [107:55<150:36, 28.4 steps/min]\u001b[92m17:14:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.32s/it]\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:14:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:14:15,999 - agent.ComputerAgent - INFO - Computer: type({'text': 'spider-man toys kids'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'spider-man toys kids'})\n", + " 42%|████████████████------------------------| 3064/7340 [107:57<150:40, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:14:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:14:17,286 - agent.ComputerAgent - INFO - Computer: click({'button': 'right', 'x': 512, 'y': 384})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'right', 'x': 512, 'y': 384})\n", + "\u001b[92m17:14:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:14:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:14:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:14:17,916 - agent.ComputerAgent - INFO - Computer: click({'x': 175, 'y': 183})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 175, 'y': 183})\n", + " 42%|████████████████------------------------| 3065/7340 [107:59<150:37, 28.4 steps/min]2025-08-11 17:14:18,581 - agent.ComputerAgent - INFO - Computer: click({'x': 730, 'y': 275})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 730, 'y': 275})\n", + "2025-08-11 17:14:19,257 - agent.ComputerAgent - INFO - Computer: click({'x': 125, 'y': 182})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 125, 'y': 182})\n", + "\u001b[92m17:14:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:14:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:14:19,937 - agent.ComputerAgent - INFO - Computer: click({'x': 184, 'y': 178})\n", + 
"INFO:agent.ComputerAgent:Computer: click({'x': 184, 'y': 178})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3067/7340 [108:02<150:31, 28.4 steps/min]2025-08-11 17:14:21,216 - agent.ComputerAgent - INFO - Computer: double_click({'x': 757, 'y': 644})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 757, 'y': 644})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:14:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3070/7340 [108:04<150:19, 
28.4 steps/min]\u001b[92m17:14:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:14:23,849 - agent.ComputerAgent - INFO - Computer: click({'x': 318, 'y': 59})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 318, 'y': 59})\n", + "\u001b[92m17:14:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:14:25,159 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "2025-08-11 17:14:25,792 - agent.ComputerAgent - INFO - Computer: click({'x': 910, 'y': 254})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 910, 'y': 254})\n", + "\u001b[92m17:14:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v1/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/reset \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3071/7340 [108:07<150:18, 28.4 steps/min]2025-08-11 17:14:26,480 - agent.ComputerAgent - INFO - Computer: click({'x': 652, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 652, 'y': 178})\n", + "2025-08-11 17:14:27,163 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 654, 'scroll_x': 0, 'x': 654, 'y': 467})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 654, 'scroll_x': 0, 'x': 654, 'y': 467})\n", + " 42%|████████████████------------------------| 3076/7340 [108:09<149:56, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b6fc8c3-534a-4e7d-9a9b-4c6bad0e0619/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3078/7340 [108:11<149:49, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b6fc8c3-534a-4e7d-9a9b-4c6bad0e0619/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3078/7340 [108:13<149:50, 28.4 steps/min]2025-08-11 17:14:32,090 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m17:14:32 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/025be48d-d757-4973-8c17-e42b8f6814b0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4b18a76d-ef46-4622-9643-9ee6fe4900a3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055e9f8b-8c01-4732-8b5f-ef4fc732f122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/35bb6fb7-5b34-473c-a541-13215a694bc6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0190121-650c-4779-b26d-2480f313dc84/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:14:32,771 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m17:14:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3078/7340 [108:14<149:52, 28.4 steps/min]2025-08-11 17:14:33,404 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:14:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:34,070 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m17:14:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:34,696 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:14:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:35,372 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m17:14:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:36,032 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:14:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/96765d66-53fb-41dd-99b6-cd96984e52b3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa0172ad-f4a9-4f1a-9e06-2d510775dbd0/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3078/7340 [108:17<149:57, 28.4 steps/min]2025-08-11 17:14:37,022 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m17:14:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:37,680 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:14:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3078/7340 [108:19<149:59, 28.4 steps/min]2025-08-11 17:14:38,337 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing 
started with 30 messages\n", + "\u001b[92m17:14:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:39,001 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:14:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:14:39,701 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m17:14:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3078/7340 [108:21<150:02, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 25%|██▌ | 1/4 [00:01<00:04, 1.63s/it] 28.4 steps/min]2025-08-11 17:14:42,801 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:14:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3078/7340 [108:24<150:06, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"2025-08-11 17:14:43,842 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:14:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3078/7340 [108:25<150:08, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.33s/it] 28.4 steps/min]\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:14:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3078/7340 [108:28<150:12, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 42%|████████████████------------------------| 3078/7340 [108:29<150:14, 28.4 steps/min]\u001b[92m17:14:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:14:48,883 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 17:14:48,885 - agent.ComputerAgent - INFO - Computer: click({'x': 314, 'y': 121})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 314, 'y': 121})\n", + "\u001b[92m17:14:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:14:49,521 - agent.ComputerAgent - INFO - Computer: double_click({'x': 193, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 193, 'y': 178})\n", + " 42%|████████████████------------------------| 3080/7340 [108:32<150:07, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3080/7340 [108:33<150:08, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d351b561-0537-4e9c-84fc-8e1905f2f2c8/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3080/7340 [108:34<150:09, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:14:53,931 - agent.ComputerAgent - INFO - Computer: type({'text': 'Paper Recommendation'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Paper Recommendation'})\n", + " 42%|████████████████------------------------| 3080/7340 [108:35<150:11, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:14:55,591 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:14:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
42%|████████████████------------------------| 3081/7340 [108:37<150:09, 28.4 steps/min]2025-08-11 17:14:56,259 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:14:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:14:57,587 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3081/7340 [108:39<150:12, 28.4 steps/min]\u001b[92m17:14:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:14:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:14:59,416 - agent.ComputerAgent - INFO - Computer: click({'x': 107, 'y': 33})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 107, 'y': 33})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:15:00,701 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 
200 OK\"\n", + " 42%|████████████████------------------------| 3082/7340 [108:43<150:12, 28.3 steps/min]\u001b[92m17:15:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:15:02,638 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 42%|████████████████------------------------| 3084/7340 [108:45<150:04, 28.4 steps/min]\u001b[92m17:15:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:15:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:04,289 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:15:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 
for open-end generation.\n", + "2025-08-11 17:15:04,939 - agent.ComputerAgent - INFO - Computer: click({'x': 572, 'y': 551})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 572, 'y': 551})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3084/7340 [108:47<150:07, 28.3 steps/min]\u001b[92m17:15:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:15:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:06,301 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:15:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:15:07,327 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:15:07 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:15:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:08,631 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 17:15:08,632 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'win'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'win'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3085/7340 [108:51<150:07, 28.3 steps/min]2025-08-11 17:15:09,958 - agent.ComputerAgent - INFO - Computer: click({'x': 349, 'y': 207})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 349, 'y': 207})\n", + "\u001b[92m17:15:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:15:10 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:11,270 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 147, 'y': 581}, {'x': 147, 'y': 678}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 147, 'y': 581}, {'x': 147, 'y': 678}]})\n", + " 42%|████████████████------------------------| 3086/7340 [108:52<150:05, 28.3 steps/min]\u001b[92m17:15:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:11,906 - agent.ComputerAgent - INFO - Computer: click({'x': 880, 'y': 203})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 880, 'y': 203})\n", + "\u001b[92m17:15:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:12,582 - agent.ComputerAgent - INFO - Computer: click({'x': 918, 'y': 243})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 918, 'y': 243})\n", + " 42%|████████████████------------------------| 3090/7340 [108:55<149:48, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3090/7340 [108:56<149:50, 28.4 steps/min]INFO:LiteLLM:\n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:15,903 - agent.ComputerAgent - INFO - Computer: click({'x': 894, 'y': 232})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 894, 'y': 232})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3090/7340 [108:57<149:51, 28.4 steps/min]2025-08-11 17:15:16,561 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:15:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:15:17,873 - agent.ComputerAgent - INFO - Computer: type({'text': 'conda activate hf\\nconda install -y -c conda-forge datasets\\npython -c \"import datasets, sys; print(\\'datasets version:\\', datasets.__version__)\"\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'conda activate hf\\nconda install -y -c conda-forge datasets\\npython -c \"import datasets, sys; print(\\'datasets version:\\', datasets.__version__)\"\\n'})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/96765d66-53fb-41dd-99b6-cd96984e52b3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/35bb6fb7-5b34-473c-a541-13215a694bc6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:19,817 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+z'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+z'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0190121-650c-4779-b26d-2480f313dc84/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3091/7340 [109:01<149:52, 28.4 steps/min]2025-08-11 17:15:20,461 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:15:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:21,839 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:22,509 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + " 42%|████████████████------------------------| 3092/7340 [109:04<149:50, 28.3 steps/min]\u001b[92m17:15:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:15:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:23,191 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:15:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:23,827 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + 
"\u001b[92m17:15:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:15:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3093/7340 [109:05<149:47, 28.4 steps/min]2025-08-11 17:15:24,498 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 194, 'y': 182}, {'x': 183, 'y': 294}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 194, 'y': 182}, {'x': 183, 'y': 294}]})\n", + "2025-08-11 17:15:25,826 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:15:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3093/7340 [109:08<149:51, 28.3 steps/min]\u001b[92m17:15:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:27,861 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "\u001b[92m17:15:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3094/7340 [109:09<149:48, 28.3 steps/min]2025-08-11 17:15:28,558 - agent.ComputerAgent - INFO - Computer: click({'x': 205, 'y': 175})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 205, 'y': 175})\n", + "2025-08-11 17:15:29,222 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:15:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3095/7340 [109:11<149:45, 28.3 steps/min]2025-08-11 17:15:29,891 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:15:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:15:30,530 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m17:15:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/025be48d-d757-4973-8c17-e42b8f6814b0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3096/7340 [109:12<149:41, 28.4 steps/min]2025-08-11 17:15:31,171 - agent.ComputerAgent - INFO - LLM processing 
started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:15:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3096/7340 [109:13<149:43, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3096/7340 [109:14<149:44, 28.3 steps/min]2025-08-11 17:15:32,801 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:15:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:15:33,431 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:15:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3096/7340 [109:15<149:45, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3096/7340 [109:16<149:47, 28.3 steps/min]\u001b[92m17:15:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:35,248 - agent.ComputerAgent - INFO - Computer: click({'x': 804, 'y': 654})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 804, 'y': 654})\n", + "2025-08-11 17:15:35,931 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:15:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa0172ad-f4a9-4f1a-9e06-2d510775dbd0/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3096/7340 [109:17<149:49, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:15:37,102 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:15:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 
3097/7340 [109:18<149:45, 28.3 steps/min]2025-08-11 17:15:37,759 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:15:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3097/7340 [109:22<149:51, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:15:42,613 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+p'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+p'})\n", + " 42%|████████████████------------------------| 3097/7340 [109:24<149:53, 28.3 steps/min]2025-08-11 17:15:43,243 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:15:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:15:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:44,564 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:15:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 
- \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3097/7340 [109:26<149:56, 28.3 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:45,740 - agent.ComputerAgent - INFO - Computer: click({'x': 408, 'y': 279})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 408, 'y': 279})\n", + " 42%|████████████████------------------------| 3097/7340 [109:27<149:57, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3098/7340 [109:28<149:54, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:48,097 - agent.ComputerAgent - INFO - Computer: click({'x': 880, 'y': 203})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 880, 'y': 203})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:48 
- LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:50,080 - agent.ComputerAgent - INFO - Computer: type({'text': 'conda install -y -c conda-forge --override-channels datasets\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'conda install -y -c conda-forge --override-channels datasets\\n'})\n", + " 42%|████████████████------------------------| 3098/7340 [109:31<149:58, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:51,375 - agent.ComputerAgent - INFO - Computer: type({'text': 'python --version\\npython3 --version\\nls /usr/bin/python* | head -n 20\\napt-cache policy python4 || apt-cache search python4 | head\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'python --version\\npython3 --version\\nls /usr/bin/python* | head -n 20\\napt-cache policy python4 || apt-cache search python4 | head\\n'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"\u001b[92m17:15:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3100/7340 [109:34<149:52, 28.3 steps/min]2025-08-11 17:15:53,291 - agent.ComputerAgent - INFO - Computer: move({'x': 914, 'y': 232})\n", + "INFO:agent.ComputerAgent:Computer: move({'x': 914, 'y': 232})\n", + "\u001b[92m17:15:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:53,963 - agent.ComputerAgent - INFO - Computer: click({'x': 935, 'y': 351})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 935, 'y': 351})\n", + "\u001b[92m17:15:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:15:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3101/7340 [109:36<149:49, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:55,270 - agent.ComputerAgent - INFO - Computer: click({'x': 225, 'y': 520})\n", + 
"INFO:agent.ComputerAgent:Computer: click({'x': 225, 'y': 520})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3103/7340 [109:37<149:41, 28.3 steps/min]\u001b[92m17:15:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:15:56,620 - agent.ComputerAgent - INFO - Computer: click({'x': 235, 'y': 206})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 235, 'y': 206})\n", + "\u001b[92m17:15:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:15:57,299 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 659, 'scroll_x': 0, 'x': 840, 'y': 467})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 659, 'scroll_x': 0, 'x': 840, 'y': 467})\n", + " 42%|████████████████------------------------| 3104/7340 [109:39<149:38, 28.3 steps/min]2025-08-11 17:15:57,924 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:15:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:15:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3106/7340 [109:40<149:30, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:15:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:15:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:00,272 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 193, 'y': 180}, {'x': 184, 'y': 293}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 193, 'y': 180}, {'x': 184, 'y': 293}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3106/7340 [109:42<149:33, 28.3 steps/min]\u001b[92m17:16:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:16:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:02,072 - agent.ComputerAgent - INFO - Computer: click({'x': 1011, 'y': 62})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1011, 'y': 62})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0190121-650c-4779-b26d-2480f313dc84/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3107/7340 [109:43<149:29, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:16:02,765 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m17:16:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/96765d66-53fb-41dd-99b6-cd96984e52b3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/025be48d-d757-4973-8c17-e42b8f6814b0/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:16:03,441 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:16:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:04,480 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:16:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa0172ad-f4a9-4f1a-9e06-2d510775dbd0/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3108/7340 [109:46<149:28, 28.3 steps/min]2025-08-11 17:16:05,143 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m17:16:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:05,823 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m17:16:05 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3108/7340 [109:47<149:29, 28.3 steps/min]2025-08-11 17:16:06,914 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:16:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/dc026dd3-8d59-43e0-a475-ecef72f1db12/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3108/7340 [109:48<149:31, 28.3 steps/min]2025-08-11 17:16:07,563 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:16:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:08,252 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:16:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3108/7340 [109:50<149:34, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:16:10,625 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m17:16:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:16:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3108/7340 [109:52<149:36, 28.3 steps/min]2025-08-11 17:16:11,298 - agent.ComputerAgent - INFO - Computer: click({'x': 422, 'y': 249})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 422, 'y': 249})\n", + "\u001b[92m17:16:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:11,982 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:16:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:12,642 - agent.ComputerAgent - INFO - Computer: click({'x': 381, 'y': 91})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 381, 'y': 91})\n", + " 42%|████████████████------------------------| 3110/7340 [109:55<149:30, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:16:15,007 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + " 42%|████████████████------------------------| 3111/7340 [109:57<149:28, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3111/7340 [109:59<149:30, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:16:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 
- \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:16:18,043 - agent.ComputerAgent - INFO - Computer: click({'x': 413, 'y': 587})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 413, 'y': 587})\n", + "\u001b[92m17:16:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:18,709 - agent.ComputerAgent - INFO - Computer: click({'x': 125, 'y': 182})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 125, 'y': 182})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3111/7340 [110:00<149:32, 28.3 steps/min]2025-08-11 17:16:19,335 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:16:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:20,773 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:16:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3113/7340 [110:03<149:26, 28.3 steps/min]\u001b[92m17:16:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:16:22,077 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m17:16:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:16:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:23,083 - agent.ComputerAgent - INFO - Computer: click({'x': 839, 'y': 234})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 839, 'y': 234})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"\u001b[92m17:16:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/730002fc-5760-41b0-97b8-f6783353a242/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3113/7340 [110:06<149:30, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:16:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055e9f8b-8c01-4732-8b5f-ef4fc732f122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:16:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:16:25,728 - agent.ComputerAgent - INFO - Computer: click({'x': 925, 'y': 244})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 925, 'y': 244})\n", + "\u001b[92m17:16:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3114/7340 [110:07<149:26, 28.3 steps/min]2025-08-11 17:16:26,375 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m17:16:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:27,003 - agent.ComputerAgent - INFO - Computer: click({'x': 847, 'y': 404})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 847, 'y': 404})\n", + "\u001b[92m17:16:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3115/7340 [110:08<149:23, 28.3 steps/min]2025-08-11 17:16:28,017 - agent.ComputerAgent - INFO - Computer: click({'x': 880, 'y': 203})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 880, 'y': 203})\n", + "2025-08-11 17:16:28,682 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m17:16:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3116/7340 [110:10<149:21, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:16:30,033 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 42%|████████████████------------------------| 3117/7340 [110:12<149:18, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/730002fc-5760-41b0-97b8-f6783353a242/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:31,839 - agent.ComputerAgent - INFO - Computer: click({'x': 131, 'y': 181})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 131, 'y': 181})\n", + " 42%|████████████████------------------------| 3118/7340 [110:13<149:15, 28.3 steps/min]2025-08-11 17:16:32,503 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m17:16:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 42%|████████████████------------------------| 3119/7340 [110:14<149:11, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3119/7340 [110:15<149:12, 28.3 steps/min]2025-08-11 17:16:34,177 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:16:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:16:36,680 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + " 42%|████████████████------------------------| 3119/7340 [110:18<149:16, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0190121-650c-4779-b26d-2480f313dc84/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:16:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:16:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/35bb6fb7-5b34-473c-a541-13215a694bc6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:16:37,927 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:16:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:38,544 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m17:16:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:16:39,208 - agent.ComputerAgent - INFO - Computer: click({'x': 940, 'y': 202})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 940, 'y': 202})\n", + "\u001b[92m17:16:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + " 42%|████████████████------------------------| 3119/7340 [110:21<149:21, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:40,538 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 600, 'x': 422, 'y': 249})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 600, 'x': 422, 'y': 249})\n", + "2025-08-11 17:16:41,185 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m17:16:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 43%|█████████████████-----------------------| 3121/7340 [110:23<149:14, 28.3 steps/min]\u001b[92m17:16:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:42,861 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:16:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:42 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:44,179 - agent.ComputerAgent - INFO - Computer: click({'x': 115, 'y': 184})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 115, 'y': 184})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:16:44,861 - agent.ComputerAgent - INFO - Computer: click({'x': 185, 'y': 177})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 185, 'y': 177})\n", + " 43%|█████████████████-----------------------| 3121/7340 [110:26<149:17, 28.3 steps/min]\u001b[92m17:16:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:16:45,524 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 17:16:45,525 - agent.ComputerAgent - INFO - Computer: click({'x': 345, 'y': 202})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 345, 'y': 202})\n", + "2025-08-11 17:16:46,155 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:16:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 43%|█████████████████-----------------------| 3123/7340 [110:27<149:09, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:46 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:16:48,493 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "\u001b[92m17:16:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6bacb467-6eb5-4ead-ac71-a185d2fa5e80/close \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3124/7340 [110:30<149:07, 28.3 steps/min]2025-08-11 17:16:49,152 - agent.ComputerAgent - INFO - Computer: click({'x': 964, 'y': 734})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 964, 'y': 734})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3125/7340 [110:31<149:04, 28.3 steps/min]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 17:16:50,459 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 13 messages\n", + "\u001b[92m17:16:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= gpt-5; provider = openai\n", + " 43%|█████████████████-----------------------| 3126/7340 [110:32<149:01, 28.3 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055e9f8b-8c01-4732-8b5f-ef4fc732f122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:16:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/025be48d-d757-4973-8c17-e42b8f6814b0/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00/dev/null || echo 'no python4 found'\\n\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"which python4 || command -v python4 || ls /usr/bin/python4* 2>/dev/null || echo 'no python4 found'\\n\"})\n", + " 43%|█████████████████-----------------------| 3141/7340 [111:10<148:37, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ae2379a3-a039-4954-afc2-582f8ebffdd2/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3143/7340 [111:11<148:28, 28.3 steps/min]2025-08-11 17:17:30,119 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:17:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:17:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:17:32,137 - agent.ComputerAgent - INFO - Computer: type({'text': '=DATEDIF(DATEVALUE(REGEX(A2;\".* \";\"\"));TODAY();\"y\")'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '=DATEDIF(DATEVALUE(REGEX(A2;\".* \";\"\"));TODAY();\"y\")'})\n", + "2025-08-11 17:17:32,777 - agent.ComputerAgent - INFO - LLM processing started with 23 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 23 messages\n", + "\u001b[92m17:17:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:17:34,149 - agent.ComputerAgent - INFO - Computer: type({'text': 'LLM Powered Autonomous Agents.pdf'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'LLM Powered Autonomous Agents.pdf'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:17:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 43%|█████████████████-----------------------| 3143/7340 [111:17<148:36, 28.2 steps/min]\u001b[92m17:17:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:17:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:17:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/96765d66-53fb-41dd-99b6-cd96984e52b3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:17:36,722 - agent.ComputerAgent - INFO - Computer: click({'x': 488, 'y': 368})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 488, 'y': 368})\n", + "\u001b[92m17:17:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "\u001b[92m17:17:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 43%|█████████████████-----------------------| 3145/7340 [111:18<148:28, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:17:37,353 - agent.ComputerAgent - INFO - Computer: click({'x': 349, 'y': 201})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 349, 'y': 201})\n", + "2025-08-11 17:17:37,994 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 660, 'scroll_x': 0, 'x': 706, 'y': 659})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 660, 'scroll_x': 0, 'x': 706, 'y': 659})\n", + "\u001b[92m17:17:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 43%|█████████████████-----------------------| 3147/7340 [111:19<148:19, 28.3 steps/min]\u001b[92m17:17:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:17:38,649 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m17:17:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; 
provider = openai\n", + "\u001b[92m17:17:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:17:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:17:40,658 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "2025-08-11 17:17:41,329 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 194, 'y': 183}, {'x': 184, 'y': 291}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 194, 'y': 183}, {'x': 184, 'y': 291}]})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3149/7340 [111:23<148:14, 28.3 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:17:41,970 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 25 messages\n", + "\u001b[92m17:17:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "2025-08-11 17:17:42,600 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:17:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:17:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3150/7340 [111:24<148:11, 28.3 steps/min]2025-08-11 17:17:43,277 - agent.ComputerAgent - INFO - Computer: click({'x': 850, 'y': 202})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 850, 'y': 202})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055e9f8b-8c01-4732-8b5f-ef4fc732f122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/730002fc-5760-41b0-97b8-f6783353a242/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:17:43,930 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m17:17:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 43%|█████████████████-----------------------| 3150/7340 [111:25<148:13, 
28.3 steps/min]2025-08-11 17:17:44,570 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:17:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:17:45,261 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m17:17:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:17:45,939 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:17:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/091ec079-295e-4528-bad5-f34604d013c2/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3151/7340 [111:27<148:10, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 43%|█████████████████-----------------------| 3152/7340 [111:31<148:11, 28.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa0172ad-f4a9-4f1a-9e06-2d510775dbd0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/085317e9-3b47-437e-8528-0a0fc0e6e688/invoke 
\"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:17:51,370 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:17:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3152/7340 [111:33<148:13, 28.3 steps/min]\u001b[92m17:17:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:17:52,759 - agent.ComputerAgent - INFO - LLM processing started with 27 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 27 messages\n", + "\u001b[92m17:17:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 43%|█████████████████-----------------------| 3152/7340 [111:34<148:14, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:17:53,422 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m17:17:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"\u001b[92m17:17:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 43%|█████████████████-----------------------| 3152/7340 [111:36<148:18, 28.2 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:17:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 43%|█████████████████-----------------------| 3153/7340 [111:37<148:14, 28.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0190121-650c-4779-b26d-2480f313dc84/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 25%|██▌ | 1/4 [00:01<00:05, 1.77s/it]2025-08-11 17:17:57,266 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:17:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3153/7340 [111:39<148:15, 28.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:17:57,933 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 29 messages\n", + "\u001b[92m17:17:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.39s/it] 28.2 steps/min]\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:18:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 43%|█████████████████-----------------------| 3154/7340 [111:43<148:16, 28.2 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:18:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:18:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:18:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3167/7340 [111:44<147:14, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:18:03,401 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 31 messages\n", + "\u001b[92m17:18:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:18:04,060 - agent.ComputerAgent - INFO - Computer: click({'x': 666, 'y': 219})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 666, 'y': 219})\n", + "\u001b[92m17:18:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:18:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:18:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/094ee49d-29b5-4911-bfc8-7d0e73a55c44/close \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3167/7340 
[111:45<147:15, 28.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:18:04,733 - agent.ComputerAgent - INFO - Computer: click({'x': 442, 'y': 162})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 442, 'y': 162})\n", + "2025-08-11 17:18:05,375 - agent.ComputerAgent - INFO - Computer: click({'x': 811, 'y': 336})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 811, 'y': 336})\n", + "2025-08-11 17:18:06,055 - agent.ComputerAgent - INFO - Computer: double_click({'x': 347, 'y': 222})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 347, 'y': 222})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "\u001b[92m17:18:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:18:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 43%|█████████████████-----------------------| 3169/7340 [111:48<147:09, 28.3 steps/min]2025-08-11 17:18:07,401 - agent.ComputerAgent - INFO - Computer: click({'x': 536, 'y': 276})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 536, 'y': 276})\n", + "2025-08-11 17:18:08,031 - agent.ComputerAgent - INFO - Computer: double_click({'x': 489, 'y': 368})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 489, 'y': 368})\n", + " 43%|█████████████████-----------------------| 3172/7340 [111:49<146:56, 28.4 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 
OK\"\n", + " 43%|█████████████████-----------------------| 3174/7340 [111:50<146:48, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:18:09,694 - agent.ComputerAgent - INFO - LLM processing started with 33 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 33 messages\n", + "\u001b[92m17:18:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 43%|█████████████████-----------------------| 3174/7340 [111:52<146:50, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3175/7340 [111:53<146:47, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:18:13,931 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+c'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+c'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:18:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9f1aa688-383a-44cc-9959-87e1e32dd55c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6a6179f5-13f9-4283-a0d1-aaafd881b00a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/730002fc-5760-41b0-97b8-f6783353a242/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/55d96e84-fdd8-4854-8eca-195f1a84dbc3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ae2379a3-a039-4954-afc2-582f8ebffdd2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afb4e623-39bf-4f23-ac18-6c4a71f53c62/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:18:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1909e6f5-b395-4e1d-b1f7-b06406f8731b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/025be48d-d757-4973-8c17-e42b8f6814b0/invoke \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3175/7340 [111:56<146:51, 28.4 steps/min]2025-08-11 17:18:15,929 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 35 messages\n", + "\u001b[92m17:18:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:18:16,592 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:18:16 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/4b18a76d-ef46-4622-9643-9ee6fe4900a3/reset \"HTTP/1.1 200 OK\"\n", + " 43%|█████████████████-----------------------| 3175/7340 [111:58<146:53, 28.4 steps/min]2025-08-11 17:18:17,271 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:18:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:18:17,933 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:18:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 43%|█████████████████-----------------------| 3175/7340 [111:59<146:55, 28.3 steps/min]2025-08-11 17:18:18,584 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m17:18:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/b6bbc5bc-5598-4043-be1e-6ebf2da5f046/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 arr[j + 1]:\\n arr[j], arr[j + 1] = arr[j + 1], 
arr[j]\\n swapped = True\\n if not swapped:\\n break'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '\\n n = len(arr)\\n for i in range(n):\\n swapped = False\\n for j in range(0, n - i - 1):\\n if arr[j] > arr[j + 1]:\\n arr[j], arr[j + 1] = arr[j + 1], arr[j]\\n swapped = True\\n if not swapped:\\n break'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4b4d291-1fca-4038-8670-448014a55182/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3439/7340 [120:46<137:00, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3bde0e0-c60f-4177-b7dd-15e361558126/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3439/7340 [120:47<137:01, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:07,882 - agent.ComputerAgent - INFO - Computer: type({'text': 'libreoffice --headless --convert-to pdf --outdir . -- *.doc\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'libreoffice --headless --convert-to pdf --outdir . 
-- *.doc\\n'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:27:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/dc026dd3-8d59-43e0-a475-ecef72f1db12/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b6bbc5bc-5598-4043-be1e-6ebf2da5f046/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3439/7340 [120:50<137:04, 28.5 steps/min]2025-08-11 17:27:09,249 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 17:27:09,250 - agent.ComputerAgent - INFO - Computer: double_click({'x': 984, 'y': 491})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 984, 'y': 491})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:09,926 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m17:27:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/94463065-a78e-479a-b964-45ad23a48cbb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 47%|██████████████████----------------------| 3440/7340 [120:52<137:02, 28.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:11,254 - agent.ComputerAgent - INFO - Computer: click({'x': 153, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 153, 'y': 53})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:12,616 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+s'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+s'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:13,257 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:27:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:14,573 - agent.ComputerAgent - INFO - Computer: type({'text': 'do not track'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'do not track'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:15,903 - agent.ComputerAgent - INFO - Computer: type({'text': '30'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '30'})\n", + " 47%|██████████████████----------------------| 3441/7340 [120:57<137:03, 28.4 steps/min]2025-08-11 17:27:16,549 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m17:27:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:27:17,244 - agent.ComputerAgent - INFO - Computer: click({'x': 268, 'y': 329})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 268, 'y': 329})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3444/7340 [120:59<136:52, 28.5 steps/min]\u001b[92m17:27:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:18,557 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m17:27:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:19,249 - agent.ComputerAgent - INFO - Computer: click({'x': 955, 'y': 130})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 955, 'y': 130})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/127b9298-d3cc-4b90-8567-e45146efa729/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3445/7340 [121:00<136:49, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 47%|██████████████████----------------------| 3446/7340 [121:02<136:46, 28.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:27:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:21,579 - agent.ComputerAgent - INFO - Computer: click({'x': 188, 'y': 105})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 188, 'y': 105})\n", + " 47%|██████████████████----------------------| 3446/7340 [121:03<136:47, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f8984906-7392-4305-88fa-ae9a4808fa8d/invoke \"HTTP/1.1 200 
OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/050a0934-63e8-46a0-8868-de32b28174ef/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3447/7340 [121:04<136:44, 28.5 steps/min]2025-08-11 17:27:23,268 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:27:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/57944bbf-74a1-4e6d-9401-f7b0144460f7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b28701c2-0fa4-4b07-bace-735fd2133893/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:23,935 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:27:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0322563b-daf3-41ae-8a08-f5ecd9282bcc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/792a6953-2092-47e4-a8a8-57a4af4e3be1/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3447/7340 [121:05<136:45, 28.5 steps/min]2025-08-11 17:27:24,567 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:27:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:27:25,196 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:27:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 47%|██████████████████----------------------| 3447/7340 [121:07<136:47, 28.5 steps/min]2025-08-11 17:27:25,887 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:27:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1909e6f5-b395-4e1d-b1f7-b06406f8731b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 47%|██████████████████----------------------| 3447/7340 [121:08<136:48, 28.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:27,295 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:27:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fde8bca8-8a90-4fed-b46f-c24829445665/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:27,985 - agent.ComputerAgent - INFO - Computer: click({'x': 262, 'y': 479})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 262, 'y': 479})\n", + " 47%|██████████████████----------------------| 3447/7340 [121:09<136:50, 28.4 steps/min]2025-08-11 17:27:28,643 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m17:27:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 47%|██████████████████----------------------| 3448/7340 [121:11<136:47, 28.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:30,354 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:27:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 47%|██████████████████----------------------| 3448/7340 
[121:12<136:48, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:31,027 - agent.ComputerAgent - INFO - Computer: click({'x': 219, 'y': 134})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 219, 'y': 134})\n", + " 47%|██████████████████----------------------| 3449/7340 [121:14<136:46, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:33,880 - agent.ComputerAgent - INFO - Agent: Added the Dissolve slide transition to the first slide and saved the presentation.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: Added the Dissolve slide transition to the first slide and saved the presentation.\n", + "\n", + "Task completed.\n", + "2025-08-11 17:27:34,565 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 475\n", + " - prompt_tokens: 6691\n", + " - total_tokens: 7166\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 4608\n", + " - response_cost: $0.0079\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 475\n", + " - prompt_tokens: 6691\n", + " - total_tokens: 7166\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 4608\n", + " - response_cost: $0.0079\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3450/7340 [121:17<136:45, 28.4 steps/min]\u001b[92m17:27:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b6bbc5bc-5598-4043-be1e-6ebf2da5f046/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:35,955 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m17:27:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:36,603 - agent.ComputerAgent - INFO - Computer: move({'x': 166, 'y': 68})\n", + "INFO:agent.ComputerAgent:Computer: move({'x': 166, 'y': 68})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4b18a76d-ef46-4622-9643-9ee6fe4900a3/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3450/7340 [121:18<136:46, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3b14802-9f99-46f5-8fa9-9661af7a973d/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:37,265 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM 
processing started with 34 messages\n", + "\u001b[92m17:27:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:27:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 47%|██████████████████----------------------| 3451/7340 [121:19<136:43, 28.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:27:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:39,152 - agent.ComputerAgent - INFO - Computer: click({'x': 87, 'y': 158})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 87, 'y': 158})\n", + " 47%|██████████████████----------------------| 3451/7340 [121:20<136:44, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:41,144 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 
200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 47%|██████████████████----------------------| 3452/7340 [121:23<136:43, 28.4 steps/min]\u001b[92m17:27:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:27:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:42,465 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:27:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:43,111 - agent.ComputerAgent - INFO - Computer: double_click({'x': 984, 'y': 145})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 984, 'y': 145})\n", + "\u001b[92m17:27:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/57944bbf-74a1-4e6d-9401-f7b0144460f7/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3452/7340 [121:24<136:45, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:27:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:27:44,178 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:27:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:45,504 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3b14802-9f99-46f5-8fa9-9661af7a973d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 47%|██████████████████----------------------| 3453/7340 [121:27<136:43, 28.4 steps/min]2025-08-11 17:27:46,827 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 86, 'y': 123}, {'x': 83, 'y': 250}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 86, 'y': 123}, {'x': 83, 'y': 250}]})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end 
generation.\n", + "2025-08-11 17:27:47,486 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:27:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 47%|██████████████████----------------------| 3466/7340 [121:29<135:47, 28.5 steps/min]\u001b[92m17:27:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:48,140 - agent.ComputerAgent - INFO - Computer: click({'x': 225, 'y': 564})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 225, 'y': 564})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:27:49,491 - agent.ComputerAgent - INFO - Computer: type({'text': 'ls -1 *.doc\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'ls -1 *.doc\\n'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/a4b4d291-1fca-4038-8670-448014a55182/reset \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3467/7340 [121:31<135:45, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3b14802-9f99-46f5-8fa9-9661af7a973d/close \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3469/7340 [121:32<135:38, 28.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/dc026dd3-8d59-43e0-a475-ecef72f1db12/invoke \"HTTP/1.1 200 OK\"\n", + " 47%|██████████████████----------------------| 3469/7340 [121:33<135:39, 28.5 steps/min]INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:27:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:27:53,445 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:27:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4b4d291-1fca-4038-8670-448014a55182/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:28:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:29:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:01,218 - agent.ComputerAgent - INFO - Computer: click({'x': 521, 'y': 422})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 521, 'y': 422})\n", + 
"2025-08-11 17:29:01,891 - agent.ComputerAgent - INFO - Computer: click({'x': 17, 'y': 335})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 17, 'y': 335})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:02,530 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:29:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 48%|███████████████████---------------------| 3490/7340 [122:44<135:23, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:29:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:03,204 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 17:29:03,205 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 960, 'y': 713})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 960, 'y': 713})\n", + "2025-08-11 17:29:04,634 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -646, 'scroll_x': 0, 'x': 890, 'y': 760})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -646, 'scroll_x': 0, 'x': 890, 'y': 760})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 
48%|███████████████████---------------------| 3492/7340 [122:47<135:18, 28.4 steps/min]\u001b[92m17:29:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:05,965 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m17:29:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:29:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:06,998 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 17:29:06,998 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 432})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 432})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:29:08,322 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + " 48%|███████████████████---------------------| 3496/7340 [122:54<135:08, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/57944bbf-74a1-4e6d-9401-f7b0144460f7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/3bde46c9-685b-4102-9ef4-a1535d5fcc85/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/479a3737-3ad4-48da-b73f-c8ea6e38d096/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:29:13,557 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m17:29:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2a28af1e-e61d-489c-a18e-23c5071c9aff/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4b4d291-1fca-4038-8670-448014a55182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1909e6f5-b395-4e1d-b1f7-b06406f8731b/invoke \"HTTP/1.1 200 OK\"\n", + " 48%|███████████████████---------------------| 3496/7340 [122:55<135:09, 28.4 steps/min]2025-08-11 17:29:14,241 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:29:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0322563b-daf3-41ae-8a08-f5ecd9282bcc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:29:14,878 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m17:29:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:15,558 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:29:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 48%|███████████████████---------------------| 3496/7340 [122:57<135:12, 28.4 steps/min]\u001b[92m17:29:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/792a6953-2092-47e4-a8a8-57a4af4e3be1/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:16,889 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m17:29:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:29:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 
17:29:17,556 - agent.ComputerAgent - INFO - Computer: click({'x': 599, 'y': 760})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 599, 'y': 760})\n", + " 48%|███████████████████---------------------| 3496/7340 [122:59<135:13, 28.4 steps/min]2025-08-11 17:29:18,237 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:29:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:18,917 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m17:29:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 48%|███████████████████---------------------| 3497/7340 [123:00<135:10, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:19,548 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m17:29:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a5f69ad6-9361-4670-b101-61761113341c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 48%|███████████████████---------------------| 3497/7340 [123:01<135:12, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP 
Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3497/7340 [123:03<135:14, 28.4 steps/min]\u001b[92m17:29:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:23,251 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fde8bca8-8a90-4fed-b46f-c24829445665/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP 
Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3497/7340 [123:06<135:17, 28.4 steps/min]\u001b[92m17:29:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:25,847 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 612, 'x': 655, 'y': 419})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 612, 'x': 655, 'y': 419})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:29:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 
OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 48%|███████████████████---------------------| 3499/7340 [123:08<135:11, 28.4 steps/min]\u001b[92m17:29:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:27,867 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m17:29:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:28,508 - agent.ComputerAgent - INFO - Computer: click({'x': 256, 'y': 128})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 256, 'y': 128})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:31,110 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "\u001b[92m17:29:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:31,743 - agent.ComputerAgent - INFO - Computer: click({'x': 182, 'y': 105})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 182, 'y': 105})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 
for open-end generation.\n", + "\u001b[92m17:29:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3499/7340 [123:14<135:16, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:33,138 - agent.ComputerAgent - INFO - Computer: click({'x': 634, 'y': 529})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 634, 'y': 529})\n", + "2025-08-11 17:29:33,791 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 82, 'y': 124}, {'x': 75, 'y': 124}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 82, 'y': 124}, {'x': 75, 'y': 124}]})\n", + "2025-08-11 17:29:34,450 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -517, 'scroll_x': 0, 'x': 46, 'y': 762})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -517, 'scroll_x': 0, 'x': 46, 'y': 762})\n", + "\u001b[92m17:29:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:35,110 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m17:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"\u001b[92m17:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:35,790 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 673, 'scroll_x': 0, 'x': 86, 'y': 245})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 673, 'scroll_x': 0, 'x': 86, 'y': 245})\n", + "\u001b[92m17:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3501/7340 [123:17<135:11, 28.4 steps/min]2025-08-11 17:29:36,477 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 333})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 333})\n", + "\u001b[92m17:29:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:37,144 - agent.ComputerAgent - INFO - Computer: click({'x': 268, 'y': 329})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 268, 'y': 329})\n", + "2025-08-11 17:29:37,807 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 332, 'y': 308}, {'x': 345, 'y': 308}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 332, 'y': 308}, {'x': 345, 'y': 308}]})\n", + " 48%|███████████████████---------------------| 3508/7340 [123:20<134:44, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f8984906-7392-4305-88fa-ae9a4808fa8d/invoke \"HTTP/1.1 200 OK\"\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 17:29:39,468 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 39 messages\n", + "\u001b[92m17:29:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:29:40,852 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+f'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:29:42,174 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + " 48%|███████████████████---------------------| 3508/7340 [123:23<134:47, 28.4 steps/min]2025-08-11 17:29:43,192 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m17:29:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4b4d291-1fca-4038-8670-448014a55182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b6bbc5bc-5598-4043-be1e-6ebf2da5f046/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/479a3737-3ad4-48da-b73f-c8ea6e38d096/invoke \"HTTP/1.1 200 
OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0322563b-daf3-41ae-8a08-f5ecd9282bcc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/dc026dd3-8d59-43e0-a475-ecef72f1db12/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/050a0934-63e8-46a0-8868-de32b28174ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/4b18a76d-ef46-4622-9643-9ee6fe4900a3/invoke \"HTTP/1.1 200 OK\"\n", + " 48%|███████████████████---------------------| 3509/7340 [123:25<134:44, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/94463065-a78e-479a-b964-45ad23a48cbb/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:29:43,878 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m17:29:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:44,537 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m17:29:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:45,557 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:29:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b28701c2-0fa4-4b07-bace-735fd2133893/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2a28af1e-e61d-489c-a18e-23c5071c9aff/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3510/7340 [123:28<134:43, 28.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:47,830 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m17:29:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:48,461 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m17:29:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:29:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + " 48%|███████████████████---------------------| 3510/7340 [123:30<134:45, 28.4 steps/min]2025-08-11 17:29:49,532 - agent.ComputerAgent - INFO - Computer: click({'x': 728, 'y': 179})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 728, 'y': 179})\n", + "2025-08-11 17:29:50,219 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:29:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3510/7340 [123:32<134:48, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:52,661 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m17:29:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:29:53,352 - agent.ComputerAgent - INFO - Computer: double_click({'x': 181, 'y': 105})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 181, 'y': 105})\n", + "\u001b[92m17:29:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m17:29:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f8984906-7392-4305-88fa-ae9a4808fa8d/invoke \"HTTP/1.1 200 OK\"\n", + " 48%|███████████████████---------------------| 3511/7340 [123:35<134:47, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:54,670 - agent.ComputerAgent - INFO - Computer: click({'x': 399, 'y': 541})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 399, 'y': 541})\n", + "2025-08-11 17:29:55,330 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 41 messages\n", + "\u001b[92m17:29:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b6bbc5bc-5598-4043-be1e-6ebf2da5f046/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 48%|███████████████████---------------------| 3513/7340 [123:37<134:40, 28.4 steps/min]\u001b[92m17:29:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:57,015 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:29:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:29:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3bde46c9-685b-4102-9ef4-a1535d5fcc85/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 48%|███████████████████---------------------| 3514/7340 [123:38<134:37, 28.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:29:57,710 - agent.ComputerAgent - INFO - Computer: click({'x': 525, 'y': 400})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 525, 'y': 400})\n", + "\u001b[92m17:29:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m17:29:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:29:59,392 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m17:29:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:30:00,039 - agent.ComputerAgent - INFO - Computer: click({'x': 1009, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1009, 'y': 101})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/792a6953-2092-47e4-a8a8-57a4af4e3be1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 48%|███████████████████---------------------| 3515/7340 [123:41<134:36, 28.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 17:30:00,720 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m17:30:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:30:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:30:01,758 - agent.ComputerAgent - INFO - 
Computer: scroll({'scroll_y': -657, 'scroll_x': 0, 'x': 988, 'y': 427})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -657, 'scroll_x': 0, 'x': 988, 'y': 427})\n", + " 48%|███████████████████---------------------| 3517/7340 [123:43<134:29, 28.4 steps/min]2025-08-11 17:30:02,459 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m17:30:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m17:30:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b6bbc5bc-5598-4043-be1e-6ebf2da5f046/close \"HTTP/1.1 200 OK\"\n", + " 48%|███████████████████---------------------| 3518/7340 [123:44<134:26, 28.4 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m17:30:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:30:04,432 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 638, 'scroll_x': 0, 'x': 90, 'y': 244})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 638, 'scroll_x': 0, 'x': 90, 'y': 244})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f8984906-7392-4305-88fa-ae9a4808fa8d/invoke \"HTTP/1.1 200 OK\"\n", + " 
48%|███████████████████---------------------| 3518/7340 [123:46<134:27, 28.4 steps/min]2025-08-11 17:30:05,089 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 43 messages\n", + "\u001b[92m17:30:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 48%|███████████████████---------------------| 3519/7340 [123:47<134:24, 28.4 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/57944bbf-74a1-4e6d-9401-f7b0144460f7/invoke \"HTTP/1.1 200 OK\"\n", + " 48%|███████████████████---------------------| 3519/7340 [123:48<134:25, 28.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4b4d291-1fca-4038-8670-448014a55182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fde8bca8-8a90-4fed-b46f-c24829445665/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/479a3737-3ad4-48da-b73f-c8ea6e38d096/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 17:30:07,300 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m17:30:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + 
"\u001b[92m17:30:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 17:30:08,649 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m17:30:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 48%|███████████████████---------------------| 3520/7340 [123:50<134:23, 28.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 17:30:09,313 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m17:30:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00&1 | sed -n '1,120p'\\n\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"ffmpeg -hide_banner -i video.mp4 2>&1 | sed -n '1,120p'\\n\"})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/83c40b56-f0bf-4b3a-97a5-8a1ae567e0a1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8d107e49-ae48-4b20-a0a1-7facc71e66f7/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 62%|████████████████████████----------------| 4522/7340 [164:12<102:20, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:10:31,841 - agent.ComputerAgent - INFO - Computer: click({'x': 802, 'y': 437})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 802, 'y': 437})\n", + "2025-08-11 18:10:32,505 - agent.ComputerAgent - INFO - Computer: click({'x': 745, 'y': 540})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 745, 'y': 540})\n", + "2025-08-11 18:10:33,146 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m18:10:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f096381e-eb5b-49dc-8943-c821405cce10/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/afdc88be-f209-412c-8905-25f3e8cbf43a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/53e1a378-de8f-4a22-9dc0-27eef85d8356/invoke \"HTTP/1.1 200 OK\"\n", + " 62%|████████████████████████----------------| 4523/7340 [164:14<102:17, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:10:33,803 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m18:10:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:10:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:10:34,476 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m18:10:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 18:10:35,141 - agent.ComputerAgent - INFO - Computer: click({'x': 205, 'y': 152})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 205, 'y': 152})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/91803c09-cf12-4c24-92ec-24bcf68c0897/invoke \"HTTP/1.1 200 OK\"\n", + " 62%|████████████████████████----------------| 4525/7340 [164:17<102:12, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:10:36,437 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m18:10:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:10:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:10:37,794 - agent.ComputerAgent - INFO - Computer: click({'x': 1014, 'y': 31})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1014, 'y': 31})\n", + " 62%|████████████████████████----------------| 4526/7340 [164:19<102:10, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:10:38,466 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m18:10:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:10:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:10:39,125 - agent.ComputerAgent - INFO - Computer: click({'x': 399, 'y': 354})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 399, 'y': 354})\n", + " 62%|████████████████████████----------------| 4527/7340 [164:20<102:07, 27.5 steps/min]2025-08-11 18:10:39,795 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:10:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:10:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 62%|████████████████████████----------------| 4528/7340 [164:22<102:04, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m18:10:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:10:42,145 - agent.ComputerAgent - INFO - Computer: click({'x': 437, 'y': 99})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 437, 'y': 99})\n", + "\u001b[92m18:10:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f55f73a3-1816-4f61-8ec1-88f743cec333/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b01cd4a6-3203-476b-8ece-c651b889f821/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 
OK\"\n", + " 62%|████████████████████████----------------| 4528/7340 [164:24<102:06, 27.5 steps/min]\u001b[92m18:10:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:10:43,477 - agent.ComputerAgent - INFO - Computer: click({'x': 46, 'y': 52})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 46, 'y': 52})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/941d9ec3-7c28-40f6-b948-70db95115571/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/91803c09-cf12-4c24-92ec-24bcf68c0897/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c83605a3-e62d-48d7-8568-f181d5627773/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fcdab7d3-0448-49dd-b2db-f79a7c74a08b/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:10:44,127 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m18:10:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 18:10:44,765 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m18:10:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"2025-08-11 18:10:45,424 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m18:10:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:10:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 62%|████████████████████████----------------| 4545/7340 [164:27<101:07, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f0aa6a3e-e61f-49b1-ade9-e8150e333596/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb48f65f-d00e-465a-a0ea-394e844382ca/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:10:46,114 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m18:10:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:10:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "2025-08-11 18:10:46,753 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 51, 'y': 730}, {'x': 991, 'y': 759}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 51, 'y': 730}, {'x': 991, 'y': 759}]})\n", + "2025-08-11 18:10:47,399 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m18:10:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 62%|████████████████████████----------------| 4546/7340 [164:29<101:05, 27.6 steps/min]2025-08-11 18:10:48,065 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:10:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/91803c09-cf12-4c24-92ec-24bcf68c0897/close \"HTTP/1.1 200 OK\"\n", + " 62%|████████████████████████----------------| 4547/7340 [164:30<101:02, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:10:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m18:10:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:10:50,780 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:10:50,781 - agent.ComputerAgent - INFO - Computer: click({'x': 18, 'y': 237})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 18, 'y': 237})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:10:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 62%|████████████████████████----------------| 4547/7340 [164:33<101:04, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00\n", + " 64%|█████████████████████████---------------| 4702/7340 [170:06<95:26, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f55f73a3-1816-4f61-8ec1-88f743cec333/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2bf9cd89-2d6a-4856-a09d-a771bc278600/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Settings > Privacy and security > Security > Safe Browsing.\n", + "\n", + "Task completed\n", + "INFO:agent.ComputerAgent:Agent: Chrome’s 
Safe Browsing is now set to Enhanced protection. You’ll get real-time warnings about potentially harmful or deceptive websites, dangerous downloads, and malicious extensions.\n", + "\n", + "If you ever want to change this: Chrome menu > Settings > Privacy and security > Security > Safe Browsing.\n", + "\n", + "Task completed\n", + "2025-08-11 18:22:14,920 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 514\n", + " - prompt_tokens: 6726\n", + " - total_tokens: 7240\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0135\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 514\n", + " - prompt_tokens: 6726\n", + " - total_tokens: 7240\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0135\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:22:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26d5566b-d949-4b71-accb-45197078f693/invoke \"HTTP/1.1 200 OK\"\n", + " 66%|██████████████████████████--------------| 4823/7340 [175:57<91:49, 27.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:22:16,260 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 21 messages\n", + 
"\u001b[92m18:22:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 66%|██████████████████████████--------------| 4823/7340 [175:58<91:50, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cfe4e097-0434-4025-a00a-78e26d753e51/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d9bc2461-8bd1-4c45-bebd-f473293c581c/invoke \"HTTP/1.1 200 OK\"\n", + " 66%|██████████████████████████--------------| 4824/7340 [175:59<91:47, 27.4 steps/min]2025-08-11 18:22:17,920 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m18:22:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26d5566b-d949-4b71-accb-45197078f693/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:22:19,103 - agent.ComputerAgent - INFO - LLM processing started with 23 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 23 messages\n", + " 66%|██████████████████████████--------------| 4824/7340 [176:00<91:48, 27.4 steps/min]\u001b[92m18:22:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:22:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:22:19,764 - agent.ComputerAgent - INFO - Computer: click({'x': 120, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 120, 'y': 53})\n", + " 66%|██████████████████████████--------------| 4824/7340 [176:01<91:48, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d9bc2461-8bd1-4c45-bebd-f473293c581c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 66%|██████████████████████████--------------| 4838/7340 [176:02<91:02, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d9bc2461-8bd1-4c45-bebd-f473293c581c/close \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:22:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26d5566b-d949-4b71-accb-45197078f693/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a4a2a38e-bec8-46b5-b9c9-3e82144e6ff7/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:22:22,630 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 25 messages\n", + "\u001b[92m18:22:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 18:22:23,272 - 
agent.ComputerAgent - INFO - Computer: click({'x': 833, 'y': 385})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 833, 'y': 385})\n", + " 66%|██████████████████████████--------------| 4838/7340 [176:05<91:03, 27.5 steps/min]2025-08-11 18:22:23,933 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m18:22:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:22:25,259 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+alt+t'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+alt+t'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 66%|██████████████████████████--------------| 4840/7340 [176:07<90:58, 27.5 steps/min]\u001b[92m18:22:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:22:25,941 - agent.ComputerAgent - INFO - Computer: click({'x': 989, 'y': 73})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 989, 'y': 73})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/06215fad-881b-4e96-84a9-854f2d453fc5/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:22:26,601 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m18:22:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 66%|██████████████████████████--------------| 4840/7340 [176:08<90:58, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:22:27,300 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:22:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:22:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:22:27,963 - agent.ComputerAgent - INFO - Computer: click({'x': 901, 'y': 312})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 901, 'y': 312})\n", + " 66%|██████████████████████████--------------| 4841/7340 [176:09<90:56, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:22:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26d5566b-d949-4b71-accb-45197078f693/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ad87d89c-437d-4ed4-b0f0-a157e7d11bbd/invoke \"HTTP/1.1 
200 OK\"\n", + "2025-08-11 18:22:29,233 - agent.ComputerAgent - INFO - LLM processing started with 27 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 27 messages\n", + "\u001b[92m18:22:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Lock).\n", + "2) Select the text box from the Layers/Selection panel (don’t click on the canvas).\n", + "3) Snap it to the left edge:\n", + "- Either set the X position to 0.\n", + "- Or use the Align Left to canvas/page/artboard command.\n", + "4) Unlock/show the image layer again if needed.\n", + "\n", + "Exact steps by app\n", + "- Figma:\n", + " - Lock image: Ctrl+Shift+L (Cmd+Shift+L on Mac) or click the lock icon in Layers.\n", + " - Select the text layer in Layers.\n", + " - In the right panel, set X = 0. Or select the frame and the text, then click Align left. Tip: Cmd/Ctrl+Click cycles through stacked layers; Right‑click > Select layer also helps.\n", + "\n", + "- Photoshop:\n", + " - Lock image layer (padlock in Layers).\n", + " - Select text layer in Layers.\n", + " - Press V for Move tool. Press Ctrl+A (Cmd+A) to Select All, click Align left edges in the top options bar, then Ctrl+D (Cmd+D) to deselect. Alternatively, Window > Properties and set X = 0 px.\n", + "\n", + "- Illustrator:\n", + " - Lock image: Object > Lock > Selection (Ctrl+2 / Cmd+2) or lock in Layers.\n", + " - Select text object.\n", + " - Align panel: set Align To = Artboard, then Horizontal Align Left. Or Transform panel X = 0.\n", + "\n", + "- InDesign:\n", + " - Lock image (Ctrl+L / Cmd+L) or via Layers.\n", + " - Select text frame.\n", + " - Window > Object & Layout > Align. Set Align To = Page, then Align Left. 
Or set X = 0 in the Control/Properties panel.\n", + "\n", + "- Canva:\n", + " - Select image > Lock (padlock on toolbar).\n", + " - Select text > Position > Left. Nudge with arrow keys if needed; it will snap to the page edge.\n", + "\n", + "- PowerPoint:\n", + " - Home > Select > Selection Pane: click the eye to hide the image (or select the text from the pane).\n", + " - Shape Format > Align > Align to Slide > Align Left.\n", + "\n", + "- Google Slides:\n", + " - Arrange > Order to move im...\n", + "INFO:agent.ComputerAgent:Agent: I can’t see your design app/document open on this computer, so I can’t move the text box directly. Here’s the fastest, foolproof way to get your text box flush left without ever clicking the image again:\n", + "\n", + "Universal quick steps (work in any editor)\n", + "1) Lock or hide the image layer.\n", + "- Use the Layers/Selection panel and click the lock icon on the image layer (or right‑click > Lock).\n", + "2) Select the text box from the Layers/Selection panel (don’t click on the canvas).\n", + "3) Snap it to the left edge:\n", + "- Either set the X position to 0.\n", + "- Or use the Align Left to canvas/page/artboard command.\n", + "4) Unlock/show the image layer again if needed.\n", + "\n", + "Exact steps by app\n", + "- Figma:\n", + " - Lock image: Ctrl+Shift+L (Cmd+Shift+L on Mac) or click the lock icon in Layers.\n", + " - Select the text layer in Layers.\n", + " - In the right panel, set X = 0. Or select the frame and the text, then click Align left. Tip: Cmd/Ctrl+Click cycles through stacked layers; Right‑click > Select layer also helps.\n", + "\n", + "- Photoshop:\n", + " - Lock image layer (padlock in Layers).\n", + " - Select text layer in Layers.\n", + " - Press V for Move tool. Press Ctrl+A (Cmd+A) to Select All, click Align left edges in the top options bar, then Ctrl+D (Cmd+D) to deselect. 
Alternatively, Window > Properties and set X = 0 px.\n", + "\n", + "- Illustrator:\n", + " - Lock image: Object > Lock > Selection (Ctrl+2 / Cmd+2) or lock in Layers.\n", + " - Select text object.\n", + " - Align panel: set Align To = Artboard, then Horizontal Align Left. Or Transform panel X = 0.\n", + "\n", + "- InDesign:\n", + " - Lock image (Ctrl+L / Cmd+L) or via Layers.\n", + " - Select text frame.\n", + " - Window > Object & Layout > Align. Set Align To = Page, then Align Left. Or set X = 0 in the Control/Properties panel.\n", + "\n", + "- Canva:\n", + " - Select image > Lock (padlock on toolbar).\n", + " - Select text > Position > Left. Nudge with arrow keys if needed; it will snap to the page edge.\n", + "\n", + "- PowerPoint:\n", + " - Home > Select > Selection Pane: click the eye to hide the image (or select the text from the pane).\n", + " - Shape Format > Align > Align to Slide > Align Left.\n", + "\n", + "- Google Slides:\n", + " - Arrange > Order to move im...\n", + "2025-08-11 18:35:23,380 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 3277\n", + " - prompt_tokens: 1951\n", + " - total_tokens: 5228\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 2624\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0352\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 3277\n", + " - prompt_tokens: 1951\n", + " - total_tokens: 5228\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 2624\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0352\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + " 70%|████████████████████████████------------| 5147/7340 [189:05<80:33, 27.2 steps/min]2025-08-11 18:35:24,059 - agent.ComputerAgent - INFO - LLM processing started with 33 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 33 messages\n", + "\u001b[92m18:35:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 70%|████████████████████████████------------| 5147/7340 [189:06<80:34, 27.2 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:35:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 70%|████████████████████████████------------| 5147/7340 [189:07<80:34, 27.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c268b680-eafe-4b8d-914a-28e5540231cd/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:35:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m18:35:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` 
to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:35:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0d923fcd-4666-4869-8ad2-17460c904167/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:35:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 70%|████████████████████████████------------| 5147/7340 [189:08<80:35, 27.2 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:35:27,111 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 76, 'y': 181}, {'x': 124, 'y': 181}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 76, 'y': 181}, {'x': 124, 'y': 181}]})\n", + "\u001b[92m18:35:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:35:27,806 - agent.ComputerAgent - INFO - Computer: click({'x': 290, 'y': 149})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 290, 'y': 149})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 70%|████████████████████████████------------| 5148/7340 [189:09<80:32, 27.2 steps/min]2025-08-11 18:35:28,462 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m18:35:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 70%|████████████████████████████------------| 5150/7340 [189:10<80:26, 27.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:35:29,639 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 35 messages\n", + "\u001b[92m18:35:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 70%|████████████████████████████------------| 5150/7340 [189:13<80:28, 27.2 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2350cfc0-8e2e-4302-b87c-df7d35bd6eea/invoke \"HTTP/1.1 200 OK\"\n", + " 70%|████████████████████████████------------| 5151/7340 [189:14<80:25, 27.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:35:33,840 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 37 messages\n", + "\u001b[92m18:35:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89880137-9134-4973-9389-b3535802254c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 70%|████████████████████████████------------| 5151/7340 [189:15<80:25, 27.2 
steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:35:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 70%|████████████████████████████------------| 5151/7340 [189:16<80:26, 27.2 steps/min]\u001b[92m18:35:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:35:35,720 - agent.ComputerAgent - INFO - Computer: click({'x': 207, 'y': 488})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 207, 'y': 488})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 70%|████████████████████████████------------| 5152/7340 [189:17<80:23, 27.2 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2350cfc0-8e2e-4302-b87c-df7d35bd6eea/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c268b680-eafe-4b8d-914a-28e5540231cd/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:35:38,050 - agent.ComputerAgent - INFO - Agent: I converted the opened ODS on your Desktop to a CSV with the same base name while LibreOffice was running.\n", + "\n", + "Command used:\n", + "mkdir -p /tmp/lo-temp-profile && libreoffice --headless -env:UserInstallation=file:///tmp/lo-temp-profile --convert-to \"csv:Text - txt - csv (StarCalc):44,34,0\" --outdir ~/Desktop ~/Desktop/file_example_ODS_5000.ods\n", + 
"\n", + "Notes:\n", + "- A temporary LibreOffice profile was used (-env:UserInstallation=…) so the conversion works even with the GUI instance open.\n", + "- The CSV file has been created on your Desktop as file_example_ODS_5000.csv.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: I converted the opened ODS on your Desktop to a CSV with the same base name while LibreOffice was running.\n", + "\n", + "Command used:\n", + "mkdir -p /tmp/lo-temp-profile && libreoffice --headless -env:UserInstallation=file:///tmp/lo-temp-profile --convert-to \"csv:Text - txt - csv (StarCalc):44,34,0\" --outdir ~/Desktop ~/Desktop/file_example_ODS_5000.ods\n", + "\n", + "Notes:\n", + "- A temporary LibreOffice profile was used (-env:UserInstallation=…) so the conversion works even with the GUI instance open.\n", + "- The CSV file has been created on your Desktop as file_example_ODS_5000.csv.\n", + "\n", + "Task completed.\n", + "2025-08-11 18:35:38,700 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 920\n", + " - prompt_tokens: 10826\n", + " - total_tokens: 11746\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 768\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0227\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 920\n", + " - prompt_tokens: 10826\n", + " - total_tokens: 11746\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 768\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0227\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2350cfc0-8e2e-4302-b87c-df7d35bd6eea/close \"HTTP/1.1 200 OK\"\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c268b680-eafe-4b8d-914a-28e5540231cd/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + " 70%|████████████████████████████------------| 5173/7340 [189:20<79:18, 27.3 steps/min]2025-08-11 18:35:39,393 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 39 messages\n", + "\u001b[92m18:35:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055914cd-07b0-4dcd-9407-c6975b1eccbf/invoke \"HTTP/1.1 200 OK\"\n", + " 70%|████████████████████████████------------| 5173/7340 [189:22<79:19, 27.3 steps/min]2025-08-11 18:35:41,381 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m18:35:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:35:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5afdf327-0d8f-4749-8016-19cb1aedf273/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/reset \"HTTP/1.1 200 OK\"\n", + " 70%|████████████████████████████------------| 5173/7340 [189:24<79:20, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 70%|████████████████████████████------------| 5174/7340 [189:25<79:18, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5afdf327-0d8f-4749-8016-19cb1aedf273/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 25%|██▌ | 1/4 [00:01<00:04, 1.67s/it]2025-08-11 18:35:45,241 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 41 messages\n", + "\u001b[92m18:35:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5182/7340 [189:26<78:53, 27.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5afdf327-0d8f-4749-8016-19cb1aedf273/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 50%|█████ | 2/4 [00:03<00:03, 1.62s/it]27.4 steps/min]2025-08-11 18:35:47,491 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:35:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 75%|███████▌ | 3/4 [00:04<00:01, 1.58s/it]27.4 steps/min]2025-08-11 18:35:48,319 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:35:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.33s/it]27.4 steps/min]\n", + " 71%|████████████████████████████------------| 5183/7340 [189:31<78:52, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:35:50,250 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 43 messages\n", + "\u001b[92m18:35:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5183/7340 [189:32<78:52, 27.3 steps/min]\u001b[92m18:35:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:35:50,943 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 713, 'scroll_x': 0, 'x': 716, 'y': 646})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 713, 'scroll_x': 0, 'x': 716, 'y': 646})\n", + " 71%|████████████████████████████------------| 5184/7340 [189:34<78:50, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 71%|████████████████████████████------------| 5185/7340 [189:35<78:47, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5185/7340 [189:38<78:49, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:35:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5185/7340 [189:39<78:49, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b006d7b-b853-41ed-8a84-b7eaa5b6e94b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 25%|██▌ | 1/4 [00:01<00:04, 1.66s/it]2025-08-11 18:35:59,836 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+shift+p'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+shift+p'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d05e9e78-ad03-41fc-a347-043ec46bd299/close \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 50%|█████ | 2/4 [00:03<00:03, 1.61s/it]27.3 steps/min]2025-08-11 18:36:01,386 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m18:36:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5185/7340 [189:43<78:51, 27.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 18:36:02,052 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m18:36:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.40s/it]27.3 steps/min]\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5185/7340 [189:46<78:52, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:36:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5185/7340 [189:47<78:52, 27.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 25%|██▌ | 1/4 [00:01<00:04, 1.64s/it]27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89880137-9134-4973-9389-b3535802254c/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5186/7340 [189:49<78:50, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/89880137-9134-4973-9389-b3535802254c/close \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5186/7340 [189:51<78:51, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/497d5104-1e6e-44a9-a164-fec745a337b6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.35s/it]27.3 steps/min]\n", + "\u001b[92m18:36:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:36:12,164 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:36:12,165 - agent.ComputerAgent - INFO - Computer: click({'x': 16, 'y': 428})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 16, 'y': 428})\n", + " 71%|████████████████████████████------------| 
5186/7340 [189:54<78:52, 27.3 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 71%|████████████████████████████------------| 5187/7340 [189:56<78:50, 27.3 steps/min]\u001b[92m18:36:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:36:15,365 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:36:15,367 - agent.ComputerAgent - INFO - Computer: double_click({'x': 989, 'y': 650})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 989, 'y': 650})\n", + " 71%|████████████████████████████------------| 5188/7340 [189:59<78:48, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:36:18,079 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m18:36:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5188/7340 [190:02<78:49, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:36:21,321 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m18:36:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() 
model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5188/7340 [190:04<78:50, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:36:24,203 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+c'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+c'})\n", + " 71%|████████████████████████████------------| 5188/7340 [190:05<78:51, 27.3 steps/min]2025-08-11 18:36:25,341 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m18:36:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5188/7340 [190:07<78:51, 27.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5188/7340 [190:10<78:52, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:36:29,780 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'pagedown'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'pagedown'})\n", + " 71%|████████████████████████████------------| 5189/7340 [190:12<78:50, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:36:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5189/7340 [190:13<78:51, 27.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 50%|█████ | 2/4 [00:03<00:03, 1.58s/it]27.3 
steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b006d7b-b853-41ed-8a84-b7eaa5b6e94b/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5189/7340 [190:17<78:53, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 75%|███████▌ | 3/4 [00:04<00:01, 1.56s/it]\u001b[92m18:36:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.31s/it]27.3 steps/min]\n", + "2025-08-11 18:36:37,871 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m18:36:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:36:39,406 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+shift+p'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+shift+p'})\n", + " 71%|████████████████████████████------------| 5189/7340 [190:21<78:54, 27.3 steps/min]\u001b[92m18:36:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:36:40,047 - agent.ComputerAgent - INFO - 
Computer: click({'x': 15, 'y': 142})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 142})\n", + "\u001b[92m18:36:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:36:40,733 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 629})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 629})\n", + " 71%|████████████████████████████------------| 5189/7340 [190:22<78:54, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0d923fcd-4666-4869-8ad2-17460c904167/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:23<78:18, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0d923fcd-4666-4869-8ad2-17460c904167/close \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:24<78:18, 27.3 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:26<78:19, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:36:46,172 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:36:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:36:46 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055914cd-07b0-4dcd-9407-c6975b1eccbf/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5201/7340 [190:28<78:20, 27.3 steps/min]2025-08-11 18:36:47,482 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m18:36:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:30<78:21, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3df65c5-9d1c-44fd-b9bb-37f1f0cd64dc/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:32<78:21, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:33<78:22, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e42da596-e101-4fd3-9dea-8a1d63615dad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1fef1c7a-93ef-4a63-b067-399dfc4ff08a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v1/environments/b3df65c5-9d1c-44fd-b9bb-37f1f0cd64dc/reset \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 75%|███████▌ | 3/4 [00:04<00:01, 1.57s/it]27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/497d5104-1e6e-44a9-a164-fec745a337b6/reset \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.32s/it]\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:36:55,357 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3df65c5-9d1c-44fd-b9bb-37f1f0cd64dc/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5201/7340 [190:37<78:23, 27.3 steps/min]2025-08-11 18:36:56,039 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:36:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:36:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:36:56,709 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 387})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 387})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/497d5104-1e6e-44a9-a164-fec745a337b6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v1/environments/e42da596-e101-4fd3-9dea-8a1d63615dad/reset \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5202/7340 [190:38<78:21, 27.3 steps/min]2025-08-11 18:36:57,382 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:36:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/1fef1c7a-93ef-4a63-b067-399dfc4ff08a/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:36:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5203/7340 [190:39<78:18, 27.3 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e42da596-e101-4fd3-9dea-8a1d63615dad/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:36:58,641 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:36:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:36:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:36:59,325 - agent.ComputerAgent - INFO - Computer: click({'x': 368, 'y': 561})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 368, 'y': 561})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5203/7340 [190:41<78:19, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1fef1c7a-93ef-4a63-b067-399dfc4ff08a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:00,501 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:37:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5204/7340 [190:42<78:16, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af5d4b08-d761-4bf8-a2c7-7ce16ed98ab9/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5204/7340 [190:43<78:16, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:03,933 - agent.ComputerAgent - INFO - Agent: 
Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:37:03,935 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'meta'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'meta'})\n", + " 71%|████████████████████████████------------| 5204/7340 [190:45<78:17, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:37:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:05,291 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m18:37:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:37:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:06,012 - agent.ComputerAgent - INFO - Computer: click({'x': 524, 'y': 503})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 524, 'y': 503})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/055914cd-07b0-4dcd-9407-c6975b1eccbf/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5205/7340 [190:47<78:15, 27.3 steps/min]\u001b[92m18:37:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:06,653 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:37:06,654 - agent.ComputerAgent - INFO - Computer: click({'x': 13, 'y': 41})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 13, 'y': 41})\n", + "2025-08-11 18:37:07,313 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:37:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:08,648 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:37:08,649 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'win'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'win'})\n", + " 71%|████████████████████████████------------| 5206/7340 [190:50<78:13, 27.3 steps/min]2025-08-11 18:37:09,323 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + 
"\u001b[92m18:37:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5208/7340 [190:55<78:09, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b006d7b-b853-41ed-8a84-b7eaa5b6e94b/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5208/7340 [190:56<78:09, 27.3 steps/min]2025-08-11 18:37:15,613 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m18:37:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3df65c5-9d1c-44fd-b9bb-37f1f0cd64dc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e42da596-e101-4fd3-9dea-8a1d63615dad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1fef1c7a-93ef-4a63-b067-399dfc4ff08a/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:openai._base_client:Retrying request to /chat/completions in 0.403242 seconds\n", + " 71%|████████████████████████████------------| 5208/7340 [190:57<78:10, 27.3 steps/min]2025-08-11 18:37:16,773 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m18:37:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:18,458 - agent.ComputerAgent - INFO - Computer: type({'text': 'drive.google.com'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'drive.google.com'})\n", + " 71%|████████████████████████████------------| 5208/7340 [191:00<78:11, 27.3 steps/min]2025-08-11 18:37:19,460 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m18:37:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5209/7340 [191:01<78:08, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5209/7340 [191:02<78:09, 27.3 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:21,836 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m18:37:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5209/7340 [191:03<78:09, 27.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:37:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:22,505 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:37:22,506 - agent.ComputerAgent - INFO - Computer: click({'x': 1006, 'y': 9})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1006, 'y': 9})\n", + " 71%|████████████████████████████------------| 5209/7340 [191:04<78:10, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:24,905 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+v'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+v'})\n", + " 71%|████████████████████████████------------| 5210/7340 [191:06<78:07, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:26,064 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m18:37:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 18:37:26,733 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + " 71%|████████████████████████████------------| 5210/7340 [191:08<78:08, 27.3 steps/min]\u001b[92m18:37:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + 
"\u001b[92m18:37:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:37:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/497d5104-1e6e-44a9-a164-fec745a337b6/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5210/7340 [191:10<78:09, 27.3 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m18:37:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:29,395 - agent.ComputerAgent - INFO - Computer: click({'x': 16, 'y': 429})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 16, 'y': 429})\n", + "\u001b[92m18:37:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:30,066 - agent.ComputerAgent - INFO - Computer: click({'x': 18, 'y': 45})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 18, 'y': 45})\n", + "2025-08-11 18:37:30,744 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m18:37:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5210/7340 [191:12<78:10, 27.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:37:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:31,387 - agent.ComputerAgent - INFO - Computer: click({'x': 18, 'y': 239})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 18, 'y': 239})\n", + " 71%|████████████████████████████------------| 5212/7340 [191:13<78:04, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 71%|████████████████████████████------------| 5213/7340 [191:14<78:01, 27.3 steps/min]\u001b[92m18:37:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:33,218 - agent.ComputerAgent - INFO - Computer: click({'x': 49, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 49, 'y': 53})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:34,578 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c1b31663-de2f-4fd6-a091-28bf62a74f86/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5214/7340 [191:16<77:59, 27.3 steps/min]INFO:openai._base_client:Retrying request to /chat/completions in 0.421017 seconds\n", + " 71%|████████████████████████████------------| 5215/7340 [191:17<77:56, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1fef1c7a-93ef-4a63-b067-399dfc4ff08a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:36,730 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:37:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b3df65c5-9d1c-44fd-b9bb-37f1f0cd64dc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5215/7340 [191:18<77:57, 27.3 steps/min]2025-08-11 18:37:37,733 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + 
"\u001b[92m18:37:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e42da596-e101-4fd3-9dea-8a1d63615dad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5215/7340 [191:19<77:57, 27.3 steps/min]2025-08-11 18:37:38,404 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:37:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5215/7340 [191:21<77:58, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a6ead00-3730-4f34-9acb-3c8109ec140a/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 
for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:40,395 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m18:37:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:37:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:41,082 - agent.ComputerAgent - INFO - Computer: click({'x': 605, 'y': 527})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 605, 'y': 527})\n", + "2025-08-11 18:37:41,718 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m18:37:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5215/7340 [191:23<77:59, 27.2 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:37:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:42,379 - agent.ComputerAgent - INFO - Computer: click({'x': 525, 'y': 502})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 525, 'y': 502})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5216/7340 [191:25<77:56, 27.2 steps/min]\u001b[92m18:37:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:37:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:44,917 - agent.ComputerAgent - INFO - Computer: click({'x': 652, 'y': 139})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 652, 'y': 139})\n", + " 71%|████████████████████████████------------| 5217/7340 [191:26<77:54, 27.3 steps/min]\u001b[92m18:37:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:45,530 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': -1169, 'scroll_x': 0, 'x': 526, 'y': 427})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': -1169, 'scroll_x': 0, 'x': 526, 'y': 427})\n", + " 71%|████████████████████████████------------| 5219/7340 [191:29<77:49, 27.3 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/3077c8ef-543a-4fa8-b46c-49b632230eed/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 71%|████████████████████████████------------| 5219/7340 [191:30<77:49, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9b006d7b-b853-41ed-8a84-b7eaa5b6e94b/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:50,126 - agent.ComputerAgent - INFO - Computer: type({'text': 'Thunderbird'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Thunderbird'})\n", + "\u001b[92m18:37:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/1064657b-b89a-4eeb-8197-1c110af6b752/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5219/7340 [191:31<77:50, 27.2 steps/min]2025-08-11 18:37:50,787 - agent.ComputerAgent - INFO - Computer: click({'x': 749, 'y': 440})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 749, 'y': 440})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/055914cd-07b0-4dcd-9407-c6975b1eccbf/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:37:51,454 - agent.ComputerAgent - INFO - LLM processing started with 40 
messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m18:37:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3077c8ef-543a-4fa8-b46c-49b632230eed/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 71%|████████████████████████████------------| 5220/7340 [191:34<77:48, 27.2 steps/min]\u001b[92m18:37:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:53,444 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m18:37:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:37:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:37:54,116 - agent.ComputerAgent - INFO - Computer: click({'x': 1008, 'y': 10})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1008, 'y': 10})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a72854f0-3bb0-4711-a18e-7a467a56390e/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/3077c8ef-543a-4fa8-b46c-49b632230eed/close \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5223/7340 [191:35<77:39, 27.3 steps/min]\u001b[92m18:37:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:37:55,819 - agent.ComputerAgent - INFO - Computer: click({'x': 80, 'y': 430})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 80, 'y': 430})\n", + "2025-08-11 18:37:56,505 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:37:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1fef1c7a-93ef-4a63-b067-399dfc4ff08a/invoke \"HTTP/1.1 200 OK\"\n", + " 71%|████████████████████████████------------| 5224/7340 [191:38<77:37, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:37:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "2025-08-11 18:37:57,824 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m18:37:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 71%|████████████████████████████------------| 5225/7340 [191:39<77:34, 27.3 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/e42da596-e101-4fd3-9dea-8a1d63615dad/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af5d4b08-d761-4bf8-a2c7-7ce16ed98ab9/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Settings > On startup.\n", + "- Select “Open the New Tab page,” or “Continue where you left off.”\n", + "- If “Open a specific page or set of pages” is selected, remove funbrain.com (click the three dots beside it > Remove).\n", + "- Also check Settings > Appearance > Show home button. If enabled, set it to “New Tab page” and make sure funbrain.com isn’t listed there.\n", + "\n", + "2) Unpin any pinned tab\n", + "- If you see a tiny pin icon on the funbrain tab, right‑click the tab > Unpin. 
Close it.\n", + "\n", + "3) Remove any extension hijacking your homepage/new tab\n", + "- Go to chrome://extensions\n", + "- Toggle off or Remove any unknown extensions, especially ones referencing “New Tab,” “Home,” “Search,” or anything you don’t recognize.\n", + "- Restart Chrome.\n", + "\n", + "4) Check if Chrome is “Managed by your organization” (policy hijack)\n", + "- Go to chrome://policy\n", + "- If you see policies like HomepageLocation, NewTabPageLocation, RestoreOnStartup, or StartupURLs pointing to funbrain.com (and this is your personal device), remove the policies:\n", + " - Press Windows key, type “regedit”, open Registry Editor.\n", + " - Navigate to:\n", + " - HKEY_CURRENT_USER\\Software\\Policies\\Google\\Chrome\n", + " - HKEY_LOCAL_MACHINE\\SOFTWARE\\Policies\\Google\\Chrome\n", + " - Delete values like HomepageLocation, StartupURLs, RestoreOnStartup, NewTabPageLocation, or delete the Chrome key entirely if it only contains unwanted values.\n", + " - Easier method (PowerShell as Administrator):\n", + " Remove-Item -Path \"HKCU:\\Software\\Policies\\Google\\Chrome\" -Recurse -Force -ErrorAction SilentlyContinue\n", + " Remove-Item -Path \"HKLM:\\SOFTWARE\\Policies\\Google\\Chrome\" -Recurse -Force -ErrorAction SilentlyContinue\n", + " - Reboot and recheck chrome://policy (it should be empty or unrelated).\n", + "\n", + "5) Fix the shortcut that launches Chrome\n", + "- Right‑click the Chrome shortcut you normall...\n", + "INFO:agent.ComputerAgent:Agent: This isn’t a cache issue. Chrome is being told to open that site by a setting, an extension/policy, or a modified shortcut. Do the steps below in order (quickest first). 
They’re written for Windows on your Surface Pro.\n", + "\n", + "1) Fix Chrome’s startup settings\n", + "- Open Chrome.\n", + "- Menu (three dots) > Settings > On startup.\n", + "- Select “Open the New Tab page,” or “Continue where you left off.”\n", + "- If “Open a specific page or set of pages” is selected, remove funbrain.com (click the three dots beside it > Remove).\n", + "- Also check Settings > Appearance > Show home button. If enabled, set it to “New Tab page” and make sure funbrain.com isn’t listed there.\n", + "\n", + "2) Unpin any pinned tab\n", + "- If you see a tiny pin icon on the funbrain tab, right‑click the tab > Unpin. Close it.\n", + "\n", + "3) Remove any extension hijacking your homepage/new tab\n", + "- Go to chrome://extensions\n", + "- Toggle off or Remove any unknown extensions, especially ones referencing “New Tab,” “Home,” “Search,” or anything you don’t recognize.\n", + "- Restart Chrome.\n", + "\n", + "4) Check if Chrome is “Managed by your organization” (policy hijack)\n", + "- Go to chrome://policy\n", + "- If you see policies like HomepageLocation, NewTabPageLocation, RestoreOnStartup, or StartupURLs pointing to funbrain.com (and this is your personal device), remove the policies:\n", + " - Press Windows key, type “regedit”, open Registry Editor.\n", + " - Navigate to:\n", + " - HKEY_CURRENT_USER\\Software\\Policies\\Google\\Chrome\n", + " - HKEY_LOCAL_MACHINE\\SOFTWARE\\Policies\\Google\\Chrome\n", + " - Delete values like HomepageLocation, StartupURLs, RestoreOnStartup, NewTabPageLocation, or delete the Chrome key entirely if it only contains unwanted values.\n", + " - Easier method (PowerShell as Administrator):\n", + " Remove-Item -Path \"HKCU:\\Software\\Policies\\Google\\Chrome\" -Recurse -Force -ErrorAction SilentlyContinue\n", + " Remove-Item -Path \"HKLM:\\SOFTWARE\\Policies\\Google\\Chrome\" -Recurse -Force -ErrorAction SilentlyContinue\n", + " - Reboot and recheck chrome://policy (it should be empty or unrelated).\n", + 
"\n", + "5) Fix the shortcut that launches Chrome\n", + "- Right‑click the Chrome shortcut you normall...\n", + "2025-08-11 18:49:11,237 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 2424\n", + " - prompt_tokens: 1953\n", + " - total_tokens: 4377\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1600\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0267\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 2424\n", + " - prompt_tokens: 1953\n", + " - total_tokens: 4377\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1600\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0267\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/497d5104-1e6e-44a9-a164-fec745a337b6/invoke \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5568/7340 [202:52<64:34, 27.4 steps/min]2025-08-11 18:49:11,903 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m18:49:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 18:49:12,597 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m18:49:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; 
provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0ce9d177-2b9a-4fde-a8a5-eb1b59248c8f/invoke \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5568/7340 [202:54<64:34, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:49:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:49:13,963 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m18:49:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 76%|██████████████████████████████----------| 5568/7340 [202:55<64:34, 27.4 steps/min]\u001b[92m18:49:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:49:14,626 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 237})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 18, 'y': 237})\n", + "2025-08-11 18:49:16,032 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m18:49:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/8a9dd85a-f951-495e-aea0-d3864853591e/invoke \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5568/7340 [202:57<64:35, 27.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:49:17,933 - agent.ComputerAgent - INFO - Computer: type({'text': 'ICLR 2013 location'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'ICLR 2013 location'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:49:19,279 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + " 76%|██████████████████████████████----------| 5569/7340 [203:01<64:33, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8a9dd85a-f951-495e-aea0-d3864853591e/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:49:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:49:21,241 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 18:49:21,241 - agent.ComputerAgent - INFO - Agent: Yes. VLC is set by default to reuse the first instance when you open a file (especially from the file manager). 
Turn that off and you can run multiple, independent players.\n", + "\n", + "Do this:\n", + "- Open VLC.\n", + "- Go to Tools -> Preferences (Ctrl+P).\n", + "- In the Simple settings view on the Interface tab, find Playlist and Instances.\n", + "- Uncheck:\n", + " - Allow only one instance\n", + " - Use only one instance when started from file manager\n", + " - Enqueue items into playlist in one instance (if you see it)\n", + "- Click Save.\n", + "- Quit all VLC windows completely (Ctrl+Q) so the setting takes effect, then start VLC again and open a different file in each window.\n", + "\n", + "Command-line alternative (Windows/macOS/Linux):\n", + "- Start each player with:\n", + " - vlc --no-one-instance --no-one-instance-when-started-from-file-manager file1.mp4\n", + " - vlc --no-one-instance file2.mp4\n", + "\n", + "After this, each VLC window will play its own video simultaneously.\n", + "\n", + "Task completed\n", + "INFO:agent.ComputerAgent:Agent: Yes. VLC is set by default to reuse the first instance when you open a file (especially from the file manager). 
Turn that off and you can run multiple, independent players.\n", + "\n", + "Do this:\n", + "- Open VLC.\n", + "- Go to Tools -> Preferences (Ctrl+P).\n", + "- In the Simple settings view on the Interface tab, find Playlist and Instances.\n", + "- Uncheck:\n", + " - Allow only one instance\n", + " - Use only one instance when started from file manager\n", + " - Enqueue items into playlist in one instance (if you see it)\n", + "- Click Save.\n", + "- Quit all VLC windows completely (Ctrl+Q) so the setting takes effect, then start VLC again and open a different file in each window.\n", + "\n", + "Command-line alternative (Windows/macOS/Linux):\n", + "- Start each player with:\n", + " - vlc --no-one-instance --no-one-instance-when-started-from-file-manager file1.mp4\n", + " - vlc --no-one-instance file2.mp4\n", + "\n", + "After this, each VLC window will play its own video simultaneously.\n", + "\n", + "Task completed\n", + "2025-08-11 18:49:21,909 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 1307\n", + " - prompt_tokens: 1975\n", + " - total_tokens: 3282\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1088\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0155\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 1307\n", + " - prompt_tokens: 1975\n", + " - total_tokens: 3282\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1088\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0155\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/invoke \"HTTP/1.1 200 OK\"\n", + 
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m18:49:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 76%|██████████████████████████████----------| 5590/7340 [203:04<63:34, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/35ee5a0d-d8f9-4419-b253-d394f35fe993/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:49:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m18:49:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/0835b0ab-8369-4209-a85b-955dbb271b54/reset \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:49:23,196 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m18:49:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m18:49:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 18:49:23,915 - agent.ComputerAgent - INFO - Computer: click({'x': 109, 'y': 125})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 109, 'y': 125})\n", + "\u001b[92m18:49:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8a9dd85a-f951-495e-aea0-d3864853591e/close \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/655a0f34-fb5e-49f8-9a65-531af668d6c6/close \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5591/7340 [203:05<63:31, 27.5 steps/min]2025-08-11 18:49:24,590 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 75, 'y': 177}, {'x': 278, 'y': 177}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 75, 'y': 177}, {'x': 278, 'y': 177}]})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa800986-7030-4845-b4a1-82119abb97e9/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/84265bb9-b6f6-479e-8a58-920cfa2b7c69/invoke \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5592/7340 [203:07<63:29, 27.5 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5600/7340 [203:08<63:07, 27.6 steps/min]INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:49:28,277 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+c'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+c'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0835b0ab-8369-4209-a85b-955dbb271b54/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/a72854f0-3bb0-4711-a18e-7a467a56390e/invoke \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5600/7340 [203:10<63:07, 27.6 steps/min]2025-08-11 18:49:28,957 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m18:49:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:49:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 18:49:30,327 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m18:49:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m18:49:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/84265bb9-b6f6-479e-8a58-920cfa2b7c69/close \"HTTP/1.1 200 OK\"\n", + " 76%|██████████████████████████████----------| 5600/7340 [203:12<63:08, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1064657b-b89a-4eeb-8197-1c110af6b752/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/584f1ba5-3dc8-4b11-9242-7100c4e1133e/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/aa800986-7030-4845-b4a1-82119abb97e9/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 18:49:32,369 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m18:49:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 log.txt\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'cd ~/Desktop\\npython3 calculator.py > log.txt\\n'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/10c454d7-987f-4a23-83d6-534bd9ba42c2/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 83%|█████████████████████████████████-------| 6089/7340 [220:38<45:19, 27.6 steps/min]\u001b[92m19:06:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b641dbb7-3e3c-437d-bc11-5e038171855d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/43a383a0-163d-4a8b-8494-0e1d1eab6cd6/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/27fc4825-1617-494a-9308-b128bd8af05a/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:06:57,016 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:06:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:06:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/80d19a15-b1ca-43cc-8d1b-1f86242172b5/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Modify > Font > choose Liberation Sans Narrow > OK.\n", + "- Drawing styles (for manual text boxes):\n", + " - Click the Drawing Styles icon.\n", + " - Right‑click Default > Modify > Font > Liberation Sans Narrow > OK.\n", + "- Remove any direct overrides so objects follow the styles:\n", + " - In the Slides pane, select all slides (click first, Shift‑click last).\n", + " - Press Ctrl+M (Format > Clear Direct Formatting). 
This makes existing text boxes pick up the new style font.\n", + "\n", + "Method 2: One‑click change with a macro (works even if text has direct formatting)\n", + "- Tools > Macros > Organize Macros > LibreOffice Basic > select your presentation > New.\n", + "- Paste this macro into the new module and Run:\n", + "\n", + "Sub SetAllTextToLiberationNarrow\n", + " Dim oDoc As Object, oSlides As Object, oSlide As Object\n", + " Dim oShape As Object\n", + " oDoc = ThisComponent\n", + " oSlides = oDoc.getDrawPages()\n", + " Dim i As Long, j As Long\n", + " For i = 0 To oSlides.getCount() - 1\n", + " oSlide = oSlides.getByIndex(i)\n", + " For j = 0 To oSlide.getCount() - 1\n", + " oShape = oSlide.getByIndex(j)\n", + " Call SetFontRecursive(oShape)\n", + " Next j\n", + " Next i\n", + "End Sub\n", + "\n", + "Sub SetFontRecursive(oShape As Object)\n", + " If oShape.supportsService(\"com.sun.star.drawing.GroupShape\") Then\n", + " Dim k As Long\n", + " For k = 0 To oShape.getCount() - 1\n", + " Call SetFontRecursive(oShape.getByIndex(k))\n", + " Next k\n", + " ElseIf oShape.supportsService(\"com.sun.star.drawing.Text\") Then\n", + " Dim oText As Object, oCur As Object\n", + " oText = oShape.getText()\n", + " oCur = oText.createTextCursor()\n", + " oCur.CharFontName = \"Liberation Sans Narrow\"\n", + " oCur.CharF...\n", + "INFO:agent.ComputerAgent:Agent: Yes—here are reliable ways to standardize every text box to Liberation Sans Narrow in LibreOffice Impress without clicking each one.\n", + "\n", + "Method 1 (recommended): Change styles so all objects inherit the font\n", + "- Open your presentation.\n", + "- Press F11 to open the Styles deck.\n", + "- Presentation styles (for placeholders):\n", + " - Click the Presentation Styles icon.\n", + " - Right‑click each of Title, Subtitle, Outline 1–10 (or Outline), Notes, etc. 
> Modify > Font > choose Liberation Sans Narrow > OK.\n", + "- Drawing styles (for manual text boxes):\n", + " - Click the Drawing Styles icon.\n", + " - Right‑click Default > Modify > Font > Liberation Sans Narrow > OK.\n", + "- Remove any direct overrides so objects follow the styles:\n", + " - In the Slides pane, select all slides (click first, Shift‑click last).\n", + " - Press Ctrl+M (Format > Clear Direct Formatting). This makes existing text boxes pick up the new style font.\n", + "\n", + "Method 2: One‑click change with a macro (works even if text has direct formatting)\n", + "- Tools > Macros > Organize Macros > LibreOffice Basic > select your presentation > New.\n", + "- Paste this macro into the new module and Run:\n", + "\n", + "Sub SetAllTextToLiberationNarrow\n", + " Dim oDoc As Object, oSlides As Object, oSlide As Object\n", + " Dim oShape As Object\n", + " oDoc = ThisComponent\n", + " oSlides = oDoc.getDrawPages()\n", + " Dim i As Long, j As Long\n", + " For i = 0 To oSlides.getCount() - 1\n", + " oSlide = oSlides.getByIndex(i)\n", + " For j = 0 To oSlide.getCount() - 1\n", + " oShape = oSlide.getByIndex(j)\n", + " Call SetFontRecursive(oShape)\n", + " Next j\n", + " Next i\n", + "End Sub\n", + "\n", + "Sub SetFontRecursive(oShape As Object)\n", + " If oShape.supportsService(\"com.sun.star.drawing.GroupShape\") Then\n", + " Dim k As Long\n", + " For k = 0 To oShape.getCount() - 1\n", + " Call SetFontRecursive(oShape.getByIndex(k))\n", + " Next k\n", + " ElseIf oShape.supportsService(\"com.sun.star.drawing.Text\") Then\n", + " Dim oText As Object, oCur As Object\n", + " oText = oShape.getText()\n", + " oCur = oText.createTextCursor()\n", + " oCur.CharFontName = \"Liberation Sans Narrow\"\n", + " oCur.CharF...\n", + "2025-08-11 19:26:25,833 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 3294\n", + " - prompt_tokens: 1964\n", + " - total_tokens: 5258\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 
0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 2560\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0354\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 3294\n", + " - prompt_tokens: 1964\n", + " - total_tokens: 5258\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 2560\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0354\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a2c2835-d21e-4e04-babb-e8305a4f1f9d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f055f32-c606-4a58-91bc-c5990d4e8280/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f209659e-2ed9-40fa-ae61-5359fb9ef290/invoke \"HTTP/1.1 200 OK\"\n", + " 90%|████████████████████████████████████----| 6611/7340 [240:07<26:28, 27.5 steps/min]2025-08-11 19:26:26,517 - agent.ComputerAgent - INFO - Computer: double_click({'x': 884, 'y': 123})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 884, 'y': 123})\n", + "2025-08-11 19:26:27,223 - agent.ComputerAgent - INFO - Computer: click({'x': 344, 'y': 34})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 344, 'y': 34})\n", + "2025-08-11 19:26:27,897 - agent.ComputerAgent - INFO - Computer: click({'x': 548, 'y': 249})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 548, 'y': 249})\n", + "2025-08-11 19:26:28,563 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", 
+ "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:26:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 90%|████████████████████████████████████----| 6613/7340 [240:10<26:24, 27.5 steps/min]2025-08-11 19:26:29,242 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:26:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:26:30,582 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'f1'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'f1'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:26:31,909 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + " 90%|████████████████████████████████████----| 6616/7340 [240:13<26:17, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:26:33,178 - agent.ComputerAgent - INFO - Computer: screenshot({})\n", + "INFO:agent.ComputerAgent:Computer: screenshot({})\n", + " 90%|████████████████████████████████████----| 6618/7340 [240:14<26:12, 27.5 steps/min]2025-08-11 19:26:33,810 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m19:26:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:26:34,481 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:26:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/09c933ad-61bf-4498-b248-0df86e3aea78/invoke \"HTTP/1.1 200 OK\"\n", + " 90%|████████████████████████████████████----| 6619/7340 [240:16<26:10, 27.5 steps/min]2025-08-11 19:26:35,113 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:26:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:26:35,982 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:26:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f4008ee-6c98-4905-9ade-965ea7842b64/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 90%|████████████████████████████████████----| 6619/7340 [240:17<26:10, 27.5 steps/min]2025-08-11 19:26:37,325 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:26:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/18debd9e-6c58-4504-8a04-13cba683a254/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:26:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 90%|████████████████████████████████████----| 6619/7340 [240:19<26:10, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2d8a6e51-acdb-47b9-8ee4-f3085c741fd5/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:26:38,682 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:26:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/bcec4523-df7a-48b5-aea1-8d7c632a6dc4/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51954fb4-34ed-4511-b2fd-a6169b5ea5d3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9a544504-3e48-48b2-8429-0a97e266ebfb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/84a5d283-63f1-43fc-b483-76116d67f385/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/c915dbd9-32bc-40a7-9c07-d437c737419f/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:26:39,380 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:26:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:26:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/775a5b67-2406-42b8-86e5-243e01b8dc27/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/052ac585-1998-46b2-9ac5-0dc192aeba02/invoke \"HTTP/1.1 200 OK\"\n", + " 90%|████████████████████████████████████----| 6619/7340 [240:21<26:10, 27.5 steps/min]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 19:26:40,063 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:26:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:26:40,762 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 344, 'y': 137})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 344, 'y': 137})\n", + " 90%|████████████████████████████████████----| 
6619/7340 [240:22<26:11, 27.5 steps/min]2025-08-11 19:26:41,401 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:26:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:26:42,073 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:26:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:26:42,723 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:26:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 90%|████████████████████████████████████----| 6620/7340 [240:24<26:08, 27.5 steps/min]2025-08-11 19:26:43,402 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:26:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:26:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:26:45,407 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:26:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:26:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 90%|████████████████████████████████████----| 6620/7340 [240:29<26:09, 27.5 steps/min]\u001b[92m19:26:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:26:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:26:48,083 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", 
+ "\u001b[92m19:26:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:26:48,753 - agent.ComputerAgent - INFO - Computer: click({'x': 914, 'y': 660})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 914, 'y': 660})\n", + "\u001b[92m19:26:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 90%|████████████████████████████████████----| 6621/7340 [240:31<26:07, 27.5 steps/min]\u001b[92m19:26:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:26:50,093 - agent.ComputerAgent - INFO - Computer: click({'x': 641, 'y': 498})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 641, 'y': 498})\n", + "\u001b[92m19:26:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:26:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"2025-08-11 19:26:50,755 - agent.ComputerAgent - INFO - Computer: click({'x': 893, 'y': 134})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 893, 'y': 134})\n", + "2025-08-11 19:26:51,455 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:26:51,456 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 386, 'y': 250})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 386, 'y': 250})\n", + "\u001b[92m19:26:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 90%|████████████████████████████████████----| 6622/7340 [240:33<26:04, 27.5 steps/min]2025-08-11 19:26:52,153 - agent.ComputerAgent - INFO - Computer: click({'x': 173, 'y': 150})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 173, 'y': 150})\n", + "2025-08-11 19:26:52,821 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:26:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 90%|████████████████████████████████████----| 6626/7340 [240:35<25:55, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:26:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 90%|████████████████████████████████████----| 6626/7340 [240:36<25:55, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77892268-14f2-4dfa-b58c-6a682f258679/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:26:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:26:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 90%|████████████████████████████████████----| 6626/7340 [240:37<25:55, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:26:56,855 - agent.ComputerAgent - INFO - Computer: click({'x': 46, 'y': 528})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 46, 'y': 528})\n", + "\u001b[92m19:26:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:26:57,527 - agent.ComputerAgent - INFO - Computer: click({'x': 731, 'y': 617})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 731, 'y': 617})\n", + "2025-08-11 19:26:58,183 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:26:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 90%|████████████████████████████████████----| 6626/7340 
[240:39<25:56, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a2c2835-d21e-4e04-babb-e8305a4f1f9d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f055f32-c606-4a58-91bc-c5990d4e8280/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:26:58,873 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:26:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/73c70c0d-c1a0-401f-83c0-063e983abd6c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f209659e-2ed9-40fa-ae61-5359fb9ef290/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:26:59,543 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:26:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:27:00,884 - agent.ComputerAgent - INFO - Computer: 
type({'text': '20'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '20'})\n", + " 90%|████████████████████████████████████----| 6628/7340 [240:42<25:51, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f4008ee-6c98-4905-9ade-965ea7842b64/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:27:01,553 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:27:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:27:02,260 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:27:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:27:02,945 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:27:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f4008ee-6c98-4905-9ade-965ea7842b64/close \"HTTP/1.1 200 OK\"\n", + " 91%|████████████████████████████████████----| 6648/7340 [240:44<25:03, 27.6 steps/min]2025-08-11 19:27:04,256 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:27:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 91%|████████████████████████████████████----| 6648/7340 [240:46<25:03, 27.6 steps/min]INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/2d8a6e51-acdb-47b9-8ee4-f3085c741fd5/invoke \"HTTP/1.1 200 OK\"\n", + " 91%|████████████████████████████████████----| 6648/7340 [240:47<25:03, 27.6 steps/min]2025-08-11 19:27:05,962 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:27:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 91%|████████████████████████████████████----| 6648/7340 [240:48<25:03, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:27:07,803 - agent.ComputerAgent - INFO - Computer: type({'text': 'Extensions: Install from VSIX'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Extensions: Install from VSIX'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/775a5b67-2406-42b8-86e5-243e01b8dc27/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:27:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:27:09,824 - 
agent.ComputerAgent - INFO - Computer: get_current_url({})\n", + "INFO:agent.ComputerAgent:Computer: get_current_url({})\n", + " 91%|████████████████████████████████████----| 6648/7340 [240:51<25:04, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/18debd9e-6c58-4504-8a04-13cba683a254/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 View > Folders > Unified.\n", + "- In the left sidebar you’ll now see a “Unified Folders” section with a single “Inbox” that aggregates all accounts.\n", + "\n", + "Optional: choose exactly which accounts/folders are included\n", + "- In the left sidebar, under Unified Folders, right‑click Inbox > Properties.\n", + "- Click “Select the folders to search,” then check the Inbox for each account you want included. Click Update.\n", + "\n", + "Notes and tips\n", + "- Thunderbird remembers this view; if it ever changes, repeat View > Folders > Unified.\n", + "- If you use POP and want all mail to physically go to one inbox, set a Global Inbox: Account Settings > your POP account > Server Settings > Advanced > Global Inbox (Local Folders).\n", + "- If a folder is missing from the Unified view, right‑click that folder > Subscribe (for IMAP) to ensure it’s subscribed, and confirm it’s checked in the Unified Inbox Properties.\n", + "\n", + "Task completed\n", + "INFO:agent.ComputerAgent:Agent: Here’s how to set up a unified inbox in Thunderbird so all your accounts’ emails show in one place:\n", + "\n", + "- Open Thunderbird.\n", + "- Go to the application menu (three horizontal lines in the top-right) > View > Folders > Unified.\n", + "- In the left sidebar you’ll now see a “Unified Folders” section with a single “Inbox” that aggregates all accounts.\n", + "\n", + "Optional: choose exactly which accounts/folders are 
included\n", + "- In the left sidebar, under Unified Folders, right‑click Inbox > Properties.\n", + "- Click “Select the folders to search,” then check the Inbox for each account you want included. Click Update.\n", + "\n", + "Notes and tips\n", + "- Thunderbird remembers this view; if it ever changes, repeat View > Folders > Unified.\n", + "- If you use POP and want all mail to physically go to one inbox, set a Global Inbox: Account Settings > your POP account > Server Settings > Advanced > Global Inbox (Local Folders).\n", + "- If a folder is missing from the Unified view, right‑click that folder > Subscribe (for IMAP) to ensure it’s subscribed, and confirm it’s checked in the Unified Inbox Properties.\n", + "\n", + "Task completed\n", + "Loading checkpoint shards: 25%|██▌ | 1/4 [00:01<00:05, 1.72s/it]2025-08-11 19:29:25,906 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 1266\n", + " - prompt_tokens: 12578\n", + " - total_tokens: 13844\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1024\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 11776\n", + " - response_cost: $0.0151\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 1266\n", + " - prompt_tokens: 12578\n", + " - total_tokens: 13844\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 1024\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 11776\n", + " - response_cost: $0.0151\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:29:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: 
POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 50%|█████ | 2/4 [00:03<00:03, 1.67s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:29:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 91%|████████████████████████████████████----| 6708/7340 [243:09<22:54, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 75%|███████▌ | 3/4 [00:04<00:01, 1.64s/it]2025-08-11 19:29:29,309 - agent.ComputerAgent - INFO - Computer: type({'text': 'sar -V\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'sar -V\\n'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.38s/it]\n", + "\u001b[92m19:29:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 91%|████████████████████████████████████----| 6709/7340 [243:11<22:52, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 
OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:29:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 91%|████████████████████████████████████----| 6710/7340 [243:13<22:50, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/73c70c0d-c1a0-401f-83c0-063e983abd6c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f055f32-c606-4a58-91bc-c5990d4e8280/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:29:32,337 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 29 messages\n", + "\u001b[92m19:29:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:29:33,023 - agent.ComputerAgent - INFO - Computer: click({'x': 115, 'y': 635})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 115, 'y': 635})\n", + "\u001b[92m19:29:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 91%|████████████████████████████████████----| 6710/7340 [243:14<22:50, 27.6 steps/min]\u001b[92m19:29:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:29:33,677 - agent.ComputerAgent - INFO - Computer: click({'x': 28, 'y': 739})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 28, 'y': 739})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:29:34,335 - agent.ComputerAgent - INFO - Computer: click({'x': 530, 'y': 417})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 530, 'y': 417})\n", + "\u001b[92m19:29:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:29:34,988 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:29:34,989 - agent.ComputerAgent - INFO - Computer: click({'x': 80, 'y': 181})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 80, 'y': 181})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + " 91%|████████████████████████████████████----| 6711/7340 [243:17<22:48, 27.6 steps/min]\u001b[92m19:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c915dbd9-32bc-40a7-9c07-d437c737419f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/775a5b67-2406-42b8-86e5-243e01b8dc27/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/18debd9e-6c58-4504-8a04-13cba683a254/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a2c2835-d21e-4e04-babb-e8305a4f1f9d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/77892268-14f2-4dfa-b58c-6a682f258679/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 
for open-end generation.\n", + "2025-08-11 19:29:36,366 - agent.ComputerAgent - INFO - Computer: click({'x': 186, 'y': 148})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 186, 'y': 148})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:29:36,994 - agent.ComputerAgent - INFO - Computer: click({'x': 85, 'y': 234})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 85, 'y': 234})\n", + "\u001b[92m19:29:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:29:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 91%|████████████████████████████████████----| 6714/7340 [243:18<22:41, 27.6 steps/min]2025-08-11 19:29:37,669 - agent.ComputerAgent - INFO - Computer: click({'x': 483, 'y': 267})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 483, 'y': 267})\n", + "2025-08-11 19:29:38,353 - agent.ComputerAgent - INFO - Computer: click({'x': 974, 'y': 34})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 974, 'y': 34})\n", + "2025-08-11 19:29:38,999 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:29:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:29:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + " 92%|████████████████████████████████████----| 6717/7340 [243:20<22:34, 27.6 steps/min]2025-08-11 19:29:39,730 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 914, 'y': 671}, {'x': 984, 'y': 467}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 914, 'y': 671}, {'x': 984, 'y': 467}]})\n", + "2025-08-11 19:29:40,390 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:29:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 92%|████████████████████████████████████----| 6719/7340 [243:22<22:29, 27.6 steps/min]2025-08-11 19:29:41,077 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:29:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:29:42,161 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:29:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/73c70c0d-c1a0-401f-83c0-063e983abd6c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f055f32-c606-4a58-91bc-c5990d4e8280/invoke \"HTTP/1.1 200 
OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/18debd9e-6c58-4504-8a04-13cba683a254/close \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6720/7340 [243:23<22:27, 27.6 steps/min]2025-08-11 19:29:42,846 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 31 messages\n", + "\u001b[92m19:29:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 92%|████████████████████████████████████----| 6725/7340 [243:26<22:15, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:29:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6725/7340 [243:27<22:15, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/052ac585-1998-46b2-9ac5-0dc192aeba02/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f055f32-c606-4a58-91bc-c5990d4e8280/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/963f0b0a-47d1-479c-9077-6c59023108fe/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/84a5d283-63f1-43fc-b483-76116d67f385/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f209659e-2ed9-40fa-ae61-5359fb9ef290/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/bcec4523-df7a-48b5-aea1-8d7c632a6dc4/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9a544504-3e48-48b2-8429-0a97e266ebfb/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/invoke \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6725/7340 [243:28<22:15, 27.6 steps/min]2025-08-11 19:29:48,280 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:29:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 System_Resources_Report.txt\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'sar -u 1 30 > System_Resources_Report.txt\\n'})\n", + " 92%|████████████████████████████████████----| 6784/7340 [245:45<20:08, 27.6 steps/min]2025-08-11 19:32:03,959 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:32:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:04,639 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:32:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c915dbd9-32bc-40a7-9c07-d437c737419f/invoke \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:46<20:06, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7633715b-dde0-4c56-a4b6-22ccee78f5f5/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/963f0b0a-47d1-479c-9077-6c59023108fe/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:05,820 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:32:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:47<20:06, 27.6 steps/min]2025-08-11 19:32:06,520 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:32:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:48<20:06, 27.6 steps/min]2025-08-11 19:32:07,704 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:32:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:08,380 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:32:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:50<20:06, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51954fb4-34ed-4511-b2fd-a6169b5ea5d3/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:10,389 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:32:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/775a5b67-2406-42b8-86e5-243e01b8dc27/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:53<20:06, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:32:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:12,779 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m19:32:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:32:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:54<20:06, 27.6 steps/min]2025-08-11 19:32:13,458 - agent.ComputerAgent - INFO - Computer: click({'x': 461, 'y': 169})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 461, 'y': 169})\n", + "\u001b[92m19:32:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:14,530 - agent.ComputerAgent - INFO - Computer: click({'x': 125, 'y': 182})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 125, 'y': 182})\n", + " 92%|████████████████████████████████████----| 6785/7340 [245:56<20:07, 27.6 steps/min]2025-08-11 19:32:15,198 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:32:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:16,538 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'win'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'win'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6787/7340 [245:58<20:02, 27.6 steps/min]\u001b[92m19:32:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for 
open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:19,209 - agent.ComputerAgent - INFO - Computer: type({'text': 'Dublin'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Dublin'})\n", + " 92%|████████████████████████████████████----| 6788/7340 [246:00<20:00, 27.6 steps/min]\u001b[92m19:32:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:19,877 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:32:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:20,569 - agent.ComputerAgent - INFO - Computer: click({'x': 90, 'y': 183})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 90, 'y': 183})\n", + "\u001b[92m19:32:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:21,243 - agent.ComputerAgent - INFO - Computer: click({'x': 430, 'y': 219})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 430, 'y': 219})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 92%|████████████████████████████████████----| 6789/7340 [246:04<19:58, 27.6 steps/min]\u001b[92m19:32:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:32:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:32:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:23,644 - agent.ComputerAgent - INFO - Computer: click({'x': 188, 'y': 190})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 188, 'y': 190})\n", + " 93%|█████████████████████████████████████---| 6791/7340 [246:05<19:53, 27.6 steps/min]\u001b[92m19:32:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:24,280 - agent.ComputerAgent - INFO - Computer: click({'x': 123, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 123, 'y': 178})\n", + " 93%|█████████████████████████████████████---| 6793/7340 [246:07<19:49, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end 
generation.\n", + " 93%|█████████████████████████████████████---| 6793/7340 [246:08<19:49, 27.6 steps/min]\u001b[92m19:32:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:27,608 - agent.ComputerAgent - INFO - Computer: click({'x': 18, 'y': 476})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 18, 'y': 476})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b928bd01-f1b7-4f34-accf-acb6aec5d8cd/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a2c2835-d21e-4e04-babb-e8305a4f1f9d/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6793/7340 [246:09<19:49, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:28,295 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:32:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7633715b-dde0-4c56-a4b6-22ccee78f5f5/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:28,953 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:32:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6794/7340 [246:10<19:47, 27.6 steps/min]2025-08-11 19:32:29,623 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:32:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/963f0b0a-47d1-479c-9077-6c59023108fe/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f209659e-2ed9-40fa-ae61-5359fb9ef290/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6794/7340 [246:12<19:47, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:30,976 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:32:30 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:31,628 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:32:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:32:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6794/7340 [246:13<19:47, 27.6 steps/min]2025-08-11 19:32:32,293 - agent.ComputerAgent - INFO - Computer: click({'x': 534, 'y': 554})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 534, 'y': 554})\n", + "2025-08-11 19:32:32,937 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:32:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6794/7340 [246:14<19:47, 27.6 steps/min]2025-08-11 19:32:33,607 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:32:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:34,249 - agent.ComputerAgent - INFO - LLM 
processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:32:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6795/7340 [246:17<19:45, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:36,916 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:32:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6795/7340 [246:18<19:45, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 93%|█████████████████████████████████████---| 6795/7340 [246:19<19:45, 27.6 steps/min]2025-08-11 19:32:38,805 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 
messages\n", + "\u001b[92m19:32:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51c56274-d8ae-4edf-8ff1-b637cd2fff66/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:39,495 - agent.ComputerAgent - INFO - Computer: click({'x': 173, 'y': 149})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 173, 'y': 149})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6795/7340 [246:21<19:45, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:40,803 - agent.ComputerAgent - INFO - Computer: type({'text': 'wc -l System_Resources_Report.txt\\n'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'wc -l System_Resources_Report.txt\\n'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6796/7340 [246:23<19:43, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:42,099 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:32:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6797/7340 [246:24<19:41, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:32:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:44,597 - agent.ComputerAgent - INFO - Computer: double_click({'x': 482, 'y': 277})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 482, 'y': 277})\n", + " 93%|█████████████████████████████████████---| 6797/7340 [246:26<19:41, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:32:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:46,644 - agent.ComputerAgent - INFO - Computer: type({'text': '=SUM(Sheet1.B2:B11)'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '=SUM(Sheet1.B2:B11)'})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:47,299 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:32:47,300 - agent.ComputerAgent - INFO - Computer: click({'x': 578, 'y': 286})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 578, 'y': 286})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6798/7340 [246:29<19:39, 27.6 steps/min]\u001b[92m19:32:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:32:48,654 - agent.ComputerAgent - INFO - Computer: click({'x': 316, 'y': 416})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 316, 'y': 
416})\n", + "\u001b[92m19:32:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/775a5b67-2406-42b8-86e5-243e01b8dc27/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c915dbd9-32bc-40a7-9c07-d437c737419f/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:49,313 - agent.ComputerAgent - INFO - Computer: click({'x': 306, 'y': 416})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 306, 'y': 416})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:32:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6800/7340 [246:31<19:34, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:50,681 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 254})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 254})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:32:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6802/7340 [246:33<19:30, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:52,640 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:32:52,640 - agent.ComputerAgent - INFO - Computer: click({'x': 14, 'y': 524})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 14, 'y': 524})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:54,035 - agent.ComputerAgent - INFO - Computer: type({'text': '=A2/1000000'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '=A2/1000000'})\n", + "\u001b[92m19:32:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:32:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6803/7340 [246:35<19:27, 27.6 steps/min]2025-08-11 19:32:54,689 - agent.ComputerAgent - INFO - Computer: click({'x': 19, 'y': 481})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 19, 'y': 481})\n", + "2025-08-11 19:32:55,406 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 193})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 193})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:32:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6805/7340 [246:37<19:23, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:32:56,702 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:32:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:57,376 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:32:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:32:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/963f0b0a-47d1-479c-9077-6c59023108fe/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6807/7340 [246:39<19:18, 27.6 steps/min]2025-08-11 19:32:58,080 - agent.ComputerAgent - INFO - Computer: click({'x': 343, 'y': 183})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 343, 'y': 183})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b928bd01-f1b7-4f34-accf-acb6aec5d8cd/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:32:58,735 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:32:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:32:59,441 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:32:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6807/7340 [246:41<19:18, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:33:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:33:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7633715b-dde0-4c56-a4b6-22ccee78f5f5/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6808/7340 [246:42<19:16, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:33:01,450 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:33:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a2c2835-d21e-4e04-babb-e8305a4f1f9d/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:33:02,154 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:33:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:33:02 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6808/7340 [246:43<19:16, 27.6 steps/min]2025-08-11 19:33:02,855 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:33:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:33:03,530 - agent.ComputerAgent - INFO - Computer: click({'x': 633, 'y': 473})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 633, 'y': 473})\n", + "\u001b[92m19:33:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51954fb4-34ed-4511-b2fd-a6169b5ea5d3/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6808/7340 [246:45<19:16, 27.6 steps/min]\u001b[92m19:33:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", 
+ "2025-08-11 19:33:04,220 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:33:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/f209659e-2ed9-40fa-ae61-5359fb9ef290/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:33:04,896 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:33:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:33:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6809/7340 [246:46<19:14, 27.6 steps/min]2025-08-11 19:33:05,586 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 424, 'y': 418}, {'x': 527, 'y': 226}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 424, 'y': 418}, {'x': 527, 'y': 226}]})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:33:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + " 93%|█████████████████████████████████████---| 6809/7340 [246:48<19:14, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:33:06,917 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:33:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:33:07,597 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:33:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:33:07 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6810/7340 [246:49<19:12, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:33:08,272 - agent.ComputerAgent - INFO - Computer: click({'x': 946, 'y': 750})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 946, 'y': 750})\n", + "2025-08-11 19:33:08,957 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:33:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6811/7340 [246:51<19:10, 27.6 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/51954fb4-34ed-4511-b2fd-a6169b5ea5d3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6812/7340 [246:52<19:08, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51954fb4-34ed-4511-b2fd-a6169b5ea5d3/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:33:13,491 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + " 93%|█████████████████████████████████████---| 6812/7340 [246:55<19:08, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:33:14,808 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:33:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b928bd01-f1b7-4f34-accf-acb6aec5d8cd/invoke 
\"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + " 93%|█████████████████████████████████████---| 6813/7340 [246:57<19:06, 27.6 steps/min]2025-08-11 19:33:16,837 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:33:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:33:17,526 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:33:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6814/7340 [246:59<19:03, 27.6 steps/min]2025-08-11 19:33:18,187 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:33:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 93%|█████████████████████████████████████---| 6814/7340 [247:00<19:04, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:33:20,027 - agent.ComputerAgent - INFO - Computer: click({'x': 89, 'y': 185})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 89, 'y': 185})\n", + " 
93%|█████████████████████████████████████---| 6814/7340 [247:01<19:04, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:33:21,352 - agent.ComputerAgent - INFO - Computer: type({'text': 'find / -type f -name \"secret.docx\" 2>/dev/null'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'find / -type f -name \"secret.docx\" 2>/dev/null'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:33:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 93%|█████████████████████████████████████---| 6815/7340 [247:03<19:01, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7a2c2835-d21e-4e04-babb-e8305a4f1f9d/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/963f0b0a-47d1-479c-9077-6c59023108fe/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:33:22,639 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:33:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:33:23,336 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:33:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
93%|█████████████████████████████████████---| 6816/7340 [247:05<18:59, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00/dev/null | head -n1); printf \"%s\\\\n\" \"$path\"; echo -n \"$path\" | xclip -selection clipboard 2>/dev/null || echo -n \"$path\" | wl-copy 2>/dev/null || true'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'path=$(find / -type f -name \"secret.docx\" 2>/dev/null | head -n1); printf \"%s\\\\n\" \"$path\"; echo -n \"$path\" | xclip -selection clipboard 2>/dev/null || echo -n \"$path\" | wl-copy 2>/dev/null || true'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 50%|█████ | 2/4 [00:03<00:03, 1.76s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:37:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 75%|███████▌ | 3/4 [00:05<00:01, 1.72s/it]2025-08-11 19:37:50,401 - agent.ComputerAgent - INFO - Agent: The video has been converted to an MP3 and saved on the Desktop as “Baby Justin Bieber.mp3.” You can now play it anytime.\n", + "\n", + "Task completed\n", + "INFO:agent.ComputerAgent:Agent: The video has been converted to an MP3 and saved on the Desktop as “Baby Justin Bieber.mp3.” You can now play it anytime.\n", + "\n", + "Task completed\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.45s/it]\n", + "2025-08-11 19:37:51,060 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 488\n", + " - prompt_tokens: 12887\n", + " - total_tokens: 13375\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " 
- audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0210\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 488\n", + " - prompt_tokens: 12887\n", + " - total_tokens: 13375\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 448\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0210\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:37:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:37:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|█████████████████████████████████████---| 6944/7340 [251:34<14:20, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:37:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:37:53 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:37:53,761 - agent.ComputerAgent - INFO - Computer: click({'x': 515, 'y': 232})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 515, 'y': 232})\n", + "\u001b[92m19:37:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|█████████████████████████████████████---| 6945/7340 [251:35<14:18, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:37:54,423 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 640, 'scroll_x': 0, 'x': 989, 'y': 366})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 640, 'scroll_x': 0, 'x': 989, 'y': 366})\n", + "\u001b[92m19:37:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:37:55,120 - agent.ComputerAgent - INFO - Computer: double_click({'x': 94, 'y': 453})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 94, 'y': 453})\n", + "\u001b[92m19:37:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|█████████████████████████████████████---| 6946/7340 [251:36<14:16, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"2025-08-11 19:37:55,799 - agent.ComputerAgent - INFO - Computer: click({'x': 153, 'y': 52})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 153, 'y': 52})\n", + "\u001b[92m19:37:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:37:56,468 - agent.ComputerAgent - INFO - Computer: double_click({'x': 354, 'y': 136})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 354, 'y': 136})\n", + "\u001b[92m19:37:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:37:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|█████████████████████████████████████---| 6948/7340 [251:38<14:11, 27.6 steps/min]2025-08-11 19:37:57,837 - agent.ComputerAgent - INFO - Computer: click({'x': 471, 'y': 205})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 471, 'y': 205})\n", + "\u001b[92m19:37:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/invoke 
\"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:37:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:37:58,540 - agent.ComputerAgent - INFO - Computer: click({'x': 945, 'y': 500})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 945, 'y': 500})\n", + "2025-08-11 19:37:59,244 - agent.ComputerAgent - INFO - Computer: double_click({'x': 989, 'y': 713})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 989, 'y': 713})\n", + " 95%|█████████████████████████████████████---| 6950/7340 [251:40<14:07, 27.6 steps/min]\u001b[92m19:37:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:37:59,918 - agent.ComputerAgent - INFO - Computer: click({'x': 913, 'y': 184})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 913, 'y': 184})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:00,585 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:38:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 95%|█████████████████████████████████████---| 6953/7340 [251:42<14:00, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|█████████████████████████████████████---| 6954/7340 [251:43<13:58, 27.6 steps/min]2025-08-11 19:38:02,285 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:38:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 95%|█████████████████████████████████████---| 6954/7340 [251:44<13:58, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|█████████████████████████████████████---| 6954/7340 [251:45<13:58, 27.6 steps/min]2025-08-11 19:38:04,439 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:38:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51c56274-d8ae-4edf-8ff1-b637cd2fff66/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|█████████████████████████████████████---| 6954/7340 [251:46<13:58, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:05,613 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:38:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; 
provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c915dbd9-32bc-40a7-9c07-d437c737419f/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:07,021 - agent.ComputerAgent - INFO - Agent: I configured your system to lock automatically when you’re away.\n", + "\n", + "What I set\n", + "- Enabled Automatic Screen Lock.\n", + "- Set Blank Screen Delay to 5 minutes, so after 5 minutes of inactivity the screen turns off and locks immediately.\n", + "- Kept “Lock screen on Suspend” enabled, so any suspend will also require a password on wake.\n", + "\n", + "How to adjust later\n", + "- Settings > Privacy > Screen:\n", + " - Change “Blank Screen Delay” to your preferred timeout (e.g., 1–2 minutes for tighter security).\n", + " - “Automatic Screen Lock Delay” is set to lock as soon as the screen turns off.\n", + "\n", + "Tip\n", + "- Press Super+L anytime to lock instantly.\n", + "\n", + "Task completed.\n", + "INFO:agent.ComputerAgent:Agent: I configured your system to lock automatically when you’re away.\n", + "\n", + "What I set\n", + "- Enabled Automatic Screen Lock.\n", + "- Set Blank Screen Delay to 5 minutes, so after 5 minutes of inactivity the screen turns off and locks immediately.\n", + "- Kept “Lock screen on Suspend” enabled, so any suspend will also require a password on wake.\n", + "\n", + "How to adjust later\n", + "- Settings > Privacy > Screen:\n", + " - Change “Blank Screen Delay” to your preferred timeout (e.g., 1–2 minutes for tighter security).\n", + " - “Automatic Screen Lock Delay” is set to lock as soon as the screen turns off.\n", + "\n", + "Tip\n", + "- Press Super+L anytime to lock instantly.\n", + "\n", + "Task completed.\n", + "2025-08-11 19:38:07,641 - agent.ComputerAgent - INFO - Total usage:\n", + " - 
completion_tokens: 790\n", + " - prompt_tokens: 10768\n", + " - total_tokens: 11558\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 640\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0214\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 790\n", + " - prompt_tokens: 10768\n", + " - total_tokens: 11558\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 640\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 0\n", + " - response_cost: $0.0214\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|█████████████████████████████████████---| 6955/7340 [251:49<13:56, 27.6 steps/min]2025-08-11 19:38:08,315 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:38:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:38:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:38:09,672 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:38:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 95%|█████████████████████████████████████---| 6955/7340 [251:52<13:56, 27.6 steps/min]\u001b[92m19:38:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:38:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:38:11,032 - agent.ComputerAgent - INFO - Computer: click({'x': 375, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 375, 'y': 75})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/invoke \"HTTP/1.1 200 OK\"\n", 
+ "\u001b[92m19:38:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:38:11,731 - agent.ComputerAgent - INFO - Computer: click({'x': 210, 'y': 202})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 210, 'y': 202})\n", + " 95%|█████████████████████████████████████---| 6958/7340 [251:53<13:49, 27.6 steps/min]2025-08-11 19:38:12,415 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:38:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7633715b-dde0-4c56-a4b6-22ccee78f5f5/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:13,115 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:38:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 95%|█████████████████████████████████████---| 6960/7340 [251:54<13:45, 27.6 steps/min]2025-08-11 19:38:13,805 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:38:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/af58ffed-65a3-4c4a-a9fe-5c940230627d/close \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:14,486 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:38:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:38:15,845 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:38:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 95%|█████████████████████████████████████---| 6960/7340 [251:57<13:45, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:17,724 - agent.ComputerAgent - INFO - Computer: type({'text': 'Vienna'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Vienna'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7633715b-dde0-4c56-a4b6-22ccee78f5f5/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:19,084 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ENTER'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ENTER'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|█████████████████████████████████████---| 6960/7340 [252:00<13:45, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:20,825 - agent.ComputerAgent - INFO - Computer: click({'x': 342, 'y': 184})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 342, 'y': 184})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|█████████████████████████████████████---| 6969/7340 [252:02<13:25, 27.7 steps/min]2025-08-11 19:38:21,487 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:38:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:38:22,165 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:38:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:38:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|█████████████████████████████████████---| 
6970/7340 [252:05<13:22, 27.6 steps/min]\u001b[92m19:38:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:38:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00/dev/null || echo 'Requesting sudo password if needed'\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"sudo -n true 2>/dev/null || echo 'Requesting sudo password if needed'\"})\n", + " 95%|██████████████████████████████████████--| 6973/7340 [252:29<13:17, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:38:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:38:49,132 - agent.ComputerAgent - INFO - Computer: click({'x': 178, 'y': 305})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 178, 'y': 305})\n", + " 95%|██████████████████████████████████████--| 6974/7340 [252:30<13:15, 27.6 steps/min]\u001b[92m19:38:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:38:49,800 - agent.ComputerAgent - INFO - 
Computer: click({'x': 932, 'y': 296})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 932, 'y': 296})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:38:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6975/7340 [252:32<13:12, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6976/7340 [252:33<13:10, 27.6 steps/min]\u001b[92m19:38:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:38:52,154 - agent.ComputerAgent - INFO - Computer: click({'x': 88, 'y': 313})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 88, 'y': 313})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51c56274-d8ae-4edf-8ff1-b637cd2fff66/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:38:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/fa081188-4859-4858-9d33-0f9675111182/reset \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6976/7340 [252:35<13:10, 27.6 steps/min]\u001b[92m19:38:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:38:54,176 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:38:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:38:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:38:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6977/7340 [252:36<13:08, 27.6 steps/min]2025-08-11 19:38:55,531 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:38:55,532 - agent.ComputerAgent - INFO - Computer: double_click({'x': 379, 'y': 105})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 379, 'y': 105})\n", + "2025-08-11 19:38:56,196 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:38:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 95%|██████████████████████████████████████--| 6977/7340 [252:37<13:08, 27.6 steps/min]2025-08-11 19:38:56,856 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:38:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:38:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:38:57,552 - agent.ComputerAgent - INFO - Computer: click({'x': 351, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 351, 'y': 75})\n", + " 95%|██████████████████████████████████████--| 6978/7340 [252:39<13:06, 27.6 steps/min]2025-08-11 19:38:58,981 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:38:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6979/7340 [252:40<13:04, 27.6 steps/min]2025-08-11 19:38:59,669 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m19:38:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:38:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"\u001b[92m19:38:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6979/7340 [252:42<13:04, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:02,357 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:39:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:39:03,722 - agent.ComputerAgent - INFO - Computer: wait({})\n", + "INFO:agent.ComputerAgent:Computer: wait({})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6979/7340 [252:45<13:04, 27.6 steps/min]2025-08-11 19:39:04,403 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 640, 'scroll_x': 0, 'x': 993, 'y': 732})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 640, 'scroll_x': 0, 'x': 993, 'y': 732})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:05,068 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:39:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:39:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6980/7340 [252:46<13:02, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:39:06,128 - agent.ComputerAgent - INFO - Computer: click({'x': 316, 'y': 183})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 316, 'y': 183})\n", + "2025-08-11 19:39:06,806 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:39:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:08,174 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ENTER'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ENTER'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6981/7340 
[252:50<13:00, 27.6 steps/min]\u001b[92m19:39:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:10,148 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:39:10,149 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'meta'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'meta'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:39:11,496 - agent.ComputerAgent - INFO - Computer: click({'x': 569, 'y': 372})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 569, 'y': 372})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting 
`pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:12,817 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + " 95%|██████████████████████████████████████--| 6983/7340 [252:54<12:55, 27.6 steps/min]2025-08-11 19:39:13,521 - agent.ComputerAgent - INFO - Computer: click({'x': 81, 'y': 148})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 81, 'y': 148})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:39:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:14,827 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 210, 'y': 177}, {'x': 210, 'y': 457}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 210, 'y': 177}, {'x': 210, 'y': 457}]})\n", + "2025-08-11 19:39:15,498 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:39:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:39:16,177 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + " 95%|██████████████████████████████████████--| 6985/7340 [252:57<12:51, 27.6 steps/min]\u001b[92m19:39:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:39:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6987/7340 [252:58<12:46, 27.6 steps/min]\u001b[92m19:39:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:17,905 - agent.ComputerAgent - INFO - Computer: click({'x': 461, 'y': 321})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 461, 'y': 321})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6988/7340 [253:01<12:44, 27.6 steps/min]\u001b[92m19:39:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:20,770 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 430})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 430})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6988/7340 [253:02<12:44, 27.6 steps/min]2025-08-11 19:39:21,798 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:39:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/invoke 
\"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/c915dbd9-32bc-40a7-9c07-d437c737419f/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6989/7340 [253:03<12:42, 27.6 steps/min]2025-08-11 19:39:22,498 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:39:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:23,147 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:39:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:39:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6989/7340 [253:04<12:42, 27.6 steps/min]2025-08-11 19:39:23,865 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 209, 'y': 146}, {'x': 281, 'y': 396}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 209, 'y': 146}, {'x': 281, 'y': 
396}]})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:39:24,516 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:39:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/invoke \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6989/7340 [253:06<12:42, 27.6 steps/min]2025-08-11 19:39:25,167 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:39:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:39:27,161 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'esc'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'esc'})\n", + "2025-08-11 19:39:27,846 - 
agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:39:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:39:28,870 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:39:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/1a178f89-87e5-46d9-a114-22d5fcc5c630/close \"HTTP/1.1 200 OK\"\n", + " 95%|██████████████████████████████████████--| 6990/7340 [253:10<12:40, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:39:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:39:30,222 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+a'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+a'})\n", + "2025-08-11 19:39:30,916 - agent.ComputerAgent - INFO - Computer: click({'x': 91, 'y': 314})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 91, 'y': 314})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6991/7340 [253:13<12:38, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:39:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:39:33,548 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:39:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:39:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 95%|██████████████████████████████████████--| 6992/7340 [253:15<12:36, 27.6 steps/min]2025-08-11 19:39:34,246 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:39:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"2025-08-11 19:39:34,914 - agent.ComputerAgent - INFO - Computer: click({'x': 341, 'y': 305})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 341, 'y': 305})\n", + "2025-08-11 19:39:35,577 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:39:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 95%|██████████████████████████████████████--| 6992/7340 [253:17<12:36, 27.6 steps/min]2025-08-11 19:39:36,257 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:39:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v1/gyms/OSWorld-Ubuntu \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00/dev/null 2>&1; then\\n echo \\'User charles already exists\\'\\nelse\\n sudo useradd -M -s /usr/sbin/nologin -d / charles\\nfi\\n\\necho \\'charles:Ex@mpleP@55w0rd!\\' | sudo chpasswd\\n\\n# Give the user write access inside the jail via a subdirectory\\nsudo mkdir -p /home/test1/uploads\\nsudo chown charles:charles /home/test1/uploads\\n\\n# Configure OpenSSH to chroot the user and force SFTP only\\nsudo bash -lc \\'cat >>/etc/ssh/sshd_config <<\"EOF\"\\n\\nMatch User charles\\n ChrootDirectory /home/test1\\n ForceCommand internal-sftp\\n AllowTCPForwarding no\\n X11Forwarding no\\nEOF\\'\\n\\n# Restart SSH to apply changes\\nsudo systemctl restart ssh || sudo systemctl restart sshd\\n\\necho \\'Setup complete: user charles restricted to /home/test1 via SFTP.\\''})\n", + 
"INFO:agent.ComputerAgent:Computer: type({'text': 'set -e\\nsudo mkdir -p /home/test1/uploads\\nsudo chown root:root /home/test1\\nsudo chmod 755 /home/test1\\n\\n# Create the user for SFTP-only access chrooted to /home/test1\\nif id -u charles >/dev/null 2>&1; then\\n echo \\'User charles already exists\\'\\nelse\\n sudo useradd -M -s /usr/sbin/nologin -d / charles\\nfi\\n\\necho \\'charles:Ex@mpleP@55w0rd!\\' | sudo chpasswd\\n\\n# Give the user write access inside the jail via a subdirectory\\nsudo mkdir -p /home/test1/uploads\\nsudo chown charles:charles /home/test1/uploads\\n\\n# Configure OpenSSH to chroot the user and force SFTP only\\nsudo bash -lc \\'cat >>/etc/ssh/sshd_config <<\"EOF\"\\n\\nMatch User charles\\n ChrootDirectory /home/test1\\n ForceCommand internal-sftp\\n AllowTCPForwarding no\\n X11Forwarding no\\nEOF\\'\\n\\n# Restart SSH to apply changes\\nsudo systemctl restart ssh || sudo systemctl restart sshd\\n\\necho \\'Setup complete: user charles restricted to /home/test1 via SFTP.\\''})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.34s/it]\n", + "\u001b[92m19:41:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7029/7340 [255:01<11:17, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7030/7340 [255:03<11:14, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:41:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:41:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:22,776 - agent.ComputerAgent - INFO - Computer: click({'x': 187, 'y': 52})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 187, 'y': 52})\n", + " 96%|██████████████████████████████████████--| 7030/7340 [255:04<11:14, 27.6 steps/min]\u001b[92m19:41:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:23,473 - agent.ComputerAgent - INFO - Computer: click({'x': 540, 'y': 471})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 540, 'y': 471})\n", + "\u001b[92m19:41:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for 
open-end generation.\n", + "2025-08-11 19:41:24,156 - agent.ComputerAgent - INFO - Computer: click({'x': 865, 'y': 201})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 865, 'y': 201})\n", + "\u001b[92m19:41:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7031/7340 [255:06<11:12, 27.6 steps/min]\u001b[92m19:41:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:41:25,511 - agent.ComputerAgent - INFO - Computer: click({'x': 91, 'y': 314, 'button': 'left'})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 91, 'y': 314, 'button': 'left'})\n", + "2025-08-11 19:41:26,190 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 640, 'scroll_x': 0, 'x': 990, 'y': 709})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 640, 'scroll_x': 0, 'x': 990, 'y': 709})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", 
+ "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:26,861 - agent.ComputerAgent - INFO - Computer: click({'x': 13, 'y': 673})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 13, 'y': 673})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:41:28,168 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'F11'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'F11'})\n", + " 96%|██████████████████████████████████████--| 7033/7340 [255:09<11:08, 27.6 steps/min]2025-08-11 19:41:28,840 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:41:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:41:29,543 - agent.ComputerAgent - INFO - Computer: click({'x': 461, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 461, 'y': 101})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7037/7340 [255:11<10:59, 27.6 steps/min]\u001b[92m19:41:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:30,890 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:41:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:41:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:41:31,564 - agent.ComputerAgent - INFO - Computer: click({'x': 510, 'y': 283})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 510, 'y': 283})\n", + " 96%|██████████████████████████████████████--| 7039/7340 [255:16<10:54, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51c56274-d8ae-4edf-8ff1-b637cd2fff66/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:41:35,309 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:41:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider 
= openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:41:36,009 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:41:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/invoke \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7039/7340 [255:17<10:55, 27.6 steps/min]2025-08-11 19:41:36,709 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:41:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:41:37,392 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:41:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:41:38,081 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + 
"\u001b[92m19:41:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:41:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7039/7340 [255:21<10:55, 27.6 steps/min]\u001b[92m19:41:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:41:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "\u001b[92m19:41:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:41,479 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:41:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:41:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:42,130 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:41:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:41:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7039/7340 [255:23<10:55, 27.6 steps/min]\u001b[92m19:41:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:42,794 - agent.ComputerAgent - INFO - Computer: click({'x': 996, 'y': 732})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 996, 'y': 732})\n", + "2025-08-11 19:41:43,448 - agent.ComputerAgent - INFO - Computer: click({'x': 223, 'y': 35})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 223, 'y': 35})\n", + "\u001b[92m19:41:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:41:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7039/7340 [255:25<10:55, 27.6 steps/min]2025-08-11 19:41:44,120 - agent.ComputerAgent - INFO - Computer: click({'x': 343, 'y': 195})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 343, 'y': 195})\n", + "2025-08-11 19:41:44,810 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 209, 'y': 146}, {'x': 407, 'y': 399}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 209, 'y': 146}, {'x': 407, 'y': 399}]})\n", + " 96%|██████████████████████████████████████--| 7041/7340 [255:26<10:50, 27.6 steps/min]2025-08-11 19:41:45,450 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:41:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
gpt-5; provider = openai\n", + "2025-08-11 19:41:46,139 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:41:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 96%|██████████████████████████████████████--| 7043/7340 [255:30<10:46, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:41:50,003 - agent.ComputerAgent - INFO - Computer: type({'text': 'chrome refresh 2023'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'chrome refresh 2023'})\n", + " 96%|██████████████████████████████████████--| 7043/7340 [255:31<10:46, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:41:51,944 - agent.ComputerAgent - INFO - Computer: type({'text': 'Yann LeCun Google Scholar'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Yann LeCun Google Scholar'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/invoke \"HTTP/1.1 200 
OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7044/7340 [255:33<10:44, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:52,582 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:41:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:41:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 96%|██████████████████████████████████████--| 7045/7340 [255:35<10:42, 27.6 steps/min]\u001b[92m19:41:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:41:54,575 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing 
started with 32 messages\n", + "\u001b[92m19:41:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:41:55,282 - agent.ComputerAgent - INFO - Computer: click({'x': 90, 'y': 314, 'button': 'left'})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 90, 'y': 314, 'button': 'left'})\n", + "\u001b[92m19:41:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v1/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/reset \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7045/7340 [255:37<10:42, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:41:55,970 - agent.ComputerAgent - INFO - Computer: double_click({'x': 12, 'y': 524})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 12, 'y': 524})\n", + "\u001b[92m19:41:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:41:56,628 - agent.ComputerAgent - INFO - Computer: click({'x': 164, 'y': 427})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 164, 'y': 427})\n", + " 96%|██████████████████████████████████████--| 7046/7340 [255:38<10:40, 27.6 steps/min]2025-08-11 19:41:57,310 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:41:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; 
provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:41:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7048/7340 [255:40<10:35, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:41:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:41:59,486 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 590, 'scroll_x': 0, 'x': 991, 'y': 420})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 590, 'scroll_x': 0, 'x': 991, 'y': 420})\n", + " 96%|██████████████████████████████████████--| 7048/7340 [255:41<10:35, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/invoke \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7049/7340 [255:42<10:33, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:42:02,043 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m19:42:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0180c5d2-a012-4261-b093-ed34f443f269/close \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7049/7340 [255:43<10:33, 27.6 steps/min]2025-08-11 19:42:03,400 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:42:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:42:04,040 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:42:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:42:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:42:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - 
\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51c56274-d8ae-4edf-8ff1-b637cd2fff66/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:42:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7049/7340 [255:47<10:33, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:42:06,703 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:42:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00Background Cover'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '>Background Cover'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/7f112db6-0b60-4e6c-86f5-0d87dc91f371/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/create_environment \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7072/7340 [256:50<9:43, 27.5 
steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:43:08,956 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m19:43:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:43:10,282 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ENTER'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ENTER'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:12,251 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ENTER'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ENTER'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:43:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7074/7340 [256:54<9:39, 27.5 steps/min]2025-08-11 19:43:13,540 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 1 messages\n", + "\u001b[92m19:43:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:43:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:43:14,917 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "2025-08-11 19:43:15,565 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:43:15,566 - agent.ComputerAgent - INFO - Computer: click({'x': 75, 'y': 166})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 75, 'y': 166})\n", + "\u001b[92m19:43:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7076/7340 [256:57<9:35, 27.5 steps/min]2025-08-11 19:43:16,239 - agent.ComputerAgent - INFO - Computer: click({'x': 985, 'y': 759})\n", + "INFO:agent.ComputerAgent:Computer: 
click({'x': 985, 'y': 759})\n", + "2025-08-11 19:43:16,881 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:43:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 96%|██████████████████████████████████████--| 7078/7340 [256:58<9:30, 27.5 steps/min]2025-08-11 19:43:17,557 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:43:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/765aa707-cf44-4dd0-8933-2c2b94870afd/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:18,888 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:43:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 96%|██████████████████████████████████████--| 7079/7340 [257:00<9:28, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:43:20,265 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl++'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl++'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 96%|██████████████████████████████████████--| 7079/7340 [257:02<9:28, 27.5 steps/min]\u001b[92m19:43:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:21,621 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_y': 588, 'scroll_x': 0, 'x': 991, 'y': 433})\n", + "INFO:agent.ComputerAgent:Computer: scroll({'scroll_y': 588, 'scroll_x': 0, 'x': 991, 'y': 433})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/51c56274-d8ae-4edf-8ff1-b637cd2fff66/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:23,319 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:43:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:43:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + " 96%|██████████████████████████████████████--| 7079/7340 [257:05<9:28, 27.5 steps/min]\u001b[92m19:43:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:43:24,668 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:43:24 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:43:25,366 - agent.ComputerAgent - INFO - Computer: click({'x': 341, 'y': 75})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 341, 'y': 75})\n", + "\u001b[92m19:43:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:43:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:26,376 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:43:26,377 - agent.ComputerAgent - INFO - Computer: click({'x': 48, 'y': 52})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 48, 'y': 52})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:43:27,706 - agent.ComputerAgent - INFO - Computer: click({'x': 213, 'y': 183})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 213, 'y': 183})\n", + " 96%|██████████████████████████████████████--| 7080/7340 [257:09<9:26, 27.5 steps/min]2025-08-11 19:43:28,338 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:43:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"2025-08-11 19:43:29,016 - agent.ComputerAgent - INFO - Computer: click({'x': 666, 'y': 279})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 666, 'y': 279})\n", + " 96%|██████████████████████████████████████--| 7083/7340 [257:10<9:19, 27.5 steps/min]2025-08-11 19:43:29,687 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:43:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:43:30,358 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:43:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 97%|██████████████████████████████████████--| 7084/7340 [257:12<9:17, 27.5 steps/min]2025-08-11 19:43:31,038 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 8 messages\n", + "\u001b[92m19:43:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v1/environments/765aa707-cf44-4dd0-8933-2c2b94870afd/reset \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 97%|██████████████████████████████████████--| 7084/7340 [257:13<9:17, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:43:32,348 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:43:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:43:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:33,008 - agent.ComputerAgent - INFO - Agent: Taking a screenshot to see the current computer screen.\n", + "INFO:agent.ComputerAgent:Agent: Taking a screenshot to see the current computer screen.\n", + "2025-08-11 19:43:33,009 - agent.ComputerAgent - INFO - Computer: click({'x': 79, 'y': 157})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 79, 'y': 157})\n", + " 97%|██████████████████████████████████████--| 7084/7340 [257:14<9:17, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + " 97%|██████████████████████████████████████--| 7085/7340 [257:15<9:15, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:43:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:35,405 - agent.ComputerAgent - INFO - Computer: click({'x': 679, 'y': 563})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 679, 'y': 563})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:43:36,736 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl++'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl++'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:37 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/0c64a3b4-e9b0-46c1-a580-cdcf62b74e44/close \"HTTP/1.1 200 OK\"\n", + " 97%|██████████████████████████████████████--| 7086/7340 [257:19<9:13, 27.5 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/765aa707-cf44-4dd0-8933-2c2b94870afd/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:43:38,000 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:43:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + " 97%|██████████████████████████████████████--| 7087/7340 [257:20<9:11, 27.5 steps/min]2025-08-11 19:43:39,348 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 308, 'y': 153})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'left', 'x': 308, 'y': 153})\n", + "2025-08-11 19:43:40,018 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:43:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"2025-08-11 19:43:40,687 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 6 messages\n", + "\u001b[92m19:43:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:43:41,369 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:43:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 97%|██████████████████████████████████████--| 7087/7340 [257:23<9:11, 27.5 steps/min]\u001b[92m19:43:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:43:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:43:43,783 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:43:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Loading checkpoint shards: 0%| | 0/4 
[00:00>/etc/ssh/sshd_config <<\"EOF\"\\n\\nMatch User charles\\n ChrootDirectory /home/test1\\n ForceCommand internal-sftp\\n AllowTCPForwarding no\\n X11Forwarding no\\nEOF\\'\\n\\n# Apply changes\\nsudo systemctl restart ssh || sudo systemctl restart sshd\\n\\n# Show resulting setup\\nid charles\\nls -ld /home/test1 /home/test1/uploads'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'set -e\\nsudo mkdir -p /home/test1\\nsudo chown root:root /home/test1\\nsudo chmod 755 /home/test1\\n\\n# Create user with no shell access\\nsudo useradd -M -s /usr/sbin/nologin -d / charles || true\\n\\necho \\'charles:Ex@mpleP@55w0rd!\\' | sudo chpasswd\\n\\n# Writable directory inside the jail\\nsudo mkdir -p /home/test1/uploads\\nsudo chown charles:charles /home/test1/uploads\\n\\n# Configure SSH to restrict user to SFTP chroot at /home/test1\\nsudo bash -lc \\'cat >>/etc/ssh/sshd_config <<\"EOF\"\\n\\nMatch User charles\\n ChrootDirectory /home/test1\\n ForceCommand internal-sftp\\n AllowTCPForwarding no\\n X11Forwarding no\\nEOF\\'\\n\\n# Apply changes\\nsudo systemctl restart ssh || sudo systemctl restart sshd\\n\\n# Show resulting setup\\nid charles\\nls -ld /home/test1 /home/test1/uploads'})\n", + " 97%|██████████████████████████████████████--| 7110/7340 [257:49<8:20, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:44:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP 
Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + " 97%|██████████████████████████████████████--| 7112/7340 [257:50<8:15, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/acf3037a-4b6c-4ea8-b81c-ffc2e76132e1/invoke \"HTTP/1.1 200 OK\"\n", + "Loading checkpoint shards: 0%| | 0/4 [00:00 Settings > General > scroll to the bottom > Config Editor…\n", + "- In the search box, type: applyIncomingFilters\n", + "- If mail.server.default.applyIncomingFilters is false or missing, set it to true.\n", + "- If you prefer per-account control, also set mail.server.serverN.applyIncomingFilters = true for the specific account (N is the server number for that account; you can set both default and per-account if you like).\n", + "\n", + "2) Ensure Thunderbird actually checks those subfolders for new mail\n", + "- For each subfolder you want filters to run on: right‑click the folder > Properties > check “When getting new messages, always check this folder” > OK.\n", + "- Alternatively (account-wide): Account Settings > Server Settings (for the IMAP account) > enable “Check for new messages at startup” and “Check for new messages every X minutes.” In Advanced IMAP settings (or via Config Editor), ensure mail.server.serverN.check_all_folders_for_new = true to have Thunderbird poll all folders.\n", + "\n", + "3) Make sure your filters are set to run on incoming mail\n", + "- Tools (≡ menu) > Message Filters.\n", + "- Choose the account, edit each filter you want, and under “Apply filter when,” tick the “Getting New Mail” options (before/after junk classification as appropriate). 
Save.\n", + "\n", + "4) Restart Thunderbird\n", + "- Close and reopen Thunderbird so the new prefs apply cleanly.\n", + "\n", + "5) Test\n", + "- Deliver a message directly to a subfolder (e.g., by a server-side rule or by sending to an address that your server routes into that folder). When Thunderbird detects new mail in that subfolder, your filters should run automatically.\n", + "\n", + "Notes and limitations\n", + "- IMAP required: This works when new mail arrives directly into that subfolder on the server (IMAP). For POP accounts and Local Folders, Thunderbird only “receives” mail into Inbox; filters won’t auto-trigger simply because a message...\n", + "INFO:agent.ComputerAgent:Agent: Yes—you can make Thunderbird run filters automatically on subfolders (not just Inbox). Do the following once per account you want this behavior for.\n", + "\n", + "1) Turn on the hidden preference to run filters on non-Inbox folders\n", + "- Thunderbird > Settings > General > scroll to the bottom > Config Editor…\n", + "- In the search box, type: applyIncomingFilters\n", + "- If mail.server.default.applyIncomingFilters is false or missing, set it to true.\n", + "- If you prefer per-account control, also set mail.server.serverN.applyIncomingFilters = true for the specific account (N is the server number for that account; you can set both default and per-account if you like).\n", + "\n", + "2) Ensure Thunderbird actually checks those subfolders for new mail\n", + "- For each subfolder you want filters to run on: right‑click the folder > Properties > check “When getting new messages, always check this folder” > OK.\n", + "- Alternatively (account-wide): Account Settings > Server Settings (for the IMAP account) > enable “Check for new messages at startup” and “Check for new messages every X minutes.” In Advanced IMAP settings (or via Config Editor), ensure mail.server.serverN.check_all_folders_for_new = true to have Thunderbird poll all folders.\n", + "\n", + "3) Make sure your 
filters are set to run on incoming mail\n", + "- Tools (≡ menu) > Message Filters.\n", + "- Choose the account, edit each filter you want, and under “Apply filter when,” tick the “Getting New Mail” options (before/after junk classification as appropriate). Save.\n", + "\n", + "4) Restart Thunderbird\n", + "- Close and reopen Thunderbird so the new prefs apply cleanly.\n", + "\n", + "5) Test\n", + "- Deliver a message directly to a subfolder (e.g., by a server-side rule or by sending to an address that your server routes into that folder). When Thunderbird detects new mail in that subfolder, your filters should run automatically.\n", + "\n", + "Notes and limitations\n", + "- IMAP required: This works when new mail arrives directly into that subfolder on the server (IMAP). For POP accounts and Local Folders, Thunderbird only “receives” mail into Inbox; filters won’t auto-trigger simply because a message...\n", + "2025-08-11 19:47:20,255 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 3021\n", + " - prompt_tokens: 3980\n", + " - total_tokens: 7001\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 2496\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 1920\n", + " - response_cost: $0.0330\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 3021\n", + " - prompt_tokens: 3980\n", + " - total_tokens: 7001\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 2496\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 1920\n", + " - response_cost: $0.0330\n", + " 98%|███████████████████████████████████████-| 7212/7340 [261:01<4:37, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 
200 OK\"\n", + "\u001b[92m19:47:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:47:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:21,621 - agent.ComputerAgent - INFO - Computer: type({'text': ' chrome refresh 2023'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': ' chrome refresh 2023'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:22,286 - agent.ComputerAgent - INFO - Computer: click({'x': 239, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 239, 'y': 178})\n", + "2025-08-11 19:47:22,928 - agent.ComputerAgent - INFO - Computer: click({'x': 980, 'y': 60})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 980, 'y': 60})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + " 98%|███████████████████████████████████████-| 7213/7340 [261:04<4:35, 27.6 steps/min]2025-08-11 19:47:23,594 - agent.ComputerAgent - INFO - LLM 
processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:47:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:47:24,237 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:47:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 98%|███████████████████████████████████████-| 7216/7340 [261:06<4:29, 27.6 steps/min]2025-08-11 19:47:24,914 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:47:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:26,264 - agent.ComputerAgent - INFO - Agent: The Impress slide was exported to PNG using the default settings and saved as “res.png” on the Desktop. Task completed.\n", + "INFO:agent.ComputerAgent:Agent: The Impress slide was exported to PNG using the default settings and saved as “res.png” on the Desktop. 
Task completed.\n", + "2025-08-11 19:47:26,906 - agent.ComputerAgent - INFO - Total usage:\n", + " - completion_tokens: 162\n", + " - prompt_tokens: 8778\n", + " - total_tokens: 8940\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 128\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 6656\n", + " - response_cost: $0.0051\n", + "INFO:agent.ComputerAgent:Total usage:\n", + " - completion_tokens: 162\n", + " - prompt_tokens: 8778\n", + " - total_tokens: 8940\n", + " - completion_tokens_details:\n", + " - accepted_prediction_tokens: 0\n", + " - audio_tokens: 0\n", + " - reasoning_tokens: 128\n", + " - rejected_prediction_tokens: 0\n", + " - prompt_tokens_details:\n", + " - audio_tokens: 0\n", + " - cached_tokens: 6656\n", + " - response_cost: $0.0051\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cd53d966-2507-485c-bbe9-ee55dbbeddd0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/5e73167c-1836-4752-b7e8-57434e5d7875/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/invoke \"HTTP/1.1 200 OK\"\n", + " 98%|███████████████████████████████████████-| 7217/7340 [261:09<4:27, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/25f45afe-ee57-4629-9991-c515438accab/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + " 98%|███████████████████████████████████████-| 
7217/7340 [261:10<4:27, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cd53d966-2507-485c-bbe9-ee55dbbeddd0/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:30,124 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:47:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7233/7340 [261:12<3:51, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cd53d966-2507-485c-bbe9-ee55dbbeddd0/close \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:31,452 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 10 messages\n", + "\u001b[92m19:47:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() 
model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:47:32,084 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:47:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:47:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7233/7340 [261:13<3:51, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:32,764 - agent.ComputerAgent - INFO - Computer: click({'x': 76, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 76, 'y': 53})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:33,390 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:47:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7235/7340 [261:15<3:47, 27.7 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/6f3b006b-141d-439d-b6cb-eed7bd6483c3/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7245/7340 [261:16<3:25, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9882ec8e-4618-4be3-802e-bb5c58c9fbbc/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7245/7340 [261:17<3:25, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7245/7340 [261:18<3:25, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:47:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:37,446 - agent.ComputerAgent - INFO - Computer: click({'x': 901, 'y': 579})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 901, 'y': 579})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - 
\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:47:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7245/7340 [261:20<3:25, 27.7 steps/min]\u001b[92m19:47:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:39,450 - agent.ComputerAgent - INFO - Computer: click({'x': 1011, 'y': 194})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1011, 'y': 194})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:47:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7246/7340 [261:21<3:23, 27.7 steps/min]\u001b[92m19:47:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:40,815 - agent.ComputerAgent - INFO - Computer: click({'x': 243, 'y': 52})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 243, 'y': 52})\n", + "\u001b[92m19:47:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:41,487 - agent.ComputerAgent - INFO - Computer: click({'x': 259, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 259, 'y': 178})\n", + "\u001b[92m19:47:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7247/7340 [261:23<3:21, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:42,158 - agent.ComputerAgent - INFO - Computer: click({'x': 893, 'y': 296})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 893, 'y': 296})\n", + "2025-08-11 19:47:42,787 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:47:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7250/7340 [261:28<3:14, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:48,590 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 12 messages\n", + "\u001b[92m19:47:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:47:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7250/7340 [261:30<3:14, 27.7 steps/min]\u001b[92m19:47:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:49,265 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:47:49 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:47:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7250/7340 [261:31<3:14, 27.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:50,594 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:47:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:47:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:51,304 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:47:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:47:51,990 - 
agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 275, 'y': 181}, {'x': 79, 'y': 182}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 275, 'y': 181}, {'x': 79, 'y': 182}]})\n", + " 99%|███████████████████████████████████████-| 7250/7340 [261:33<3:14, 27.7 steps/min]\u001b[92m19:47:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:52,662 - agent.ComputerAgent - INFO - Computer: click({'x': 252, 'y': 230})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 252, 'y': 230})\n", + " 99%|███████████████████████████████████████-| 7252/7340 [261:35<3:10, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:56,131 - agent.ComputerAgent - INFO - Computer: type({'text': '=A2+B2'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': '=A2+B2'})\n", + " 99%|███████████████████████████████████████-| 7252/7340 [261:37<3:10, 27.7 steps/min]\u001b[92m19:47:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:47:56,793 - agent.ComputerAgent - INFO - Computer: click({'x': 563, 'y': 101})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 563, 
'y': 101})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:47:58,146 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'alt+left'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'alt+left'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:47:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7253/7340 [261:40<3:08, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:47:59,467 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:47:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:48:00,135 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:48:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + 
"LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:48:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7254/7340 [261:41<3:06, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:00,837 - agent.ComputerAgent - INFO - Computer: click({'x': 111, 'y': 52})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 111, 'y': 52})\n", + " 99%|███████████████████████████████████████-| 7254/7340 [261:42<3:06, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/ba3f752c-7268-49e8-819c-5b471e52bd54/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7255/7340 [261:44<3:04, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7255/7340 [261:45<3:04, 27.7 steps/min]\u001b[92m19:48:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:04,749 - agent.ComputerAgent - INFO - Computer: click({'x': 19, 'y': 45})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 19, 'y': 45})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 
- \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7255/7340 [261:47<3:04, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:48:06,105 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:48:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:48:06,746 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 14 messages\n", + "\u001b[92m19:48:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7256/7340 [261:48<3:01, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:48:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:07,447 - agent.ComputerAgent - INFO - Computer: click({'x': 273, 'y': 90})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 273, 'y': 90})\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7256/7340 [261:49<3:01, 27.7 steps/min]2025-08-11 19:48:08,626 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:48:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7257/7340 [261:51<2:59, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7257/7340 [261:52<2:59, 27.7 steps/min]2025-08-11 19:48:11,495 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:48:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:48:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"\u001b[92m19:48:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7257/7340 [261:53<2:59, 27.7 steps/min]\u001b[92m19:48:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:12,687 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 211, 'y': 178}, {'x': 211, 'y': 473}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 211, 'y': 178}, {'x': 211, 'y': 473}]})\n", + " 99%|███████████████████████████████████████-| 7257/7340 [261:54<2:59, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:14,357 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:48:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7258/7340 [261:56<2:57, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7263/7340 [261:57<2:46, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:17,188 - 
agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/40c5f987-3d81-47fe-8798-4e45d9755f93/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7263/7340 [261:59<2:46, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:19,057 - agent.ComputerAgent - INFO - Computer: click({'x': 257, 'y': 152})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 257, 'y': 152})\n", + " 99%|███████████████████████████████████████-| 7265/7340 [262:03<2:42, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:23,287 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 16 messages\n", + "\u001b[92m19:48:23 
- LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:48:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/31367309-0055-409a-a992-edf729fb010c/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7265/7340 [262:05<2:42, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:48:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:48:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7265/7340 [262:07<2:42, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:48:25,935 - agent.ComputerAgent - INFO - Computer: click({'x': 95, 'y': 74})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 95, 'y': 74})\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:48:26,606 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:48:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7266/7340 [262:08<2:40, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:48:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:27,282 - agent.ComputerAgent - INFO - Computer: click({'x': 986, 'y': 133})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 986, 'y': 133})\n", + "\u001b[92m19:48:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:27,901 - agent.ComputerAgent - INFO - Computer: click({'x': 528, 'y': 50})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 528, 'y': 50})\n", + " 99%|███████████████████████████████████████-| 7267/7340 [262:09<2:38, 27.7 steps/min]INFO:httpx:HTTP Request: 
POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/68f513cf-ec21-4216-bab9-84c5bfcfce88/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7269/7340 [262:10<2:33, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:48:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:30,287 - agent.ComputerAgent - INFO - Computer: click({'x': 105, 'y': 230})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 105, 'y': 230})\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:14<2:31, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:33,466 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:48:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:15<2:31, 27.7 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:34,163 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:48:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:48:34,825 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:48:34 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:16<2:31, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:36,005 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:48:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:17<2:31, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:21<2:31, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:22<2:31, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:48:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:48:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:48:43,277 - agent.ComputerAgent - INFO - Computer: click({'x': 66, 'y': 164})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 66, 'y': 164})\n", + " 99%|███████████████████████████████████████-| 7270/7340 [262:25<2:31, 27.7 steps/min]\u001b[92m19:48:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:48:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; 
provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:48:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:48:44,471 - agent.ComputerAgent - INFO - Computer: click({'x': 248, 'y': 173})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 248, 'y': 173})\n", + " 99%|███████████████████████████████████████-| 7271/7340 [262:26<2:29, 27.7 steps/min]\u001b[92m19:48:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:45,143 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 288, 'y': 153}, {'x': 81, 'y': 155}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 288, 'y': 153}, {'x': 81, 'y': 155}]})\n", + " 99%|███████████████████████████████████████-| 7273/7340 [262:29<2:25, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7273/7340 [262:30<2:25, 27.7 steps/min]2025-08-11 19:48:49,336 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:48:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7273/7340 [262:31<2:25, 27.7 
steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:50,547 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 18 messages\n", + "\u001b[92m19:48:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:51,906 - agent.ComputerAgent - INFO - Computer: type({'text': 'webui refresh 2023'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'webui refresh 2023'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7273/7340 [262:33<2:25, 27.7 steps/min]2025-08-11 19:48:52,576 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:48:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:48:53,923 - agent.ComputerAgent - INFO - Computer: type({'text': 'Thunderbird'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'Thunderbird'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7274/7340 [262:36<2:22, 27.7 steps/min]\u001b[92m19:48:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + 
"LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:48:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:48:56,172 - agent.ComputerAgent - INFO - Computer: double_click({'x': 144, 'y': 167})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 144, 'y': 167})\n", + " 99%|███████████████████████████████████████-| 7276/7340 [262:39<2:18, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:48:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7276/7340 [262:40<2:18, 27.7 steps/min]\u001b[92m19:48:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:00,051 - agent.ComputerAgent - INFO - Computer: click({'x': 761, 'y': 229})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 761, 'y': 229})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:00,717 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:49:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7276/7340 [262:42<2:18, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7277/7340 [262:43<2:16, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:49:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:49:02 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7277/7340 [262:44<2:16, 27.7 steps/min]2025-08-11 19:49:03,565 - agent.ComputerAgent - 
INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:49:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:49:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:04,222 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 237, 'y': 178}, {'x': 258, 'y': 280}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 237, 'y': 178}, {'x': 258, 'y': 280}]})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7277/7340 [262:45<2:16, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/fa081188-4859-4858-9d33-0f9675111182/close \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7278/7340 [262:46<2:14, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:06,401 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:49:06 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7278/7340 [262:48<2:14, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7278/7340 
[262:51<2:14, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:10,616 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 20 messages\n", + "\u001b[92m19:49:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7278/7340 [262:52<2:14, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7278/7340 [262:53<2:14, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:49:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7278/7340 [262:55<2:14, 27.7 steps/min]\u001b[92m19:49:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:49:13 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:49:14,305 - agent.ComputerAgent - INFO - Computer: click({'x': 1008, 'y': 223})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1008, 'y': 223})\n", + "\u001b[92m19:49:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:49:14,966 - agent.ComputerAgent - INFO - Computer: click({'x': 75, 'y': 135})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 75, 'y': 135})\n", + " 99%|███████████████████████████████████████-| 7278/7340 [262:56<2:14, 27.7 steps/min]\u001b[92m19:49:15 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:15,652 - agent.ComputerAgent - INFO - Computer: double_click({'x': 540, 'y': 128})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 540, 'y': 128})\n", + " 99%|███████████████████████████████████████-| 7281/7340 [262:58<2:07, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to 
`eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7281/7340 [262:59<2:07, 27.7 steps/min]\u001b[92m19:49:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:18,506 - agent.ComputerAgent - INFO - Computer: click({'x': 229, 'y': 157})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 229, 'y': 157})\n", + " 99%|███████████████████████████████████████-| 7281/7340 [263:00<2:07, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:20,217 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:49:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7282/7340 [263:02<2:05, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7282/7340 [263:03<2:05, 27.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:49:23,091 - 
agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:49:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:49:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7282/7340 [263:04<2:05, 27.7 steps/min]2025-08-11 19:49:23,787 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 178})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:24,438 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:49:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:49:25,775 - agent.ComputerAgent - INFO - Computer: type({'text': \"=VLOOKUP(C2,'Retail Price'.$A$2:$B$200,2,0)*E2*(1-F2)\"})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': \"=VLOOKUP(C2,'Retail Price'.$A$2:$B$200,2,0)*E2*(1-F2)\"})\n", + " 99%|███████████████████████████████████████-| 7282/7340 [263:07<2:05, 
27.7 steps/min]2025-08-11 19:49:26,452 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:49:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7284/7340 [263:12<2:01, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:31,690 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 22 messages\n", + "\u001b[92m19:49:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7284/7340 [263:14<2:01, 27.7 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:49:33,032 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:49:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = 
openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:49:33 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:33,738 - agent.ComputerAgent - INFO - Computer: click({'x': 1004, 'y': 60})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 1004, 'y': 60})\n", + " 99%|███████████████████████████████████████-| 7285/7340 [263:16<1:59, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7285/7340 [263:17<1:59, 27.7 steps/min]\u001b[92m19:49:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:36,569 - agent.ComputerAgent - INFO - Computer: click({'x': 980, 'y': 60})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 980, 'y': 60})\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:20<1:57, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:21<1:57, 27.7 steps/min]2025-08-11 19:49:40,273 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + 
"INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:49:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:22<1:57, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:42,639 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'CTRL+H'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'CTRL+H'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:24<1:57, 27.7 steps/min]2025-08-11 19:49:43,312 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:49:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:25<1:57, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:49:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:49:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 
99%|███████████████████████████████████████-| 7286/7340 [263:26<1:57, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:49:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:49:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:27<1:57, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:49:47,535 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "\u001b[92m19:49:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:49:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:48,871 - agent.ComputerAgent - INFO - Computer: screenshot({})\n", + "INFO:agent.ComputerAgent:Computer: screenshot({})\n", + " 99%|███████████████████████████████████████-| 7286/7340 [263:30<1:57, 27.6 steps/min]2025-08-11 19:49:49,547 - agent.ComputerAgent - INFO - Computer: click({'x': 259, 'y': 180})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 259, 'y': 180})\n", + "2025-08-11 19:49:50,246 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 55, 'y': 164}, {'x': 55, 'y': 600}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 55, 'y': 164}, {'x': 55, 'y': 600}]})\n", + " 99%|███████████████████████████████████████-| 7291/7340 [263:33<1:46, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/8e75deb1-3c97-408b-8c7d-f4681b322141/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7291/7340 [263:34<1:46, 27.7 steps/min]\u001b[92m19:49:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 19:49:53,552 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + 
"\u001b[92m19:49:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:49:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:49:54,210 - agent.ComputerAgent - INFO - Computer: click({'x': 867, 'y': 296})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 867, 'y': 296})\n", + " 99%|███████████████████████████████████████-| 7291/7340 [263:35<1:46, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:55,903 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 24 messages\n", + "\u001b[92m19:49:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:37<1:44, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:49:56,562 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:49:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:49:57,261 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:49:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:39<1:44, 27.7 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:41<1:44, 27.7 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:42<1:44, 27.7 steps/min]2025-08-11 19:50:00,993 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:50:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:49<1:44, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:50:08,913 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ctrl+c'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ctrl+c'})\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:50<1:44, 27.6 steps/min]2025-08-11 19:50:10,074 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 26 messages\n", + "\u001b[92m19:50:10 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 
99%|███████████████████████████████████████-| 7292/7340 [263:51<1:44, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:52<1:44, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:12 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:54<1:44, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:55<1:44, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:50:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:50:14,024 - agent.ComputerAgent - INFO - Computer: click({'x': 871, 'y': 135})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 871, 'y': 135})\n", + "\u001b[92m19:50:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:50:14 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7292/7340 [263:56<1:44, 27.6 steps/min]\u001b[92m19:50:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:15,204 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 275, 'y': 152}, {'x': 79, 'y': 154}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 275, 'y': 152}, {'x': 79, 'y': 154}]})\n", + " 99%|███████████████████████████████████████-| 7293/7340 [263:57<1:42, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:50:17,610 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ALT+TAB'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ALT+TAB'})\n", + " 99%|███████████████████████████████████████-| 7294/7340 [263:59<1:39, 27.6 steps/min]2025-08-11 19:50:18,781 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:50:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7294/7340 [264:00<1:39, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7294/7340 [264:01<1:39, 27.6 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:50:21,132 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:50:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7294/7340 [264:02<1:39, 27.6 steps/min]2025-08-11 19:50:22,169 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:50:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7294/7340 [264:03<1:39, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:50:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:22,830 - agent.ComputerAgent - INFO - Computer: click({'x': 120, 
'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 120, 'y': 53})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:50:24,191 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 257, 'y': 153}, {'x': 259, 'y': 281}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 257, 'y': 153}, {'x': 259, 'y': 281}]})\n", + " 99%|███████████████████████████████████████-| 7296/7340 [264:08<1:35, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7296/7340 [264:09<1:35, 27.6 steps/min]\u001b[92m19:50:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:29,058 - agent.ComputerAgent - INFO - Computer: click({'x': 749, 'y': 229})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 749, 'y': 229})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7296/7340 [264:10<1:35, 27.6 steps/min]2025-08-11 19:50:29,733 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:50:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + 
"INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:50:30,431 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 28 messages\n", + "\u001b[92m19:50:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7297/7340 [264:12<1:33, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7297/7340 [264:16<1:33, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7297/7340 [264:17<1:33, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:50:36,275 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:50:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:50:36 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = 
huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:37,340 - agent.ComputerAgent - INFO - Computer: click({'x': 229, 'y': 91})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 229, 'y': 91})\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:21<1:31, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:50:41,280 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ALT+TAB'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ALT+TAB'})\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:23<1:31, 27.6 steps/min]2025-08-11 19:50:42,453 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:50:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:24<1:31, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:25<1:31, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:50:44,654 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:50:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:26<1:31, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:28<1:31, 
27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:29<1:31, 27.6 steps/min]\u001b[92m19:50:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:48,511 - agent.ComputerAgent - INFO - Computer: click({'x': 243, 'y': 178})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 243, 'y': 178})\n", + " 99%|███████████████████████████████████████-| 7298/7340 [264:30<1:31, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7299/7340 [264:31<1:29, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:50:50 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:50,849 - agent.ComputerAgent - INFO - Computer: click({'x': 138, 'y': 90})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 138, 'y': 90})\n", + " 
99%|███████████████████████████████████████-| 7299/7340 [264:32<1:29, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + " 99%|███████████████████████████████████████-| 7300/7340 [264:33<1:26, 27.6 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:50:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:50:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7300/7340 [264:34<1:26, 27.6 steps/min]\u001b[92m19:50:53 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:53,680 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 749, 'y': 183}, {'x': 837, 'y': 244}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 749, 'y': 183}, {'x': 837, 'y': 244}]})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:50:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + " 99%|███████████████████████████████████████-| 7300/7340 [264:36<1:26, 27.6 steps/min]\u001b[92m19:50:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:50:55,684 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:50:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:50:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:50:56,394 - agent.ComputerAgent - INFO - Computer: click({'x': 258, 'y': 155})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 258, 'y': 155})\n", + " 99%|███████████████████████████████████████-| 7301/7340 [264:38<1:24, 27.6 steps/min]\u001b[92m19:50:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:50:57,053 - agent.ComputerAgent - INFO - Computer: click({'button': 'right', 'x': 118, 'y': 182})\n", + "INFO:agent.ComputerAgent:Computer: click({'button': 'right', 'x': 118, 'y': 182})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:50:57,705 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:50:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7302/7340 [264:39<1:22, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7303/7340 [264:42<1:20, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:01,944 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:51:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7303/7340 [264:43<1:20, 27.6 steps/min]2025-08-11 19:51:02,644 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 30 messages\n", + "\u001b[92m19:51:02 - LiteLLM:INFO\u001b[0m: 
utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7303/7340 [264:44<1:20, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:04,313 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:51:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + " 99%|███████████████████████████████████████-| 7303/7340 [264:46<1:20, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + " 99%|███████████████████████████████████████-| 7303/7340 [264:47<1:20, 27.6 steps/min]\u001b[92m19:51:05 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:06,170 - agent.ComputerAgent - INFO - Computer: click({'x': 285, 'y': 98})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 285, 'y': 98})\n", + "100%|███████████████████████████████████████-| 7304/7340 [264:53<1:18, 27.6 steps/min]INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:12,936 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:51:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7304/7340 [264:54<1:18, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7304/7340 [264:57<1:18, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:51:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7304/7340 [264:59<1:18, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:51:17 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:51:18,032 - agent.ComputerAgent - INFO - Computer: click({'x': 259, 'y': 180})\n", + 
"INFO:agent.ComputerAgent:Computer: click({'x': 259, 'y': 180})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7304/7340 [265:00<1:18, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:51:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:51:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:51:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:51:19,864 - agent.ComputerAgent - INFO - Computer: click({'x': 151, 'y': 232})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 151, 'y': 232})\n", + "100%|███████████████████████████████████████-| 7305/7340 [265:01<1:16, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= 
HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:51:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:51:21,195 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 298, 'y': 152}, {'x': 81, 'y': 155}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': [{'x': 298, 'y': 152}, {'x': 81, 'y': 155}]})\n", + "100%|███████████████████████████████████████-| 7306/7340 [265:02<1:14, 27.6 steps/min]\u001b[92m19:51:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:21,882 - agent.ComputerAgent - INFO - Computer: click({'x': 59, 'y': 157})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 59, 'y': 157})\n", + "100%|███████████████████████████████████████-| 7307/7340 [265:03<1:11, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7308/7340 [265:04<1:09, 27.6 steps/min]2025-08-11 19:51:23,543 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:51:23 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7308/7340 [265:06<1:09, 27.6 steps/min]INFO:httpx:HTTP Request: POST 
https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7308/7340 [265:07<1:09, 27.6 steps/min]\u001b[92m19:51:26 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:26,961 - agent.ComputerAgent - INFO - Computer: click({'x': 762, 'y': 230})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 762, 'y': 230})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:27,615 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 32 messages\n", + "\u001b[92m19:51:27 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7308/7340 [265:09<1:09, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:28,303 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM 
processing started with 34 messages\n", + "\u001b[92m19:51:28 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:51:28,994 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:51:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:15<1:07, 27.6 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:35,225 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:51:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:17<1:07, 27.6 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:18<1:07, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:38,065 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'ALT+TAB'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'ALT+TAB'})\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:19<1:07, 27.5 steps/min]2025-08-11 19:51:39,205 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:51:39 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:21<1:07, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:22<1:07, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7309/7340 [265:23<1:07, 27.5 steps/min]\u001b[92m19:51:41 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:42,081 - agent.ComputerAgent - INFO - Computer: click({'x': 237, 'y': 95})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 237, 'y': 95})\n", + "100%|███████████████████████████████████████-| 7310/7340 [265:25<1:05, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7310/7340 [265:26<1:05, 27.5 steps/min]\u001b[92m19:51:44 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() 
model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:51:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7310/7340 [265:27<1:05, 27.5 steps/min]\u001b[92m19:51:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:51:45 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:51:46 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:46,954 - agent.ComputerAgent - INFO - Computer: drag({'start_element_description': 'Cell C2', 'end_element_description': 'Cell C10', 'x': 268, 'y': 188})\n", + "INFO:agent.ComputerAgent:Computer: drag({'start_element_description': 'Cell C2', 'end_element_description': 'Cell C10', 'x': 268, 'y': 188})\n", + "100%|███████████████████████████████████████-| 7311/7340 [265:29<1:03, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n" + 
] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 19:51:48,615 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 35 messages\n", + "\u001b[92m19:51:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7311/7340 [265:30<1:03, 27.5 steps/min]2025-08-11 19:51:49,299 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:51:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7311/7340 [265:31<1:03, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7311/7340 [265:32<1:03, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "\u001b[92m19:51:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM 
completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:52,164 - agent.ComputerAgent - INFO - Computer: click({'x': 87, 'y': 166})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 87, 'y': 166})\n", + "100%|███████████████████████████████████████-| 7312/7340 [265:33<1:01, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:52,805 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 37 messages\n", + "\u001b[92m19:51:52 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7313/7340 [265:35<0:58, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:54 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7313/7340 [265:36<0:58, 27.5 steps/min]\u001b[92m19:51:55 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:51:55,701 - agent.ComputerAgent - INFO - Computer: double_click({'x': 841, 'y': 244})\n", + "INFO:agent.ComputerAgent:Computer: double_click({'x': 
841, 'y': 244})\n", + "100%|███████████████████████████████████████-| 7313/7340 [265:37<0:58, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "100%|███████████████████████████████████████-| 7315/7340 [265:38<0:54, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:51:57,894 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 39 messages\n", + "\u001b[92m19:51:57 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "2025-08-11 19:51:58,566 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 34 messages\n", + "\u001b[92m19:51:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7315/7340 [265:40<0:54, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:51:59 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7315/7340 [265:41<0:54, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + 
"\u001b[92m19:52:00 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:00,770 - agent.ComputerAgent - INFO - Computer: click({'x': 422, 'y': 360})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 422, 'y': 360})\n", + "100%|███████████████████████████████████████-| 7315/7340 [265:42<0:54, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:01,926 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:52:01 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7316/7340 [265:43<0:52, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "100%|███████████████████████████████████████-| 7317/7340 [265:44<0:50, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:52:03 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7317/7340 [265:45<0:50, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 
200 OK\"\n", + "2025-08-11 19:52:04,815 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 41 messages\n", + "\u001b[92m19:52:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:52:04 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:05,539 - agent.ComputerAgent - INFO - Computer: click({'x': 15, 'y': 526})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 15, 'y': 526})\n", + "100%|███████████████████████████████████████-| 7317/7340 [265:47<0:50, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:07,328 - agent.ComputerAgent - INFO - Computer: type({'text': 'contacts.csv'})\n", + "INFO:agent.ComputerAgent:Computer: type({'text': 'contacts.csv'})\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "100%|███████████████████████████████████████-| 7319/7340 [265:49<0:45, 27.5 steps/min]2025-08-11 19:52:07,988 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 36 messages\n", + "\u001b[92m19:52:08 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7320/7340 [265:50<0:43, 27.5 
steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:09,185 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 43 messages\n", + "\u001b[92m19:52:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7320/7340 [265:53<0:43, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7320/7340 [265:54<0:43, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7321/7340 [265:55<0:41, 27.5 steps/min]2025-08-11 19:52:13,898 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:52:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7321/7340 [265:58<0:41, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:52:18 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7321/7340 [265:59<0:41, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:52:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:52:18 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/9053f5ae-149f-4a73-a89e-977f3e750435/close \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7325/7340 [266:00<0:32, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/26dc2412-0699-4a4e-a272-dc576348a5c8/close \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:52:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:52:19 - 
LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7325/7340 [266:01<0:32, 27.5 steps/min]\u001b[92m19:52:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:20,887 - agent.ComputerAgent - INFO - Computer: drag({'start_element_description': 'column header A', 'end_element_description': 'column header H', 'x': 90, 'y': 155})\n", + "INFO:agent.ComputerAgent:Computer: drag({'start_element_description': 'column header A', 'end_element_description': 'column header H', 'x': 90, 'y': 155})\n", + "100%|███████████████████████████████████████-| 7326/7340 [266:02<0:30, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:23,480 - agent.ComputerAgent - INFO - Computer: keypress({'keys': 'enter'})\n", + "INFO:agent.ComputerAgent:Computer: keypress({'keys': 'enter'})\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7326/7340 [266:05<0:30, 27.5 steps/min]\u001b[92m19:52:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "No screenshot found, taking screenshot\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-11 19:52:24,777 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 37 messages\n", + "\u001b[92m19:52:24 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7327/7340 [266:06<0:28, 27.5 steps/min]\u001b[92m19:52:25 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:26,307 - agent.ComputerAgent - INFO - Computer: click({'x': 828, 'y': 35})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 828, 'y': 35})\n", + "100%|███████████████████████████████████████-| 7328/7340 [266:10<0:26, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "100%|███████████████████████████████████████-| 7329/7340 [266:11<0:23, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:30,881 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:52:30 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7329/7340 [266:12<0:23, 27.5 steps/min]2025-08-11 19:52:31,557 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 39 messages\n", + "\u001b[92m19:52:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7329/7340 [266:13<0:23, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:32,766 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:52:32 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7329/7340 [266:19<0:23, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "100%|███████████████████████████████████████-| 7330/7340 [266:20<0:21, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:52:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke 
\"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7330/7340 [266:21<0:21, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:52:40,663 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 41 messages\n", + "\u001b[92m19:52:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:52:40 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:41,739 - agent.ComputerAgent - INFO - Computer: click({'x': 328, 'y': 286})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 328, 'y': 286})\n", + "100%|███████████████████████████████████████-| 7331/7340 [266:26<0:19, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "100%|███████████████████████████████████████-| 7332/7340 [266:27<0:17, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:46,968 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 43 messages\n", + "\u001b[92m19:52:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7332/7340 [266:29<0:17, 27.5 
steps/min]\u001b[92m19:52:47 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "2025-08-11 19:52:48,348 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 38 messages\n", + "\u001b[92m19:52:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "\u001b[92m19:52:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "\u001b[92m19:52:48 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7332/7340 [266:30<0:17, 27.5 steps/min]\u001b[92m19:52:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:49,902 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 749, 'y': 229}, {'x': 749, 'y': 732}]})\n", + "INFO:agent.ComputerAgent:Computer: drag({'path': 
[{'x': 749, 'y': 229}, {'x': 749, 'y': 732}]})\n", + "100%|███████████████████████████████████████-| 7332/7340 [266:31<0:17, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:52:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 400 Bad Request\"\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7334/7340 [266:32<0:13, 27.5 steps/min]Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:52:51 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:52:52,289 - agent.ComputerAgent - INFO - Computer: click({'x': 17, 'y': 386})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 17, 'y': 386})\n", + "100%|███████████████████████████████████████-| 7334/7340 [266:34<0:13, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:37<0:10, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:38<0:10, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/invoke \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST 
https://orchestration.hud.so/hud-gym/api/v2/environments/b14fe395-5fa2-43f0-9d0b-23c42f3e9093/close \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:39<0:10, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:52:58,539 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:52:58 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:40<0:10, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:41<0:10, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/cb64a220-43d8-4373-bd2a-e73bacb4a122/close \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:54<0:10, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:53:13 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7335/7340 [266:55<0:10, 27.5 steps/min]\u001b[92m19:53:14 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + 
"INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:53:14,687 - agent.ComputerAgent - INFO - Computer: click({'x': 318, 'y': 306})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 318, 'y': 306})\n", + "100%|███████████████████████████████████████-| 7336/7340 [266:57<0:08, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:53:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7336/7340 [266:58<0:08, 27.5 steps/min]\u001b[92m19:53:16 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:53:17,018 - agent.ComputerAgent - INFO - Computer: click({'x': 49, 'y': 53})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 49, 'y': 53})\n", + "100%|███████████████████████████████████████-| 7337/7340 [267:01<0:06, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "2025-08-11 19:53:20,724 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 40 messages\n", + "\u001b[92m19:53:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7337/7340 [267:02<0:06, 27.5 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider 
= openai\n", + "100%|███████████████████████████████████████-| 7337/7340 [267:03<0:06, 27.5 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7337/7340 [267:23<0:06, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:53:42 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "100%|███████████████████████████████████████-| 7337/7340 [267:24<0:06, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/invoke \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:53:43 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:53:43,990 - agent.ComputerAgent - INFO - Computer: click({'x': 432, 'y': 314})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 432, 'y': 314})\n", + "100%|███████████████████████████████████████-| 7337/7340 [267:25<0:06, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/b4eee866-c191-4acf-b232-9b18a3c888ef/close \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7338/7340 [267:29<0:04, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7338/7340 
[267:30<0:04, 27.4 steps/min]2025-08-11 19:53:49,710 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n", + "INFO:agent.ComputerAgent:LLM processing started with 42 messages\n", + "\u001b[92m19:53:49 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= gpt-5; provider = openai\n", + "100%|███████████████████████████████████████-| 7338/7340 [268:00<0:04, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\u001b[92m19:54:20 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "100%|███████████████████████████████████████-| 7338/7340 [268:02<0:04, 27.4 steps/min]INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n", + "\u001b[92m19:54:21 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "INFO:LiteLLM:\n", + "LiteLLM completion() model= HelloKKMe/GTA1-7B; provider = huggingface-local\n", + "2025-08-11 19:54:21,613 - agent.ComputerAgent - INFO - Computer: click({'x': 469, 'y': 487})\n", + "INFO:agent.ComputerAgent:Computer: click({'x': 469, 'y': 487})\n", + "100%|███████████████████████████████████████-| 7339/7340 [268:08<0:02, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "100%|███████████████████████████████████████-| 7339/7340 [268:12<0:02, 27.4 steps/min]INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/invoke \"HTTP/1.1 200 OK\"\n", + "100%|████████████████████████████████████████| 7340/7340 
[268:13<0:00, 27.4 steps/min]\n", + "INFO:httpx:HTTP Request: POST https://orchestration.hud.so/hud-gym/api/v2/environments/d71be89e-00e2-40e7-8b8d-38e36bc6d26c/close \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: GET https://orchestration.hud.so/hud-gym/api/v2/jobs/a2c1347a-2925-45ed-b86a-6b475b0dc4eb/trajectories \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'task_count': 360, 'avg_reward': 0.21677517254432735, 'success_rate': 18.333333333333332}\n", + "View results at: https://app.hud.so/jobs/a2c1347a-2925-45ed-b86a-6b475b0dc4eb\n" ] } ], @@ -1184,20 +109999,25 @@ "from hud import load_taskset\n", "from hud.taskset import TaskSet\n", "import logging\n", + "import uuid\n", "\n", "# Load taskset\n", "taskset = await load_taskset(\"OSWorld-Verified\")\n", - "taskset = TaskSet(tasks=taskset[:10]) # limit to 10 tasks instead of all 370\n", + "# taskset = TaskSet(tasks=taskset[:20]) # limit to 20 tasks instead of all 370\n", + "\n", + "job_name = \"osworld-gta-gpt5\"\n", + "job_name = f\"{job_name}-{str(uuid.uuid4())[:4]}\"\n", "\n", "# Run benchmark job\n", "job = await run_job(\n", - " model=\"openai/computer-use-preview\",\n", + " # model=\"openai/computer-use-preview\",\n", + " model=\"huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5\",\n", " task_or_taskset=taskset,\n", - " job_name=\"test-computeragent-job\",\n", - " max_concurrent_tasks=5,\n", + " job_name=job_name,\n", + " max_concurrent_tasks=20,\n", " # add any extra ComputerAgent kwargs:\n", " verbosity=logging.INFO, # Enable logging\n", - " trajectory_dir=\"trajectories\" # Save trajectories locally\n", + " trajectory_dir=f\"trajectories/{job_name}\" # Save trajectories locally\n", ")\n", "\n", "# Get results OR view them at app.hud.so\n", @@ -1208,7 +110028,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "cua", "language": "python", "name": "python3" }, From cf1c3b5b6ee472222716b7c047f0d6b45dcaa437 Mon Sep 17 
00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 12:12:42 -0400 Subject: [PATCH 72/76] add human/ to docs --- README.md | 1 + .../supported-agents/human-in-the-loop.mdx | 66 +++++++++++++++++++ .../docs/agent-sdk/supported-agents/meta.json | 3 +- 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 docs/content/docs/agent-sdk/supported-agents/human-in-the-loop.mdx diff --git a/README.md b/README.md index fcac4a75..3da4464e 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ With the Agent SDK, you can: - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` - `omniparser+any LLM` - `huggingface-local/HelloKKMe/GTA1-7B+any LLM` (using [Composed Agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)) + - `human/human` (using [Human-in-the-Loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop)) Missing a model? [Raise a feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)! diff --git a/docs/content/docs/agent-sdk/supported-agents/human-in-the-loop.mdx b/docs/content/docs/agent-sdk/supported-agents/human-in-the-loop.mdx new file mode 100644 index 00000000..8d084d7e --- /dev/null +++ b/docs/content/docs/agent-sdk/supported-agents/human-in-the-loop.mdx @@ -0,0 +1,66 @@ +--- +title: Human-In-The-Loop +description: Use humans as agents for evaluation, demonstrations, and interactive control +--- + +The Agent SDK provides a human tool with native support for putting a human in the loop, as a way to evaluate your environment and tools or to create demonstrations. To use it, set the model to `human/human`, or compose it with a grounding model as `grounding_model+human/human`. + +## Getting Started + +To start the human agent tool, simply run: + +```bash +python -m agent.human_tool +``` + +The UI will show you pending completions. 
Select a completion to take control of the agent. + +## Usage Examples + +### Direct Human Agent + +```python +from agent import ComputerAgent +from agent.computer import computer + +agent = ComputerAgent( + "human/human", + tools=[computer] +) + +async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"): + pass +``` + +### Composed with Grounding Model + +```python +agent = ComputerAgent( + "huggingface-local/HelloKKMe/GTA1-7B+human/human", + tools=[computer] +) + +async for _ in agent.run("Navigate to the settings page and enable dark mode"): + pass +``` + +## Features + +The human-in-the-loop interface provides: + +- **Interactive UI**: Web-based interface for reviewing and responding to agent requests +- **Image Display**: Screenshots with click handlers for direct interaction +- **Action Accordions**: Support for various computer actions (click, type, keypress, etc.) +- **Tool Calls**: Full OpenAI-compatible tool call support +- **Real-time Updates**: Smart polling for responsive UI updates + +## Use Cases + +- **Evaluation**: Have humans evaluate agent performance and provide ground truth responses +- **Demonstrations**: Create training data by having humans demonstrate tasks +- **Interactive Control**: Take manual control when automated agents need human guidance +- **Testing**: Validate agent, tool, and environment behavior manually + +--- + +For more details on the human tool implementation, see the [Human Tool Documentation](../../tools/human-tool). 
diff --git a/docs/content/docs/agent-sdk/supported-agents/meta.json b/docs/content/docs/agent-sdk/supported-agents/meta.json index 092fd051..5d50b124 100644 --- a/docs/content/docs/agent-sdk/supported-agents/meta.json +++ b/docs/content/docs/agent-sdk/supported-agents/meta.json @@ -4,6 +4,7 @@ "pages": [ "computer-use-agents", "grounding-models", - "composed-agents" + "composed-agents", + "human-in-the-loop" ] } From a0c59656208eeda4300fc9028ee996af496bc408 Mon Sep 17 00:00:00 2001 From: ddupont <3820588+ddupont808@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:08:29 -0400 Subject: [PATCH 73/76] Changed c/ua to cua --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fcac4a75..f263a5e0 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ With the Computer SDK, you can: - automate Windows, Linux, and macOS VMs with a consistent, [pyautogui-like API](https://docs.trycua.com/docs/libraries/computer#interface-actions) -- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using the [c/ua cloud](https://www.trycua.com/) +- create & manage VMs [locally](https://docs.trycua.com/docs/computer-sdk/computers#cua-local-containers) or using [cua cloud](https://www.trycua.com/) With the Agent SDK, you can: - run computer-use models with a [consistent output](https://docs.trycua.com/docs/agent-sdk/chat-history#message-array-structure) From c992203f327de742b8281d0b9cfce225d792b4f4 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 14:08:48 -0400 Subject: [PATCH 74/76] mentioned HUD integration in readme --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 79ce94e2..85cf88ce 100644 --- a/README.md +++ b/README.md @@ -28,13 +28,16 @@ With the Agent SDK, you can: - run composed agents using UI grounding models and any LLM - use any liteLLM provider (`openai/`, `openrouter/`, etc.) 
or our included local providers (`huggingface-local/`, `mlx/`) - quickly evaluate new UI agent models and UI grounding models - - `anthropic/claude-opus-4-1-20250805` + - `anthropic/claude-opus-4-1-20250805` (using [Computer-Use Models](https://docs.trycua.com/docs/agent-sdk/supported-agents/computer-use-agents)) - `openai/computer-use-preview` - `openrouter/z-ai/glm-4.5v` - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` - - `omniparser+any LLM` - - `huggingface-local/HelloKKMe/GTA1-7B+any LLM` (using [Composed Agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)) + - `omniparser+{any LLM}` (using [Composed Agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)) + - `huggingface-local/HelloKKMe/GTA1-7B+{any LLM}` + - `huggingface/HelloKKMe/GTA1-32B+{any LLM}` + - `vllm_hosted/HelloKKMe/GTA1-72B+{any LLM}` - `human/human` (using [Human-in-the-Loop](https://docs.trycua.com/docs/agent-sdk/supported-agents/human-in-the-loop)) +- benchmark on OSWorld-Verified, SheetBench-V2, and more [with a single line of code using HUD](https://docs.trycua.com/docs/agent-sdk/integrations/hud) ([Notebook](https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb)) Missing a model? [Raise a feature request](https://github.com/trycua/cua/issues/new?assignees=&labels=enhancement&projects=&title=%5BAgent%5D%3A+Add+model+support+for+) or [contribute](https://github.com/trycua/cua/blob/main/CONTRIBUTING.md)! 
From 49ebe9c55f84f297283b15967f221c95b2ab967f Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 14:22:10 -0400 Subject: [PATCH 75/76] changed error handling for hud agent --- .../agent/agent/integrations/hud/agent.py | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 43fc7367..97d1e7f8 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -5,6 +5,7 @@ from typing import Any, Literal, Optional, Union, List, Dict import asyncio from agent import ComputerAgent as BaseComputerAgent +from agent.responses import make_failed_tool_call_items from hud.adapters import Adapter from hud.agent.base import Agent from hud.utils.common import Observation @@ -272,16 +273,33 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): } ] } - # add error message to conversation history - new_items.append({ - "type": "user", - "content": [ - { - "type": "input_text", - "text": f"Error during previous attempted action: {repr(e)}" - } - ] - }) + # Check if there are any computer_call items in new_items + computer_calls = [item for item in new_items if item.get("type") == "computer_call"] + if computer_calls: + # Remove computer_call items from new_items + new_items = [item for item in new_items if item.get("type") != "computer_call"] + + # Add failed tool call items for each computer call + for computer_call in computer_calls: + tool_input = computer_call.get("action", {}) + call_id = computer_call.get("call_id") + new_items.extend(make_failed_tool_call_items( + tool_name="computer", + tool_kwargs=tool_input, + error_message=repr(e), + call_id=call_id + )) + else: + # add error message to conversation history (fallback for non-computer-call errors) + new_items.append({ + "type": "user", + "content": [ + { + "type": "input_text", + "text": f"Error during 
previous attempted action: {repr(e)}" + } + ] + }) # Check if we captured any actions if captured_actions: From 9ef59ba977317d35f36dcfcec72b2acc91b70112 Mon Sep 17 00:00:00 2001 From: Dillon DuPont Date: Tue, 12 Aug 2025 15:00:32 -0400 Subject: [PATCH 76/76] Fixed bug in hud agent --- libs/python/agent/agent/integrations/hud/agent.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libs/python/agent/agent/integrations/hud/agent.py b/libs/python/agent/agent/integrations/hud/agent.py index 97d1e7f8..abbf5f8c 100644 --- a/libs/python/agent/agent/integrations/hud/agent.py +++ b/libs/python/agent/agent/integrations/hud/agent.py @@ -172,7 +172,7 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): # If so, add computer_call_output with screenshot instead of user message last_computer_calls = [] for msg in reversed(self.conversation_history): - if msg.get("type") == "computer_call" and msg.get("status") == "completed": + if msg.get("type") == "computer_call": call_id = msg.get("call_id") if call_id: # Check if this call_id already has a computer_call_output @@ -182,9 +182,6 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): ) if not has_output: last_computer_calls.append(call_id) - elif msg.get("role") == "user": - # Stop at the last user message - break if last_computer_calls: if not observation.screenshot: @@ -259,7 +256,6 @@ class ComputerAgent(Agent[BaseComputerAgent, dict[str, Any]]): break # otherwise add agent output to conversation history new_items += result["output"] - self.conversation_history += result["output"] except Exception as e: # if the last message is reasoning, change it to output_text if new_items and new_items[-1].get("type") == "reasoning":