Files
computer/notebooks/eval_osworld.ipynb
2025-08-08 19:46:19 -04:00

1231 lines
113 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ComputerAgent HUD Integration for OSWorld\n",
"\n",
"This notebook demonstrates how to use the ComputerAgent with HUD for OSWorld benchmarking.\n",
"The ComputerAgent integration provides the same interface as OperatorAgent but works with both Claude and OpenAI models."
]
},
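{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install dependencies if needed. Run these in a terminal before launching Jupyter:\n",
"# a `!source` line runs in a temporary subshell, so it cannot activate the venv for this kernel.\n",
"#   uv venv\n",
"#   source .venv/bin/activate\n",
"#   uv sync"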
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Required environment variables:\n",
"# - HUD_API_KEY (for HUD access)\n",
"# - ANTHROPIC_API_KEY (for Claude models)\n",
"# - OPENAI_API_KEY (for OpenAI models)\n",
"\n",
"from hud import gym, load_taskset\n",
"from pprint import pprint\n",
"import asyncio"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Import the HUD-integrated ComputerAgent\n",
"from agent.integrations.hud import ComputerAgent"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total tasks in OSWorld: 367\n",
"Task prompt: Can you make my computer bring back the last tab I shut down?\n"
]
}
],
"source": [
"# Load OSWorld taskset\n",
"taskset = await load_taskset(\"OSWorld-Verified\")\n",
"print(f\"Total tasks in OSWorld: {len(taskset)}\")\n",
"\n",
"# Select a test task\n",
"test = taskset[148]\n",
"print(f\"Task prompt: {test.prompt}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total tasks in SheetBench: 50\n",
"Task prompt: Given the Input data, determine the ticker with the greatest correlation between volume and next day price change.\n",
"- in ANSWER tab put the Ticker in A1 and the correlation in B1\n",
" - use CORREL to determine correlation\n",
"- be sure to first sort the date by ticker z to a and then date ascending before calculating nextdaypricechange %\n",
"Correlation should be rounded to 2 decimal points\n"
]
}
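],
"source": [
"# Load the SheetBench-V2 taskset.\n",
"# NOTE: this rebinds `taskset` and `test` from the OSWorld cell above, so when the\n",
"# cells are run in order the environment below is created for the SheetBench task.\n",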
"taskset = await load_taskset(\"SheetBench-V2\")\n",
"print(f\"Total tasks in SheetBench: {len(taskset)}\")\n",
"\n",
"# Select a test task\n",
"test = taskset[0]\n",
"print(f\"Task prompt: {test.prompt}\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO] 2025-08-08 19:08:17,078 | hud.environment | View the live trace at https://app.hud.so/trace/ca88c178-cf40-499b-8ad3-d5d60348d9fe\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Environment ready!\n"
]
}
],
"source": [
"# Create environment (takes ~2.5 minutes to start)\n",
"env = await gym.make(test)\n",
"print(\"Environment ready!\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div style=\"width: 960px; height: 540px; overflow: hidden;\">\n",
" <div style=\"transform: scale(0.5); transform-origin: top left;\">\n",
" <iframe src=\"https://live.anchorbrowser.io?sessionId=21376c89-e539-4f07-b23f-db4a3749d61a\" width=\"1920\" height=\"1080\" style=\"border: 1px solid #ddd;\">\n",
" </iframe>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'\\n <div style=\"width: 960px; height: 540px; overflow: hidden;\">\\n <div style=\"transform: scale(0.5); transform-origin: top left;\">\\n <iframe src=\"https://live.anchorbrowser.io?sessionId=21376c89-e539-4f07-b23f-db4a3749d61a\" width=\"1920\" height=\"1080\" style=\"border: 1px solid #ddd;\">\\n </iframe>\\n </div>\\n </div>\\n '"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"await env.stream() # vnc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test with any supported CUA model\n",
"\n",
"The ComputerAgent integration can use Claude, OpenAI, UI-TARS, or composed models just like the original ComputerAgent:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created agent: computeragent-computer-use-preview\n"
]
}
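],
"source": [
"import logging\n",
"# Create the ComputerAgent. The OpenAI computer-use-preview model is active here;\n",
"# the commented-out line below switches to Claude. (The variable keeps the name\n",
"# `claude_agent` because later cells refer to it.)\n",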
"claude_agent = ComputerAgent(\n",
" # model=\"anthropic/claude-3-5-sonnet-20241022\",\n",
" model=\"openai/computer-use-preview\",\n",
" # environment=\"linux\", # OSWorld typically uses Linux\n",
" environment=\"browser\", # SheetBench uses the browser\n",
" trajectory_dir=\"trajectories\",\n",
" verbosity=logging.INFO,\n",
")\n",
"\n",
"print(f\"Created agent: {claude_agent.name}\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initial observation complete\n",
"========= Step 1 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:14:10,479 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
"2025-08-08 19:14:18,867 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 55, 'y': 149})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [ClickAction(type='click', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 3}, point=Point(x=77, y=174), button='left', pattern=None, hold_keys=None)]\n",
"========= Step 2 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:14:24,566 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
"2025-08-08 19:14:30,430 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [PressAction(type='press', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 5}, keys=['ctrl', 'a'])]\n",
"========= Step 3 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:14:36,137 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n",
"2025-08-08 19:14:42,483 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 73, 'y': 151})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [ClickAction(type='click', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 7}, point=Point(x=102, y=176), button='left', pattern=None, hold_keys=None)]\n",
"========= Step 4 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:14:48,687 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
"2025-08-08 19:14:59,516 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [PressAction(type='press', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 9}, keys=['ctrl', 'a'])]\n",
"========= Step 5 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:15:05,229 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
"2025-08-08 19:15:15,153 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 55, 'y': 147}, {'x': 319, 'y': 713}]})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [DragAction(type='drag', reasoning='Highlighting data for sorting preparation', logs={'conversation_length': 12}, path=[Point(x=77, y=172), Point(x=448, y=835)], pattern=None, hold_keys=None)]\n",
"========= Step 6 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:15:21,362 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
"2025-08-08 19:15:33,774 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 229, 'y': 41})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [ClickAction(type='click', reasoning='Opening sort options for data', logs={'conversation_length': 15}, point=Point(x=322, y=48), button='left', pattern=None, hold_keys=None)]\n",
"========= Step 7 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:15:39,973 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
"2025-08-08 19:15:52,928 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 430, 'y': 96})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [ClickAction(type='click', reasoning='Choosing \"Sort range\" for sorting', logs={'conversation_length': 18}, point=Point(x=604, y=112), button='left', pattern=None, hold_keys=None)]\n",
"========= Step 8 ==========\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:15:59,611 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
"2025-08-08 19:16:17,003 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 530, 'y': 172})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent's action: [ClickAction(type='click', reasoning='Accessing advanced sorting options now', logs={'conversation_length': 21}, point=Point(x=745, y=201), button='left', pattern=None, hold_keys=None)]\n"
]
}
],
"source": [
"# Initial observation\n",
"obs, _ = await env.reset()\n",
"print(\"Initial observation complete\")\n",
"\n",
"# Agent loop with Claude\n",
"for i in range(8):\n",
" print(f\"========= Step {i + 1} ==========\")\n",
" \n",
" try:\n",
" action, done = await claude_agent.predict(obs)\n",
" print(f\"Agent's action: {action}\")\n",
"\n",
" obs, reward, terminated, info = await env.step(action)\n",
"\n",
" if done or terminated:\n",
" print(f\"Task completed after {i + 1} steps\")\n",
" break\n",
" \n",
" except Exception as e:\n",
" print(f\"Error in step {i + 1}: {e}\")\n",
" break"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluate Results"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"=== Final Evaluation ===\n",
"{'error': None,\n",
" 'gold_file_url': 'https://gahludmjcsmszgyufydt.supabase.co//storage/v1/object/public/sheetbench/615426c8-9df7-4ffa-92e9-200134a84da9/gold_solution_2.xlsx?',\n",
" 'logs': 'INFO: Starting evaluation with evaluator: sheets_cell_values\\n'\n",
" \"INFO: Evaluator args: [{'A1': 'ABC', 'B1': '-0.08'}]\\n\"\n",
" 'INFO: Partial rewarding: False\\n'\n",
" 'INFO: Starting sheets_cell_values evaluation for environment: '\n",
" 'af7a34a0-43b0-44d2-82d0-2b66ed16f1ea\\n'\n",
" \"INFO: Raw args received: [{'A1': 'ABC', 'B1': '-0.08'}] (type: \"\n",
" \"<class 'list'>)\\n\"\n",
" 'INFO: Partial rewarding enabled: False\\n'\n",
" 'INFO: === Google Sheets Cell Value Verification ===\\n'\n",
" 'INFO: Current page URL: '\n",
" 'https://docs.google.com/spreadsheets/d/1h-Ec3rW9sAME2sTn8qxIvFxO6qXtdURPacEFL5DJnqw/edit?gid=700326861#gid=700326861\\n'\n",
" 'INFO: ✅ Confirmed on Google Sheets page\\n'\n",
" 'INFO: Processing args parameter...\\n'\n",
" 'INFO: Args is a list with 1 items, extracting first item\\n'\n",
" \"INFO: Extracted: {'A1': 'ABC', 'B1': '-0.08'} (type: <class \"\n",
" \"'dict'>)\\n\"\n",
" 'INFO: Cell checks to perform: 2 cells\\n'\n",
" \"INFO: A1 -> expected: 'ABC'\\n\"\n",
" \"INFO: B1 -> expected: '-0.08'\\n\"\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" \"sheets_cell_values: Checking cells: {'A1': 'ABC', 'B1': '-0.08'}\\n\"\n",
" 'INFO: === ANSWER Sheet Navigation ===\\n'\n",
" 'INFO: Attempt 1/3: Attempting to find and navigate to ANSWER sheet '\n",
" 'tab...\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Attempt 1/3: Attempting to navigate to ANSWER '\n",
" 'sheet\\n'\n",
" 'INFO: Searching for ANSWER tab with selector: '\n",
" 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n",
" 'INFO: ANSWER tab search result (attempt 1): Found\\n'\n",
" 'INFO: ✅ Found ANSWER sheet tab on attempt 1, clicking on it...\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Found ANSWER sheet tab on attempt 1, clicking on '\n",
" 'it\\n'\n",
" 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 1: '\n",
" 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
" 'Call log:\\n'\n",
" ' - waiting for '\n",
" 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
" ' - - locator resolved to <span dir=\"ltr\" spellcheck=\"false\" '\n",
" 'class=\"docs-sheet-tab-name\">ANSWER</span>\\n'\n",
" ' - - attempting click action\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 20ms\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 100ms\\n'\n",
" ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 500ms\\n'\n",
" '\\n'\n",
" 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 1: '\n",
" 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
" 'Call log:\\n'\n",
" ' - waiting for '\n",
" 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
" ' - - locator resolved to <span dir=\"ltr\" spellcheck=\"false\" '\n",
" 'class=\"docs-sheet-tab-name\">ANSWER</span>\\n'\n",
" ' - - attempting click action\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 20ms\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 100ms\\n'\n",
" ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 500ms\\n'\n",
" '\\n'\n",
" 'INFO: Waiting 500ms before retry 2...\\n'\n",
" 'INFO: Attempt 2/3: Attempting to find and navigate to ANSWER sheet '\n",
" 'tab...\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Attempt 2/3: Attempting to navigate to ANSWER '\n",
" 'sheet\\n'\n",
" 'INFO: Searching for ANSWER tab with selector: '\n",
" 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n",
" 'INFO: ANSWER tab search result (attempt 2): Found\\n'\n",
" 'INFO: ✅ Found ANSWER sheet tab on attempt 2, clicking on it...\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Found ANSWER sheet tab on attempt 2, clicking on '\n",
" 'it\\n'\n",
" 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 2: '\n",
" 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
" 'Call log:\\n'\n",
" ' - waiting for '\n",
" 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
" ' - - locator resolved to <span dir=\"ltr\" spellcheck=\"false\" '\n",
" 'class=\"docs-sheet-tab-name\">ANSWER</span>\\n'\n",
" ' - - attempting click action\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 20ms\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 100ms\\n'\n",
" ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 500ms\\n'\n",
" '\\n'\n",
" 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 2: '\n",
" 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
" 'Call log:\\n'\n",
" ' - waiting for '\n",
" 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
" ' - - locator resolved to <span dir=\"ltr\" spellcheck=\"false\" '\n",
" 'class=\"docs-sheet-tab-name\">ANSWER</span>\\n'\n",
" ' - - attempting click action\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 20ms\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 100ms\\n'\n",
" ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 500ms\\n'\n",
" '\\n'\n",
" 'INFO: Waiting 500ms before retry 3...\\n'\n",
" 'INFO: Attempt 3/3: Attempting to find and navigate to ANSWER sheet '\n",
" 'tab...\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Attempt 3/3: Attempting to navigate to ANSWER '\n",
" 'sheet\\n'\n",
" 'INFO: Searching for ANSWER tab with selector: '\n",
" 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n",
" 'INFO: ANSWER tab search result (attempt 3): Found\\n'\n",
" 'INFO: ✅ Found ANSWER sheet tab on attempt 3, clicking on it...\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Found ANSWER sheet tab on attempt 3, clicking on '\n",
" 'it\\n'\n",
" 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 3: '\n",
" 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
" 'Call log:\\n'\n",
" ' - waiting for '\n",
" 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
" ' - - locator resolved to <span dir=\"ltr\" spellcheck=\"false\" '\n",
" 'class=\"docs-sheet-tab-name\">ANSWER</span>\\n'\n",
" ' - - attempting click action\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 20ms\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 100ms\\n'\n",
" ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 500ms\\n'\n",
" '\\n'\n",
" 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 3: '\n",
" 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
" 'Call log:\\n'\n",
" ' - waiting for '\n",
" 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
" ' - - locator resolved to <span dir=\"ltr\" spellcheck=\"false\" '\n",
" 'class=\"docs-sheet-tab-name\">ANSWER</span>\\n'\n",
" ' - - attempting click action\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 20ms\\n'\n",
" ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 100ms\\n'\n",
" ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
" ' - - element is visible, enabled and stable\\n'\n",
" ' - - scrolling into view if needed\\n'\n",
" ' - - done scrolling\\n'\n",
" ' - - <div aria-hidden=\"true\" class=\"modal-dialog-bg\"></div> '\n",
" 'intercepts pointer events\\n'\n",
" ' - - retrying click action\\n'\n",
" ' - - waiting 500ms\\n'\n",
" '\\n'\n",
" 'WARNING: ⚠️ Failed to navigate to ANSWER sheet after 3 attempts, '\n",
" 'proceeding with current sheet\\n'\n",
" 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Failed to navigate to ANSWER sheet after 3 '\n",
" 'attempts, proceeding with current sheet\\n'\n",
" 'INFO: === File Content Extraction ===\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Granted read-write permissions\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Extracting page contents\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Selecting content\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Successfully extracted 157940 characters from '\n",
" 'file\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Found 5003 rows in content\\n'\n",
" 'INFO: Content extracted: 157940 characters\\n'\n",
" 'INFO: === Cell Content Parsing ===\\n'\n",
" 'INFO: Split file content into 5003 rows\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Found 5003 rows in content\\n'\n",
" 'INFO: First few rows of content:\\n'\n",
" \"INFO: Row 1: 'TradeDate | Ticker | ClosePrice | Volume | | '\\n\"\n",
" \"INFO: Row 2: '2023-01-02 | ABC | 476.87 | 2225355 | | '\\n\"\n",
" \"INFO: Row 3: '2023-01-02 | DEF | 322.21 | 3778582 | | '\\n\"\n",
" 'INFO: ... and 5000 more rows\\n'\n",
" 'INFO: === Cell Reference Parsing ===\\n'\n",
" \"INFO: Processing cell reference: 'A1' -> expected: 'ABC'\\n\"\n",
" \"INFO: Parsed 'A1' -> row=1 (0-indexed: 0), col=A (0-indexed: 0)\\n\"\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Parsed cell A1 as row=0, col=0\\n'\n",
" 'INFO: Row 1 exists in content\\n'\n",
" \"INFO: Row 1 has 6 columns: ['Col1', 'Col2', 'Col3', 'Col4', \"\n",
" \"'Col5', 'Col6']\\n\"\n",
" \"INFO: ✅ Found value for A1: 'TradeDate'\\n\"\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" \"sheets_cell_values: Found value for A1: 'TradeDate'\\n\"\n",
" \"INFO: Processing cell reference: 'B1' -> expected: '-0.08'\\n\"\n",
" \"INFO: Parsed 'B1' -> row=1 (0-indexed: 0), col=B (0-indexed: 1)\\n\"\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Parsed cell B1 as row=0, col=1\\n'\n",
" 'INFO: Row 1 exists in content\\n'\n",
" \"INFO: Row 1 has 6 columns: ['Col1', 'Col2', 'Col3', 'Col4', \"\n",
" \"'Col5', 'Col6']\\n\"\n",
" \"INFO: ✅ Found value for B1: 'Ticker'\\n\"\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" \"sheets_cell_values: Found value for B1: 'Ticker'\\n\"\n",
" 'INFO: === Cell Value Comparison ===\\n'\n",
" 'INFO: Comparing cell A1:\\n'\n",
" \"INFO: Expected: 'ABC' (type: <class 'str'>)\\n\"\n",
" \"INFO: Actual: 'TradeDate' (type: <class 'str'>)\\n\"\n",
" \"INFO: ❌ VALUE MISMATCH: 'TradeDate' != 'ABC'\\n\"\n",
" 'INFO: Comparing cell B1:\\n'\n",
" \"INFO: Expected: '-0.08' (type: <class 'str'>)\\n\"\n",
" \"INFO: Actual: 'Ticker' (type: <class 'str'>)\\n\"\n",
" \"INFO: ❌ VALUE MISMATCH: 'Ticker' != '-0.08'\\n\"\n",
" 'INFO: === Final Results ===\\n'\n",
" 'INFO: Cell comparison summary:\\n'\n",
" 'INFO: Total cells checked: 2\\n'\n",
" 'INFO: Matches: 0\\n'\n",
" 'INFO: Mismatches: 2\\n'\n",
" \"INFO: Failed cells: ['A1:', 'B1:']\\n\"\n",
" 'INFO: ❌ NOT all cells match expected values\\n'\n",
" 'INFO: Mismatches: [\"Cell A1: expected \\'ABC\\', got \\'TradeDate\\'\", '\n",
" '\"Cell B1: expected \\'-0.08\\', got \\'Ticker\\'\"]\\n'\n",
" 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
" 'sheets_cell_values: Mismatches found: [\"Cell A1: expected \\'ABC\\', '\n",
" 'got \\'TradeDate\\'\", \"Cell B1: expected \\'-0.08\\', got \\'Ticker\\'\"]\\n'\n",
" 'INFO: Final reward: 0.0\\n'\n",
" 'INFO: === Sheets Cell Values Evaluation Complete ===\\n'\n",
" 'INFO: Evaluation completed. Final reward: 0.0\\n',\n",
" 'reward': 0.0}\n"
]
}
],
"source": [
"# Evaluate environment state\n",
"result = await env.evaluate()\n",
"print(\"=== Final Evaluation ===\")\n",
"pprint(result)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Environment closed\n"
]
}
],
"source": [
"# Clean up\n",
"await env.close()\n",
"print(\"Environment closed\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run OSWorld-Verified in parallel"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%|----------------------------------------| 0/200 [1:24<??:??, ?? steps/min]2025-08-08 19:24:29,970 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 0%|----------------------------------------| 0/200 [1:25<??:??, ?? steps/min]2025-08-08 19:24:30,647 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
"2025-08-08 19:24:31,329 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 0%|----------------------------------------| 0/200 [1:26<??:??, ?? steps/min]2025-08-08 19:24:31,958 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 0%|----------------------------------------| 0/200 [1:28<??:??, ?? steps/min]2025-08-08 19:24:35,310 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:24:36,641 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:24:37,969 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:24:39,338 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 2%|----------------------------------------| 4/200 [1:36<78:54, 2.5 steps/min]2025-08-08 19:24:42,498 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 2%|----------------------------------------| 4/200 [1:38<80:39, 2.4 steps/min]2025-08-08 19:24:44,669 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
" 2%|----------------------------------------| 4/200 [1:39<81:37, 2.4 steps/min]2025-08-08 19:24:45,319 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
"2025-08-08 19:24:45,992 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
"2025-08-08 19:24:47,339 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 2%|----------------------------------------| 4/200 [1:42<83:47, 2.3 steps/min]2025-08-08 19:24:47,968 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
"2025-08-08 19:24:49,301 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:24:50,712 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 2%|█---------------------------------------| 5/200 [1:45<68:53, 2.8 steps/min]2025-08-08 19:24:52,079 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 4%|█---------------------------------------| 7/200 [1:47<49:20, 3.9 steps/min]2025-08-08 19:24:53,458 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 4%|█---------------------------------------| 9/200 [1:52<39:53, 4.8 steps/min]2025-08-08 19:24:59,176 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
" 4%|█---------------------------------------| 9/200 [1:54<40:29, 4.7 steps/min]2025-08-08 19:24:59,868 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
" 4%|█---------------------------------------| 9/200 [1:55<40:50, 4.7 steps/min]2025-08-08 19:25:01,049 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
" 4%|█---------------------------------------| 9/200 [1:56<41:11, 4.6 steps/min]2025-08-08 19:25:01,688 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
"2025-08-08 19:25:02,319 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
" 4%|█---------------------------------------| 9/200 [1:58<41:56, 4.6 steps/min]2025-08-08 19:25:04,661 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:25:05,961 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 4%|█---------------------------------------| 9/200 [2:01<42:52, 4.5 steps/min]2025-08-08 19:25:07,310 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 6%|██--------------------------------------| 12/200 [2:06<33:03, 5.7 steps/min]2025-08-08 19:25:13,200 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['F']})\n",
" 6%|██--------------------------------------| 12/200 [2:08<33:32, 5.6 steps/min]2025-08-08 19:25:14,354 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
" 6%|██--------------------------------------| 13/200 [2:09<31:04, 6.0 steps/min]2025-08-08 19:25:15,039 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
"2025-08-08 19:25:16,054 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
"2025-08-08 19:25:17,449 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 233, 'y': 149})\n",
" 7%|██--------------------------------------| 14/200 [2:17<30:29, 6.1 steps/min]2025-08-08 19:25:23,160 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n",
" 7%|██--------------------------------------| 14/200 [2:18<30:43, 6.1 steps/min]2025-08-08 19:25:24,856 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
" 7%|██--------------------------------------| 14/200 [2:20<31:02, 6.0 steps/min]2025-08-08 19:25:26,201 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 440, 'y': 73})\n",
" 7%|██--------------------------------------| 14/200 [2:21<31:19, 5.9 steps/min]2025-08-08 19:25:27,514 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'SHIFT', 'P']})\n",
"2025-08-08 19:25:28,832 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 386, 'y': 74})\n",
" 8%|███-------------------------------------| 17/200 [2:27<26:24, 6.9 steps/min]2025-08-08 19:25:33,703 - agent.ComputerAgent - INFO - Agent: Task completed.\n",
"2025-08-08 19:25:34,379 - agent.ComputerAgent - INFO - Total usage:\n",
" - input_tokens: 5095\n",
" - input_tokens_details:\n",
" - cached_tokens: 0\n",
" - output_tokens: 28\n",
" - output_tokens_details:\n",
" - reasoning_tokens: 0\n",
" - total_tokens: 5123\n",
" 9%|███-------------------------------------| 18/200 [2:29<25:13, 7.2 steps/min]2025-08-08 19:25:35,019 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
"2025-08-08 19:25:35,669 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
" 9%|███-------------------------------------| 18/200 [2:30<25:26, 7.2 steps/min]2025-08-08 19:25:36,358 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
" 9%|███-------------------------------------| 18/200 [2:31<25:36, 7.1 steps/min]2025-08-08 19:25:37,713 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 280, 'y': 219})\n",
" 18%|███████---------------------------------| 36/200 [2:39<12:05, 13.6 steps/min]2025-08-08 19:25:44,610 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n",
" 18%|███████---------------------------------| 36/200 [2:40<12:09, 13.5 steps/min]2025-08-08 19:25:45,963 - agent.ComputerAgent - INFO - Computer: type({'text': 'Preferences: Open Settings (UI)'})\n",
" 18%|███████---------------------------------| 36/200 [2:41<12:14, 13.4 steps/min]2025-08-08 19:25:47,774 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 658, 'y': 213})\n",
"2025-08-08 19:25:49,132 - agent.ComputerAgent - INFO - Computer: move({'x': 417, 'y': 429})\n",
" 20%|███████---------------------------------| 39/200 [2:47<11:31, 14.0 steps/min]2025-08-08 19:25:52,821 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n",
" 20%|███████---------------------------------| 39/200 [2:49<11:39, 13.8 steps/min]2025-08-08 19:25:55,508 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
" 20%|███████---------------------------------| 39/200 [2:50<11:45, 13.7 steps/min]2025-08-08 19:25:56,169 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
" 20%|███████---------------------------------| 39/200 [2:52<11:53, 13.5 steps/min]2025-08-08 19:25:59,049 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 351, 'y': 460}, {'x': 382, 'y': 460}]})\n",
" 20%|███████---------------------------------| 39/200 [2:54<11:59, 13.4 steps/min]2025-08-08 19:26:00,904 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
" 20%|████████--------------------------------| 41/200 [2:59<11:34, 13.7 steps/min]2025-08-08 19:26:05,794 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 679, 'y': 122})\n",
" 20%|████████--------------------------------| 41/200 [3:01<11:42, 13.6 steps/min]2025-08-08 19:26:06,422 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
" 21%|████████--------------------------------| 42/200 [3:02<11:24, 13.8 steps/min]2025-08-08 19:26:08,744 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 598, 'y': 482})\n",
" 21%|████████--------------------------------| 42/200 [3:04<11:32, 13.7 steps/min]2025-08-08 19:26:09,410 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
" 22%|████████--------------------------------| 43/200 [3:07<11:22, 13.8 steps/min]2025-08-08 19:26:12,913 - agent.ComputerAgent - INFO - LLM processing started with 15 messages\n",
" 22%|████████--------------------------------| 43/200 [3:10<11:34, 13.6 steps/min]2025-08-08 19:26:16,109 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
" 22%|████████--------------------------------| 43/200 [3:13<11:46, 13.3 steps/min]2025-08-08 19:26:20,031 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 341, 'y': 462}, {'x': 322, 'y': 462}]})\n",
"2025-08-08 19:26:21,348 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'SHIFT', 'P']})\n",
" 22%|█████████-------------------------------| 45/200 [3:18<11:24, 13.6 steps/min]2025-08-08 19:26:24,698 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 612, 'y': 131})\n",
" 23%|█████████-------------------------------| 46/200 [3:21<11:16, 13.7 steps/min]2025-08-08 19:26:27,859 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
" 23%|█████████-------------------------------| 46/200 [3:23<11:20, 13.6 steps/min]2025-08-08 19:26:28,522 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
" 23%|█████████-------------------------------| 46/200 [3:26<11:30, 13.4 steps/min]2025-08-08 19:26:31,719 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
" 23%|█████████-------------------------------| 46/200 [3:27<11:33, 13.3 steps/min]2025-08-08 19:26:33,665 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 643, 'y': 131})\n",
" 24%|█████████-------------------------------| 47/200 [3:33<11:36, 13.2 steps/min]2025-08-08 19:26:41,002 - agent.ComputerAgent - INFO - Computer: type({'text': 'wrap tab'})\n",
" 24%|█████████-------------------------------| 47/200 [3:36<11:44, 13.0 steps/min]2025-08-08 19:26:42,323 - agent.ComputerAgent - INFO - Computer: type({'text': 'Preferences: Open User Settings (JSON)'})\n",
" 24%|█████████-------------------------------| 48/200 [3:37<11:29, 13.2 steps/min]2025-08-08 19:26:42,978 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
" 24%|█████████-------------------------------| 49/200 [3:38<11:13, 13.4 steps/min]2025-08-08 19:26:45,382 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 355, 'y': 558})\n",
" 25%|██████████------------------------------| 50/200 [3:42<11:07, 13.5 steps/min]2025-08-08 19:26:48,550 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
" 25%|██████████------------------------------| 50/200 [3:43<11:11, 13.4 steps/min]2025-08-08 19:26:49,199 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n",
" 25%|██████████------------------------------| 50/200 [3:45<11:17, 13.3 steps/min]2025-08-08 19:26:52,588 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus editor'})\n",
" 25%|██████████------------------------------| 50/200 [3:47<11:23, 13.2 steps/min]2025-08-08 19:26:53,260 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n",
" 26%|██████████------------------------------| 51/200 [3:53<11:23, 13.1 steps/min]2025-08-08 19:26:59,430 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
" 26%|██████████------------------------------| 51/200 [3:55<11:29, 13.0 steps/min]2025-08-08 19:27:01,780 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
"2025-08-08 19:27:03,107 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 674, 'y': 212})\n",
" 26%|██████████------------------------------| 53/200 [4:01<11:09, 13.2 steps/min]2025-08-08 19:27:07,985 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 83, 'y': 147})\n",
" 26%|██████████------------------------------| 53/200 [4:03<11:14, 13.1 steps/min]2025-08-08 19:27:09,630 - agent.ComputerAgent - INFO - LLM processing started with 23 messages\n",
" 27%|██████████------------------------------| 54/200 [4:04<11:02, 13.2 steps/min]2025-08-08 19:27:10,257 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n",
" 27%|██████████------------------------------| 54/200 [4:07<11:10, 13.1 steps/min]2025-08-08 19:27:14,605 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 981, 'y': 129})\n",
" 27%|██████████------------------------------| 54/200 [4:09<11:15, 13.0 steps/min]2025-08-08 19:27:15,269 - agent.ComputerAgent - INFO - LLM processing started with 23 messages\n",
" 28%|███████████-----------------------------| 55/200 [4:15<11:14, 12.9 steps/min]2025-08-08 19:27:22,005 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
"2025-08-08 19:27:23,434 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 679, 'y': 101})\n",
" 28%|███████████-----------------------------| 55/200 [4:18<11:22, 12.8 steps/min]2025-08-08 19:27:25,394 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 115, 'y': 383})\n",
"2025-08-08 19:27:26,746 - agent.ComputerAgent - INFO - Computer: type({'text': '}'})\n",
" 29%|███████████-----------------------------| 58/200 [4:24<10:46, 13.2 steps/min]2025-08-08 19:27:29,873 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n",
" 29%|███████████-----------------------------| 58/200 [4:27<10:54, 13.0 steps/min]2025-08-08 19:27:33,061 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n",
" 29%|███████████-----------------------------| 58/200 [4:28<10:57, 13.0 steps/min]2025-08-08 19:27:33,730 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
"2025-08-08 19:27:35,127 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus terminal'})\n",
" 30%|███████████-----------------------------| 59/200 [4:32<10:51, 13.0 steps/min]2025-08-08 19:27:38,470 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 679, 'y': 100})\n",
" 30%|████████████----------------------------| 60/200 [4:35<10:43, 13.1 steps/min]2025-08-08 19:27:41,640 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
" 30%|████████████----------------------------| 60/200 [4:38<10:50, 12.9 steps/min]2025-08-08 19:27:45,339 - agent.ComputerAgent - INFO - LLM processing started with 27 messages\n",
" 30%|████████████----------------------------| 60/200 [4:42<10:59, 12.7 steps/min]2025-08-08 19:27:48,491 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 30%|████████████----------------------------| 60/200 [4:45<11:06, 12.6 steps/min]2025-08-08 19:27:52,344 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'SHIFT', 'P']})\n",
" 30%|████████████----------------------------| 60/200 [4:47<11:11, 12.5 steps/min]2025-08-08 19:27:54,184 - agent.ComputerAgent - INFO - Computer: type({'text': 'edited_colorful.png'})\n",
" 31%|████████████----------------------------| 62/200 [4:50<10:46, 12.8 steps/min]2025-08-08 19:27:56,481 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 241, 'y': 110})\n",
"2025-08-08 19:27:57,814 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 981, 'y': 131})\n",
" 31%|████████████----------------------------| 62/200 [4:53<10:52, 12.7 steps/min]2025-08-08 19:27:59,141 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 675, 'y': 100})\n",
" 32%|████████████----------------------------| 64/200 [4:54<10:25, 13.0 steps/min]2025-08-08 19:27:59,799 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
" 32%|█████████████---------------------------| 65/200 [4:55<10:13, 13.2 steps/min]2025-08-08 19:28:00,988 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n",
" 32%|█████████████---------------------------| 65/200 [4:58<10:20, 13.1 steps/min]2025-08-08 19:28:04,640 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
" 32%|█████████████---------------------------| 65/200 [4:59<10:22, 13.0 steps/min]2025-08-08 19:28:05,289 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
" 32%|█████████████---------------------------| 65/200 [5:00<10:25, 13.0 steps/min]2025-08-08 19:28:06,432 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n",
" 32%|█████████████---------------------------| 65/200 [5:02<10:29, 12.9 steps/min]2025-08-08 19:28:09,350 - agent.ComputerAgent - INFO - Computer: type({'text': 'Tasks: Configure Default Build Task'})\n",
"2025-08-08 19:28:10,636 - agent.ComputerAgent - INFO - Computer: type({'text': 'drip coffee maker'})\n",
" 34%|█████████████---------------------------| 67/200 [5:10<10:17, 12.9 steps/min]2025-08-08 19:28:17,499 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['BACKSPACE']})\n",
"2025-08-08 19:28:18,798 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 676, 'y': 101})\n",
" 34%|█████████████---------------------------| 67/200 [5:14<10:23, 12.8 steps/min]2025-08-08 19:28:19,419 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n",
"2025-08-08 19:28:20,058 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n",
" 34%|█████████████---------------------------| 69/200 [5:16<10:00, 13.1 steps/min]2025-08-08 19:28:22,940 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus active editor group'})\n",
" 35%|██████████████--------------------------| 70/200 [5:20<9:54, 13.1 steps/min]]2025-08-08 19:28:26,283 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
" 35%|██████████████--------------------------| 70/200 [5:21<9:57, 13.1 steps/min]2025-08-08 19:28:26,920 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
"2025-08-08 19:28:27,559 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n",
" 36%|██████████████--------------------------| 71/200 [5:23<9:48, 13.1 steps/min]2025-08-08 19:28:29,749 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n",
" 36%|██████████████--------------------------| 71/200 [5:27<9:54, 13.0 steps/min]2025-08-08 19:28:33,099 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
" 36%|██████████████--------------------------| 71/200 [5:28<9:56, 13.0 steps/min]2025-08-08 19:28:33,732 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
" 36%|██████████████--------------------------| 72/200 [5:34<9:54, 12.9 steps/min]2025-08-08 19:28:39,897 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
" 36%|██████████████--------------------------| 72/200 [5:35<9:56, 12.9 steps/min]2025-08-08 19:28:41,768 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['BACKSPACE']})\n",
" 36%|██████████████--------------------------| 73/200 [5:38<9:48, 13.0 steps/min]2025-08-08 19:28:45,130 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 77, 'y': 236})\n",
" 36%|██████████████--------------------------| 73/200 [5:40<9:52, 12.9 steps/min]2025-08-08 19:28:46,538 - agent.ComputerAgent - INFO - Computer: click({'button': 'right', 'x': 638, 'y': 100})\n",
" 37%|██████████████--------------------------| 74/200 [5:41<9:42, 13.0 steps/min]2025-08-08 19:28:47,895 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 732, 'y': 179})\n",
" 38%|███████████████-------------------------| 75/200 [5:43<9:32, 13.1 steps/min]2025-08-08 19:28:49,073 - agent.ComputerAgent - INFO - LLM processing started with 33 messages\n",
" 38%|███████████████-------------------------| 76/200 [5:46<9:25, 13.2 steps/min]2025-08-08 19:28:52,240 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n",
" 38%|███████████████-------------------------| 76/200 [5:48<9:28, 13.1 steps/min]2025-08-08 19:28:54,419 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
" 38%|███████████████-------------------------| 76/200 [5:49<9:30, 13.0 steps/min]2025-08-08 19:28:55,580 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
" 38%|███████████████-------------------------| 76/200 [5:53<9:37, 12.9 steps/min]2025-08-08 19:29:00,506 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
" 38%|███████████████-------------------------| 77/200 [5:57<9:31, 12.9 steps/min]2025-08-08 19:29:03,867 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['BACKSPACE']})\n",
" 39%|███████████████-------------------------| 78/200 [6:01<9:24, 13.0 steps/min]2025-08-08 19:29:07,492 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
" 39%|███████████████-------------------------| 78/200 [6:02<9:27, 12.9 steps/min]2025-08-08 19:29:08,836 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 75, 'y': 525})\n",
"2025-08-08 19:29:10,446 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 684, 'y': 114})\n",
" 40%|████████████████------------------------| 80/200 [6:06<9:10, 13.1 steps/min]2025-08-08 19:29:12,120 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
"2025-08-08 19:29:13,475 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 569, 'y': 186})\n",
" 40%|████████████████------------------------| 81/200 [6:10<9:04, 13.1 steps/min]2025-08-08 19:29:16,613 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
" 40%|████████████████------------------------| 81/200 [6:11<9:06, 13.1 steps/min]2025-08-08 19:29:17,780 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n",
" 40%|████████████████------------------------| 81/200 [6:16<9:12, 12.9 steps/min]2025-08-08 19:29:21,989 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
" 40%|████████████████------------------------| 81/200 [6:18<9:15, 12.8 steps/min]2025-08-08 19:29:24,280 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 92, 'y': 524})\n",
" 41%|████████████████------------------------| 82/200 [6:20<9:07, 12.9 steps/min]2025-08-08 19:29:26,630 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 658, 'y': 147})\n",
" 42%|████████████████------------------------| 83/200 [6:24<9:02, 12.9 steps/min]2025-08-08 19:29:30,800 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
" 42%|████████████████------------------------| 83/200 [6:26<9:04, 12.9 steps/min]2025-08-08 19:29:32,144 - agent.ComputerAgent - INFO - Agent: Task completed.\n",
"2025-08-08 19:29:32,760 - agent.ComputerAgent - INFO - Total usage:\n",
" - input_tokens: 11426\n",
" - input_tokens_details:\n",
" - cached_tokens: 0\n",
" - output_tokens: 69\n",
" - output_tokens_details:\n",
" - reasoning_tokens: 0\n",
" - total_tokens: 11495\n",
" 42%|████████████████------------------------| 84/200 [6:28<8:55, 13.0 steps/min]2025-08-08 19:29:33,941 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
" 42%|████████████████------------------------| 84/200 [6:29<8:57, 12.9 steps/min]2025-08-08 19:29:35,277 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 81, 'y': 347})\n",
" 42%|████████████████------------------------| 84/200 [6:30<8:59, 12.9 steps/min]2025-08-08 19:29:36,610 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'J']})\n",
" 45%|██████████████████----------------------| 90/200 [6:37<8:05, 13.6 steps/min]2025-08-08 19:29:42,940 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n",
" 45%|██████████████████----------------------| 90/200 [6:38<8:06, 13.6 steps/min]2025-08-08 19:29:43,571 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
"2025-08-08 19:29:44,917 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 53, 'y': 288})\n",
" 46%|██████████████████----------------------| 91/200 [6:45<8:05, 13.5 steps/min]2025-08-08 19:29:51,777 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 700, 'y': 241})\n",
" 46%|██████████████████----------------------| 91/200 [6:47<8:07, 13.4 steps/min]2025-08-08 19:29:52,610 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
" 46%|██████████████████----------------------| 92/200 [6:53<8:04, 13.4 steps/min]2025-08-08 19:29:58,819 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n",
"2025-08-08 19:30:00,108 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
" 46%|██████████████████----------------------| 92/200 [6:55<8:07, 13.3 steps/min]2025-08-08 19:30:01,399 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 429, 'x': 117, 'y': 706})\n",
" 47%|██████████████████----------------------| 94/200 [6:57<7:51, 13.5 steps/min]2025-08-08 19:30:03,698 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 172, 'x': 212, 'y': 423})\n",
" 48%|███████████████████---------------------| 95/200 [7:01<7:46, 13.5 steps/min]2025-08-08 19:30:07,860 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n",
" 48%|███████████████████---------------------| 95/200 [7:03<7:47, 13.5 steps/min]2025-08-08 19:30:08,549 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
" 48%|███████████████████---------------------| 95/200 [7:05<7:49, 13.4 steps/min]2025-08-08 19:30:10,720 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n",
" 48%|███████████████████---------------------| 95/200 [7:06<7:51, 13.4 steps/min]2025-08-08 19:30:12,602 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 768, 'y': 246}, {'x': 741, 'y': 245}]})\n",
" 48%|███████████████████---------------------| 96/200 [7:13<7:50, 13.3 steps/min]2025-08-08 19:30:19,742 - agent.ComputerAgent - INFO - LLM processing started with 45 messages\n",
" 48%|███████████████████---------------------| 96/200 [7:21<7:57, 13.1 steps/min]2025-08-08 19:30:27,150 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 267, 'y': 187})\n",
" 48%|███████████████████---------------------| 97/200 [7:24<7:51, 13.1 steps/min]2025-08-08 19:30:31,050 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 150, 'x': 195, 'y': 446})\n",
"2025-08-08 19:30:32,313 - agent.ComputerAgent - INFO - Computer: screenshot({})\n",
" 48%|███████████████████---------------------| 97/200 [7:27<7:55, 13.0 steps/min]2025-08-08 19:30:33,651 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 706, 'y': 243}, {'x': 681, 'y': 240}]})\n",
" 50%|███████████████████---------------------| 99/200 [7:28<7:38, 13.2 steps/min]2025-08-08 19:30:34,278 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
" 50%|████████████████████--------------------| 100/200 [7:29<7:29, 13.3 steps/min]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"No screenshot found, taking screenshot\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-08 19:30:35,460 - agent.ComputerAgent - INFO - LLM processing started with 45 messages\n",
" 50%|████████████████████--------------------| 100/200 [7:32<7:32, 13.2 steps/min]2025-08-08 19:30:38,650 - agent.ComputerAgent - INFO - LLM processing started with 44 messages\n",
" 50%|████████████████████--------------------| 100/200 [7:35<7:35, 13.2 steps/min]2025-08-08 19:30:40,900 - agent.ComputerAgent - INFO - LLM processing started with 48 messages\n",
" 50%|████████████████████--------------------| 100/200 [7:37<7:37, 13.1 steps/min]2025-08-08 19:30:43,737 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 79, 'y': 637})\n",
" 50%|████████████████████--------------------| 101/200 [7:40<7:30, 13.2 steps/min]2025-08-08 19:30:46,567 - agent.ComputerAgent - INFO - Agent: I am unable to complete the task. However, I see that the outside task has been completed meaning the \"Focus Active Editor Group\" key binding has been set. Task completed\n",
"2025-08-08 19:30:47,191 - agent.ComputerAgent - INFO - Total usage:\n",
" - input_tokens: 5872\n",
" - input_tokens_details:\n",
" - cached_tokens: 0\n",
" - output_tokens: 37\n",
" - output_tokens_details:\n",
" - reasoning_tokens: 0\n",
" - total_tokens: 5909\n",
" 51%|████████████████████--------------------| 102/200 [7:44<7:26, 13.2 steps/min]2025-08-08 19:30:49,850 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
" 52%|████████████████████--------------------| 104/200 [7:52<7:16, 13.2 steps/min]2025-08-08 19:30:58,933 - agent.ComputerAgent - INFO - Computer: type({'text': '25'})\n",
"2025-08-08 19:31:00,266 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 552, 'y': 630})\n",
" 53%|█████████████████████-------------------| 106/200 [7:56<7:02, 13.3 steps/min]2025-08-08 19:31:02,587 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 753, 'y': 241})\n",
" 54%|█████████████████████-------------------| 107/200 [7:59<6:57, 13.4 steps/min]2025-08-08 19:31:05,710 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n",
" 54%|█████████████████████-------------------| 107/200 [8:01<6:58, 13.3 steps/min]2025-08-08 19:31:07,372 - agent.ComputerAgent - INFO - LLM processing started with 47 messages\n",
" 54%|█████████████████████-------------------| 107/200 [8:02<6:59, 13.3 steps/min]2025-08-08 19:31:08,050 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 54%|█████████████████████-------------------| 107/200 [8:03<7:00, 13.3 steps/min]2025-08-08 19:31:09,710 - agent.ComputerAgent - INFO - LLM processing started with 51 messages\n",
" 54%|█████████████████████-------------------| 107/200 [8:05<7:01, 13.2 steps/min]2025-08-08 19:31:11,042 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 54%|█████████████████████-------------------| 108/200 [8:09<6:56, 13.2 steps/min]2025-08-08 19:31:15,879 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 91, 'y': 636})\n",
" 54%|█████████████████████-------------------| 108/200 [8:11<6:58, 13.2 steps/min]2025-08-08 19:31:17,020 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
" 55%|█████████████████████-------------------| 109/200 [8:13<6:51, 13.3 steps/min]2025-08-08 19:31:19,881 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 55%|██████████████████████------------------| 110/200 [8:16<6:45, 13.3 steps/min]2025-08-08 19:31:22,060 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n",
" 55%|██████████████████████------------------| 110/200 [8:17<6:46, 13.3 steps/min]2025-08-08 19:31:23,916 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 743, 'y': 240}, {'x': 681, 'y': 243}]})\n",
" 56%|██████████████████████------------------| 111/200 [8:20<6:41, 13.3 steps/min]2025-08-08 19:31:26,060 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
" 56%|██████████████████████------------------| 111/200 [8:24<6:44, 13.2 steps/min]2025-08-08 19:31:30,411 - agent.ComputerAgent - INFO - Agent: The screen is currently blank. I'll continue monitoring to see if anything changes.\n",
"2025-08-08 19:31:30,412 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 56%|██████████████████████------------------| 111/200 [8:25<6:45, 13.2 steps/min]2025-08-08 19:31:31,824 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 358, 'y': 548})\n",
"2025-08-08 19:31:33,154 - agent.ComputerAgent - INFO - Computer: type({'text': '60'})\n",
" 57%|██████████████████████------------------| 114/200 [8:33<6:27, 13.3 steps/min]2025-08-08 19:31:39,501 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
" 57%|██████████████████████------------------| 114/200 [8:34<6:28, 13.3 steps/min]2025-08-08 19:31:40,131 - agent.ComputerAgent - INFO - LLM processing started with 50 messages\n",
" 57%|██████████████████████------------------| 114/200 [8:36<6:29, 13.2 steps/min]2025-08-08 19:31:42,301 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
" 57%|██████████████████████------------------| 114/200 [8:41<6:33, 13.1 steps/min]2025-08-08 19:31:48,792 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 57%|███████████████████████-----------------| 115/200 [8:49<6:31, 13.0 steps/min]2025-08-08 19:31:54,501 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 57%|███████████████████████-----------------| 115/200 [8:50<6:31, 13.0 steps/min]2025-08-08 19:31:55,230 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
" 57%|███████████████████████-----------------| 115/200 [8:52<6:33, 13.0 steps/min]2025-08-08 19:31:58,023 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:31:59,366 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 131, 'y': 636})\n",
" 57%|███████████████████████-----------------| 115/200 [8:54<6:35, 12.9 steps/min]2025-08-08 19:32:01,196 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 28, 'y': 158})\n",
"2025-08-08 19:32:02,508 - agent.ComputerAgent - INFO - Agent: Chrome is open. I'll configure it to delete browsing data automatically upon closing. I should be able to do so in the settings.\n",
"2025-08-08 19:32:02,508 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1008, 'y': 31})\n",
" 60%|███████████████████████-----------------| 119/200 [8:58<6:06, 13.3 steps/min]2025-08-08 19:32:04,651 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
" 60%|███████████████████████-----------------| 119/200 [8:59<6:07, 13.2 steps/min]2025-08-08 19:32:05,840 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
" 60%|███████████████████████-----------------| 119/200 [9:02<6:09, 13.2 steps/min]2025-08-08 19:32:08,696 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 60%|███████████████████████-----------------| 119/200 [9:03<6:10, 13.1 steps/min]2025-08-08 19:32:09,381 - agent.ComputerAgent - INFO - LLM processing started with 53 messages\n",
"2025-08-08 19:32:10,033 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
" 60%|████████████████████████----------------| 120/200 [9:09<6:06, 13.1 steps/min]2025-08-08 19:32:15,457 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n",
" 60%|████████████████████████----------------| 120/200 [9:11<6:07, 13.0 steps/min]2025-08-08 19:32:17,101 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
"2025-08-08 19:32:18,450 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 16, 'y': 142})\n",
" 60%|████████████████████████----------------| 120/200 [9:13<6:09, 13.0 steps/min]Error running ComputerAgent: litellm.BadRequestError: OpenAIException - {\n",
" \"error\": {\n",
" \"message\": \"Item 'rs_689688fccf8481908fd292817ff04fd8027ddb559dbe0463' of type 'reasoning' was provided without its required following item.\",\n",
" \"type\": \"invalid_request_error\",\n",
" \"param\": \"input\",\n",
" \"code\": null\n",
" }\n",
"} LiteLLM Retried: 3 times\n",
" 61%|████████████████████████----------------| 122/200 [9:20<5:58, 13.1 steps/min]2025-08-08 19:32:26,107 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
"2025-08-08 19:32:27,452 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 541, 'y': 251})\n",
" 62%|████████████████████████----------------| 123/200 [9:23<5:52, 13.1 steps/min]2025-08-08 19:32:29,893 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 28, 'y': 150})\n",
" 62%|████████████████████████----------------| 124/200 [9:27<5:47, 13.1 steps/min]2025-08-08 19:32:33,695 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 62%|████████████████████████----------------| 124/200 [9:28<5:48, 13.1 steps/min]2025-08-08 19:32:34,860 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
" 62%|█████████████████████████---------------| 125/200 [9:35<5:45, 13.0 steps/min]2025-08-08 19:32:41,071 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n",
" 65%|██████████████████████████--------------| 130/200 [9:40<5:12, 13.4 steps/min]2025-08-08 19:32:46,139 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 558, 'y': 431})\n",
" 66%|██████████████████████████--------------| 131/200 [9:43<5:07, 13.5 steps/min]2025-08-08 19:32:49,527 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1000, 'y': 61})\n",
" 66%|██████████████████████████--------------| 132/200 [9:46<5:02, 13.5 steps/min]2025-08-08 19:32:52,701 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n",
" 66%|██████████████████████████--------------| 132/200 [9:48<5:03, 13.4 steps/min]2025-08-08 19:32:55,391 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
" 66%|██████████████████████████--------------| 132/200 [9:58<5:08, 13.2 steps/min]2025-08-08 19:33:04,800 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 377, 'y': 624})\n",
" 66%|██████████████████████████--------------| 133/200 [10:03<5:03, 13.2 steps/min]2025-08-08 19:33:09,179 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1002, 'y': 65})\n",
" 67%|██████████████████████████--------------| 134/200 [10:06<4:58, 13.3 steps/min]2025-08-08 19:33:11,832 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
" 67%|██████████████████████████--------------| 134/200 [10:09<5:00, 13.2 steps/min]2025-08-08 19:33:14,981 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
" 67%|██████████████████████████--------------| 134/200 [10:15<5:03, 13.1 steps/min]2025-08-08 19:33:21,886 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1003, 'y': 62})\n",
" 67%|██████████████████████████--------------| 134/200 [10:17<5:03, 13.0 steps/min]2025-08-08 19:33:23,717 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 330, 'y': 536})\n",
" 68%|███████████████████████████-------------| 136/200 [10:22<4:53, 13.1 steps/min]2025-08-08 19:33:28,911 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
" 68%|███████████████████████████-------------| 136/200 [10:25<4:54, 13.1 steps/min]2025-08-08 19:33:30,580 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
" 68%|███████████████████████████-------------| 136/200 [10:31<4:57, 12.9 steps/min]2025-08-08 19:33:37,956 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1002, 'y': 63})\n",
" 68%|███████████████████████████-------------| 137/200 [10:36<4:52, 12.9 steps/min]2025-08-08 19:33:42,842 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 346, 'y': 208})\n",
" 68%|███████████████████████████-------------| 137/200 [10:38<4:53, 12.9 steps/min]2025-08-08 19:33:43,490 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
" 69%|███████████████████████████-------------| 138/200 [10:45<4:49, 12.8 steps/min]2025-08-08 19:33:51,422 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1002, 'y': 64})\n",
" 69%|███████████████████████████-------------| 138/200 [10:46<4:50, 12.8 steps/min]2025-08-08 19:33:52,042 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n",
" 70%|███████████████████████████-------------| 139/200 [10:51<4:45, 12.8 steps/min]2025-08-08 19:33:57,212 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
" 70%|███████████████████████████-------------| 139/200 [10:53<4:46, 12.8 steps/min]2025-08-08 19:34:00,042 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 70%|████████████████████████████------------| 140/200 [10:56<4:41, 12.8 steps/min]2025-08-08 19:34:02,329 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1007, 'y': 62})\n",
" 70%|████████████████████████████------------| 141/200 [11:02<4:37, 12.8 steps/min]2025-08-08 19:34:08,003 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n",
"2025-08-08 19:34:08,681 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
" 70%|████████████████████████████------------| 141/200 [11:10<4:40, 12.6 steps/min]2025-08-08 19:34:17,904 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 856, 'y': 571})\n",
" 71%|████████████████████████████------------| 142/200 [11:16<4:36, 12.6 steps/min]2025-08-08 19:34:22,796 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 210, 'x': 189, 'y': 577})\n",
" 71%|████████████████████████████------------| 142/200 [11:18<4:36, 12.6 steps/min]2025-08-08 19:34:23,443 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
" 72%|████████████████████████████------------| 143/200 [11:24<4:32, 12.5 steps/min]2025-08-08 19:34:30,151 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n",
" 72%|████████████████████████████------------| 143/200 [11:25<4:33, 12.5 steps/min]2025-08-08 19:34:31,534 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 123, 'y': 197})\n",
" 72%|████████████████████████████------------| 144/200 [11:31<4:29, 12.5 steps/min]2025-08-08 19:34:37,221 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
" 72%|████████████████████████████------------| 144/200 [11:33<4:29, 12.5 steps/min]2025-08-08 19:34:40,096 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 201, 'x': 206, 'y': 576})\n",
" 72%|█████████████████████████████-----------| 145/200 [11:39<4:25, 12.4 steps/min]2025-08-08 19:34:45,523 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 469, 'y': 286})\n",
" 73%|█████████████████████████████-----------| 146/200 [11:41<4:19, 12.5 steps/min]2025-08-08 19:34:47,192 - agent.ComputerAgent - INFO - LLM processing started with 27 messages\n",
" 73%|█████████████████████████████-----------| 146/200 [11:45<4:21, 12.4 steps/min]2025-08-08 19:34:51,413 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n",
" 73%|█████████████████████████████-----------| 146/200 [11:54<4:24, 12.3 steps/min]2025-08-08 19:35:00,828 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 600, 'y': 297})\n",
" 73%|█████████████████████████████-----------| 146/200 [11:56<4:24, 12.2 steps/min]2025-08-08 19:35:02,670 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 349, 'y': 166})\n",
" 74%|█████████████████████████████-----------| 148/200 [12:01<4:13, 12.3 steps/min]2025-08-08 19:35:07,831 - agent.ComputerAgent - INFO - LLM processing started with 46 messages\n",
" 74%|█████████████████████████████-----------| 148/200 [12:04<4:14, 12.3 steps/min]2025-08-08 19:35:09,512 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n",
" 74%|█████████████████████████████-----------| 148/200 [12:12<4:17, 12.1 steps/min]2025-08-08 19:35:18,465 - agent.ComputerAgent - INFO - Computer: type({'text': 'splash screen'})\n",
" 74%|█████████████████████████████-----------| 148/200 [12:13<4:17, 12.1 steps/min]2025-08-08 19:35:20,254 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 370, 'y': 209})\n",
" 75%|██████████████████████████████----------| 150/200 [12:19<4:06, 12.2 steps/min]2025-08-08 19:35:25,432 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
" 75%|██████████████████████████████----------| 150/200 [12:20<4:06, 12.1 steps/min]2025-08-08 19:35:26,072 - agent.ComputerAgent - INFO - LLM processing started with 49 messages\n",
" 75%|██████████████████████████████----------| 150/200 [12:27<4:09, 12.0 steps/min]2025-08-08 19:35:34,465 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 582, 'y': 570})\n",
" 76%|██████████████████████████████----------| 151/200 [12:34<4:04, 12.0 steps/min]2025-08-08 19:35:40,143 - agent.ComputerAgent - INFO - LLM processing started with 52 messages\n",
"2025-08-08 19:35:41,503 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 409, 'y': 166})\n",
" 76%|██████████████████████████████----------| 152/200 [12:43<4:01, 11.9 steps/min]2025-08-08 19:35:49,127 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
" 76%|██████████████████████████████----------| 152/200 [12:49<4:03, 11.8 steps/min]2025-08-08 19:35:56,040 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 388, 'y': 101})\n",
" 76%|██████████████████████████████----------| 153/200 [12:52<3:57, 11.9 steps/min]2025-08-08 19:35:58,458 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n",
" 77%|██████████████████████████████----------| 154/200 [12:59<3:52, 11.9 steps/min]2025-08-08 19:36:05,632 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
" 77%|██████████████████████████████----------| 154/200 [13:09<3:55, 11.7 steps/min]2025-08-08 19:36:15,055 - agent.ComputerAgent - INFO - Computer: type({'text': 'logo'})\n",
" 78%|███████████████████████████████---------| 155/200 [13:16<3:51, 11.7 steps/min]2025-08-08 19:36:21,733 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n",
" 78%|███████████████████████████████---------| 155/200 [13:21<3:52, 11.6 steps/min]2025-08-08 19:36:26,422 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 78%|███████████████████████████████---------| 155/200 [13:27<3:54, 11.5 steps/min]2025-08-08 19:36:33,289 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 605, 'y': 523})\n",
" 78%|███████████████████████████████---------| 156/200 [13:31<3:48, 11.5 steps/min]2025-08-08 19:36:37,669 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 731, 'y': 130})\n",
" 78%|███████████████████████████████---------| 157/200 [13:34<3:43, 11.6 steps/min]2025-08-08 19:36:40,312 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
" 78%|███████████████████████████████---------| 157/200 [13:38<3:44, 11.5 steps/min]2025-08-08 19:36:45,032 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n",
" 78%|███████████████████████████████---------| 157/200 [14:00<3:50, 11.2 steps/min]2025-08-08 19:37:06,541 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 20, 'y': 160})\n",
"2025-08-08 19:37:07,826 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 601, 'y': 520})\n",
" 80%|███████████████████████████████---------| 159/200 [14:08<3:38, 11.2 steps/min]2025-08-08 19:37:13,510 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
"2025-08-08 19:37:14,191 - agent.ComputerAgent - INFO - LLM processing started with 45 messages\n",
" 80%|███████████████████████████████---------| 159/200 [14:16<3:40, 11.1 steps/min]2025-08-08 19:37:22,586 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 64, 'y': 745})\n",
" 80%|███████████████████████████████---------| 159/200 [14:17<3:41, 11.1 steps/min]2025-08-08 19:37:24,998 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 80%|████████████████████████████████--------| 161/200 [14:22<3:28, 11.2 steps/min]2025-08-08 19:37:28,163 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
" 80%|████████████████████████████████--------| 161/200 [14:27<3:30, 11.1 steps/min]2025-08-08 19:37:32,874 - agent.ComputerAgent - INFO - LLM processing started with 47 messages\n",
" 80%|████████████████████████████████--------| 161/200 [14:31<3:31, 11.1 steps/min]2025-08-08 19:37:37,739 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 135, 'y': 743})\n",
" 81%|████████████████████████████████--------| 162/200 [14:33<3:25, 11.1 steps/min]2025-08-08 19:37:39,367 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
" 81%|████████████████████████████████--------| 162/200 [14:38<3:25, 11.1 steps/min]2025-08-08 19:37:43,552 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
" 81%|████████████████████████████████--------| 162/200 [14:41<3:26, 11.0 steps/min]2025-08-08 19:37:47,420 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 17, 'y': 141})\n",
" 81%|████████████████████████████████--------| 162/200 [14:42<3:27, 11.0 steps/min]2025-08-08 19:37:49,292 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:37:50,567 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 82%|█████████████████████████████████-------| 165/200 [14:49<3:08, 11.1 steps/min]2025-08-08 19:37:55,784 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
" 82%|█████████████████████████████████-------| 165/200 [14:51<3:09, 11.1 steps/min]2025-08-08 19:37:56,434 - agent.ComputerAgent - INFO - LLM processing started with 50 messages\n",
"2025-08-08 19:37:57,074 - agent.ComputerAgent - INFO - LLM processing started with 15 messages\n",
" 82%|█████████████████████████████████-------| 165/200 [14:58<3:10, 11.0 steps/min]2025-08-08 19:38:04,386 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 195, 'y': 62})\n",
" 83%|█████████████████████████████████-------| 166/200 [15:00<3:04, 11.1 steps/min]2025-08-08 19:38:07,265 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 130, 'y': 743})\n",
" 84%|█████████████████████████████████-------| 167/200 [15:04<2:58, 11.1 steps/min]2025-08-08 19:38:09,895 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
" 84%|█████████████████████████████████-------| 167/200 [15:06<2:59, 11.1 steps/min]2025-08-08 19:38:13,330 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1008, 'y': 31})\n",
" 84%|█████████████████████████████████-------| 167/200 [15:08<2:59, 11.0 steps/min]2025-08-08 19:38:13,936 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n",
" 84%|█████████████████████████████████-------| 168/200 [15:11<2:53, 11.1 steps/min]2025-08-08 19:38:17,811 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 30, 'y': 438})\n",
" 84%|█████████████████████████████████-------| 169/200 [15:18<2:48, 11.0 steps/min]2025-08-08 19:38:23,525 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
" 84%|█████████████████████████████████-------| 169/200 [15:22<2:49, 11.0 steps/min]2025-08-08 19:38:28,914 - agent.ComputerAgent - INFO - Computer: double_click({'x': 126, 'y': 742})\n",
" 85%|██████████████████████████████████------| 170/200 [15:26<2:43, 11.0 steps/min]2025-08-08 19:38:32,765 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 108, 'y': 183})\n",
" 86%|██████████████████████████████████------| 171/200 [15:31<2:37, 11.0 steps/min]2025-08-08 19:38:36,966 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
" 86%|██████████████████████████████████------| 171/200 [15:32<2:38, 11.0 steps/min]2025-08-08 19:38:38,114 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
" 86%|██████████████████████████████████------| 171/200 [15:40<2:39, 10.9 steps/min]2025-08-08 19:38:47,011 - agent.ComputerAgent - INFO - Computer: type({'text': 'Percentage Tables'})\n",
" 86%|██████████████████████████████████------| 171/200 [15:42<2:39, 10.9 steps/min]2025-08-08 19:38:48,303 - agent.ComputerAgent - INFO - Computer: double_click({'x': 208, 'y': 109})\n",
" 86%|██████████████████████████████████------| 173/200 [15:47<2:27, 11.0 steps/min]2025-08-08 19:38:53,975 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
" 86%|██████████████████████████████████------| 173/200 [15:49<2:28, 10.9 steps/min]2025-08-08 19:38:54,654 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
" 86%|██████████████████████████████████------| 173/200 [15:56<2:29, 10.9 steps/min]2025-08-08 19:39:02,573 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 573, 'y': 431})\n",
" 87%|██████████████████████████████████------| 174/200 [15:59<2:23, 10.9 steps/min]2025-08-08 19:39:05,943 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 499, 'y': 216})\n",
" 88%|███████████████████████████████████-----| 175/200 [16:02<2:17, 10.9 steps/min]2025-08-08 19:39:08,625 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
" 88%|███████████████████████████████████-----| 175/200 [16:05<2:17, 10.9 steps/min]2025-08-08 19:39:11,825 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
" 88%|███████████████████████████████████-----| 175/200 [16:11<2:18, 10.8 steps/min]2025-08-08 19:39:17,697 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'C']})\n",
" 88%|███████████████████████████████████-----| 176/200 [16:17<2:13, 10.8 steps/min]2025-08-08 19:39:23,366 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
" 88%|███████████████████████████████████-----| 176/200 [16:20<2:13, 10.8 steps/min]2025-08-08 19:39:27,228 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 83, 'y': 139}, {'x': 182, 'y': 140}]})\n",
" 88%|███████████████████████████████████-----| 177/200 [16:26<2:08, 10.8 steps/min]2025-08-08 19:39:32,543 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 103})\n",
" 88%|███████████████████████████████████-----| 177/200 [16:27<2:08, 10.8 steps/min]2025-08-08 19:39:33,672 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
" 89%|███████████████████████████████████-----| 178/200 [16:33<2:02, 10.8 steps/min]2025-08-08 19:39:38,886 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
" 89%|███████████████████████████████████-----| 178/200 [16:39<2:03, 10.7 steps/min]2025-08-08 19:39:45,775 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 90%|███████████████████████████████████-----| 179/200 [16:47<1:58, 10.7 steps/min]2025-08-08 19:39:52,435 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
" 90%|███████████████████████████████████-----| 179/200 [16:53<1:58, 10.6 steps/min]2025-08-08 19:39:59,802 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 145, 'y': 744})\n",
" 90%|████████████████████████████████████----| 180/200 [16:56<1:52, 10.6 steps/min]2025-08-08 19:40:02,614 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 16, 'y': 107})\n",
" 90%|████████████████████████████████████----| 181/200 [16:59<1:47, 10.6 steps/min]2025-08-08 19:40:05,795 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
" 90%|████████████████████████████████████----| 181/200 [17:03<1:47, 10.6 steps/min]2025-08-08 19:40:08,465 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
" 90%|████████████████████████████████████----| 181/200 [17:10<1:48, 10.5 steps/min]2025-08-08 19:40:16,353 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1009, 'y': 30})\n",
" 91%|████████████████████████████████████----| 182/200 [17:16<1:42, 10.5 steps/min]2025-08-08 19:40:22,015 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
" 91%|████████████████████████████████████----| 182/200 [17:23<1:43, 10.5 steps/min]2025-08-08 19:40:29,384 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 764, 'y': 105})\n",
" 92%|████████████████████████████████████----| 183/200 [17:25<1:37, 10.5 steps/min]2025-08-08 19:40:32,732 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 648, 'y': 438})\n",
" 92%|████████████████████████████████████----| 184/200 [17:29<1:31, 10.5 steps/min]2025-08-08 19:40:35,397 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
" 92%|████████████████████████████████████----| 184/200 [17:33<1:31, 10.5 steps/min]2025-08-08 19:40:40,065 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n",
" 92%|████████████████████████████████████----| 184/200 [17:37<1:31, 10.4 steps/min]2025-08-08 19:40:43,903 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 108})\n",
" 92%|█████████████████████████████████████---| 185/200 [17:44<1:26, 10.4 steps/min]2025-08-08 19:40:49,587 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
" 92%|█████████████████████████████████████---| 185/200 [17:46<1:26, 10.4 steps/min]2025-08-08 19:40:52,403 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 33, 'y': 154})\n",
" 93%|█████████████████████████████████████---| 186/200 [17:53<1:20, 10.4 steps/min]2025-08-08 19:40:59,804 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 763, 'y': 105})\n",
" 94%|█████████████████████████████████████---| 187/200 [18:00<1:15, 10.4 steps/min]2025-08-08 19:41:05,496 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
" 94%|█████████████████████████████████████---| 187/200 [18:01<1:15, 10.4 steps/min]2025-08-08 19:41:06,675 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n",
" 94%|█████████████████████████████████████---| 187/200 [18:10<1:15, 10.3 steps/min]2025-08-08 19:41:17,113 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 23, 'y': 108})\n",
" 94%|█████████████████████████████████████---| 187/200 [18:12<1:15, 10.3 steps/min]2025-08-08 19:41:18,459 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 23, 'y': 155})\n",
" 94%|█████████████████████████████████████---| 189/200 [18:17<1:03, 10.3 steps/min]2025-08-08 19:41:23,639 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
" 94%|█████████████████████████████████████---| 189/200 [18:19<1:04, 10.3 steps/min]2025-08-08 19:41:26,346 - agent.ComputerAgent - INFO - LLM processing started with 44 messages\n",
" 94%|█████████████████████████████████████---| 189/200 [18:27<1:04, 10.2 steps/min]2025-08-08 19:41:34,220 - agent.ComputerAgent - INFO - Computer: wait({})\n",
"2025-08-08 19:41:35,516 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 30, 'y': 106})\n",
" 96%|██████████████████████████████████████--| 191/200 [18:35<0:52, 10.3 steps/min]2025-08-08 19:41:41,726 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n",
" 96%|██████████████████████████████████████--| 191/200 [18:37<0:52, 10.3 steps/min]2025-08-08 19:41:42,406 - agent.ComputerAgent - INFO - LLM processing started with 46 messages\n",
" 96%|██████████████████████████████████████--| 191/200 [18:48<0:53, 10.2 steps/min]2025-08-08 19:41:54,298 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 434, 'y': 545})\n",
" 96%|██████████████████████████████████████--| 192/200 [18:54<0:47, 10.2 steps/min]2025-08-08 19:42:01,182 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1008, 'y': 34})\n",
" 96%|██████████████████████████████████████--| 192/200 [18:56<0:47, 10.1 steps/min]2025-08-08 19:42:02,336 - agent.ComputerAgent - INFO - LLM processing started with 46 messages\n",
" 96%|██████████████████████████████████████--| 193/200 [19:00<0:41, 10.2 steps/min]2025-08-08 19:42:07,037 - agent.ComputerAgent - INFO - LLM processing started with 49 messages\n",
" 96%|██████████████████████████████████████--| 193/200 [19:06<0:41, 10.1 steps/min]2025-08-08 19:42:13,366 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 14, 'y': 105})\n",
" 96%|██████████████████████████████████████--| 193/200 [19:08<0:41, 10.1 steps/min]2025-08-08 19:42:15,279 - agent.ComputerAgent - INFO - Computer: wait({})\n",
" 98%|███████████████████████████████████████-| 195/200 [19:13<0:29, 10.1 steps/min]2025-08-08 19:42:19,416 - agent.ComputerAgent - INFO - LLM processing started with 49 messages\n",
" 98%|███████████████████████████████████████-| 195/200 [19:15<0:29, 10.1 steps/min]2025-08-08 19:42:21,596 - agent.ComputerAgent - INFO - LLM processing started with 51 messages\n",
" 98%|███████████████████████████████████████-| 195/200 [19:23<0:29, 10.1 steps/min]2025-08-08 19:42:29,973 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1011, 'y': 34})\n",
"2025-08-08 19:42:31,295 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 439, 'y': 103}, {'x': 608, 'y': 98}]})\n",
" 98%|███████████████████████████████████████-| 197/200 [19:31<0:17, 10.1 steps/min]2025-08-08 19:42:37,998 - agent.ComputerAgent - INFO - LLM processing started with 52 messages\n",
" 98%|███████████████████████████████████████-| 197/200 [19:33<0:17, 10.1 steps/min]2025-08-08 19:42:38,676 - agent.ComputerAgent - INFO - LLM processing started with 53 messages\n",
" 98%|███████████████████████████████████████-| 197/200 [19:46<0:18, 10.0 steps/min]2025-08-08 19:42:52,119 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 154})\n",
" 99%|███████████████████████████████████████-| 198/200 [19:58<0:12, 9.9 steps/min]]2025-08-08 19:43:05,074 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 61, 'y': 15})\n",
"100%|███████████████████████████████████████-| 199/200 [20:05<0:06, 9.9 steps/min]2025-08-08 19:43:11,267 - agent.ComputerAgent - INFO - LLM processing started with 55 messages\n",
"100%|███████████████████████████████████████-| 199/200 [20:21<0:06, 9.8 steps/min]2025-08-08 19:43:28,471 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 59, 'y': 127})\n",
"100%|████████████████████████████████████████| 200/200 [20:32<0:00, 9.7 steps/min]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'task_count': 11, 'avg_reward': 0.2, 'success_rate': 18.181818181818183}\n",
"View results at: https://app.hud.so/jobs/d80a9a78-0e06-4b49-ba3e-cb5c8db4ba7c\n"
]
}
],
"source": [
"from agent.integrations.hud import run_job\n",
"from hud import load_taskset\n",
"from hud.taskset import TaskSet\n",
"import logging\n",
"\n",
"# Benchmark configuration: adjust these two values to trade coverage for runtime/cost.\n",
"NUM_TASKS = 10  # number of tasks (taken from the start of the taskset) to evaluate\n",
"MAX_CONCURRENT_TASKS = 5  # forwarded to run_job as max_concurrent_tasks\n",
"\n",
"# Load the full taskset, then keep only the first NUM_TASKS tasks.\n",
"# (The full OSWorld-Verified taskset reported 367 tasks when this notebook was last run.)\n",
"full_taskset = await load_taskset(\"OSWorld-Verified\")\n",
"taskset = TaskSet(tasks=full_taskset[:NUM_TASKS])\n",
"\n",
"# Run benchmark job\n",
"job = await run_job(\n",
"    model=\"openai/computer-use-preview\",\n",
"    task_or_taskset=taskset,\n",
"    job_name=\"test-computeragent-job\",\n",
"    max_concurrent_tasks=MAX_CONCURRENT_TASKS,\n",
"    # add any extra ComputerAgent kwargs:\n",
"    verbosity=logging.INFO,  # Enable logging\n",
"    trajectory_dir=\"trajectories\"  # Save trajectories locally\n",
")\n",
"\n",
"# Get results OR view them at app.hud.so\n",
"print(await job.get_analytics())\n",
"print(f\"View results at: https://app.hud.so/jobs/{job.id}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}