diff --git a/notebooks/eval_osworld.ipynb b/notebooks/eval_osworld.ipynb
index 0d58f58e..a287022c 100644
--- a/notebooks/eval_osworld.ipynb
+++ b/notebooks/eval_osworld.ipynb
@@ -50,18 +50,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/dillondupont/cua-clean/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Import the HUD-integrated ComputerAgent\n",
"from agent.integrations.hud import ComputerAgent"
@@ -93,7 +84,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -121,14 +112,14 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "[INFO] 2025-08-08 15:16:46,133 | hud.environment | View the live trace at https://app.hud.so/trace/662fd59f-5a8d-4205-9b88-32c00d0feab0\n"
+ "[INFO] 2025-08-08 19:08:17,078 | hud.environment | View the live trace at https://app.hud.so/trace/ca88c178-cf40-499b-8ad3-d5d60348d9fe\n"
]
},
{
@@ -147,7 +138,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -156,7 +147,7 @@
"\n",
"
\n",
@@ -172,10 +163,10 @@
{
"data": {
"text/plain": [
- "'\\n \\n '"
+ "'\\n \\n '"
]
},
- "execution_count": 6,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -188,21 +179,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Test with Claude Model\n",
+ "## Test with any supported CUA model\n",
"\n",
- "The ComputerAgent can use Claude models just like the original ClaudeAgent:"
+ "The HUD-integrated ComputerAgent can use Claude, OpenAI, UI-TARS, or composed models, just like the original ComputerAgent:"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created Claude agent: computeragent-claude-3-5-sonnet-20241022\n"
+ "Created agent: computeragent-computer-use-preview\n"
]
}
],
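@@ -210,17 +201,20 @@
"import logging\n",
- "# Create ComputerAgent with Claude\n",
+ "# Create ComputerAgent (works with any supported CUA model; OpenAI computer-use-preview is used here)\n",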
"claude_agent = ComputerAgent(\n",
- " model=\"anthropic/claude-3-5-sonnet-20241022\",\n",
- " environment=\"linux\", # OSWorld typically uses Linux\n",
+ " # model=\"anthropic/claude-3-5-sonnet-20241022\",\n",
+ " model=\"openai/computer-use-preview\",\n",
+ " # environment=\"linux\", # OSWorld typically uses Linux\n",
+ " environment=\"browser\", # SheetBench uses the browser\n",
+ " trajectory_dir=\"trajectories\",\n",
" verbosity=logging.INFO,\n",
")\n",
"\n",
- "print(f\"Created Claude agent: {claude_agent.name}\")"
+ "print(f\"Created agent: {claude_agent.name}\")"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -235,15 +229,127 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2025-08-08 15:17:04,030 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n"
+ "2025-08-08 19:14:10,479 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ "2025-08-08 19:14:18,867 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 55, 'y': 149})\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Agent's action: [ResponseAction(type='response', reasoning='I\\'ll help you complete this task step by step, but I notice that I don\\'t have any input data or access to Excel through the available functions. The only function I have access to is the \"computer\" function which allows for basic desktop interaction.\\n\\nTo properly assist you, I would need:\\n1. The actual input data you want to analyze\\n2. Access to Excel or another spreadsheet tool to perform the calculations\\n\\nCould you please provide the input data and confirm if there\\'s a specific way to access Excel or the data file on this system?\\n\\nOnce provided, I can help calculate correlations between volume and next day price changes, sort the data as specified, and format the results according to your requirements.', logs={'conversation_length': 2}, text='I\\'ll help you complete this task step by step, but I notice that I don\\'t have any input data or access to Excel through the available functions. The only function I have access to is the \"computer\" function which allows for basic desktop interaction.\\n\\nTo properly assist you, I would need:\\n1. The actual input data you want to analyze\\n2. Access to Excel or another spreadsheet tool to perform the calculations\\n\\nCould you please provide the input data and confirm if there\\'s a specific way to access Excel or the data file on this system?\\n\\nOnce provided, I can help calculate correlations between volume and next day price changes, sort the data as specified, and format the results according to your requirements.')]\n",
- "Task completed after 1 steps\n"
+ "Agent's action: [ClickAction(type='click', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 3}, point=Point(x=77, y=174), button='left', pattern=None, hold_keys=None)]\n",
+ "========= Step 2 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:14:24,566 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
+ "2025-08-08 19:14:30,430 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [PressAction(type='press', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 5}, keys=['ctrl', 'a'])]\n",
+ "========= Step 3 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:14:36,137 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n",
+ "2025-08-08 19:14:42,483 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 73, 'y': 151})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [ClickAction(type='click', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 7}, point=Point(x=102, y=176), button='left', pattern=None, hold_keys=None)]\n",
+ "========= Step 4 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:14:48,687 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
+ "2025-08-08 19:14:59,516 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [PressAction(type='press', reasoning='Sorting dataset for analysis preparation', logs={'conversation_length': 9}, keys=['ctrl', 'a'])]\n",
+ "========= Step 5 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:15:05,229 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ "2025-08-08 19:15:15,153 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 55, 'y': 147}, {'x': 319, 'y': 713}]})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [DragAction(type='drag', reasoning='Highlighting data for sorting preparation', logs={'conversation_length': 12}, path=[Point(x=77, y=172), Point(x=448, y=835)], pattern=None, hold_keys=None)]\n",
+ "========= Step 6 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:15:21,362 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
+ "2025-08-08 19:15:33,774 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 229, 'y': 41})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [ClickAction(type='click', reasoning='Opening sort options for data', logs={'conversation_length': 15}, point=Point(x=322, y=48), button='left', pattern=None, hold_keys=None)]\n",
+ "========= Step 7 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:15:39,973 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
+ "2025-08-08 19:15:52,928 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 430, 'y': 96})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [ClickAction(type='click', reasoning='Choosing \"Sort range\" for sorting', logs={'conversation_length': 18}, point=Point(x=604, y=112), button='left', pattern=None, hold_keys=None)]\n",
+ "========= Step 8 ==========\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:15:59,611 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
+ "2025-08-08 19:16:17,003 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 530, 'y': 172})\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Agent's action: [ClickAction(type='click', reasoning='Accessing advanced sorting options now', logs={'conversation_length': 21}, point=Point(x=745, y=201), button='left', pattern=None, hold_keys=None)]\n"
]
}
],
@@ -280,9 +386,353 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "=== Final Evaluation ===\n",
+ "{'error': None,\n",
+ " 'gold_file_url': 'https://gahludmjcsmszgyufydt.supabase.co//storage/v1/object/public/sheetbench/615426c8-9df7-4ffa-92e9-200134a84da9/gold_solution_2.xlsx?',\n",
+ " 'logs': 'INFO: Starting evaluation with evaluator: sheets_cell_values\\n'\n",
+ " \"INFO: Evaluator args: [{'A1': 'ABC', 'B1': '-0.08'}]\\n\"\n",
+ " 'INFO: Partial rewarding: False\\n'\n",
+ " 'INFO: Starting sheets_cell_values evaluation for environment: '\n",
+ " 'af7a34a0-43b0-44d2-82d0-2b66ed16f1ea\\n'\n",
+ " \"INFO: Raw args received: [{'A1': 'ABC', 'B1': '-0.08'}] (type: \"\n",
+ " \")\\n\"\n",
+ " 'INFO: Partial rewarding enabled: False\\n'\n",
+ " 'INFO: === Google Sheets Cell Value Verification ===\\n'\n",
+ " 'INFO: Current page URL: '\n",
+ " 'https://docs.google.com/spreadsheets/d/1h-Ec3rW9sAME2sTn8qxIvFxO6qXtdURPacEFL5DJnqw/edit?gid=700326861#gid=700326861\\n'\n",
+ " 'INFO: ✅ Confirmed on Google Sheets page\\n'\n",
+ " 'INFO: Processing args parameter...\\n'\n",
+ " 'INFO: Args is a list with 1 items, extracting first item\\n'\n",
+ " \"INFO: Extracted: {'A1': 'ABC', 'B1': '-0.08'} (type: )\\n\"\n",
+ " 'INFO: Cell checks to perform: 2 cells\\n'\n",
+ " \"INFO: A1 -> expected: 'ABC'\\n\"\n",
+ " \"INFO: B1 -> expected: '-0.08'\\n\"\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " \"sheets_cell_values: Checking cells: {'A1': 'ABC', 'B1': '-0.08'}\\n\"\n",
+ " 'INFO: === ANSWER Sheet Navigation ===\\n'\n",
+ " 'INFO: Attempt 1/3: Attempting to find and navigate to ANSWER sheet '\n",
+ " 'tab...\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Attempt 1/3: Attempting to navigate to ANSWER '\n",
+ " 'sheet\\n'\n",
+ " 'INFO: Searching for ANSWER tab with selector: '\n",
+ " 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n",
+ " 'INFO: ANSWER tab search result (attempt 1): Found\\n'\n",
+ " 'INFO: ✅ Found ANSWER sheet tab on attempt 1, clicking on it...\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Found ANSWER sheet tab on attempt 1, clicking on '\n",
+ " 'it\\n'\n",
+ " 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 1: '\n",
+ " 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
+ " 'Call log:\\n'\n",
+ " ' - waiting for '\n",
+ " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
+ " ' - - locator resolved to ANSWER\\n'\n",
+ " ' - - attempting click action\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 20ms\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 100ms\\n'\n",
+ " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 500ms\\n'\n",
+ " '\\n'\n",
+ " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 1: '\n",
+ " 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
+ " 'Call log:\\n'\n",
+ " ' - waiting for '\n",
+ " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
+ " ' - - locator resolved to ANSWER\\n'\n",
+ " ' - - attempting click action\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 20ms\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 100ms\\n'\n",
+ " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 500ms\\n'\n",
+ " '\\n'\n",
+ " 'INFO: Waiting 500ms before retry 2...\\n'\n",
+ " 'INFO: Attempt 2/3: Attempting to find and navigate to ANSWER sheet '\n",
+ " 'tab...\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Attempt 2/3: Attempting to navigate to ANSWER '\n",
+ " 'sheet\\n'\n",
+ " 'INFO: Searching for ANSWER tab with selector: '\n",
+ " 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n",
+ " 'INFO: ANSWER tab search result (attempt 2): Found\\n'\n",
+ " 'INFO: ✅ Found ANSWER sheet tab on attempt 2, clicking on it...\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Found ANSWER sheet tab on attempt 2, clicking on '\n",
+ " 'it\\n'\n",
+ " 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 2: '\n",
+ " 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
+ " 'Call log:\\n'\n",
+ " ' - waiting for '\n",
+ " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
+ " ' - - locator resolved to ANSWER\\n'\n",
+ " ' - - attempting click action\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 20ms\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 100ms\\n'\n",
+ " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 500ms\\n'\n",
+ " '\\n'\n",
+ " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 2: '\n",
+ " 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
+ " 'Call log:\\n'\n",
+ " ' - waiting for '\n",
+ " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
+ " ' - - locator resolved to ANSWER\\n'\n",
+ " ' - - attempting click action\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 20ms\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 100ms\\n'\n",
+ " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 500ms\\n'\n",
+ " '\\n'\n",
+ " 'INFO: Waiting 500ms before retry 3...\\n'\n",
+ " 'INFO: Attempt 3/3: Attempting to find and navigate to ANSWER sheet '\n",
+ " 'tab...\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Attempt 3/3: Attempting to navigate to ANSWER '\n",
+ " 'sheet\\n'\n",
+ " 'INFO: Searching for ANSWER tab with selector: '\n",
+ " 'span.docs-sheet-tab-name:has-text(\"ANSWER\")\\n'\n",
+ " 'INFO: ANSWER tab search result (attempt 3): Found\\n'\n",
+ " 'INFO: ✅ Found ANSWER sheet tab on attempt 3, clicking on it...\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Found ANSWER sheet tab on attempt 3, clicking on '\n",
+ " 'it\\n'\n",
+ " 'ERROR: ❌ Error navigating to ANSWER sheet on attempt 3: '\n",
+ " 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
+ " 'Call log:\\n'\n",
+ " ' - waiting for '\n",
+ " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
+ " ' - - locator resolved to ANSWER\\n'\n",
+ " ' - - attempting click action\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 20ms\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 100ms\\n'\n",
+ " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 500ms\\n'\n",
+ " '\\n'\n",
+ " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Error navigating to ANSWER sheet on attempt 3: '\n",
+ " 'Locator.click: Timeout 30000ms exceeded.\\n'\n",
+ " 'Call log:\\n'\n",
+ " ' - waiting for '\n",
+ " 'locator(\"span.docs-sheet-tab-name:has-text(\\\\\"ANSWER\\\\\")\")\\n'\n",
+ " ' - - locator resolved to ANSWER\\n'\n",
+ " ' - - attempting click action\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 20ms\\n'\n",
+ " ' - 2 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 100ms\\n'\n",
+ " ' - 35 × waiting for element to be visible, enabled and stable\\n'\n",
+ " ' - - element is visible, enabled and stable\\n'\n",
+ " ' - - scrolling into view if needed\\n'\n",
+ " ' - - done scrolling\\n'\n",
+ " ' - - '\n",
+ " 'intercepts pointer events\\n'\n",
+ " ' - - retrying click action\\n'\n",
+ " ' - - waiting 500ms\\n'\n",
+ " '\\n'\n",
+ " 'WARNING: ⚠️ Failed to navigate to ANSWER sheet after 3 attempts, '\n",
+ " 'proceeding with current sheet\\n'\n",
+ " 'WARNING: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Failed to navigate to ANSWER sheet after 3 '\n",
+ " 'attempts, proceeding with current sheet\\n'\n",
+ " 'INFO: === File Content Extraction ===\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Granted read-write permissions\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Extracting page contents\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Selecting content\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Successfully extracted 157940 characters from '\n",
+ " 'file\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Found 5003 rows in content\\n'\n",
+ " 'INFO: Content extracted: 157940 characters\\n'\n",
+ " 'INFO: === Cell Content Parsing ===\\n'\n",
+ " 'INFO: Split file content into 5003 rows\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Found 5003 rows in content\\n'\n",
+ " 'INFO: First few rows of content:\\n'\n",
+ " \"INFO: Row 1: 'TradeDate | Ticker | ClosePrice | Volume | | '\\n\"\n",
+ " \"INFO: Row 2: '2023-01-02 | ABC | 476.87 | 2225355 | | '\\n\"\n",
+ " \"INFO: Row 3: '2023-01-02 | DEF | 322.21 | 3778582 | | '\\n\"\n",
+ " 'INFO: ... and 5000 more rows\\n'\n",
+ " 'INFO: === Cell Reference Parsing ===\\n'\n",
+ " \"INFO: Processing cell reference: 'A1' -> expected: 'ABC'\\n\"\n",
+ " \"INFO: Parsed 'A1' -> row=1 (0-indexed: 0), col=A (0-indexed: 0)\\n\"\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Parsed cell A1 as row=0, col=0\\n'\n",
+ " 'INFO: Row 1 exists in content\\n'\n",
+ " \"INFO: Row 1 has 6 columns: ['Col1', 'Col2', 'Col3', 'Col4', \"\n",
+ " \"'Col5', 'Col6']\\n\"\n",
+ " \"INFO: ✅ Found value for A1: 'TradeDate'\\n\"\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " \"sheets_cell_values: Found value for A1: 'TradeDate'\\n\"\n",
+ " \"INFO: Processing cell reference: 'B1' -> expected: '-0.08'\\n\"\n",
+ " \"INFO: Parsed 'B1' -> row=1 (0-indexed: 0), col=B (0-indexed: 1)\\n\"\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Parsed cell B1 as row=0, col=1\\n'\n",
+ " 'INFO: Row 1 exists in content\\n'\n",
+ " \"INFO: Row 1 has 6 columns: ['Col1', 'Col2', 'Col3', 'Col4', \"\n",
+ " \"'Col5', 'Col6']\\n\"\n",
+ " \"INFO: ✅ Found value for B1: 'Ticker'\\n\"\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " \"sheets_cell_values: Found value for B1: 'Ticker'\\n\"\n",
+ " 'INFO: === Cell Value Comparison ===\\n'\n",
+ " 'INFO: Comparing cell A1:\\n'\n",
+ " \"INFO: Expected: 'ABC' (type: )\\n\"\n",
+ " \"INFO: Actual: 'TradeDate' (type: )\\n\"\n",
+ " \"INFO: ❌ VALUE MISMATCH: 'TradeDate' != 'ABC'\\n\"\n",
+ " 'INFO: Comparing cell B1:\\n'\n",
+ " \"INFO: Expected: '-0.08' (type: )\\n\"\n",
+ " \"INFO: Actual: 'Ticker' (type: )\\n\"\n",
+ " \"INFO: ❌ VALUE MISMATCH: 'Ticker' != '-0.08'\\n\"\n",
+ " 'INFO: === Final Results ===\\n'\n",
+ " 'INFO: Cell comparison summary:\\n'\n",
+ " 'INFO: Total cells checked: 2\\n'\n",
+ " 'INFO: Matches: 0\\n'\n",
+ " 'INFO: Mismatches: 2\\n'\n",
+ " \"INFO: Failed cells: ['A1:', 'B1:']\\n\"\n",
+ " 'INFO: ❌ NOT all cells match expected values\\n'\n",
+ " 'INFO: Mismatches: [\"Cell A1: expected \\'ABC\\', got \\'TradeDate\\'\", '\n",
+ " '\"Cell B1: expected \\'-0.08\\', got \\'Ticker\\'\"]\\n'\n",
+ " 'INFO: [TASK af7a34a0-43b0-44d2-82d0-2b66ed16f1ea] '\n",
+ " 'sheets_cell_values: Mismatches found: [\"Cell A1: expected \\'ABC\\', '\n",
+ " 'got \\'TradeDate\\'\", \"Cell B1: expected \\'-0.08\\', got \\'Ticker\\'\"]\\n'\n",
+ " 'INFO: Final reward: 0.0\\n'\n",
+ " 'INFO: === Sheets Cell Values Evaluation Complete ===\\n'\n",
+ " 'INFO: Evaluation completed. Final reward: 0.0\\n',\n",
+ " 'reward': 0.0}\n"
+ ]
+ }
+ ],
"source": [
"# Evaluate environment state\n",
"result = await env.evaluate()\n",
@@ -292,7 +742,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -318,17 +768,426 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 0%|----------------------------------------| 0/200 [1:24?:??, ?? steps/min]2025-08-08 19:24:29,970 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 0%|----------------------------------------| 0/200 [1:25?:??, ?? steps/min]2025-08-08 19:24:30,647 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ "2025-08-08 19:24:31,329 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 0%|----------------------------------------| 0/200 [1:26?:??, ?? steps/min]2025-08-08 19:24:31,958 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 0%|----------------------------------------| 0/200 [1:28?:??, ?? steps/min]2025-08-08 19:24:35,310 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:24:36,641 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:24:37,969 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:24:39,338 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 2%|----------------------------------------| 4/200 [1:36<78:54, 2.5 steps/min]2025-08-08 19:24:42,498 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 2%|----------------------------------------| 4/200 [1:38<80:39, 2.4 steps/min]2025-08-08 19:24:44,669 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ " 2%|----------------------------------------| 4/200 [1:39<81:37, 2.4 steps/min]2025-08-08 19:24:45,319 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ "2025-08-08 19:24:45,992 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ "2025-08-08 19:24:47,339 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 2%|----------------------------------------| 4/200 [1:42<83:47, 2.3 steps/min]2025-08-08 19:24:47,968 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ "2025-08-08 19:24:49,301 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:24:50,712 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 2%|█---------------------------------------| 5/200 [1:45<68:53, 2.8 steps/min]2025-08-08 19:24:52,079 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 4%|█---------------------------------------| 7/200 [1:47<49:20, 3.9 steps/min]2025-08-08 19:24:53,458 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 4%|█---------------------------------------| 9/200 [1:52<39:53, 4.8 steps/min]2025-08-08 19:24:59,176 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
+ " 4%|█---------------------------------------| 9/200 [1:54<40:29, 4.7 steps/min]2025-08-08 19:24:59,868 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
+ " 4%|█---------------------------------------| 9/200 [1:55<40:50, 4.7 steps/min]2025-08-08 19:25:01,049 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
+ " 4%|█---------------------------------------| 9/200 [1:56<41:11, 4.6 steps/min]2025-08-08 19:25:01,688 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ "2025-08-08 19:25:02,319 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
+ " 4%|█---------------------------------------| 9/200 [1:58<41:56, 4.6 steps/min]2025-08-08 19:25:04,661 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:25:05,961 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 4%|█---------------------------------------| 9/200 [2:01<42:52, 4.5 steps/min]2025-08-08 19:25:07,310 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 6%|██--------------------------------------| 12/200 [2:06<33:03, 5.7 steps/min]2025-08-08 19:25:13,200 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['F']})\n",
+ " 6%|██--------------------------------------| 12/200 [2:08<33:32, 5.6 steps/min]2025-08-08 19:25:14,354 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
+ " 6%|██--------------------------------------| 13/200 [2:09<31:04, 6.0 steps/min]2025-08-08 19:25:15,039 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
+ "2025-08-08 19:25:16,054 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
+ "2025-08-08 19:25:17,449 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 233, 'y': 149})\n",
+ " 7%|██--------------------------------------| 14/200 [2:17<30:29, 6.1 steps/min]2025-08-08 19:25:23,160 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n",
+ " 7%|██--------------------------------------| 14/200 [2:18<30:43, 6.1 steps/min]2025-08-08 19:25:24,856 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
+ " 7%|██--------------------------------------| 14/200 [2:20<31:02, 6.0 steps/min]2025-08-08 19:25:26,201 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 440, 'y': 73})\n",
+ " 7%|██--------------------------------------| 14/200 [2:21<31:19, 5.9 steps/min]2025-08-08 19:25:27,514 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'SHIFT', 'P']})\n",
+ "2025-08-08 19:25:28,832 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 386, 'y': 74})\n",
+ " 8%|███-------------------------------------| 17/200 [2:27<26:24, 6.9 steps/min]2025-08-08 19:25:33,703 - agent.ComputerAgent - INFO - Agent: Task completed.\n",
+ "2025-08-08 19:25:34,379 - agent.ComputerAgent - INFO - Total usage:\n",
+ " - input_tokens: 5095\n",
+ " - input_tokens_details:\n",
+ " - cached_tokens: 0\n",
+ " - output_tokens: 28\n",
+ " - output_tokens_details:\n",
+ " - reasoning_tokens: 0\n",
+ " - total_tokens: 5123\n",
+ " 9%|███-------------------------------------| 18/200 [2:29<25:13, 7.2 steps/min]2025-08-08 19:25:35,019 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ "2025-08-08 19:25:35,669 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ " 9%|███-------------------------------------| 18/200 [2:30<25:26, 7.2 steps/min]2025-08-08 19:25:36,358 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ " 9%|███-------------------------------------| 18/200 [2:31<25:36, 7.1 steps/min]2025-08-08 19:25:37,713 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 280, 'y': 219})\n",
+ " 18%|███████---------------------------------| 36/200 [2:39<12:05, 13.6 steps/min]2025-08-08 19:25:44,610 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n",
+ " 18%|███████---------------------------------| 36/200 [2:40<12:09, 13.5 steps/min]2025-08-08 19:25:45,963 - agent.ComputerAgent - INFO - Computer: type({'text': 'Preferences: Open Settings (UI)'})\n",
+ " 18%|███████---------------------------------| 36/200 [2:41<12:14, 13.4 steps/min]2025-08-08 19:25:47,774 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 658, 'y': 213})\n",
+ "2025-08-08 19:25:49,132 - agent.ComputerAgent - INFO - Computer: move({'x': 417, 'y': 429})\n",
+ " 20%|███████---------------------------------| 39/200 [2:47<11:31, 14.0 steps/min]2025-08-08 19:25:52,821 - agent.ComputerAgent - INFO - LLM processing started with 12 messages\n",
+ " 20%|███████---------------------------------| 39/200 [2:49<11:39, 13.8 steps/min]2025-08-08 19:25:55,508 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
+ " 20%|███████---------------------------------| 39/200 [2:50<11:45, 13.7 steps/min]2025-08-08 19:25:56,169 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
+ " 20%|███████---------------------------------| 39/200 [2:52<11:53, 13.5 steps/min]2025-08-08 19:25:59,049 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 351, 'y': 460}, {'x': 382, 'y': 460}]})\n",
+ " 20%|███████---------------------------------| 39/200 [2:54<11:59, 13.4 steps/min]2025-08-08 19:26:00,904 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
+ " 20%|████████--------------------------------| 41/200 [2:59<11:34, 13.7 steps/min]2025-08-08 19:26:05,794 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 679, 'y': 122})\n",
+ " 20%|████████--------------------------------| 41/200 [3:01<11:42, 13.6 steps/min]2025-08-08 19:26:06,422 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
+ " 21%|████████--------------------------------| 42/200 [3:02<11:24, 13.8 steps/min]2025-08-08 19:26:08,744 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 598, 'y': 482})\n",
+ " 21%|████████--------------------------------| 42/200 [3:04<11:32, 13.7 steps/min]2025-08-08 19:26:09,410 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
+ " 22%|████████--------------------------------| 43/200 [3:07<11:22, 13.8 steps/min]2025-08-08 19:26:12,913 - agent.ComputerAgent - INFO - LLM processing started with 15 messages\n",
+ " 22%|████████--------------------------------| 43/200 [3:10<11:34, 13.6 steps/min]2025-08-08 19:26:16,109 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
+ " 22%|████████--------------------------------| 43/200 [3:13<11:46, 13.3 steps/min]2025-08-08 19:26:20,031 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 341, 'y': 462}, {'x': 322, 'y': 462}]})\n",
+ "2025-08-08 19:26:21,348 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'SHIFT', 'P']})\n",
+ " 22%|█████████-------------------------------| 45/200 [3:18<11:24, 13.6 steps/min]2025-08-08 19:26:24,698 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 612, 'y': 131})\n",
+ " 23%|█████████-------------------------------| 46/200 [3:21<11:16, 13.7 steps/min]2025-08-08 19:26:27,859 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
+ " 23%|█████████-------------------------------| 46/200 [3:23<11:20, 13.6 steps/min]2025-08-08 19:26:28,522 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
+ " 23%|█████████-------------------------------| 46/200 [3:26<11:30, 13.4 steps/min]2025-08-08 19:26:31,719 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
+ " 23%|█████████-------------------------------| 46/200 [3:27<11:33, 13.3 steps/min]2025-08-08 19:26:33,665 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 643, 'y': 131})\n",
+ " 24%|█████████-------------------------------| 47/200 [3:33<11:36, 13.2 steps/min]2025-08-08 19:26:41,002 - agent.ComputerAgent - INFO - Computer: type({'text': 'wrap tab'})\n",
+ " 24%|█████████-------------------------------| 47/200 [3:36<11:44, 13.0 steps/min]2025-08-08 19:26:42,323 - agent.ComputerAgent - INFO - Computer: type({'text': 'Preferences: Open User Settings (JSON)'})\n",
+ " 24%|█████████-------------------------------| 48/200 [3:37<11:29, 13.2 steps/min]2025-08-08 19:26:42,978 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
+ " 24%|█████████-------------------------------| 49/200 [3:38<11:13, 13.4 steps/min]2025-08-08 19:26:45,382 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 355, 'y': 558})\n",
+ " 25%|██████████------------------------------| 50/200 [3:42<11:07, 13.5 steps/min]2025-08-08 19:26:48,550 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
+ " 25%|██████████------------------------------| 50/200 [3:43<11:11, 13.4 steps/min]2025-08-08 19:26:49,199 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n",
+ " 25%|██████████------------------------------| 50/200 [3:45<11:17, 13.3 steps/min]2025-08-08 19:26:52,588 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus editor'})\n",
+ " 25%|██████████------------------------------| 50/200 [3:47<11:23, 13.2 steps/min]2025-08-08 19:26:53,260 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n",
+ " 26%|██████████------------------------------| 51/200 [3:53<11:23, 13.1 steps/min]2025-08-08 19:26:59,430 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
+ " 26%|██████████------------------------------| 51/200 [3:55<11:29, 13.0 steps/min]2025-08-08 19:27:01,780 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
+ "2025-08-08 19:27:03,107 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 674, 'y': 212})\n",
+ " 26%|██████████------------------------------| 53/200 [4:01<11:09, 13.2 steps/min]2025-08-08 19:27:07,985 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 83, 'y': 147})\n",
+ " 26%|██████████------------------------------| 53/200 [4:03<11:14, 13.1 steps/min]2025-08-08 19:27:09,630 - agent.ComputerAgent - INFO - LLM processing started with 23 messages\n",
+ " 27%|██████████------------------------------| 54/200 [4:04<11:02, 13.2 steps/min]2025-08-08 19:27:10,257 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n",
+ " 27%|██████████------------------------------| 54/200 [4:07<11:10, 13.1 steps/min]2025-08-08 19:27:14,605 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 981, 'y': 129})\n",
+ " 27%|██████████------------------------------| 54/200 [4:09<11:15, 13.0 steps/min]2025-08-08 19:27:15,269 - agent.ComputerAgent - INFO - LLM processing started with 23 messages\n",
+ " 28%|███████████-----------------------------| 55/200 [4:15<11:14, 12.9 steps/min]2025-08-08 19:27:22,005 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
+ "2025-08-08 19:27:23,434 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 679, 'y': 101})\n",
+ " 28%|███████████-----------------------------| 55/200 [4:18<11:22, 12.8 steps/min]2025-08-08 19:27:25,394 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 115, 'y': 383})\n",
+ "2025-08-08 19:27:26,746 - agent.ComputerAgent - INFO - Computer: type({'text': '}'})\n",
+ " 29%|███████████-----------------------------| 58/200 [4:24<10:46, 13.2 steps/min]2025-08-08 19:27:29,873 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n",
+ " 29%|███████████-----------------------------| 58/200 [4:27<10:54, 13.0 steps/min]2025-08-08 19:27:33,061 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n",
+ " 29%|███████████-----------------------------| 58/200 [4:28<10:57, 13.0 steps/min]2025-08-08 19:27:33,730 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
+ "2025-08-08 19:27:35,127 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus terminal'})\n",
+ " 30%|███████████-----------------------------| 59/200 [4:32<10:51, 13.0 steps/min]2025-08-08 19:27:38,470 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 679, 'y': 100})\n",
+ " 30%|████████████----------------------------| 60/200 [4:35<10:43, 13.1 steps/min]2025-08-08 19:27:41,640 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
+ " 30%|████████████----------------------------| 60/200 [4:38<10:50, 12.9 steps/min]2025-08-08 19:27:45,339 - agent.ComputerAgent - INFO - LLM processing started with 27 messages\n",
+ " 30%|████████████----------------------------| 60/200 [4:42<10:59, 12.7 steps/min]2025-08-08 19:27:48,491 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 30%|████████████----------------------------| 60/200 [4:45<11:06, 12.6 steps/min]2025-08-08 19:27:52,344 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'SHIFT', 'P']})\n",
+ " 30%|████████████----------------------------| 60/200 [4:47<11:11, 12.5 steps/min]2025-08-08 19:27:54,184 - agent.ComputerAgent - INFO - Computer: type({'text': 'edited_colorful.png'})\n",
+ " 31%|████████████----------------------------| 62/200 [4:50<10:46, 12.8 steps/min]2025-08-08 19:27:56,481 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 241, 'y': 110})\n",
+ "2025-08-08 19:27:57,814 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 981, 'y': 131})\n",
+ " 31%|████████████----------------------------| 62/200 [4:53<10:52, 12.7 steps/min]2025-08-08 19:27:59,141 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 675, 'y': 100})\n",
+ " 32%|████████████----------------------------| 64/200 [4:54<10:25, 13.0 steps/min]2025-08-08 19:27:59,799 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
+ " 32%|█████████████---------------------------| 65/200 [4:55<10:13, 13.2 steps/min]2025-08-08 19:28:00,988 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n",
+ " 32%|█████████████---------------------------| 65/200 [4:58<10:20, 13.1 steps/min]2025-08-08 19:28:04,640 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
+ " 32%|█████████████---------------------------| 65/200 [4:59<10:22, 13.0 steps/min]2025-08-08 19:28:05,289 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
+ " 32%|█████████████---------------------------| 65/200 [5:00<10:25, 13.0 steps/min]2025-08-08 19:28:06,432 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n",
+ " 32%|█████████████---------------------------| 65/200 [5:02<10:29, 12.9 steps/min]2025-08-08 19:28:09,350 - agent.ComputerAgent - INFO - Computer: type({'text': 'Tasks: Configure Default Build Task'})\n",
+ "2025-08-08 19:28:10,636 - agent.ComputerAgent - INFO - Computer: type({'text': 'drip coffee maker'})\n",
+ " 34%|█████████████---------------------------| 67/200 [5:10<10:17, 12.9 steps/min]2025-08-08 19:28:17,499 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['BACKSPACE']})\n",
+ "2025-08-08 19:28:18,798 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 676, 'y': 101})\n",
+ " 34%|█████████████---------------------------| 67/200 [5:14<10:23, 12.8 steps/min]2025-08-08 19:28:19,419 - agent.ComputerAgent - INFO - LLM processing started with 6 messages\n",
+ "2025-08-08 19:28:20,058 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n",
+ " 34%|█████████████---------------------------| 69/200 [5:16<10:00, 13.1 steps/min]2025-08-08 19:28:22,940 - agent.ComputerAgent - INFO - Computer: type({'text': 'focus active editor group'})\n",
+ " 35%|██████████████--------------------------| 70/200 [5:20<9:54, 13.1 steps/min]]2025-08-08 19:28:26,283 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
+ " 35%|██████████████--------------------------| 70/200 [5:21<9:57, 13.1 steps/min]2025-08-08 19:28:26,920 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
+ "2025-08-08 19:28:27,559 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n",
+ " 36%|██████████████--------------------------| 71/200 [5:23<9:48, 13.1 steps/min]2025-08-08 19:28:29,749 - agent.ComputerAgent - INFO - LLM processing started with 31 messages\n",
+ " 36%|██████████████--------------------------| 71/200 [5:27<9:54, 13.0 steps/min]2025-08-08 19:28:33,099 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
+ " 36%|██████████████--------------------------| 71/200 [5:28<9:56, 13.0 steps/min]2025-08-08 19:28:33,732 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
+ " 36%|██████████████--------------------------| 72/200 [5:34<9:54, 12.9 steps/min]2025-08-08 19:28:39,897 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
+ " 36%|██████████████--------------------------| 72/200 [5:35<9:56, 12.9 steps/min]2025-08-08 19:28:41,768 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['BACKSPACE']})\n",
+ " 36%|██████████████--------------------------| 73/200 [5:38<9:48, 13.0 steps/min]2025-08-08 19:28:45,130 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 77, 'y': 236})\n",
+ " 36%|██████████████--------------------------| 73/200 [5:40<9:52, 12.9 steps/min]2025-08-08 19:28:46,538 - agent.ComputerAgent - INFO - Computer: click({'button': 'right', 'x': 638, 'y': 100})\n",
+ " 37%|██████████████--------------------------| 74/200 [5:41<9:42, 13.0 steps/min]2025-08-08 19:28:47,895 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 732, 'y': 179})\n",
+ " 38%|███████████████-------------------------| 75/200 [5:43<9:32, 13.1 steps/min]2025-08-08 19:28:49,073 - agent.ComputerAgent - INFO - LLM processing started with 33 messages\n",
+ " 38%|███████████████-------------------------| 76/200 [5:46<9:25, 13.2 steps/min]2025-08-08 19:28:52,240 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n",
+ " 38%|███████████████-------------------------| 76/200 [5:48<9:28, 13.1 steps/min]2025-08-08 19:28:54,419 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
+ " 38%|███████████████-------------------------| 76/200 [5:49<9:30, 13.0 steps/min]2025-08-08 19:28:55,580 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
+ " 38%|███████████████-------------------------| 76/200 [5:53<9:37, 12.9 steps/min]2025-08-08 19:29:00,506 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
+ " 38%|███████████████-------------------------| 77/200 [5:57<9:31, 12.9 steps/min]2025-08-08 19:29:03,867 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['BACKSPACE']})\n",
+ " 39%|███████████████-------------------------| 78/200 [6:01<9:24, 13.0 steps/min]2025-08-08 19:29:07,492 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
+ " 39%|███████████████-------------------------| 78/200 [6:02<9:27, 12.9 steps/min]2025-08-08 19:29:08,836 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 75, 'y': 525})\n",
+ "2025-08-08 19:29:10,446 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 684, 'y': 114})\n",
+ " 40%|████████████████------------------------| 80/200 [6:06<9:10, 13.1 steps/min]2025-08-08 19:29:12,120 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
+ "2025-08-08 19:29:13,475 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 569, 'y': 186})\n",
+ " 40%|████████████████------------------------| 81/200 [6:10<9:04, 13.1 steps/min]2025-08-08 19:29:16,613 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
+ " 40%|████████████████------------------------| 81/200 [6:11<9:06, 13.1 steps/min]2025-08-08 19:29:17,780 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n",
+ " 40%|████████████████------------------------| 81/200 [6:16<9:12, 12.9 steps/min]2025-08-08 19:29:21,989 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
+ " 40%|████████████████------------------------| 81/200 [6:18<9:15, 12.8 steps/min]2025-08-08 19:29:24,280 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 92, 'y': 524})\n",
+ " 41%|████████████████------------------------| 82/200 [6:20<9:07, 12.9 steps/min]2025-08-08 19:29:26,630 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 658, 'y': 147})\n",
+ " 42%|████████████████------------------------| 83/200 [6:24<9:02, 12.9 steps/min]2025-08-08 19:29:30,800 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
+ " 42%|████████████████------------------------| 83/200 [6:26<9:04, 12.9 steps/min]2025-08-08 19:29:32,144 - agent.ComputerAgent - INFO - Agent: Task completed.\n",
+ "2025-08-08 19:29:32,760 - agent.ComputerAgent - INFO - Total usage:\n",
+ " - input_tokens: 11426\n",
+ " - input_tokens_details:\n",
+ " - cached_tokens: 0\n",
+ " - output_tokens: 69\n",
+ " - output_tokens_details:\n",
+ " - reasoning_tokens: 0\n",
+ " - total_tokens: 11495\n",
+ " 42%|████████████████------------------------| 84/200 [6:28<8:55, 13.0 steps/min]2025-08-08 19:29:33,941 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
+ " 42%|████████████████------------------------| 84/200 [6:29<8:57, 12.9 steps/min]2025-08-08 19:29:35,277 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 81, 'y': 347})\n",
+ " 42%|████████████████------------------------| 84/200 [6:30<8:59, 12.9 steps/min]2025-08-08 19:29:36,610 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'J']})\n",
+ " 45%|██████████████████----------------------| 90/200 [6:37<8:05, 13.6 steps/min]2025-08-08 19:29:42,940 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n",
+ " 45%|██████████████████----------------------| 90/200 [6:38<8:06, 13.6 steps/min]2025-08-08 19:29:43,571 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
+ "2025-08-08 19:29:44,917 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 53, 'y': 288})\n",
+ " 46%|██████████████████----------------------| 91/200 [6:45<8:05, 13.5 steps/min]2025-08-08 19:29:51,777 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 700, 'y': 241})\n",
+ " 46%|██████████████████----------------------| 91/200 [6:47<8:07, 13.4 steps/min]2025-08-08 19:29:52,610 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
+ " 46%|██████████████████----------------------| 92/200 [6:53<8:04, 13.4 steps/min]2025-08-08 19:29:58,819 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n",
+ "2025-08-08 19:30:00,108 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['ENTER']})\n",
+ " 46%|██████████████████----------------------| 92/200 [6:55<8:07, 13.3 steps/min]2025-08-08 19:30:01,399 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 429, 'x': 117, 'y': 706})\n",
+ " 47%|██████████████████----------------------| 94/200 [6:57<7:51, 13.5 steps/min]2025-08-08 19:30:03,698 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 172, 'x': 212, 'y': 423})\n",
+ " 48%|███████████████████---------------------| 95/200 [7:01<7:46, 13.5 steps/min]2025-08-08 19:30:07,860 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n",
+ " 48%|███████████████████---------------------| 95/200 [7:03<7:47, 13.5 steps/min]2025-08-08 19:30:08,549 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
+ " 48%|███████████████████---------------------| 95/200 [7:05<7:49, 13.4 steps/min]2025-08-08 19:30:10,720 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n",
+ " 48%|███████████████████---------------------| 95/200 [7:06<7:51, 13.4 steps/min]2025-08-08 19:30:12,602 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 768, 'y': 246}, {'x': 741, 'y': 245}]})\n",
+ " 48%|███████████████████---------------------| 96/200 [7:13<7:50, 13.3 steps/min]2025-08-08 19:30:19,742 - agent.ComputerAgent - INFO - LLM processing started with 45 messages\n",
+ " 48%|███████████████████---------------------| 96/200 [7:21<7:57, 13.1 steps/min]2025-08-08 19:30:27,150 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 267, 'y': 187})\n",
+ " 48%|███████████████████---------------------| 97/200 [7:24<7:51, 13.1 steps/min]2025-08-08 19:30:31,050 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 150, 'x': 195, 'y': 446})\n",
+ "2025-08-08 19:30:32,313 - agent.ComputerAgent - INFO - Computer: screenshot({})\n",
+ " 48%|███████████████████---------------------| 97/200 [7:27<7:55, 13.0 steps/min]2025-08-08 19:30:33,651 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 706, 'y': 243}, {'x': 681, 'y': 240}]})\n",
+ " 50%|███████████████████---------------------| 99/200 [7:28<7:38, 13.2 steps/min]2025-08-08 19:30:34,278 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
+ " 50%|████████████████████--------------------| 100/200 [7:29<7:29, 13.3 steps/min]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "No screenshot found, taking screenshot\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2025-08-08 19:30:35,460 - agent.ComputerAgent - INFO - LLM processing started with 45 messages\n",
+ " 50%|████████████████████--------------------| 100/200 [7:32<7:32, 13.2 steps/min]2025-08-08 19:30:38,650 - agent.ComputerAgent - INFO - LLM processing started with 44 messages\n",
+ " 50%|████████████████████--------------------| 100/200 [7:35<7:35, 13.2 steps/min]2025-08-08 19:30:40,900 - agent.ComputerAgent - INFO - LLM processing started with 48 messages\n",
+ " 50%|████████████████████--------------------| 100/200 [7:37<7:37, 13.1 steps/min]2025-08-08 19:30:43,737 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 79, 'y': 637})\n",
+ " 50%|████████████████████--------------------| 101/200 [7:40<7:30, 13.2 steps/min]2025-08-08 19:30:46,567 - agent.ComputerAgent - INFO - Agent: I am unable to complete the task. However, I see that the outside task has been completed meaning the \"Focus Active Editor Group\" key binding has been set. Task completed\n",
+ "2025-08-08 19:30:47,191 - agent.ComputerAgent - INFO - Total usage:\n",
+ " - input_tokens: 5872\n",
+ " - input_tokens_details:\n",
+ " - cached_tokens: 0\n",
+ " - output_tokens: 37\n",
+ " - output_tokens_details:\n",
+ " - reasoning_tokens: 0\n",
+ " - total_tokens: 5909\n",
+ " 51%|████████████████████--------------------| 102/200 [7:44<7:26, 13.2 steps/min]2025-08-08 19:30:49,850 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
+ " 52%|████████████████████--------------------| 104/200 [7:52<7:16, 13.2 steps/min]2025-08-08 19:30:58,933 - agent.ComputerAgent - INFO - Computer: type({'text': '25'})\n",
+ "2025-08-08 19:31:00,266 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 552, 'y': 630})\n",
+ " 53%|█████████████████████-------------------| 106/200 [7:56<7:02, 13.3 steps/min]2025-08-08 19:31:02,587 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 753, 'y': 241})\n",
+ " 54%|█████████████████████-------------------| 107/200 [7:59<6:57, 13.4 steps/min]2025-08-08 19:31:05,710 - agent.ComputerAgent - INFO - LLM processing started with 28 messages\n",
+ " 54%|█████████████████████-------------------| 107/200 [8:01<6:58, 13.3 steps/min]2025-08-08 19:31:07,372 - agent.ComputerAgent - INFO - LLM processing started with 47 messages\n",
+ " 54%|█████████████████████-------------------| 107/200 [8:02<6:59, 13.3 steps/min]2025-08-08 19:31:08,050 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 54%|█████████████████████-------------------| 107/200 [8:03<7:00, 13.3 steps/min]2025-08-08 19:31:09,710 - agent.ComputerAgent - INFO - LLM processing started with 51 messages\n",
+ " 54%|█████████████████████-------------------| 107/200 [8:05<7:01, 13.2 steps/min]2025-08-08 19:31:11,042 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 54%|█████████████████████-------------------| 108/200 [8:09<6:56, 13.2 steps/min]2025-08-08 19:31:15,879 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 91, 'y': 636})\n",
+ " 54%|█████████████████████-------------------| 108/200 [8:11<6:58, 13.2 steps/min]2025-08-08 19:31:17,020 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ " 55%|█████████████████████-------------------| 109/200 [8:13<6:51, 13.3 steps/min]2025-08-08 19:31:19,881 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 55%|██████████████████████------------------| 110/200 [8:16<6:45, 13.3 steps/min]2025-08-08 19:31:22,060 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n",
+ " 55%|██████████████████████------------------| 110/200 [8:17<6:46, 13.3 steps/min]2025-08-08 19:31:23,916 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 743, 'y': 240}, {'x': 681, 'y': 243}]})\n",
+ " 56%|██████████████████████------------------| 111/200 [8:20<6:41, 13.3 steps/min]2025-08-08 19:31:26,060 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
+ " 56%|██████████████████████------------------| 111/200 [8:24<6:44, 13.2 steps/min]2025-08-08 19:31:30,411 - agent.ComputerAgent - INFO - Agent: The screen is currently blank. I'll continue monitoring to see if anything changes.\n",
+ "2025-08-08 19:31:30,412 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 56%|██████████████████████------------------| 111/200 [8:25<6:45, 13.2 steps/min]2025-08-08 19:31:31,824 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 358, 'y': 548})\n",
+ "2025-08-08 19:31:33,154 - agent.ComputerAgent - INFO - Computer: type({'text': '60'})\n",
+ " 57%|██████████████████████------------------| 114/200 [8:33<6:27, 13.3 steps/min]2025-08-08 19:31:39,501 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
+ " 57%|██████████████████████------------------| 114/200 [8:34<6:28, 13.3 steps/min]2025-08-08 19:31:40,131 - agent.ComputerAgent - INFO - LLM processing started with 50 messages\n",
+ " 57%|██████████████████████------------------| 114/200 [8:36<6:29, 13.2 steps/min]2025-08-08 19:31:42,301 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
+ " 57%|██████████████████████------------------| 114/200 [8:41<6:33, 13.1 steps/min]2025-08-08 19:31:48,792 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 57%|███████████████████████-----------------| 115/200 [8:49<6:31, 13.0 steps/min]2025-08-08 19:31:54,501 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 57%|███████████████████████-----------------| 115/200 [8:50<6:31, 13.0 steps/min]2025-08-08 19:31:55,230 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ " 57%|███████████████████████-----------------| 115/200 [8:52<6:33, 13.0 steps/min]2025-08-08 19:31:58,023 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:31:59,366 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 131, 'y': 636})\n",
+ " 57%|███████████████████████-----------------| 115/200 [8:54<6:35, 12.9 steps/min]2025-08-08 19:32:01,196 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 28, 'y': 158})\n",
+ "2025-08-08 19:32:02,508 - agent.ComputerAgent - INFO - Agent: Chrome is open. I'll configure it to delete browsing data automatically upon closing. I should be able to do so in the settings.\n",
+ "2025-08-08 19:32:02,508 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1008, 'y': 31})\n",
+ " 60%|███████████████████████-----------------| 119/200 [8:58<6:06, 13.3 steps/min]2025-08-08 19:32:04,651 - agent.ComputerAgent - INFO - LLM processing started with 3 messages\n",
+ " 60%|███████████████████████-----------------| 119/200 [8:59<6:07, 13.2 steps/min]2025-08-08 19:32:05,840 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
+ " 60%|███████████████████████-----------------| 119/200 [9:02<6:09, 13.2 steps/min]2025-08-08 19:32:08,696 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 60%|███████████████████████-----------------| 119/200 [9:03<6:10, 13.1 steps/min]2025-08-08 19:32:09,381 - agent.ComputerAgent - INFO - LLM processing started with 53 messages\n",
+ "2025-08-08 19:32:10,033 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
+ " 60%|████████████████████████----------------| 120/200 [9:09<6:06, 13.1 steps/min]2025-08-08 19:32:15,457 - agent.ComputerAgent - INFO - LLM processing started with 36 messages\n",
+ " 60%|████████████████████████----------------| 120/200 [9:11<6:07, 13.0 steps/min]2025-08-08 19:32:17,101 - agent.ComputerAgent - INFO - LLM processing started with 5 messages\n",
+ "2025-08-08 19:32:18,450 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 16, 'y': 142})\n",
+ " 60%|████████████████████████----------------| 120/200 [9:13<6:09, 13.0 steps/min]Error running ComputerAgent: litellm.BadRequestError: OpenAIException - {\n",
+ " \"error\": {\n",
+ " \"message\": \"Item 'rs_689688fccf8481908fd292817ff04fd8027ddb559dbe0463' of type 'reasoning' was provided without its required following item.\",\n",
+ " \"type\": \"invalid_request_error\",\n",
+ " \"param\": \"input\",\n",
+ " \"code\": null\n",
+ " }\n",
+ "} LiteLLM Retried: 3 times\n",
+ " 61%|████████████████████████----------------| 122/200 [9:20<5:58, 13.1 steps/min]2025-08-08 19:32:26,107 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
+ "2025-08-08 19:32:27,452 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 541, 'y': 251})\n",
+ " 62%|████████████████████████----------------| 123/200 [9:23<5:52, 13.1 steps/min]2025-08-08 19:32:29,893 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 28, 'y': 150})\n",
+ " 62%|████████████████████████----------------| 124/200 [9:27<5:47, 13.1 steps/min]2025-08-08 19:32:33,695 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 62%|████████████████████████----------------| 124/200 [9:28<5:48, 13.1 steps/min]2025-08-08 19:32:34,860 - agent.ComputerAgent - INFO - LLM processing started with 8 messages\n",
+ " 62%|█████████████████████████---------------| 125/200 [9:35<5:45, 13.0 steps/min]2025-08-08 19:32:41,071 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n",
+ " 65%|██████████████████████████--------------| 130/200 [9:40<5:12, 13.4 steps/min]2025-08-08 19:32:46,139 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 558, 'y': 431})\n",
+ " 66%|██████████████████████████--------------| 131/200 [9:43<5:07, 13.5 steps/min]2025-08-08 19:32:49,527 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1000, 'y': 61})\n",
+ " 66%|██████████████████████████--------------| 132/200 [9:46<5:02, 13.5 steps/min]2025-08-08 19:32:52,701 - agent.ComputerAgent - INFO - LLM processing started with 11 messages\n",
+ " 66%|██████████████████████████--------------| 132/200 [9:48<5:03, 13.4 steps/min]2025-08-08 19:32:55,391 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
+ " 66%|██████████████████████████--------------| 132/200 [9:58<5:08, 13.2 steps/min]2025-08-08 19:33:04,800 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 377, 'y': 624})\n",
+ " 66%|██████████████████████████--------------| 133/200 [10:03<5:03, 13.2 steps/min]2025-08-08 19:33:09,179 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1002, 'y': 65})\n",
+ " 67%|██████████████████████████--------------| 134/200 [10:06<4:58, 13.3 steps/min]2025-08-08 19:33:11,832 - agent.ComputerAgent - INFO - LLM processing started with 14 messages\n",
+ " 67%|██████████████████████████--------------| 134/200 [10:09<5:00, 13.2 steps/min]2025-08-08 19:33:14,981 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
+ " 67%|██████████████████████████--------------| 134/200 [10:15<5:03, 13.1 steps/min]2025-08-08 19:33:21,886 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1003, 'y': 62})\n",
+ " 67%|██████████████████████████--------------| 134/200 [10:17<5:03, 13.0 steps/min]2025-08-08 19:33:23,717 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 330, 'y': 536})\n",
+ " 68%|███████████████████████████-------------| 136/200 [10:22<4:53, 13.1 steps/min]2025-08-08 19:33:28,911 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
+ " 68%|███████████████████████████-------------| 136/200 [10:25<4:54, 13.1 steps/min]2025-08-08 19:33:30,580 - agent.ComputerAgent - INFO - LLM processing started with 17 messages\n",
+ " 68%|███████████████████████████-------------| 136/200 [10:31<4:57, 12.9 steps/min]2025-08-08 19:33:37,956 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1002, 'y': 63})\n",
+ " 68%|███████████████████████████-------------| 137/200 [10:36<4:52, 12.9 steps/min]2025-08-08 19:33:42,842 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 346, 'y': 208})\n",
+ " 68%|███████████████████████████-------------| 137/200 [10:38<4:53, 12.9 steps/min]2025-08-08 19:33:43,490 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
+ " 69%|███████████████████████████-------------| 138/200 [10:45<4:49, 12.8 steps/min]2025-08-08 19:33:51,422 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1002, 'y': 64})\n",
+ " 69%|███████████████████████████-------------| 138/200 [10:46<4:50, 12.8 steps/min]2025-08-08 19:33:52,042 - agent.ComputerAgent - INFO - LLM processing started with 20 messages\n",
+ " 70%|███████████████████████████-------------| 139/200 [10:51<4:45, 12.8 steps/min]2025-08-08 19:33:57,212 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
+ " 70%|███████████████████████████-------------| 139/200 [10:53<4:46, 12.8 steps/min]2025-08-08 19:34:00,042 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 70%|████████████████████████████------------| 140/200 [10:56<4:41, 12.8 steps/min]2025-08-08 19:34:02,329 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1007, 'y': 62})\n",
+ " 70%|████████████████████████████------------| 141/200 [11:02<4:37, 12.8 steps/min]2025-08-08 19:34:08,003 - agent.ComputerAgent - INFO - LLM processing started with 22 messages\n",
+ "2025-08-08 19:34:08,681 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
+ " 70%|████████████████████████████------------| 141/200 [11:10<4:40, 12.6 steps/min]2025-08-08 19:34:17,904 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 856, 'y': 571})\n",
+ " 71%|████████████████████████████------------| 142/200 [11:16<4:36, 12.6 steps/min]2025-08-08 19:34:22,796 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 210, 'x': 189, 'y': 577})\n",
+ " 71%|████████████████████████████------------| 142/200 [11:18<4:36, 12.6 steps/min]2025-08-08 19:34:23,443 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
+ " 72%|████████████████████████████------------| 143/200 [11:24<4:32, 12.5 steps/min]2025-08-08 19:34:30,151 - agent.ComputerAgent - INFO - LLM processing started with 25 messages\n",
+ " 72%|████████████████████████████------------| 143/200 [11:25<4:33, 12.5 steps/min]2025-08-08 19:34:31,534 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 123, 'y': 197})\n",
+ " 72%|████████████████████████████------------| 144/200 [11:31<4:29, 12.5 steps/min]2025-08-08 19:34:37,221 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
+ " 72%|████████████████████████████------------| 144/200 [11:33<4:29, 12.5 steps/min]2025-08-08 19:34:40,096 - agent.ComputerAgent - INFO - Computer: scroll({'scroll_x': 0, 'scroll_y': 201, 'x': 206, 'y': 576})\n",
+ " 72%|█████████████████████████████-----------| 145/200 [11:39<4:25, 12.4 steps/min]2025-08-08 19:34:45,523 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 469, 'y': 286})\n",
+ " 73%|█████████████████████████████-----------| 146/200 [11:41<4:19, 12.5 steps/min]2025-08-08 19:34:47,192 - agent.ComputerAgent - INFO - LLM processing started with 27 messages\n",
+ " 73%|█████████████████████████████-----------| 146/200 [11:45<4:21, 12.4 steps/min]2025-08-08 19:34:51,413 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n",
+ " 73%|█████████████████████████████-----------| 146/200 [11:54<4:24, 12.3 steps/min]2025-08-08 19:35:00,828 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 600, 'y': 297})\n",
+ " 73%|█████████████████████████████-----------| 146/200 [11:56<4:24, 12.2 steps/min]2025-08-08 19:35:02,670 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 349, 'y': 166})\n",
+ " 74%|█████████████████████████████-----------| 148/200 [12:01<4:13, 12.3 steps/min]2025-08-08 19:35:07,831 - agent.ComputerAgent - INFO - LLM processing started with 46 messages\n",
+ " 74%|█████████████████████████████-----------| 148/200 [12:04<4:14, 12.3 steps/min]2025-08-08 19:35:09,512 - agent.ComputerAgent - INFO - LLM processing started with 30 messages\n",
+ " 74%|█████████████████████████████-----------| 148/200 [12:12<4:17, 12.1 steps/min]2025-08-08 19:35:18,465 - agent.ComputerAgent - INFO - Computer: type({'text': 'splash screen'})\n",
+ " 74%|█████████████████████████████-----------| 148/200 [12:13<4:17, 12.1 steps/min]2025-08-08 19:35:20,254 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 370, 'y': 209})\n",
+ " 75%|██████████████████████████████----------| 150/200 [12:19<4:06, 12.2 steps/min]2025-08-08 19:35:25,432 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
+ " 75%|██████████████████████████████----------| 150/200 [12:20<4:06, 12.1 steps/min]2025-08-08 19:35:26,072 - agent.ComputerAgent - INFO - LLM processing started with 49 messages\n",
+ " 75%|██████████████████████████████----------| 150/200 [12:27<4:09, 12.0 steps/min]2025-08-08 19:35:34,465 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 582, 'y': 570})\n",
+ " 76%|██████████████████████████████----------| 151/200 [12:34<4:04, 12.0 steps/min]2025-08-08 19:35:40,143 - agent.ComputerAgent - INFO - LLM processing started with 52 messages\n",
+ "2025-08-08 19:35:41,503 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 409, 'y': 166})\n",
+ " 76%|██████████████████████████████----------| 152/200 [12:43<4:01, 11.9 steps/min]2025-08-08 19:35:49,127 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
+ " 76%|██████████████████████████████----------| 152/200 [12:49<4:03, 11.8 steps/min]2025-08-08 19:35:56,040 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 388, 'y': 101})\n",
+ " 76%|██████████████████████████████----------| 153/200 [12:52<3:57, 11.9 steps/min]2025-08-08 19:35:58,458 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'A']})\n",
+ " 77%|██████████████████████████████----------| 154/200 [12:59<3:52, 11.9 steps/min]2025-08-08 19:36:05,632 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
+ " 77%|██████████████████████████████----------| 154/200 [13:09<3:55, 11.7 steps/min]2025-08-08 19:36:15,055 - agent.ComputerAgent - INFO - Computer: type({'text': 'logo'})\n",
+ " 78%|███████████████████████████████---------| 155/200 [13:16<3:51, 11.7 steps/min]2025-08-08 19:36:21,733 - agent.ComputerAgent - INFO - LLM processing started with 39 messages\n",
+ " 78%|███████████████████████████████---------| 155/200 [13:21<3:52, 11.6 steps/min]2025-08-08 19:36:26,422 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 78%|███████████████████████████████---------| 155/200 [13:27<3:54, 11.5 steps/min]2025-08-08 19:36:33,289 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 605, 'y': 523})\n",
+ " 78%|███████████████████████████████---------| 156/200 [13:31<3:48, 11.5 steps/min]2025-08-08 19:36:37,669 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 731, 'y': 130})\n",
+ " 78%|███████████████████████████████---------| 157/200 [13:34<3:43, 11.6 steps/min]2025-08-08 19:36:40,312 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
+ " 78%|███████████████████████████████---------| 157/200 [13:38<3:44, 11.5 steps/min]2025-08-08 19:36:45,032 - agent.ComputerAgent - INFO - LLM processing started with 42 messages\n",
+ " 78%|███████████████████████████████---------| 157/200 [14:00<3:50, 11.2 steps/min]2025-08-08 19:37:06,541 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 20, 'y': 160})\n",
+ "2025-08-08 19:37:07,826 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 601, 'y': 520})\n",
+ " 80%|███████████████████████████████---------| 159/200 [14:08<3:38, 11.2 steps/min]2025-08-08 19:37:13,510 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
+ "2025-08-08 19:37:14,191 - agent.ComputerAgent - INFO - LLM processing started with 45 messages\n",
+ " 80%|███████████████████████████████---------| 159/200 [14:16<3:40, 11.1 steps/min]2025-08-08 19:37:22,586 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 64, 'y': 745})\n",
+ " 80%|███████████████████████████████---------| 159/200 [14:17<3:41, 11.1 steps/min]2025-08-08 19:37:24,998 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 80%|████████████████████████████████--------| 161/200 [14:22<3:28, 11.2 steps/min]2025-08-08 19:37:28,163 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ " 80%|████████████████████████████████--------| 161/200 [14:27<3:30, 11.1 steps/min]2025-08-08 19:37:32,874 - agent.ComputerAgent - INFO - LLM processing started with 47 messages\n",
+ " 80%|████████████████████████████████--------| 161/200 [14:31<3:31, 11.1 steps/min]2025-08-08 19:37:37,739 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 135, 'y': 743})\n",
+ " 81%|████████████████████████████████--------| 162/200 [14:33<3:25, 11.1 steps/min]2025-08-08 19:37:39,367 - agent.ComputerAgent - INFO - LLM processing started with 1 messages\n",
+ " 81%|████████████████████████████████--------| 162/200 [14:38<3:25, 11.1 steps/min]2025-08-08 19:37:43,552 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
+ " 81%|████████████████████████████████--------| 162/200 [14:41<3:26, 11.0 steps/min]2025-08-08 19:37:47,420 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 17, 'y': 141})\n",
+ " 81%|████████████████████████████████--------| 162/200 [14:42<3:27, 11.0 steps/min]2025-08-08 19:37:49,292 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:37:50,567 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 82%|█████████████████████████████████-------| 165/200 [14:49<3:08, 11.1 steps/min]2025-08-08 19:37:55,784 - agent.ComputerAgent - INFO - LLM processing started with 4 messages\n",
+ " 82%|█████████████████████████████████-------| 165/200 [14:51<3:09, 11.1 steps/min]2025-08-08 19:37:56,434 - agent.ComputerAgent - INFO - LLM processing started with 50 messages\n",
+ "2025-08-08 19:37:57,074 - agent.ComputerAgent - INFO - LLM processing started with 15 messages\n",
+ " 82%|█████████████████████████████████-------| 165/200 [14:58<3:10, 11.0 steps/min]2025-08-08 19:38:04,386 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 195, 'y': 62})\n",
+ " 83%|█████████████████████████████████-------| 166/200 [15:00<3:04, 11.1 steps/min]2025-08-08 19:38:07,265 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 130, 'y': 743})\n",
+ " 84%|█████████████████████████████████-------| 167/200 [15:04<2:58, 11.1 steps/min]2025-08-08 19:38:09,895 - agent.ComputerAgent - INFO - LLM processing started with 7 messages\n",
+ " 84%|█████████████████████████████████-------| 167/200 [15:06<2:59, 11.1 steps/min]2025-08-08 19:38:13,330 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1008, 'y': 31})\n",
+ " 84%|█████████████████████████████████-------| 167/200 [15:08<2:59, 11.0 steps/min]2025-08-08 19:38:13,936 - agent.ComputerAgent - INFO - LLM processing started with 18 messages\n",
+ " 84%|█████████████████████████████████-------| 168/200 [15:11<2:53, 11.1 steps/min]2025-08-08 19:38:17,811 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 30, 'y': 438})\n",
+ " 84%|█████████████████████████████████-------| 169/200 [15:18<2:48, 11.0 steps/min]2025-08-08 19:38:23,525 - agent.ComputerAgent - INFO - LLM processing started with 10 messages\n",
+ " 84%|█████████████████████████████████-------| 169/200 [15:22<2:49, 11.0 steps/min]2025-08-08 19:38:28,914 - agent.ComputerAgent - INFO - Computer: double_click({'x': 126, 'y': 742})\n",
+ " 85%|██████████████████████████████████------| 170/200 [15:26<2:43, 11.0 steps/min]2025-08-08 19:38:32,765 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 108, 'y': 183})\n",
+ " 86%|██████████████████████████████████------| 171/200 [15:31<2:37, 11.0 steps/min]2025-08-08 19:38:36,966 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
+ " 86%|██████████████████████████████████------| 171/200 [15:32<2:38, 11.0 steps/min]2025-08-08 19:38:38,114 - agent.ComputerAgent - INFO - LLM processing started with 13 messages\n",
+ " 86%|██████████████████████████████████------| 171/200 [15:40<2:39, 10.9 steps/min]2025-08-08 19:38:47,011 - agent.ComputerAgent - INFO - Computer: type({'text': 'Percentage Tables'})\n",
+ " 86%|██████████████████████████████████------| 171/200 [15:42<2:39, 10.9 steps/min]2025-08-08 19:38:48,303 - agent.ComputerAgent - INFO - Computer: double_click({'x': 208, 'y': 109})\n",
+ " 86%|██████████████████████████████████------| 173/200 [15:47<2:27, 11.0 steps/min]2025-08-08 19:38:53,975 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
+ " 86%|██████████████████████████████████------| 173/200 [15:49<2:28, 10.9 steps/min]2025-08-08 19:38:54,654 - agent.ComputerAgent - INFO - LLM processing started with 16 messages\n",
+ " 86%|██████████████████████████████████------| 173/200 [15:56<2:29, 10.9 steps/min]2025-08-08 19:39:02,573 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 573, 'y': 431})\n",
+ " 87%|██████████████████████████████████------| 174/200 [15:59<2:23, 10.9 steps/min]2025-08-08 19:39:05,943 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 499, 'y': 216})\n",
+ " 88%|███████████████████████████████████-----| 175/200 [16:02<2:17, 10.9 steps/min]2025-08-08 19:39:08,625 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
+ " 88%|███████████████████████████████████-----| 175/200 [16:05<2:17, 10.9 steps/min]2025-08-08 19:39:11,825 - agent.ComputerAgent - INFO - LLM processing started with 19 messages\n",
+ " 88%|███████████████████████████████████-----| 175/200 [16:11<2:18, 10.8 steps/min]2025-08-08 19:39:17,697 - agent.ComputerAgent - INFO - Computer: keypress({'keys': ['CTRL', 'C']})\n",
+ " 88%|███████████████████████████████████-----| 176/200 [16:17<2:13, 10.8 steps/min]2025-08-08 19:39:23,366 - agent.ComputerAgent - INFO - LLM processing started with 21 messages\n",
+ " 88%|███████████████████████████████████-----| 176/200 [16:20<2:13, 10.8 steps/min]2025-08-08 19:39:27,228 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 83, 'y': 139}, {'x': 182, 'y': 140}]})\n",
+ " 88%|███████████████████████████████████-----| 177/200 [16:26<2:08, 10.8 steps/min]2025-08-08 19:39:32,543 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 103})\n",
+ " 88%|███████████████████████████████████-----| 177/200 [16:27<2:08, 10.8 steps/min]2025-08-08 19:39:33,672 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
+ " 89%|███████████████████████████████████-----| 178/200 [16:33<2:02, 10.8 steps/min]2025-08-08 19:39:38,886 - agent.ComputerAgent - INFO - LLM processing started with 24 messages\n",
+ " 89%|███████████████████████████████████-----| 178/200 [16:39<2:03, 10.7 steps/min]2025-08-08 19:39:45,775 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 90%|███████████████████████████████████-----| 179/200 [16:47<1:58, 10.7 steps/min]2025-08-08 19:39:52,435 - agent.ComputerAgent - INFO - LLM processing started with 26 messages\n",
+ " 90%|███████████████████████████████████-----| 179/200 [16:53<1:58, 10.6 steps/min]2025-08-08 19:39:59,802 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 145, 'y': 744})\n",
+ " 90%|████████████████████████████████████----| 180/200 [16:56<1:52, 10.6 steps/min]2025-08-08 19:40:02,614 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 16, 'y': 107})\n",
+ " 90%|████████████████████████████████████----| 181/200 [16:59<1:47, 10.6 steps/min]2025-08-08 19:40:05,795 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
+ " 90%|████████████████████████████████████----| 181/200 [17:03<1:47, 10.6 steps/min]2025-08-08 19:40:08,465 - agent.ComputerAgent - INFO - LLM processing started with 29 messages\n",
+ " 90%|████████████████████████████████████----| 181/200 [17:10<1:48, 10.5 steps/min]2025-08-08 19:40:16,353 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1009, 'y': 30})\n",
+ " 91%|████████████████████████████████████----| 182/200 [17:16<1:42, 10.5 steps/min]2025-08-08 19:40:22,015 - agent.ComputerAgent - INFO - LLM processing started with 35 messages\n",
+ " 91%|████████████████████████████████████----| 182/200 [17:23<1:43, 10.5 steps/min]2025-08-08 19:40:29,384 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 764, 'y': 105})\n",
+ " 92%|████████████████████████████████████----| 183/200 [17:25<1:37, 10.5 steps/min]2025-08-08 19:40:32,732 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 648, 'y': 438})\n",
+ " 92%|████████████████████████████████████----| 184/200 [17:29<1:31, 10.5 steps/min]2025-08-08 19:40:35,397 - agent.ComputerAgent - INFO - LLM processing started with 32 messages\n",
+ " 92%|████████████████████████████████████----| 184/200 [17:33<1:31, 10.5 steps/min]2025-08-08 19:40:40,065 - agent.ComputerAgent - INFO - LLM processing started with 38 messages\n",
+ " 92%|████████████████████████████████████----| 184/200 [17:37<1:31, 10.4 steps/min]2025-08-08 19:40:43,903 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 108})\n",
+ " 92%|█████████████████████████████████████---| 185/200 [17:44<1:26, 10.4 steps/min]2025-08-08 19:40:49,587 - agent.ComputerAgent - INFO - LLM processing started with 34 messages\n",
+ " 92%|█████████████████████████████████████---| 185/200 [17:46<1:26, 10.4 steps/min]2025-08-08 19:40:52,403 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 33, 'y': 154})\n",
+ " 93%|█████████████████████████████████████---| 186/200 [17:53<1:20, 10.4 steps/min]2025-08-08 19:40:59,804 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 763, 'y': 105})\n",
+ " 94%|█████████████████████████████████████---| 187/200 [18:00<1:15, 10.4 steps/min]2025-08-08 19:41:05,496 - agent.ComputerAgent - INFO - LLM processing started with 37 messages\n",
+ " 94%|█████████████████████████████████████---| 187/200 [18:01<1:15, 10.4 steps/min]2025-08-08 19:41:06,675 - agent.ComputerAgent - INFO - LLM processing started with 41 messages\n",
+ " 94%|█████████████████████████████████████---| 187/200 [18:10<1:15, 10.3 steps/min]2025-08-08 19:41:17,113 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 23, 'y': 108})\n",
+ " 94%|█████████████████████████████████████---| 187/200 [18:12<1:15, 10.3 steps/min]2025-08-08 19:41:18,459 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 23, 'y': 155})\n",
+ " 94%|█████████████████████████████████████---| 189/200 [18:17<1:03, 10.3 steps/min]2025-08-08 19:41:23,639 - agent.ComputerAgent - INFO - LLM processing started with 40 messages\n",
+ " 94%|█████████████████████████████████████---| 189/200 [18:19<1:04, 10.3 steps/min]2025-08-08 19:41:26,346 - agent.ComputerAgent - INFO - LLM processing started with 44 messages\n",
+ " 94%|█████████████████████████████████████---| 189/200 [18:27<1:04, 10.2 steps/min]2025-08-08 19:41:34,220 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ "2025-08-08 19:41:35,516 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 30, 'y': 106})\n",
+ " 96%|██████████████████████████████████████--| 191/200 [18:35<0:52, 10.3 steps/min]2025-08-08 19:41:41,726 - agent.ComputerAgent - INFO - LLM processing started with 43 messages\n",
+ " 96%|██████████████████████████████████████--| 191/200 [18:37<0:52, 10.3 steps/min]2025-08-08 19:41:42,406 - agent.ComputerAgent - INFO - LLM processing started with 46 messages\n",
+ " 96%|██████████████████████████████████████--| 191/200 [18:48<0:53, 10.2 steps/min]2025-08-08 19:41:54,298 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 434, 'y': 545})\n",
+ " 96%|██████████████████████████████████████--| 192/200 [18:54<0:47, 10.2 steps/min]2025-08-08 19:42:01,182 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1008, 'y': 34})\n",
+ " 96%|██████████████████████████████████████--| 192/200 [18:56<0:47, 10.1 steps/min]2025-08-08 19:42:02,336 - agent.ComputerAgent - INFO - LLM processing started with 46 messages\n",
+ " 96%|██████████████████████████████████████--| 193/200 [19:00<0:41, 10.2 steps/min]2025-08-08 19:42:07,037 - agent.ComputerAgent - INFO - LLM processing started with 49 messages\n",
+ " 96%|██████████████████████████████████████--| 193/200 [19:06<0:41, 10.1 steps/min]2025-08-08 19:42:13,366 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 14, 'y': 105})\n",
+ " 96%|██████████████████████████████████████--| 193/200 [19:08<0:41, 10.1 steps/min]2025-08-08 19:42:15,279 - agent.ComputerAgent - INFO - Computer: wait({})\n",
+ " 98%|███████████████████████████████████████-| 195/200 [19:13<0:29, 10.1 steps/min]2025-08-08 19:42:19,416 - agent.ComputerAgent - INFO - LLM processing started with 49 messages\n",
+ " 98%|███████████████████████████████████████-| 195/200 [19:15<0:29, 10.1 steps/min]2025-08-08 19:42:21,596 - agent.ComputerAgent - INFO - LLM processing started with 51 messages\n",
+ " 98%|███████████████████████████████████████-| 195/200 [19:23<0:29, 10.1 steps/min]2025-08-08 19:42:29,973 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 1011, 'y': 34})\n",
+ "2025-08-08 19:42:31,295 - agent.ComputerAgent - INFO - Computer: drag({'path': [{'x': 439, 'y': 103}, {'x': 608, 'y': 98}]})\n",
+ " 98%|███████████████████████████████████████-| 197/200 [19:31<0:17, 10.1 steps/min]2025-08-08 19:42:37,998 - agent.ComputerAgent - INFO - LLM processing started with 52 messages\n",
+ " 98%|███████████████████████████████████████-| 197/200 [19:33<0:17, 10.1 steps/min]2025-08-08 19:42:38,676 - agent.ComputerAgent - INFO - LLM processing started with 53 messages\n",
+ " 98%|███████████████████████████████████████-| 197/200 [19:46<0:18, 10.0 steps/min]2025-08-08 19:42:52,119 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 18, 'y': 154})\n",
+ " 99%|███████████████████████████████████████-| 198/200 [19:58<0:12, 9.9 steps/min]]2025-08-08 19:43:05,074 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 61, 'y': 15})\n",
+ "100%|███████████████████████████████████████-| 199/200 [20:05<0:06, 9.9 steps/min]2025-08-08 19:43:11,267 - agent.ComputerAgent - INFO - LLM processing started with 55 messages\n",
+ "100%|███████████████████████████████████████-| 199/200 [20:21<0:06, 9.8 steps/min]2025-08-08 19:43:28,471 - agent.ComputerAgent - INFO - Computer: click({'button': 'left', 'x': 59, 'y': 127})\n",
+ "100%|████████████████████████████████████████| 200/200 [20:32<0:00, 9.7 steps/min]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'task_count': 11, 'avg_reward': 0.2, 'success_rate': 18.181818181818183}\n",
+ "View results at: https://app.hud.so/jobs/d80a9a78-0e06-4b49-ba3e-cb5c8db4ba7c\n"
+ ]
+ }
+ ],
"source": [
"from agent.integrations.hud import run_job\n",
"from hud import load_taskset\n",
+ "from hud.taskset import TaskSet\n",
"import logging\n",
"\n",
"# Load taskset\n",
"taskset = await load_taskset(\"OSWorld-Verified\")\n",
- "taskset = taskset[:10] # limit to 10 tasks instead of all 370\n",
+ "taskset = TaskSet(tasks=taskset[:10]) # limit to 10 tasks instead of all 370\n",
"\n",
"# Run benchmark job\n",
"job = await run_job(\n",
@@ -338,7 +1197,7 @@
" max_concurrent_tasks=5,\n",
" # add any extra ComputerAgent kwargs:\n",
" verbosity=logging.INFO, # Enable logging\n",
- " # trajectory_dir=\"..\" # Save trajectories locally\n",
+ " trajectory_dir=\"trajectories\" # Save trajectories locally\n",
")\n",
"\n",
"# Get results OR view them at app.hud.so\n",