mirror of
https://github.com/trycua/computer.git
synced 2026-02-05 05:09:59 -06:00
Formatting Fix
This commit is contained in:
@@ -10,7 +10,9 @@ class CUAAdapter(CustomLLM):
|
||||
def __init__(self, base_url: str | None = None, api_key: str | None = None, **_: Any):
|
||||
super().__init__()
|
||||
self.base_url = base_url or os.environ.get("CUA_BASE_URL") or "https://inference.cua.ai/v1"
|
||||
self.api_key = api_key or os.environ.get("CUA_INFERENCE_API_KEY") or os.environ.get("CUA_API_KEY")
|
||||
self.api_key = (
|
||||
api_key or os.environ.get("CUA_INFERENCE_API_KEY") or os.environ.get("CUA_API_KEY")
|
||||
)
|
||||
|
||||
def _normalize_model(self, model: str) -> str:
|
||||
# Accept either "cua/<model>" or raw "<model>"
|
||||
|
||||
@@ -59,10 +59,8 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Get API keys from environment or prompt user\n",
|
||||
"anthropic_key = os.getenv(\"ANTHROPIC_API_KEY\") or \\\n",
|
||||
" input(\"Enter your Anthropic API key: \")\n",
|
||||
"openai_key = os.getenv(\"OPENAI_API_KEY\") or \\\n",
|
||||
" input(\"Enter your OpenAI API key: \")\n",
|
||||
"anthropic_key = os.getenv(\"ANTHROPIC_API_KEY\") or input(\"Enter your Anthropic API key: \")\n",
|
||||
"openai_key = os.getenv(\"OPENAI_API_KEY\") or input(\"Enter your OpenAI API key: \")\n",
|
||||
"\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = anthropic_key\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = openai_key"
|
||||
@@ -95,10 +93,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cua_api_key = os.getenv(\"CUA_API_KEY\") or \\\n",
|
||||
" input(\"Enter your Cua API Key: \")\n",
|
||||
"container_name = os.getenv(\"CONTAINER_NAME\") or \\\n",
|
||||
" input(\"Enter your Cloud Container name: \")"
|
||||
"cua_api_key = os.getenv(\"CUA_API_KEY\") or input(\"Enter your Cua API Key: \")\n",
|
||||
"container_name = os.getenv(\"CONTAINER_NAME\") or input(\"Enter your Cloud Container name: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -114,7 +110,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os_type = input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\""
|
||||
"os_type = (\n",
|
||||
" input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -139,7 +137,7 @@
|
||||
" api_key=cua_api_key,\n",
|
||||
" name=container_name,\n",
|
||||
" provider_type=VMProviderType.CLOUD,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create agent\n",
|
||||
@@ -148,8 +146,8 @@
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
")\n"
|
||||
" verbosity=logging.INFO,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -168,7 +166,7 @@
|
||||
"tasks = [\n",
|
||||
" \"Open a web browser and navigate to GitHub\",\n",
|
||||
" \"Search for the trycua/cua repository\",\n",
|
||||
" \"Take a screenshot of the repository page\"\n",
|
||||
" \"Take a screenshot of the repository page\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for i, task in enumerate(tasks):\n",
|
||||
@@ -176,7 +174,7 @@
|
||||
" async for result in agent.run(task):\n",
|
||||
" # print(result)\n",
|
||||
" pass\n",
|
||||
" print(f\"✅ Task {i+1}/{len(tasks)} completed: {task}\")\n"
|
||||
" print(f\"✅ Task {i+1}/{len(tasks)} completed: {task}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -206,7 +204,7 @@
|
||||
" os_type=\"linux\",\n",
|
||||
" provider_type=\"docker\",\n",
|
||||
" image=\"trycua/cua-ubuntu:latest\",\n",
|
||||
" name=\"my-cua-container\"\n",
|
||||
" name=\"my-cua-container\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -237,12 +235,12 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"computer = Computer(\n",
|
||||
" verbosity=logging.INFO, \n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" provider_type=VMProviderType.LUME,\n",
|
||||
" display=\"1024x768\",\n",
|
||||
" memory=\"8GB\",\n",
|
||||
" cpu=\"4\",\n",
|
||||
" os_type=\"macos\"\n",
|
||||
" os_type=\"macos\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -272,7 +270,7 @@
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -328,14 +326,14 @@
|
||||
"\n",
|
||||
"# Create agent with Anthropic loop and provider\n",
|
||||
"agent = ComputerAgent(\n",
|
||||
" model=\"omniparser+ollama_chat/gemma3:12b-it-q4_K_M\",\n",
|
||||
" # model=\"omniparser+openai/gpt-4o-mini\",\n",
|
||||
" # model=\"omniparser+anthropic/claude-3-7-sonnet-20250219\",\n",
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
" )\n",
|
||||
" model=\"omniparser+ollama_chat/gemma3:12b-it-q4_K_M\",\n",
|
||||
" # model=\"omniparser+openai/gpt-4o-mini\",\n",
|
||||
" # model=\"omniparser+anthropic/claude-3-7-sonnet-20250219\",\n",
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"tasks = [\n",
|
||||
" \"Look for a repository named trycua/cua on GitHub.\",\n",
|
||||
@@ -414,7 +412,7 @@
|
||||
" tools=[computer], # Can be cloud or local\n",
|
||||
" model=\"openai/computer-use-preview\",\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -430,7 +428,14 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "anthropic_agent = ComputerAgent(\n tools=[computer],\n model=\"anthropic/claude-sonnet-4-5-20250929\",\n trajectory_dir=str(Path(\"trajectories\")),\n verbosity=logging.INFO\n)\n"
|
||||
"source": [
|
||||
"anthropic_agent = ComputerAgent(\n",
|
||||
" tools=[computer],\n",
|
||||
" model=\"anthropic/claude-sonnet-4-5-20250929\",\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -451,8 +456,8 @@
|
||||
" # model=\"omniparser+openai/gpt-4o-mini\",\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
")\n"
|
||||
" verbosity=logging.INFO,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -470,12 +475,12 @@
|
||||
"source": [
|
||||
"uitars_agent = ComputerAgent(\n",
|
||||
" tools=[computer],\n",
|
||||
" model=\"mlx/mlx-community/UI-TARS-1.5-7B-6bit\", # local MLX\n",
|
||||
" model=\"mlx/mlx-community/UI-TARS-1.5-7B-6bit\", # local MLX\n",
|
||||
" # model=\"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B\", # local Huggingface (transformers)\n",
|
||||
" # model=\"huggingface/ByteDance-Seed/UI-TARS-1.5-7B\", # remote Huggingface (TGI)\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
")\n"
|
||||
" verbosity=logging.INFO,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -495,7 +500,10 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "print(f\"Trajectories saved to: {Path('trajectories').absolute()}\")\nprint(\"Upload trajectory files to https://cua.ai/trajectory-viewer to visualize agent actions\")\n"
|
||||
"source": [
|
||||
"print(f\"Trajectories saved to: {Path('trajectories').absolute()}\")\n",
|
||||
"print(\"Upload trajectory files to https://cua.ai/trajectory-viewer to visualize agent actions\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -96,7 +96,7 @@
|
||||
"source": [
|
||||
"async def execute_action(computer, action):\n",
|
||||
" action_type = action.type\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" if action_type == \"click\":\n",
|
||||
" x = action.x\n",
|
||||
" y = action.y\n",
|
||||
@@ -107,12 +107,12 @@
|
||||
" await computer.interface.right_click()\n",
|
||||
" else:\n",
|
||||
" await computer.interface.left_click()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" elif action_type == \"type\":\n",
|
||||
" text = action.text\n",
|
||||
" print(f\"Typing text: {text}\")\n",
|
||||
" await computer.interface.type_text(text)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" elif action_type == \"scroll\":\n",
|
||||
" x = action.x\n",
|
||||
" y = action.y\n",
|
||||
@@ -121,7 +121,7 @@
|
||||
" print(f\"Scrolling at ({x}, {y}) with offsets (scroll_x={scroll_x}, scroll_y={scroll_y})\")\n",
|
||||
" await computer.interface.move_cursor(x, y)\n",
|
||||
" await computer.interface.scroll(scroll_y) # Assuming CUA provides a scroll method\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" elif action_type == \"keypress\":\n",
|
||||
" keys = action.keys\n",
|
||||
" for key in keys:\n",
|
||||
@@ -133,17 +133,17 @@
|
||||
" await computer.interface.press_key(\"space\")\n",
|
||||
" else:\n",
|
||||
" await computer.interface.press_key(key)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" elif action_type == \"wait\":\n",
|
||||
" print(f\"Waiting for 2 seconds\")\n",
|
||||
" print(\"Waiting for 2 seconds\")\n",
|
||||
" await asyncio.sleep(2)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" elif action_type == \"screenshot\":\n",
|
||||
" print(\"Taking screenshot\")\n",
|
||||
" # This is handled automatically in the main loop, but we can take an extra one if requested\n",
|
||||
" screenshot = await computer.interface.screenshot()\n",
|
||||
" return screenshot\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" else:\n",
|
||||
" print(f\"Unrecognized action: {action_type}\")"
|
||||
]
|
||||
@@ -173,43 +173,45 @@
|
||||
"source": [
|
||||
"async def cua_openai_loop():\n",
|
||||
" # Initialize the CUA computer instance (macOS sandbox)\n",
|
||||
" async with Computer(\n",
|
||||
" display=\"1024x768\",\n",
|
||||
" memory=\"4GB\",\n",
|
||||
" cpu=\"2\",\n",
|
||||
" os_type=\"macos\"\n",
|
||||
" ) as computer:\n",
|
||||
" async with Computer(display=\"1024x768\", memory=\"4GB\", cpu=\"2\", os_type=\"macos\") as computer:\n",
|
||||
" await computer.run()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # Capture the initial screenshot\n",
|
||||
" screenshot = await computer.interface.screenshot()\n",
|
||||
" screenshot_base64 = base64.b64encode(screenshot).decode('utf-8')\n",
|
||||
" screenshot_base64 = base64.b64encode(screenshot).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
" # Initial request to start the loop\n",
|
||||
" response = openai.responses.create(\n",
|
||||
" model=\"computer-use-preview\",\n",
|
||||
" tools=[{\n",
|
||||
" \"type\": \"computer_use_preview\",\n",
|
||||
" \"display_width\": 1024,\n",
|
||||
" \"display_height\": 768,\n",
|
||||
" \"environment\": \"mac\"\n",
|
||||
" }],\n",
|
||||
" input=[\n",
|
||||
" { # type: ignore\n",
|
||||
" \"role\": \"user\", \n",
|
||||
" \"content\": [\n",
|
||||
" {\"type\": \"input_text\", \"text\": \"Open Safari, download and install Cursor.\"},\n",
|
||||
" {\"type\": \"input_image\", \"image_url\": f\"data:image/png;base64,{screenshot_base64}\"}\n",
|
||||
" ]\n",
|
||||
" tools=[\n",
|
||||
" {\n",
|
||||
" \"type\": \"computer_use_preview\",\n",
|
||||
" \"display_width\": 1024,\n",
|
||||
" \"display_height\": 768,\n",
|
||||
" \"environment\": \"mac\",\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" truncation=\"auto\"\n",
|
||||
" input=[\n",
|
||||
" { # type: ignore\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\"type\": \"input_text\", \"text\": \"Open Safari, download and install Cursor.\"},\n",
|
||||
" {\n",
|
||||
" \"type\": \"input_image\",\n",
|
||||
" \"image_url\": f\"data:image/png;base64,{screenshot_base64}\",\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" truncation=\"auto\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Continue the loop until no more computer_call actions\n",
|
||||
" while True:\n",
|
||||
" # Check for computer_call actions\n",
|
||||
" computer_calls = [item for item in response.output if item and item.type == \"computer_call\"]\n",
|
||||
" computer_calls = [\n",
|
||||
" item for item in response.output if item and item.type == \"computer_call\"\n",
|
||||
" ]\n",
|
||||
" if not computer_calls:\n",
|
||||
" print(\"No more computer calls. Loop complete.\")\n",
|
||||
" break\n",
|
||||
@@ -234,33 +236,38 @@
|
||||
"\n",
|
||||
" # Capture new screenshot after action\n",
|
||||
" new_screenshot = await computer.interface.screenshot()\n",
|
||||
" new_screenshot_base64 = base64.b64encode(new_screenshot).decode('utf-8')\n",
|
||||
" new_screenshot_base64 = base64.b64encode(new_screenshot).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
" # Send the screenshot back as computer_call_output\n",
|
||||
" response = openai.responses.create(\n",
|
||||
" model=\"computer-use-preview\",\n",
|
||||
" previous_response_id=response.id, # Link to previous response\n",
|
||||
" tools=[{\n",
|
||||
" \"type\": \"computer_use_preview\",\n",
|
||||
" \"display_width\": 1024,\n",
|
||||
" \"display_height\": 768,\n",
|
||||
" \"environment\": \"mac\"\n",
|
||||
" }],\n",
|
||||
" input=[{ # type: ignore\n",
|
||||
" \"type\": \"computer_call_output\",\n",
|
||||
" \"call_id\": last_call_id,\n",
|
||||
" \"acknowledged_safety_checks\": acknowledged_checks,\n",
|
||||
" \"output\": {\n",
|
||||
" \"type\": \"input_image\",\n",
|
||||
" \"image_url\": f\"data:image/png;base64,{new_screenshot_base64}\"\n",
|
||||
" tools=[\n",
|
||||
" {\n",
|
||||
" \"type\": \"computer_use_preview\",\n",
|
||||
" \"display_width\": 1024,\n",
|
||||
" \"display_height\": 768,\n",
|
||||
" \"environment\": \"mac\",\n",
|
||||
" }\n",
|
||||
" }],\n",
|
||||
" truncation=\"auto\"\n",
|
||||
" ],\n",
|
||||
" input=[\n",
|
||||
" { # type: ignore\n",
|
||||
" \"type\": \"computer_call_output\",\n",
|
||||
" \"call_id\": last_call_id,\n",
|
||||
" \"acknowledged_safety_checks\": acknowledged_checks,\n",
|
||||
" \"output\": {\n",
|
||||
" \"type\": \"input_image\",\n",
|
||||
" \"image_url\": f\"data:image/png;base64,{new_screenshot_base64}\",\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" truncation=\"auto\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # End the session\n",
|
||||
" await computer.stop()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Run the loop\n",
|
||||
"await cua_openai_loop()"
|
||||
]
|
||||
|
||||
@@ -128,12 +128,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = ComputerAgent(\n",
|
||||
" tools=[computer],\n",
|
||||
" model=\"openai/computer-use-preview\",\n",
|
||||
" save_trajectory=True,\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
" )\n",
|
||||
" tools=[computer],\n",
|
||||
" model=\"openai/computer-use-preview\",\n",
|
||||
" save_trajectory=True,\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for i, task in enumerate(tasks):\n",
|
||||
|
||||
@@ -76,13 +76,18 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY') or input('Enter your OPENROUTER_API_KEY: ').strip()\n",
|
||||
"os.environ['OPENROUTER_API_KEY'] = OPENROUTER_API_KEY\n",
|
||||
"OPENROUTER_API_KEY = (\n",
|
||||
" os.getenv(\"OPENROUTER_API_KEY\") or input(\"Enter your OPENROUTER_API_KEY: \").strip()\n",
|
||||
")\n",
|
||||
"os.environ[\"OPENROUTER_API_KEY\"] = OPENROUTER_API_KEY\n",
|
||||
"\n",
|
||||
"# Optional: if planning model uses OpenAI provider\n",
|
||||
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or input('(Optional) Enter your OPENAI_API_KEY (press Enter to skip): ').strip()\n",
|
||||
"OPENAI_API_KEY = (\n",
|
||||
" os.getenv(\"OPENAI_API_KEY\")\n",
|
||||
" or input(\"(Optional) Enter your OPENAI_API_KEY (press Enter to skip): \").strip()\n",
|
||||
")\n",
|
||||
"if OPENAI_API_KEY:\n",
|
||||
" os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY"
|
||||
" os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -104,24 +109,26 @@
|
||||
"from computer import Computer\n",
|
||||
"from agent import ComputerAgent\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def main():\n",
|
||||
" # Launch & connect to a Docker container running the Computer Server\n",
|
||||
" async with Computer(\n",
|
||||
" os_type='linux',\n",
|
||||
" provider_type='docker',\n",
|
||||
" image='trycua/cua-ubuntu:latest',\n",
|
||||
" name='my-cua-container'\n",
|
||||
" os_type=\"linux\",\n",
|
||||
" provider_type=\"docker\",\n",
|
||||
" image=\"trycua/cua-ubuntu:latest\",\n",
|
||||
" name=\"my-cua-container\",\n",
|
||||
" ) as computer:\n",
|
||||
" agent = ComputerAgent(\n",
|
||||
" model='openrouter/z-ai/glm-4.5v+openai/gpt-5-nano',\n",
|
||||
" model=\"openrouter/z-ai/glm-4.5v+openai/gpt-5-nano\",\n",
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir='trajectories' # Save agent trajectory (screenshots, api calls)\n",
|
||||
" trajectory_dir=\"trajectories\", # Save agent trajectory (screenshots, api calls)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Simple task to verify end-to-end\n",
|
||||
" async for _ in agent.run('Open a browser and go to example.com'):\n",
|
||||
" async for _ in agent.run(\"Open a browser and go to example.com\"):\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"asyncio.run(main())"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -49,10 +49,8 @@
|
||||
"# Get API key and container name from environment or prompt user\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"cua_api_key = os.getenv(\"CUA_API_KEY\") or \\\n",
|
||||
" input(\"Enter your Cua API Key: \")\n",
|
||||
"container_name = os.getenv(\"CONTAINER_NAME\") or \\\n",
|
||||
" input(\"Enter your Cloud Container name: \")"
|
||||
"cua_api_key = os.getenv(\"CUA_API_KEY\") or input(\"Enter your Cua API Key: \")\n",
|
||||
"container_name = os.getenv(\"CONTAINER_NAME\") or input(\"Enter your Cloud Container name: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -68,7 +66,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os_type = input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\""
|
||||
"os_type = (\n",
|
||||
" input(\"Enter the OS type of your sandbox (linux/macos) [default: linux]: \").lower() or \"linux\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -121,7 +121,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"await computer.run() # Initialize the computer first\n",
|
||||
"await computer.run() # Initialize the computer first\n",
|
||||
"\n",
|
||||
"screenshot = await computer.interface.screenshot()\n",
|
||||
"\n",
|
||||
@@ -159,10 +159,10 @@
|
||||
" os_type=\"linux\",\n",
|
||||
" provider_type=\"docker\",\n",
|
||||
" image=\"trycua/cua-ubuntu:latest\",\n",
|
||||
" name=\"my-cua-container\"\n",
|
||||
" name=\"my-cua-container\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"await computer.run() # Launch & connect to Docker container"
|
||||
"await computer.run() # Launch & connect to Docker container"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -330,12 +330,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"computer = Computer(\n",
|
||||
" display=\"1024x768\",\n",
|
||||
" memory=\"8GB\",\n",
|
||||
" cpu=\"4\",\n",
|
||||
" os_type=\"macos\"\n",
|
||||
")\n",
|
||||
"computer = Computer(display=\"1024x768\", memory=\"8GB\", cpu=\"4\", os_type=\"macos\")\n",
|
||||
"\n",
|
||||
"await computer.run()"
|
||||
]
|
||||
@@ -513,7 +508,7 @@
|
||||
" memory=\"4GB\",\n",
|
||||
" cpu=\"2\",\n",
|
||||
" os_type=\"macos\",\n",
|
||||
" shared_directories=[\"/absolute/path/to/directory\"]\n",
|
||||
" shared_directories=[\"/absolute/path/to/directory\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -533,11 +528,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"computer = Computer(\n",
|
||||
" display=\"1024x768\",\n",
|
||||
" memory=\"4GB\",\n",
|
||||
" cpu=\"2\",\n",
|
||||
" os_type=\"macos\",\n",
|
||||
" use_host_computer_server=True\n",
|
||||
" display=\"1024x768\", memory=\"4GB\", cpu=\"2\", os_type=\"macos\", use_host_computer_server=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -563,12 +554,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"async with Computer(\n",
|
||||
" display=\"1024x768\",\n",
|
||||
" memory=\"4GB\",\n",
|
||||
" cpu=\"2\",\n",
|
||||
" os_type=\"macos\"\n",
|
||||
") as computer:\n",
|
||||
"async with Computer(display=\"1024x768\", memory=\"4GB\", cpu=\"2\", os_type=\"macos\") as computer:\n",
|
||||
" await computer.run()\n",
|
||||
" res = await computer.interface.run_command(\"ls -a\")\n",
|
||||
"\n",
|
||||
|
||||
@@ -40,10 +40,10 @@
|
||||
" os_type=\"linux\",\n",
|
||||
" provider_type=\"docker\",\n",
|
||||
" image=\"trycua/cua-ubuntu:latest\",\n",
|
||||
" name=\"my-cua-container\"\n",
|
||||
" name=\"my-cua-container\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"await computer.run() # Launch & connect to Docker container"
|
||||
"await computer.run() # Launch & connect to Docker container"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -81,12 +81,12 @@
|
||||
" instructions=instructions,\n",
|
||||
" callbacks=[LoggingCallback(level=logging.INFO)],\n",
|
||||
")\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"Open the settings and turn on dark mode.\"}\n",
|
||||
"]\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": \"Open the settings and turn on dark mode.\"}]\n",
|
||||
"\n",
|
||||
"# In notebooks, you may want to consume the async generator\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def run_once():\n",
|
||||
" async for chunk in agent.run(messages):\n",
|
||||
" # Print any assistant text outputs\n",
|
||||
@@ -96,7 +96,8 @@
|
||||
" if c.get(\"text\"):\n",
|
||||
" print(c.get(\"text\"))\n",
|
||||
"\n",
|
||||
"await run_once()\n"
|
||||
"\n",
|
||||
"await run_once()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -127,11 +128,12 @@
|
||||
" return \"0.00%\"\n",
|
||||
" return f\"{(numerator/denominator)*100:.2f}%\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"agent_with_tool = ComputerAgent(\n",
|
||||
" model=\"openai/computer-use-preview\",\n",
|
||||
" tools=[computer, calculate_percentage],\n",
|
||||
" instructions=\"When doing math, prefer the `calculate_percentage` tool when relevant.\",\n",
|
||||
")\n"
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -148,7 +150,18 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "from agent.callbacks import ImageRetentionCallback, TrajectorySaverCallback\n\nagent_with_callbacks = ComputerAgent(\n model=\"anthropic/claude-sonnet-4-5-20250929\",\n tools=[computer],\n callbacks=[\n ImageRetentionCallback(only_n_most_recent_images=3),\n TrajectorySaverCallback(\"./trajectories\"),\n ],\n)\n"
|
||||
"source": [
|
||||
"from agent.callbacks import ImageRetentionCallback, TrajectorySaverCallback\n",
|
||||
"\n",
|
||||
"agent_with_callbacks = ComputerAgent(\n",
|
||||
" model=\"anthropic/claude-sonnet-4-5-20250929\",\n",
|
||||
" tools=[computer],\n",
|
||||
" callbacks=[\n",
|
||||
" ImageRetentionCallback(only_n_most_recent_images=3),\n",
|
||||
" TrajectorySaverCallback(\"./trajectories\"),\n",
|
||||
" ],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Install dependencies if needed\n",
|
||||
"# !uv venv \n",
|
||||
"# !uv venv\n",
|
||||
"# !source .venv/bin/activate\n",
|
||||
"# !uv sync"
|
||||
]
|
||||
@@ -42,14 +42,14 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Load environment variables from ../.env\n",
|
||||
"load_dotenv(dotenv_path='../.env')\n",
|
||||
"load_dotenv(dotenv_path=\"../.env\")\n",
|
||||
"\n",
|
||||
"# Required environment variables:\n",
|
||||
"# - HUD_API_KEY (for HUD access)\n",
|
||||
"# - ANTHROPIC_API_KEY (for Claude models)\n",
|
||||
"# - OPENAI_API_KEY (for OpenAI models)\n",
|
||||
"assert os.getenv('HUD_API_KEY') is not None\n",
|
||||
"assert os.getenv('ANTHROPIC_API_KEY') is not None or os.getenv('OPENAI_API_KEY') is not None\n",
|
||||
"assert os.getenv(\"HUD_API_KEY\") is not None\n",
|
||||
"assert os.getenv(\"ANTHROPIC_API_KEY\") is not None or os.getenv(\"OPENAI_API_KEY\") is not None\n",
|
||||
"\n",
|
||||
"from pprint import pprint"
|
||||
]
|
||||
@@ -76,7 +76,7 @@
|
||||
"await run_single_task(\n",
|
||||
" dataset=\"hud-evals/OSWorld-Verified\",\n",
|
||||
" model=\"openai/computer-use-preview+openai/gpt-5\", # or any supported model string\n",
|
||||
" task_id=155 # open last tab task (easy)\n",
|
||||
" task_id=155, # open last tab task (easy)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -100,12 +100,12 @@
|
||||
"job_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n",
|
||||
"\n",
|
||||
"results = await run_full_dataset(\n",
|
||||
" dataset=\"hud-evals/OSWorld-Verified\", # You can also pass a Dataset or a list[dict]\n",
|
||||
" job_name=job_name, # Optional; defaults to a timestamp for custom datasets\n",
|
||||
" model=\"openai/computer-use-preview\", # Or any supported model string\n",
|
||||
" max_concurrent=20, # Tune to your infra\n",
|
||||
" max_steps=50, # Safety cap per task\n",
|
||||
" split=\"train[:3]\" # Limit to just 3 tasks\n",
|
||||
" dataset=\"hud-evals/OSWorld-Verified\", # You can also pass a Dataset or a list[dict]\n",
|
||||
" job_name=job_name, # Optional; defaults to a timestamp for custom datasets\n",
|
||||
" model=\"openai/computer-use-preview\", # Or any supported model string\n",
|
||||
" max_concurrent=20, # Tune to your infra\n",
|
||||
" max_steps=50, # Safety cap per task\n",
|
||||
" split=\"train[:3]\", # Limit to just 3 tasks\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",
|
||||
@@ -133,7 +133,7 @@
|
||||
"models_to_test = [\n",
|
||||
" \"openai/computer-use-preview+anthropic/claude-opus-4-20250514\",\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for model in models_to_test:\n",
|
||||
" # Full dataset evaluation (runs via HUD's run_dataset under the hood)\n",
|
||||
@@ -142,12 +142,12 @@
|
||||
"\n",
|
||||
" results = await run_full_dataset(\n",
|
||||
" dataset=\"hud-evals/OSWorld-Verified\",\n",
|
||||
" job_name=job_name, \n",
|
||||
" job_name=job_name,\n",
|
||||
" model=model,\n",
|
||||
" max_concurrent=20, \n",
|
||||
" max_concurrent=20,\n",
|
||||
" max_steps=75,\n",
|
||||
" trajectory_dir=f\"trajectories/osworld_{job_uuid}\",\n",
|
||||
" only_n_most_recent_images=3\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",
|
||||
|
||||
@@ -73,11 +73,12 @@
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"from pathlib import Path\n",
|
||||
"if not Path('.env').exists():\n",
|
||||
" Path('.env').write_text(ENV_TEMPLATE)\n",
|
||||
" print('A .env file was created! Fill in the empty values you need.')\n",
|
||||
"\n",
|
||||
"if not Path(\".env\").exists():\n",
|
||||
" Path(\".env\").write_text(ENV_TEMPLATE)\n",
|
||||
" print(\"A .env file was created! Fill in the empty values you need.\")\n",
|
||||
"else:\n",
|
||||
" print('.env already exists')\n"
|
||||
" print(\".env already exists\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -90,10 +91,11 @@
|
||||
"# Load .env into environment\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"load_dotenv(dotenv_path='.env', override=True)\n",
|
||||
"print('OPENAI_API_KEY set:', bool(os.getenv('OPENAI_API_KEY')))\n",
|
||||
"print('ANTHROPIC_API_KEY set:', bool(os.getenv('ANTHROPIC_API_KEY')))\n",
|
||||
"print('OLLAMA_API_BASE:', os.getenv('OLLAMA_API_BASE', 'http://localhost:11434'))\n"
|
||||
"\n",
|
||||
"load_dotenv(dotenv_path=\".env\", override=True)\n",
|
||||
"print(\"OPENAI_API_KEY set:\", bool(os.getenv(\"OPENAI_API_KEY\")))\n",
|
||||
"print(\"ANTHROPIC_API_KEY set:\", bool(os.getenv(\"ANTHROPIC_API_KEY\")))\n",
|
||||
"print(\"OLLAMA_API_BASE:\", os.getenv(\"OLLAMA_API_BASE\", \"http://localhost:11434\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -143,15 +145,11 @@
|
||||
"from computer import Computer, VMProviderType\n",
|
||||
"import webbrowser\n",
|
||||
"\n",
|
||||
"computer = Computer(\n",
|
||||
" os_type=\"linux\",\n",
|
||||
" provider_type=VMProviderType.DOCKER,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
")\n",
|
||||
"computer = Computer(os_type=\"linux\", provider_type=VMProviderType.DOCKER, verbosity=logging.INFO)\n",
|
||||
"await computer.run()\n",
|
||||
"\n",
|
||||
"# Optional: open the VNC page in your browser\n",
|
||||
"webbrowser.open('http://localhost:8006/', new=0, autoraise=True)\n"
|
||||
"webbrowser.open(\"http://localhost:8006/\", new=0, autoraise=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -181,16 +179,16 @@
|
||||
"agent_all_in_one = ComputerAgent(\n",
|
||||
" model=\"ollama/blaifa/InternVL3_5:8b\",\n",
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir=str(Path('trajectories')),\n",
|
||||
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" # instructions=\"You are a helpful assistant.\" # Editable instructions for prompt engineering\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print('Running all-in-one Ollama CUA model...')\n",
|
||||
"print(\"Running all-in-one Ollama CUA model...\")\n",
|
||||
"async for _ in agent_all_in_one.run(\"Open the web browser and go to example.com\"):\n",
|
||||
" pass\n",
|
||||
"print('✅ Done')\n"
|
||||
"print(\"✅ Done\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -221,15 +219,15 @@
|
||||
"agent_composed = ComputerAgent(\n",
|
||||
" model=\"anthropic/claude-3-5-sonnet-20241022+ollama/gemma3:4b\",\n",
|
||||
" tools=[computer],\n",
|
||||
" trajectory_dir='trajectories',\n",
|
||||
" trajectory_dir=\"trajectories\",\n",
|
||||
" only_n_most_recent_images=3,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print('Running composed agent (OpenAI grounding + Ollama VLM)...')\n",
|
||||
"print(\"Running composed agent (OpenAI grounding + Ollama VLM)...\")\n",
|
||||
"async for _ in agent_composed.run(\"Open a text editor and type: Hello from composed model!\"):\n",
|
||||
" pass\n",
|
||||
"print('✅ Done')\n"
|
||||
"print(\"✅ Done\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -79,6 +79,7 @@
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.path.exists(\".env\"):\n",
|
||||
" open(\".env\", \"w\").write(ENV_TEMPLATE)\n",
|
||||
" print(\"A .env file was created! Fill in the empty values.\")"
|
||||
@@ -103,7 +104,8 @@
|
||||
"# HUD requires the .env file to be in the same directory\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"load_dotenv(dotenv_path='.env', override=True)\n",
|
||||
"\n",
|
||||
"load_dotenv(dotenv_path=\".env\", override=True)\n",
|
||||
"\n",
|
||||
"assert os.getenv(\"HUD_API_KEY\")"
|
||||
]
|
||||
@@ -124,7 +126,22 @@
|
||||
"id": "cd4393b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "import logging\nfrom pathlib import Path\nfrom agent import ComputerAgent\n\n# Here you can set the model and tools for your agent.\n# Computer use models: https://cua.ai/docs/agent-sdk/supported-agents/computer-use-agents\n# Composed agent models: https://cua.ai/docs/agent-sdk/supported-agents/composed-agents\n# Custom tools: https://cua.ai/docs/agent-sdk/custom-tools\nagent_config = {\n \"model\": \"openai/computer-use-preview\",\n \"trajectory_dir\": str(Path(\"trajectories\")),\n \"only_n_most_recent_images\": 3,\n \"verbosity\": logging.INFO\n}"
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"from pathlib import Path\n",
|
||||
"from agent import ComputerAgent\n",
|
||||
"\n",
|
||||
"# Here you can set the model and tools for your agent.\n",
|
||||
"# Computer use models: https://cua.ai/docs/agent-sdk/supported-agents/computer-use-agents\n",
|
||||
"# Composed agent models: https://cua.ai/docs/agent-sdk/supported-agents/composed-agents\n",
|
||||
"# Custom tools: https://cua.ai/docs/agent-sdk/custom-tools\n",
|
||||
"agent_config = {\n",
|
||||
" \"model\": \"openai/computer-use-preview\",\n",
|
||||
" \"trajectory_dir\": str(Path(\"trajectories\")),\n",
|
||||
" \"only_n_most_recent_images\": 3,\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -157,14 +174,10 @@
|
||||
"import webbrowser\n",
|
||||
"\n",
|
||||
"# Connect to your existing cloud container\n",
|
||||
"computer = Computer(\n",
|
||||
" os_type=\"linux\",\n",
|
||||
" provider_type=VMProviderType.DOCKER,\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
")\n",
|
||||
"computer = Computer(os_type=\"linux\", provider_type=VMProviderType.DOCKER, verbosity=logging.INFO)\n",
|
||||
"await computer.run()\n",
|
||||
"\n",
|
||||
"agent_config[\"tools\"] = [ computer ]\n",
|
||||
"agent_config[\"tools\"] = [computer]\n",
|
||||
"\n",
|
||||
"webbrowser.open(\"http://localhost:8006/\", new=0, autoraise=True)"
|
||||
]
|
||||
@@ -189,9 +202,7 @@
|
||||
"# Create agent\n",
|
||||
"agent = ComputerAgent(**agent_config)\n",
|
||||
"\n",
|
||||
"tasks = [\n",
|
||||
" \"Open the web browser and search for a repository named trycua/cua on GitHub.\"\n",
|
||||
"]\n",
|
||||
"tasks = [\"Open the web browser and search for a repository named trycua/cua on GitHub.\"]\n",
|
||||
"\n",
|
||||
"for i, task in enumerate(tasks):\n",
|
||||
" print(f\"\\nExecuting task {i}/{len(tasks)}: {task}\")\n",
|
||||
@@ -218,7 +229,29 @@
|
||||
"id": "6bf0887e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "import uuid\nfrom pprint import pprint\nfrom agent.integrations.hud import run_full_dataset\n\njob_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n\n# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n# See the documentation here: https://cua.ai/docs/agent-sdk/integrations/hud#running-a-full-dataset\nresults = await run_full_dataset(\n dataset=\"ddupont/OSWorld-Tiny-Public\",\n job_name=job_name,\n **agent_config,\n max_concurrent=20,\n max_steps=50,\n #split=\"train[:5]\"\n)\n\n# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\nprint(f\"Job: {job_name}\")\nprint(f\"Total results: {len(results)}\")\npprint(results[:3])"
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"from pprint import pprint\n",
|
||||
"from agent.integrations.hud import run_full_dataset\n",
|
||||
"\n",
|
||||
"job_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n",
|
||||
"\n",
|
||||
"# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n",
|
||||
"# See the documentation here: https://cua.ai/docs/agent-sdk/integrations/hud#running-a-full-dataset\n",
|
||||
"results = await run_full_dataset(\n",
|
||||
" dataset=\"ddupont/OSWorld-Tiny-Public\",\n",
|
||||
" job_name=job_name,\n",
|
||||
" **agent_config,\n",
|
||||
" max_concurrent=20,\n",
|
||||
" max_steps=50,\n",
|
||||
" # split=\"train[:5]\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",
|
||||
"print(f\"Job: {job_name}\")\n",
|
||||
"print(f\"Total results: {len(results)}\")\n",
|
||||
"pprint(results[:3])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
||||
@@ -78,6 +78,7 @@
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.path.exists(\".env\"):\n",
|
||||
" open(\".env\", \"w\").write(ENV_TEMPLATE)\n",
|
||||
" print(\"A .env file was created! Fill in the empty values.\")"
|
||||
@@ -102,7 +103,8 @@
|
||||
"# HUD requires the .env file to be in the same directory\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"load_dotenv(dotenv_path='.env', override=True)\n",
|
||||
"\n",
|
||||
"load_dotenv(dotenv_path=\".env\", override=True)\n",
|
||||
"\n",
|
||||
"assert os.getenv(\"CUA_API_KEY\")\n",
|
||||
"assert os.getenv(\"CUA_CONTAINER_NAME\")\n",
|
||||
@@ -125,7 +127,22 @@
|
||||
"id": "cd4393b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "import logging\nfrom pathlib import Path\nfrom agent import ComputerAgent\n\n# Here you can set the model and tools for your agent.\n# Computer use models: https://cua.ai/docs/agent-sdk/supported-agents/computer-use-agents\n# Composed agent models: https://cua.ai/docs/agent-sdk/supported-agents/composed-agents\n# Custom tools: https://cua.ai/docs/agent-sdk/custom-tools\nagent_config = {\n \"model\": \"openai/computer-use-preview\",\n \"trajectory_dir\": str(Path(\"trajectories\")),\n \"only_n_most_recent_images\": 3,\n \"verbosity\": logging.INFO\n}"
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"from pathlib import Path\n",
|
||||
"from agent import ComputerAgent\n",
|
||||
"\n",
|
||||
"# Here you can set the model and tools for your agent.\n",
|
||||
"# Computer use models: https://cua.ai/docs/agent-sdk/supported-agents/computer-use-agents\n",
|
||||
"# Composed agent models: https://cua.ai/docs/agent-sdk/supported-agents/composed-agents\n",
|
||||
"# Custom tools: https://cua.ai/docs/agent-sdk/custom-tools\n",
|
||||
"agent_config = {\n",
|
||||
" \"model\": \"openai/computer-use-preview\",\n",
|
||||
" \"trajectory_dir\": str(Path(\"trajectories\")),\n",
|
||||
" \"only_n_most_recent_images\": 3,\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -158,10 +175,10 @@
|
||||
" provider_type=VMProviderType.CLOUD,\n",
|
||||
" name=os.getenv(\"CUA_CONTAINER_NAME\") or \"\",\n",
|
||||
" api_key=os.getenv(\"CUA_API_KEY\"),\n",
|
||||
" verbosity=logging.INFO\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent_config[\"tools\"] = [ computer ]"
|
||||
"agent_config[\"tools\"] = [computer]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -180,9 +197,7 @@
|
||||
"# Create agent\n",
|
||||
"agent = ComputerAgent(**agent_config)\n",
|
||||
"\n",
|
||||
"tasks = [\n",
|
||||
" \"Open the web browser and search for a repository named trycua/cua on GitHub.\"\n",
|
||||
"]\n",
|
||||
"tasks = [\"Open the web browser and search for a repository named trycua/cua on GitHub.\"]\n",
|
||||
"\n",
|
||||
"for i, task in enumerate(tasks):\n",
|
||||
" print(f\"\\nExecuting task {i}/{len(tasks)}: {task}\")\n",
|
||||
@@ -209,7 +224,29 @@
|
||||
"id": "6bf0887e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "import uuid\nfrom pprint import pprint\nfrom agent.integrations.hud import run_full_dataset\n\njob_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n\n# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n# See the documentation here: https://cua.ai/docs/agent-sdk/integrations/hud#running-a-full-dataset\nresults = await run_full_dataset(\n dataset=\"ddupont/OSWorld-Tiny-Public\",\n job_name=job_name,\n **agent_config,\n max_concurrent=20,\n max_steps=50,\n #split=\"train[:5]\"\n)\n\n# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\nprint(f\"Job: {job_name}\")\nprint(f\"Total results: {len(results)}\")\npprint(results[:3])"
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"from pprint import pprint\n",
|
||||
"from agent.integrations.hud import run_full_dataset\n",
|
||||
"\n",
|
||||
"job_name = f\"osworld-test-{str(uuid.uuid4())[:4]}\"\n",
|
||||
"\n",
|
||||
"# Full dataset evaluation (runs via HUD's run_dataset under the hood)\n",
|
||||
"# See the documentation here: https://cua.ai/docs/agent-sdk/integrations/hud#running-a-full-dataset\n",
|
||||
"results = await run_full_dataset(\n",
|
||||
" dataset=\"ddupont/OSWorld-Tiny-Public\",\n",
|
||||
" job_name=job_name,\n",
|
||||
" **agent_config,\n",
|
||||
" max_concurrent=20,\n",
|
||||
" max_steps=50,\n",
|
||||
" # split=\"train[:5]\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",
|
||||
"print(f\"Job: {job_name}\")\n",
|
||||
"print(f\"Total results: {len(results)}\")\n",
|
||||
"pprint(results[:3])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
||||
61
uv.lock
generated
61
uv.lock
generated
@@ -861,7 +861,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "cua-agent"
|
||||
version = "0.4.37"
|
||||
version = "0.4.39"
|
||||
source = { editable = "libs/python/agent" }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
@@ -1015,7 +1015,7 @@ provides-extras = ["openai", "anthropic", "qwen", "omni", "uitars", "uitars-mlx"
|
||||
|
||||
[[package]]
|
||||
name = "cua-computer"
|
||||
version = "0.4.11"
|
||||
version = "0.4.12"
|
||||
source = { editable = "libs/python/computer" }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
@@ -1059,13 +1059,12 @@ provides-extras = ["lume", "lumier", "ui", "all"]
|
||||
|
||||
[[package]]
|
||||
name = "cua-computer-server"
|
||||
version = "0.1.28"
|
||||
version = "0.1.30"
|
||||
source = { editable = "libs/python/computer-server" }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
{ name = "fastapi" },
|
||||
{ name = "pillow" },
|
||||
{ name = "pip-system-certs", marker = "sys_platform == 'win32'" },
|
||||
{ name = "pyautogui" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pynput" },
|
||||
@@ -1073,6 +1072,7 @@ dependencies = [
|
||||
{ name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "pyperclip" },
|
||||
{ name = "python-certifi-win32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "python-xlib", marker = "sys_platform == 'linux'" },
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "pywinctl" },
|
||||
@@ -1098,7 +1098,6 @@ requires-dist = [
|
||||
{ name = "aiohttp", specifier = ">=3.9.1" },
|
||||
{ name = "fastapi", specifier = ">=0.111.0" },
|
||||
{ name = "pillow", specifier = ">=10.2.0" },
|
||||
{ name = "pip-system-certs", marker = "sys_platform == 'win32'" },
|
||||
{ name = "pyautogui", specifier = ">=0.9.54" },
|
||||
{ name = "pydantic", specifier = ">=2.0.0" },
|
||||
{ name = "pynput", specifier = ">=1.8.1" },
|
||||
@@ -1109,6 +1108,7 @@ requires-dist = [
|
||||
{ name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'", specifier = ">=10.1" },
|
||||
{ name = "pyobjc-framework-quartz", marker = "extra == 'macos'", specifier = ">=10.1" },
|
||||
{ name = "pyperclip", specifier = ">=1.9.0" },
|
||||
{ name = "python-certifi-win32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "python-xlib", marker = "sys_platform == 'linux'", specifier = ">=0.33" },
|
||||
{ name = "python-xlib", marker = "extra == 'linux'", specifier = ">=0.33" },
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'", specifier = ">=310" },
|
||||
@@ -1847,6 +1847,8 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" },
|
||||
@@ -1856,6 +1858,8 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
|
||||
]
|
||||
|
||||
@@ -4330,27 +4334,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803, upload-time = "2025-07-01T09:15:15.695Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pip"
|
||||
version = "25.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fe/6e/74a3f0179a4a73a53d66ce57fdb4de0080a8baa1de0063de206d6167acc2/pip-25.3.tar.gz", hash = "sha256:8d0538dbbd7babbd207f261ed969c65de439f6bc9e5dbd3b3b9a77f25d95f343", size = 1803014, upload-time = "2025-10-25T00:55:41.394Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/44/3c/d717024885424591d5376220b5e836c2d5293ce2011523c9de23ff7bf068/pip-25.3-py3-none-any.whl", hash = "sha256:9655943313a94722b7774661c21049070f6bbb0a1516bf02f7c8d5d9201514cd", size = 1778622, upload-time = "2025-10-25T00:55:39.247Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pip-system-certs"
|
||||
version = "5.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pip", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7d/6a/563b05a4f6c9ddc205c98bb413e74221368efb98b8fb9cca96b578b8930c/pip_system_certs-5.3.tar.gz", hash = "sha256:19c8bf9957bcce7d69c4dbc2d0b2ef13de1984d53f50a59012e6dbbad0af67c6", size = 6395, upload-time = "2025-10-16T06:14:55.217Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9f/57/752b63c609affae8f26ae0f1d1103d6ea7e707ad45943f62f7422936071d/pip_system_certs-5.3-py3-none-any.whl", hash = "sha256:3fbb5de62e374a99b688b1ad06e64ee5c4aeb633ef23e3a677d32e3e84fd863c", size = 6896, upload-time = "2025-10-16T06:14:54.072Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "platformdirs"
|
||||
version = "4.5.0"
|
||||
@@ -7479,6 +7462,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/74/79/3323f08c98b9a5b726303b68babdd26cf4fe710709b7c61c96e6bb4f3d10/python_bidi-0.6.6-cp313-cp313-win_amd64.whl", hash = "sha256:63f7a9eaec31078e7611ab958b6e18e796c05b63ca50c1f7298311dc1e15ac3e", size = 159973, upload-time = "2025-02-18T21:43:10.431Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-certifi-win32"
|
||||
version = "1.6.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi", marker = "sys_platform == 'win32'" },
|
||||
{ name = "setuptools-scm", marker = "sys_platform == 'win32'" },
|
||||
{ name = "wrapt", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/16/c5/9c455ba848b14adce70c0176106fad190b7854acdc120cf9e72af7b9ac2d/python_certifi_win32-1.6.1-py2.py3-none-any.whl", hash = "sha256:508fd4fb1730cad2d9dada061df737650c8cfaa205d64657faa4cc6a55384402", size = 7256, upload-time = "2022-07-02T22:13:55.87Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
@@ -8165,6 +8161,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools-scm"
|
||||
version = "9.2.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "packaging", marker = "sys_platform == 'win32'" },
|
||||
{ name = "setuptools", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7b/b1/19587742aad604f1988a8a362e660e8c3ac03adccdb71c96d86526e5eb62/setuptools_scm-9.2.2.tar.gz", hash = "sha256:1c674ab4665686a0887d7e24c03ab25f24201c213e82ea689d2f3e169ef7ef57", size = 203385, upload-time = "2025-10-19T22:08:05.608Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/ea/ac2bf868899d0d2e82ef72d350d97a846110c709bacf2d968431576ca915/setuptools_scm-9.2.2-py3-none-any.whl", hash = "sha256:30e8f84d2ab1ba7cb0e653429b179395d0c33775d54807fc5f1dd6671801aef7", size = 62975, upload-time = "2025-10-19T22:08:04.007Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shapely"
|
||||
version = "2.1.2"
|
||||
|
||||
Reference in New Issue
Block a user