mirror of
https://github.com/trycua/computer.git
synced 2026-01-04 12:30:08 -06:00
557 lines
14 KiB
Plaintext
557 lines
14 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Agent\n",
|
|
"\n",
|
|
"This notebook demonstrates how to use Cua's Agent to run workflows in virtual sandboxes, either using C/ua Cloud Containers or local VMs on Apple Silicon Macs."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Installation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip uninstall -y cua-agent"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install \"cua-agent[all]\"\n",
|
|
"\n",
|
|
"# Or install individual agent loops:\n",
|
|
"# !pip install cua-agent[openai]\n",
|
|
"# !pip install cua-agent[anthropic]\n",
|
|
"# !pip install cua-agent[uitars]\n",
|
|
"# !pip install cua-agent[omni]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# If locally installed, use this instead:\n",
|
|
"import os\n",
|
|
"\n",
|
|
"os.chdir('../libs/agent')\n",
|
|
"!poetry install\n",
|
|
"!poetry build\n",
|
|
"\n",
|
|
"!pip uninstall cua-agent -y\n",
|
|
"!pip install ./dist/cua_agent-0.1.0-py3-none-any.whl --force-reinstall"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Initialize a Computer Agent"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Agent allows you to run an agentic workflow in virtual sandbox instances. You can choose between cloud containers or local VMs."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from computer import Computer, VMProviderType\n",
|
|
"from agent import ComputerAgent, LLM, AgentLoop, LLMProvider"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"\n",
|
|
"# Get API keys from environment or prompt user\n",
|
|
"anthropic_key = os.getenv(\"ANTHROPIC_API_KEY\") or \\\n",
|
|
" input(\"Enter your Anthropic API key: \")\n",
|
|
"openai_key = os.getenv(\"OPENAI_API_KEY\") or \\\n",
|
|
" input(\"Enter your OpenAI API key: \")\n",
|
|
"\n",
|
|
"os.environ[\"ANTHROPIC_API_KEY\"] = anthropic_key\n",
|
|
"os.environ[\"OPENAI_API_KEY\"] = openai_key"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Option 1: Agent with C/ua Cloud Containers\n",
|
|
"\n",
|
|
"Use cloud containers for running agents from any system without local setup."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Prerequisites for Cloud Containers\n",
|
|
"\n",
|
|
"To use C/ua Cloud Containers, you need to:\n",
|
|
"1. Sign up at https://trycua.com\n",
|
|
"2. Create a Cloud Container\n",
|
|
"3. Generate an API Key\n",
|
|
"\n",
|
|
"Once you have these, you can connect to your cloud container and run agents on it."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Get C/ua API credentials and container details"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cua_api_key = os.getenv(\"CUA_API_KEY\") or \\\n",
|
|
" input(\"Enter your C/ua API Key: \")\n",
|
|
"container_name = os.getenv(\"CONTAINER_NAME\") or \\\n",
|
|
" input(\"Enter your Cloud Container name: \")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Choose the OS type for your container (linux or macos)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Create an agent with cloud container"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import logging\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"# Connect to your existing cloud container\n",
|
|
"computer = Computer(\n",
|
|
" os_type=os_type,\n",
|
|
" api_key=cua_api_key,\n",
|
|
" name=container_name,\n",
|
|
" provider_type=VMProviderType.CLOUD,\n",
|
|
" verbosity=logging.INFO\n",
|
|
")\n",
|
|
"\n",
|
|
"# Create agent\n",
|
|
"agent = ComputerAgent(\n",
|
|
" computer=computer,\n",
|
|
" loop=AgentLoop.OPENAI,\n",
|
|
" model=LLM(provider=LLMProvider.OPENAI),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" only_n_most_recent_images=3,\n",
|
|
" verbosity=logging.INFO\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Run tasks on cloud container"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tasks = [\n",
|
|
" \"Open a web browser and navigate to GitHub\",\n",
|
|
" \"Search for the trycua/cua repository\",\n",
|
|
" \"Take a screenshot of the repository page\"\n",
|
|
"]\n",
|
|
"\n",
|
|
"for i, task in enumerate(tasks):\n",
|
|
" print(f\"\\nExecuting task {i+1}/{len(tasks)}: {task}\")\n",
|
|
"    async for result in agent.run(task):\n",
|
|
" # print(result)\n",
|
|
" pass\n",
|
|
" print(f\"✅ Task {i+1}/{len(tasks)} completed: {task}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Option 2: Agent with Local VMs (Lume daemon)\n",
|
|
"\n",
|
|
"For Apple Silicon Macs, run agents on local VMs with near-native performance."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Before we can create an agent, we need to initialize a local computer with Lume."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import logging\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"\n",
|
|
"computer = Computer(\n",
|
|
" verbosity=logging.INFO, \n",
|
|
" provider_type=VMProviderType.LUME,\n",
|
|
" display=\"1024x768\",\n",
|
|
" memory=\"8GB\",\n",
|
|
" cpu=\"4\",\n",
|
|
" os_type=\"macos\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Create an agent with local VM"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Let's start by creating an agent that relies on the OpenAI API computer-use-preview model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create agent with OpenAI loop and provider\n",
|
|
"agent = ComputerAgent(\n",
|
|
" computer=computer,\n",
|
|
" loop=AgentLoop.OPENAI,\n",
|
|
" model=LLM(provider=LLMProvider.OPENAI),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" only_n_most_recent_images=3,\n",
|
|
" verbosity=logging.INFO\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Run tasks on a local Lume VM"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tasks = [\n",
|
|
" \"Look for a repository named trycua/cua on GitHub.\",\n",
|
|
" \"Check the open issues, open the most recent one and read it.\",\n",
|
|
" \"Clone the repository in users/lume/projects if it doesn't exist yet.\",\n",
|
|
" \"Open the repository with an app named Cursor (on the dock, black background and white cube icon).\",\n",
|
|
" \"From Cursor, open Composer if not already open.\",\n",
|
|
" \"Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.\",\n",
|
|
"]\n",
|
|
"\n",
|
|
"for i, task in enumerate(tasks):\n",
|
|
"    print(f\"\\nExecuting task {i+1}/{len(tasks)}: {task}\")\n",
|
|
" async for result in agent.run(task):\n",
|
|
" # print(result)\n",
|
|
" pass\n",
|
|
"\n",
|
|
" print(f\"\\n✅ Task {i+1}/{len(tasks)} completed: {task}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Or using the Omni Agent Loop:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import logging\n",
|
|
"from pathlib import Path\n",
|
|
"from agent import ComputerAgent, LLM, AgentLoop\n",
|
|
"\n",
|
|
"computer = Computer(verbosity=logging.INFO)\n",
|
|
"\n",
|
|
"# Create agent with Omni loop and Ollama provider\n",
|
|
"agent = ComputerAgent(\n",
|
|
" computer=computer,\n",
|
|
" loop=AgentLoop.OMNI,\n",
|
|
" # model=LLM(provider=LLMProvider.ANTHROPIC, name=\"claude-3-7-sonnet-20250219\"),\n",
|
|
" # model=LLM(provider=LLMProvider.OPENAI, name=\"gpt-4.5-preview\"),\n",
|
|
" model=LLM(provider=LLMProvider.OLLAMA, name=\"gemma3:12b-it-q4_K_M\"),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" only_n_most_recent_images=3,\n",
|
|
" verbosity=logging.INFO\n",
|
|
" )\n",
|
|
"\n",
|
|
"tasks = [\n",
|
|
" \"Look for a repository named trycua/cua on GitHub.\",\n",
|
|
" \"Check the open issues, open the most recent one and read it.\",\n",
|
|
" \"Clone the repository in users/lume/projects if it doesn't exist yet.\",\n",
|
|
" \"Open the repository with an app named Cursor (on the dock, black background and white cube icon).\",\n",
|
|
" \"From Cursor, open Composer if not already open.\",\n",
|
|
" \"Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.\",\n",
|
|
"]\n",
|
|
"\n",
|
|
"for i, task in enumerate(tasks):\n",
|
|
"    print(f\"\\nExecuting task {i+1}/{len(tasks)}: {task}\")\n",
|
|
" async for result in agent.run(task):\n",
|
|
" # print(result)\n",
|
|
" pass\n",
|
|
"\n",
|
|
" print(f\"\\n✅ Task {i+1}/{len(tasks)} completed: {task}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Using the Gradio UI\n",
|
|
"\n",
|
|
"The agent includes a Gradio-based user interface for easy interaction. To use it:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from agent.ui.gradio.app import create_gradio_ui\n",
|
|
"\n",
|
|
"app = create_gradio_ui()\n",
|
|
"app.launch(share=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Advanced Agent Configurations"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Using different agent loops"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"You can use different agent loops depending on your needs:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"1. OpenAI Agent Loop"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai_agent = ComputerAgent(\n",
|
|
" computer=computer, # Can be cloud or local\n",
|
|
" loop=AgentLoop.OPENAI,\n",
|
|
" model=LLM(provider=LLMProvider.OPENAI),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" verbosity=logging.INFO\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"2. Anthropic Agent Loop"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"anthropic_agent = ComputerAgent(\n",
|
|
" computer=computer,\n",
|
|
" loop=AgentLoop.ANTHROPIC,\n",
|
|
" model=LLM(provider=LLMProvider.ANTHROPIC),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" verbosity=logging.INFO\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"3. Omni Agent Loop (supports multiple providers)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"omni_agent = ComputerAgent(\n",
|
|
" computer=computer,\n",
|
|
" loop=AgentLoop.OMNI,\n",
|
|
" model=LLM(provider=LLMProvider.ANTHROPIC, name=\"claude-3-7-sonnet-20250219\"),\n",
|
|
" # model=LLM(provider=LLMProvider.OPENAI, name=\"gpt-4.5-preview\"),\n",
|
|
" # model=LLM(provider=LLMProvider.OLLAMA, name=\"gemma3:12b-it-q4_K_M\"),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" only_n_most_recent_images=3,\n",
|
|
" verbosity=logging.INFO\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"4. UITARS Agent Loop (for local inference on Apple Silicon)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"uitars_agent = ComputerAgent(\n",
|
|
" computer=computer,\n",
|
|
" loop=AgentLoop.UITARS,\n",
|
|
" model=LLM(provider=LLMProvider.UITARS),\n",
|
|
" save_trajectory=True,\n",
|
|
" trajectory_dir=str(Path(\"trajectories\")),\n",
|
|
" verbosity=logging.INFO\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Trajectory viewing"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"All agent runs save trajectories that can be viewed at https://trycua.com/trajectory-viewer"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(f\"Trajectories saved to: {Path('trajectories').absolute()}\")\n",
|
|
"print(\"Upload trajectory files to https://trycua.com/trajectory-viewer to visualize agent actions\")\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "cua312",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|