computer/notebooks/agent_nb.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Agent\n",
    "\n",
    "This notebook demonstrates how to use Cua's Agent to run a workflow in a virtual sandbox on Apple Silicon Macs."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove any previously installed copy; -y skips pip's confirmation prompt.\n",
    "# (No [all] extra here: unquoted brackets are a glob pattern in zsh.)\n",
    "%pip uninstall -y cua-agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quote the requirement: zsh treats unquoted [..] as a glob pattern.\n",
    "%pip install \"cua-agent[all]\"\n",
    "\n",
    "# Or install individual agent loops:\n",
    "# %pip install \"cua-agent[anthropic]\"\n",
    "# %pip install \"cua-agent[omni]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# If locally installed, use this instead:\n",
    "from pathlib import Path\n",
    "\n",
    "# Build inside a subshell rather than calling os.chdir(): the kernel's working\n",
    "# directory stays put, so this cell can be re-run and later relative paths\n",
    "# (e.g. the trajectories directory) still resolve next to the notebook.\n",
    "AGENT_DIR = Path(\"../libs/agent\").resolve()\n",
    "!cd \"{AGENT_DIR}\" && poetry install && poetry build\n",
    "\n",
    "%pip uninstall -y cua-agent\n",
    "%pip install --force-reinstall \"{AGENT_DIR}/dist/cua_agent-0.1.0-py3-none-any.whl\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialize a Computer Agent"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`ComputerAgent` allows you to run an agentic workflow in a virtual sandbox instance on Apple Silicon. Here's a basic example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from agent import AgentLoop, LLMProvider\n",
    "from computer import Computer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Similar to Computer, you can either use the async context manager pattern or initialize the ComputerAgent instance directly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "# Keep keys that are already exported and prompt (without echo) for missing\n",
    "# ones, so no secret is ever typed into or saved with the notebook.\n",
    "for key_name in (\"ANTHROPIC_API_KEY\", \"OPENAI_API_KEY\"):\n",
    "    if not os.environ.get(key_name):\n",
    "        entered = getpass(f\"{key_name} (leave blank to skip): \")\n",
    "        if entered:\n",
    "            os.environ[key_name] = entered"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Direct initialization:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "from pathlib import Path\n",
    "from agent.core.computer_agent import ComputerAgent, LLM\n",
    "\n",
    "computer = Computer(verbosity=logging.INFO)\n",
    "\n",
    "# Create agent with Anthropic loop and provider\n",
    "agent = ComputerAgent(\n",
    "    computer=computer,\n",
    "    loop=AgentLoop.ANTHROPIC,\n",
    "    # loop=AgentLoop.OMNI,\n",
    "    model=LLM(provider=LLMProvider.ANTHROPIC, name=\"claude-3-7-sonnet-20250219\"),\n",
    "    # model=LLM(provider=LLMProvider.OPENAI, name=\"gpt-4.5-preview\"),\n",
    "    save_trajectory=True,\n",
    "    trajectory_dir=str(Path(\"trajectories\")),\n",
    "    only_n_most_recent_images=3,\n",
    "    verbosity=logging.INFO,\n",
    ")\n",
    "\n",
    "# Each entry is one natural-language task handed to the agent in order.\n",
    "tasks = [\n",
    "\"\"\"\n",
    "Please help me with the following task:\n",
    "1. Open Safari browser\n",
    "2. Go to Wikipedia.org\n",
    "3. Search for \"Claude AI\" \n",
    "4. Summarize the main points you find about Claude AI\n",
    "\"\"\"\n",
    "]\n",
    "\n",
    "# The agent is used as an async context manager for the duration of the run;\n",
    "# agent.run() is an async generator, so results are printed as they arrive.\n",
    "async with agent:\n",
    "    for i, task in enumerate(tasks, 1):\n",
    "        print(f\"\\nExecuting task {i}/{len(tasks)}: {task}\")\n",
    "        async for result in agent.run(task):\n",
    "            print(result)\n",
    "        print(f\"Task {i} completed\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Or using the Omni Agentic Loop:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "from pathlib import Path\n",
    "\n",
    "# Imported here as well so this cell runs on its own, without first running\n",
    "# the Anthropic example.\n",
    "from agent.core.computer_agent import ComputerAgent, LLM\n",
    "\n",
    "computer = Computer(verbosity=logging.INFO)\n",
    "\n",
    "# Create agent with the Omni loop and the OpenAI provider\n",
    "agent = ComputerAgent(\n",
    "    computer=computer,\n",
    "    # loop=AgentLoop.ANTHROPIC,\n",
    "    loop=AgentLoop.OMNI,\n",
    "    model=LLM(provider=LLMProvider.OPENAI, name=\"gpt-4.5-preview\"),\n",
    "    # model=LLM(provider=LLMProvider.ANTHROPIC, name=\"claude-3-7-sonnet-20250219\"),\n",
    "    save_trajectory=True,\n",
    "    trajectory_dir=str(Path(\"trajectories\")),\n",
    "    only_n_most_recent_images=3,\n",
    "    verbosity=logging.INFO,\n",
    ")\n",
    "\n",
    "# Each entry is one natural-language task handed to the agent in order.\n",
    "tasks = [\n",
    "\"\"\"\n",
    "Please help me with the following task:\n",
    "1. Open Safari browser\n",
    "2. Go to Wikipedia.org\n",
    "3. Search for \"Claude AI\" \n",
    "4. Summarize the main points you find about Claude AI\n",
    "\"\"\"\n",
    "]\n",
    "\n",
    "# The agent is used as an async context manager for the duration of the run;\n",
    "# agent.run() is an async generator, so results are printed as they arrive.\n",
    "async with agent:\n",
    "    for i, task in enumerate(tasks, 1):\n",
    "        print(f\"\\nExecuting task {i}/{len(tasks)}: {task}\")\n",
    "        async for result in agent.run(task):\n",
    "            print(result)\n",
    "        print(f\"Task {i} completed\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}