diff --git a/notebooks/agent_nb.ipynb b/notebooks/agent_nb.ipynb index cdc46616..84d67574 100644 --- a/notebooks/agent_nb.ipynb +++ b/notebooks/agent_nb.ipynb @@ -6,7 +6,7 @@ "source": [ "## Agent\n", "\n", - "This notebook demonstrates how to use Cua's Agent to run a workflow in a virtual sandbox on Apple Silicon Macs." + "This notebook demonstrates how to use Cua's Agent to run workflows in virtual sandboxes, either using C/ua Cloud Containers or local VMs on Apple Silicon Macs." ] }, { @@ -68,7 +68,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Agent allows you to run an agentic workflow in a virtual sandbox instances on Apple Silicon. Here's a basic example:" + "Agent allows you to run an agentic workflow in virtual sandbox instances. You can choose between cloud containers or local VMs." ] }, { @@ -83,15 +83,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Get API keys from environment or prompt user\n", - "anthropic_key = os.getenv(\"ANTHROPIC_API_KEY\") or input(\"Enter your Anthropic API key: \")\n", - "openai_key = os.getenv(\"OPENAI_API_KEY\") or input(\"Enter your OpenAI API key: \")\n", + "anthropic_key = os.getenv(\"ANTHROPIC_API_KEY\") or \\\n", + " input(\"Enter your Anthropic API key: \")\n", + "openai_key = os.getenv(\"OPENAI_API_KEY\") or \\\n", + " input(\"Enter your OpenAI API key: \")\n", "\n", "os.environ[\"ANTHROPIC_API_KEY\"] = anthropic_key\n", "os.environ[\"OPENAI_API_KEY\"] = openai_key" @@ -101,7 +103,165 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Similar to Computer, you can either use the async context manager pattern or initialize the ComputerAgent instance directly." + "## Option 1: Agent with C/ua Cloud Containers\n", + "\n", + "Use cloud containers for running agents from any system without local setup." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prerequisites for Cloud Containers\n", + "\n", + "To use C/ua Cloud Containers, you need to:\n", + "1. Sign up at https://trycua.com\n", + "2. Create a Cloud Container\n", + "3. Generate an API Key\n", + "\n", + "Once you have these, you can connect to your cloud container and run agents on it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get C/ua API credentials and container details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cua_api_key = os.getenv(\"CUA_API_KEY\") or \\\n", + " input(\"Enter your C/ua API Key: \")\n", + "container_name = os.getenv(\"CONTAINER_NAME\") or \\\n", + " input(\"Enter your Cloud Container name: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Choose the OS type for your container (linux or macos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an agent with cloud container" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from pathlib import Path\n", + "\n", + "# Connect to your existing cloud container\n", + "computer = Computer(\n", + " os_type=os_type,\n", + " api_key=cua_api_key,\n", + " name=container_name,\n", + " provider_type=VMProviderType.CLOUD,\n", + " verbosity=logging.INFO\n", + ")\n", + "\n", + "# Create agent\n", + "agent = ComputerAgent(\n", + " computer=computer,\n", + " loop=AgentLoop.OPENAI,\n", + " model=LLM(provider=LLMProvider.OPENAI),\n", + " save_trajectory=True,\n", + " trajectory_dir=str(Path(\"trajectories\")),\n", + " only_n_most_recent_images=3,\n", 
+ " verbosity=logging.INFO\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run tasks on cloud container" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tasks = [\n", + " \"Open a web browser and navigate to GitHub\",\n", + " \"Search for the trycua/cua repository\",\n", + " \"Take a screenshot of the repository page\"\n", + "]\n", + "\n", + "for i, task in enumerate(tasks):\n", + " print(f\"\\nExecuting task {i+1}/{len(tasks)}: {task}\")\n", + " async for result in agent.run(task):\n", + " # print(result)\n", + " pass\n", + " print(f\"✅ Task {i+1}/{len(tasks)} completed: {task}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Option 2: Agent with Local VMs (Lume daemon)\n", + "\n", + "For Apple Silicon Macs, run agents on local VMs with near-native performance." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before we can create an agent, we need to initialize a local computer with Lume." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from pathlib import Path\n", + "\n", + "\n", + "computer = Computer(\n", + " verbosity=logging.INFO, \n", + " provider_type=VMProviderType.LUME,\n", + " display=\"1024x768\",\n", + " memory=\"8GB\",\n", + " cpu=\"4\",\n", + " os_type=\"macos\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an agent with local VM" ] }, { @@ -117,22 +277,31 @@ "metadata": {}, "outputs": [], "source": [ - "import logging\n", - "from pathlib import Path\n", - "\n", - "computer = Computer(verbosity=logging.INFO, provider_type=VMProviderType.LUME)\n", - "\n", "# Create agent with Anthropic loop and provider\n", "agent = ComputerAgent(\n", - " computer=computer,\n", - " loop=AgentLoop.OPENAI,\n", - " model=LLM(provider=LLMProvider.OPENAI),\n", - " save_trajectory=True,\n", - " trajectory_dir=str(Path(\"trajectories\")),\n", - " only_n_most_recent_images=3,\n", - " verbosity=logging.INFO\n", - " )\n", - "\n", + " computer=computer,\n", + " loop=AgentLoop.OPENAI,\n", + " model=LLM(provider=LLMProvider.OPENAI),\n", + " save_trajectory=True,\n", + " trajectory_dir=str(Path(\"trajectories\")),\n", + " only_n_most_recent_images=3,\n", + " verbosity=logging.INFO\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run tasks on a local Lume VM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "tasks = [\n", " \"Look for a repository named trycua/cua on GitHub.\",\n", " \"Check the open issues, open the most recent one and read it.\",\n", @@ -210,22 +379,6 @@ "The agent includes a Gradio-based user interface for easy interaction. 
To use it:" ] }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# Get API keys from environment or prompt user\n", - "anthropic_key = os.getenv(\"ANTHROPIC_API_KEY\") or input(\"Enter your Anthropic API key: \")\n", - "openai_key = os.getenv(\"OPENAI_API_KEY\") or input(\"Enter your OpenAI API key: \")\n", - "\n", - "os.environ[\"ANTHROPIC_API_KEY\"] = anthropic_key\n", - "os.environ[\"OPENAI_API_KEY\"] = openai_key" - ] - }, { "cell_type": "code", "execution_count": null, @@ -237,6 +390,146 @@ "app = create_gradio_ui()\n", "app.launch(share=False)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Advanced Agent Configurations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using different agent loops" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use different agent loops depending on your needs:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. OpenAI Agent Loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "openai_agent = ComputerAgent(\n", + " computer=computer, # Can be cloud or local\n", + " loop=AgentLoop.OPENAI,\n", + " model=LLM(provider=LLMProvider.OPENAI),\n", + " save_trajectory=True,\n", + " trajectory_dir=str(Path(\"trajectories\")),\n", + " verbosity=logging.INFO\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. 
Anthropic Agent Loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "anthropic_agent = ComputerAgent(\n", + " computer=computer,\n", + " loop=AgentLoop.ANTHROPIC,\n", + " model=LLM(provider=LLMProvider.ANTHROPIC),\n", + " save_trajectory=True,\n", + " trajectory_dir=str(Path(\"trajectories\")),\n", + " verbosity=logging.INFO\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. Omni Agent Loop (supports multiple providers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "omni_agent = ComputerAgent(\n", + " computer=computer,\n", + " loop=AgentLoop.OMNI,\n", + " model=LLM(provider=LLMProvider.ANTHROPIC, name=\"claude-3-7-sonnet-20250219\"),\n", + " # model=LLM(provider=LLMProvider.OPENAI, name=\"gpt-4.5-preview\"),\n", + " # model=LLM(provider=LLMProvider.OLLAMA, name=\"gemma3:12b-it-q4_K_M\"),\n", + " save_trajectory=True,\n", + " trajectory_dir=str(Path(\"trajectories\")),\n", + " only_n_most_recent_images=3,\n", + " verbosity=logging.INFO\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "4. 
UITARS Agent Loop (for local inference on Apple Silicon)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uitars_agent = ComputerAgent(\n", + " computer=computer,\n", + " loop=AgentLoop.UITARS,\n", + " model=LLM(provider=LLMProvider.UITARS),\n", + " save_trajectory=True,\n", + " trajectory_dir=str(Path(\"trajectories\")),\n", + " verbosity=logging.INFO\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Trajectory viewing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All agent runs save trajectories that can be viewed at https://trycua.com/trajectory-viewer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Trajectories saved to: {Path('trajectories').absolute()}\")\n", + "print(\"Upload trajectory files to https://trycua.com/trajectory-viewer to visualize agent actions\")\n" + ] } ], "metadata": { diff --git a/notebooks/computer_nb.ipynb b/notebooks/computer_nb.ipynb index ec460de5..c0bd8460 100644 --- a/notebooks/computer_nb.ipynb +++ b/notebooks/computer_nb.ipynb @@ -6,7 +6,7 @@ "source": [ "## Computer\n", "\n", - "This notebook demonstrates how to use Computer to operate a Lume sandbox VMs programmatically on Apple Silicon macOS systems." + "This notebook demonstrates how to use Computer to operate sandbox VMs programmatically, either using C/ua Cloud Containers or local Lume VMs on Apple Silicon macOS systems." 
] }, { @@ -22,25 +22,23 @@ "metadata": {}, "outputs": [], "source": [ - "!pip uninstall -y cua-computer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "!pip uninstall -y cua-computer\n", "!pip install \"cua-computer[all]\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If locally installed, use this instead:" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# If locally installed, use this instead:\n", "import os\n", "\n", "os.chdir('../libs/computer')\n", @@ -55,7 +53,126 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Lume daemon\n", + "## Option 1: C/ua Cloud Containers\n", + "\n", + "C/ua Cloud Containers provide remote VMs that can be accessed from any system without local setup." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prerequisites for Cloud Containers\n", + "\n", + "To use C/ua Cloud Containers, you need to:\n", + "1. Sign up at https://trycua.com\n", + "2. Create a Cloud Container\n", + "3. Generate an API Key\n", + "\n", + "Once you have these, you can connect to your cloud container using its name." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get API key and container name from environment or prompt user\n", + "import os\n", + "\n", + "cua_api_key = os.getenv(\"CUA_API_KEY\") or \\\n", + " input(\"Enter your C/ua API Key: \")\n", + "container_name = os.getenv(\"CONTAINER_NAME\") or \\\n", + " input(\"Enter your Cloud Container name: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Choose the OS type for your container (linux or macos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os_type = input(\"Enter the OS type of your container (linux/macos) [default: linux]: \").lower() or \"linux\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Connect to your Cloud Container" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from computer import Computer, VMProviderType" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Connect to your existing C/ua Cloud Container" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "computer = Computer(\n", + " os_type=os_type, # Must match the OS type of your cloud container\n", + " api_key=cua_api_key,\n", + " name=container_name,\n", + " provider_type=VMProviderType.CLOUD,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Take a screenshot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "screenshot = await computer.interface.screenshot()\n", + "\n", + "with open(\"screenshot.png\", \"wb\") as f:\n", + " f.write(screenshot)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Option 2: Local VMs (Lume daemon)\n", + "\n", + "For Apple Silicon Macs, you 
can run VMs locally using the Lume daemon." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lume daemon setup\n", "\n", "Refer to [../libs/lume/README.md](../libs/lume/README.md) for more details on the lume cli." ] @@ -143,7 +260,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Initialize a Computer instance" + "### Initialize a Local Computer instance" ] }, { @@ -190,7 +307,7 @@ " os_type=\"macos\",\n", " provider_type=VMProviderType.LUME,\n", ") as computer:\n", - " await computer.run()\n", + " pass\n", " # ... do something with the computer interface" ] }, @@ -217,6 +334,15 @@ "await computer.run()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computer Interface\n", + "\n", + "Both cloud and local computers provide the same interface for interaction." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -461,7 +587,7 @@ ], "metadata": { "kernelspec": { - "display_name": "cua312", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -475,7 +601,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.12.2" } }, "nbformat": 4,