diff --git a/examples/agent_examples.py b/examples/agent_examples.py index cfb7dd52..f362aab3 100644 --- a/examples/agent_examples.py +++ b/examples/agent_examples.py @@ -23,52 +23,43 @@ async def run_agent_example(): print("\n=== Example: ComputerAgent with OpenAI and Omni provider ===") try: - # Create Computer instance with default parameters - computer = Computer(verbosity=logging.DEBUG) + # Create Computer instance with async context manager + async with Computer(verbosity=logging.DEBUG) as macos_computer: + # Create agent with loop and provider + agent = ComputerAgent( + computer=macos_computer, + loop=AgentLoop.OPENAI, + # loop=AgentLoop.ANTHROPIC, + # loop=AgentLoop.OMNI, + model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA + # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4.5-preview"), + # model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"), + save_trajectory=True, + only_n_most_recent_images=3, + verbosity=logging.DEBUG, + ) - # Create agent with loop and provider - agent = ComputerAgent( - computer=computer, - loop=AgentLoop.OPENAI, - # loop=AgentLoop.ANTHROPIC, - # loop=AgentLoop.OMNI, - model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA - # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4.5-preview"), - # model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"), - save_trajectory=True, - only_n_most_recent_images=3, - verbosity=logging.DEBUG, - ) + tasks = [ + "Look for a repository named trycua/cua on GitHub.", + "Check the open issues, open the most recent one and read it.", + "Clone the repository in users/lume/projects if it doesn't exist yet.", + "Open the repository with an app named Cursor (on the dock, black background and white cube icon).", + "From Cursor, open Composer if not already open.", + "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.", + ] - tasks = [ - "Look for a repository named trycua/cua 
on GitHub.", - "Check the open issues, open the most recent one and read it.", - "Clone the repository in users/lume/projects if it doesn't exist yet.", - "Open the repository with an app named Cursor (on the dock, black background and white cube icon).", - "From Cursor, open Composer if not already open.", - "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.", - ] + for i, task in enumerate(tasks): + print(f"\nExecuting task {i+1}/{len(tasks)}: {task}") + async for result in agent.run(task): + # print(result) + pass - for i, task in enumerate(tasks): - print(f"\nExecuting task {i}/{len(tasks)}: {task}") - async for result in agent.run(task): - # print(result) - pass - - print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}") + print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}") except Exception as e: - logger.error(f"Error in run_omni_agent_example: {e}") + logger.error(f"Error in run_agent_example: {e}") traceback.print_exc() raise - finally: - # Clean up resources - if computer and computer._initialized: - try: - # await computer.stop() - pass - except Exception as e: - logger.warning(f"Error stopping computer: {e}") def main(): diff --git a/libs/agent/agent/providers/openai/loop.py b/libs/agent/agent/providers/openai/loop.py index 66114970..33d65e0e 100644 --- a/libs/agent/agent/providers/openai/loop.py +++ b/libs/agent/agent/providers/openai/loop.py @@ -15,7 +15,6 @@ from .api_handler import OpenAIAPIHandler from .response_handler import OpenAIResponseHandler from .tools.manager import ToolManager from .types import LLMProvider, ResponseItemType -from .prompts import SYSTEM_PROMPT logger = logging.getLogger(__name__) diff --git a/libs/agent/agent/providers/openai/prompts.py b/libs/agent/agent/providers/openai/prompts.py deleted file mode 100644 index d57eeb56..00000000 --- a/libs/agent/agent/providers/openai/prompts.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Prompts for OpenAI Agent Response API.""" - -# System prompt 
to be used when no specific system prompt is provided -SYSTEM_PROMPT = """ -You are a helpful assistant that can control a computer to help users accomplish tasks. -You have access to a computer where you can: -- Click, scroll, and type to interact with the interface -- Use keyboard shortcuts and special keys -- Read text and images from the screen -- Navigate and interact with applications - -A few important rules to follow: -1. Only perform actions that the user has requested or that directly support their task -2. If uncertain about what the user wants, ask for clarification -3. Explain your steps clearly when working on complex tasks -4. Be careful when interacting with sensitive data or performing potentially destructive actions -5. Always respect user privacy and avoid accessing personal information unless necessary for the task - -When in doubt about how to accomplish something, try to break it down into simpler steps using available computer actions. -""" diff --git a/libs/computer/computer/computer.py b/libs/computer/computer/computer.py index 26fcbf9b..6c1119ac 100644 --- a/libs/computer/computer/computer.py +++ b/libs/computer/computer/computer.py @@ -1,12 +1,6 @@ from typing import Optional, List, Literal, Dict, Any, Union, TYPE_CHECKING, cast from pylume import PyLume -from pylume.models import ( - VMRunOpts, - VMUpdateOpts, - ImageRef, - SharedDirectory, - VMStatus -) +from pylume.models import VMRunOpts, VMUpdateOpts, ImageRef, SharedDirectory, VMStatus import asyncio from .models import Computer as ComputerConfig, Display from .interface.factory import InterfaceFactory @@ -66,8 +60,6 @@ class Computer: port: Optional port to use for the PyLume server host: Host to use for PyLume connections (e.g. 
"localhost", "host.docker.internal") """ - if TYPE_CHECKING: - from .interface.base import BaseComputerInterface self.logger = Logger("cua.computer", verbosity) self.logger.info("Initializing Computer...") @@ -159,6 +151,18 @@ class Computer: """Exit async context manager.""" pass + def __enter__(self): + """Enter synchronous context manager.""" + # Run the event loop to call the async run method + loop = asyncio.get_event_loop() + loop.run_until_complete(self.run()) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit synchronous context manager.""" + # We could add cleanup here if needed in the future + pass + async def run(self) -> None: """Initialize the VM and computer interface.""" if TYPE_CHECKING: