"""
Browser Tool Example

Demonstrates how to use the BrowserTool to control a browser programmatically
via the computer server. The browser runs visibly on the XFCE desktop so visual
agents can see it.

Prerequisites:
    - Computer server running (Docker container or local)
    - For Docker: Container should be running with browser tool support
    - For local: Playwright and Firefox must be installed

Usage:
    python examples/browser_tool_example.py
"""

import asyncio
import logging
import sys
from pathlib import Path

# Add the libs path to sys.path. This must happen before the project imports
# below, which is why those imports cannot sit at the very top of the file.
libs_path = Path(__file__).parent.parent / "libs" / "python"
sys.path.insert(0, str(libs_path))

# Import Computer interface and BrowserTool
from agent.tools.browser_tool import BrowserTool  # noqa: E402
from computer import Computer  # noqa: E402

# Configure logging to see what's happening
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def test_browser_tool() -> None:
    """Test the BrowserTool with various commands.

    Runs five steps in order against a Docker-provided computer: visit a URL,
    perform a web search, scroll, click at fixed coordinates, and type text.
    The result of each step is logged at INFO level.

    Any exception raised along the way is logged together with its traceback
    and is not re-raised, so the coroutine always returns normally.
    """
    # Initialize the computer interface
    # For local testing, use provider_type="docker"
    # For provider_type="cloud", provide name and api_key
    computer = Computer(provider_type="docker")

    # Initialize the browser tool with the computer interface
    browser = BrowserTool(interface=computer)

    logger.info("Testing Browser Tool...")

    try:
        # Test 1: Visit a URL
        logger.info("Test 1: Visiting a URL...")
        result = await browser.visit_url("https://www.trycua.com")
        logger.info("Visit URL result: %s", result)

        # Wait a bit for the page to load
        await asyncio.sleep(2)

        # Test 2: Web search
        logger.info("Test 2: Performing a web search...")
        result = await browser.web_search("Python programming")
        logger.info("Web search result: %s", result)

        # Wait a bit
        await asyncio.sleep(2)

        # Test 3: Scroll
        logger.info("Test 3: Scrolling the page...")
        result = await browser.scroll(delta_x=0, delta_y=500)
        logger.info("Scroll result: %s", result)

        # Wait a bit
        await asyncio.sleep(1)

        # Test 4: Click (example coordinates - adjust based on your screen)
        logger.info("Test 4: Clicking at coordinates...")
        result = await browser.click(x=500, y=300)
        logger.info("Click result: %s", result)

        # Wait a bit
        await asyncio.sleep(1)

        # Test 5: Type text (if there's a focused input field)
        logger.info("Test 5: Typing text...")
        result = await browser.type("Hello from BrowserTool!")
        logger.info("Type result: %s", result)

        logger.info("All tests completed!")

    except Exception as e:
        # Top-level boundary of an example script: report the failure with its
        # traceback instead of propagating it. logger.exception logs at ERROR
        # level and attaches the active exception info automatically.
        logger.exception("Error during testing: %s", e)


if __name__ == "__main__":
    asyncio.run(test_browser_tool())