Files
lume/libs/python/computer-server/computer_server/browser.py
Adam 37c5be669b Add browser tool with Playwright for visible browser automation
Add browser tool with Playwright/Firefox support. Includes BrowserManager,
/playwright_exec endpoint, BrowserTool client, and auto-recovery. Fixes
Python version in startup script and adds Playwright to Docker build.
2025-12-02 12:53:25 -05:00

309 lines
12 KiB
Python

"""
Browser manager using Playwright for programmatic browser control.
This allows agents to control a browser that runs visibly on the XFCE desktop.
"""
import asyncio
import logging
import os
from typing import Any, Dict, Optional
try:
from playwright.async_api import async_playwright, Browser, BrowserContext, Page
except ImportError:
async_playwright = None
Browser = None
BrowserContext = None
Page = None
# Module-level logger named after this module; every log call in this file goes through it.
logger = logging.getLogger(__name__)
class BrowserManager:
    """
    Manages a Playwright browser instance that runs visibly on the XFCE desktop.

    Firefox is launched through ``launch_persistent_context`` with a fixed
    profile directory under the user's home, so cookies and sessions are kept
    between launches. The browser is started lazily on the first command and
    is relaunched when the context or page is found to be unusable.
    """

    # Directory (under the user's home) holding the persistent Firefox profile.
    _PROFILE_DIR_NAME = ".playwright-firefox"
    # Viewport handed to the persistent context at launch.
    _VIEWPORT = {"width": 1024, "height": 768}
    # Timeout, in milliseconds, applied to page navigations.
    _NAVIGATION_TIMEOUT_MS = 30000

    def __init__(self):
        """Initialize the BrowserManager without starting a browser."""
        self.playwright = None
        # Never assigned by this class (a persistent context has no separate
        # Browser handle here); kept so close() and outside readers still work.
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        self._initialized = False
        # Message of the most recent failed launch, or None.
        self._initialization_error: Optional[str] = None
        # Serializes launch/teardown so coroutines cannot interleave them.
        self._lock = asyncio.Lock()

    def _context_is_alive(self) -> bool:
        """Return True when a launched context is present and answers ``pages``."""
        if not self._initialized or self.context is None:
            return False
        try:
            # NOTE(review): relies on ``pages`` raising for a dead context;
            # if it does not, the failure surfaces later when a page is
            # requested and execute_command relaunches the browser.
            _ = self.context.pages
        except Exception:
            return False
        return True

    @staticmethod
    def _page_is_open(page) -> bool:
        """Return True when *page* is set and does not report itself closed."""
        if page is None:
            return False
        try:
            return not page.is_closed()
        except Exception:
            return False

    async def _teardown(self):
        """
        Forget all browser handles and stop the Playwright driver.

        The caller must hold ``self._lock``. Errors from stopping the driver
        are logged and otherwise ignored, because this runs on recovery paths
        where the driver may already be gone.
        """
        self._initialized = False
        self.context = None
        self.page = None
        playwright, self.playwright = self.playwright, None
        if playwright is not None:
            try:
                await playwright.stop()
            except Exception as e:
                logger.warning(f"Ignoring error while stopping Playwright: {e}")

    async def _launch(self):
        """
        Start Playwright and open the persistent Firefox context.

        The caller must hold ``self._lock``. Handles are published on ``self``
        only after every step succeeded; if a step fails, the freshly started
        driver is stopped again so that it is not leaked.
        """
        # Get display from environment or default to :1
        display = os.environ.get("DISPLAY", ":1")
        logger.info(f"Initializing browser with DISPLAY={display}")

        playwright = await async_playwright().start()
        try:
            user_data_dir = os.path.join(os.path.expanduser("~"), self._PROFILE_DIR_NAME)
            os.makedirs(user_data_dir, exist_ok=True)
            # launch_persistent_context returns a BrowserContext, not a Browser.
            # --kiosk is deliberately not passed so the desktop remains visible.
            context = await playwright.firefox.launch_persistent_context(
                user_data_dir=user_data_dir,
                headless=False,  # CRITICAL: visible for visual agent
                viewport=dict(self._VIEWPORT),
            )
            # Get the first page or create one
            pages = context.pages
            page = pages[0] if pages else await context.new_page()
        except Exception:
            try:
                await playwright.stop()
            except Exception as stop_error:
                logger.warning(f"Ignoring error while stopping Playwright: {stop_error}")
            raise

        self.playwright = playwright
        self.context = context
        self.page = page
        self._initialized = True

    async def _ensure_initialized(self):
        """
        Ensure the browser is initialized, relaunching it if the context died.

        Raises:
            RuntimeError: if the playwright package could not be imported.
            Exception: whatever the launch raised; its message is also stored
                in ``self._initialization_error``.
        """
        if self._context_is_alive():
            return

        async with self._lock:
            # Re-check after acquiring the lock: another coroutine may have
            # finished launching while this one was waiting.
            if self._context_is_alive():
                return
            if self._initialized:
                logger.warning("Browser context is no longer usable, will reinitialize...")

            # Cleanup happens under the lock so it cannot stop a driver that
            # another coroutine is in the middle of starting.
            await self._teardown()
            self._initialization_error = None

            if async_playwright is None:
                raise RuntimeError(
                    "playwright is not installed. Please install it with: pip install playwright && playwright install --with-deps firefox"
                )

            try:
                await self._launch()
            except Exception as e:
                logger.exception(f"Failed to initialize browser: {e}")
                # Remember the message for execute_command, then propagate.
                self._initialization_error = str(e)
                raise
            logger.info("Browser initialized successfully")

    async def _acquire_page(self):
        """
        Make ``self.page`` refer to an open page.

        Keeps the current page when it is open, otherwise reuses the first
        open page of the context, otherwise creates a new page.
        """
        if self._page_is_open(self.page):
            return
        self.page = None
        for candidate in self.context.pages:
            if self._page_is_open(candidate):
                self.page = candidate
                logger.info("Reusing existing open page")
                return
        self.page = await self.context.new_page()
        logger.info("Created new page")

    async def _run_command(self, cmd: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """Validate *params* for *cmd* and perform it on ``self.page``."""
        page = self.page

        if cmd == "visit_url":
            url = params.get("url")
            if not url:
                return {"success": False, "error": "url parameter is required"}
            await page.goto(url, wait_until="domcontentloaded", timeout=self._NAVIGATION_TIMEOUT_MS)
            return {"success": True, "url": page.url}

        if cmd == "click":
            x = params.get("x")
            y = params.get("y")
            if x is None or y is None:
                return {"success": False, "error": "x and y parameters are required"}
            await page.mouse.click(x, y)
            return {"success": True}

        if cmd == "type":
            text = params.get("text")
            if text is None:
                return {"success": False, "error": "text parameter is required"}
            await page.keyboard.type(text)
            return {"success": True}

        if cmd == "scroll":
            delta_x = params.get("delta_x", 0)
            delta_y = params.get("delta_y", 0)
            await page.mouse.wheel(delta_x, delta_y)
            return {"success": True}

        if cmd == "web_search":
            from urllib.parse import quote_plus

            query = params.get("query")
            if not query:
                return {"success": False, "error": "query parameter is required"}
            # Percent-encode the query so characters such as '&', '#', '=' or
            # spaces are searched for instead of altering the URL.
            search_url = f"https://www.google.com/search?q={quote_plus(str(query))}"
            await page.goto(search_url, wait_until="domcontentloaded", timeout=self._NAVIGATION_TIMEOUT_MS)
            return {"success": True, "url": page.url}

        return {"success": False, "error": f"Unknown command: {cmd}"}

    async def execute_command(self, cmd: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a browser command.

        Failures are reported through the returned dictionary rather than by
        raising.

        Args:
            cmd: Command name (visit_url, click, type, scroll, web_search)
            params: Command parameters; None is treated as an empty dict

        Returns:
            Result dictionary with success status and any data
        """
        params = params or {}

        try:
            await self._ensure_initialized()
        except Exception as e:
            error_msg = self._initialization_error or str(e)
            logger.error(f"Browser initialization failed: {error_msg}")
            return {
                "success": False,
                "error": f"Browser initialization failed: {error_msg}. "
                f"Make sure Playwright and Firefox are installed, and DISPLAY is set correctly.",
            }

        try:
            await self._acquire_page()
        except Exception as e:
            logger.error(f"Failed to get new page: {e}, browser may be closed")
            # No page could be obtained, so treat the browser as closed:
            # tear everything down and launch it again.
            try:
                logger.info("Browser was closed manually, reinitializing...")
                async with self._lock:
                    await self._teardown()
                await self._ensure_initialized()
                await self._acquire_page()
                logger.info("Browser reopened successfully after manual closure")
            except Exception as reinit_error:
                logger.exception(f"Failed to reinitialize browser: {reinit_error}")
                return {"success": False, "error": f"Browser was closed and cannot be recovered: {reinit_error}"}

        try:
            return await self._run_command(cmd, params)
        except Exception as e:
            logger.exception(f"Error executing command {cmd}: {e}")
            # If the page was closed underneath us, line up an open page so
            # the next command has something to work with.
            if "closed" in str(e).lower() and self.context:
                try:
                    await self._acquire_page()
                except Exception as recover_error:
                    logger.error(f"Failed to recover page: {recover_error}")
            return {"success": False, "error": str(e)}

    async def close(self):
        """
        Close the browser and cleanup resources.

        Every handle is shut down independently, so an error while closing
        one of them does not prevent the others from being released. Errors
        are logged, not raised.
        """
        async with self._lock:
            context, browser, playwright = self.context, self.browser, self.playwright
            # Reset state first so the manager is reusable even if a
            # shutdown step below fails.
            self.context = None
            self.browser = None
            self.playwright = None
            self.page = None
            self._initialized = False

            shutdown_steps = []
            if context is not None:
                shutdown_steps.append(context.close)
            if browser is not None:
                shutdown_steps.append(browser.close)
            if playwright is not None:
                shutdown_steps.append(playwright.stop)

            closed_cleanly = True
            for shutdown in shutdown_steps:
                try:
                    await shutdown()
                except Exception as e:
                    closed_cleanly = False
                    logger.error(f"Error closing browser: {e}")

            if closed_cleanly:
                logger.info("Browser closed successfully")
# Shared module-wide manager; stays None until the first request for it.
_browser_manager: Optional[BrowserManager] = None


def get_browser_manager() -> BrowserManager:
    """Return the module's shared BrowserManager, building it on first use."""
    global _browser_manager
    manager = _browser_manager
    if manager is not None:
        return manager
    manager = BrowserManager()
    _browser_manager = manager
    return manager