From 679eafe7c7e053c48caf16b017d94227d955776c Mon Sep 17 00:00:00 2001
From: Dillon DuPont
Date: Tue, 11 Nov 2025 12:44:24 -0500
Subject: [PATCH] update computer SDK and computer-server SDK to support the new
 .sandbox.cua.ai domain

---
Reviewer note: the ping() hunk in watchdog.py was amended to keep the primary
connection error in `primary_error`. The exported version read `e` after its
`except` block had ended, where Python has already unbound the name.
The post-image hash on the watchdog.py `index` line predates this amendment.
Whitespace was restored from a flattened copy of the patch; confirm the
context-line indentation against the target tree before applying.

 .../computer_server/watchdog.py               | 64 ++++++++++----
 .../computer/providers/cloud/provider.py      | 88 +++++++++++++++++--
 2 files changed, 129 insertions(+), 23 deletions(-)

diff --git a/libs/python/computer-server/computer_server/watchdog.py b/libs/python/computer-server/computer_server/watchdog.py
index 7c9ca83f..460c51c6 100644
--- a/libs/python/computer-server/computer_server/watchdog.py
+++ b/libs/python/computer-server/computer_server/watchdog.py
@@ -75,14 +75,23 @@ class Watchdog:
         Returns:
             WebSocket URI for the Computer API Server
         """
-        ip_address = (
-            "localhost"
-            if not self.container_name
-            else f"{self.container_name}.containers.cloud.trycua.com"
-        )
-        protocol = "wss" if self.container_name else "ws"
-        port = "8443" if self.container_name else "8000"
-        return f"{protocol}://{ip_address}:{port}/ws"
+        if not self.container_name:
+            return "ws://localhost:8000/ws"
+
+        # Try .sandbox.cua.ai first, fallback to .containers.cloud.trycua.com
+        return f"wss://{self.container_name}.sandbox.cua.ai:8443/ws"
+
+    @property
+    def ws_uri_fallback(self) -> str:
+        """Get the fallback WebSocket URI using legacy hostname.
+
+        Returns:
+            Fallback WebSocket URI for the Computer API Server
+        """
+        if not self.container_name:
+            return "ws://localhost:8000/ws"
+
+        return f"wss://{self.container_name}.containers.cloud.trycua.com:8443/ws"
 
     async def ping(self) -> bool:
         """
@@ -91,11 +100,11 @@ class Watchdog:
         Returns:
             True if connection successful, False otherwise
         """
+        # Create a simple ping message
+        ping_message = {"command": "get_screen_size", "params": {}}
+
+        # Try primary URI first (.sandbox.cua.ai)
         try:
-            # Create a simple ping message
-            ping_message = {"command": "get_screen_size", "params": {}}
-
-            # Try to connect to the WebSocket
             async with websockets.connect(
                 self.ws_uri, max_size=1024 * 1024 * 10  # 10MB limit to match server
             ) as websocket:
@@ -105,13 +114,38 @@ class Watchdog:
                 # Wait for any response or just close
                 try:
                     response = await asyncio.wait_for(websocket.recv(), timeout=5)
-                    logger.debug(f"Ping response received: {response[:100]}...")
+                    logger.debug(f"Ping response received from primary URI: {response[:100]}...")
                     return True
                 except asyncio.TimeoutError:
                     return False
         except Exception as e:
-            logger.warning(f"Ping failed: {e}")
-            return False
+            # Python unbinds `e` when this except block ends, so keep the error
+            # under another name for the failure messages logged further down.
+            primary_error = e
+            logger.debug(f"Primary URI ping failed: {primary_error}")
+
+        # Try fallback URI (.containers.cloud.trycua.com)
+        if self.container_name:
+            try:
+                async with websockets.connect(
+                    self.ws_uri_fallback, max_size=1024 * 1024 * 10  # 10MB limit to match server
+                ) as websocket:
+                    # Send ping message
+                    await websocket.send(json.dumps(ping_message))
+
+                    # Wait for any response or just close
+                    try:
+                        response = await asyncio.wait_for(websocket.recv(), timeout=5)
+                        logger.debug(f"Ping response received from fallback URI: {response[:100]}...")
+                        return True
+                    except asyncio.TimeoutError:
+                        return False
+            except Exception as fallback_e:
+                logger.warning(f"Both primary and fallback ping failed. Primary: {primary_error}, Fallback: {fallback_e}")
+                return False
+        else:
+            logger.warning(f"Ping failed: {primary_error}")
+            return False
 
     def kill_processes_on_port(self, port: int) -> bool:
         """
diff --git a/libs/python/computer/computer/providers/cloud/provider.py b/libs/python/computer/computer/providers/cloud/provider.py
index 7d479686..8db233a2 100644
--- a/libs/python/computer/computer/providers/cloud/provider.py
+++ b/libs/python/computer/computer/providers/cloud/provider.py
@@ -46,6 +46,8 @@ class CloudProvider(BaseVMProvider):
         self.api_key = api_key
         self.verbose = verbose
         self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/")
+        # Host caching dictionary: {vm_name: host_string}
+        self._host_cache: Dict[str, str] = {}
 
     @property
     def provider_type(self) -> VMProviderType:
@@ -60,12 +62,12 @@ class CloudProvider(BaseVMProvider):
     async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
         """Get VM information by querying the VM status endpoint.
 
-        - Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com"
+        - Build hostname via _get_host_for_vm(name) using cached host or fallback
         - Probe https://{hostname}:8443/status with a short timeout
         - If JSON contains a "status" field, return it; otherwise infer
         - Fallback to DNS resolve check to distinguish unknown vs not_found
         """
-        hostname = await self.get_ip(name=name)
+        hostname = await self._get_host_for_vm(name)
 
         # Try HTTPS probe to the computer-server status endpoint (8443)
         try:
@@ -118,8 +120,20 @@ class CloudProvider(BaseVMProvider):
                             vm = dict(item) if isinstance(item, dict) else {}
                             name = vm.get("name")
                             password = vm.get("password")
+                            api_host = vm.get("host")  # Read host from API response
+
                             if isinstance(name, str) and name:
-                                host = f"{name}.containers.cloud.trycua.com"
+                                # Use host from API if available, otherwise fallback to legacy format
+                                if isinstance(api_host, str) and api_host:
+                                    host = api_host
+                                    # Cache the host for this VM
+                                    self._host_cache[name] = host
+                                else:
+                                    # Legacy fallback
+                                    host = f"{name}.containers.cloud.trycua.com"
+                                    # Cache the legacy host
+                                    self._host_cache[name] = host
+
                                 # api_url: always set if missing
                                 if not vm.get("api_url"):
                                     vm["api_url"] = f"https://{host}:8443"
@@ -227,15 +241,73 @@ class CloudProvider(BaseVMProvider):
             "message": "update_vm not supported by public API",
         }
 
+    async def _get_host_for_vm(self, name: str) -> str:
+        """
+        Get the host for a VM, trying multiple approaches:
+        1. Check cache first
+        2. Try to refresh cache by calling list_vms
+        3. Try .sandbox.cua.ai format
+        4. Fallback to legacy .containers.cloud.trycua.com format
+
+        Args:
+            name: VM name
+
+        Returns:
+            Host string for the VM
+        """
+        # Check cache first
+        if name in self._host_cache:
+            return self._host_cache[name]
+
+        # Try to refresh cache by calling list_vms
+        try:
+            await self.list_vms()
+            # Check cache again after refresh
+            if name in self._host_cache:
+                return self._host_cache[name]
+        except Exception as e:
+            logger.warning(f"Failed to refresh VM list for host lookup: {e}")
+
+        # Try .sandbox.cua.ai format first
+        sandbox_host = f"{name}.sandbox.cua.ai"
+        if await self._test_host_connectivity(sandbox_host):
+            self._host_cache[name] = sandbox_host
+            return sandbox_host
+
+        # Fallback to legacy format
+        legacy_host = f"{name}.containers.cloud.trycua.com"
+        # Cache the legacy host
+        self._host_cache[name] = legacy_host
+        return legacy_host
+
+    async def _test_host_connectivity(self, hostname: str) -> bool:
+        """
+        Test if a host is reachable by trying to connect to its status endpoint.
+
+        Args:
+            hostname: Host to test
+
+        Returns:
+            True if host is reachable, False otherwise
+        """
+        try:
+            timeout = aiohttp.ClientTimeout(total=2)  # Short timeout for connectivity test
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                url = f"https://{hostname}:8443/status"
+                async with session.get(url, allow_redirects=False) as resp:
+                    # Any response (even error) means the host is reachable
+                    return True
+        except Exception:
+            return False
+
     async def get_ip(
         self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2
     ) -> str:
         """
-        Return the VM's IP address as '{container_name}.containers.cloud.trycua.com'.
-        Uses the provided 'name' argument (the VM name requested by the caller),
-        falling back to self.name only if 'name' is None.
-        Retries up to 3 times with retry_delay seconds if hostname is not available.
+        Return the VM's host address, trying to use cached host from API or falling back to legacy format.
+        Uses the provided 'name' argument (the VM name requested by the caller).
         """
         if name is None:
             raise ValueError("VM name is required for CloudProvider.get_ip")
-        return f"{name}.containers.cloud.trycua.com"
+
+        return await self._get_host_for_vm(name)