Fix get_vm returning not_found for running VMs (#1070)

* feat(computer): use API endpoint for get_vm instead of direct probe

  Call GET /v1/vms/:name for authoritative VM status instead of probing the
  computer-server at port 8443. The probe was unreliable for VMs that are
  still booting or whose DNS has not yet propagated, causing false not_found
  results. OS is now returned by the API from vm_type, so the computer-server
  probe is no longer needed.

* feat(computer): use API endpoint for get_vm instead of direct probe

  Same fix as CloudProvider, applied to CloudV2Provider: call
  GET /v1/vms/:name for authoritative status. OS is resolved server-side
  from vm_type.

* fix(cli): simplify cmd_get now that get_vm uses the API
2026-02-15 02:39:32 -06:00 · 2026-02-11 22:09:39 +05:30
parent dccbe1898c
commit a3708eea7f
3 changed files with 80 additions and 81 deletions
--- a/libs/python/computer/computer/providers/cloud/provider.py
+++ b/libs/python/computer/computer/providers/cloud/provider.py
@@ -65,43 +65,50 @@ class CloudProvider(BaseVMProvider):
        pass

    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
-        """Get VM information by querying the VM status endpoint.
+        """Get VM information via the public API.

-        - Build hostname via _get_host_for_vm(name) using cached host or fallback
-        - Probe https://{hostname}:8443/status with a short timeout
-        - If JSON contains a "status" field, return it; otherwise infer
-        - Fallback to DNS resolve check to distinguish unknown vs not_found
+        Uses GET /v1/vms/:name as source of truth for VM existence and status.
+        os_type is filled from the API's "os" field; the computer-server is not probed.
        """
        hostname = await self._get_host_for_vm(name)
+        api_url = f"https://{hostname}:8443"

-        # Try HTTPS probe to the computer-server status endpoint (8443)
+        # Query the API for authoritative VM info
+        url = f"{self.api_base}/v1/vms/{name}"
+        headers = {"Authorization": f"Bearer {self.api_key}", "Accept": "application/json"}
        try:
-            timeout = aiohttp.ClientTimeout(total=3)
-            async with aiohttp.ClientSession(timeout=timeout) as session:
-                url = f"https://{hostname}:8443/status"
-                async with session.get(url, allow_redirects=False) as resp:
-                    status_code = resp.status
-                    vm_status: str
-                    vm_os_type: Optional[str] = None
-                    if status_code == 200:
-                        try:
-                            data = await resp.json(content_type=None)
-                            vm_status = str(data.get("status", "ok"))
-                            vm_os_type = str(data.get("os_type"))
-                        except Exception:
-                            vm_status = "unknown"
-                    elif status_code < 500:
-                        vm_status = "unknown"
-                    else:
-                        vm_status = "unknown"
-                    return {
-                        "name": name,
-                        "status": "running" if vm_status == "ok" else vm_status,
-                        "api_url": f"https://{hostname}:8443",
-                        "os_type": vm_os_type,
-                    }
-        except Exception:
-            return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:8443"}
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url, headers=headers) as resp:
+                    if resp.status == 404:
+                        return {"name": name, "status": "not_found", "api_url": api_url}
+                    if resp.status == 401:
+                        return {"name": name, "status": "unauthorized", "api_url": api_url}
+                    if resp.status != 200:
+                        text = await resp.text()
+                        logger.error(f"get_vm API error: HTTP {resp.status} - {text}")
+                        return {"name": name, "status": "unknown", "api_url": api_url}
+                    vm_info = await resp.json(content_type=None)
+        except Exception as e:
+            logger.error(f"get_vm API request failed: {e}")
+            return {"name": name, "status": "unknown", "api_url": api_url}
+
+        # Enrich with host-derived URLs
+        host = vm_info.get("host")
+        if isinstance(host, str) and host:
+            self._host_cache[name] = host
+            hostname = host
+            api_url = f"https://{hostname}:8443"
+        vm_info["api_url"] = api_url
+
+        password = vm_info.get("password")
+        if not vm_info.get("vnc_url") and isinstance(password, str) and password:
+            vm_info["vnc_url"] = f"https://{hostname}/vnc.html?autoconnect=true&password={password}"
+
+        # Map "os" from API to "os_type" for backward compatibility
+        if vm_info.get("os") and not vm_info.get("os_type"):
+            vm_info["os_type"] = vm_info["os"]
+
+        return vm_info

    async def list_vms(self) -> ListVMsResponse:
        url = f"{self.api_base}/v1/vms"
--- a/libs/python/computer/computer/providers/cloud/providerv2.py
+++ b/libs/python/computer/computer/providers/cloud/providerv2.py
@@ -81,42 +81,47 @@ class CloudV2Provider(BaseVMProvider):
        return f"{name}-vnc.cua.sh"

    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
-        """Get VM information by querying the VM status endpoint.
+        """Get VM information via the public API.

-        - Build hostname via _get_api_host(name)
-        - Probe https://{hostname}:443/status with a short timeout
-        - If JSON contains a "status" field, return it; otherwise infer
+        Uses GET /v1/vms/:name as source of truth for VM existence and status.
+        os_type is filled from the API's "os" field; the computer-server is not probed.
        """
-        hostname = self._get_api_host(name)
+        api_host = self._get_api_host(name)
+        api_url = f"https://{api_host}:443"

-        # Try HTTPS probe to the computer-server status endpoint (443)
+        # Query the API for authoritative VM info
+        url = f"{self.api_base}/v1/vms/{name}"
+        headers = {"Authorization": f"Bearer {self.api_key}", "Accept": "application/json"}
        try:
-            timeout = aiohttp.ClientTimeout(total=3)
-            async with aiohttp.ClientSession(timeout=timeout) as session:
-                url = f"https://{hostname}:443/status"
-                async with session.get(url, allow_redirects=False) as resp:
-                    status_code = resp.status
-                    vm_status: str
-                    vm_os_type: Optional[str] = None
-                    if status_code == 200:
-                        try:
-                            data = await resp.json(content_type=None)
-                            vm_status = str(data.get("status", "ok"))
-                            vm_os_type = str(data.get("os_type"))
-                        except Exception:
-                            vm_status = "unknown"
-                    elif status_code < 500:
-                        vm_status = "unknown"
-                    else:
-                        vm_status = "unknown"
-                    return {
-                        "name": name,
-                        "status": "running" if vm_status == "ok" else vm_status,
-                        "api_url": f"https://{hostname}:443",
-                        "os_type": vm_os_type,
-                    }
-        except Exception:
-            return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:443"}
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url, headers=headers) as resp:
+                    if resp.status == 404:
+                        return {"name": name, "status": "not_found", "api_url": api_url}
+                    if resp.status == 401:
+                        return {"name": name, "status": "unauthorized", "api_url": api_url}
+                    if resp.status != 200:
+                        text = await resp.text()
+                        logger.error(f"get_vm API error: HTTP {resp.status} - {text}")
+                        return {"name": name, "status": "unknown", "api_url": api_url}
+                    vm_info = await resp.json(content_type=None)
+        except Exception as e:
+            logger.error(f"get_vm API request failed: {e}")
+            return {"name": name, "status": "unknown", "api_url": api_url}
+
+        # Enrich with V2 domain URLs
+        vm_info["api_url"] = api_url
+        vnc_host = self._get_vnc_host(name)
+        password = vm_info.get("password")
+        if not vm_info.get("vnc_url") and isinstance(password, str) and password:
+            vm_info["vnc_url"] = (
+                f"https://{vnc_host}:443/vnc.html?autoconnect=true&password={password}"
+            )
+
+        # Map "os" from API to "os_type" for backward compatibility
+        if vm_info.get("os") and not vm_info.get("os_type"):
+            vm_info["os_type"] = vm_info["os"]
+
+        return vm_info

    async def list_vms(self) -> ListVMsResponse:
        url = f"{self.api_base}/v1/vms"
--- a/libs/python/cua-cli/cua_cli/commands/sandbox.py
+++ b/libs/python/cua-cli/cua_cli/commands/sandbox.py
@@ -342,22 +342,7 @@ def cmd_get(args: argparse.Namespace) -> int:

    async def _get():
        async with _get_provider() as provider:
-            # First get from list to get full details including password
-            vms = await provider.list_vms()
-            vm_info = next((vm for vm in vms if vm.get("name") == args.name), None)
-
-            # Also probe the VM directly for status
-            status_info = await provider.get_vm(args.name)
-
-            if vm_info:
-                # Merge status info (only if get_vm returned a real status)
-                probe_status = status_info.get("status")
-                if probe_status and probe_status != "not_found":
-                    vm_info["status"] = probe_status
-                vm_info["os_type"] = status_info.get("os_type") or vm_info.get("os_type")
-                return vm_info
-            else:
-                return status_info
+            return await provider.get_vm(args.name)

    result = run_async(_get())

@@ -538,7 +523,9 @@ def cmd_vnc(args: argparse.Namespace) -> int:

    if host and password:
        encoded_password = quote(password, safe="")
-        vnc_url = f"https://{host}/vnc.html?autoconnect=true&password={encoded_password}&show_dot=true"
+        vnc_url = (
+            f"https://{host}/vnc.html?autoconnect=true&password={encoded_password}&show_dot=true"
+        )
    else:
        print_error("Could not determine VNC URL. Sandbox may not be ready.")
        return 1