Merge branch 'main' into improve-docs-home

2026-05-12 11:29:41 -05:00 · 2025-11-12 11:12:45 -05:00
parent eaaf592f26 a126ffa491
commit d86a280936
25 changed files with 1419 additions and 73 deletions
@@ -4,8 +4,6 @@ name: Test CUA Supporting Models
 # Run manually using workflow_dispatch with test_models=true

 on:
-  pull_request_target:
-    branches: [main, master]
  workflow_dispatch:
    inputs:
      test_models:
@@ -20,7 +18,7 @@ on:
 jobs:
  # Test all CUA models - runs on schedules or when manually triggered (workflow_dispatch)
  test-all-models:
-    if: ${{ github.event_name == 'pull_request_target' || github.event_name == 'schedule' || fromJSON(inputs.test_models || 'false') }}
+    if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || fromJSON(inputs.test_models || 'false') }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
@@ -42,13 +40,13 @@ jobs:
          - gemini-2.5-computer-use-preview-10-2025

          # InternVL
-          - huggingface-local/OpenGVLab/InternVL3_5-1B
+          # - huggingface-local/OpenGVLab/InternVL3_5-1B
          # - huggingface-local/OpenGVLab/InternVL3_5-2B
          # - huggingface-local/OpenGVLab/InternVL3_5-4B
          # - huggingface-local/OpenGVLab/InternVL3_5-8B

          # UI-TARS (supports full computer-use, can run standalone)
-          - huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
+          # - huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B

          # Note: OpenCUA, GTA, and Holo are grounding-only models
          # They only support predict_click(), not agent.run()
@@ -56,7 +54,7 @@ jobs:

          # Moondream (typically used in composed agents)
          # Format: moondream3+{any-llm-with-tools}
-          - moondream3+anthropic/claude-sonnet-4-5-20250929 # Claude has VLM + Tools
+          # - moondream3+anthropic/claude-sonnet-4-5-20250929 # Claude has VLM + Tools
          # - moondream3+openai/gpt-4o  # GPT-4o has VLM + Tools

          # OmniParser (typically used in composed agents)
@@ -68,9 +66,9 @@ jobs:
          # Format: {grounding-model}+{any-vlm-with-tools}
          # These grounding-only models (OpenCUA, GTA, Holo) must be used in composed form
          # since they only support predict_click(), not full agent.run()
-          - huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929
-          - huggingface-local/xlangai/OpenCUA-7B+anthropic/claude-sonnet-4-5-20250929
-          - huggingface-local/Hcompany/Holo1.5-3B+anthropic/claude-sonnet-4-5-20250929
+          # - huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929
+          # - huggingface-local/xlangai/OpenCUA-7B+anthropic/claude-sonnet-4-5-20250929
+          # - huggingface-local/Hcompany/Holo1.5-3B+anthropic/claude-sonnet-4-5-20250929

    steps:
      - name: Checkout repository
@@ -219,6 +217,7 @@ jobs:
          path: |
            tests/agent_loop_testing/test_images/
            *.log
+          if-no-files-found: ignore
          retention-days: 7

      - name: Upload test summary data
@@ -228,6 +227,7 @@ jobs:
          # Unique, slash-free artifact name per matrix entry
          name: test-summary-${{ env.SAFE_MODEL_NAME }}
          path: test_summary/
+          if-no-files-found: ignore
          retention-days: 1

      - name: Set default Slack color
@@ -248,7 +248,7 @@ jobs:

  # Summary job that aggregates all model test results
  test-summary:
-    if: ${{ always() && (github.event_name == 'pull_request_target' || github.event_name == 'schedule' || fromJSON(inputs.test_models || 'false')) }}
+    if: ${{ always() && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || fromJSON(inputs.test_models || 'false')) }}
    needs: test-all-models
    runs-on: ubuntu-latest
    steps:
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.37
+current_version = 0.4.38
 commit = True
 tag = True
 tag_name = agent-v{new_version}
@@ -1,36 +1,40 @@
-"""
-Agent loops for agent
-"""
-
-# Import the loops to register them
-from . import (
-    anthropic,
-    composed_grounded,
-    gemini,
-    glm45v,
-    gta1,
-    holo,
-    internvl,
-    moondream3,
-    omniparser,
-    openai,
-    opencua,
-    qwen,
-    uitars,
-)
-
-__all__ = [
-    "anthropic",
-    "openai",
-    "uitars",
-    "omniparser",
-    "gta1",
-    "composed_grounded",
-    "glm45v",
-    "opencua",
-    "internvl",
-    "holo",
-    "moondream3",
-    "gemini",
-    "qwen",
-]
+"""
+Agent loops for agent
+"""
+
+# Import the loops to register them
+from . import (
+    anthropic,
+    composed_grounded,
+    gelato,
+    gemini,
+    glm45v,
+    gta1,
+    holo,
+    internvl,
+    moondream3,
+    omniparser,
+    openai,
+    opencua,
+    qwen,
+    uiins,
+    uitars,
+)
+
+__all__ = [
+    "anthropic",
+    "openai",
+    "uitars",
+    "omniparser",
+    "gta1",
+    "composed_grounded",
+    "glm45v",
+    "opencua",
+    "internvl",
+    "holo",
+    "moondream3",
+    "gemini",
+    "qwen",
+    "uiins",
+    "gelato",
+]
@@ -0,0 +1,183 @@
+"""
+Gelato agent loop implementation for click prediction using litellm.acompletion
+Model: https://huggingface.co/mlfoundations/Gelato-30B-A3B
+Code: https://github.com/mlfoundations/Gelato/tree/main
+"""
+
+import base64
+import math
+import re
+from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple
+
+import litellm
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability
+
+# System prompt for click prediction: asks the model to answer with a bare
+# "(x,y)" point. It is stripped of surrounding whitespace before being sent.
+SYSTEM_PROMPT = """
+You are an expert UI element locator. Given a GUI image and a user's element description, provide the coordinates of the specified element as a single (x,y) point. For elements with area, return the center point.
+
+Output the coordinate pair exactly:
+(x,y)
+"""
+
+
+def extract_coordinates(raw_string):
+    """
+    Extract the coordinates from the raw string.
+    Args:
+        raw_string: str (e.g. "(100, 200)")
+    Returns:
+        x: float (e.g. 100.0)
+        y: float (e.g. 200.0)
+    """
+    try:
+        matches = re.findall(r"\((-?\d*\.?\d+),\s*(-?\d*\.?\d+)\)", raw_string)
+        return [tuple(map(int, match)) for match in matches][0]
+    except:
+        return 0, 0
+
+
+def smart_resize(
+    height: int,
+    width: int,
+    factor: int = 28,
+    min_pixels: int = 3136,
+    max_pixels: int = 8847360,
+) -> Tuple[int, int]:
+    """Smart resize function similar to qwen_vl_utils."""
+    # Calculate the total pixels
+    total_pixels = height * width
+
+    # If already within bounds, return original dimensions
+    if min_pixels <= total_pixels <= max_pixels:
+        # Round to nearest factor
+        new_height = (height // factor) * factor
+        new_width = (width // factor) * factor
+        return new_height, new_width
+
+    # Calculate scaling factor
+    if total_pixels > max_pixels:
+        scale = (max_pixels / total_pixels) ** 0.5
+    else:
+        scale = (min_pixels / total_pixels) ** 0.5
+
+    # Apply scaling
+    new_height = int(height * scale)
+    new_width = int(width * scale)
+
+    # Round to nearest factor
+    new_height = (new_height // factor) * factor
+    new_width = (new_width // factor) * factor
+
+    # Ensure minimum size
+    new_height = max(new_height, factor)
+    new_width = max(new_width, factor)
+
+    return new_height, new_width
+
+
+@register_agent(models=r".*Gelato.*")
+class GelatoConfig(AsyncAgentConfig):
+    """Gelato agent configuration implementing AsyncAgentConfig protocol for click prediction."""
+
+    def __init__(self):
+        self.current_model = None
+        self.last_screenshot_b64 = None
+
+    async def predict_step(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_retries: Optional[int] = None,
+        stream: bool = False,
+        computer_handler=None,
+        _on_api_start=None,
+        _on_api_end=None,
+        _on_usage=None,
+        _on_screenshot=None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        raise NotImplementedError()
+
+    async def predict_click(
+        self, model: str, image_b64: str, instruction: str, **kwargs
+    ) -> Optional[Tuple[float, float]]:
+        """
+        Predict click coordinates using UI-Ins model via litellm.acompletion.
+
+        Args:
+            model: The UI-Ins model name
+            image_b64: Base64 encoded image
+            instruction: Instruction for where to click
+
+        Returns:
+            Tuple of (x, y) coordinates or None if prediction fails
+        """
+        # Decode base64 image
+        image_data = base64.b64decode(image_b64)
+        image = Image.open(BytesIO(image_data))
+        width, height = image.width, image.height
+
+        # Smart resize the image (similar to qwen_vl_utils)
+        resized_height, resized_width = smart_resize(
+            height,
+            width,
+            factor=28,  # Default factor for Qwen models
+            min_pixels=3136,
+            max_pixels=4096 * 2160,
+        )
+        resized_image = image.resize((resized_width, resized_height))
+        scale_x, scale_y = width / resized_width, height / resized_height
+
+        # Convert resized image back to base64
+        buffered = BytesIO()
+        resized_image.save(buffered, format="PNG")
+        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()
+
+        # Prepare system and user messages
+        system_message = {
+            "role": "system",
+            "content": [{"type": "text", "text": SYSTEM_PROMPT.strip()}],
+        }
+
+        user_message = {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
+                },
+                {"type": "text", "text": instruction},
+            ],
+        }
+
+        # Prepare API call kwargs
+        api_kwargs = {
+            "model": model,
+            "messages": [system_message, user_message],
+            "max_tokens": 2056,
+            "temperature": 0.0,
+            **kwargs,
+        }
+
+        # Use liteLLM acompletion
+        response = await litellm.acompletion(**api_kwargs)
+
+        # Extract response text
+        output_text = response.choices[0].message.content  # type: ignore
+
+        # Extract and rescale coordinates
+        pred_x, pred_y = extract_coordinates(output_text)  # type: ignore
+        pred_x *= scale_x
+        pred_y *= scale_y
+
+        return (math.floor(pred_x), math.floor(pred_y))
+
+    def get_capabilities(self) -> List[AgentCapability]:
+        """Return the capabilities supported by this agent."""
+        return ["click"]
@@ -0,0 +1,175 @@
+"""
+UI-Ins agent loop implementation for click prediction using litellm.acompletion
+Paper: https://arxiv.org/pdf/2510.20286
+Code: https://github.com/alibaba/UI-Ins
+"""
+
+import asyncio
+import base64
+import json
+import math
+import re
+import uuid
+from io import BytesIO
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+
+import litellm
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability, AgentResponse, Messages, Tools
+
+SYSTEM_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.\n\n## Output Format\nReturn a json object with a reasoning process in  tags, a function name and arguments within  XML tags:\n```\n\n...\n\n\n{"name": "grounding", "arguments": }\n\n```\n represents the following item of the action space:\n## Action Space{"action": "click", "coordinate": [x, y]}\nYour task is to accurately locate a UI element based on the instruction. You should first analyze instruction in  tags and finally output the function in  tags.\n"""
+
+
+def parse_coordinates(raw_string: str) -> tuple[int, int]:
+    matches = re.findall(r"\[(\d+),\s*(\d+)\]", raw_string)
+    if matches:
+        return tuple(map(int, matches[0]))
+    return -1, -1
+
+
+def smart_resize(
+    height: int,
+    width: int,
+    factor: int = 28,
+    min_pixels: int = 3136,
+    max_pixels: int = 8847360,
+) -> Tuple[int, int]:
+    """Smart resize function similar to qwen_vl_utils."""
+    # Calculate the total pixels
+    total_pixels = height * width
+
+    # If already within bounds, return original dimensions
+    if min_pixels <= total_pixels <= max_pixels:
+        # Round to nearest factor
+        new_height = (height // factor) * factor
+        new_width = (width // factor) * factor
+        return new_height, new_width
+
+    # Calculate scaling factor
+    if total_pixels > max_pixels:
+        scale = (max_pixels / total_pixels) ** 0.5
+    else:
+        scale = (min_pixels / total_pixels) ** 0.5
+
+    # Apply scaling
+    new_height = int(height * scale)
+    new_width = int(width * scale)
+
+    # Round to nearest factor
+    new_height = (new_height // factor) * factor
+    new_width = (new_width // factor) * factor
+
+    # Ensure minimum size
+    new_height = max(new_height, factor)
+    new_width = max(new_width, factor)
+
+    return new_height, new_width
+
+
+@register_agent(models=r".*UI-Ins.*")
+class UIInsConfig(AsyncAgentConfig):
+    """UI-Ins agent configuration implementing AsyncAgentConfig protocol for click prediction."""
+
+    def __init__(self):
+        self.current_model = None
+        self.last_screenshot_b64 = None
+
+    async def predict_step(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_retries: Optional[int] = None,
+        stream: bool = False,
+        computer_handler=None,
+        _on_api_start=None,
+        _on_api_end=None,
+        _on_usage=None,
+        _on_screenshot=None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        raise NotImplementedError()
+
+    async def predict_click(
+        self, model: str, image_b64: str, instruction: str, **kwargs
+    ) -> Optional[Tuple[float, float]]:
+        """
+        Predict click coordinates using UI-Ins model via litellm.acompletion.
+
+        Args:
+            model: The UI-Ins model name
+            image_b64: Base64 encoded image
+            instruction: Instruction for where to click
+
+        Returns:
+            Tuple of (x, y) coordinates or None if prediction fails
+        """
+        # Decode base64 image
+        image_data = base64.b64decode(image_b64)
+        image = Image.open(BytesIO(image_data))
+        width, height = image.width, image.height
+
+        # Smart resize the image (similar to qwen_vl_utils)
+        resized_height, resized_width = smart_resize(
+            height,
+            width,
+            factor=28,  # Default factor for Qwen models
+            min_pixels=3136,
+            max_pixels=4096 * 2160,
+        )
+        resized_image = image.resize((resized_width, resized_height))
+        scale_x, scale_y = width / resized_width, height / resized_height
+
+        # Convert resized image back to base64
+        buffered = BytesIO()
+        resized_image.save(buffered, format="PNG")
+        resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()
+
+        # Prepare system and user messages
+        system_message = {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {"type": "text", "text": SYSTEM_PROMPT},
+            ],
+        }
+
+        user_message = {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
+                },
+                {"type": "text", "text": instruction},
+            ],
+        }
+
+        # Prepare API call kwargs
+        api_kwargs = {
+            "model": model,
+            "messages": [system_message, user_message],
+            "max_tokens": 2056,
+            "temperature": 0.0,
+            **kwargs,
+        }
+
+        # Use liteLLM acompletion
+        response = await litellm.acompletion(**api_kwargs)
+
+        # Extract response text
+        output_text = response.choices[0].message.content  # type: ignore
+
+        # Extract and rescale coordinates
+        pred_x, pred_y = parse_coordinates(output_text)  # type: ignore
+        pred_x *= scale_x
+        pred_y *= scale_y
+
+        return (math.floor(pred_x), math.floor(pred_y))
+
+    def get_capabilities(self) -> List[AgentCapability]:
+        """Return the capabilities supported by this agent."""
+        return ["click"]
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"

 [project]
 name = "cua-agent"
-version = "0.4.37"
+version = "0.4.38"
 description = "CUA (Computer Use) Agent for AI-driven computer interaction"
 readme = "README.md"
 authors = [
@@ -75,14 +75,23 @@ class Watchdog:
        Returns:
            WebSocket URI for the Computer API Server
        """
-        ip_address = (
-            "localhost"
-            if not self.container_name
-            else f"{self.container_name}.containers.cloud.trycua.com"
-        )
-        protocol = "wss" if self.container_name else "ws"
-        port = "8443" if self.container_name else "8000"
-        return f"{protocol}://{ip_address}:{port}/ws"
+        if not self.container_name:
+            return "ws://localhost:8000/ws"
+
+        # Primary hostname (.sandbox.cua.ai); the legacy .containers.cloud.trycua.com host is exposed via ws_uri_fallback
+        return f"wss://{self.container_name}.sandbox.cua.ai:8443/ws"
+
+    @property
+    def ws_uri_fallback(self) -> str:
+        """Get the fallback WebSocket URI using legacy hostname.
+
+        Returns:
+            Fallback WebSocket URI for the Computer API Server
+        """
+        if not self.container_name:
+            return "ws://localhost:8000/ws"
+
+        return f"wss://{self.container_name}.containers.cloud.trycua.com:8443/ws"

    async def ping(self) -> bool:
        """
@@ -91,11 +100,11 @@ class Watchdog:
        Returns:
            True if connection successful, False otherwise
        """
-        try:
-            # Create a simple ping message
-            ping_message = {"command": "get_screen_size", "params": {}}
+        # Create a simple ping message
+        ping_message = {"command": "get_screen_size", "params": {}}

-            # Try to connect to the WebSocket
+        # Try primary URI first (.sandbox.cua.ai)
+        try:
            async with websockets.connect(
                self.ws_uri, max_size=1024 * 1024 * 10  # 10MB limit to match server
            ) as websocket:
@@ -105,13 +114,40 @@ class Watchdog:
                # Wait for any response or just close
                try:
                    response = await asyncio.wait_for(websocket.recv(), timeout=5)
-                    logger.debug(f"Ping response received: {response[:100]}...")
+                    logger.debug(f"Ping response received from primary URI: {response[:100]}...")
                    return True
                except asyncio.TimeoutError:
                    return False
        except Exception as e:
-            logger.warning(f"Ping failed: {e}")
-            return False
+            logger.debug(f"Primary URI ping failed: {e}")
+
+            # Try fallback URI (.containers.cloud.trycua.com)
+            if self.container_name:
+                try:
+                    async with websockets.connect(
+                        self.ws_uri_fallback,
+                        max_size=1024 * 1024 * 10,  # 10MB limit to match server
+                    ) as websocket:
+                        # Send ping message
+                        await websocket.send(json.dumps(ping_message))
+
+                        # Wait for any response or just close
+                        try:
+                            response = await asyncio.wait_for(websocket.recv(), timeout=5)
+                            logger.debug(
+                                f"Ping response received from fallback URI: {response[:100]}..."
+                            )
+                            return True
+                        except asyncio.TimeoutError:
+                            return False
+                except Exception as fallback_e:
+                    logger.warning(
+                        f"Both primary and fallback ping failed. Primary: {e}, Fallback: {fallback_e}"
+                    )
+                    return False
+            else:
+                logger.warning(f"Ping failed: {e}")
+                return False

    def kill_processes_on_port(self, port: int) -> bool:
        """
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.11
+current_version = 0.4.12
 commit = True
 tag = True
 tag_name = computer-v{new_version}
@@ -46,6 +46,8 @@ class CloudProvider(BaseVMProvider):
        self.api_key = api_key
        self.verbose = verbose
        self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/")
+        # Host caching dictionary: {vm_name: host_string}
+        self._host_cache: Dict[str, str] = {}

    @property
    def provider_type(self) -> VMProviderType:
@@ -60,12 +62,12 @@ class CloudProvider(BaseVMProvider):
    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Get VM information by querying the VM status endpoint.

-        - Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com"
+        - Build hostname via _get_host_for_vm(name) using cached host or fallback
        - Probe https://{hostname}:8443/status with a short timeout
        - If JSON contains a "status" field, return it; otherwise infer
        - Fallback to DNS resolve check to distinguish unknown vs not_found
        """
-        hostname = await self.get_ip(name=name)
+        hostname = await self._get_host_for_vm(name)

        # Try HTTPS probe to the computer-server status endpoint (8443)
        try:
@@ -118,8 +120,20 @@ class CloudProvider(BaseVMProvider):
                            vm = dict(item) if isinstance(item, dict) else {}
                            name = vm.get("name")
                            password = vm.get("password")
+                            api_host = vm.get("host")  # Read host from API response
+
                            if isinstance(name, str) and name:
-                                host = f"{name}.containers.cloud.trycua.com"
+                                # Use host from API if available, otherwise fallback to legacy format
+                                if isinstance(api_host, str) and api_host:
+                                    host = api_host
+                                    # Cache the host for this VM
+                                    self._host_cache[name] = host
+                                else:
+                                    # Legacy fallback
+                                    host = f"{name}.containers.cloud.trycua.com"
+                                    # Cache the legacy host
+                                    self._host_cache[name] = host
+
                                # api_url: always set if missing
                                if not vm.get("api_url"):
                                    vm["api_url"] = f"https://{host}:8443"
@@ -227,15 +241,73 @@ class CloudProvider(BaseVMProvider):
            "message": "update_vm not supported by public API",
        }

+    async def _get_host_for_vm(self, name: str) -> str:
+        """
+        Get the host for a VM, trying multiple approaches:
+        1. Check cache first
+        2. Try to refresh cache by calling list_vms
+        3. Try .sandbox.cua.ai format
+        4. Fallback to legacy .containers.cloud.trycua.com format
+
+        Args:
+            name: VM name
+
+        Returns:
+            Host string for the VM
+        """
+        # Check cache first
+        if name in self._host_cache:
+            return self._host_cache[name]
+
+        # Try to refresh cache by calling list_vms
+        try:
+            await self.list_vms()
+            # Check cache again after refresh
+            if name in self._host_cache:
+                return self._host_cache[name]
+        except Exception as e:
+            logger.warning(f"Failed to refresh VM list for host lookup: {e}")
+
+        # Try .sandbox.cua.ai format first
+        sandbox_host = f"{name}.sandbox.cua.ai"
+        if await self._test_host_connectivity(sandbox_host):
+            self._host_cache[name] = sandbox_host
+            return sandbox_host
+
+        # Fallback to legacy format
+        legacy_host = f"{name}.containers.cloud.trycua.com"
+        # Cache the legacy host
+        self._host_cache[name] = legacy_host
+        return legacy_host
+
+    async def _test_host_connectivity(self, hostname: str) -> bool:
+        """
+        Test if a host is reachable by trying to connect to its status endpoint.
+
+        Args:
+            hostname: Host to test
+
+        Returns:
+            True if host is reachable, False otherwise
+        """
+        try:
+            timeout = aiohttp.ClientTimeout(total=2)  # Short timeout for connectivity test
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                url = f"https://{hostname}:8443/status"
+                async with session.get(url, allow_redirects=False) as resp:
+                    # Any response (even error) means the host is reachable
+                    return True
+        except Exception:
+            return False
+
    async def get_ip(
        self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2
    ) -> str:
        """
-        Return the VM's IP address as '{container_name}.containers.cloud.trycua.com'.
-        Uses the provided 'name' argument (the VM name requested by the caller),
-        falling back to self.name only if 'name' is None.
-        Retries up to 3 times with retry_delay seconds if hostname is not available.
+        Return the VM's host address, trying to use cached host from API or falling back to legacy format.
+        Uses the provided 'name' argument (the VM name requested by the caller).
        """
        if name is None:
            raise ValueError("VM name is required for CloudProvider.get_ip")
-        return f"{name}.containers.cloud.trycua.com"
+
+        return await self._get_host_for_vm(name)
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"

 [project]
 name = "cua-computer"
-version = "0.4.11"
+version = "0.4.12"
 description = "Computer-Use Interface (CUI) framework powering Cua"
 readme = "README.md"
 authors = [
@@ -0,0 +1,34 @@
+# dependencies (bun install)
+node_modules
+
+# output
+out
+dist
+*.tgz
+
+# code coverage
+coverage
+*.lcov
+
+# logs
+logs
+*.log
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# caches
+.eslintcache
+.cache
+*.tsbuildinfo
+
+# IntelliJ based IDEs
+.idea
+
+# Finder (MacOS) folder config
+.DS_Store
@@ -0,0 +1,105 @@
+Default to using Bun instead of Node.js.
+
+- Use `bun <file>` instead of `node <file>` or `ts-node <file>`
+- Use `bun test` instead of `jest` or `vitest`
+- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild`
+- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install`
+- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>`
+- Bun automatically loads .env, so don't use dotenv.
+
+## APIs
+
+- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`.
+- `bun:sqlite` for SQLite. Don't use `better-sqlite3`.
+- `Bun.redis` for Redis. Don't use `ioredis`.
+- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`.
+- `WebSocket` is built-in. Don't use `ws`.
+- Prefer `Bun.file` over `node:fs`'s readFile/writeFile
+- Bun.$`ls` instead of execa.
+
+## Testing
+
+Use `bun test` to run tests.
+
+```ts#index.test.ts
+import { test, expect } from "bun:test";
+
+test("hello world", () => {
+  expect(1).toBe(1);
+});
+```
+
+## Frontend
+
+Use HTML imports with `Bun.serve()`. Don't use `vite`. HTML imports fully support React, CSS, Tailwind.
+
+Server:
+
+```ts#index.ts
+import index from "./index.html"
+
+Bun.serve({
+  routes: {
+    "/": index,
+    "/api/users/:id": {
+      GET: (req) => {
+        return new Response(JSON.stringify({ id: req.params.id }));
+      },
+    },
+  },
+  // optional websocket support
+  websocket: {
+    open: (ws) => {
+      ws.send("Hello, world!");
+    },
+    message: (ws, message) => {
+      ws.send(message);
+    },
+    close: (ws) => {
+      // handle close
+    }
+  },
+  development: {
+    hmr: true,
+    console: true,
+  }
+})
+```
+
+HTML files can import .tsx, .jsx or .js files directly and Bun's bundler will transpile & bundle automatically. `<link>` tags can point to stylesheets and Bun's CSS bundler will bundle.
+
+```html#index.html
+<html>
+  <body>
+    <h1>Hello, world!</h1>
+    <script type="module" src="./frontend.tsx"></script>
+  </body>
+</html>
+```
+
+With the following `frontend.tsx`:
+
+```tsx#frontend.tsx
+import React from "react";
+
+// import .css files directly and it works
+import './index.css';
+
+import { createRoot } from "react-dom/client";
+
+const root = createRoot(document.body);
+
+export default function Frontend() {
+  return <h1>Hello, world!</h1>;
+}
+
+root.render(<Frontend />);
+```
+
+Then, run index.ts
+
+```sh
+bun --hot ./index.ts
+```
+
+For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`.
@@ -0,0 +1,76 @@
+# CUA CLI (Bun)
+
+## Install
+
+```bash
+bun install
+bun link           # register package globally
+bun link cua-cli   # install the global binary `cua`
+```
+
+If you want to run without linking:
+
+```bash
+bun run ./index.ts -- --help
+```
+
+## Commands
+
+- **Auth**
+  - `cua auth login` – opens browser to authorize; stores API key locally
+  - `cua auth login --api-key sk-...` – stores provided key directly
+  - `cua auth pull` – writes/updates `.env` with `CUA_API_KEY`
+  - `cua auth logout` – clears stored API key
+
+- **VMs**
+  - `cua vm list`
+  - `cua vm create --os OS --configuration SIZE --region REGION` – creates a new VM
+    - OS: `linux`, `windows`, `macos`
+    - SIZE: `small`, `medium`, `large`
+    - REGION: `north-america`, `europe`, `asia-pacific`, `south-america`
+  - `cua vm delete NAME` – deletes a VM
+  - `cua vm start NAME`
+  - `cua vm stop NAME`
+  - `cua vm restart NAME`
+  - `cua vm vnc NAME` – opens NoVNC URL in your browser
+  - `cua vm chat NAME` – opens Dashboard Playground for the VM
+
+## Auth Flow (Dynamic Callback Port)
+
+- CLI starts a small local HTTP server using `Bun.serve({ port: 0 })` which picks an available port.
+- Browser is opened to `https://cua.ai/cli-auth?callback_url=http://127.0.0.1:<port>/callback`.
+- After you click "Authorize CLI", the browser redirects to the local server with `?token=...`.
+- The CLI saves the API key in `~/.config/cua/cli.sqlite`.
+
+> Note: If the browser cannot be opened automatically, copy/paste the printed URL.
+
+## Project Structure
+
+- `index.ts` – entry point (shebang + start CLI)
+- `src/cli.ts` – yargs bootstrapping
+- `src/commands/auth.ts` – auth/login/pull/logout commands
+- `src/commands/vm.ts` – vm list/create/delete/start/stop/restart/vnc/chat commands
+- `src/auth.ts` – browser flow + local callback server (dynamic port)
+- `src/http.ts` – HTTP helper
+- `src/storage.ts` – SQLite-backed key-value storage
+- `src/config.ts` – constants and paths
+- `src/util.ts` – table printing, .env writer
+
+## Notes
+
+- Stored API key lives at `~/.config/cua/cli.sqlite` under `kv(api_key)`.
+- Public API base defaults to `https://api.cua.ai` (override via `CUA_API_BASE`).
+- Website base defaults to `https://cua.ai` (override via `CUA_WEBSITE_URL`).
+- Authorization header: `Authorization: Bearer <api_key>`.
+
+### Environment overrides
+
+You can point the CLI to alternate deployments:
+
+```bash
+export CUA_API_BASE=https://api.staging.cua.ai
+export CUA_WEBSITE_URL=https://staging.cua.ai
+
+cua auth login
+cua vm chat my-vm    # opens https://staging.cua.ai/dashboard/playground?...
+```
@@ -0,0 +1,64 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 0,
+  "workspaces": {
+    "": {
+      "name": "cua-cli",
+      "dependencies": {
+        "yargs": "^18.0.0",
+      },
+      "devDependencies": {
+        "@types/bun": "latest",
+        "@types/yargs": "^17.0.33",
+      },
+      "peerDependencies": {
+        "typescript": "^5",
+      },
+    },
+  },
+  "packages": {
+    "@types/bun": ["@types/bun@1.3.0", "", { "dependencies": { "bun-types": "1.3.0" } }, "sha512-+lAGCYjXjip2qY375xX/scJeVRmZ5cY0wyHYyCYxNcdEXrQ4AOe3gACgd4iQ8ksOslJtW4VNxBJ8llUwc3a6AA=="],
+
+    "@types/node": ["@types/node@24.9.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-MKNwXh3seSK8WurXF7erHPJ2AONmMwkI7zAMrXZDPIru8jRqkk6rGDBVbw4mLwfqA+ZZliiDPg05JQ3uW66tKQ=="],
+
+    "@types/react": ["@types/react@19.2.2", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA=="],
+
+    "@types/yargs": ["@types/yargs@17.0.33", "", { "dependencies": { "@types/yargs-parser": "*" } }, "sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA=="],
+
+    "@types/yargs-parser": ["@types/yargs-parser@21.0.3", "", {}, "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ=="],
+
+    "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="],
+
+    "ansi-styles": ["ansi-styles@6.2.3", "", {}, "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg=="],
+
+    "bun-types": ["bun-types@1.3.0", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-u8X0thhx+yJ0KmkxuEo9HAtdfgCBaM/aI9K90VQcQioAmkVp3SG3FkwWGibUFz3WdXAdcsqOcbU40lK7tbHdkQ=="],
+
+    "cliui": ["cliui@9.0.1", "", { "dependencies": { "string-width": "^7.2.0", "strip-ansi": "^7.1.0", "wrap-ansi": "^9.0.0" } }, "sha512-k7ndgKhwoQveBL+/1tqGJYNz097I7WOvwbmmU2AR5+magtbjPWQTS1C5vzGkBC8Ym8UWRzfKUzUUqFLypY4Q+w=="],
+
+    "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
+
+    "emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="],
+
+    "escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="],
+
+    "get-caller-file": ["get-caller-file@2.0.5", "", {}, "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="],
+
+    "get-east-asian-width": ["get-east-asian-width@1.4.0", "", {}, "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q=="],
+
+    "string-width": ["string-width@7.2.0", "", { "dependencies": { "emoji-regex": "^10.3.0", "get-east-asian-width": "^1.0.0", "strip-ansi": "^7.1.0" } }, "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ=="],
+
+    "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="],
+
+    "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
+
+    "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
+
+    "wrap-ansi": ["wrap-ansi@9.0.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "string-width": "^7.0.0", "strip-ansi": "^7.1.0" } }, "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww=="],
+
+    "y18n": ["y18n@5.0.8", "", {}, "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA=="],
+
+    "yargs": ["yargs@18.0.0", "", { "dependencies": { "cliui": "^9.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "string-width": "^7.2.0", "y18n": "^5.0.5", "yargs-parser": "^22.0.0" } }, "sha512-4UEqdc2RYGHZc7Doyqkrqiln3p9X2DZVxaGbwhn2pi7MrRagKaOcIKe8L3OxYcbhXLgLFUS3zAYuQjKBQgmuNg=="],
+
+    "yargs-parser": ["yargs-parser@22.0.0", "", {}, "sha512-rwu/ClNdSMpkSrUb+d6BRsSkLUq1fmfsY6TOpYzTwvwkg1/NRG85KBy3kq++A8LKQwX6lsu+aWad+2khvuXrqw=="],
+  }
+}
@@ -0,0 +1,7 @@
+#! /usr/bin/env bun
+import { runCli } from './src/cli';
+
+/** Reports a fatal CLI error on stderr and terminates with exit code 1. */
+function exitWithError(err: unknown): never {
+  console.error(err);
+  process.exit(1);
+}
+
+// Start the CLI; any rejection from runCli() is treated as fatal.
+runCli().then(undefined, exitWithError);
@@ -0,0 +1,19 @@
+{
+  "name": "cua-cli",
+  "module": "index.ts",
+  "type": "module",
+  "private": true,
+  "devDependencies": {
+    "@types/bun": "latest",
+    "@types/yargs": "^17.0.33"
+  },
+  "peerDependencies": {
+    "typescript": "^5"
+  },
+  "bin": {
+    "cua": "./index.ts"
+  },
+  "dependencies": {
+    "yargs": "^18.0.0"
+  }
+}
@@ -0,0 +1,70 @@
+import { AUTH_PAGE, CALLBACK_HOST } from './config';
+import { setApiKey, getApiKey } from './storage';
+import { openInBrowser } from './util';
+
+// ANSI escape sequences used to style the login prompts printed by
+// loginViaBrowser(). Each styled message ends with `reset` so the styling
+// does not carry over into later terminal output.
+const c = {
+  reset: '\x1b[0m',
+  bold: '\x1b[1m',
+  dim: '\x1b[2m',
+  underline: '\x1b[4m',
+  cyan: '\x1b[36m',
+  green: '\x1b[32m',
+  yellow: '\x1b[33m',
+};
+
+/**
+ * Runs the browser-based login flow and resolves with the token handed back
+ * to the local callback server.
+ *
+ * A temporary HTTP server is bound to CALLBACK_HOST on an OS-assigned port,
+ * the authorization page is opened with that server's `/callback` URL, and the
+ * function then waits for a request carrying `?token=...`.
+ *
+ * @returns The token received on the callback URL.
+ * @throws Error('Timed out waiting for authorization') when no token arrives
+ *   within two minutes of the browser being launched.
+ */
+export async function loginViaBrowser(): Promise<string> {
+  // Deferred promise: `deliverToken` is assigned synchronously by the executor.
+  let deliverToken!: (value: string) => void;
+  const tokenReceived = new Promise<string>((resolve) => {
+    deliverToken = resolve;
+  });
+
+  const server = Bun.serve({
+    hostname: CALLBACK_HOST,
+    // Port 0 lets the OS choose any available port.
+    port: 0,
+    fetch(req) {
+      const { pathname, searchParams } = new URL(req.url);
+      if (pathname !== '/callback') {
+        return new Response('Not found', { status: 404 });
+      }
+      const token = searchParams.get('token');
+      if (!token) {
+        return new Response('Missing token', { status: 400 });
+      }
+      deliverToken(token);
+      // Defer the stop to a microtask so it runs after this handler has
+      // returned its response.
+      queueMicrotask(() => server.stop());
+      return new Response('CLI authorized. You can close this window.', {
+        status: 200,
+        headers: { 'content-type': 'text/plain' },
+      });
+    },
+  });
+
+  const callbackURL = `http://${CALLBACK_HOST}:${server.port}/callback`;
+  const authorizeURL = `${AUTH_PAGE}?callback_url=${encodeURIComponent(callbackURL)}`;
+  const prompts = [
+    `${c.cyan}${c.bold}Opening your default browser to authorize the CLI...${c.reset}`,
+    `${c.dim}If the browser does not open automatically, copy/paste this URL:${c.reset}`,
+    `${c.yellow}${c.underline}${authorizeURL}${c.reset}`,
+  ];
+  for (const line of prompts) console.log(line);
+  await openInBrowser(authorizeURL);
+
+  const AUTH_TIMEOUT_MS = 2 * 60 * 1000;
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const deadline = new Promise<string>((_resolve, reject) => {
+    timer = setTimeout(() => {
+      reject(new Error('Timed out waiting for authorization'));
+    }, AUTH_TIMEOUT_MS);
+  });
+
+  try {
+    const token = await Promise.race([tokenReceived, deadline]);
+    if (timer) clearTimeout(timer);
+    return token;
+  } finally {
+    // Best effort: the callback handler may already have stopped the server.
+    try {
+      server.stop();
+    } catch {}
+  }
+}
+
+/**
+ * Returns the stored API key, or — when none is stored — runs the browser
+ * login, persists the resulting token and returns it.
+ */
+export async function ensureApiKeyInteractive(): Promise<string> {
+  const stored = getApiKey();
+  if (!stored) {
+    const fresh = await loginViaBrowser();
+    setApiKey(fresh);
+    return fresh;
+  }
+  return stored;
+}
@@ -0,0 +1,11 @@
+import yargs from 'yargs';
+import { hideBin } from 'yargs/helpers';
+import { registerAuthCommands } from './commands/auth';
+import { registerVmCommands } from './commands/vm';
+
+/**
+ * Builds the `cua` yargs parser, registers the auth and vm command groups,
+ * and parses the process arguments. At least one command is required and
+ * unknown commands/options are rejected (`strict`).
+ */
+export async function runCli() {
+  const base = yargs(hideBin(process.argv)).scriptName('cua');
+  const withCommands = registerVmCommands(registerAuthCommands(base));
+  await withCommands.demandCommand(1).strict().help().parseAsync();
+}
@@ -0,0 +1,46 @@
+import { setApiKey, clearApiKey } from '../storage';
+import { ensureApiKeyInteractive, loginViaBrowser } from '../auth';
+import { writeEnvFile } from '../util';
+import type { Argv } from 'yargs';
+
+/**
+ * `cua auth login`: stores the key given via `--api-key` directly; otherwise
+ * runs the browser flow and stores the token it returns.
+ */
+async function handleLogin(argv: Record<string, unknown>): Promise<void> {
+  const provided = argv['api-key'];
+  if (!provided) {
+    console.log('Opening browser for CLI auth...');
+    const token = await loginViaBrowser();
+    setApiKey(token);
+    console.log('API key saved');
+    return;
+  }
+  setApiKey(String(provided));
+  console.log('API key saved');
+}
+
+/** `cua auth pull`: writes the API key (logging in first if needed) into `.env` in the cwd. */
+async function handlePull(_argv: Record<string, unknown>): Promise<void> {
+  const token = await ensureApiKeyInteractive();
+  const written = await writeEnvFile(process.cwd(), token);
+  console.log(`Wrote ${written}`);
+}
+
+/** `cua auth logout`: removes the stored API key. */
+async function handleLogout(_argv: Record<string, unknown>): Promise<void> {
+  clearApiKey();
+  console.log('Logged out');
+}
+
+/**
+ * Registers the `auth` command group (`login`, `pull`, `logout`) on the given
+ * yargs instance and returns the result of `y.command(...)`.
+ */
+export function registerAuthCommands(y: Argv) {
+  return y.command('auth', 'Auth commands', (auth) =>
+    auth
+      .command(
+        'login',
+        'Open browser to authorize and store API key',
+        (login) =>
+          login.option('api-key', { type: 'string', describe: 'API key to store directly' }),
+        handleLogin
+      )
+      .command(
+        'pull',
+        'Create or update .env with CUA_API_KEY (login if needed)',
+        () => {},
+        handlePull
+      )
+      .command('logout', 'Remove stored API key', () => {}, handleLogout)
+      .demandCommand(1, 'Specify an auth subcommand')
+  );
+}
@@ -0,0 +1,294 @@
+import type { Argv } from 'yargs';
+import { ensureApiKeyInteractive } from '../auth';
+import { http } from '../http';
+import { printVmList, openInBrowser } from '../util';
+import { WEBSITE_URL } from '../config';
+import type { VmItem } from '../util';
+import { clearApiKey } from '../storage';
+
+/** Prints `message` to stderr and terminates the process with exit code 1. */
+function fail(message: string): never {
+  console.error(message);
+  process.exit(1);
+}
+
+/**
+ * On HTTP 401, removes the stored API key (the server rejected it) and exits;
+ * for any other status this is a no-op.
+ */
+function exitIfUnauthorized(res: Response): void {
+  if (res.status !== 401) return;
+  clearApiKey();
+  fail("Unauthorized. Try 'cua auth login' again.");
+}
+
+/**
+ * Terminates the command for a per-VM response that was not the expected
+ * success status: 404 prints `notFoundMessage`, 401 clears the stored key,
+ * anything else is reported as an unexpected status.
+ */
+function failOnVmError(res: Response, notFoundMessage: string): never {
+  if (res.status === 404) return fail(notFoundMessage);
+  exitIfUnauthorized(res);
+  return fail(`Unexpected status: ${res.status}`);
+}
+
+/** Reads the `<name>` positional from the parsed arguments as a string. */
+function vmNameOf(argv: Record<string, unknown>): string {
+  return String(argv['name']);
+}
+
+/** Builder shared by every subcommand that takes a `<name>` positional. */
+const withVmName = (y: Argv) => y.positional('name', { type: 'string', describe: 'VM name' });
+
+/**
+ * Reads the `status` field of a response body, falling back to `fallback`
+ * when the body is not JSON or has no `status`.
+ */
+async function statusOr(res: Response, fallback: string): Promise<string> {
+  const body = (await res.json().catch(() => ({}))) as { status?: string };
+  return body.status ?? fallback;
+}
+
+/** Fetches `GET /v1/vms`; exits the process on 401 or any non-OK status. */
+async function fetchVms(token: string): Promise<VmItem[]> {
+  const res = await http('/v1/vms', { token });
+  exitIfUnauthorized(res);
+  if (!res.ok) return fail(`Request failed: ${res.status}`);
+  return (await res.json()) as VmItem[];
+}
+
+/**
+ * Obtains an API key (logging in if needed) and sends
+ * `<method> /v1/vms/<name><suffix>` for the VM named in `argv`.
+ */
+async function requestVm(
+  argv: Record<string, unknown>,
+  method: 'POST' | 'DELETE',
+  suffix = ''
+): Promise<Response> {
+  const token = await ensureApiKeyInteractive();
+  const name = vmNameOf(argv);
+  return http(`/v1/vms/${encodeURIComponent(name)}${suffix}`, { token, method });
+}
+
+/**
+ * Looks up the VM named in `argv` in the VM list and returns it together with
+ * the host to reach it on. When the VM has no `host`, the host defaults to
+ * `<name>.containers.cloud.trycua.com`. Exits if the VM is not in the list.
+ */
+async function resolveVm(argv: Record<string, unknown>): Promise<{ vm: VmItem; host: string }> {
+  const token = await ensureApiKeyInteractive();
+  const name = vmNameOf(argv);
+  const vms = await fetchVms(token);
+  const vm = vms.find((v) => v.name === name);
+  if (!vm) return fail('VM not found');
+  const host = vm.host && vm.host.length ? vm.host : `${vm.name}.containers.cloud.trycua.com`;
+  return { vm, host };
+}
+
+/**
+ * Registers the `vm` command group (`list`, `create`, `delete`, `start`,
+ * `stop`, `restart`, `vnc`, `chat`) on the given yargs instance.
+ *
+ * Every handler first obtains an API key via ensureApiKeyInteractive(). Any
+ * failure is reported on stderr and ends the process with exit code 1; a 401
+ * additionally clears the stored API key so the next run logs in again.
+ */
+export function registerVmCommands(y: Argv) {
+  return y.command('vm', 'VM commands', (yv) =>
+    yv
+      .command(
+        'list',
+        'List VMs',
+        () => {},
+        async (_argv: Record<string, unknown>) => {
+          const token = await ensureApiKeyInteractive();
+          printVmList(await fetchVms(token));
+        }
+      )
+      .command(
+        'create',
+        'Create a new VM',
+        (y) =>
+          y
+            .option('os', {
+              type: 'string',
+              choices: ['linux', 'windows', 'macos'],
+              demandOption: true,
+              describe: 'Operating system',
+            })
+            .option('configuration', {
+              type: 'string',
+              choices: ['small', 'medium', 'large'],
+              demandOption: true,
+              describe: 'VM size configuration',
+            })
+            .option('region', {
+              type: 'string',
+              choices: ['north-america', 'europe', 'asia-pacific', 'south-america'],
+              demandOption: true,
+              describe: 'VM region',
+            }),
+        async (argv: Record<string, unknown>) => {
+          const token = await ensureApiKeyInteractive();
+          const { os, configuration, region } = argv as {
+            os: string;
+            configuration: string;
+            region: string;
+          };
+
+          const res = await http('/v1/vms', {
+            token,
+            method: 'POST',
+            body: { os, configuration, region },
+          });
+
+          exitIfUnauthorized(res);
+          if (res.status === 400) return fail('Invalid request or unsupported configuration');
+          if (res.status === 500) return fail('Internal server error');
+
+          if (res.status === 200) {
+            // VM ready immediately
+            const data = (await res.json()) as {
+              status: string;
+              name: string;
+              password: string;
+              host: string;
+            };
+            console.log(`VM created and ready: ${data.name}`);
+            console.log(`Password: ${data.password}`);
+            console.log(`Host: ${data.host}`);
+            return;
+          }
+
+          if (res.status === 202) {
+            // VM provisioning started
+            const data = (await res.json()) as { status: string; name: string; job_id: string };
+            console.log(`VM provisioning started: ${data.name}`);
+            console.log(`Job ID: ${data.job_id}`);
+            console.log("Use 'cua vm list' to monitor provisioning progress");
+            return;
+          }
+
+          return fail(`Unexpected status: ${res.status}`);
+        }
+      )
+      .command(
+        'delete <name>',
+        'Delete a VM',
+        withVmName,
+        async (argv: Record<string, unknown>) => {
+          const res = await requestVm(argv, 'DELETE');
+          if (res.status !== 202) return failOnVmError(res, 'VM not found or not owned by you');
+          console.log(`VM deletion initiated: ${await statusOr(res, 'deleting')}`);
+        }
+      )
+      .command(
+        'start <name>',
+        'Start a VM',
+        withVmName,
+        async (argv: Record<string, unknown>) => {
+          const res = await requestVm(argv, 'POST', '/start');
+          // Unlike stop/restart, a successful start is signalled with 204.
+          if (res.status !== 204) return failOnVmError(res, 'VM not found');
+          console.log('Start accepted');
+        }
+      )
+      .command(
+        'stop <name>',
+        'Stop a VM',
+        withVmName,
+        async (argv: Record<string, unknown>) => {
+          const res = await requestVm(argv, 'POST', '/stop');
+          if (res.status !== 202) return failOnVmError(res, 'VM not found');
+          console.log(await statusOr(res, 'stopping'));
+        }
+      )
+      .command(
+        'restart <name>',
+        'Restart a VM',
+        withVmName,
+        async (argv: Record<string, unknown>) => {
+          const res = await requestVm(argv, 'POST', '/restart');
+          if (res.status !== 202) return failOnVmError(res, 'VM not found');
+          console.log(await statusOr(res, 'restarting'));
+        }
+      )
+      .command(
+        'vnc <name>',
+        'Open NoVNC for a VM in your browser',
+        withVmName,
+        async (argv: Record<string, unknown>) => {
+          const { vm, host } = await resolveVm(argv);
+          const url = `https://${host}/vnc.html?autoconnect=true&password=${encodeURIComponent(vm.password)}`;
+          console.log(`Opening NoVNC: ${url}`);
+          await openInBrowser(url);
+        }
+      )
+      .command(
+        'chat <name>',
+        'Open CUA dashboard playground for a VM',
+        withVmName,
+        async (argv: Record<string, unknown>) => {
+          const { vm, host } = await resolveVm(argv);
+          const base = WEBSITE_URL.replace(/\/$/, '');
+          const url = `${base}/dashboard/playground?host=${encodeURIComponent(host)}&id=${encodeURIComponent(vm.name)}&name=${encodeURIComponent(vm.name)}&vnc_password=${encodeURIComponent(vm.password)}&fullscreen=true`;
+          console.log(`Opening Playground: ${url}`);
+          await openInBrowser(url);
+        }
+      )
+      .demandCommand(1, 'Specify a vm subcommand')
+  );
+}
@@ -0,0 +1,17 @@
+import { mkdirSync } from 'node:fs';
+
+// Base URLs for the website and the public API. Each can be overridden through
+// the environment; a single trailing slash on the override is stripped so
+// paths can be appended with a leading `/`.
+export const WEBSITE_URL = Bun.env.CUA_WEBSITE_URL?.replace(/\/$/, '') || 'https://cua.ai';
+export const API_BASE = Bun.env.CUA_API_BASE?.replace(/\/$/, '') || 'https://api.cua.ai';
+// Page the browser is sent to during `cua auth login`.
+export const AUTH_PAGE = `${WEBSITE_URL}/cli-auth`;
+// Loopback address the temporary auth callback server binds to.
+export const CALLBACK_HOST = '127.0.0.1';
+
+/**
+ * Returns the CLI's configuration directory (`<home>/.cua`), creating it if
+ * it does not exist yet.
+ *
+ * `<home>` is `HOME`, then `USERPROFILE`, then the current directory.
+ * The directory is created in-process with `fs.mkdirSync` rather than by
+ * spawning `mkdir -p`, which is not an executable on Windows.
+ */
+export function getConfigDir(): string {
+  const home = Bun.env.HOME || Bun.env.USERPROFILE || '.';
+  const dir = `${home}/.cua`;
+  try {
+    // `recursive: true` makes this a no-op when the directory already exists.
+    mkdirSync(dir, { recursive: true });
+  } catch {
+    // Best effort: if the directory cannot be created, the failure surfaces
+    // when the caller tries to use a path inside it.
+  }
+  return dir;
+}
+
+/** Returns the path of the SQLite file inside the configuration directory. */
+export function getDbPath(): string {
+  return `${getConfigDir()}/cli.sqlite`;
+}
@@ -0,0 +1,15 @@
+import { API_BASE } from './config';
+
+/**
+ * Sends a request to the CUA API with bearer-token authorization.
+ *
+ * @param path Path appended to API_BASE (expected to start with `/`).
+ * @param opts.method HTTP method; defaults to `GET`.
+ * @param opts.token API key sent as `Authorization: Bearer <token>`.
+ * @param opts.body Optional payload; when present it is JSON-encoded and sent
+ *   with `content-type: application/json`.
+ * @returns The raw `Response`; status handling is left to the caller.
+ */
+export async function http(
+  path: string,
+  opts: { method?: string; token: string; body?: unknown }
+): Promise<Response> {
+  const headers: Record<string, string> = { Authorization: `Bearer ${opts.token}` };
+  // Presence check instead of truthiness, so falsy but valid JSON payloads
+  // (0, false, '') are sent rather than silently dropped.
+  const hasBody = opts.body != null;
+  if (hasBody) headers['content-type'] = 'application/json';
+  return fetch(`${API_BASE}${path}`, {
+    method: opts.method || 'GET',
+    headers,
+    body: hasBody ? JSON.stringify(opts.body) : undefined,
+  });
+}
@@ -0,0 +1,40 @@
+import { Database } from 'bun:sqlite';
+import { getDbPath } from './config';
+
+/**
+ * Opens the CLI's SQLite database, switches it to WAL journaling and makes
+ * sure the `kv` table exists. The caller owns the returned connection and
+ * must close it.
+ *
+ * If the setup statements fail, the connection is closed before the error is
+ * rethrown so a failed open does not leak it.
+ */
+function getDb(): Database {
+  const db = new Database(getDbPath());
+  try {
+    db.exec('PRAGMA journal_mode = WAL;');
+    db.exec('CREATE TABLE IF NOT EXISTS kv (k TEXT PRIMARY KEY, v TEXT NOT NULL);');
+  } catch (err) {
+    db.close();
+    throw err;
+  }
+  return db;
+}
+
+/** Stores `token` under the `api_key` row of the `kv` table, replacing any previous value. */
+export function setApiKey(token: string) {
+  const upsertSql =
+    "INSERT INTO kv (k, v) VALUES ('api_key', ?) ON CONFLICT(k) DO UPDATE SET v=excluded.v";
+  const conn = getDb();
+  try {
+    conn.query(upsertSql).run(token);
+  } finally {
+    conn.close();
+  }
+}
+
+/** Returns the value stored under `api_key`, or `null` when there is none. */
+export function getApiKey(): string | null {
+  const conn = getDb();
+  try {
+    const found = conn.query("SELECT v FROM kv WHERE k='api_key'").get() as
+      | { v: string }
+      | undefined;
+    if (found == null) return null;
+    return found.v ?? null;
+  } finally {
+    conn.close();
+  }
+}
+
+/** Deletes the `api_key` row from the `kv` table, if present. */
+export function clearApiKey() {
+  const deleteSql = "DELETE FROM kv WHERE k='api_key'";
+  const conn = getDb();
+  try {
+    conn.query(deleteSql).run();
+  } finally {
+    conn.close();
+  }
+}
@@ -0,0 +1,49 @@
+/**
+ * Creates or updates `<cwd>/.env` so that it contains `CUA_API_KEY=<key>`.
+ *
+ * An existing line starting with `CUA_API_KEY=` is replaced in place;
+ * otherwise the entry is appended. All other lines — including blank lines
+ * used to group entries — are kept. Line endings are normalized to `\n` and
+ * the file always ends with exactly one newline.
+ *
+ * @param cwd Directory containing (or to contain) the `.env` file.
+ * @param key API key to write.
+ * @returns The path of the file that was written.
+ */
+export async function writeEnvFile(cwd: string, key: string) {
+  const path = `${cwd}/.env`;
+  let content = '';
+  try {
+    content = await Bun.file(path).text();
+  } catch {
+    // An unreadable or missing file is treated as empty; it is created by the write below.
+  }
+  const lines = content.split(/\r?\n/);
+  // Drop only the trailing empty segments left by the final newline(s);
+  // blank lines inside the file are part of the user's formatting.
+  while (lines.length > 0 && lines.at(-1) === '') lines.pop();
+  const entry = `CUA_API_KEY=${key}`;
+  const idx = lines.findIndex((l) => l.startsWith('CUA_API_KEY='));
+  if (idx >= 0) lines[idx] = entry;
+  else lines.push(entry);
+  await Bun.write(path, lines.join('\n') + '\n');
+  return path;
+}
+
+/** Status values a VM can be reported with. */
+export type VmStatus = 'pending' | 'running' | 'stopped' | 'terminated' | 'failed';
+/**
+ * A VM entry as the CLI reads it from the VM list response.
+ * `password` is placed in the NoVNC and playground URLs the CLI opens.
+ * `host` is optional; callers fall back to a host derived from `name` when it is absent or empty.
+ */
+export type VmItem = { name: string; password: string; status: VmStatus; host?: string };
+
+/**
+ * Prints the VMs as a left-aligned table (NAME, STATUS, PASSWORD, HOST) with
+ * columns padded to their widest cell and separated by two spaces. The header
+ * row is always printed; an empty list is followed by `No VMs found`.
+ */
+export function printVmList(items: VmItem[]) {
+  const header = ['NAME', 'STATUS', 'PASSWORD', 'HOST'];
+  const table: string[][] = [header];
+  for (const vm of items) {
+    table.push([vm.name, String(vm.status), vm.password, vm.host || '']);
+  }
+
+  // Width of each column = length of its longest cell, header included.
+  const columnWidths = header.map((_, col) =>
+    table.reduce((widest, row) => Math.max(widest, (row[col] ?? '').length), 0)
+  );
+
+  for (const row of table) {
+    const cells = row.map((cell, col) => (cell ?? '').padEnd(columnWidths[col] ?? 0));
+    console.log(cells.join('  '));
+  }
+  if (items.length === 0) console.log('No VMs found');
+}
+
+/**
+ * Opens `url` with the platform's URL opener and waits for that opener
+ * process to exit. Never throws: if the opener cannot be started or exits
+ * with a non-zero code, a message containing the URL is printed to stderr so
+ * the user can open it manually.
+ *
+ * @param url Absolute URL to open.
+ */
+export async function openInBrowser(url: string) {
+  let cmd: string[];
+  switch (process.platform) {
+    case 'darwin':
+      cmd = ['open', url];
+      break;
+    case 'win32':
+      // Deliberately not `cmd /c start "" <url>`: cmd.exe treats `&` as a
+      // command separator, which truncates URLs that carry several query
+      // parameters. rundll32 receives the URL without shell parsing.
+      cmd = ['rundll32', 'url.dll,FileProtocolHandler', url];
+      break;
+    default:
+      cmd = ['xdg-open', url];
+  }
+  try {
+    const exitCode = await Bun.spawn({ cmd }).exited;
+    // A non-zero exit (e.g. xdg-open finding no browser) is a failure too.
+    if (exitCode !== 0) throw new Error(`${cmd[0]} exited with code ${exitCode}`);
+  } catch {
+    console.error(`Failed to open browser. Please visit: ${url}`);
+  }
+}
@@ -0,0 +1,29 @@
+{
+  "compilerOptions": {
+    // Environment setup & latest features
+    "lib": ["ESNext"],
+    "target": "ESNext",
+    "module": "Preserve",
+    "moduleDetection": "force",
+    "jsx": "react-jsx",
+    "allowJs": true,
+
+    // Bundler mode
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "noEmit": true,
+
+    // Best practices
+    "strict": true,
+    "skipLibCheck": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedIndexedAccess": true,
+    "noImplicitOverride": true,
+
+    // Some stricter flags (disabled by default)
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "noPropertyAccessFromIndexSignature": false
+  }
+}