Merge branch 'main' into feature/agent/uitars-mlx

2026-01-08 14:30:25 -06:00 · 2025-05-04 21:40:07 -04:00
parent 0304c45de5 40e36e21b5
commit 21abb93473
59 changed files with 2345 additions and 396 deletions
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -151,6 +151,15 @@
      "contributions": [
        "code"
      ]
+    },
+    {
+      "login": "FinnBorge",
+      "name": "FinnBorge",
+      "avatar_url": "https://avatars.githubusercontent.com/u/9272726?v=4",
+      "profile": "https://github.com/FinnBorge",
+      "contributions": [
+        "code"
+      ]
    }
  ]
 }
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,8 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+lib/*
+!libs/lumier/src/lib/
 lib64/
 parts/
 sdist/
@@ -242,4 +243,7 @@ trajectories/
 .storage/

 # Gradio settings
-.gradio_settings.json
+.gradio_settings.json
+
+# Lumier Storage
+storage/
--- a/.vscode/lumier.code-workspace
+++ b/.vscode/lumier.code-workspace
@@ -0,0 +1,30 @@
+{
+    "folders": [
+        {
+            "name": "lumier",
+            "path": "../libs/lumier"
+        },
+        {
+            "name": "lume",
+            "path": "../libs/lume"
+        }
+    ],
+    "settings": {
+        "files.exclude": {
+            "**/.git": true,
+            "**/.svn": true,
+            "**/.hg": true,
+            "**/CVS": true,
+            "**/.DS_Store": true
+        }
+    },
+    "tasks": {
+        "version": "2.0.0",
+        "tasks": [
+        ]
+    },
+    "launch": {
+        "configurations": [
+        ]
+    }
+} 
--- a/README.md
+++ b/README.md
@@ -47,6 +47,13 @@ If you only need the virtualization capabilities:
 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
 ```

+Optionally, if you don't want Lume to run as a background service:
+```bash
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh) --no-background-service"
+```
+
+**Note:** If you choose this option, you'll need to start the Lume API service manually by running `lume serve` in your terminal whenever you need it. This also applies to Option 2: run `lume serve` after completing step 1.
+
 For Lume usage instructions, refer to the [Lume documentation](./libs/lume/README.md).

 ### Option 2: Full Computer-Use Agent Capabilities
@@ -62,17 +69,12 @@ If you want to use AI agents with virtualized environments:
   lume pull macos-sequoia-cua:latest
   ```

-3. Start Lume daemon service:
-   ```bash
-   lume serve
-   ```
-
-4. Install the Python libraries:
+3. Install the Python libraries:
   ```bash
   pip install cua-computer cua-agent[all]
   ```

-5. Use the libraries in your Python code:
+4. Use the libraries in your Python code:
   ```python
   from computer import Computer
   from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
@@ -80,7 +82,7 @@ If you want to use AI agents with virtualized environments:
   async with Computer(verbosity=logging.DEBUG) as macos_computer:
     agent = ComputerAgent(
         computer=macos_computer,
-         loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC
+         loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC
         model=LLM(provider=LLMProvider.OPENAI) # or LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit")
     )

@@ -95,7 +97,7 @@ If you want to use AI agents with virtualized environments:
   
   Explore the [Agent Notebook](./notebooks/) for a ready-to-run example.

-6. Optionally, you can use the Agent with a Gradio UI:
+5. Optionally, you can use the Agent with a Gradio UI:

   ```python
   from utils import load_dotenv_files
@@ -228,6 +230,7 @@ Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonica
    <tr>
      <td align="center" valign="top" width="14.28%"><a href="https://www.encona.com/"><img src="https://avatars.githubusercontent.com/u/891558?v=4?s=100" width="100px;" alt="Rahim Nathwani"/><br /><sub><b>Rahim Nathwani</b></sub></a><br /><a href="#code-rahimnathwani" title="Code">💻</a></td>
      <td align="center" valign="top" width="14.28%"><a href="https://mjspeck.github.io/"><img src="https://avatars.githubusercontent.com/u/20689127?v=4?s=100" width="100px;" alt="Matt Speck"/><br /><sub><b>Matt Speck</b></sub></a><br /><a href="#code-mjspeck" title="Code">💻</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/FinnBorge"><img src="https://avatars.githubusercontent.com/u/9272726?v=4?s=100" width="100px;" alt="FinnBorge"/><br /><sub><b>FinnBorge</b></sub></a><br /><a href="#code-FinnBorge" title="Code">💻</a></td>
    </tr>
  </tbody>
 </table>
--- a/libs/agent/README.md
+++ b/libs/agent/README.md
@@ -50,10 +50,10 @@ async with Computer() as macos_computer:
      # model=LLM(provider=LLMProvider.ANTHROPIC)
      # or
      # loop=AgentLoop.OMNI,
-      # model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
+      # model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
      # or
      # loop=AgentLoop.UITARS,
-      # model=LLM(provider=LLMProvider.OAICOMPAT, model="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
+      # model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
  )

  tasks = [
--- a/libs/agent/agent/providers/anthropic/loop.py
+++ b/libs/agent/agent/providers/anthropic/loop.py
@@ -279,6 +279,8 @@ class AnthropicLoop(BaseLoop):
                    messages,
                    model=self.model,
                )
+                # Log standardized response for ease of parsing
+                self._log_api_call("agent_response", request=None, response=openai_compatible_response)
                await queue.put(openai_compatible_response)

                if not should_continue:
--- a/libs/agent/agent/providers/anthropic/tools/computer.py
+++ b/libs/agent/agent/providers/anthropic/tools/computer.py
@@ -161,15 +161,17 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                    self.logger.info(f"Moving cursor to ({x}, {y})")
                    await self.computer.interface.move_cursor(x, y)
                elif action == "left_click_drag":
-                    self.logger.info(f"Dragging from ({x}, {y})")
-                    # First move to the position
-                    await self.computer.interface.move_cursor(x, y)
-                    # Then perform drag operation - check if drag_to exists or we need to use other methods
-                    try:
-                        await self.computer.interface.drag_to(x, y)
-                    except Exception as e:
-                        self.logger.error(f"Error during drag operation: {str(e)}")
-                        raise ToolError(f"Failed to perform drag: {str(e)}")
+                    # Get the start coordinate from kwargs
+                    start_coordinate = kwargs.get("start_coordinate")
+                    if not start_coordinate:
+                        raise ToolError("start_coordinate is required for left_click_drag action")
+                    
+                    start_x, start_y = start_coordinate
+                    end_x, end_y = x, y
+                    
+                    self.logger.info(f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y})")
+                    await self.computer.interface.move_cursor(start_x, start_y)
+                    await self.computer.interface.drag_to(end_x, end_y)

                # Wait briefly for any UI changes
                await asyncio.sleep(0.5)
--- a/libs/agent/agent/providers/omni/loop.py
+++ b/libs/agent/agent/providers/omni/loop.py
@@ -670,6 +670,8 @@ class OmniLoop(BaseLoop):
                    parsed_screen=parsed_screen,
                    parser=self.parser
                )
+                # Log standardized response for ease of parsing
+                self._log_api_call("agent_response", request=None, response=openai_compatible_response)

                # Yield the response to the caller
                yield openai_compatible_response
--- a/libs/agent/agent/providers/openai/loop.py
+++ b/libs/agent/agent/providers/openai/loop.py
@@ -276,6 +276,10 @@ class OpenAILoop(BaseLoop):
                    )
                    # Don't reset last_response_id to None - keep the previous value if available

+
+                # Log standardized response for ease of parsing
+                # Since this is the openAI responses format, we don't need to convert it to agent response format
+                self._log_api_call("agent_response", request=None, response=response)
                # Process API response
                await queue.put(response)

--- a/libs/agent/agent/providers/openai/tools/computer.py
+++ b/libs/agent/agent/providers/openai/tools/computer.py
@@ -44,6 +44,7 @@ Action = Literal[
    "double_click",
    "screenshot",
    "scroll",
+    "drag",
 ]


@@ -162,9 +163,14 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
                y = kwargs.get("y")
                if x is None or y is None:
                    raise ToolError("x and y coordinates are required for scroll action")
-                scroll_x = kwargs.get("scroll_x", 0) // 20
-                scroll_y = kwargs.get("scroll_y", 0) // 20
+                scroll_x = kwargs.get("scroll_x", 0) // 50
+                scroll_y = kwargs.get("scroll_y", 0) // 50
                return await self.handle_scroll(x, y, scroll_x, scroll_y)
+            elif type == "drag":
+                path = kwargs.get("path")
+                if not path or not isinstance(path, list) or len(path) < 2:
+                    raise ToolError("path is required for drag action and must contain at least 2 points")
+                return await self.handle_drag(path)
            elif type == "screenshot":
                return await self.screenshot()
            elif type == "wait":
@@ -240,11 +246,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):

            if len(mapped_keys) > 1:
                # For key combinations (like Ctrl+C)
-                for k in mapped_keys:
-                    await self.computer.interface.press_key(k)
-                await asyncio.sleep(0.1)
-                for k in reversed(mapped_keys):
-                    await self.computer.interface.press_key(k)
+                await self.computer.interface.hotkey(*mapped_keys)
            else:
                # Single key press
                await self.computer.interface.press_key(mapped_keys[0])
@@ -306,6 +308,41 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
            self.logger.error(f"Error in handle_scroll: {str(e)}")
            raise ToolError(f"Failed to scroll at ({x}, {y}): {str(e)}")

+    async def handle_drag(self, path: List[Dict[str, int]]) -> ToolResult:
+        """Handle mouse drag operation using a path of coordinates.
+
+        Args:
+            path: List of coordinate points {"x": int, "y": int} defining the drag path
+
+        Returns:
+            ToolResult with the operation result and screenshot
+        """
+        try:
+            # Convert from [{"x": x, "y": y}, ...] format to [(x, y), ...] format
+            points = [(p["x"], p["y"]) for p in path]
+            
+            # Perform drag action
+            if len(points) == 2:
+                await self.computer.interface.move_cursor(points[0][0], points[0][1])
+                await self.computer.interface.drag_to(points[1][0], points[1][1])
+            else:
+                await self.computer.interface.drag(points, button="left")
+            
+            # Wait for UI to update
+            await asyncio.sleep(0.5)
+            
+            # Take screenshot after action
+            screenshot = await self.computer.interface.screenshot()
+            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
+            
+            return ToolResult(
+                output=f"Dragged from ({path[0]['x']}, {path[0]['y']}) to ({path[-1]['x']}, {path[-1]['y']})",
+                base64_image=base64_screenshot,
+            )
+        except Exception as e:
+            self.logger.error(f"Error in handle_drag: {str(e)}")
+            raise ToolError(f"Failed to perform drag operation: {str(e)}")
+
    async def screenshot(self) -> ToolResult:
        """Take a screenshot."""
        try:
--- a/libs/agent/agent/providers/uitars/clients/oaicompat.py
+++ b/libs/agent/agent/providers/uitars/clients/oaicompat.py
@@ -94,8 +94,15 @@ class OAICompatClient(BaseUITarsClient):
        """
        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

-        final_messages = [{"role": "system", "content": system}]
-
+        final_messages = [
+            {
+                "role": "system", 
+                "content": [
+                    { "type": "text", "text": system }
+                ]
+            }
+        ]
+        
        # Process messages
        for item in messages:
            if isinstance(item, dict):
@@ -138,8 +145,13 @@ class OAICompatClient(BaseUITarsClient):
                    message = {"role": "user", "content": [{"type": "text", "text": item}]}
                final_messages.append(message)

-        payload = {"model": self.model, "messages": final_messages, "temperature": self.temperature}
-        payload["max_tokens"] = max_tokens or self.max_tokens
+        payload = {
+            "model": self.model, 
+            "messages": final_messages, 
+            "max_tokens": max_tokens or self.max_tokens,
+            "temperature": self.temperature,
+            "top_p": 0.7,
+        }

        try:
            async with aiohttp.ClientSession() as session:
@@ -178,25 +190,21 @@ class OAICompatClient(BaseUITarsClient):
                    response_text = await response.text()
                    logger.debug(f"Response content: {response_text}")
                    
+                    # if 503, then the endpoint is still warming up
+                    if response.status == 503:
+                        logger.error(f"Endpoint is still warming up, please try again later")
+                        raise Exception(f"Endpoint is still warming up: {response_text}")
+                    
                    # Try to parse as JSON if the content type is appropriate
                    if "application/json" in response.headers.get('Content-Type', ''):
                        response_json = await response.json()
                    else:
                        raise Exception(f"Response is not JSON format")
-                        # # Optionally try to parse it anyway
-                        # try:
-                        #     import json
-                        #     response_json = json.loads(response_text)
-                        # except json.JSONDecodeError as e:
-                        #     print(f"Failed to parse response as JSON: {e}")

                    if response.status != 200:
-                        error_msg = response_json.get("error", {}).get(
-                            "message", str(response_json)
-                        )
-                        logger.error(f"Error in API call: {error_msg}")
-                        raise Exception(f"API error: {error_msg}")
-
+                        logger.error(f"Error in API call: {response_text}")
+                        raise Exception(f"API error: {response_text}")
+                    
                    return response_json

        except Exception as e:
--- a/libs/agent/agent/providers/uitars/loop.py
+++ b/libs/agent/agent/providers/uitars/loop.py
@@ -17,10 +17,10 @@ from ...core.types import AgentResponse, LLMProvider
 from ...core.visualization import VisualizationHelper
 from computer import Computer

-from .utils import add_box_token, parse_actions, parse_action_parameters
+from .utils import add_box_token, parse_actions, parse_action_parameters, to_agent_response_format
 from .tools.manager import ToolManager
 from .tools.computer import ToolResult
-from .prompts import COMPUTER_USE, SYSTEM_PROMPT
+from .prompts import COMPUTER_USE, SYSTEM_PROMPT, MAC_SPECIFIC_NOTES

 from .clients.oaicompat import OAICompatClient
 from .clients.mlxvlm import MLXVLMUITarsClient
@@ -197,7 +197,7 @@ class UITARSLoop(BaseLoop):
        if first_user_idx is not None and instruction:
            # Create the computer use prompt
            user_prompt = COMPUTER_USE.format(
-                instruction=instruction,
+                instruction='\n'.join([instruction, MAC_SPECIFIC_NOTES]),
                language="English"
            )
            
@@ -453,7 +453,7 @@ class UITARSLoop(BaseLoop):
    # MAIN LOOP - IMPLEMENTING ABSTRACT METHOD
    ###########################################

-    async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[Dict[str, Any], None]:
+    async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
        """Run the agent loop with provided messages.

        Args:
@@ -520,41 +520,16 @@ class UITARSLoop(BaseLoop):

                # Update whether an action screenshot was saved this turn
                action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
-
-                # Parse actions from the raw response
-                raw_response = response["choices"][0]["message"]["content"]
-                parsed_actions = parse_actions(raw_response)
                
-                # Extract thought content if available
-                thought = ""
-                if "Thought:" in raw_response:
-                    thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", raw_response, re.DOTALL)
-                    if thought_match:
-                        thought = thought_match.group(1).strip()
+                agent_response = await to_agent_response_format(
+                    response,
+                    messages,
+                    model=self.model,
+                )
+                # Log standardized response for ease of parsing
+                self._log_api_call("agent_response", request=None, response=agent_response)
+                yield agent_response
                
-                # Create standardized thought response format
-                thought_response = {
-                    "role": "assistant",
-                    "content": thought or raw_response,
-                    "metadata": {
-                        "title": "🧠 UI-TARS Thoughts"
-                    }
-                }
-                
-                # Create action response format
-                action_response = {
-                    "role": "assistant",
-                    "content": str(parsed_actions),
-                    "metadata": {
-                        "title": "🖱️ UI-TARS Actions",
-                    }
-                }
-
-                # Yield both responses to the caller (thoughts first, then actions)
-                yield thought_response
-                if parsed_actions:
-                    yield action_response
-
                # Check if we should continue this conversation
                running = should_continue

@@ -575,7 +550,8 @@ class UITARSLoop(BaseLoop):
                    logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")

                yield {
-                    "error": str(e),
+                    "role": "assistant",
+                    "content": f"Error: {str(e)}",
                    "metadata": {"title": "❌ Error"},
                }

--- a/libs/agent/agent/providers/uitars/prompts.py
+++ b/libs/agent/agent/providers/uitars/prompts.py
@@ -1,5 +1,9 @@
 """Prompts for UI-TARS agent."""

+MAC_SPECIFIC_NOTES = """
+(You are operating on macOS, use 'cmd' instead of 'ctrl' for most shortcuts e.g., hotkey(key='cmd c') for copy, hotkey(key='cmd v') for paste, hotkey(key='cmd t') for new tab).)
+"""
+
 SYSTEM_PROMPT = "You are a helpful assistant."

 COMPUTER_USE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
@@ -56,4 +60,4 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par

 ## User Instruction
 {instruction}
-""" 
+"""
--- a/libs/agent/agent/providers/uitars/tools/computer.py
+++ b/libs/agent/agent/providers/uitars/tools/computer.py
@@ -173,9 +173,13 @@ class ComputerTool(BaseComputerTool):
            elif action == "hotkey":
                if "keys" in kwargs:
                    keys = kwargs["keys"]
-                    for key in keys:
-                        await self.computer.interface.press_key(key)
                    
+                    if len(keys) > 1:
+                        await self.computer.interface.hotkey(*keys)
+                    else:
+                        # Single key press
+                        await self.computer.interface.press_key(keys[0])
+                        
                    # Wait for UI to update
                    await asyncio.sleep(0.3)
                    
--- a/libs/agent/agent/providers/uitars/utils.py
+++ b/libs/agent/agent/providers/uitars/utils.py
@@ -4,9 +4,114 @@ import logging
 import base64
 import re
 from typing import Any, Dict, List, Optional, Union, Tuple
+from datetime import datetime

 logger = logging.getLogger(__name__)

+from ...core.types import AgentResponse
+
+async def to_agent_response_format(
+    response: Dict[str, Any],
+    messages: List[Dict[str, Any]],
+    model: Optional[str] = None,
+) -> AgentResponse:
+    """Convert raw UI-TARS response to agent response format.
+    
+    Args:
+        response: Raw UI-TARS response
+        messages: List of messages in standard format
+        model: Optional model name
+    
+    Returns:
+        AgentResponse: Standardized agent response format
+    """
+    # Create unique IDs for this response
+    response_id = f"resp_{datetime.now().strftime('%Y%m%d%H%M%S')}_{id(response)}"
+    reasoning_id = f"rs_{response_id}"
+    action_id = f"cu_{response_id}"
+    call_id = f"call_{response_id}"
+
+    # Parse actions from the raw response
+    content = response["choices"][0]["message"]["content"]
+    actions = parse_actions(content)
+    
+    # Extract thought content if available
+    reasoning_text = ""
+    if "Thought:" in content:
+        thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", content, re.DOTALL)
+        if thought_match:
+            reasoning_text = thought_match.group(1).strip()
+    
+    # Create output items
+    output_items = []
+    if reasoning_text:
+        output_items.append({
+            "type": "reasoning",
+            "id": reasoning_id,
+            "text": reasoning_text
+        })
+    if actions:
+        for i, action in enumerate(actions):
+            action_name, tool_args = parse_action_parameters(action)
+            if action_name == "finished":
+                output_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{
+                        "type": "output_text",
+                        "text": tool_args["content"]
+                    }],
+                    "id": f"action_{i}_{action_id}",
+                    "status": "completed"
+                })
+            else:
+                if tool_args.get("action") == action_name:
+                    del tool_args["action"]
+                output_items.append({
+                    "type": "computer_call",
+                    "id": f"{action}_{i}_{action_id}",
+                    "call_id": f"call_{i}_{action_id}",
+                    "action": { "type": action_name, **tool_args },
+                    "pending_safety_checks": [],
+                    "status": "completed"
+                })
+    
+    # Create agent response
+    agent_response = AgentResponse(
+        id=response_id,
+        object="response",
+        created_at=int(datetime.now().timestamp()),
+        status="completed",
+        error=None,
+        incomplete_details=None,
+        instructions=None,
+        max_output_tokens=None,
+        model=model or response["model"],
+        output=output_items,
+        parallel_tool_calls=True,
+        previous_response_id=None,
+        reasoning={"effort": "medium"},
+        store=True,
+        temperature=0.0,
+        top_p=0.7,
+        text={"format": {"type": "text"}},
+        tool_choice="auto",
+        tools=[
+            {
+                "type": "computer_use_preview",
+                "display_height": 768,
+                "display_width": 1024,
+                "environment": "mac",
+            }
+        ],
+        truncation="auto",
+        usage=response["usage"],
+        user=None,
+        metadata={},
+        response=response
+    )
+    return agent_response
+

 def add_box_token(input_string: str) -> str:
    """Add box tokens to the coordinates in the model response.
@@ -74,7 +179,13 @@ def parse_action_parameters(action: str) -> Tuple[str, Dict[str, Any]]:
    """
    # Handle "finished" action
    if action.startswith("finished"):
-        return "finished", {}
+        # Parse content if it exists
+        content_match = re.search(r"content='([^']*)'", action)
+        if content_match:
+            content = content_match.group(1)
+            return "finished", {"content": content}
+        else:
+            return "finished", {}
    
    # Parse action parameters
    action_match = re.match(r'(\w+)\((.*)\)', action)
--- a/libs/agent/agent/ui/gradio/app.py
+++ b/libs/agent/agent/ui/gradio/app.py
@@ -35,6 +35,7 @@ from pathlib import Path
 from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
 import gradio as gr
 from gradio.components.chatbot import MetadataDict
+from typing import cast

 # Import from agent package
 from agent.core.types import AgentResponse
@@ -332,63 +333,6 @@ def get_ollama_models() -> List[str]:
        logging.error(f"Error getting Ollama models: {e}")
        return []

-
-def extract_synthesized_text(
-    result: Union[AgentResponse, Dict[str, Any]],
-) -> Tuple[str, MetadataDict]:
-    """Extract synthesized text from the agent result."""
-    synthesized_text = ""
-    metadata = MetadataDict()
-
-    if "output" in result and result["output"]:
-        for output in result["output"]:
-            if output.get("type") == "reasoning":
-                metadata["title"] = "🧠 Reasoning"
-                content = output.get("content", "")
-                if content:
-                    synthesized_text += f"{content}\n"
-            elif output.get("type") == "message":
-                # Handle message type outputs - can contain rich content
-                content = output.get("content", [])
-
-                # Content is usually an array of content blocks
-                if isinstance(content, list):
-                    for block in content:
-                        if isinstance(block, dict) and block.get("type") == "output_text":
-                            text_value = block.get("text", "")
-                            if text_value:
-                                synthesized_text += f"{text_value}\n"
-
-            elif output.get("type") == "computer_call":
-                action = output.get("action", {})
-                action_type = action.get("type", "")
-
-                # Create a descriptive text about the action
-                if action_type == "click":
-                    button = action.get("button", "")
-                    x = action.get("x", "")
-                    y = action.get("y", "")
-                    synthesized_text += f"Clicked {button} at position ({x}, {y}).\n"
-                elif action_type == "type":
-                    text = action.get("text", "")
-                    synthesized_text += f"Typed: {text}.\n"
-                elif action_type == "keypress":
-                    # Extract key correctly from either keys array or key field
-                    if isinstance(action.get("keys"), list):
-                        key = ", ".join(action.get("keys"))
-                    else:
-                        key = action.get("key", "")
-
-                    synthesized_text += f"Pressed key: {key}\n"
-                else:
-                    synthesized_text += f"Performed {action_type} action.\n"
-
-                metadata["status"] = "done"
-                metadata["title"] = f"🛠️ {synthesized_text.strip().splitlines()[-1]}"
-
-    return synthesized_text.strip(), metadata
-
-
 def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
    """Create or get the global Computer instance."""
    global global_computer
@@ -457,66 +401,6 @@ def create_agent(

    return global_agent

-
-def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> Tuple[str, MetadataDict]:
-    """Process agent results for the Gradio UI."""
-    # Extract text content
-    text_obj = result.get("text", {})
-    metadata = result.get("metadata", {})
-
-    # Create a properly typed MetadataDict
-    metadata_dict = MetadataDict()
-    metadata_dict["title"] = metadata.get("title", "")
-    metadata_dict["status"] = "done"
-    metadata = metadata_dict
-
-    # For OpenAI's Computer-Use Agent, text field is an object with format property
-    if (
-        text_obj
-        and isinstance(text_obj, dict)
-        and "format" in text_obj
-        and not text_obj.get("value", "")
-    ):
-        content, metadata = extract_synthesized_text(result)
-    else:
-        if not text_obj:
-            text_obj = result
-
-        # For other types of results, try to get text directly
-        if isinstance(text_obj, dict):
-            if "value" in text_obj:
-                content = text_obj["value"]
-            elif "text" in text_obj:
-                content = text_obj["text"]
-            elif "content" in text_obj:
-                content = text_obj["content"]
-            else:
-                content = ""
-        else:
-            content = str(text_obj) if text_obj else ""
-
-    # If still no content but we have outputs, create a summary
-    if not content and "output" in result and result["output"]:
-        output = result["output"]
-        for out in output:
-            if out.get("type") == "reasoning":
-                content = out.get("content", "")
-                if content:
-                    break
-            elif out.get("type") == "computer_call":
-                action = out.get("action", {})
-                action_type = action.get("type", "")
-                if action_type:
-                    content = f"Performing action: {action_type}"
-                    break
-
-    # Clean up the text - ensure content is a string
-    if not isinstance(content, str):
-        content = str(content) if content else ""
-
-    return content, metadata
-
-
 def create_gradio_ui(
    provider_name: str = "openai",
    model_name: str = "gpt-4o",
@@ -921,17 +805,64 @@ def create_gradio_ui(

                        # Stream responses from the agent
                        async for result in global_agent.run(last_user_message):
-                            # Process result
-                            content, metadata = process_agent_result(result)
-
-                            # Skip empty content
-                            if content or metadata.get("title"):
-                                history.append(
-                                    gr.ChatMessage(
-                                        role="assistant", content=content, metadata=metadata
+                            print(f"DEBUG - Agent response ------- START")
+                            from pprint import pprint
+                            pprint(result)
+                            print(f"DEBUG - Agent response ------- END")
+                            
+                            def generate_gradio_messages():
+                                """Yield one gr.ChatMessage per displayable part of the current result."""
+                                if result.get("content"):
+                                    yield gr.ChatMessage(
+                                        role="assistant",
+                                        content=result.get("content", ""),
+                                        metadata=cast(MetadataDict, result.get("metadata", {}))
                                     )
-                                )
-                            yield history
+                                else:
+                                    for output in result.get("output", []):
+                                        if output.get("type") == "message":
+                                            for content_part in output.get("content", []):
+                                                if content_part.get("text"):
+                                                    yield gr.ChatMessage(
+                                                        role=output.get("role", "assistant"),
+                                                        content=content_part.get("text", ""),
+                                                        metadata=content_part.get("metadata", {})
+                                                    )
+                                        elif output.get("type") == "reasoning":
+                                            # if it's openAI, we only have access to a summary of the reasoning
+                                            summary_content = output.get("summary", [])
+                                            if summary_content:
+                                                for summary_part in summary_content:
+                                                    if summary_part.get("type") == "summary_text":
+                                                        yield gr.ChatMessage(
+                                                            role="assistant",
+                                                            content=summary_part.get("text", "")
+                                                        )
+                                            else:
+                                                summary_content = output.get("text", "")
+                                                if summary_content:
+                                                    yield gr.ChatMessage(
+                                                        role="assistant",
+                                                        content=summary_content,
+                                                    )
+                                        elif output.get("type") == "computer_call":
+                                            action = output.get("action", {})
+                                            action_type = action.get("type", "")
+                                            if action_type:
+                                                action_title = f"🛠️ Performing {action_type}"
+                                                # 0 is a valid screen coordinate, so test for None, not truthiness
+                                                if action.get("x") is not None and action.get("y") is not None:
+                                                    action_title += f" at ({action['x']}, {action['y']})"
+                                                yield gr.ChatMessage(
+                                                    role="assistant",
+                                                    content=f"```json\n{json.dumps(action)}\n```",
+                                                    metadata={"title": action_title}
+                                                )
+                            
+                            for message in generate_gradio_messages():
+                                history.append(message)
+                                yield history
+                            
                    except Exception as e:
                        import traceback

--- a/libs/computer-server/computer_server/handlers/base.py
+++ b/libs/computer-server/computer_server/handlers/base.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, List, Tuple

 class BaseAccessibilityHandler(ABC):
    """Abstract base class for OS-specific accessibility handlers."""
@@ -59,6 +59,17 @@ class BaseAutomationHandler(ABC):
            duration: How long the drag should take in seconds
        """
        pass
+    
+    @abstractmethod
+    async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]:
+        """Drag the cursor along a path of coordinates with a mouse button held down.
+        
+        Args:
+            path: List of (x, y) coordinate tuples defining the drag path
+            button: The mouse button to use ('left', 'middle', 'right')
+            duration: Total time in seconds that the drag operation should take
+        """
+        pass

    # Keyboard Actions
    @abstractmethod
--- a/libs/computer-server/computer_server/handlers/macos.py
+++ b/libs/computer-server/computer_server/handlers/macos.py
@@ -1,7 +1,7 @@
 import pyautogui
 import base64
 from io import BytesIO
-from typing import Optional, Dict, Any, List
+from typing import Optional, Dict, Any, List, Tuple
 from ctypes import byref, c_void_p, POINTER
 from AppKit import NSWorkspace  # type: ignore
 import AppKit
@@ -563,6 +563,39 @@ class MacOSAutomationHandler(BaseAutomationHandler):
        except Exception as e:
            return {"success": False, "error": str(e)}

+    async def drag(
+        self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5
+    ) -> Dict[str, Any]:
+        """Drag the mouse through each point of ``path`` with ``button`` held down.
+
+        Args:
+            path: (x, y) points; the button is pressed at the first one.
+            button: Mouse button to hold ('left', 'middle', 'right').
+            duration: Total seconds, split evenly across the path segments.
+
+        Returns:
+            {"success": True}, or {"success": False, "error": message} on failure.
+        """
+        try:
+            if not path or len(path) < 2:
+                return {"success": False, "error": "Path must contain at least 2 points"}
+            start_x, start_y = path[0]
+            pyautogui.moveTo(start_x, start_y)
+            pyautogui.mouseDown(button=button)
+            # The guard above guarantees len(path) >= 2, so this never divides by zero.
+            step_duration = duration / (len(path) - 1)
+            for x, y in path[1:]:
+                pyautogui.moveTo(x, y, duration=step_duration)
+            pyautogui.mouseUp(button=button)
+            return {"success": True}
+        except Exception as e:
+            # Best-effort release so a failed drag never leaves the button held down.
+            try:
+                pyautogui.mouseUp(button=button)
+            except Exception:
+                pass
+            return {"success": False, "error": str(e)}
+
    # Keyboard Actions
    async def type_text(self, text: str) -> Dict[str, Any]:
        try:
--- a/libs/computer-server/computer_server/main.py
+++ b/libs/computer-server/computer_server/main.py
@@ -65,6 +65,7 @@ async def websocket_endpoint(websocket: WebSocket):
        "type_text": manager.automation_handler.type_text,
        "press_key": manager.automation_handler.press_key,
        "drag_to": manager.automation_handler.drag_to,
+        "drag": manager.automation_handler.drag,
        "hotkey": manager.automation_handler.hotkey,
        "get_cursor_position": manager.automation_handler.get_cursor_position,
        "get_screen_size": manager.automation_handler.get_screen_size,
--- a/libs/computer/computer/computer.py
+++ b/libs/computer/computer/computer.py
@@ -29,7 +29,7 @@ class Computer:
        display: Union[Display, Dict[str, int], str] = "1024x768",
        memory: str = "8GB",
        cpu: str = "4",
-        os: OSType = "macos",
+        os_type: OSType = "macos",
        name: str = "",
        image: str = "macos-sequoia-cua:latest",
        shared_directories: Optional[List[str]] = None,
@@ -68,6 +68,7 @@ class Computer:
        self.image = image
        self.port = port
        self.host = host
+        self.os_type = os_type

        # Store telemetry preference
        self._telemetry_enabled = telemetry_enabled
@@ -129,8 +130,8 @@ class Computer:
        self.shared_paths = []
        if shared_directories:
            for path in shared_directories:
-                abs_path = os.path.abspath(os.path.expanduser(path))  # type: ignore[attr-defined]
-                if not os.path.exists(abs_path):  # type: ignore[attr-defined]
+                abs_path = os.path.abspath(os.path.expanduser(path))
+                if not os.path.exists(abs_path):
                    raise ValueError(f"Shared directory does not exist: {path}")
                self.shared_paths.append(abs_path)
        self._pylume_context = None
@@ -188,7 +189,7 @@ class Computer:
                self._interface = cast(
                    BaseComputerInterface,
                    InterfaceFactory.create_interface_for_os(
-                        os=self.os, ip_address=ip_address  # type: ignore[arg-type]
+                        os=self.os_type, ip_address=ip_address  # type: ignore[arg-type]
                    ),
                )

@@ -288,13 +289,13 @@ class Computer:

        try:
            # Initialize the interface using the factory with the specified OS
-            self.logger.info(f"Initializing interface for {self.os} at {ip_address}")
+            self.logger.info(f"Initializing interface for {self.os_type} at {ip_address}")
            from .interface.base import BaseComputerInterface

            self._interface = cast(
                BaseComputerInterface,
                InterfaceFactory.create_interface_for_os(
-                    os=self.os, ip_address=ip_address  # type: ignore[arg-type]
+                    os=self.os_type, ip_address=ip_address  # type: ignore[arg-type]
                ),
            )

--- a/libs/computer/computer/interface/base.py
+++ b/libs/computer/computer/interface/base.py
@@ -79,6 +79,17 @@ class BaseComputerInterface(ABC):
        """
        pass

+    @abstractmethod
+    async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
+        """Drag the cursor along a path of coordinates.
+
+        Args:
+            path: List of (x, y) coordinate tuples defining the drag path
+            button: The mouse button to use ('left', 'middle', 'right')
+            duration: Total time in seconds that the drag operation should take
+        """
+        pass
+
    # Keyboard Actions
    @abstractmethod
    async def type_text(self, text: str) -> None:
--- a/libs/computer/computer/interface/macos.py
+++ b/libs/computer/computer/interface/macos.py
@@ -328,6 +328,11 @@ class MacOSComputerInterface(BaseComputerInterface):
            "drag_to", {"x": x, "y": y, "button": button, "duration": duration}
        )

+    async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
+        await self._send_command(
+            "drag", {"path": path, "button": button, "duration": duration}
+        )
+
    # Keyboard Actions
    async def type_text(self, text: str) -> None:
        await self._send_command("type_text", {"text": text})
--- a/libs/computer/computer/interface/models.py
+++ b/libs/computer/computer/interface/models.py
@@ -7,6 +7,9 @@ NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'u
 # Special key literals
 SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']

+# Modifier key literals
+ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']
+
 # Function key literals
 FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']

@@ -35,6 +38,14 @@ class Key(Enum):
    BACKSPACE = 'backspace'
    DELETE = 'del'
    
+    # Modifier keys
+    ALT = 'alt'
+    CTRL = 'ctrl'
+    SHIFT = 'shift'
+    WIN = 'win'
+    COMMAND = 'command'
+    OPTION = 'option'
+    
    # Function keys
    F1 = 'f1'
    F2 = 'f2'
@@ -73,14 +84,27 @@ class Key(Enum):
            'escape': cls.ESCAPE,
            'esc': cls.ESC,
            'delete': cls.DELETE,
-            'del': cls.DELETE
+            'del': cls.DELETE,
+            # Modifier key mappings
+            'alt': cls.ALT,
+            'ctrl': cls.CTRL,
+            'control': cls.CTRL,
+            'shift': cls.SHIFT,
+            'win': cls.WIN,
+            'windows': cls.WIN,
+            'super': cls.WIN,
+            'command': cls.COMMAND,
+            'cmd': cls.COMMAND,
+            '⌘': cls.COMMAND,
+            'option': cls.OPTION,
+            '⌥': cls.OPTION,
        }
        
        normalized = key.lower().strip()
        return key_mapping.get(normalized, key)

 # Combined key type
-KeyType = Union[Key, NavigationKey, SpecialKey, FunctionKey, str]
+KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]

 class AccessibilityWindow(TypedDict):
    """Information about a window in the accessibility tree."""
--- a/libs/lume/README.md
+++ b/libs/lume/README.md
@@ -147,6 +147,14 @@ Install with a single command:
 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
 ```

+By default, Lume is installed as a background service that starts automatically on login. If you prefer to start the Lume API service manually when needed, you can use the `--no-background-service` option:
+
+```bash
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" -- --no-background-service
+```
+
+**Note:** With this option, you'll need to manually start the Lume API service by running `lume serve` in your terminal whenever you need to use tools or libraries that rely on the Lume API (such as the Computer-Use Agent).
+
 You can also download the `lume.pkg.tar.gz` archive from the [latest release](https://github.com/trycua/lume/releases), extract it, and install the package manually.

 ## Prebuilt Images
--- a/libs/lume/scripts/install.sh
+++ b/libs/lume/scripts/install.sh
@@ -20,24 +20,32 @@ INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"
 GITHUB_REPO="trycua/cua"
 LATEST_RELEASE_URL="https://api.github.com/repos/$GITHUB_REPO/releases/latest"

+# Option to skip background service setup (default: install it)
+INSTALL_BACKGROUND_SERVICE=true
+
 # Parse command line arguments
 while [ "$#" -gt 0 ]; do
  case "$1" in
    --install-dir=*)
      INSTALL_DIR="${1#*=}"
      ;;
+    --no-background-service|--skip-background-service)
+      INSTALL_BACKGROUND_SERVICE=false
+      ;;
    --help)
      echo "${BOLD}${BLUE}Lume Installer${NORMAL}"
      echo "Usage: $0 [OPTIONS]"
      echo ""
      echo "Options:"
-      echo "  --install-dir=DIR   Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
-      echo "  --help              Display this help message"
+      echo "  --install-dir=DIR         Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
+      echo "  --no-background-service   Do not setup the Lume background service (LaunchAgent)"
+      echo "  --help                    Display this help message"
      echo ""
      echo "Examples:"
-      echo "  $0                               # Install to $DEFAULT_INSTALL_DIR"
-      echo "  $0 --install-dir=/usr/local/bin  # Install to system directory (may require root privileges)"
-      echo "  INSTALL_DIR=/opt/lume $0         # Install to /opt/lume (legacy env var support)"
+      echo "  $0                                   # Install to $DEFAULT_INSTALL_DIR and setup background service"
+      echo "  $0 --install-dir=/usr/local/bin      # Install to system directory (may require root privileges)"
+      echo "  $0 --no-background-service           # Install without setting up the background service"
+      echo "  INSTALL_DIR=/opt/lume $0             # Install to /opt/lume (legacy env var support)"
      exit 0
      ;;
    *)
@@ -173,11 +181,25 @@ install_binary() {
  
  # Check if the installation directory is in PATH
  if [ -n "${PATH##*$INSTALL_DIR*}" ]; then
+    SHELL_NAME=$(basename "$SHELL")
    echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}"
-    echo "To add it, run one of these commands based on your shell:"
-    echo "  For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
-    echo "  For zsh:  echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc"
-    echo "  For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
+    case "$SHELL_NAME" in
+      zsh)
+        echo "To add it, run:"
+        echo "  echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zprofile"
+        ;;
+      bash)
+        echo "To add it, run:"
+        echo "  echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
+        ;;
+      fish)
+        echo "To add it, run:"
+        echo "  echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
+        ;;
+      *)
+        echo "Add $INSTALL_DIR to your PATH in your shell profile file."
+        ;;
+    esac
  fi
 }

@@ -188,11 +210,97 @@ main() {
  create_temp_dir
  download_release
  install_binary
-  
+
  echo ""
  echo "${GREEN}${BOLD}Lume has been successfully installed!${NORMAL}"
  echo "Run ${BOLD}lume${NORMAL} to get started."
+
+  if [ "$INSTALL_BACKGROUND_SERVICE" = true ]; then
+    # --- Setup background service (LaunchAgent) for Lume ---
+    SERVICE_NAME="com.trycua.lume_daemon"
+    PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist"
+    LUME_BIN="$INSTALL_DIR/lume"
+
+    echo ""
+    echo "Setting up LaunchAgent to run lume daemon on login..."
+
+    # Create LaunchAgents directory if it doesn't exist
+    mkdir -p "$HOME/Library/LaunchAgents"
+
+    # Unload existing service if present
+    if [ -f "$PLIST_PATH" ]; then
+      echo "Existing LaunchAgent found. Unloading..."
+      launchctl unload "$PLIST_PATH" 2>/dev/null || true
+    fi
+
+    # Create the plist file
+    cat <<EOF > "$PLIST_PATH"
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>$SERVICE_NAME</string>
+    <key>ProgramArguments</key>
+    <array>
+        <string>$LUME_BIN</string>
+        <string>serve</string>
+    </array>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>KeepAlive</key>
+    <true/>
+    <key>WorkingDirectory</key>
+    <string>$HOME</string>
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin</string>
+        <key>HOME</key>
+        <string>$HOME</string>
+    </dict>
+    <key>StandardOutPath</key>
+    <string>/tmp/lume_daemon.log</string>
+    <key>StandardErrorPath</key>
+    <string>/tmp/lume_daemon.error.log</string>
+    <key>ProcessType</key>
+    <string>Interactive</string>
+    <key>SessionType</key>
+    <string>Aqua</string>
+</dict>
+</plist>
+EOF
+
+    # Set permissions
+    chmod 644 "$PLIST_PATH"
+    touch /tmp/lume_daemon.log /tmp/lume_daemon.error.log
+    chmod 644 /tmp/lume_daemon.log /tmp/lume_daemon.error.log
+
+    # Load the LaunchAgent
+    echo "Loading LaunchAgent..."
+    launchctl unload "$PLIST_PATH" 2>/dev/null || true
+    launchctl load "$PLIST_PATH"
+
+    echo "${GREEN}Lume daemon LaunchAgent installed and loaded. It will start automatically on login!${NORMAL}"
+    echo "To check status: launchctl list | grep $SERVICE_NAME"
+    echo "To view logs: tail -f /tmp/lume_daemon.log"
+    echo ""
+    echo "To remove the lume daemon service, run:"
+    echo "  launchctl unload \"$PLIST_PATH\""
+    echo "  rm \"$PLIST_PATH\""
+  else
+    SERVICE_NAME="com.trycua.lume_daemon"
+    PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist"
+    if [ -f "$PLIST_PATH" ]; then
+      echo "Removing existing Lume background service (LaunchAgent)..."
+      launchctl unload "$PLIST_PATH" 2>/dev/null || true
+      rm "$PLIST_PATH"
+      echo "Lume background service (LaunchAgent) removed."
+    else
+      echo "Skipping Lume background service (LaunchAgent) setup as requested (use --no-background-service)."
+    fi
+  fi
 }

 # Run the installation
-main 
+main
--- a/libs/lume/src/Commands/Create.swift
+++ b/libs/lume/src/Commands/Create.swift
@@ -40,7 +40,7 @@ struct Create: AsyncParsableCommand {
    )
    var ipsw: String?

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {
--- a/libs/lume/src/Commands/Delete.swift
+++ b/libs/lume/src/Commands/Delete.swift
@@ -12,7 +12,7 @@ struct Delete: AsyncParsableCommand {
    @Flag(name: .long, help: "Force deletion without confirmation")
    var force = false

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {}
--- a/libs/lume/src/Commands/Get.swift
+++ b/libs/lume/src/Commands/Get.swift
@@ -12,7 +12,7 @@ struct Get: AsyncParsableCommand {
    @Option(name: [.long, .customShort("f")], help: "Output format (json|text)")
    var format: FormatOption = .text

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {
--- a/libs/lume/src/Commands/List.swift
+++ b/libs/lume/src/Commands/List.swift
@@ -10,15 +10,22 @@ struct List: AsyncParsableCommand {
    @Option(name: [.long, .customShort("f")], help: "Output format (json|text)")
    var format: FormatOption = .text
    
+    @Option(name: .long, help: "Filter by storage location name")
+    var storage: String?
+
    init() {
    }
    
    @MainActor
    func run() async throws {
        let manager = LumeController()
-        let vms = try manager.list()
+        let vms = try manager.list(storage: self.storage)
        if vms.isEmpty && self.format == .text {
-            print("No virtual machines found")
+            if let storageName = self.storage {
+                print("No virtual machines found in storage '\(storageName)'")
+            } else {
+                print("No virtual machines found")
+            }
        } else {
            try VMDetailsPrinter.printStatus(vms, format: self.format)
        }
--- a/libs/lume/src/Commands/Pull.swift
+++ b/libs/lume/src/Commands/Pull.swift
@@ -19,7 +19,7 @@ struct Pull: AsyncParsableCommand {
    @Option(help: "Organization to pull from. Defaults to trycua")
    var organization: String = "trycua"

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {}
--- a/libs/lume/src/Commands/Run.swift
+++ b/libs/lume/src/Commands/Run.swift
@@ -48,7 +48,7 @@ struct Run: AsyncParsableCommand {
    @Option(help: "For MacOS VMs only, boot into the VM in recovery mode")
    var recoveryMode: Bool = false

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    private var parsedSharedDirectories: [SharedDirectory] {
--- a/libs/lume/src/Commands/Set.swift
+++ b/libs/lume/src/Commands/Set.swift
@@ -21,7 +21,7 @@ struct Set: AsyncParsableCommand {
    @Option(help: "New display resolution in format WIDTHxHEIGHT.")
    var display: VMDisplayResolution?

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {
--- a/libs/lume/src/Commands/Stop.swift
+++ b/libs/lume/src/Commands/Stop.swift
@@ -9,7 +9,7 @@ struct Stop: AsyncParsableCommand {
    @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName))
    var name: String

-    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    init() {
--- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
+++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
@@ -643,7 +643,7 @@ class ImageContainerRegistry: @unchecked Sendable {
        image: String,
        name: String?,
        locationName: String? = nil
-    ) async throws {
+    ) async throws -> VMDirectory {
        guard !image.isEmpty else {
            throw ValidationError("Image name cannot be empty")
        }
@@ -652,7 +652,16 @@ class ImageContainerRegistry: @unchecked Sendable {

        // Use provided name or derive from image
        let vmName = name ?? image.split(separator: ":").first.map(String.init) ?? ""
-        let vmDir = try home.getVMDirectory(vmName, storage: locationName)
+        
+        // Determine if locationName is a direct path or a named storage location
+        let vmDir: VMDirectory
+        if let locationName = locationName, locationName.contains("/") || locationName.contains("\\") {
+            // Direct path
+            vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: locationName)
+        } else {
+            // Named storage or default location
+            vmDir = try home.getVMDirectory(vmName, storage: locationName)
+        }

        // Optimize network early in the process
        optimizeNetworkSettings()
@@ -991,6 +1000,7 @@ class ImageContainerRegistry: @unchecked Sendable {
        Logger.info(
            "Run 'lume run \(vmName)' to reduce the disk image file size by using macOS sparse file system"
        )
+        return vmDir
    }

    // Helper function to clean up a specific cache entry
@@ -3024,7 +3034,8 @@ class ImageContainerRegistry: @unchecked Sendable {

                            // Replace original with optimized version
                            try FileManager.default.removeItem(at: reassembledFile)
-                            try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile)
+                            try FileManager.default.moveItem(
+                                at: optimizedFile, to: reassembledFile)
                            Logger.info("Using sparse-optimized file for verification")
                        } else {
                            Logger.info(
--- a/libs/lume/src/FileSystem/Home.swift
+++ b/libs/lume/src/FileSystem/Home.swift
@@ -92,6 +92,28 @@ final class Home {
        let baseDir = Path(location.expandedPath)
        return VMDirectory(baseDir.directory(name))
    }
+    
+    /// Gets a VM directory from a direct file path
+    ///
+    /// - Parameters:
+    ///   - name: Name of the VM directory
+    ///   - storagePath: Direct file system path where the VM is located
+    /// - Returns: A VMDirectory instance
+    /// - Throws: HomeError if path is invalid
+    func getVMDirectoryFromPath(_ name: String, storagePath: String) throws -> VMDirectory {
+        let baseDir = Path(storagePath)
+        
+        // Create the directory if it doesn't exist
+        if !fileExists(at: storagePath) {
+            Logger.info("Creating storage directory", metadata: ["path": storagePath])
+            try createVMLocation(at: storagePath)
+        } else if !isValidDirectory(at: storagePath) {
+            // Path exists but isn't a valid directory
+            throw HomeError.invalidHomeDirectory
+        }
+        
+        return VMDirectory(baseDir.directory(name))
+    }

    /// Returns all initialized VM directories across all locations
    /// - Returns: An array of VMDirectory instances with location info
--- a/libs/lume/src/FileSystem/VMDirectory.swift
+++ b/libs/lume/src/FileSystem/VMDirectory.swift
@@ -8,7 +8,7 @@ import Foundation
 /// - Handling disk operations
 /// - Managing VM state and locking
 /// - Providing access to VM-related paths
-struct VMDirectory {
+struct VMDirectory: Sendable {
    // MARK: - Constants
    
    private enum FileNames {
@@ -26,8 +26,6 @@ struct VMDirectory {
    let configPath: Path
    let sessionsPath: Path
    
-    private let fileManager: FileManager
-    
    /// The name of the VM directory
    var name: String { dir.name }
    
@@ -36,10 +34,8 @@ struct VMDirectory {
    /// Creates a new VMDirectory instance
    /// - Parameters:
    ///   - dir: The base directory path for the VM
-    ///   - fileManager: FileManager instance to use for file operations
-    init(_ dir: Path, fileManager: FileManager = .default) {
+    init(_ dir: Path) {
        self.dir = dir
-        self.fileManager = fileManager
        self.nvramPath = dir.file(FileNames.nvram)
        self.diskPath = dir.file(FileNames.disk)
        self.configPath = dir.file(FileNames.config)
@@ -52,7 +48,25 @@ struct VMDirectory {
 extension VMDirectory {
    /// Checks if the VM directory is fully initialized with all required files
    func initialized() -> Bool {
-        configPath.exists() && diskPath.exists() && nvramPath.exists()
+        // Add detailed logging for debugging
+        let configExists = configPath.exists()
+        let diskExists = diskPath.exists()
+        let nvramExists = nvramPath.exists()
+        
+        Logger.info(
+            "VM directory initialization check", 
+            metadata: [
+                "directory": dir.path,
+                "config_path": configPath.path,
+                "config_exists": "\(configExists)",
+                "disk_path": diskPath.path,
+                "disk_exists": "\(diskExists)",
+                "nvram_path": nvramPath.path,
+                "nvram_exists": "\(nvramExists)"
+            ]
+        )
+        
+        return configExists && diskExists && nvramExists
    }

    /// Checks if the VM directory exists
@@ -70,7 +84,7 @@ extension VMDirectory {
    func setDisk(_ size: UInt64) throws {
        do {
            if !diskPath.exists() {
-                guard fileManager.createFile(atPath: diskPath.path, contents: nil) else {
+                guard FileManager.default.createFile(atPath: diskPath.path, contents: nil) else {
                    throw VMDirectoryError.fileCreationFailed(diskPath.path)
                }
            }
@@ -96,7 +110,7 @@ extension VMDirectory {
        
        do {
            let data = try encoder.encode(config)
-            guard fileManager.createFile(atPath: configPath.path, contents: data) else {
+            guard FileManager.default.createFile(atPath: configPath.path, contents: data) else {
                throw VMDirectoryError.fileCreationFailed(configPath.path)
            }
        } catch {
@@ -108,7 +122,7 @@ extension VMDirectory {
    /// - Returns: The loaded configuration
    /// - Throws: VMDirectoryError if the load operation fails
    func loadConfig() throws -> VMConfig {
-        guard let data = fileManager.contents(atPath: configPath.path) else {
+        guard let data = FileManager.default.contents(atPath: configPath.path) else {
            throw VMDirectoryError.configNotFound
        }
        
@@ -137,7 +151,7 @@ extension VMDirectory {
        
        do {
            let data = try encoder.encode(session)
-            guard fileManager.createFile(atPath: sessionsPath.path, contents: data) else {
+            guard FileManager.default.createFile(atPath: sessionsPath.path, contents: data) else {
                throw VMDirectoryError.fileCreationFailed(sessionsPath.path)
            }
        } catch {
@@ -149,7 +163,7 @@ extension VMDirectory {
    /// - Returns: The loaded VNC session
    /// - Throws: VMDirectoryError if the load operation fails
    func loadSession() throws -> VNCSession {
-        guard let data = fileManager.contents(atPath: sessionsPath.path) else {
+        guard let data = FileManager.default.contents(atPath: sessionsPath.path) else {
            throw VMDirectoryError.sessionNotFound
        }
        
@@ -163,7 +177,7 @@ extension VMDirectory {
    
    /// Removes the VNC session information from disk
    func clearSession() {
-        try? fileManager.removeItem(atPath: sessionsPath.path)
+        try? FileManager.default.removeItem(atPath: sessionsPath.path)
    }
 }

@@ -176,6 +190,6 @@ extension VMDirectory: CustomStringConvertible {

 extension VMDirectory {
    func delete() throws {
-        try fileManager.removeItem(atPath: dir.path)
+        try FileManager.default.removeItem(atPath: dir.path)
    }
 }
--- a/libs/lume/src/LumeController.swift
+++ b/libs/lume/src/LumeController.swift
@@ -48,15 +48,72 @@ final class LumeController {

    /// Lists all virtual machines in the system
    @MainActor
-    public func list() throws -> [VMDetails] {
+    public func list(storage: String? = nil) throws -> [VMDetails] {
        do {
-            let vmLocations = try home.getAllVMDirectories()
-            let statuses = try vmLocations.map { vmWithLoc in
-                let vm = try self.get(
-                    name: vmWithLoc.directory.name, storage: vmWithLoc.locationName)
-                return vm.details
+            if let storage = storage {
+                // If storage is specified, only return VMs from that location
+                if storage.contains("/") || storage.contains("\\") {
+                    // Direct path - check if it exists
+                    if !FileManager.default.fileExists(atPath: storage) {
+                        // Return empty array if the path doesn't exist
+                        return []
+                    }
+                    
+                    // Try to get all VMs from the specified path
+                    // We need to check which subdirectories are valid VM dirs
+                    let directoryURL = URL(fileURLWithPath: storage)
+                    let contents = try FileManager.default.contentsOfDirectory(
+                        at: directoryURL,
+                        includingPropertiesForKeys: [.isDirectoryKey],
+                        options: .skipsHiddenFiles
+                    )
+                    
+                    let statuses = try contents.compactMap { subdir -> VMDetails? in
+                        guard let isDirectory = try subdir.resourceValues(forKeys: [.isDirectoryKey]).isDirectory,
+                              isDirectory else {
+                            return nil
+                        }
+                        
+                        let vmName = subdir.lastPathComponent
+                        // Check if it's a valid VM directory
+                        let vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: storage)
+                        if !vmDir.initialized() {
+                            return nil
+                        }
+                        
+                        do {
+                            let vm = try self.get(name: vmName, storage: storage)
+                            return vm.details
+                        } catch {
+                            // Skip invalid VM directories
+                            return nil
+                        }
+                    }
+                    return statuses
+                } else {
+                    // Named storage
+                    let vmsWithLoc = try home.getAllVMDirectories()
+                    let statuses = try vmsWithLoc.compactMap { vmWithLoc -> VMDetails? in
+                        // Only include VMs from the specified location
+                        if vmWithLoc.locationName != storage {
+                            return nil
+                        }
+                        let vm = try self.get(
+                            name: vmWithLoc.directory.name, storage: vmWithLoc.locationName)
+                        return vm.details
+                    }
+                    return statuses
+                }
+            } else {
+                // No storage filter - get all VMs
+                let vmsWithLoc = try home.getAllVMDirectories()
+                let statuses = try vmsWithLoc.compactMap { vmWithLoc -> VMDetails? in
+                    let vm = try self.get(
+                        name: vmWithLoc.directory.name, storage: vmWithLoc.locationName)
+                    return vm.details
+                }
+                return statuses
            }
-            return statuses
        } catch {
            Logger.error("Failed to list VMs", metadata: ["error": error.localizedDescription])
            throw error
@@ -133,20 +190,42 @@ final class LumeController {
    public func get(name: String, storage: String? = nil) throws -> VM {
        let normalizedName = normalizeVMName(name: name)
        do {
-            // Try to find the VM and get its actual location
-            let actualLocation = try self.validateVMExists(
-                normalizedName, storage: storage)
+            let vm: VM
+            if let storagePath = storage, storagePath.contains("/") || storagePath.contains("\\") {
+                // Storage is a direct path
+                let vmDir = try home.getVMDirectoryFromPath(normalizedName, storagePath: storagePath)
+                guard vmDir.initialized() else {
+                    // Throw a specific error if the directory exists but isn't a valid VM
+                    if vmDir.exists() {
+                        throw VMError.notInitialized(normalizedName)
+                    } else {
+                        throw VMError.notFound(normalizedName)
+                    }
+                }
+                // Pass the path as the storage context
+                vm = try self.loadVM(vmDir: vmDir, storage: storagePath)
+            } else {
+                // Storage is nil or a named location
+                let actualLocation = try self.validateVMExists(
+                    normalizedName, storage: storage)

-            // Load the VM from its actual location
-            let vm = try self.loadVM(name: normalizedName, storage: actualLocation)
+                let vmDir = try home.getVMDirectory(normalizedName, storage: actualLocation)
+                // loadVM will re-check initialized, but good practice to keep validateVMExists result.
+                vm = try self.loadVM(vmDir: vmDir, storage: actualLocation)
+            }
            return vm
        } catch {
-            Logger.error("Failed to get VM", metadata: ["error": error.localizedDescription])
+            Logger.error(
+                "Failed to get VM",
+                metadata: [
+                    "vmName": normalizedName, "storage": storage ?? "default",
+                    "error": error.localizedDescription,
+                ])
+            // Re-throw the original error to preserve its type
            throw error
        }
    }

-    /// Factory for creating the appropriate VM type based on the OS
    @MainActor
    public func create(
        name: String,
@@ -329,58 +408,84 @@ final class LumeController {
            "Running VM",
            metadata: [
                "name": normalizedName,
-                "location": storage ?? "default",
                "no_display": "\(noDisplay)",
                "shared_directories":
                    "\(sharedDirectories.map( { $0.string } ).joined(separator: ", "))",
                "mount": mount?.path ?? "none",
                "vnc_port": "\(vncPort)",
                "recovery_mode": "\(recoveryMode)",
-                "storage_param": storage ?? "default",
+                "storage_param": storage ?? "default", // Log the original param
                "usb_storage_devices": "\(usbMassStoragePaths?.count ?? 0)",
            ])

        do {
-            // Check if this is an image reference (contains a tag)
-            let components = name.split(separator: ":")
-            if components.count == 2 {
-                do {
-                    _ = try self.validateVMExists(normalizedName, storage: storage)
-                } catch {
-                    // If the VM doesn't exist, try to pull the image
+            // Check if name is an image ref to auto-pull
+            let components = normalizedName.split(separator: ":")
+            if components.count == 2 { // Check if it looks like image:tag
+                // Attempt to validate if VM exists first, suppressing the error
+                // This avoids pulling if the VM already exists, even if name looks like an image ref
+                let vmExists = (try? self.validateVMExists(normalizedName, storage: storage)) != nil
+                if !vmExists {
+                    Logger.info(
+                        "VM not found, attempting to pull image based on name",
+                        metadata: ["imageRef": normalizedName])
+                    // Use the potentially new VM name derived from the image ref
+                    let potentialVMName = String(components[0])
                    try await pullImage(
-                        image: name,
-                        name: nil,
+                        image: normalizedName, // Full image ref
+                        name: potentialVMName, // Name derived from image
                        registry: registry,
                        organization: organization,
                        storage: storage
                    )
+                    // Important: After pull, the effective name might have changed
+                    // We proceed assuming the user wants to run the VM derived from image name
+                    // normalizedName = potentialVMName // Re-assign normalizedName if pull logic creates it
+                    // Note: Current pullImage doesn't return the final VM name, 
+                    // so we assume it matches the name derived from the image.
+                    // This might need refinement if pullImage behaviour changes.
                }
            }

-            // Find VM and get its actual location
-            let actualLocation = try validateVMExists(normalizedName, storage: storage)
+            // Determine effective storage path or name AND get the VMDirectory
+            let effectiveStorage: String?
+            let vmDir: VMDirectory

-            // Log if we found the VM in a different location than default
-            if actualLocation != storage && actualLocation != nil {
+            if let storagePath = storage, storagePath.contains("/") || storagePath.contains("\\") {
+                // Storage is a direct path
+                vmDir = try home.getVMDirectoryFromPath(normalizedName, storagePath: storagePath)
+                guard vmDir.initialized() else {
+                    if vmDir.exists() {
+                        throw VMError.notInitialized(normalizedName)
+                    } else {
+                        throw VMError.notFound(normalizedName)
+                    }
+                }
+                effectiveStorage = storagePath // Use the path string
+                Logger.info("Using direct storage path", metadata: ["path": storagePath])
+            } else {
+                // Storage is nil or a named location - validate and get the actual name
+                let actualLocationName = try validateVMExists(normalizedName, storage: storage)
+                vmDir = try home.getVMDirectory(normalizedName, storage: actualLocationName) // Get VMDir for named location
+                effectiveStorage = actualLocationName // Use the named location string
                Logger.info(
-                    "Found VM in location",
+                    "Using named storage location",
                    metadata: [
-                        "name": normalizedName,
-                        "location": actualLocation ?? "default",
+                        "requested": storage ?? "default",
+                        "actual": actualLocationName ?? "default",
                    ])
            }

+            // Validate parameters using the located VMDirectory
            try validateRunParameters(
-                name: normalizedName,
+                vmDir: vmDir, // Pass vmDir
                sharedDirectories: sharedDirectories,
                mount: mount,
-                storage: actualLocation,
                usbMassStoragePaths: usbMassStoragePaths
            )

-            // Use the actual VM location that we found
-            let vm = try get(name: normalizedName, storage: actualLocation)
+            // Load the VM directly using the located VMDirectory and storage context
+            let vm = try self.loadVM(vmDir: vmDir, storage: effectiveStorage)

            SharedVM.shared.setVM(name: normalizedName, vm: vm)
            try await vm.run(
@@ -488,7 +593,7 @@ final class LumeController {

            let imageContainerRegistry = ImageContainerRegistry(
                registry: registry, organization: organization)
-            try await imageContainerRegistry.pull(
+            let _ = try await imageContainerRegistry.pull(
                image: actualImage,
                name: vmName,
                locationName: storage)
@@ -752,15 +857,17 @@ final class LumeController {
    }

    @MainActor
-    private func loadVM(name: String, storage: String? = nil) throws -> VM {
-        let vmDir = try home.getVMDirectory(name, storage: storage)
+    private func loadVM(vmDir: VMDirectory, storage: String?) throws -> VM {
+        // vmDir is now passed directly
        guard vmDir.initialized() else {
-            throw VMError.notInitialized(name)
+            throw VMError.notInitialized(vmDir.name) // Use name from vmDir
        }

        let config: VMConfig = try vmDir.loadConfig()
+        // Pass the provided storage (which could be a path or named location)
        let vmDirContext = VMDirContext(
-            dir: vmDir, config: config, home: home, storage: storage)
+            dir: vmDir, config: config, home: home, storage: storage
+        )

        let imageLoader =
            config.os.lowercased() == "macos" ? imageLoaderFactory.createImageLoader() : nil
@@ -808,11 +915,22 @@ final class LumeController {
    public func validateVMExists(_ name: String, storage: String? = nil) throws -> String? {
        // If location is specified, only check that location
        if let storage = storage {
-            let vmDir = try home.getVMDirectory(name, storage: storage)
-            guard vmDir.initialized() else {
-                throw VMError.notFound(name)
+            // Check if storage is a path by looking for directory separator
+            if storage.contains("/") || storage.contains("\\") {
+                // Treat as direct path
+                let vmDir = try home.getVMDirectoryFromPath(name, storagePath: storage)
+                guard vmDir.initialized() else {
+                    throw VMError.notFound(name)
+                }
+                return storage  // Return the path as the location identifier
+            } else {
+                // Treat as named storage
+                let vmDir = try home.getVMDirectory(name, storage: storage)
+                guard vmDir.initialized() else {
+                    throw VMError.notFound(name)
+                }
+                return storage
            }
-            return storage
        }

        // If no location specified, try to find the VM in any location
@@ -826,6 +944,51 @@ final class LumeController {
        throw VMError.notFound(name)
    }

+    private func validateRunParameters(
+        vmDir: VMDirectory, // Already-located VM directory; its config selects the OS-specific mount checks below
+        sharedDirectories: [SharedDirectory]?,
+        mount: Path?,
+        usbMassStoragePaths: [Path]? = nil
+    ) throws {
+        // The caller is expected to have located vmDir already, so validateVMExists is not called here
+        if let dirs = sharedDirectories {
+            try self.validateSharedDirectories(dirs)
+        }
+
+        // Validate USB mass storage paths
+        if let usbPaths = usbMassStoragePaths {
+            for path in usbPaths {
+                if !FileManager.default.fileExists(atPath: path.path) {
+                    throw ValidationError("USB mass storage image not found: \(path.path)")
+                }
+            }
+
+            if #available(macOS 15.0, *) {
+                // USB mass storage is supported
+            } else {
+                Logger.info(
+                    "USB mass storage devices require macOS 15.0 or later. They will be ignored.")
+            }
+        }
+
+        // Mount rules depend on the OS recorded in the VM's config, loaded from vmDir (throws if loading fails)
+        let vmConfig = try vmDir.loadConfig()
+        switch vmConfig.os.lowercased() {
+        case "macos":
+            if mount != nil {
+                throw ValidationError(
+                    "Mounting disk images is not supported for macOS VMs. If you are looking to mount a IPSW, please use the --ipsw option in the create command."
+                )
+            }
+        case "linux":
+            if let mount = mount, !FileManager.default.fileExists(atPath: mount.path) {
+                throw ValidationError("Mount file not found: \(mount.path)")
+            }
+        default:
+            break
+        }
+    }
+
    private func validatePullParameters(
        image: String,
        name: String,
@@ -846,51 +1009,31 @@ final class LumeController {
            throw ValidationError("Organization cannot be empty")
        }

-        let vmDir = try home.getVMDirectory(name, storage: storage)
-        if vmDir.exists() {
-            throw VMError.alreadyExists(name)
-        }
-    }
-
-    private func validateRunParameters(
-        name: String, sharedDirectories: [SharedDirectory]?, mount: Path?,
-        storage: String? = nil, usbMassStoragePaths: [Path]? = nil
-    ) throws {
-        _ = try self.validateVMExists(name, storage: storage)
-        if let dirs = sharedDirectories {
-            try self.validateSharedDirectories(dirs)
-        }
-
-        // Validate USB mass storage paths
-        if let usbPaths = usbMassStoragePaths {
-            for path in usbPaths {
-                if !FileManager.default.fileExists(atPath: path.path) {
-                    throw ValidationError("USB mass storage image not found: \(path.path)")
+        // Determine if storage is a path or a named storage location
+        let vmDir: VMDirectory
+        if let storage = storage, storage.contains("/") || storage.contains("\\") {
+            // Create the base directory if it doesn't exist
+            if !FileManager.default.fileExists(atPath: storage) {
+                Logger.info("Creating VM storage directory", metadata: ["path": storage])
+                do {
+                    try FileManager.default.createDirectory(
+                        atPath: storage,
+                        withIntermediateDirectories: true
+                    )
+                } catch {
+                    throw HomeError.directoryCreationFailed(path: storage)
                }
            }
-
-            if #available(macOS 15.0, *) {
-                // USB mass storage is supported
-            } else {
-                Logger.info(
-                    "USB mass storage devices require macOS 15.0 or later. They will be ignored.")
-            }
+            
+            // Use getVMDirectoryFromPath for direct paths
+            vmDir = try home.getVMDirectoryFromPath(name, storagePath: storage)
+        } else {
+            // Use getVMDirectory for named storage locations
+            vmDir = try home.getVMDirectory(name, storage: storage)
        }
-
-        let vmConfig = try home.getVMDirectory(name, storage: storage).loadConfig()
-        switch vmConfig.os.lowercased() {
-        case "macos":
-            if mount != nil {
-                throw ValidationError(
-                    "Mounting disk images is not supported for macOS VMs. If you are looking to mount a IPSW, please use the --ipsw option in the create command."
-                )
-            }
-        case "linux":
-            if let mount = mount, !FileManager.default.fileExists(atPath: mount.path) {
-                throw ValidationError("Mount file not found: \(mount.path)")
-            }
-        default:
-            break
+        
+        if vmDir.exists() {
+            throw VMError.alreadyExists(name)
        }
    }

--- a/libs/lume/src/Server/Handlers.swift
+++ b/libs/lume/src/Server/Handlers.swift
@@ -6,10 +6,10 @@ import Virtualization
 extension Server {
    // MARK: - VM Management Handlers

-    func handleListVMs() async throws -> HTTPResponse {
+    func handleListVMs(storage: String? = nil) async throws -> HTTPResponse {
        do {
            let vmController = LumeController()
-            let vms = try vmController.list()
+            let vms = try vmController.list(storage: storage)
            return try .json(vms)
        } catch {
            return .badRequest(message: error.localizedDescription)
--- a/libs/lume/src/Server/Requests.swift
+++ b/libs/lume/src/Server/Requests.swift
@@ -109,7 +109,7 @@ struct PushRequest: Codable {
    let tags: [String] // List of tags to push
    var registry: String // Registry URL
    var organization: String // Organization/user in the registry
-    let storage: String? // Optional VM storage location
+    let storage: String? // Optional VM storage location or direct path
    var chunkSizeMb: Int // Chunk size
    // dryRun and reassemble are less common for API, default to false?
    // verbose is usually handled by server logging
--- a/libs/lume/src/Server/Server.swift
+++ b/libs/lume/src/Server/Server.swift
@@ -79,9 +79,11 @@ final class Server {
        routes = [
            Route(
                method: "GET", path: "/lume/vms",
-                handler: { [weak self] _ in
+                handler: { [weak self] request in
                    guard let self else { throw HTTPError.internalError }
-                    return try await self.handleListVMs()
+                    // Extract storage from query params if present
+                    let storage = self.extractQueryParam(request: request, name: "storage")
+                    return try await self.handleListVMs(storage: storage)
                }),
            Route(
                method: "GET", path: "/lume/vms/:name",
@@ -177,8 +179,21 @@ final class Server {
                        return HTTPResponse(statusCode: .badRequest, body: "Missing VM name")
                    }

-                    // Extract storage from query params if present
-                    let storage = self.extractQueryParam(request: request, name: "storage")
+                    Logger.info("Processing stop VM request", metadata: ["method": request.method, "path": request.path])
+
+                    // Extract storage from the request body
+                    var storage: String? = nil
+                    if let bodyData = request.body, !bodyData.isEmpty {
+                        do {
+                            if let json = try JSONSerialization.jsonObject(with: bodyData) as? [String: Any],
+                               let bodyStorage = json["storage"] as? String {
+                                storage = bodyStorage
+                                Logger.info("Extracted storage from request body", metadata: ["storage": bodyStorage])
+                            }
+                        } catch {
+                            Logger.error("Failed to parse request body JSON", metadata: ["error": error.localizedDescription])
+                        }
+                    }

                    return try await self.handleStopVM(name: name, storage: storage)
                }),
--- a/libs/lumier/.dockerignore
+++ b/libs/lumier/.dockerignore
@@ -0,0 +1,24 @@
+# Ignore macOS system files and trash
+.DS_Store
+.Trashes
+**/.Trashes
+**/.*
+
+# Ignore Python cache
+__pycache__/
+*.pyc
+*.pyo
+
+# Ignore virtual environments
+.venv/
+venv/
+
+# Ignore editor/project files
+.vscode/
+.idea/
+*.swp
+
+# Ignore test artifacts
+test-results/
+
+# Ignore anything else you don't want in the Docker build context
--- a/libs/lumier/Dockerfile
+++ b/libs/lumier/Dockerfile
@@ -0,0 +1,74 @@
+# Base image using Debian for arm64 architecture (optimized for Apple Silicon)
+FROM debian:bullseye-slim AS lumier-base
+
+# Set environment variables for Lume API server configuration
+ENV LUME_API_HOST="host.docker.internal"
+ENV LUME_API_PORT="8080"
+
+# Default VM configuration (can be overridden at runtime)
+ENV VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest"
+ENV RAM_SIZE="8192"
+ENV CPU_CORES="4"
+ENV DISK_SIZE="100"
+ENV DISPLAY="1024x768"
+ENV VM_NAME="lumier"
+ENV HOST_DATA_PATH=""
+ENV LUMIER_DEBUG="0"
+
+# Install necessary tools and noVNC dependencies
+RUN apt-get update && \
+    apt-get install -y \
+    netcat-traditional \
+    curl \
+    sshpass \
+    wget \
+    unzip \
+    git \
+    python3 \
+    python3-pip \
+    python3-numpy \
+    procps && \
+    rm -rf /var/lib/apt/lists/*
+
+# Cache-busting variable: change its value to force Docker to rebuild every layer below this line
+ENV CACHEBUST=1
+
+# Download and install noVNC (this layer is rebuilt whenever the CACHEBUST value above changes)
+RUN wget https://github.com/trycua/noVNC/archive/refs/heads/master.zip -O master1.zip && \
+    unzip master1.zip && \
+    mv noVNC-master /opt/noVNC && \
+    rm master1.zip
+
+# Set environment variables for noVNC
+ENV NOVNC_PATH="/opt/noVNC"
+
+# Create directory structure
+RUN mkdir -p /run/bin /run/lib /run/config /run/hooks
+
+# Copy scripts to the container
+COPY src/bin/tunnel.sh /run/bin/
+COPY src/bin/tunnel-script.sh /usr/local/bin/lume
+COPY src/bin/tunnel-script.sh /usr/local/bin/sshpass
+COPY src/config/constants.sh /run/config/
+COPY src/bin/entry.sh /run/bin/entry.sh
+
+# Copy library files and lifecycle hooks (both directories must exist in the build context, or COPY fails)
+COPY src/lib/ /run/lib/
+COPY src/hooks/ /run/hooks/
+
+# Make scripts executable
+RUN chmod +x /usr/local/bin/lume \
+    /usr/local/bin/sshpass \
+    /run/bin/* \
+    /run/hooks/* 2>/dev/null || true
+
+# Expose ports for noVNC and Lume API
+EXPOSE 8080
+EXPOSE 8006
+
+# VOLUME setup
+VOLUME [ "/storage" ]
+VOLUME [ "/data" ]
+
+# Default entrypoint
+ENTRYPOINT ["/run/bin/entry.sh"]
--- a/libs/lumier/README.md
+++ b/libs/lumier/README.md
@@ -0,0 +1,175 @@
+<div align="center">
+<h1>
+  <div class="image-wrapper" style="display: inline-block;">
+    <picture>
+      <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
+      <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
+      <img alt="logo">
+    </picture>
+  </div>
+
+  [![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#)
+  [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
+  [![Homebrew](https://img.shields.io/badge/Homebrew-FBB040?logo=homebrew&logoColor=fff)](#install)
+  [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
+</h1>
+</div>
+
+**Lumier** provides a Docker-based interface for the `lume` CLI, allowing you to easily run macOS virtual machines inside a container with VNC access. It creates a secure tunnel to execute lume commands on your host machine while providing a containerized environment for your applications.
+
+## Requirements
+
+Before using Lumier, make sure you have:
+
+1. [lume](https://github.com/trycua/cua/blob/main/libs/lume/README.md) installed on your host machine
+2. Docker installed on your host machine
+3. `socat` installed for the tunnel (install with Homebrew: `brew install socat`)
+
+## Installation
+
+You can use Lumier directly from its directory or install it to your system:
+
+```bash
+# Option 1: Install to your user's bin directory (recommended)
+./install.sh
+
+# Option 2: Install to a custom directory
+./install.sh --install-dir=/usr/local/bin  # May require sudo
+
+# Option 3: View installation options
+./install.sh --help
+```
+
+After installation, you can run `lumier` from anywhere in your terminal.
+
+If you get a "command not found" error, make sure the installation directory is in your PATH. The installer will warn you if it isn't and provide instructions to add it.
+
+## Usage
+
+There are two ways to use Lumier: with the provided script or directly with Docker.
+
+### Option 1: Using the Lumier Script
+
+Lumier provides a simple CLI interface to manage VMs in Docker with full Docker compatibility:
+
+```bash
+# Show help and available commands
+lumier help
+
+# Start the tunnel to connect to lume 
+lumier start
+
+# Check if the tunnel is running
+lumier status
+
+# Stop the tunnel
+lumier stop
+
+# Build the Docker image (optional, happens automatically on first run)
+lumier build
+
+# Run a VM with default settings
+lumier run -it --rm
+
+# Run a VM with custom settings using Docker's -e flag
+lumier run -it --rm \
+    --name lumier-vm \
+    -p 8006:8006 \
+    -v $(pwd)/storage:/storage \
+    -v $(pwd)/shared:/data \
+    -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
+    -e CPU_CORES=4 \
+    -e RAM_SIZE=8192
+    
+# Note:
+# The lumier script now automatically detects the real host paths for ./storage and ./shared
+# and passes them to the container as HOST_STORAGE_PATH and HOST_DATA_PATH.
+# You do NOT need to specify these environment variables manually.
+# The VM name is always set from the container name.
+```
+
+### Option 2: Using Docker Directly
+
+You can also use Docker commands directly without the lumier utility:
+
+```bash
+# 1. Start the tunnel manually
+cd libs/lumier
+socat TCP-LISTEN:8080,reuseaddr,fork EXEC:"$PWD/src/bin/tunnel.sh" &
+TUNNEL_PID=$!
+
+# 2. Build the Docker image
+docker build -t lumier:latest .
+
+# 3. Run the container
+docker run -it --rm \
+    --name lumier-vm \
+    -p 8006:8006 \
+    -v $(pwd)/storage:/storage \
+    -v $(pwd)/shared:/data \
+    -e VM_NAME=lumier-vm \
+    -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
+    -e CPU_CORES=4 \
+    -e RAM_SIZE=8192 \
+    -e HOST_STORAGE_PATH=$(pwd)/storage \
+    -e HOST_DATA_PATH=$(pwd)/shared \
+    lumier:latest
+    
+# 4. Stop the tunnel when you're done
+kill $TUNNEL_PID
+
+# Alternatively, find and kill the tunnel process
+# First, find the process
+lsof -i TCP:8080
+# Then kill it by PID
+kill <PID>
+```
+
+Note that when using Docker directly, you're responsible for:
+- Starting and managing the tunnel
+- Building the Docker image
+- Providing the correct environment variables 
+
+## Available Environment Variables
+
+These variables can be set using Docker's `-e` flag:
+
+- `VM_NAME`: Set the VM name (default: lumier)
+- `VERSION`: Set the VM image (default: ghcr.io/trycua/macos-sequoia-vanilla:latest)
+- `CPU_CORES`: Set the number of CPU cores (default: 4)
+- `RAM_SIZE`: Set the memory size in MB (default: 8192)
+- `DISPLAY`: Set the display resolution (default: 1024x768)
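+- `HOST_STORAGE_PATH`, `HOST_DATA_PATH`: Host paths of the directories mounted at `/storage` and `/data` (the latter is the path on the host to share with the VM)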
+- `LUMIER_DEBUG`: Enable debug mode (set to 1)
+
+## Project Structure
+
+The project is organized as follows:
+
+```
+lumier/
+├── Dockerfile            # Main Docker image definition
+├── README.md             # This file
+├── lumier                # Main CLI script
+├── install.sh            # Installation script
+├── src/                  # Source code
+│   ├── bin/              # Executable scripts
+│   │   ├── entry.sh      # Docker entrypoint
+│   │   ├── server.sh     # Tunnel server manager
+│   │   └── tunnel.sh     # Tunnel request handler
+│   ├── config/           # Configuration
+│   │   └── constants.sh  # Shared constants
+│   ├── hooks/            # Lifecycle hooks
+│   │   └── on-logon.sh   # Run after VM boots
+│   └── lib/              # Shared library code
+│       ├── utils.sh      # Utility functions
+│       └── vm.sh         # VM management functions
+└── mount/                # Default shared directory
+```
+
+## VNC Access
+
+When a VM is running, you can access it via VNC through:
+http://localhost:8006/vnc.html
+
+The password is displayed in the console output when the VM starts.
--- a/libs/lumier/install.sh
+++ b/libs/lumier/install.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+set -e
+
+# Lumier Installer
+# This script installs Lumier to your system
+
+# Define colors for output.
+# tput exits non-zero when TERM is unset or unknown (CI, cron, piped installs).
+# Under `set -e` that would abort the installer before it prints anything, so
+# fall back to empty strings (plain, uncolored output) instead.
+BOLD=$(tput bold 2>/dev/null || true)
+NORMAL=$(tput sgr0 2>/dev/null || true)
+RED=$(tput setaf 1 2>/dev/null || true)
+GREEN=$(tput setaf 2 2>/dev/null || true)
+BLUE=$(tput setaf 4 2>/dev/null || true)
+YELLOW=$(tput setaf 3 2>/dev/null || true)
+
+# Default installation directory (user-specific, doesn't require sudo).
+# An INSTALL_DIR already present in the environment takes precedence and can
+# itself be overridden by --install-dir below.
+DEFAULT_INSTALL_DIR="$HOME/.local/bin"
+INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"
+
+# Directory containing this installer; the lumier script is copied from here
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Parse command line arguments
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --install-dir=*)
+      # Keep everything after the first '='
+      INSTALL_DIR="${1#*=}"
+      ;;
+    --help)
+      echo "${BOLD}${BLUE}Lumier Installer${NORMAL}"
+      echo "Usage: $0 [OPTIONS]"
+      echo ""
+      echo "Options:"
+      echo "  --install-dir=DIR   Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
+      echo "  --help              Display this help message"
+      echo ""
+      echo "Examples:"
+      echo "  $0                               # Install to $DEFAULT_INSTALL_DIR"
+      echo "  $0 --install-dir=/usr/local/bin  # Install to system directory (may require root privileges)"
+      echo "  INSTALL_DIR=/opt/lumier $0       # Install to /opt/lumier (legacy env var support)"
+      exit 0
+      ;;
+    *)
+      echo "${RED}Unknown option: $1${NORMAL}"
+      echo "Use --help for usage information"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+echo "${BOLD}${BLUE}Lumier Installer${NORMAL}"
+echo "This script will install Lumier to your system."
+
+# Check if we're running with appropriate permissions
+#
+# Warns when INSTALL_DIR is, or lives under, a typical system directory that
+# the current user cannot write to, and exits 1 when neither INSTALL_DIR nor
+# its parent directory is writable. Reads INSTALL_DIR; sets SYSTEM_DIRS and
+# NEEDS_ROOT as a side effect.
+check_permissions() {
+  # System directories that typically require root privileges
+  SYSTEM_DIRS=("/usr/local/bin" "/usr/bin" "/bin" "/opt")
+
+  NEEDS_ROOT=false
+  for DIR in "${SYSTEM_DIRS[@]}"; do
+    # Match the directory itself or a real sub-path. A bare prefix test
+    # ("$DIR"*) would also flag unrelated paths such as /optional or /bin-tools.
+    if [[ "$INSTALL_DIR" == "$DIR" || "$INSTALL_DIR" == "$DIR"/* ]] && [ ! -w "$INSTALL_DIR" ]; then
+      NEEDS_ROOT=true
+      break
+    fi
+  done
+
+  if [ "$NEEDS_ROOT" = true ]; then
+    echo "${YELLOW}Warning: Installing to $INSTALL_DIR may require root privileges.${NORMAL}"
+    echo "Consider these alternatives:"
+    echo "  • Install to a user-writable location: $0 --install-dir=$HOME/.local/bin"
+    echo "  • Create the directory with correct permissions first:"
+    echo "    sudo mkdir -p $INSTALL_DIR && sudo chown $(whoami) $INSTALL_DIR"
+    echo ""
+
+    # A missing INSTALL_DIR is fine as long as its parent is writable,
+    # because main() creates the directory with mkdir -p.
+    if [ ! -w "$INSTALL_DIR" ] && [ ! -w "$(dirname "$INSTALL_DIR")" ]; then
+      echo "${RED}Error: You don't have write permission to $INSTALL_DIR${NORMAL}"
+      echo "Please choose a different installation directory or ensure you have the proper permissions."
+      exit 1
+    fi
+  fi
+}
+
+
+# Detect OS and architecture
+detect_platform() {
+  OS=$(uname -s | tr '[:upper:]' '[:lower:]')
+  ARCH=$(uname -m)
+  
+  if [ "$OS" != "darwin" ]; then
+    echo "${RED}Error: Currently only macOS is supported.${NORMAL}"
+    exit 1
+  fi
+  
+  if [ "$ARCH" != "arm64" ]; then
+    echo "${RED}Error: Lumier only supports macOS on Apple Silicon (ARM64).${NORMAL}"
+    exit 1
+  fi
+  
+  PLATFORM="darwin-arm64"
+  echo "Detected platform: ${BOLD}$PLATFORM${NORMAL}"
+}
+
+# Check dependencies
+check_dependencies() {
+  echo "Checking dependencies..."
+  
+  # Check if lume is installed
+  if ! command -v lume &> /dev/null; then
+    echo "${RED}Error: Lume is required but not installed.${NORMAL}"
+    echo "Please install Lume first: https://github.com/trycua/cua/blob/main/libs/lume/README.md"
+    exit 1
+  fi
+  
+  # Check if socat is installed
+  if ! command -v socat &> /dev/null; then
+    echo "${YELLOW}Warning: socat is required but not installed.${NORMAL}"
+    echo "Installing socat with Homebrew..."
+    
+    # Check if Homebrew is installed
+    if ! command -v brew &> /dev/null; then
+      echo "${RED}Error: Homebrew is required to install socat.${NORMAL}"
+      echo "Please install Homebrew first: https://brew.sh/"
+      echo "Or install socat manually, then run this script again."
+      exit 1
+    fi
+    
+    # Install socat
+    brew install socat
+  fi
+  
+  # Check if Docker is installed
+  if ! command -v docker &> /dev/null; then
+    echo "${YELLOW}Warning: Docker is required but not installed.${NORMAL}"
+    echo "Please install Docker: https://docs.docker.com/get-docker/"
+    echo "Continuing with installation, but Lumier will not work without Docker."
+  fi
+  
+  echo "${GREEN}All dependencies are satisfied.${NORMAL}"
+}
+
+# Copy the lumier script directly
+#
+# Places the lumier script that sits next to this installer into INSTALL_DIR
+# and marks the copy executable.
+copy_lumier() {
+  local destination="$INSTALL_DIR/lumier"
+  echo "Copying lumier script to $INSTALL_DIR..."
+  cp "$SCRIPT_DIR/lumier" "$destination"
+  chmod +x "$destination"
+}
+
+# Main installation flow
+main() {
+  check_permissions
+  detect_platform
+  check_dependencies
+  
+  echo "Installing Lumier to $INSTALL_DIR..."
+  
+  # Create install directory if it doesn't exist
+  mkdir -p "$INSTALL_DIR"
+  
+  # Copy the lumier script
+  copy_lumier
+  
+  echo "${GREEN}Installation complete!${NORMAL}"
+  echo "Lumier has been installed to ${BOLD}$INSTALL_DIR/lumier${NORMAL}"
+  
+  # Check if the installation directory is in PATH
+  if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then
+    echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}"
+    echo "To add it, run one of these commands based on your shell:"
+    echo "  For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
+    echo "  For zsh:  echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc"
+    echo "  For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
+  fi
+}
+
+# Run the installation
+main 
--- a/libs/lumier/lumier
+++ b/libs/lumier/lumier
@@ -0,0 +1,200 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -eo pipefail
+
+# Always use the current working directory as the build context
+SCRIPT_DIR="$(pwd)"
+PORT=8080
+DEBUG=${LUMIER_DEBUG:-0}
+
+usage() {
+    cat <<EOF
+Lumier - Docker container wrapper for lume Virtual Machines
+
+Usage: $(basename "$0") COMMAND [OPTIONS]
+
+Commands:
+  run [DOCKER_ARGS]       Build (if needed) and run the Lumier container with Docker args
+  tunnel start            Start the Lumier tunnel
+  tunnel stop             Stop the Lumier tunnel
+  tunnel status           Check the status of the Lumier tunnel
+  build [DOCKER_ARGS]     Build the Lumier Docker image with optional Docker args
+  help                    Show this help message
+
+Docker Container Environment Variables:
+  These can be set using Docker's -e flag:
+  
+  VM_NAME                 Set the VM name (default: lumier)
+  VERSION                 Set the VM image (default: ghcr.io/trycua/macos-sequoia-vanilla:latest)
+  CPU_CORES               Set the number of CPU cores (default: 4)
+  RAM_SIZE                Set the memory size in MB (default: 8192)
+  HOST_DATA_PATH          Path to mount as shared directory in the VM
+  LUMIER_DEBUG            Enable debug mode (set to 1)
+
+Script Environment Variables:
+  LUMIER_IMAGE            Docker image name (default: lumier:latest)
+
+Examples:
+  # Run a VM with default settings
+  $(basename "$0") run -it --rm
+  
+  # Run a VM with custom settings using Docker's -e flag
+  $(basename "$0") run -it --rm \\
+      --name custom-container-name \\
+      -e VM_NAME=my-vm \\
+      -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \\
+      -e RAM_SIZE=16384 \\
+      -v $(pwd)/mount:/data
+
+  # Build with a custom image name
+  LUMIER_IMAGE=myorg/lumier:v1 $(basename "$0") build
+EOF
+}
+
+# Check if the tunnel is active
+is_tunnel_active() {
+    if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then
+        return 0  # Tunnel is active
+    else
+        return 1  # Tunnel is not active
+    fi
+}
+
+# Start the tunnel if needed
+#
+# Leaves an already-listening tunnel alone. Otherwise launches it through
+# server.sh, waits two seconds and exits the script with status 1 if the port
+# is still not listening.
+ensure_tunnel() {
+    if is_tunnel_active; then
+        echo "Tunnel is already active."
+        return 0
+    fi
+
+    echo "Tunnel is not active. Starting tunnel..."
+    "$SCRIPT_DIR/src/bin/server.sh" start
+    sleep 2  # Wait for the tunnel to start
+
+    if is_tunnel_active; then
+        return 0
+    fi
+
+    echo "Failed to start tunnel. Make sure 'lume' is installed on your host."
+    exit 1
+}
+
+# Build the Docker image with cache busting
+build_image() {
+    local image_name="${LUMIER_IMAGE:-lumier:latest}"
+    echo "Building Lumier Docker image: $image_name"
+    echo "SCRIPT_DIR=$SCRIPT_DIR"
+    echo "Checking for Dockerfile at: $SCRIPT_DIR/Dockerfile"
+    ls -l "$SCRIPT_DIR/Dockerfile" || echo "Dockerfile not found at $SCRIPT_DIR/Dockerfile"
+    
+    # Pass any additional arguments to docker build with cache busting
+    docker build --build-arg CACHEBUST=$(date +%s) -t "$image_name" "$SCRIPT_DIR" "$@"
+    
+    echo "Lumier image built successfully: $image_name"
+}
+
+# Run the Docker container
+run_container() {
+    local image_name="${LUMIER_IMAGE:-lumier:latest}"
+    
+    # Ensure the Docker image exists
+    if ! docker image inspect "$image_name" &>/dev/null; then
+        echo "Docker image '$image_name' not found. Building it..."
+        build_image
+    fi
+    
+    # Ensure the tunnel is running
+    ensure_tunnel
+    
+    # Automatically resolve and pass host paths for storage and data
+    STORAGE_PATH="${HOST_STORAGE_PATH:-$(realpath ./storage)}"
+    DATA_PATH="${HOST_DATA_PATH:-$(realpath ./shared)}"
+
+    # Only add -e if not already present in args
+    DOCKER_ARGS=( )
+    add_env_var() {
+        local var="$1"; local val="$2"; local flag="-e $var="
+        for arg in "$@"; do
+            [[ "$arg" == *"$flag"* ]] && return 0
+        done
+        DOCKER_ARGS+=( -e "$var=$val" )
+    }
+    add_env_var HOST_STORAGE_PATH "$STORAGE_PATH"
+    add_env_var HOST_DATA_PATH "$DATA_PATH"
+
+    # Detect --name argument and set VM_NAME if not already present
+    local container_name=""
+    local prev_arg=""
+    for arg in "$@"; do
+        if [[ "$prev_arg" == "--name" ]]; then
+            container_name="$arg"
+            break
+        elif [[ "$arg" == --name=* ]]; then
+            container_name="${arg#--name=}"
+            break
+        fi
+        prev_arg="$arg"
+    done
+    # Only add -e VM_NAME if not already present and container_name is set
+    local vm_name_set=false
+    for arg in "$@"; do
+        if [[ "$arg" == "-e" ]] && [[ "$2" == VM_NAME=* ]]; then
+            vm_name_set=true
+            break
+        elif [[ "$arg" == "-eVM_NAME="* ]]; then
+            vm_name_set=true
+            break
+        elif [[ "$arg" == "-e"* ]] && [[ "$arg" == *"VM_NAME="* ]]; then
+            vm_name_set=true
+            break
+        fi
+    done
+    if [[ -n "$container_name" && "$vm_name_set" != true ]]; then
+        DOCKER_ARGS+=( -e "VM_NAME=$container_name" )
+    fi
+
+    echo "Running Lumier container with image: $image_name"
+    if [[ "$*" == *"-p 8006:8006"* || "$*" == *"-p"*"8006:8006"* ]]; then
+        docker run "${DOCKER_ARGS[@]}" "$@" "$image_name"
+    else
+        docker run "${DOCKER_ARGS[@]}" -p 8006:8006 "$@" "$image_name"
+    fi
+}
+
+# Main command handling
+#
+# The first argument selects the command and defaults to "help". -h and
+# --help are accepted as aliases so the conventional flags do not end up in
+# the "Unknown command" branch with exit status 1.
+case "${1:-help}" in
+    run)
+        shift
+        run_container "$@"
+        ;;
+    tunnel)
+        # Handle tunnel subcommands by delegating to server.sh
+        case "${2:-}" in
+            start)
+                "$SCRIPT_DIR/src/bin/server.sh" start
+                ;;
+            stop)
+                "$SCRIPT_DIR/src/bin/server.sh" stop
+                ;;
+            status)
+                "$SCRIPT_DIR/src/bin/server.sh" status
+                ;;
+            *)
+                # ${2:-} keeps this safe when no subcommand was given at all
+                echo "Unknown tunnel subcommand: ${2:-}"
+                usage
+                exit 1
+                ;;
+        esac
+        ;;
+
+    build)
+        shift
+        build_image "$@"
+        ;;
+    help|-h|--help)
+        usage
+        ;;
+    *)
+        echo "Unknown command: $1"
+        usage
+        exit 1
+        ;;
+esac
--- a/libs/lumier/mount/server.py
+++ b/libs/lumier/mount/server.py
@@ -0,0 +1,10 @@
+from flask import Flask
+
+app = Flask(__name__)
+
+@app.route('/')
+def hello_world():
+    return 'Hello, World, from VM!'
+
+if __name__ == '__main__':
+    app.run(debug=True, host="0.0.0.0", port=5001)
--- a/libs/lumier/mount/setup.sh
+++ b/libs/lumier/mount/setup.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+# Sample setup script: leaves a greeting file on the Desktop, and never
+# overwrites one that is already there.
+echo "Creating helloworld.txt on the Desktop..."
+if [ -f ~/Desktop/helloworld.txt ]; then
+  echo "helloworld.txt already exists."
+else
+  echo "Hello, World!" > ~/Desktop/helloworld.txt
+  echo "helloworld.txt created successfully."
+fi
--- a/libs/lumier/src/bin/entry.sh
+++ b/libs/lumier/src/bin/entry.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -euo pipefail
+
+# Locations of the configuration and library files to source
+CONFIG_DIR="/run/config"
+LIB_DIR="/run/lib"
+
+# Source constants if available
+if [ -f "${CONFIG_DIR}/constants.sh" ]; then
+  source "${CONFIG_DIR}/constants.sh"
+fi
+
+# Import utilities: every *.sh file in LIB_DIR is sourced. The -f test also
+# skips the unexpanded glob pattern when the directory holds no .sh files.
+for lib in "${LIB_DIR}"/*.sh; do
+  if [ -f "$lib" ]; then
+    source "$lib"
+  fi
+done
+
+# Set VM_NAME from the environment, or fall back to the contents of /etc/hostname.
+# NOTE(review): inside a container /etc/hostname is presumably the container
+# hostname, which matches --name only if the launcher arranges that - confirm.
+if [ -z "${VM_NAME:-}" ]; then
+    VM_NAME="$(cat /etc/hostname)"
+    export VM_NAME
+fi
+
+# Set HOST_STORAGE_PATH to /storage/$VM_NAME if not set
+if [ -z "${HOST_STORAGE_PATH:-}" ]; then
+    HOST_STORAGE_PATH="/storage/$VM_NAME"
+    export HOST_STORAGE_PATH
+fi
+
+# Optionally check for mountpoints (informational only; nothing depends on the result)
+if mountpoint -q /storage; then
+    echo "/storage is mounted"
+fi
+if mountpoint -q /data; then
+    echo "/data is mounted"
+fi
+
+# Log startup info
+echo "Lumier VM is starting..."
+
+# Cleanup function to ensure VM shutdown on container stop.
+#
+# Installed below as the SIGTERM/SIGINT handler: it stops the VM through
+# stop_vm and then exits 0. The noVNC proxy is not stopped explicitly; the
+# code that did so is kept below, commented out.
+cleanup() {
+  set +e  # Don't exit on error in cleanup
+  echo "[cleanup] Caught signal, shutting down..."
+  echo "[cleanup] Stopping VM..."
+  stop_vm
+  # Disabled: graceful noVNC proxy shutdown (TERM, wait up to 5s, then KILL)
+  # if [ -n "${NOVNC_PID:-}" ] && kill -0 "$NOVNC_PID" 2>/dev/null; then
+  #   echo "[cleanup] Stopping noVNC proxy (PID $NOVNC_PID)..."
+  #   kill -TERM "$NOVNC_PID"
+  #   # Wait up to 5s for noVNC to exit
+  #   for i in {1..5}; do
+  #     if ! kill -0 "$NOVNC_PID" 2>/dev/null; then
+  #       echo "[cleanup] noVNC proxy stopped."
+  #       break
+  #     fi
+  #     sleep 1
+  #   done
+  #   # Escalate if still running
+  #   if kill -0 "$NOVNC_PID" 2>/dev/null; then
+  #     echo "[cleanup] noVNC proxy did not exit, killing..."
+  #     kill -KILL "$NOVNC_PID" 2>/dev/null
+  #   fi
+  # fi
+  echo "[cleanup] Done. Exiting."
+  exit 0
+}
+trap cleanup SIGTERM SIGINT
+
+# Start the VM
+start_vm
+
+# Start noVNC for VNC access
+NOVNC_PID=""
+if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then
+  echo "Starting noVNC proxy with optimized color settings..."
+  ${NOVNC_PATH}/utils/novnc_proxy --vnc host.docker.internal:${VNC_PORT} --listen 8006 --web ${NOVNC_PATH} > /dev/null 2>&1 &
+  NOVNC_PID=$!
+  disown $NOVNC_PID
+  echo "noVNC interface available at: http://localhost:8006/vnc.html?password=${VNC_PASSWORD}&autoconnect=true&logging=debug"
+fi
+
+# Run any post-startup hooks
+if [ -d "/run/hooks" ]; then
+  for hook in /run/hooks/*; do
+    if [ -x "$hook" ]; then
+      echo "Running hook: $(basename "$hook")"
+      "$hook"
+    fi
+  done
+fi
+
+echo "Lumier is running. Press Ctrl+C to stop."
+tail -f /dev/null
--- a/libs/lumier/src/bin/server.sh
+++ b/libs/lumier/src/bin/server.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -euo pipefail
+
+# Source constants if available
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [ -f "${SCRIPT_DIR}/../config/constants.sh" ]; then
+  source "${SCRIPT_DIR}/../config/constants.sh"
+fi
+
+# Use the tunnel port from constants if available, otherwise default to 8080
+PORT="${TUNNEL_PORT:-8080}"
+TUNNEL_SCRIPT="${SCRIPT_DIR}/tunnel.sh"
+
+# Function to check if the tunnel is active
+is_tunnel_active() {
+    if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then
+        return 0  # Tunnel is active
+    else
+        return 1  # Tunnel is not active
+    fi
+}
+
+# Function to start the tunnel
+start_tunnel() {
+    echo "Starting tunnel on port $PORT..."
+    if is_tunnel_active; then
+        echo "Tunnel is already running on port $PORT."
+        return 0
+    fi
+    
+    # Start socat in the background
+    socat TCP-LISTEN:$PORT,reuseaddr,fork EXEC:"$TUNNEL_SCRIPT" &
+    SOCAT_PID=$!
+    
+    # Check if the tunnel started successfully
+    sleep 1
+    if ! is_tunnel_active; then
+        echo "Failed to start tunnel on port $PORT."
+        return 1
+    fi
+    
+    echo "Tunnel started successfully on port $PORT (PID: $SOCAT_PID)."
+    return 0
+}
+
+# Function to stop the tunnel
+#
+# Kills whatever lsof reports as listening on $PORT. Returns 0 when nothing
+# was listening or the kill was issued, 1 when no PID could be determined.
+stop_tunnel() {
+    echo "Stopping tunnel on port $PORT..."
+    if is_tunnel_active; then
+        # Find and kill the socat process (second lsof column is the PID)
+        local listener=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}')
+        if [ -z "$listener" ]; then
+            echo "Failed to find process using port $PORT."
+            return 1
+        fi
+        # Left unquoted so several whitespace-separated PIDs are all signalled
+        kill $listener
+        echo "Tunnel stopped (PID: $listener)."
+        return 0
+    fi
+
+    echo "No tunnel running on port $PORT."
+    return 0
+}
+
+# Function to check tunnel status
+status_tunnel() {
+    if is_tunnel_active; then
+        local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}')
+        echo "Tunnel is active on port $PORT (PID: $pid)."
+        return 0
+    else
+        echo "No tunnel running on port $PORT."
+        return 1
+    fi
+}
+
+# Parse command line arguments
+case "${1:-}" in
+    start)
+        start_tunnel
+        ;;
+    stop)
+        stop_tunnel
+        ;;
+    restart)
+        stop_tunnel
+        # kill is asynchronous: give the old listener up to five seconds to
+        # release the port. Otherwise start_tunnel can still see it, report
+        # "already running" and leave nothing listening once it exits.
+        for _ in 1 2 3 4 5; do
+            is_tunnel_active || break
+            sleep 1
+        done
+        start_tunnel
+        ;;
+    status)
+        status_tunnel
+        ;;
+    *)
+        echo "Usage: $0 {start|stop|restart|status}"
+        exit 1
+        ;;
+esac
--- a/libs/lumier/src/bin/tunnel-script.sh
+++ b/libs/lumier/src/bin/tunnel-script.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# Client side of the tunnel: re-creates the command line this script was
+# invoked with (its own file name plus all arguments, and any piped stdin) as
+# text and POSTs it to the tunnel server.
+# NOTE(review): presumably installed in the container under the names of the
+# host tools it stands in for (tunnel.sh accepts "lume" and "sshpass") - confirm
+# against the Dockerfile.
+
+# Source constants if running in container context
+if [ -f "/run/config/constants.sh" ]; then
+  source "/run/config/constants.sh"
+fi
+
+# Define server address with fallback
+SERVER="${TUNNEL_HOST:-host.docker.internal}:${TUNNEL_PORT:-8080}"
+
+# Extract the base name of the command and arguments.
+# The file name this script was invoked as becomes the command name.
+command=$(basename "$0")
+subcommand="$1"
+shift
+# Arguments are flattened into one space-separated string; original quoting is lost
+args="$@"
+
+command="$command $subcommand $args"
+
+# Concatenate command and any stdin data
+full_data="$command"
+# Only read stdin when it is not a terminal (i.e. something was piped or redirected in)
+if [ ! -t 0 ]; then
+  stdin_data=$(cat)
+  if [ -n "$stdin_data" ]; then
+    # Format full_data to include stdin data as a quoted here-document, so the
+    # receiving shell feeds it to the command without expanding it
+    full_data="$full_data << 'EOF'
+    $stdin_data
+EOF"
+  fi
+fi
+
+# Trim leading/trailing whitespace and newlines.
+# sed works line by line, so this strips every line (including the indent added above).
+# NOTE(review): `echo -e` also expands backslash escapes in the payload;
+# execute_remote_script in utils.sh builds its script with literal "\n"
+# sequences and appears to rely on that - confirm before changing it.
+full_data=$(echo -e "$full_data" | sed 's/^[ \t\n]*//;s/[ \t\n]*$//')
+
+# Log command if debug is enabled
+if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then
+  echo "Executing lume command: $full_data" >&2
+  echo "Sending to: $SERVER" >&2
+fi
+
+# Use curl with -N to disable output buffering and -s for silent mode.
+# The request body is read from stdin (@-), which the here-string supplies.
+curl -N -s -X POST \
+  -H "Content-Type: application/octet-stream" \
+  --data-binary @- \
+  "http://$SERVER" <<< "$full_data" 
--- a/libs/lumier/src/bin/tunnel.sh
+++ b/libs/lumier/src/bin/tunnel.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -euo pipefail
+
+# Source constants if available
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [ -f "${SCRIPT_DIR}/../config/constants.sh" ]; then
+  source "${SCRIPT_DIR}/../config/constants.sh"
+fi
+
+# Handle errors and cleanup
+cleanup() {
+  local exit_code=$?
+  # Clean up any temporary files if they exist
+  [ -n "${temp_file:-}" ] && [ -f "$temp_file" ] && rm "$temp_file"
+  [ -n "${fifo:-}" ] && [ -p "$fifo" ] && rm "$fifo"
+  exit $exit_code
+}
+trap cleanup EXIT INT TERM
+
+log_debug() {
+  if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then
+    echo "[DEBUG] $*" >&2
+  fi
+}
+
+# Send a plain-text HTTP error response on stdout and terminate the script.
+#
+# Arguments:
+#   $1 - status code and reason phrase, e.g. "400 Bad Request"
+#   $2 - message used as the response body
+# Exits with status 1.
+send_error_response() {
+  local status_code=$1
+  local message=$2
+  # HTTP header lines end in CRLF; this matches the success response emitted
+  # at the end of this script.
+  printf 'HTTP/1.1 %s\r\n' "$status_code"
+  printf 'Content-Type: text/plain\r\n'
+  printf '\r\n'
+  printf '%s\n' "$message"
+  exit 1
+}
+
+# Read the HTTP request line
+read -r request_line
+log_debug "Request: $request_line"
+
+# Read headers and look for Content-Length
+content_length=0
+while IFS= read -r header; do
+    # A blank line ends the headers: "\r" from CRLF clients, "" from bare-LF ones
+    [[ -z "$header" || "$header" == $'\r' ]] && break
+    log_debug "Header: $header"
+    # Header names are case-insensitive and the whitespace after the colon is
+    # optional. Character classes are spelled out so this also works on bash
+    # versions without ${var,,}.
+    if [[ "$header" =~ ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Ll][Ee][Nn][Gg][Tt][Hh]:[[:space:]]*([0-9]+) ]]; then
+        content_length="${BASH_REMATCH[1]}"
+    fi
+done
+
+# Read the body using the content length
+command=""
+if [ "$content_length" -gt 0 ]; then
+    # bs=1 makes dd stop after exactly content_length bytes instead of
+    # waiting for more data on the open connection
+    command=$(dd bs=1 count="$content_length" 2>/dev/null)
+    log_debug "Received command: $command"
+fi
+
+# Determine the executable and arguments based on the command.
+# Only request bodies starting with "lume" or "sshpass" are accepted; the
+# rest of the body becomes the argument string.
+# NOTE(review): security - this is a prefix match and the remaining text is
+# later executed by a shell, so any client that can reach the tunnel port can
+# run arbitrary commands as this user. Confirm the listener is only reachable
+# from trusted clients.
+if [[ "$command" == lume* ]]; then
+    executable="$(which lume || echo "/usr/local/bin/lume")"
+    command_args="${command#lume}"  # Remove 'lume' from the command
+elif [[ "$command" == sshpass* ]]; then
+    executable="$(which sshpass || echo "/usr/local/bin/sshpass")"
+    command_args="${command#sshpass}"
+else
+    send_error_response "400 Bad Request" "Unsupported command: $command"
+fi
+
+# Check if executable exists (send_error_response exits the script)
+if [ ! -x "$executable" ]; then
+    send_error_response "500 Internal Server Error" "Executable not found or not executable: $executable"
+fi
+
+# Create a temporary file to store the command.
+# The file holds the command line as text and is run as a script, so the
+# shell parses the arguments, including any here-document in the body.
+temp_file=$(mktemp)
+echo "$executable $command_args" > "$temp_file"
+chmod +x "$temp_file"
+
+# Create a FIFO (named pipe) for capturing output.
+# mktemp -u only generates a name; mkfifo creates the pipe at that path.
+fifo=$(mktemp -u)
+mkfifo "$fifo"
+
+# Execute the command and pipe its output through awk to ensure line-buffering.
+# Runs in the background; stdout and stderr are merged into the FIFO.
+{
+    log_debug "Executing: $executable $command_args"
+    "$temp_file" 2>&1 | awk '{ print; fflush() }' > "$fifo"
+} &
+
+# Stream the output from the FIFO as an HTTP response.
+# cat returns once the background writer closes the FIFO; the EXIT trap then
+# removes the temporary file and the FIFO.
+{
+    echo -e "HTTP/1.1 200 OK\r"
+    echo -e "Content-Type: text/plain\r"
+    echo -e "\r"
+    cat "$fifo"
+} 
--- a/libs/lumier/src/config/constants.sh
+++ b/libs/lumier/src/config/constants.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+# Port configuration
+TUNNEL_PORT=8080
+VNC_PORT=8006
+
+# Host configuration
+TUNNEL_HOST="host.docker.internal"
+
+# Default VM configuration
+DEFAULT_RAM_SIZE="8192"
+DEFAULT_CPU_CORES="4"
+DEFAULT_DISK_SIZE="100"
+DEFAULT_VM_NAME="lumier"
+DEFAULT_VM_VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest"
+
+# Paths
+NOVNC_PATH="/opt/noVNC"
+LIFECYCLE_HOOKS_DIR="/run/hooks"
+
+# VM connection details
+HOST_USER="lume"
+HOST_PASSWORD="lume"
+SSH_RETRY_ATTEMPTS=20
+SSH_RETRY_INTERVAL=5 
--- a/libs/lumier/src/hooks/on-logon.sh
+++ b/libs/lumier/src/hooks/on-logon.sh
@@ -0,0 +1,8 @@
+# Logon hook: source setup.sh from the shared data folder when it exists.
+setup_script="$DATA_FOLDER_PATH/setup.sh"
+
+if [ ! -f "$setup_script" ]; then
+    echo "Setup script not found at: $setup_script"
+else
+    chmod +x "$setup_script"
+    # Sourced (not executed) so the script runs in this shell
+    source "$setup_script"
+fi
--- a/libs/lumier/src/lib/utils.sh
+++ b/libs/lumier/src/lib/utils.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+# Function to wait for SSH to become available
+wait_for_ssh() {
+    local host_ip=$1
+    local user=$2
+    local password=$3
+    local retry_interval=${4:-5}   # Default retry interval is 5 seconds
+    local max_retries=${5:-20}    # Default maximum retries is 20 (0 for infinite)
+
+    echo "Waiting for SSH to become available on $host_ip..."
+
+    local retry_count=0
+    while true; do
+        # Try to connect via SSH
+        sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host_ip" "exit"
+
+        # Check the exit status of the SSH command
+        if [ $? -eq 0 ]; then
+            echo "SSH is ready on $host_ip!"
+            return 0
+        fi
+
+        # Increment retry count
+        ((retry_count++))
+        
+        # Exit if maximum retries are reached
+        if [ $max_retries -ne 0 ] && [ $retry_count -ge $max_retries ]; then
+            echo "Maximum retries reached. SSH is not available."
+            return 1
+        fi
+
+        echo "SSH not ready. Retrying in $retry_interval seconds... (Attempt $retry_count)"
+        sleep $retry_interval
+    done
+}
+
+# Function to execute a script on a remote server using sshpass
+#
+# Arguments:
+#   $1 - host
+#   $2 - SSH user
+#   $3 - SSH password
+#   $4 - path of the local script to run remotely
+#   $5 - VNC password, exported to the remote script as VNC_PASSWORD
+#   $6 - optional data folder name; when given, VNC_PASSWORD and
+#        DATA_FOLDER_PATH are exported ahead of the script body
+# Returns 1 on missing arguments or when the remote execution fails.
+execute_remote_script() {
+    local host="$1"
+    local user="$2"
+    local password="$3"
+    local script_path="$4"
+    local vnc_password="$5"
+    local data_folder="$6"
+
+    # Check if all required arguments are provided
+    if [ -z "$host" ] || [ -z "$user" ] || [ -z "$password" ] || [ -z "$script_path" ] || [ -z "$vnc_password" ]; then
+        echo "Usage: execute_remote_script <host> <user> <password> <script_path> <vnc_password> [data_folder]"
+        return 1
+    fi
+
+    echo "VNC password exported to VM: $vnc_password"
+
+    data_folder_path="$VM_SHARED_FILES_PATH/$data_folder"
+    echo "Data folder path in VM: $data_folder_path"
+
+    # Read the script content and prepend the shebang.
+    # NOTE(review): the "\n" sequences below are literal backslash-n, not
+    # newlines; they appear to be expanded later by the `echo -e` in
+    # tunnel-script.sh - confirm before changing either side.
+    script_content="#!/usr/bin/env bash\n"
+    if [ -n "$data_folder" ]; then
+        script_content+="export VNC_PASSWORD='$vnc_password'\n"
+        script_content+="export DATA_FOLDER_PATH='$data_folder_path'\n"
+    fi
+    script_content+="$(<"$script_path")"
+
+    # Use a here-document to send the script content. The call sits inside
+    # `if !` so a failure reaches the message below instead of tripping
+    # `set -e` in the sourcing script first.
+    if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host" "bash -s" <<EOF
+$script_content
+EOF
+    then
+        echo "Failed to execute script on remote host $host."
+        return 1
+    fi
+}
+
+# Example usage
+# execute_remote_script "192.168.1.100" "username" "password" "/path/to/script.sh" "vnc-password" "data"
+#
+
+extract_json_field() {
+    local field_name=$1
+    local input=$2
+    local result
+    result=$(echo "$input" | grep -oP '"'"$field_name"'"\s*:\s*"\K[^"]+')
+    if [[ $? -ne 0 ]]; then
+        echo ""
+    else
+        echo "$result"
+    fi
+}
+
+# Print the string value of field $1 read from the JSON file at path $2.
+extract_json_field_from_file() {
+    local key=$1
+    local path=$2
+    local contents
+    contents=$(<"$path")
+    extract_json_field "$key" "$contents"
+}
+
+extract_json_field_from_text() {
+    local field_name=$1
+    local json_text=$2
+    extract_json_field "$field_name" "$json_text"
+}
--- a/libs/lumier/src/lib/vm.sh
+++ b/libs/lumier/src/lib/vm.sh
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+
+# Bring up the VM named $VM_NAME and wait until it is reachable.
+#
+# Steps: pull the image if the VM does not exist (or stop it if it is already
+# running), apply the CPU/memory/display settings, start it through lume_run
+# in the background, poll `lume get` until an IP address and VNC URL appear,
+# wait for SSH, export VNC_PORT/VNC_PASSWORD and run the on-logon script in
+# the VM.
+# Reads: VM_NAME, HOST_STORAGE_PATH, VERSION, CPU_CORES, RAM_SIZE, DISPLAY,
+# HOST_DATA_PATH, HOST_USER, HOST_PASSWORD, DATA_FOLDER.
+# Exits the script with status 1 when the VM does not come up in time.
+start_vm() {
+    # Determine storage path for VM
+    STORAGE_PATH="$HOST_STORAGE_PATH"
+    if [ -z "$STORAGE_PATH" ]; then
+        STORAGE_PATH="storage_${VM_NAME}"
+    fi
+
+    # Check if VM exists and its status using JSON format
+    # (stderr is captured too, so the "not found" message can be detected)
+    VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>&1)
+
+    # Check if VM not found error
+    if [[ $VM_INFO == *"Virtual machine not found"* ]]; then
+        # Only the part of VERSION after the last "/" is passed to lume pull
+        IMAGE_NAME="${VERSION##*/}"
+        lume pull "$IMAGE_NAME" "$VM_NAME" --storage "$STORAGE_PATH"
+    else
+        # Parse the JSON status - check if it contains "status" : "running"
+        if [[ $VM_INFO == *'"status" : "running"'* ]]; then
+            lume_stop "$VM_NAME" "$STORAGE_PATH"
+            # lume stop "$VM_NAME" --storage "$STORAGE_PATH"
+        fi
+    fi
+
+    # Set VM parameters
+    lume set "$VM_NAME" --cpu "$CPU_CORES" --memory "${RAM_SIZE}MB" --display "$DISPLAY" --storage "$STORAGE_PATH"
+
+    # Fetch VM configuration (CONFIG_JSON is not read again in this function)
+    CONFIG_JSON=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json)
+    
+    # Setup data directory args if necessary
+    SHARED_DIR_ARGS=""
+    if [ -d "/data" ]; then
+        if [ -n "$HOST_DATA_PATH" ]; then
+            SHARED_DIR_ARGS="--shared-dir=$HOST_DATA_PATH"
+        else
+            echo "Warning: /data volume exists but HOST_DATA_PATH is not set. Cannot mount volume."
+        fi
+    fi
+
+    # Run VM with VNC and shared directory using curl.
+    # Backgrounded so this function can poll for the VM's status below.
+    lume_run $SHARED_DIR_ARGS --storage "$STORAGE_PATH" "$VM_NAME" &
+    # lume run "$VM_NAME" --storage "$STORAGE_PATH" --no-display
+
+    # Wait for VM to be running and VNC URL to be available
+    # (30 attempts, 2 seconds apart)
+    vm_ip=""
+    vnc_url=""
+    max_attempts=30
+    attempt=0
+    
+    while [ $attempt -lt $max_attempts ]; do
+        # Get VM info as JSON
+        VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null)
+        
+        # Check if VM has status 'running'
+        if [[ $VM_INFO == *'"status" : "running"'* ]]; then
+            # Extract IP address using the existing function from utils.sh
+            vm_ip=$(extract_json_field "ipAddress" "$VM_INFO")
+            # Extract VNC URL using the existing function from utils.sh
+            vnc_url=$(extract_json_field "vncUrl" "$VM_INFO")
+            
+            # If we have both IP and VNC URL, break the loop
+            if [ -n "$vm_ip" ] && [ -n "$vnc_url" ]; then
+                break
+            fi
+        fi
+        
+        sleep 2
+        attempt=$((attempt + 1))
+    done
+    
+    if [ -z "$vm_ip" ] || [ -z "$vnc_url" ]; then
+        echo "Timed out waiting for VM to start or VNC URL to become available."
+        lume_stop "$VM_NAME" "$STORAGE_PATH" > /dev/null 2>&1
+        # lume stop "$VM_NAME" --storage "$STORAGE_PATH" > /dev/null 2>&1
+        exit 1
+    fi
+
+        
+    # Parse VNC URL to extract password and port:
+    # the password is the text between the last ':' before '@' and the '@';
+    # the port is the trailing run of digits after the final ':'
+    VNC_PASSWORD=$(echo "$vnc_url" | sed -n 's/.*:\(.*\)@.*/\1/p')
+    VNC_PORT=$(echo "$vnc_url" | sed -n 's/.*:\([0-9]\+\)$/\1/p')
+    
+    # Wait for SSH to become available (5 s interval, 20 attempts)
+    wait_for_ssh "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" 5 20
+
+    # Export VNC variables for entry.sh to use
+    export VNC_PORT
+    export VNC_PASSWORD
+    
+    # Execute on-logon.sh if present
+    # NOTE(review): this path is /run/lifecycle while constants.sh sets
+    # LIFECYCLE_HOOKS_DIR to /run/hooks - confirm which one the image provides
+    on_logon_script="/run/lifecycle/on-logon.sh"
+    if [ -f "$on_logon_script" ]; then
+        execute_remote_script "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" "$on_logon_script" "$VNC_PASSWORD" "$DATA_FOLDER"
+    fi
+
+    # The VM is still running because the background lume_run request was never stopped.
+    # To stop the VM later, use lume_stop (this function does not record a PID).
+}
+
+stop_vm() {
+    echo "Stopping VM '$VM_NAME'..."
+    STORAGE_PATH="$HOST_STORAGE_PATH"
+    if [ -z "$STORAGE_PATH" ]; then
+        STORAGE_PATH="storage_${VM_NAME}"
+    fi
+    # Check if the VM exists and is running (use lume get for speed)
+    VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null)
+    if [[ -z "$VM_INFO" || $VM_INFO == *"Virtual machine not found"* ]]; then
+        echo "VM '$VM_NAME' does not exist."
+    elif [[ $VM_INFO == *'"status" : "running"'* ]]; then
+        lume_stop "$VM_NAME" "$STORAGE_PATH"
+        echo "VM '$VM_NAME' was running and is now stopped."
+    elif [[ $VM_INFO == *'"status" : "stopped"'* ]]; then
+        echo "VM '$VM_NAME' is already stopped."
+    else
+        echo "Unknown VM status for '$VM_NAME'."
+    fi
+}
+
+# Succeed only when the VM named $VM_NAME reports status "running".
+#
+# Uses the same `lume get ... -f json` status check as start_vm and stop_vm.
+# Merely finding the name in `lume ls` would also succeed for a stopped VM or
+# for another VM whose name contains $VM_NAME.
+is_vm_running() {
+    local storage="${HOST_STORAGE_PATH:-storage_${VM_NAME}}"
+    local info
+    info=$(lume get "$VM_NAME" --storage "$storage" -f json 2>/dev/null) || return 1
+    [[ $info == *'"status" : "running"'* ]]
+}
+
+# Stop VM with storage location specified using curl
+lume_stop() {
+    local vm_name="$1"
+    local storage="$2"
+    curl --connect-timeout 6000 \
+      --max-time 5000 \
+      -X POST \
+      -H "Content-Type: application/json" \
+      -d '{"storage":"'$storage'"}' \
+      "http://host.docker.internal:3000/lume/vms/${vm_name}/stop"
+}
+
+# Run VM with VNC client started and shared directory using curl
+lume_run() {
+    # Parse args
+    local shared_dir=""
+    local storage="ssd"
+    local vm_name="lume_vm"
+    local no_display=true
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --shared-dir=*)
+                shared_dir="${1#*=}"
+                shift
+                ;;
+            --storage)
+                storage="$2"
+                shift 2
+                ;;
+            --no-display)
+                no_display=true
+                shift
+                ;;
+            *)
+                # Assume last arg is VM name if not an option
+                vm_name="$1"
+                shift
+                ;;
+        esac
+    done
+    
+    # Default to ~/Projects if not provided
+    if [[ -z "$shared_dir" ]]; then
+        shared_dir="~/Projects"
+    fi
+    
+    local json_body="{\"noDisplay\": true, \"sharedDirectories\": [{\"hostPath\": \"$shared_dir\", \"readOnly\": false}], \"storage\": \"$storage\", \"recoveryMode\": false}"
+    local curl_cmd="curl --connect-timeout 6000 \
+      --max-time 5000 \
+      -X POST \
+      -H 'Content-Type: application/json' \
+      -d '$json_body' \
+      http://host.docker.internal:3000/lume/vms/$vm_name/run"
+    echo "[lume_run] Running:"
+    echo "$curl_cmd"
+    eval "$curl_cmd"
+}
--- a/libs/mcp-server/README.md
+++ b/libs/mcp-server/README.md
@@ -68,13 +68,51 @@ You can then use the script in your MCP configuration like this:
        "CUA_AGENT_LOOP": "OMNI",
        "CUA_MODEL_PROVIDER": "ANTHROPIC",
        "CUA_MODEL_NAME": "claude-3-7-sonnet-20250219",
-        "ANTHROPIC_API_KEY": "your-api-key"
+        "CUA_PROVIDER_API_KEY": "your-api-key"
      }
    }
  }
 }
 ```

+## Development Guide
+
+If you want to develop with the cua-mcp-server directly without installation, you can use this configuration:
+
+```json
+{
+  "mcpServers": {
+    "cua-agent": {
+      "command": "/bin/bash",
+      "args": ["~/cua/libs/mcp-server/scripts/start_mcp_server.sh"],
+      "env": {
+        "CUA_AGENT_LOOP": "UITARS",
+        "CUA_MODEL_PROVIDER": "OAICOMPAT",
+        "CUA_MODEL_NAME": "ByteDance-Seed/UI-TARS-1.5-7B",
+        "CUA_PROVIDER_BASE_URL": "https://****************.us-east-1.aws.endpoints.huggingface.cloud/v1",
+        "CUA_PROVIDER_API_KEY": "your-api-key"
+      }
+    }
+  }
+}
+```
+
+This configuration:
+- Uses the start_mcp_server.sh script which automatically sets up the Python path and runs the server module
+- Works with Claude Desktop, Cursor, or any other MCP client
+- Automatically uses your development code without requiring installation
+
+Just add this to your MCP client's configuration and it will use your local development version of the server.
+
+### Troubleshooting
+
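+If you get a `/bin/bash: ~/cua/libs/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, the `~` was passed to bash literally instead of being expanded. Replace it with the full absolute path to the script (for example, `/Users/<you>/cua/libs/mcp-server/scripts/start_mcp_server.sh`).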
+
+To see the Claude Desktop MCP logs:
+```
+tail -n 20 -f ~/Library/Logs/Claude/mcp*.log
+```
+
 ## Claude Desktop Integration

 To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`):
--- a/libs/mcp-server/mcp_server/server.py
+++ b/libs/mcp-server/mcp_server/server.py
@@ -1,9 +1,10 @@
 import asyncio
+import base64
 import logging
 import os
 import sys
 import traceback
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, Tuple

 # Configure logging to output to stderr for debug visibility
 logging.basicConfig(
@@ -17,7 +18,7 @@ logger = logging.getLogger("mcp-server")
 logger.debug("MCP Server module loading...")

 try:
-    from mcp.server.fastmcp import Context, FastMCP
+    from mcp.server.fastmcp import Context, FastMCP, Image

    logger.debug("Successfully imported FastMCP")
 except ImportError as e:
@@ -49,16 +50,37 @@ def serve() -> FastMCP:
    server = FastMCP("cua-agent")

    @server.tool()
-    async def run_cua_task(ctx: Context, task: str) -> str:
+    async def screenshot_cua(ctx: Context) -> Image:
        """
-        Run a Computer-Use Agent (CUA) task and return the results.
+        Take a screenshot of the current macOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state.
+
+        Args:
+            ctx: The MCP context
+
+        Returns:
+            An image resource containing the screenshot
+        """
+        global global_computer
+        if global_computer is None:  # first use: create and start the shared Computer
+            global_computer = Computer(verbosity=logging.INFO)
+            await global_computer.run()
+        screenshot = await global_computer.interface.screenshot()
+        return Image(
+            format="png",
+            data=screenshot
+        )
+
+    @server.tool()
+    async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]:
+        """
+        Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results.

        Args:
            ctx: The MCP context
            task: The instruction or task for the agent to perform

        Returns:
-            A string containing the agent's response
+            A tuple containing the agent's response and the final screenshot
        """
        global global_computer

@@ -72,12 +94,7 @@ def serve() -> FastMCP:

            # Determine which loop to use
            loop_str = os.getenv("CUA_AGENT_LOOP", "OMNI")
-            if loop_str == "OPENAI":
-                loop = AgentLoop.OPENAI
-            elif loop_str == "ANTHROPIC":
-                loop = AgentLoop.ANTHROPIC
-            else:
-                loop = AgentLoop.OMNI
+            loop = getattr(AgentLoop, loop_str)

            # Determine provider
            provider_str = os.getenv("CUA_MODEL_PROVIDER", "ANTHROPIC")
@@ -89,6 +106,9 @@ def serve() -> FastMCP:
            # Get base URL for provider (if needed)
            provider_base_url = os.getenv("CUA_PROVIDER_BASE_URL", None)

+            # Get api key for provider (if needed)
+            api_key = os.getenv("CUA_PROVIDER_API_KEY", None)
+
            # Create agent with the specified configuration
            agent = ComputerAgent(
                computer=global_computer,
@@ -98,6 +118,7 @@ def serve() -> FastMCP:
                    name=model_name,
                    provider_base_url=provider_base_url,
                ),
+                api_key=api_key,
                save_trajectory=False,
                only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
                verbosity=logging.INFO,
@@ -107,33 +128,34 @@ def serve() -> FastMCP:
            full_result = ""
            async for result in agent.run(task):
                logger.info(f"Agent step complete: {result.get('id', 'unknown')}")
+                ctx.info(f"Agent step complete: {result.get('id', 'unknown')}")

                # Add response ID to output
                full_result += f"\n[Response ID: {result.get('id', 'unknown')}]\n"
-
-                # Extract and concatenate text responses
-                if "text" in result:
-                    # Handle both string and dict responses
-                    text_response = result.get("text", "")
-                    if isinstance(text_response, str):
-                        full_result += f"Response: {text_response}\n"
-                    else:
-                        # If it's a dict or other structure, convert to string representation
-                        full_result += f"Response: {str(text_response)}\n"
-
-                # Log detailed information
-                if "tools" in result:
-                    tools_info = result.get("tools")
-                    logger.debug(f"Tools used: {tools_info}")
-                    full_result += f"\nTools used: {tools_info}\n"
+                
+                if "content" in result:
+                    full_result += f"Response: {result.get('content', '')}\n"

                # Process output if available
                outputs = result.get("output", [])
                for output in outputs:
                    output_type = output.get("type")
-                    if output_type == "reasoning":
+                    if output_type == "message":
+                        logger.debug(f"Message: {output}")
+                        content = output.get("content", [])
+                        for content_part in content:
+                            if content_part.get("text"):
+                                full_result += f"\nMessage: {content_part.get('text', '')}\n"
+                    elif output_type == "reasoning":
                        logger.debug(f"Reasoning: {output}")
-                        full_result += f"\nReasoning: {output.get('content', '')}\n"
+                        
+                        summary_content = output.get("summary", [])
+                        if summary_content:
+                            for summary_part in summary_content:
+                                if summary_part.get("text"):
+                                    full_result += f"\nReasoning: {summary_part.get('text', '')}\n"
+                        else:
+                            full_result += f"\nReasoning: {output.get('text', output.get('content', ''))}\n"
                    elif output_type == "computer_call":
                        logger.debug(f"Computer call: {output}")
                        action = output.get("action", "")
@@ -144,17 +166,25 @@ def serve() -> FastMCP:
                full_result += "\n" + "-" * 40 + "\n"

            logger.info(f"CUA task completed successfully")
-            return full_result or "Task completed with no text output."
+            ctx.info(f"CUA task completed successfully")
+            return (
+                full_result or "Task completed with no text output.",
+                Image(
+                    format="png",
+                    data=await global_computer.interface.screenshot()
+                )
+            )

        except Exception as e:
            error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
+            ctx.error(error_msg)
            return f"Error during task execution: {str(e)}"

    @server.tool()
-    async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> str:
+    async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List:
        """
-        Run multiple CUA tasks in sequence and return the combined results.
+        Run multiple CUA tasks in a MacOS VM in sequence and return the combined results.

        Args:
            ctx: The MCP context
@@ -164,13 +194,15 @@ def serve() -> FastMCP:
            Combined results from all tasks
        """
        results = []
-
        for i, task in enumerate(tasks):
            logger.info(f"Running task {i+1}/{len(tasks)}: {task}")
-            result = await run_cua_task(ctx, task)
-            results.append(f"Task {i+1}: {task}\nResult: {result}\n")
-
-        return "\n".join(results)
+            await ctx.info(f"Running task {i+1}/{len(tasks)}: {task}")  # Context log/progress calls are coroutines
+            await ctx.report_progress(i / len(tasks))
+            result = await run_cua_task(ctx, task)
+            # A failed task yields a bare str; extend() would splice it in char by char.
+            results.extend(result if isinstance(result, tuple) else [result])
+            await ctx.report_progress((i + 1) / len(tasks))
+        return results

    return server

--- a/libs/mcp-server/scripts/start_mcp_server.sh
+++ b/libs/mcp-server/scripts/start_mcp_server.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Launch the MCP server from a source checkout using the repository's .venv interpreter.
+set -e
+
+# Set the CUA repository path based on script location
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )"
+PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python"
+
+# Set Python path to include all necessary libraries
+export PYTHONPATH="${CUA_REPO_DIR}/libs/mcp-server:${CUA_REPO_DIR}/libs/agent:${CUA_REPO_DIR}/libs/computer:${CUA_REPO_DIR}/libs/core:${CUA_REPO_DIR}/libs/pylume"
+
+# Run the MCP server directly as a module (quoted so checkout paths with spaces work)
+"$PYTHON_PATH" -m mcp_server.server
--- a/notebooks/blog/build-your-own-operator-on-macos-1.ipynb
+++ b/notebooks/blog/build-your-own-operator-on-macos-1.ipynb
@@ -145,9 +145,8 @@
    "                await computer.interface.press_key(key)\n",
    "    \n",
    "    elif action_type == \"wait\":\n",
-    "        wait_time = action.time\n",
-    "        print(f\"Waiting for {wait_time} seconds\")\n",
-    "        await asyncio.sleep(wait_time)\n",
+    "        print(f\"Waiting for 2 seconds\")\n",
+    "        await asyncio.sleep(2)\n",
    "    \n",
    "    elif action_type == \"screenshot\":\n",
    "        print(\"Taking screenshot\")\n",