Merge branch 'main' into feature/agent/uitars-mlx

This commit is contained in:
Dillon DuPont
2025-05-04 21:40:07 -04:00
59 changed files with 2345 additions and 396 deletions

View File

@@ -151,6 +151,15 @@
"contributions": [
"code"
]
},
{
"login": "FinnBorge",
"name": "FinnBorge",
"avatar_url": "https://avatars.githubusercontent.com/u/9272726?v=4",
"profile": "https://github.com/FinnBorge",
"contributions": [
"code"
]
}
]
}

8
.gitignore vendored
View File

@@ -15,7 +15,8 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib/*
!libs/lumier/src/lib/
lib64/
parts/
sdist/
@@ -242,4 +243,7 @@ trajectories/
.storage/
# Gradio settings
.gradio_settings.json
.gradio_settings.json
# Lumier Storage
storage/

30
.vscode/lumier.code-workspace vendored Normal file
View File

@@ -0,0 +1,30 @@
{
"folders": [
{
"name": "lumier",
"path": "../libs/lumier"
},
{
"name": "lume",
"path": "../libs/lume"
}
],
"settings": {
"files.exclude": {
"**/.git": true,
"**/.svn": true,
"**/.hg": true,
"**/CVS": true,
"**/.DS_Store": true
}
},
"tasks": {
"version": "2.0.0",
"tasks": [
]
},
"launch": {
"configurations": [
]
}
}

View File

@@ -47,6 +47,13 @@ If you only need the virtualization capabilities:
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
```
Optionally, if you don't want Lume to run as a background service:
```bash
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh) --no-background-service"
```
**Note:** If you choose this option, you'll need to start the Lume API service manually by running `lume serve` in your terminal whenever you need it. In particular, if you follow Option 2 below, run `lume serve` after completing step 1.
For Lume usage instructions, refer to the [Lume documentation](./libs/lume/README.md).
### Option 2: Full Computer-Use Agent Capabilities
@@ -62,17 +69,12 @@ If you want to use AI agents with virtualized environments:
lume pull macos-sequoia-cua:latest
```
3. Start Lume daemon service:
```bash
lume serve
```
4. Install the Python libraries:
3. Install the Python libraries:
```bash
pip install cua-computer cua-agent[all]
```
5. Use the libraries in your Python code:
4. Use the libraries in your Python code:
```python
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
@@ -80,7 +82,7 @@ If you want to use AI agents with virtualized environments:
async with Computer(verbosity=logging.DEBUG) as macos_computer:
agent = ComputerAgent(
computer=macos_computer,
loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC
loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC
model=LLM(provider=LLMProvider.OPENAI) # or LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit")
)
@@ -95,7 +97,7 @@ If you want to use AI agents with virtualized environments:
Explore the [Agent Notebook](./notebooks/) for a ready-to-run example.
6. Optionally, you can use the Agent with a Gradio UI:
5. Optionally, you can use the Agent with a Gradio UI:
```python
from utils import load_dotenv_files
@@ -228,6 +230,7 @@ Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonica
<tr>
<td align="center" valign="top" width="14.28%"><a href="https://www.encona.com/"><img src="https://avatars.githubusercontent.com/u/891558?v=4?s=100" width="100px;" alt="Rahim Nathwani"/><br /><sub><b>Rahim Nathwani</b></sub></a><br /><a href="#code-rahimnathwani" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://mjspeck.github.io/"><img src="https://avatars.githubusercontent.com/u/20689127?v=4?s=100" width="100px;" alt="Matt Speck"/><br /><sub><b>Matt Speck</b></sub></a><br /><a href="#code-mjspeck" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/FinnBorge"><img src="https://avatars.githubusercontent.com/u/9272726?v=4?s=100" width="100px;" alt="FinnBorge"/><br /><sub><b>FinnBorge</b></sub></a><br /><a href="#code-FinnBorge" title="Code">💻</a></td>
</tr>
</tbody>
</table>

View File

@@ -50,10 +50,10 @@ async with Computer() as macos_computer:
# model=LLM(provider=LLMProvider.ANTHROPIC)
# or
# loop=AgentLoop.OMNI,
# model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
# or
# loop=AgentLoop.UITARS,
# model=LLM(provider=LLMProvider.OAICOMPAT, model="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
# model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
)
tasks = [

View File

@@ -279,6 +279,8 @@ class AnthropicLoop(BaseLoop):
messages,
model=self.model,
)
# Log standardized response for ease of parsing
self._log_api_call("agent_response", request=None, response=openai_compatible_response)
await queue.put(openai_compatible_response)
if not should_continue:

View File

@@ -161,15 +161,17 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
self.logger.info(f"Moving cursor to ({x}, {y})")
await self.computer.interface.move_cursor(x, y)
elif action == "left_click_drag":
self.logger.info(f"Dragging from ({x}, {y})")
# First move to the position
await self.computer.interface.move_cursor(x, y)
# Then perform drag operation - check if drag_to exists or we need to use other methods
try:
await self.computer.interface.drag_to(x, y)
except Exception as e:
self.logger.error(f"Error during drag operation: {str(e)}")
raise ToolError(f"Failed to perform drag: {str(e)}")
# Get the start coordinate from kwargs
start_coordinate = kwargs.get("start_coordinate")
if not start_coordinate:
raise ToolError("start_coordinate is required for left_click_drag action")
start_x, start_y = start_coordinate
end_x, end_y = x, y
self.logger.info(f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y})")
await self.computer.interface.move_cursor(start_x, start_y)
await self.computer.interface.drag_to(end_x, end_y)
# Wait briefly for any UI changes
await asyncio.sleep(0.5)

View File

@@ -670,6 +670,8 @@ class OmniLoop(BaseLoop):
parsed_screen=parsed_screen,
parser=self.parser
)
# Log standardized response for ease of parsing
self._log_api_call("agent_response", request=None, response=openai_compatible_response)
# Yield the response to the caller
yield openai_compatible_response

View File

@@ -276,6 +276,10 @@ class OpenAILoop(BaseLoop):
)
# Don't reset last_response_id to None - keep the previous value if available
# Log standardized response for ease of parsing
# Since this is the openAI responses format, we don't need to convert it to agent response format
self._log_api_call("agent_response", request=None, response=response)
# Process API response
await queue.put(response)

View File

@@ -44,6 +44,7 @@ Action = Literal[
"double_click",
"screenshot",
"scroll",
"drag",
]
@@ -162,9 +163,14 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
y = kwargs.get("y")
if x is None or y is None:
raise ToolError("x and y coordinates are required for scroll action")
scroll_x = kwargs.get("scroll_x", 0) // 20
scroll_y = kwargs.get("scroll_y", 0) // 20
scroll_x = kwargs.get("scroll_x", 0) // 50
scroll_y = kwargs.get("scroll_y", 0) // 50
return await self.handle_scroll(x, y, scroll_x, scroll_y)
elif type == "drag":
path = kwargs.get("path")
if not path or not isinstance(path, list) or len(path) < 2:
raise ToolError("path is required for drag action and must contain at least 2 points")
return await self.handle_drag(path)
elif type == "screenshot":
return await self.screenshot()
elif type == "wait":
@@ -240,11 +246,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
if len(mapped_keys) > 1:
# For key combinations (like Ctrl+C)
for k in mapped_keys:
await self.computer.interface.press_key(k)
await asyncio.sleep(0.1)
for k in reversed(mapped_keys):
await self.computer.interface.press_key(k)
await self.computer.interface.hotkey(*mapped_keys)
else:
# Single key press
await self.computer.interface.press_key(mapped_keys[0])
@@ -306,6 +308,41 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
self.logger.error(f"Error in handle_scroll: {str(e)}")
raise ToolError(f"Failed to scroll at ({x}, {y}): {str(e)}")
async def handle_drag(self, path: List[Dict[str, int]]) -> ToolResult:
    """Perform a left-button mouse drag along ``path`` and capture the screen afterwards.

    Args:
        path: List of coordinate points {"x": int, "y": int} defining the drag path

    Returns:
        ToolResult with a description of the drag and a base64-encoded screenshot

    Raises:
        ToolError: If any step of the drag or the screenshot fails
    """
    try:
        # The interface works with (x, y) tuples rather than {"x": ..., "y": ...} dicts
        coords = [(point["x"], point["y"]) for point in path]

        if len(coords) != 2:
            # Multi-point path: let the interface walk the whole path
            await self.computer.interface.drag(coords, button="left")
        else:
            # Exactly two points: position the cursor, then drag to the target
            (from_x, from_y), (to_x, to_y) = coords
            await self.computer.interface.move_cursor(from_x, from_y)
            await self.computer.interface.drag_to(to_x, to_y)

        # Give the UI a moment to settle before capturing it
        await asyncio.sleep(0.5)

        raw_image = await self.computer.interface.screenshot()
        encoded_image = base64.b64encode(raw_image).decode("utf-8")

        first, last = path[0], path[-1]
        summary = f"Dragged from ({first['x']}, {first['y']}) to ({last['x']}, {last['y']})"
        return ToolResult(output=summary, base64_image=encoded_image)
    except Exception as e:
        reason = str(e)
        self.logger.error(f"Error in handle_drag: {reason}")
        raise ToolError(f"Failed to perform drag operation: {reason}")
async def screenshot(self) -> ToolResult:
"""Take a screenshot."""
try:

View File

@@ -94,8 +94,15 @@ class OAICompatClient(BaseUITarsClient):
"""
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
final_messages = [{"role": "system", "content": system}]
final_messages = [
{
"role": "system",
"content": [
{ "type": "text", "text": system }
]
}
]
# Process messages
for item in messages:
if isinstance(item, dict):
@@ -138,8 +145,13 @@ class OAICompatClient(BaseUITarsClient):
message = {"role": "user", "content": [{"type": "text", "text": item}]}
final_messages.append(message)
payload = {"model": self.model, "messages": final_messages, "temperature": self.temperature}
payload["max_tokens"] = max_tokens or self.max_tokens
payload = {
"model": self.model,
"messages": final_messages,
"max_tokens": max_tokens or self.max_tokens,
"temperature": self.temperature,
"top_p": 0.7,
}
try:
async with aiohttp.ClientSession() as session:
@@ -178,25 +190,21 @@ class OAICompatClient(BaseUITarsClient):
response_text = await response.text()
logger.debug(f"Response content: {response_text}")
# if 503, then the endpoint is still warming up
if response.status == 503:
logger.error(f"Endpoint is still warming up, please try again later")
raise Exception(f"Endpoint is still warming up: {response_text}")
# Try to parse as JSON if the content type is appropriate
if "application/json" in response.headers.get('Content-Type', ''):
response_json = await response.json()
else:
raise Exception(f"Response is not JSON format")
# # Optionally try to parse it anyway
# try:
# import json
# response_json = json.loads(response_text)
# except json.JSONDecodeError as e:
# print(f"Failed to parse response as JSON: {e}")
if response.status != 200:
error_msg = response_json.get("error", {}).get(
"message", str(response_json)
)
logger.error(f"Error in API call: {error_msg}")
raise Exception(f"API error: {error_msg}")
logger.error(f"Error in API call: {response_text}")
raise Exception(f"API error: {response_text}")
return response_json
except Exception as e:

View File

@@ -17,10 +17,10 @@ from ...core.types import AgentResponse, LLMProvider
from ...core.visualization import VisualizationHelper
from computer import Computer
from .utils import add_box_token, parse_actions, parse_action_parameters
from .utils import add_box_token, parse_actions, parse_action_parameters, to_agent_response_format
from .tools.manager import ToolManager
from .tools.computer import ToolResult
from .prompts import COMPUTER_USE, SYSTEM_PROMPT
from .prompts import COMPUTER_USE, SYSTEM_PROMPT, MAC_SPECIFIC_NOTES
from .clients.oaicompat import OAICompatClient
from .clients.mlxvlm import MLXVLMUITarsClient
@@ -197,7 +197,7 @@ class UITARSLoop(BaseLoop):
if first_user_idx is not None and instruction:
# Create the computer use prompt
user_prompt = COMPUTER_USE.format(
instruction=instruction,
instruction='\n'.join([instruction, MAC_SPECIFIC_NOTES]),
language="English"
)
@@ -453,7 +453,7 @@ class UITARSLoop(BaseLoop):
# MAIN LOOP - IMPLEMENTING ABSTRACT METHOD
###########################################
async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[Dict[str, Any], None]:
async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
"""Run the agent loop with provided messages.
Args:
@@ -520,41 +520,16 @@ class UITARSLoop(BaseLoop):
# Update whether an action screenshot was saved this turn
action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
# Parse actions from the raw response
raw_response = response["choices"][0]["message"]["content"]
parsed_actions = parse_actions(raw_response)
# Extract thought content if available
thought = ""
if "Thought:" in raw_response:
thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", raw_response, re.DOTALL)
if thought_match:
thought = thought_match.group(1).strip()
agent_response = await to_agent_response_format(
response,
messages,
model=self.model,
)
# Log standardized response for ease of parsing
self._log_api_call("agent_response", request=None, response=agent_response)
yield agent_response
# Create standardized thought response format
thought_response = {
"role": "assistant",
"content": thought or raw_response,
"metadata": {
"title": "🧠 UI-TARS Thoughts"
}
}
# Create action response format
action_response = {
"role": "assistant",
"content": str(parsed_actions),
"metadata": {
"title": "🖱️ UI-TARS Actions",
}
}
# Yield both responses to the caller (thoughts first, then actions)
yield thought_response
if parsed_actions:
yield action_response
# Check if we should continue this conversation
running = should_continue
@@ -575,7 +550,8 @@ class UITARSLoop(BaseLoop):
logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
yield {
"error": str(e),
"role": "assistant",
"content": f"Error: {str(e)}",
"metadata": {"title": "❌ Error"},
}

View File

@@ -1,5 +1,9 @@
"""Prompts for UI-TARS agent."""
MAC_SPECIFIC_NOTES = """
(You are operating on macOS, use 'cmd' instead of 'ctrl' for most shortcuts e.g., hotkey(key='cmd c') for copy, hotkey(key='cmd v') for paste, hotkey(key='cmd t') for new tab).)
"""
SYSTEM_PROMPT = "You are a helpful assistant."
COMPUTER_USE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
@@ -56,4 +60,4 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
## User Instruction
{instruction}
"""
"""

View File

@@ -173,9 +173,13 @@ class ComputerTool(BaseComputerTool):
elif action == "hotkey":
if "keys" in kwargs:
keys = kwargs["keys"]
for key in keys:
await self.computer.interface.press_key(key)
if len(keys) > 1:
await self.computer.interface.hotkey(*keys)
else:
# Single key press
await self.computer.interface.press_key(keys[0])
# Wait for UI to update
await asyncio.sleep(0.3)

View File

@@ -4,9 +4,114 @@ import logging
import base64
import re
from typing import Any, Dict, List, Optional, Union, Tuple
from datetime import datetime
logger = logging.getLogger(__name__)
from ...core.types import AgentResponse
async def to_agent_response_format(
    response: Dict[str, Any],
    messages: List[Dict[str, Any]],
    model: Optional[str] = None,
) -> AgentResponse:
    """Convert raw UI-TARS response to agent response format.

    Args:
        response: Raw UI-TARS response. ``choices[0]["message"]["content"]`` and
            ``usage`` are read; ``model`` is read when the ``model`` argument is falsy.
        messages: List of messages in standard format (not used by this function)
        model: Optional model name; falls back to ``response["model"]``

    Returns:
        AgentResponse: Standardized agent response format
    """
    # Create unique IDs for this response
    response_id = f"resp_{datetime.now().strftime('%Y%m%d%H%M%S')}_{id(response)}"
    reasoning_id = f"rs_{response_id}"
    action_id = f"cu_{response_id}"

    # Parse actions from the raw response
    content = response["choices"][0]["message"]["content"]
    actions = parse_actions(content)

    # Extract thought content if available (text between "Thought:" and "Action:")
    reasoning_text = ""
    if "Thought:" in content:
        thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", content, re.DOTALL)
        if thought_match:
            reasoning_text = thought_match.group(1).strip()

    # Create output items
    output_items = []
    if reasoning_text:
        output_items.append({
            "type": "reasoning",
            "id": reasoning_id,
            "text": reasoning_text,
        })

    if actions:
        for i, action in enumerate(actions):
            action_name, tool_args = parse_action_parameters(action)
            if action_name == "finished":
                output_items.append({
                    "type": "message",
                    "role": "assistant",
                    "content": [{
                        "type": "output_text",
                        # parse_action_parameters returns no "content" key when the
                        # model emits finished() without content, so do not index it.
                        "text": tool_args.get("content", ""),
                    }],
                    "id": f"action_{i}_{action_id}",
                    "status": "completed",
                })
            else:
                # Avoid repeating the action name inside the arguments
                if tool_args.get("action") == action_name:
                    del tool_args["action"]
                output_items.append({
                    "type": "computer_call",
                    # NOTE(review): the id embeds the raw action string, not action_name;
                    # kept as-is, confirm whether that is intended.
                    "id": f"{action}_{i}_{action_id}",
                    "call_id": f"call_{i}_{action_id}",
                    "action": {"type": action_name, **tool_args},
                    "pending_safety_checks": [],
                    "status": "completed",
                })

    # Create agent response
    agent_response = AgentResponse(
        id=response_id,
        object="response",
        created_at=int(datetime.now().timestamp()),
        status="completed",
        error=None,
        incomplete_details=None,
        instructions=None,
        max_output_tokens=None,
        model=model or response["model"],
        output=output_items,
        parallel_tool_calls=True,
        previous_response_id=None,
        reasoning={"effort": "medium"},
        store=True,
        temperature=0.0,
        top_p=0.7,
        text={"format": {"type": "text"}},
        tool_choice="auto",
        tools=[
            {
                "type": "computer_use_preview",
                "display_height": 768,
                "display_width": 1024,
                "environment": "mac",
            }
        ],
        truncation="auto",
        usage=response["usage"],
        user=None,
        metadata={},
        response=response,
    )
    return agent_response
def add_box_token(input_string: str) -> str:
"""Add box tokens to the coordinates in the model response.
@@ -74,7 +179,13 @@ def parse_action_parameters(action: str) -> Tuple[str, Dict[str, Any]]:
"""
# Handle "finished" action
if action.startswith("finished"):
return "finished", {}
# Parse content if it exists
content_match = re.search(r"content='([^']*)'", action)
if content_match:
content = content_match.group(1)
return "finished", {"content": content}
else:
return "finished", {}
# Parse action parameters
action_match = re.match(r'(\w+)\((.*)\)', action)

View File

@@ -35,6 +35,7 @@ from pathlib import Path
from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
import gradio as gr
from gradio.components.chatbot import MetadataDict
from typing import cast
# Import from agent package
from agent.core.types import AgentResponse
@@ -332,63 +333,6 @@ def get_ollama_models() -> List[str]:
logging.error(f"Error getting Ollama models: {e}")
return []
def extract_synthesized_text(
result: Union[AgentResponse, Dict[str, Any]],
) -> Tuple[str, MetadataDict]:
"""Extract synthesized text from the agent result."""
synthesized_text = ""
metadata = MetadataDict()
if "output" in result and result["output"]:
for output in result["output"]:
if output.get("type") == "reasoning":
metadata["title"] = "🧠 Reasoning"
content = output.get("content", "")
if content:
synthesized_text += f"{content}\n"
elif output.get("type") == "message":
# Handle message type outputs - can contain rich content
content = output.get("content", [])
# Content is usually an array of content blocks
if isinstance(content, list):
for block in content:
if isinstance(block, dict) and block.get("type") == "output_text":
text_value = block.get("text", "")
if text_value:
synthesized_text += f"{text_value}\n"
elif output.get("type") == "computer_call":
action = output.get("action", {})
action_type = action.get("type", "")
# Create a descriptive text about the action
if action_type == "click":
button = action.get("button", "")
x = action.get("x", "")
y = action.get("y", "")
synthesized_text += f"Clicked {button} at position ({x}, {y}).\n"
elif action_type == "type":
text = action.get("text", "")
synthesized_text += f"Typed: {text}.\n"
elif action_type == "keypress":
# Extract key correctly from either keys array or key field
if isinstance(action.get("keys"), list):
key = ", ".join(action.get("keys"))
else:
key = action.get("key", "")
synthesized_text += f"Pressed key: {key}\n"
else:
synthesized_text += f"Performed {action_type} action.\n"
metadata["status"] = "done"
metadata["title"] = f"🛠️ {synthesized_text.strip().splitlines()[-1]}"
return synthesized_text.strip(), metadata
def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
"""Create or get the global Computer instance."""
global global_computer
@@ -457,66 +401,6 @@ def create_agent(
return global_agent
def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> Tuple[str, MetadataDict]:
"""Process agent results for the Gradio UI."""
# Extract text content
text_obj = result.get("text", {})
metadata = result.get("metadata", {})
# Create a properly typed MetadataDict
metadata_dict = MetadataDict()
metadata_dict["title"] = metadata.get("title", "")
metadata_dict["status"] = "done"
metadata = metadata_dict
# For OpenAI's Computer-Use Agent, text field is an object with format property
if (
text_obj
and isinstance(text_obj, dict)
and "format" in text_obj
and not text_obj.get("value", "")
):
content, metadata = extract_synthesized_text(result)
else:
if not text_obj:
text_obj = result
# For other types of results, try to get text directly
if isinstance(text_obj, dict):
if "value" in text_obj:
content = text_obj["value"]
elif "text" in text_obj:
content = text_obj["text"]
elif "content" in text_obj:
content = text_obj["content"]
else:
content = ""
else:
content = str(text_obj) if text_obj else ""
# If still no content but we have outputs, create a summary
if not content and "output" in result and result["output"]:
output = result["output"]
for out in output:
if out.get("type") == "reasoning":
content = out.get("content", "")
if content:
break
elif out.get("type") == "computer_call":
action = out.get("action", {})
action_type = action.get("type", "")
if action_type:
content = f"Performing action: {action_type}"
break
# Clean up the text - ensure content is a string
if not isinstance(content, str):
content = str(content) if content else ""
return content, metadata
def create_gradio_ui(
provider_name: str = "openai",
model_name: str = "gpt-4o",
@@ -921,17 +805,64 @@ def create_gradio_ui(
# Stream responses from the agent
async for result in global_agent.run(last_user_message):
# Process result
content, metadata = process_agent_result(result)
# Skip empty content
if content or metadata.get("title"):
history.append(
gr.ChatMessage(
role="assistant", content=content, metadata=metadata
print(f"DEBUG - Agent response ------- START")
from pprint import pprint
pprint(result)
print(f"DEBUG - Agent response ------- END")
def generate_gradio_messages():
if result.get("content"):
yield gr.ChatMessage(
role="assistant",
content=result.get("content", ""),
metadata=cast(MetadataDict, result.get("metadata", {}))
)
)
yield history
else:
outputs = result.get("output", [])
for output in outputs:
if output.get("type") == "message":
content = output.get("content", [])
for content_part in content:
if content_part.get("text"):
yield gr.ChatMessage(
role=output.get("role", "assistant"),
content=content_part.get("text", ""),
metadata=content_part.get("metadata", {})
)
elif output.get("type") == "reasoning":
# if it's openAI, we only have access to a summary of the reasoning
summary_content = output.get("summary", [])
if summary_content:
for summary_part in summary_content:
if summary_part.get("type") == "summary_text":
yield gr.ChatMessage(
role="assistant",
content=summary_part.get("text", "")
)
else:
summary_content = output.get("text", "")
if summary_content:
yield gr.ChatMessage(
role="assistant",
content=summary_content,
)
elif output.get("type") == "computer_call":
action = output.get("action", {})
action_type = action.get("type", "")
if action_type:
action_title = f"🛠️ Performing {action_type}"
if action.get("x") and action.get("y"):
action_title += f" at ({action['x']}, {action['y']})"
yield gr.ChatMessage(
role="assistant",
content=f"```json\n{json.dumps(action)}\n```",
metadata={"title": action_title}
)
for message in generate_gradio_messages():
history.append(message)
yield history
except Exception as e:
import traceback

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any
from typing import Optional, Dict, Any, List, Tuple
class BaseAccessibilityHandler(ABC):
"""Abstract base class for OS-specific accessibility handlers."""
@@ -59,6 +59,17 @@ class BaseAutomationHandler(ABC):
duration: How long the drag should take in seconds
"""
pass
@abstractmethod
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]:
"""Drag the cursor along a path of coordinates.
Args:
path: A list of (x, y) coordinate tuples defining the drag path
button: The mouse button to use ('left', 'middle', 'right')
duration: How long the drag should take in seconds
Returns:
A result dictionary; its exact keys are defined by the concrete handler
"""
pass
# Keyboard Actions
@abstractmethod

View File

@@ -1,7 +1,7 @@
import pyautogui
import base64
from io import BytesIO
from typing import Optional, Dict, Any, List
from typing import Optional, Dict, Any, List, Tuple
from ctypes import byref, c_void_p, POINTER
from AppKit import NSWorkspace # type: ignore
import AppKit
@@ -563,6 +563,39 @@ class MacOSAutomationHandler(BaseAutomationHandler):
except Exception as e:
return {"success": False, "error": str(e)}
async def drag(
    self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5
) -> Dict[str, Any]:
    """Drag the mouse along a path of coordinates using pyautogui.

    The cursor is moved to the first point, the button is pressed, the cursor
    visits every remaining point in order, and the button is released.

    Args:
        path: List of (x, y) coordinate tuples; must contain at least 2 points
        button: The mouse button to hold during the drag
        duration: Total drag time in seconds, split evenly across the path segments

    Returns:
        ``{"success": True}`` on success, otherwise
        ``{"success": False, "error": <message>}``.
    """
    try:
        if not path or len(path) < 2:
            return {"success": False, "error": "Path must contain at least 2 points"}

        # Move to the first point
        start_x, start_y = path[0]
        pyautogui.moveTo(start_x, start_y)

        # Press the mouse button
        pyautogui.mouseDown(button=button)

        # Distribute the duration evenly; len(path) >= 2 is guaranteed above
        step_duration = duration / (len(path) - 1)

        # Move through each subsequent point
        for x, y in path[1:]:
            pyautogui.moveTo(x, y, duration=step_duration)

        # Release the mouse button
        pyautogui.mouseUp(button=button)

        return {"success": True}
    except Exception as e:
        # Best-effort release so the button is not left held down after a failure.
        # Catch Exception (not a bare except) so KeyboardInterrupt, SystemExit and
        # task cancellation are never swallowed here.
        try:
            pyautogui.mouseUp(button=button)
        except Exception:
            pass
        return {"success": False, "error": str(e)}
# Keyboard Actions
async def type_text(self, text: str) -> Dict[str, Any]:
try:

View File

@@ -65,6 +65,7 @@ async def websocket_endpoint(websocket: WebSocket):
"type_text": manager.automation_handler.type_text,
"press_key": manager.automation_handler.press_key,
"drag_to": manager.automation_handler.drag_to,
"drag": manager.automation_handler.drag,
"hotkey": manager.automation_handler.hotkey,
"get_cursor_position": manager.automation_handler.get_cursor_position,
"get_screen_size": manager.automation_handler.get_screen_size,

View File

@@ -29,7 +29,7 @@ class Computer:
display: Union[Display, Dict[str, int], str] = "1024x768",
memory: str = "8GB",
cpu: str = "4",
os: OSType = "macos",
os_type: OSType = "macos",
name: str = "",
image: str = "macos-sequoia-cua:latest",
shared_directories: Optional[List[str]] = None,
@@ -68,6 +68,7 @@ class Computer:
self.image = image
self.port = port
self.host = host
self.os_type = os_type
# Store telemetry preference
self._telemetry_enabled = telemetry_enabled
@@ -129,8 +130,8 @@ class Computer:
self.shared_paths = []
if shared_directories:
for path in shared_directories:
abs_path = os.path.abspath(os.path.expanduser(path)) # type: ignore[attr-defined]
if not os.path.exists(abs_path): # type: ignore[attr-defined]
abs_path = os.path.abspath(os.path.expanduser(path))
if not os.path.exists(abs_path):
raise ValueError(f"Shared directory does not exist: {path}")
self.shared_paths.append(abs_path)
self._pylume_context = None
@@ -188,7 +189,7 @@ class Computer:
self._interface = cast(
BaseComputerInterface,
InterfaceFactory.create_interface_for_os(
os=self.os, ip_address=ip_address # type: ignore[arg-type]
os=self.os_type, ip_address=ip_address # type: ignore[arg-type]
),
)
@@ -288,13 +289,13 @@ class Computer:
try:
# Initialize the interface using the factory with the specified OS
self.logger.info(f"Initializing interface for {self.os} at {ip_address}")
self.logger.info(f"Initializing interface for {self.os_type} at {ip_address}")
from .interface.base import BaseComputerInterface
self._interface = cast(
BaseComputerInterface,
InterfaceFactory.create_interface_for_os(
os=self.os, ip_address=ip_address # type: ignore[arg-type]
os=self.os_type, ip_address=ip_address # type: ignore[arg-type]
),
)

View File

@@ -79,6 +79,17 @@ class BaseComputerInterface(ABC):
"""
pass
@abstractmethod
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
"""Drag the cursor along a path of coordinates.
Args:
path: List of (x, y) coordinate tuples defining the drag path
button: The mouse button to use ('left', 'middle', 'right')
duration: Total time in seconds that the drag operation should take
"""
pass
# Keyboard Actions
@abstractmethod
async def type_text(self, text: str) -> None:

View File

@@ -328,6 +328,11 @@ class MacOSComputerInterface(BaseComputerInterface):
"drag_to", {"x": x, "y": y, "button": button, "duration": duration}
)
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
await self._send_command(
"drag", {"path": path, "button": button, "duration": duration}
)
# Keyboard Actions
async def type_text(self, text: str) -> None:
await self._send_command("type_text", {"text": text})

View File

@@ -7,6 +7,9 @@ NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'u
# Special key literals
SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']
# Modifier key literals
ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']
# Function key literals
FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']
@@ -35,6 +38,14 @@ class Key(Enum):
BACKSPACE = 'backspace'
DELETE = 'del'
# Modifier keys
ALT = 'alt'
CTRL = 'ctrl'
SHIFT = 'shift'
WIN = 'win'
COMMAND = 'command'
OPTION = 'option'
# Function keys
F1 = 'f1'
F2 = 'f2'
@@ -73,14 +84,27 @@ class Key(Enum):
'escape': cls.ESCAPE,
'esc': cls.ESC,
'delete': cls.DELETE,
'del': cls.DELETE
'del': cls.DELETE,
# Modifier key mappings
'alt': cls.ALT,
'ctrl': cls.CTRL,
'control': cls.CTRL,
'shift': cls.SHIFT,
'win': cls.WIN,
'windows': cls.WIN,
'super': cls.WIN,
'command': cls.COMMAND,
'cmd': cls.COMMAND,
'⌘': cls.COMMAND,
'option': cls.OPTION,
'⌥': cls.OPTION,
}
normalized = key.lower().strip()
return key_mapping.get(normalized, key)
# Combined key type
KeyType = Union[Key, NavigationKey, SpecialKey, FunctionKey, str]
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]
class AccessibilityWindow(TypedDict):
"""Information about a window in the accessibility tree."""

View File

@@ -147,6 +147,14 @@ Install with a single command:
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
```
By default, Lume is installed as a background service that starts automatically on login. If you prefer to start the Lume API service manually when needed, you can use the `--no-background-service` option:
```bash
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh) --no-background-service"
```
**Note:** With this option, you'll need to manually start the Lume API service by running `lume serve` in your terminal whenever you need to use tools or libraries that rely on the Lume API (such as the Computer-Use Agent).
You can also download the `lume.pkg.tar.gz` archive from the [latest release](https://github.com/trycua/lume/releases), extract it, and install the package manually.
## Prebuilt Images

View File

@@ -20,24 +20,32 @@ INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"
GITHUB_REPO="trycua/cua"
LATEST_RELEASE_URL="https://api.github.com/repos/$GITHUB_REPO/releases/latest"
# Option to skip background service setup (default: install it)
INSTALL_BACKGROUND_SERVICE=true
# Parse command line arguments
while [ "$#" -gt 0 ]; do
case "$1" in
--install-dir=*)
INSTALL_DIR="${1#*=}"
;;
--no-background-service|--skip-background-service)
INSTALL_BACKGROUND_SERVICE=false
;;
--help)
echo "${BOLD}${BLUE}Lume Installer${NORMAL}"
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
echo " --help Display this help message"
echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
echo " --no-background-service Do not setup the Lume background service (LaunchAgent)"
echo " --help Display this help message"
echo ""
echo "Examples:"
echo " $0 # Install to $DEFAULT_INSTALL_DIR"
echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)"
echo " INSTALL_DIR=/opt/lume $0 # Install to /opt/lume (legacy env var support)"
echo " $0 # Install to $DEFAULT_INSTALL_DIR and setup background service"
echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)"
echo " $0 --no-background-service # Install without setting up the background service"
echo " INSTALL_DIR=/opt/lume $0 # Install to /opt/lume (legacy env var support)"
exit 0
;;
*)
@@ -173,11 +181,25 @@ install_binary() {
# Check if the installation directory is in PATH
if [ -n "${PATH##*$INSTALL_DIR*}" ]; then
SHELL_NAME=$(basename "$SHELL")
echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}"
echo "To add it, run one of these commands based on your shell:"
echo " For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
echo " For zsh: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc"
echo " For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
case "$SHELL_NAME" in
zsh)
echo "To add it, run:"
echo " echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zprofile"
;;
bash)
echo "To add it, run:"
echo " echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
;;
fish)
echo "To add it, run:"
echo " echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
;;
*)
echo "Add $INSTALL_DIR to your PATH in your shell profile file."
;;
esac
fi
}
@@ -188,11 +210,97 @@ main() {
create_temp_dir
download_release
install_binary
echo ""
echo "${GREEN}${BOLD}Lume has been successfully installed!${NORMAL}"
echo "Run ${BOLD}lume${NORMAL} to get started."
if [ "$INSTALL_BACKGROUND_SERVICE" = true ]; then
# --- Setup background service (LaunchAgent) for Lume ---
SERVICE_NAME="com.trycua.lume_daemon"
PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist"
LUME_BIN="$INSTALL_DIR/lume"
echo ""
echo "Setting up LaunchAgent to run lume daemon on login..."
# Create LaunchAgents directory if it doesn't exist
mkdir -p "$HOME/Library/LaunchAgents"
# Unload existing service if present
if [ -f "$PLIST_PATH" ]; then
echo "Existing LaunchAgent found. Unloading..."
launchctl unload "$PLIST_PATH" 2>/dev/null || true
fi
# Create the plist file
cat <<EOF > "$PLIST_PATH"
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>$SERVICE_NAME</string>
<key>ProgramArguments</key>
<array>
<string>$LUME_BIN</string>
<string>serve</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>WorkingDirectory</key>
<string>$HOME</string>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin</string>
<key>HOME</key>
<string>$HOME</string>
</dict>
<key>StandardOutPath</key>
<string>/tmp/lume_daemon.log</string>
<key>StandardErrorPath</key>
<string>/tmp/lume_daemon.error.log</string>
<key>ProcessType</key>
<string>Interactive</string>
<key>SessionType</key>
<string>Aqua</string>
</dict>
</plist>
EOF
# Set permissions
chmod 644 "$PLIST_PATH"
touch /tmp/lume_daemon.log /tmp/lume_daemon.error.log
chmod 644 /tmp/lume_daemon.log /tmp/lume_daemon.error.log
# Load the LaunchAgent
echo "Loading LaunchAgent..."
launchctl unload "$PLIST_PATH" 2>/dev/null || true
launchctl load "$PLIST_PATH"
echo "${GREEN}Lume daemon LaunchAgent installed and loaded. It will start automatically on login!${NORMAL}"
echo "To check status: launchctl list | grep $SERVICE_NAME"
echo "To view logs: tail -f /tmp/lume_daemon.log"
echo ""
echo "To remove the lume daemon service, run:"
echo " launchctl unload \"$PLIST_PATH\""
echo " rm \"$PLIST_PATH\""
else
SERVICE_NAME="com.trycua.lume_daemon"
PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist"
if [ -f "$PLIST_PATH" ]; then
echo "Removing existing Lume background service (LaunchAgent)..."
launchctl unload "$PLIST_PATH" 2>/dev/null || true
rm "$PLIST_PATH"
echo "Lume background service (LaunchAgent) removed."
else
echo "Skipping Lume background service (LaunchAgent) setup as requested (use --no-background-service)."
fi
fi
}
# Run the installation
main
main

View File

@@ -40,7 +40,7 @@ struct Create: AsyncParsableCommand {
)
var ipsw: String?
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
init() {

View File

@@ -12,7 +12,7 @@ struct Delete: AsyncParsableCommand {
@Flag(name: .long, help: "Force deletion without confirmation")
var force = false
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
init() {}

View File

@@ -12,7 +12,7 @@ struct Get: AsyncParsableCommand {
@Option(name: [.long, .customShort("f")], help: "Output format (json|text)")
var format: FormatOption = .text
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
init() {

View File

@@ -10,15 +10,22 @@ struct List: AsyncParsableCommand {
@Option(name: [.long, .customShort("f")], help: "Output format (json|text)")
var format: FormatOption = .text
@Option(name: .long, help: "Filter by storage location name")
var storage: String?
init() {
}
@MainActor
func run() async throws {
let manager = LumeController()
let vms = try manager.list()
let vms = try manager.list(storage: self.storage)
if vms.isEmpty && self.format == .text {
print("No virtual machines found")
if let storageName = self.storage {
print("No virtual machines found in storage '\(storageName)'")
} else {
print("No virtual machines found")
}
} else {
try VMDetailsPrinter.printStatus(vms, format: self.format)
}

View File

@@ -19,7 +19,7 @@ struct Pull: AsyncParsableCommand {
@Option(help: "Organization to pull from. Defaults to trycua")
var organization: String = "trycua"
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
init() {}

View File

@@ -48,7 +48,7 @@ struct Run: AsyncParsableCommand {
@Option(help: "For MacOS VMs only, boot into the VM in recovery mode")
var recoveryMode: Bool = false
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
private var parsedSharedDirectories: [SharedDirectory] {

View File

@@ -21,7 +21,7 @@ struct Set: AsyncParsableCommand {
@Option(help: "New display resolution in format WIDTHxHEIGHT.")
var display: VMDisplayResolution?
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
init() {

View File

@@ -9,7 +9,7 @@ struct Stop: AsyncParsableCommand {
@Argument(help: "Name of the virtual machine", completion: .custom(completeVMName))
var name: String
@Option(name: .customLong("storage"), help: "VM storage location to use")
@Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
var storage: String?
init() {

View File

@@ -643,7 +643,7 @@ class ImageContainerRegistry: @unchecked Sendable {
image: String,
name: String?,
locationName: String? = nil
) async throws {
) async throws -> VMDirectory {
guard !image.isEmpty else {
throw ValidationError("Image name cannot be empty")
}
@@ -652,7 +652,16 @@ class ImageContainerRegistry: @unchecked Sendable {
// Use provided name or derive from image
let vmName = name ?? image.split(separator: ":").first.map(String.init) ?? ""
let vmDir = try home.getVMDirectory(vmName, storage: locationName)
// Determine if locationName is a direct path or a named storage location
let vmDir: VMDirectory
if let locationName = locationName, locationName.contains("/") || locationName.contains("\\") {
// Direct path
vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: locationName)
} else {
// Named storage or default location
vmDir = try home.getVMDirectory(vmName, storage: locationName)
}
// Optimize network early in the process
optimizeNetworkSettings()
@@ -991,6 +1000,7 @@ class ImageContainerRegistry: @unchecked Sendable {
Logger.info(
"Run 'lume run \(vmName)' to reduce the disk image file size by using macOS sparse file system"
)
return vmDir
}
// Helper function to clean up a specific cache entry
@@ -3024,7 +3034,8 @@ class ImageContainerRegistry: @unchecked Sendable {
// Replace original with optimized version
try FileManager.default.removeItem(at: reassembledFile)
try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile)
try FileManager.default.moveItem(
at: optimizedFile, to: reassembledFile)
Logger.info("Using sparse-optimized file for verification")
} else {
Logger.info(

View File

@@ -92,6 +92,28 @@ final class Home {
let baseDir = Path(location.expandedPath)
return VMDirectory(baseDir.directory(name))
}
/// Resolves a VM directory that lives under an explicit file system path
/// instead of a named storage location.
///
/// When `storagePath` does not exist yet it is created first. When it exists
/// but is rejected by `isValidDirectory(at:)`, the call fails.
///
/// - Parameters:
///   - name: Name of the VM directory inside `storagePath`
///   - storagePath: File system path that holds (or will hold) the VM directory
/// - Returns: A VMDirectory for the `name` entry under `storagePath`
/// - Throws: `HomeError.invalidHomeDirectory` if the path exists but is not a valid
///   directory, or any error raised by `createVMLocation(at:)`
func getVMDirectoryFromPath(_ name: String, storagePath: String) throws -> VMDirectory {
    let storageRoot = Path(storagePath)

    guard fileExists(at: storagePath) else {
        // Nothing at this path yet: create it so the VM directory can be placed inside.
        Logger.info("Creating storage directory", metadata: ["path": storagePath])
        try createVMLocation(at: storagePath)
        return VMDirectory(storageRoot.directory(name))
    }

    guard isValidDirectory(at: storagePath) else {
        // Something exists at the path, but it cannot be used as a storage directory.
        throw HomeError.invalidHomeDirectory
    }

    return VMDirectory(storageRoot.directory(name))
}
/// Returns all initialized VM directories across all locations
/// - Returns: An array of VMDirectory instances with location info

View File

@@ -8,7 +8,7 @@ import Foundation
/// - Handling disk operations
/// - Managing VM state and locking
/// - Providing access to VM-related paths
struct VMDirectory {
struct VMDirectory: Sendable {
// MARK: - Constants
private enum FileNames {
@@ -26,8 +26,6 @@ struct VMDirectory {
let configPath: Path
let sessionsPath: Path
private let fileManager: FileManager
/// The name of the VM directory
var name: String { dir.name }
@@ -36,10 +34,8 @@ struct VMDirectory {
/// Creates a new VMDirectory instance
/// - Parameters:
/// - dir: The base directory path for the VM
/// - fileManager: FileManager instance to use for file operations
init(_ dir: Path, fileManager: FileManager = .default) {
init(_ dir: Path) {
self.dir = dir
self.fileManager = fileManager
self.nvramPath = dir.file(FileNames.nvram)
self.diskPath = dir.file(FileNames.disk)
self.configPath = dir.file(FileNames.config)
@@ -52,7 +48,25 @@ struct VMDirectory {
extension VMDirectory {
/// Checks if the VM directory is fully initialized with all required files
func initialized() -> Bool {
configPath.exists() && diskPath.exists() && nvramPath.exists()
// Add detailed logging for debugging
let configExists = configPath.exists()
let diskExists = diskPath.exists()
let nvramExists = nvramPath.exists()
Logger.info(
"VM directory initialization check",
metadata: [
"directory": dir.path,
"config_path": configPath.path,
"config_exists": "\(configExists)",
"disk_path": diskPath.path,
"disk_exists": "\(diskExists)",
"nvram_path": nvramPath.path,
"nvram_exists": "\(nvramExists)"
]
)
return configExists && diskExists && nvramExists
}
/// Checks if the VM directory exists
@@ -70,7 +84,7 @@ extension VMDirectory {
func setDisk(_ size: UInt64) throws {
do {
if !diskPath.exists() {
guard fileManager.createFile(atPath: diskPath.path, contents: nil) else {
guard FileManager.default.createFile(atPath: diskPath.path, contents: nil) else {
throw VMDirectoryError.fileCreationFailed(diskPath.path)
}
}
@@ -96,7 +110,7 @@ extension VMDirectory {
do {
let data = try encoder.encode(config)
guard fileManager.createFile(atPath: configPath.path, contents: data) else {
guard FileManager.default.createFile(atPath: configPath.path, contents: data) else {
throw VMDirectoryError.fileCreationFailed(configPath.path)
}
} catch {
@@ -108,7 +122,7 @@ extension VMDirectory {
/// - Returns: The loaded configuration
/// - Throws: VMDirectoryError if the load operation fails
func loadConfig() throws -> VMConfig {
guard let data = fileManager.contents(atPath: configPath.path) else {
guard let data = FileManager.default.contents(atPath: configPath.path) else {
throw VMDirectoryError.configNotFound
}
@@ -137,7 +151,7 @@ extension VMDirectory {
do {
let data = try encoder.encode(session)
guard fileManager.createFile(atPath: sessionsPath.path, contents: data) else {
guard FileManager.default.createFile(atPath: sessionsPath.path, contents: data) else {
throw VMDirectoryError.fileCreationFailed(sessionsPath.path)
}
} catch {
@@ -149,7 +163,7 @@ extension VMDirectory {
/// - Returns: The loaded VNC session
/// - Throws: VMDirectoryError if the load operation fails
func loadSession() throws -> VNCSession {
guard let data = fileManager.contents(atPath: sessionsPath.path) else {
guard let data = FileManager.default.contents(atPath: sessionsPath.path) else {
throw VMDirectoryError.sessionNotFound
}
@@ -163,7 +177,7 @@ extension VMDirectory {
/// Removes the VNC session information from disk
func clearSession() {
try? fileManager.removeItem(atPath: sessionsPath.path)
try? FileManager.default.removeItem(atPath: sessionsPath.path)
}
}
@@ -176,6 +190,6 @@ extension VMDirectory: CustomStringConvertible {
extension VMDirectory {
func delete() throws {
try fileManager.removeItem(atPath: dir.path)
try FileManager.default.removeItem(atPath: dir.path)
}
}

View File

@@ -48,15 +48,72 @@ final class LumeController {
/// Lists all virtual machines in the system
@MainActor
public func list() throws -> [VMDetails] {
public func list(storage: String? = nil) throws -> [VMDetails] {
do {
let vmLocations = try home.getAllVMDirectories()
let statuses = try vmLocations.map { vmWithLoc in
let vm = try self.get(
name: vmWithLoc.directory.name, storage: vmWithLoc.locationName)
return vm.details
if let storage = storage {
// If storage is specified, only return VMs from that location
if storage.contains("/") || storage.contains("\\") {
// Direct path - check if it exists
if !FileManager.default.fileExists(atPath: storage) {
// Return empty array if the path doesn't exist
return []
}
// Try to get all VMs from the specified path
// We need to check which subdirectories are valid VM dirs
let directoryURL = URL(fileURLWithPath: storage)
let contents = try FileManager.default.contentsOfDirectory(
at: directoryURL,
includingPropertiesForKeys: [.isDirectoryKey],
options: .skipsHiddenFiles
)
let statuses = try contents.compactMap { subdir -> VMDetails? in
guard let isDirectory = try subdir.resourceValues(forKeys: [.isDirectoryKey]).isDirectory,
isDirectory else {
return nil
}
let vmName = subdir.lastPathComponent
// Check if it's a valid VM directory
let vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: storage)
if !vmDir.initialized() {
return nil
}
do {
let vm = try self.get(name: vmName, storage: storage)
return vm.details
} catch {
// Skip invalid VM directories
return nil
}
}
return statuses
} else {
// Named storage
let vmsWithLoc = try home.getAllVMDirectories()
let statuses = try vmsWithLoc.compactMap { vmWithLoc -> VMDetails? in
// Only include VMs from the specified location
if vmWithLoc.locationName != storage {
return nil
}
let vm = try self.get(
name: vmWithLoc.directory.name, storage: vmWithLoc.locationName)
return vm.details
}
return statuses
}
} else {
// No storage filter - get all VMs
let vmsWithLoc = try home.getAllVMDirectories()
let statuses = try vmsWithLoc.compactMap { vmWithLoc -> VMDetails? in
let vm = try self.get(
name: vmWithLoc.directory.name, storage: vmWithLoc.locationName)
return vm.details
}
return statuses
}
return statuses
} catch {
Logger.error("Failed to list VMs", metadata: ["error": error.localizedDescription])
throw error
@@ -133,20 +190,42 @@ final class LumeController {
public func get(name: String, storage: String? = nil) throws -> VM {
let normalizedName = normalizeVMName(name: name)
do {
// Try to find the VM and get its actual location
let actualLocation = try self.validateVMExists(
normalizedName, storage: storage)
let vm: VM
if let storagePath = storage, storagePath.contains("/") || storagePath.contains("\\") {
// Storage is a direct path
let vmDir = try home.getVMDirectoryFromPath(normalizedName, storagePath: storagePath)
guard vmDir.initialized() else {
// Throw a specific error if the directory exists but isn't a valid VM
if vmDir.exists() {
throw VMError.notInitialized(normalizedName)
} else {
throw VMError.notFound(normalizedName)
}
}
// Pass the path as the storage context
vm = try self.loadVM(vmDir: vmDir, storage: storagePath)
} else {
// Storage is nil or a named location
let actualLocation = try self.validateVMExists(
normalizedName, storage: storage)
// Load the VM from its actual location
let vm = try self.loadVM(name: normalizedName, storage: actualLocation)
let vmDir = try home.getVMDirectory(normalizedName, storage: actualLocation)
// loadVM will re-check initialized, but good practice to keep validateVMExists result.
vm = try self.loadVM(vmDir: vmDir, storage: actualLocation)
}
return vm
} catch {
Logger.error("Failed to get VM", metadata: ["error": error.localizedDescription])
Logger.error(
"Failed to get VM",
metadata: [
"vmName": normalizedName, "storage": storage ?? "default",
"error": error.localizedDescription,
])
// Re-throw the original error to preserve its type
throw error
}
}
/// Factory for creating the appropriate VM type based on the OS
@MainActor
public func create(
name: String,
@@ -329,58 +408,84 @@ final class LumeController {
"Running VM",
metadata: [
"name": normalizedName,
"location": storage ?? "default",
"no_display": "\(noDisplay)",
"shared_directories":
"\(sharedDirectories.map( { $0.string } ).joined(separator: ", "))",
"mount": mount?.path ?? "none",
"vnc_port": "\(vncPort)",
"recovery_mode": "\(recoveryMode)",
"storage_param": storage ?? "default",
"storage_param": storage ?? "default", // Log the original param
"usb_storage_devices": "\(usbMassStoragePaths?.count ?? 0)",
])
do {
// Check if this is an image reference (contains a tag)
let components = name.split(separator: ":")
if components.count == 2 {
do {
_ = try self.validateVMExists(normalizedName, storage: storage)
} catch {
// If the VM doesn't exist, try to pull the image
// Check if name is an image ref to auto-pull
let components = normalizedName.split(separator: ":")
if components.count == 2 { // Check if it looks like image:tag
// Attempt to validate if VM exists first, suppressing the error
// This avoids pulling if the VM already exists, even if name looks like an image ref
let vmExists = (try? self.validateVMExists(normalizedName, storage: storage)) != nil
if !vmExists {
Logger.info(
"VM not found, attempting to pull image based on name",
metadata: ["imageRef": normalizedName])
// Use the potentially new VM name derived from the image ref
let potentialVMName = String(components[0])
try await pullImage(
image: name,
name: nil,
image: normalizedName, // Full image ref
name: potentialVMName, // Name derived from image
registry: registry,
organization: organization,
storage: storage
)
// Important: After pull, the effective name might have changed
// We proceed assuming the user wants to run the VM derived from image name
// normalizedName = potentialVMName // Re-assign normalizedName if pull logic creates it
// Note: Current pullImage doesn't return the final VM name,
// so we assume it matches the name derived from the image.
// This might need refinement if pullImage behaviour changes.
}
}
// Find VM and get its actual location
let actualLocation = try validateVMExists(normalizedName, storage: storage)
// Determine effective storage path or name AND get the VMDirectory
let effectiveStorage: String?
let vmDir: VMDirectory
// Log if we found the VM in a different location than default
if actualLocation != storage && actualLocation != nil {
if let storagePath = storage, storagePath.contains("/") || storagePath.contains("\\") {
// Storage is a direct path
vmDir = try home.getVMDirectoryFromPath(normalizedName, storagePath: storagePath)
guard vmDir.initialized() else {
if vmDir.exists() {
throw VMError.notInitialized(normalizedName)
} else {
throw VMError.notFound(normalizedName)
}
}
effectiveStorage = storagePath // Use the path string
Logger.info("Using direct storage path", metadata: ["path": storagePath])
} else {
// Storage is nil or a named location - validate and get the actual name
let actualLocationName = try validateVMExists(normalizedName, storage: storage)
vmDir = try home.getVMDirectory(normalizedName, storage: actualLocationName) // Get VMDir for named location
effectiveStorage = actualLocationName // Use the named location string
Logger.info(
"Found VM in location",
"Using named storage location",
metadata: [
"name": normalizedName,
"location": actualLocation ?? "default",
"requested": storage ?? "default",
"actual": actualLocationName ?? "default",
])
}
// Validate parameters using the located VMDirectory
try validateRunParameters(
name: normalizedName,
vmDir: vmDir, // Pass vmDir
sharedDirectories: sharedDirectories,
mount: mount,
storage: actualLocation,
usbMassStoragePaths: usbMassStoragePaths
)
// Use the actual VM location that we found
let vm = try get(name: normalizedName, storage: actualLocation)
// Load the VM directly using the located VMDirectory and storage context
let vm = try self.loadVM(vmDir: vmDir, storage: effectiveStorage)
SharedVM.shared.setVM(name: normalizedName, vm: vm)
try await vm.run(
@@ -488,7 +593,7 @@ final class LumeController {
let imageContainerRegistry = ImageContainerRegistry(
registry: registry, organization: organization)
try await imageContainerRegistry.pull(
let _ = try await imageContainerRegistry.pull(
image: actualImage,
name: vmName,
locationName: storage)
@@ -752,15 +857,17 @@ final class LumeController {
}
@MainActor
private func loadVM(name: String, storage: String? = nil) throws -> VM {
let vmDir = try home.getVMDirectory(name, storage: storage)
private func loadVM(vmDir: VMDirectory, storage: String?) throws -> VM {
// vmDir is now passed directly
guard vmDir.initialized() else {
throw VMError.notInitialized(name)
throw VMError.notInitialized(vmDir.name) // Use name from vmDir
}
let config: VMConfig = try vmDir.loadConfig()
// Pass the provided storage (which could be a path or named location)
let vmDirContext = VMDirContext(
dir: vmDir, config: config, home: home, storage: storage)
dir: vmDir, config: config, home: home, storage: storage
)
let imageLoader =
config.os.lowercased() == "macos" ? imageLoaderFactory.createImageLoader() : nil
@@ -808,11 +915,22 @@ final class LumeController {
public func validateVMExists(_ name: String, storage: String? = nil) throws -> String? {
// If location is specified, only check that location
if let storage = storage {
let vmDir = try home.getVMDirectory(name, storage: storage)
guard vmDir.initialized() else {
throw VMError.notFound(name)
// Check if storage is a path by looking for directory separator
if storage.contains("/") || storage.contains("\\") {
// Treat as direct path
let vmDir = try home.getVMDirectoryFromPath(name, storagePath: storage)
guard vmDir.initialized() else {
throw VMError.notFound(name)
}
return storage // Return the path as the location identifier
} else {
// Treat as named storage
let vmDir = try home.getVMDirectory(name, storage: storage)
guard vmDir.initialized() else {
throw VMError.notFound(name)
}
return storage
}
return storage
}
// If no location specified, try to find the VM in any location
@@ -826,6 +944,51 @@ final class LumeController {
throw VMError.notFound(name)
}
/// Validates the inputs of a run request against an already-located VM directory.
///
/// Checks are performed in this order and stop at the first failure:
/// shared directories, existence of every USB mass storage image, then the
/// OS-specific `mount` rules derived from the VM's stored configuration.
///
/// - Parameters:
///   - vmDir: Directory of the VM to run; its config is loaded to determine the OS
///   - sharedDirectories: Optional host directories to share with the VM
///   - mount: Optional disk image to mount (rejected for macOS VMs)
///   - usbMassStoragePaths: Optional USB mass storage image paths
/// - Throws: `ValidationError` for a missing USB image, a missing mount file, or a
///   mount passed to a macOS VM; also rethrows errors from
///   `validateSharedDirectories` and `vmDir.loadConfig()`
private func validateRunParameters(
    vmDir: VMDirectory,
    sharedDirectories: [SharedDirectory]?,
    mount: Path?,
    usbMassStoragePaths: [Path]? = nil
) throws {
    // The caller already resolved vmDir, so no separate existence check is needed here.
    if let requestedShares = sharedDirectories {
        try self.validateSharedDirectories(requestedShares)
    }

    if let usbImages = usbMassStoragePaths {
        // Report the first image that is missing on disk.
        let firstMissing = usbImages.first { image in
            !FileManager.default.fileExists(atPath: image.path)
        }
        if let missingImage = firstMissing {
            throw ValidationError("USB mass storage image not found: \(missingImage.path)")
        }

        if #available(macOS 15.0, *) {
            // USB mass storage is supported on this OS version; nothing to report.
        } else {
            Logger.info(
                "USB mass storage devices require macOS 15.0 or later. They will be ignored.")
        }
    }

    // The OS recorded in the VM's own config decides which mount rules apply.
    let guestOS = try vmDir.loadConfig().os.lowercased()

    if guestOS == "macos" {
        guard mount == nil else {
            throw ValidationError(
                "Mounting disk images is not supported for macOS VMs. If you are looking to mount a IPSW, please use the --ipsw option in the create command."
            )
        }
    } else if guestOS == "linux" {
        if let mountImage = mount, !FileManager.default.fileExists(atPath: mountImage.path) {
            throw ValidationError("Mount file not found: \(mountImage.path)")
        }
    }
}
private func validatePullParameters(
image: String,
name: String,
@@ -846,51 +1009,31 @@ final class LumeController {
throw ValidationError("Organization cannot be empty")
}
let vmDir = try home.getVMDirectory(name, storage: storage)
if vmDir.exists() {
throw VMError.alreadyExists(name)
}
}
private func validateRunParameters(
name: String, sharedDirectories: [SharedDirectory]?, mount: Path?,
storage: String? = nil, usbMassStoragePaths: [Path]? = nil
) throws {
_ = try self.validateVMExists(name, storage: storage)
if let dirs = sharedDirectories {
try self.validateSharedDirectories(dirs)
}
// Validate USB mass storage paths
if let usbPaths = usbMassStoragePaths {
for path in usbPaths {
if !FileManager.default.fileExists(atPath: path.path) {
throw ValidationError("USB mass storage image not found: \(path.path)")
// Determine if storage is a path or a named storage location
let vmDir: VMDirectory
if let storage = storage, storage.contains("/") || storage.contains("\\") {
// Create the base directory if it doesn't exist
if !FileManager.default.fileExists(atPath: storage) {
Logger.info("Creating VM storage directory", metadata: ["path": storage])
do {
try FileManager.default.createDirectory(
atPath: storage,
withIntermediateDirectories: true
)
} catch {
throw HomeError.directoryCreationFailed(path: storage)
}
}
if #available(macOS 15.0, *) {
// USB mass storage is supported
} else {
Logger.info(
"USB mass storage devices require macOS 15.0 or later. They will be ignored.")
}
// Use getVMDirectoryFromPath for direct paths
vmDir = try home.getVMDirectoryFromPath(name, storagePath: storage)
} else {
// Use getVMDirectory for named storage locations
vmDir = try home.getVMDirectory(name, storage: storage)
}
let vmConfig = try home.getVMDirectory(name, storage: storage).loadConfig()
switch vmConfig.os.lowercased() {
case "macos":
if mount != nil {
throw ValidationError(
"Mounting disk images is not supported for macOS VMs. If you are looking to mount a IPSW, please use the --ipsw option in the create command."
)
}
case "linux":
if let mount = mount, !FileManager.default.fileExists(atPath: mount.path) {
throw ValidationError("Mount file not found: \(mount.path)")
}
default:
break
if vmDir.exists() {
throw VMError.alreadyExists(name)
}
}

View File

@@ -6,10 +6,10 @@ import Virtualization
extension Server {
// MARK: - VM Management Handlers
func handleListVMs() async throws -> HTTPResponse {
func handleListVMs(storage: String? = nil) async throws -> HTTPResponse {
do {
let vmController = LumeController()
let vms = try vmController.list()
let vms = try vmController.list(storage: storage)
return try .json(vms)
} catch {
return .badRequest(message: error.localizedDescription)

View File

@@ -109,7 +109,7 @@ struct PushRequest: Codable {
let tags: [String] // List of tags to push
var registry: String // Registry URL
var organization: String // Organization/user in the registry
let storage: String? // Optional VM storage location
let storage: String? // Optional VM storage location or direct path
var chunkSizeMb: Int // Chunk size
// dryRun and reassemble are less common for API, default to false?
// verbose is usually handled by server logging

View File

@@ -79,9 +79,11 @@ final class Server {
routes = [
Route(
method: "GET", path: "/lume/vms",
handler: { [weak self] _ in
handler: { [weak self] request in
guard let self else { throw HTTPError.internalError }
return try await self.handleListVMs()
// Extract storage from query params if present
let storage = self.extractQueryParam(request: request, name: "storage")
return try await self.handleListVMs(storage: storage)
}),
Route(
method: "GET", path: "/lume/vms/:name",
@@ -177,8 +179,21 @@ final class Server {
return HTTPResponse(statusCode: .badRequest, body: "Missing VM name")
}
// Extract storage from query params if present
let storage = self.extractQueryParam(request: request, name: "storage")
Logger.info("Processing stop VM request", metadata: ["method": request.method, "path": request.path])
// Extract storage from the request body
var storage: String? = nil
if let bodyData = request.body, !bodyData.isEmpty {
do {
if let json = try JSONSerialization.jsonObject(with: bodyData) as? [String: Any],
let bodyStorage = json["storage"] as? String {
storage = bodyStorage
Logger.info("Extracted storage from request body", metadata: ["storage": bodyStorage])
}
} catch {
Logger.error("Failed to parse request body JSON", metadata: ["error": error.localizedDescription])
}
}
return try await self.handleStopVM(name: name, storage: storage)
}),

24
libs/lumier/.dockerignore Normal file
View File

@@ -0,0 +1,24 @@
# Files and directories excluded from the Docker build context for the Lumier image.
# NOTE(review): unlike .gitignore, .dockerignore patterns are matched relative to the
# context root, so entries without a `**/` prefix only match top-level paths — confirm
# that is sufficient for nested caches.
# Ignore macOS system files and trash
.DS_Store
.Trashes
**/.Trashes
# Catch-all: excludes every dot-prefixed file or directory at any depth. This already
# covers the .DS_Store, .Trashes, .venv/, .vscode/ and .idea/ entries in this file.
**/.*
# Ignore Python cache
__pycache__/
*.pyc
*.pyo
# Ignore virtual environments
.venv/
venv/
# Ignore editor/project files
.vscode/
.idea/
*.swp
# Ignore test artifacts
test-results/
# Add any further project-specific exclusions below this line

74
libs/lumier/Dockerfile Normal file
View File

@@ -0,0 +1,74 @@
# Base image using Debian for arm64 architecture (optimized for Apple Silicon)
FROM debian:bullseye-slim AS lumier-base

# Set environment variables for Lume API server configuration
ENV LUME_API_HOST="host.docker.internal"
ENV LUME_API_PORT="8080"

# Default VM configuration (can be overridden at runtime)
ENV VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest"
ENV RAM_SIZE="8192"
ENV CPU_CORES="4"
ENV DISK_SIZE="100"
ENV DISPLAY="1024x768"
ENV VM_NAME="lumier"
ENV HOST_DATA_PATH=""
ENV LUMIER_DEBUG="0"

# Install necessary tools and noVNC dependencies
RUN apt-get update && \
    apt-get install -y \
    netcat-traditional \
    curl \
    sshpass \
    wget \
    unzip \
    git \
    python3 \
    python3-pip \
    python3-numpy \
    procps && \
    rm -rf /var/lib/apt/lists/*

# Cache-busting build argument. The lumier script builds with
# `--build-arg CACHEBUST=<timestamp>`; a build argument is only consumed when
# the Dockerfile declares it with ARG (an ENV of the same name does not
# receive it). A changed ARG value invalidates the cache for the RUN
# instructions that follow, so noVNC is downloaded again on every such build.
ARG CACHEBUST=1

# Download and install noVNC without caching
RUN wget https://github.com/trycua/noVNC/archive/refs/heads/master.zip -O master1.zip && \
    unzip master1.zip && \
    mv noVNC-master /opt/noVNC && \
    rm master1.zip

# Set environment variables for noVNC
ENV NOVNC_PATH="/opt/noVNC"

# Create directory structure
RUN mkdir -p /run/bin /run/lib /run/config /run/hooks

# Copy scripts to the container.
# tunnel-script.sh is installed under two names (lume and sshpass): it uses its
# own basename to decide which command to forward to the host tunnel.
COPY src/bin/tunnel.sh /run/bin/
COPY src/bin/tunnel-script.sh /usr/local/bin/lume
COPY src/bin/tunnel-script.sh /usr/local/bin/sshpass
COPY src/config/constants.sh /run/config/
COPY src/bin/entry.sh /run/bin/entry.sh

# Copy library files and lifecycle hooks
COPY src/lib/ /run/lib/
COPY src/hooks/ /run/hooks/

# Make scripts executable (best effort: errors are silenced and ignored)
RUN chmod +x /usr/local/bin/lume \
    /usr/local/bin/sshpass \
    /run/bin/* \
    /run/hooks/* 2>/dev/null || true

# Expose ports: 8080 (Lume API / tunnel) and 8006 (noVNC web interface)
EXPOSE 8080
EXPOSE 8006

# VOLUME setup
VOLUME [ "/storage" ]
VOLUME [ "/data" ]

# Default entrypoint
ENTRYPOINT ["/run/bin/entry.sh"]

175
libs/lumier/README.md Normal file
View File

@@ -0,0 +1,175 @@
<div align="center">
<h1>
<div class="image-wrapper" style="display: inline-block;">
<picture>
<source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
<source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
<img alt="Shows my svg">
</picture>
</div>
[![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#)
[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
[![Homebrew](https://img.shields.io/badge/Homebrew-FBB040?logo=homebrew&logoColor=fff)](#install)
[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
</h1>
</div>
**Lumier** provides a Docker-based interface for the `lume` CLI, allowing you to easily run macOS virtual machines inside a container with VNC access. It creates a secure tunnel to execute lume commands on your host machine while providing a containerized environment for your applications.
## Requirements
Before using Lumier, make sure you have:
1. [lume](https://github.com/trycua/cua/blob/main/libs/lume/README.md) installed on your host machine
2. Docker installed on your host machine
3. `socat` installed for the tunnel (install with Homebrew: `brew install socat`)
## Installation
You can use Lumier directly from its directory or install it to your system:
```bash
# Option 1: Install to your user's bin directory (recommended)
./install.sh
# Option 2: Install to a custom directory
./install.sh --install-dir=/usr/local/bin # May require sudo
# Option 3: View installation options
./install.sh --help
```
After installation, you can run `lumier` from anywhere in your terminal.
If you get a "command not found" error, make sure the installation directory is in your PATH. The installer will warn you if it isn't and provide instructions to add it.
## Usage
There are two ways to use Lumier: with the provided script or directly with Docker.
### Option 1: Using the Lumier Script
Lumier provides a simple CLI interface to manage VMs in Docker with full Docker compatibility:
```bash
# Show help and available commands
lumier help
# Start the tunnel to connect to lume
lumier tunnel start
# Check if the tunnel is running
lumier tunnel status
# Stop the tunnel
lumier tunnel stop
# Build the Docker image (optional, happens automatically on first run)
lumier build
# Run a VM with default settings
lumier run -it --rm
# Run a VM with custom settings using Docker's -e flag
lumier run -it --rm \
--name lumier-vm \
-p 8006:8006 \
-v $(pwd)/storage:/storage \
-v $(pwd)/shared:/data \
-e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
-e CPU_CORES=4 \
-e RAM_SIZE=8192
# Note:
# The lumier script now automatically detects the real host paths for ./storage and ./shared
# and passes them to the container as HOST_STORAGE_PATH and HOST_DATA_PATH.
# You do NOT need to specify these environment variables manually.
# The VM name is always set from the container name.
```
### Option 2: Using Docker Directly
You can also use Docker commands directly without the lumier utility:
```bash
# 1. Start the tunnel manually
cd libs/lumier
socat TCP-LISTEN:8080,reuseaddr,fork EXEC:"$PWD/src/bin/tunnel.sh" &
TUNNEL_PID=$!
# 2. Build the Docker image
docker build -t lumier:latest .
# 3. Run the container
docker run -it --rm \
--name lumier-vm \
-p 8006:8006 \
-v $(pwd)/storage:/storage \
-v $(pwd)/shared:/data \
-e VM_NAME=lumier-vm \
-e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
-e CPU_CORES=4 \
-e RAM_SIZE=8192 \
-e HOST_STORAGE_PATH=$(pwd)/storage \
-e HOST_DATA_PATH=$(pwd)/shared \
lumier:latest
# 4. Stop the tunnel when you're done
kill $TUNNEL_PID
# Alternatively, find and kill the tunnel process
# First, find the process
lsof -i TCP:8080
# Then kill it by PID
kill <PID>
```
Note that when using Docker directly, you're responsible for:
- Starting and managing the tunnel
- Building the Docker image
- Providing the correct environment variables
## Available Environment Variables
These variables can be set using Docker's `-e` flag:
- `VM_NAME`: Set the VM name (default: lumier)
- `VERSION`: Set the VM image (default: ghcr.io/trycua/macos-sequoia-vanilla:latest)
- `CPU_CORES`: Set the number of CPU cores (default: 4)
- `RAM_SIZE`: Set the memory size in MB (default: 8192)
- `DISPLAY`: Set the display resolution (default: 1024x768)
- `HOST_DATA_PATH`: Path on the host to share with the VM
- `LUMIER_DEBUG`: Enable debug mode (set to 1)
## Project Structure
The project is organized as follows:
```
lumier/
├── Dockerfile # Main Docker image definition
├── README.md # This file
├── lumier # Main CLI script
├── install.sh # Installation script
├── src/ # Source code
│ ├── bin/ # Executable scripts
│ │ ├── entry.sh # Docker entrypoint
│ │ ├── server.sh # Tunnel server manager
│ │ └── tunnel.sh # Tunnel request handler
│ ├── config/ # Configuration
│ │ └── constants.sh # Shared constants
│ ├── hooks/ # Lifecycle hooks
│ │ └── on-logon.sh # Run after VM boots
│ └── lib/ # Shared library code
│ ├── utils.sh # Utility functions
│ └── vm.sh # VM management functions
└── mount/ # Default shared directory
```
## VNC Access
When a VM is running, you can access it via VNC through:
http://localhost:8006/vnc.html
The password is displayed in the console output when the VM starts.

176
libs/lumier/install.sh Executable file
View File

@@ -0,0 +1,176 @@
#!/bin/bash
set -e
# Lumier Installer
# This script installs Lumier to your system
# Define colors for output.
# tput exits non-zero when TERM is unset or the terminal type is unknown
# (CI, cron, piped installs). Because this script runs under `set -e`, an
# unguarded failure would abort the installer, so fall back to empty strings
# (plain, uncoloured output) instead.
BOLD=$(tput bold 2>/dev/null || true)
NORMAL=$(tput sgr0 2>/dev/null || true)
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
# Installation target: INSTALL_DIR from the environment wins, otherwise the
# per-user bin directory is used (no sudo needed).
DEFAULT_INSTALL_DIR="$HOME/.local/bin"
INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"

# Directory that contains this installer
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Walk the command line; every argument is a self-contained option
for opt in "$@"; do
    case "$opt" in
        --help)
            echo "${BOLD}${BLUE}Lumier Installer${NORMAL}"
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
            echo " --help Display this help message"
            echo ""
            echo "Examples:"
            echo " $0 # Install to $DEFAULT_INSTALL_DIR"
            echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)"
            echo " INSTALL_DIR=/opt/lumier $0 # Install to /opt/lumier (legacy env var support)"
            exit 0
            ;;
        --install-dir=*)
            # Everything after the first '=' is the target directory
            INSTALL_DIR="${opt#*=}"
            ;;
        *)
            echo "${RED}Unknown option: $opt${NORMAL}"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

# Banner
echo "${BOLD}${BLUE}Lumier Installer${NORMAL}"
echo "This script will install Lumier to your system."
# Warn when INSTALL_DIR lies under a typical system directory that the current
# user cannot write to, and abort when neither INSTALL_DIR nor its parent
# directory is writable.
check_permissions() {
    # System directories that typically require root privileges
    SYSTEM_DIRS=("/usr/local/bin" "/usr/bin" "/bin" "/opt")
    NEEDS_ROOT=false

    local prefix
    for prefix in "${SYSTEM_DIRS[@]}"; do
        # Only prefixes of INSTALL_DIR are of interest
        [[ "$INSTALL_DIR" == "$prefix"* ]] || continue
        if [ ! -w "$INSTALL_DIR" ]; then
            NEEDS_ROOT=true
            break
        fi
    done

    # Nothing to report for user-writable or non-system locations
    [ "$NEEDS_ROOT" = true ] || return 0

    echo "${YELLOW}Warning: Installing to $INSTALL_DIR may require root privileges.${NORMAL}"
    echo "Consider these alternatives:"
    echo " • Install to a user-writable location: $0 --install-dir=$HOME/.local/bin"
    echo " • Create the directory with correct permissions first:"
    echo " sudo mkdir -p $INSTALL_DIR && sudo chown $(whoami) $INSTALL_DIR"
    echo ""

    # Writable target or writable parent: installation can still succeed
    if [ -w "$INSTALL_DIR" ] || [ -w "$(dirname "$INSTALL_DIR")" ]; then
        return 0
    fi

    echo "${RED}Error: You don't have write permission to $INSTALL_DIR${NORMAL}"
    echo "Please choose a different installation directory or ensure you have the proper permissions."
    exit 1
}
# Verify that the host is macOS on Apple Silicon and record the platform in
# the OS, ARCH and PLATFORM variables; exit with an error otherwise.
detect_platform() {
    OS=$(uname -s | tr '[:upper:]' '[:lower:]')
    ARCH=$(uname -m)

    case "$OS" in
        darwin) ;;
        *)
            echo "${RED}Error: Currently only macOS is supported.${NORMAL}"
            exit 1
            ;;
    esac

    case "$ARCH" in
        arm64) ;;
        *)
            echo "${RED}Error: Lumier only supports macOS on Apple Silicon (ARM64).${NORMAL}"
            exit 1
            ;;
    esac

    PLATFORM="darwin-arm64"
    echo "Detected platform: ${BOLD}$PLATFORM${NORMAL}"
}
# Check the tools Lumier needs on the host.
#   lume   - required; the installer exits when it is missing
#   socat  - installed through Homebrew when missing (exits if brew is absent)
#   docker - only warned about; installation continues without it
# The final success message is printed only when nothing is missing, so the
# summary no longer contradicts the Docker warning.
check_dependencies() {
    echo "Checking dependencies..."
    local docker_missing=false

    # Check if lume is installed
    if ! command -v lume &> /dev/null; then
        echo "${RED}Error: Lume is required but not installed.${NORMAL}"
        echo "Please install Lume first: https://github.com/trycua/cua/blob/main/libs/lume/README.md"
        exit 1
    fi

    # Check if socat is installed
    if ! command -v socat &> /dev/null; then
        echo "${YELLOW}Warning: socat is required but not installed.${NORMAL}"
        echo "Installing socat with Homebrew..."
        # Check if Homebrew is installed
        if ! command -v brew &> /dev/null; then
            echo "${RED}Error: Homebrew is required to install socat.${NORMAL}"
            echo "Please install Homebrew first: https://brew.sh/"
            echo "Or install socat manually, then run this script again."
            exit 1
        fi
        # Install socat
        brew install socat
    fi

    # Check if Docker is installed
    if ! command -v docker &> /dev/null; then
        docker_missing=true
        echo "${YELLOW}Warning: Docker is required but not installed.${NORMAL}"
        echo "Please install Docker: https://docs.docker.com/get-docker/"
        echo "Continuing with installation, but Lumier will not work without Docker."
    fi

    if [ "$docker_missing" = true ]; then
        echo "${YELLOW}Dependency check finished, but Docker is still missing.${NORMAL}"
    else
        echo "${GREEN}All dependencies are satisfied.${NORMAL}"
    fi
}
# Place the lumier CLI script from the installer's directory into INSTALL_DIR
# and mark the copy as executable.
copy_lumier() {
    local destination="$INSTALL_DIR/lumier"
    echo "Copying lumier script to $INSTALL_DIR..."
    cp "$SCRIPT_DIR/lumier" "$destination"
    chmod +x "$destination"
}
# Installation entry point: run the pre-flight checks, copy the script and
# tell the user how to extend PATH when INSTALL_DIR is not on it.
main() {
    # Pre-flight checks (each one exits the script on a fatal problem)
    check_permissions
    detect_platform
    check_dependencies

    echo "Installing Lumier to $INSTALL_DIR..."

    # Create install directory if it doesn't exist, then copy the script
    mkdir -p "$INSTALL_DIR"
    copy_lumier

    echo "${GREEN}Installation complete!${NORMAL}"
    echo "Lumier has been installed to ${BOLD}$INSTALL_DIR/lumier${NORMAL}"

    # Done when the installation directory is already part of PATH
    case ":$PATH:" in
        *":$INSTALL_DIR:"*) return 0 ;;
    esac

    echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}"
    echo "To add it, run one of these commands based on your shell:"
    echo " For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
    echo " For zsh: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc"
    echo " For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
}
# Run the installation
main

200
libs/lumier/lumier Executable file
View File

@@ -0,0 +1,200 @@
#!/usr/bin/env bash
# Exit on errors and propagate errors in pipes.
# NOTE: `-u` (undefined variables) is NOT enabled here; the script expands
# possibly-unset positional parameters such as "$2" in its error messages.
set -eo pipefail
# Always use the current working directory as the build context.
# As a consequence, Dockerfile and src/bin/server.sh are looked up relative to
# the directory the command is run from, not the directory of this script.
SCRIPT_DIR="$(pwd)"
# TCP port checked by is_tunnel_active
PORT=8080
# Debug flag taken from LUMIER_DEBUG (defaults to 0)
DEBUG=${LUMIER_DEBUG:-0}
# Print the help text to stdout.
# The here-document delimiter is unquoted, so $(basename "$0") and $(pwd) are
# expanded when the text is printed and each `\\` is emitted as a single
# backslash (the line-continuation character of the example command).
usage() {
cat <<EOF
Lumier - Docker container wrapper for lume Virtual Machines
Usage: $(basename "$0") COMMAND [OPTIONS]
Commands:
run [DOCKER_ARGS] Build (if needed) and run the Lumier container with Docker args
tunnel start Start the Lumier tunnel
tunnel stop Stop the Lumier tunnel
tunnel status Check the status of the Lumier tunnel
build [DOCKER_ARGS] Build the Lumier Docker image with optional Docker args
help Show this help message
Docker Container Environment Variables:
These can be set using Docker's -e flag:
VM_NAME Set the VM name (default: lumier)
VERSION Set the VM image (default: ghcr.io/trycua/macos-sequoia-vanilla:latest)
CPU_CORES Set the number of CPU cores (default: 4)
RAM_SIZE Set the memory size in MB (default: 8192)
HOST_DATA_PATH Path to mount as shared directory in the VM
LUMIER_DEBUG Enable debug mode (set to 1)
Script Environment Variables:
LUMIER_IMAGE Docker image name (default: lumier:latest)
Examples:
# Run a VM with default settings
$(basename "$0") run -it --rm
# Run a VM with custom settings using Docker's -e flag
$(basename "$0") run -it --rm \\
--name custom-container-name \\
-e VM_NAME=my-vm \\
-e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \\
-e RAM_SIZE=16384 \\
-v $(pwd)/mount:/data
# Build with a custom image name
LUMIER_IMAGE=myorg/lumier:v1 $(basename "$0") build
EOF
}
# Report whether something is listening on TCP port $PORT.
# Returns 0 when lsof shows a LISTEN entry for the port, 1 otherwise.
is_tunnel_active() {
    if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then
        return 0 # Tunnel is active
    fi
    return 1 # Tunnel is not active
}
# Make sure the tunnel is listening: start it through src/bin/server.sh when it
# is not, and exit the script when it still is not active two seconds later.
ensure_tunnel() {
    if is_tunnel_active; then
        echo "Tunnel is already active."
        return 0
    fi

    echo "Tunnel is not active. Starting tunnel..."
    "$SCRIPT_DIR/src/bin/server.sh" start
    sleep 2 # Wait for the tunnel to start

    if is_tunnel_active; then
        return 0
    fi
    echo "Failed to start tunnel. Make sure 'lume' is installed on your host."
    exit 1
}
# Build the Lumier Docker image from $SCRIPT_DIR.
# The image name comes from LUMIER_IMAGE (default lumier:latest); the current
# Unix timestamp is passed as the CACHEBUST build argument, and any arguments
# given to this function are appended to the `docker build` command line.
build_image() {
    local image_name="${LUMIER_IMAGE:-lumier:latest}"
    local dockerfile="$SCRIPT_DIR/Dockerfile"

    echo "Building Lumier Docker image: $image_name"
    echo "SCRIPT_DIR=$SCRIPT_DIR"
    echo "Checking for Dockerfile at: $dockerfile"
    # Diagnostic only: a missing Dockerfile is reported, the build still runs
    ls -l "$dockerfile" || echo "Dockerfile not found at $dockerfile"

    # Timestamp used as cache-busting build argument
    local stamp
    stamp=$(date +%s)
    docker build --build-arg "CACHEBUST=$stamp" -t "$image_name" "$SCRIPT_DIR" "$@"

    echo "Lumier image built successfully: $image_name"
}
# Return 0 when the docker arguments ($2...) already define environment
# variable $1 in any of the forms docker accepts:
#   -e NAME[=value]   -eNAME[=value]   --env NAME[=value]   --env=NAME[=value]
_lumier_has_env() {
    local name="$1"
    shift
    local prev="" arg
    for arg in "$@"; do
        case "$arg" in
            "-e$name"|"-e$name="*|"--env=$name"|"--env=$name="*)
                return 0
                ;;
            "$name"|"$name="*)
                # Only counts when it is the value of a preceding -e / --env
                if [[ "$prev" == "-e" || "$prev" == "--env" ]]; then
                    return 0
                fi
                ;;
        esac
        prev="$arg"
    done
    return 1
}

# Run the Docker container.
# Builds the image when it does not exist, makes sure the tunnel is up, then
# calls `docker run` with the caller's arguments plus a few defaults:
#   HOST_STORAGE_PATH / HOST_DATA_PATH  resolved host paths of ./storage and
#                                       ./shared (unless already in the env)
#   VM_NAME                             the --name value, when one was given
#   -p 8006:8006                        unless that mapping is already present
# A default is only added when the caller's arguments do not already set it.
# The previous implementation scanned the wrong argument list (the helper's
# own parameters, and a fixed "$2"), so those checks never saw the user's
# -e flags.
run_container() {
    local image_name="${LUMIER_IMAGE:-lumier:latest}"

    # Ensure the Docker image exists
    if ! docker image inspect "$image_name" &>/dev/null; then
        echo "Docker image '$image_name' not found. Building it..."
        build_image
    fi

    # Ensure the tunnel is running
    ensure_tunnel

    # Automatically resolve host paths for storage and data
    STORAGE_PATH="${HOST_STORAGE_PATH:-$(realpath ./storage)}"
    DATA_PATH="${HOST_DATA_PATH:-$(realpath ./shared)}"

    # Extra arguments placed before the caller's own docker arguments
    DOCKER_ARGS=( )
    if ! _lumier_has_env HOST_STORAGE_PATH "$@"; then
        DOCKER_ARGS+=( -e "HOST_STORAGE_PATH=$STORAGE_PATH" )
    fi
    if ! _lumier_has_env HOST_DATA_PATH "$@"; then
        DOCKER_ARGS+=( -e "HOST_DATA_PATH=$DATA_PATH" )
    fi

    # Detect the container name from `--name NAME` or `--name=NAME`
    local container_name=""
    local prev_arg=""
    local arg
    for arg in "$@"; do
        if [[ "$prev_arg" == "--name" ]]; then
            container_name="$arg"
            break
        elif [[ "$arg" == --name=* ]]; then
            container_name="${arg#--name=}"
            break
        fi
        prev_arg="$arg"
    done

    # Derive VM_NAME from the container name unless the caller set VM_NAME
    if [[ -n "$container_name" ]] && ! _lumier_has_env VM_NAME "$@"; then
        DOCKER_ARGS+=( -e "VM_NAME=$container_name" )
    fi

    echo "Running Lumier container with image: $image_name"
    # Publish the noVNC port unless the caller already maps 8006:8006
    if [[ "$*" == *"-p 8006:8006"* || "$*" == *"-p"*"8006:8006"* ]]; then
        docker run "${DOCKER_ARGS[@]}" "$@" "$image_name"
    else
        docker run "${DOCKER_ARGS[@]}" -p 8006:8006 "$@" "$image_name"
    fi
}
# Main command handling: dispatch on the first argument (defaults to "help")
case "${1:-help}" in
    help)
        usage
        ;;
    build)
        shift
        build_image "$@"
        ;;
    run)
        shift
        run_container "$@"
        ;;
    tunnel)
        # The tunnel subcommands map one-to-one onto server.sh actions
        case "${2:-}" in
            start|stop|status)
                "$SCRIPT_DIR/src/bin/server.sh" "$2"
                ;;
            *)
                echo "Unknown tunnel subcommand: $2"
                usage
                exit 1
                ;;
        esac
        ;;
    *)
        echo "Unknown command: $1"
        usage
        exit 1
        ;;
esac

View File

@@ -0,0 +1,10 @@
"""Minimal Flask demo application that serves a greeting on port 5001."""
import os

from flask import Flask

app = Flask(__name__)


@app.route('/')
def hello_world():
    """Return the greeting served at the site root."""
    return 'Hello, World, from VM!'


if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0). Flask's debug mode
    # enables the interactive Werkzeug debugger, which allows code execution
    # from the browser, so it is off unless FLASK_DEBUG=1 is set explicitly.
    debug = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(debug=debug, host="0.0.0.0", port=5001)

9
libs/lumier/mount/setup.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Example setup script: drops a helloworld.txt file on the Desktop unless one
# is already there.
echo "Creating helloworld.txt on the Desktop..."
hello_file=~/Desktop/helloworld.txt
if [ -f "$hello_file" ]; then
    echo "helloworld.txt already exists."
else
    echo "Hello, World!" > "$hello_file"
    echo "helloworld.txt created successfully."
fi

98
libs/lumier/src/bin/entry.sh Executable file
View File

@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Strict mode: abort on errors, on unset variables and on failures inside pipes
set -euo pipefail

# Directories holding the configuration and the helper libraries
CONFIG_DIR="/run/config"
LIB_DIR="/run/lib"

# Load the shared constants when the file is present
if [[ -f "${CONFIG_DIR}/constants.sh" ]]; then
    source "${CONFIG_DIR}/constants.sh"
fi

# Load every helper library (*.sh) found in LIB_DIR
for lib in "${LIB_DIR}"/*.sh; do
    [[ -f "$lib" ]] || continue
    source "$lib"
done

# VM_NAME: keep a non-empty value from the environment, otherwise fall back to
# the contents of /etc/hostname
if [[ -z "${VM_NAME:-}" ]]; then
    VM_NAME="$(cat /etc/hostname)"
    export VM_NAME
fi

# HOST_STORAGE_PATH: default to /storage/$VM_NAME when not provided
if [[ -z "${HOST_STORAGE_PATH:-}" ]]; then
    HOST_STORAGE_PATH="/storage/$VM_NAME"
    export HOST_STORAGE_PATH
fi

# Informational only: report which of the two volumes are mounted
for volume in /storage /data; do
    if mountpoint -q "$volume"; then
        echo "$volume is mounted"
    fi
done

# Log startup info
echo "Lumier VM is starting..."
# Cleanup function run on container stop (installed as the SIGTERM/SIGINT trap
# further down in this script).
# It stops the VM via stop_vm and then exits with status 0. Errors are not
# fatal here because `set -e` is switched off first.
# NOTE(review): the block that would stop the noVNC proxy is commented out, so
# this function does not stop the proxy process itself.
cleanup() {
set +e # Don't exit on error in cleanup
echo "[cleanup] Caught signal, shutting down..."
echo "[cleanup] Stopping VM..."
stop_vm
# Now gently stop noVNC proxy if running
# (disabled: kept for reference, TERM first, then KILL after 5 seconds)
# if [ -n "${NOVNC_PID:-}" ] && kill -0 "$NOVNC_PID" 2>/dev/null; then
# echo "[cleanup] Stopping noVNC proxy (PID $NOVNC_PID)..."
# kill -TERM "$NOVNC_PID"
# # Wait up to 5s for noVNC to exit
# for i in {1..5}; do
# if ! kill -0 "$NOVNC_PID" 2>/dev/null; then
# echo "[cleanup] noVNC proxy stopped."
# break
# fi
# sleep 1
# done
# # Escalate if still running
# if kill -0 "$NOVNC_PID" 2>/dev/null; then
# echo "[cleanup] noVNC proxy did not exit, killing..."
# kill -KILL "$NOVNC_PID" 2>/dev/null
# fi
# fi
echo "[cleanup] Done. Exiting."
exit 0
}
# Run cleanup when the container is asked to stop
trap cleanup SIGTERM SIGINT

# Start the VM
start_vm

# Start noVNC for VNC access (only when both VNC_PORT and VNC_PASSWORD are set)
NOVNC_PID=""
if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then
    echo "Starting noVNC proxy with optimized color settings..."
    ${NOVNC_PATH}/utils/novnc_proxy --vnc host.docker.internal:${VNC_PORT} --listen 8006 --web ${NOVNC_PATH} > /dev/null 2>&1 &
    NOVNC_PID=$!
    disown $NOVNC_PID
    echo "noVNC interface available at: http://localhost:8006/vnc.html?password=${VNC_PASSWORD}&autoconnect=true&logging=debug"
fi

# Run any post-startup hooks (every executable file in /run/hooks)
if [ -d "/run/hooks" ]; then
    for hook in /run/hooks/*; do
        if [ -x "$hook" ]; then
            echo "Running hook: $(basename "$hook")"
            "$hook"
        fi
    done
fi

echo "Lumier is running. Press Ctrl+C to stop."

# Keep the container alive until a signal arrives.
# Bash runs a trap handler only after the current foreground command has
# finished, so a foreground `tail -f /dev/null` would keep `cleanup` from ever
# running when SIGTERM is delivered to this shell alone (e.g. `docker stop`).
# Running tail in the background and blocking in `wait` lets the trapped
# signal interrupt the wait, after which the handler runs immediately.
# `|| true` keeps `set -e` from acting on wait's >128 status.
tail -f /dev/null &
wait "$!" || true

99
libs/lumier/src/bin/server.sh Executable file
View File

@@ -0,0 +1,99 @@
#!/usr/bin/env bash
# Strict mode: abort on errors, on unset variables and on failures inside pipes
set -euo pipefail

# Directory of this script; constants.sh is expected one level up in config/
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONSTANTS_FILE="${SCRIPT_DIR}/../config/constants.sh"
if [ -f "$CONSTANTS_FILE" ]; then
    source "$CONSTANTS_FILE"
fi

# Listening port: TUNNEL_PORT from the constants when set, otherwise 8080
PORT="${TUNNEL_PORT:-8080}"
# Script that socat executes for every incoming connection
TUNNEL_SCRIPT="${SCRIPT_DIR}/tunnel.sh"
# Report whether something is listening on TCP port $PORT.
# Returns 0 when lsof shows a LISTEN entry for the port, 1 otherwise.
is_tunnel_active() {
    if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then
        return 0 # Tunnel is active
    fi
    return 1 # Tunnel is not active
}
# Start the socat listener on $PORT unless one is already running.
# Every accepted connection is handed to $TUNNEL_SCRIPT. Returns 0 when the
# port is listening one second after the start (or already was), 1 otherwise.
# The PID of the background socat process is stored in SOCAT_PID.
start_tunnel() {
    echo "Starting tunnel on port $PORT..."

    if is_tunnel_active; then
        echo "Tunnel is already running on port $PORT."
        return 0
    fi

    # Start socat in the background and remember its PID
    socat TCP-LISTEN:$PORT,reuseaddr,fork EXEC:"$TUNNEL_SCRIPT" &
    SOCAT_PID=$!

    # Give socat a moment, then verify that the port is really listening
    sleep 1
    if is_tunnel_active; then
        echo "Tunnel started successfully on port $PORT (PID: $SOCAT_PID)."
        return 0
    fi

    echo "Failed to start tunnel on port $PORT."
    return 1
}
# Stop whatever process is listening on $PORT.
# Returns 0 when nothing was listening or the listener was sent a kill signal,
# 1 when the port is in use but no PID could be determined.
stop_tunnel() {
    echo "Stopping tunnel on port $PORT..."

    if ! is_tunnel_active; then
        echo "No tunnel running on port $PORT."
        return 0
    fi

    # PID column of the LISTEN line(s) reported by lsof
    local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}')
    if [ -z "$pid" ]; then
        echo "Failed to find process using port $PORT."
        return 1
    fi

    kill $pid
    echo "Tunnel stopped (PID: $pid)."
    return 0
}
# Print whether the tunnel is listening on $PORT.
# Returns 0 (and prints the listener's PID) when active, 1 otherwise.
status_tunnel() {
    if ! is_tunnel_active; then
        echo "No tunnel running on port $PORT."
        return 1
    fi

    local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}')
    echo "Tunnel is active on port $PORT (PID: $pid)."
    return 0
}
# Dispatch on the requested action (first command line argument)
action="${1:-}"
case "$action" in
    status)
        status_tunnel
        ;;
    start)
        start_tunnel
        ;;
    stop)
        stop_tunnel
        ;;
    restart)
        # Stop first; a running tunnel would make start_tunnel a no-op
        stop_tunnel
        start_tunnel
        ;;
    *)
        echo "Usage: $0 {start|stop|restart|status}"
        exit 1
        ;;
esac

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Command forwarding shim.
# The command line it was invoked with (its own basename plus all arguments)
# and any data on stdin are sent as the body of an HTTP POST to the tunnel
# server; the server's response is written to stdout.
# The Dockerfile installs this file as /usr/local/bin/lume and
# /usr/local/bin/sshpass, so the basename selects the forwarded command.
# Source constants if running in container context
if [ -f "/run/config/constants.sh" ]; then
source "/run/config/constants.sh"
fi
# Define server address with fallback
SERVER="${TUNNEL_HOST:-host.docker.internal}:${TUNNEL_PORT:-8080}"
# Extract the base name of the command and arguments
command=$(basename "$0")
subcommand="$1"
shift
# NOTE: "$@" is flattened into one space-separated string here, so the
# original argument boundaries (quoting) are not preserved.
args="$@"
command="$command $subcommand $args"
# Concatenate command and any stdin data
full_data="$command"
# stdin is only read when it is not a terminal
if [ ! -t 0 ]; then
stdin_data=$(cat)
if [ -n "$stdin_data" ]; then
# Format full_data to include stdin data
# (appended as a quoted here-document after the command)
full_data="$full_data << 'EOF'
$stdin_data
EOF"
fi
fi
# Trim leading/trailing whitespace and newlines
# NOTE: `echo -e` also interprets backslash escapes contained in the data.
full_data=$(echo -e "$full_data" | sed 's/^[ \t\n]*//;s/[ \t\n]*$//')
# Log command if debug is enabled
if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then
echo "Executing lume command: $full_data" >&2
echo "Sending to: $SERVER" >&2
fi
# Use curl with -N to disable output buffering and -s for silent mode
# The request body is read from stdin (@-), which is fed by the here-string.
curl -N -s -X POST \
-H "Content-Type: application/octet-stream" \
--data-binary @- \
"http://$SERVER" <<< "$full_data"

96
libs/lumier/src/bin/tunnel.sh Executable file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/env bash
# Strict mode: abort on errors, on unset variables and on failures inside pipes
set -euo pipefail

# Directory of this script; constants.sh is expected one level up in config/
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONSTANTS_FILE="${SCRIPT_DIR}/../config/constants.sh"
if [ -f "$CONSTANTS_FILE" ]; then
    source "$CONSTANTS_FILE"
fi
# Exit handler: remove the temporary command file and the FIFO when they
# exist, then exit with the status that was current when the handler started.
cleanup() {
    local exit_code=$?

    # Temporary script file holding the command
    if [ -n "${temp_file:-}" ] && [ -f "$temp_file" ]; then
        rm "$temp_file"
    fi

    # Named pipe used for streaming the command output
    if [ -n "${fifo:-}" ] && [ -p "$fifo" ]; then
        rm "$fifo"
    fi

    exit $exit_code
}
trap cleanup EXIT INT TERM
# Write "[DEBUG] <arguments>" to stderr when LUMIER_DEBUG equals 1; otherwise
# do nothing.
log_debug() {
    [ "${LUMIER_DEBUG:-0}" -eq 1 ] || return 0
    echo "[DEBUG] $*" >&2
}
# Send a plain-text HTTP error response on stdout and terminate with status 1.
#   $1  status line text after "HTTP/1.1 ", e.g. "400 Bad Request"
#   $2  message used as the response body
# The status line and headers end in CRLF, as HTTP requires and as the
# "200 OK" response written at the end of this script already does.
send_error_response() {
    local status_code=$1
    local message=$2
    printf 'HTTP/1.1 %s\r\n' "$status_code"
    printf 'Content-Type: text/plain\r\n'
    printf '\r\n'
    printf '%s\n' "$message"
    exit 1
}
# ---------------------------------------------------------------------------
# Request handling. socat connects the client socket to stdin/stdout, so the
# HTTP request is read from stdin and the response is written to stdout.
# ---------------------------------------------------------------------------
# Read the HTTP request line
read -r request_line
log_debug "Request: $request_line"
# Read headers and look for Content-Length
content_length=0
while IFS= read -r header; do
# A line consisting only of a carriage return is the blank line after the headers
[[ $header == $'\r' ]] && break # End of headers
log_debug "Header: $header"
# The match is case-sensitive: only the exact spelling "Content-Length" is recognised
if [[ "$header" =~ ^Content-Length:\ ([0-9]+) ]]; then
content_length="${BASH_REMATCH[1]}"
fi
done
# Read the body using the content length
command=""
if [ "$content_length" -gt 0 ]; then
# Read exactly content_length bytes, one byte at a time, from stdin
command=$(dd bs=1 count="$content_length" 2>/dev/null)
log_debug "Received command: $command"
fi
# Determine the executable and arguments based on the command
# Only bodies starting with "lume" or "sshpass" are accepted (prefix match).
if [[ "$command" == lume* ]]; then
executable="$(which lume || echo "/usr/local/bin/lume")"
command_args="${command#lume}" # Remove 'lume' from the command
elif [[ "$command" == sshpass* ]]; then
executable="$(which sshpass || echo "/usr/local/bin/sshpass")"
command_args="${command#sshpass}"
else
send_error_response "400 Bad Request" "Unsupported command: $command"
fi
# Check if executable exists
if [ ! -x "$executable" ]; then
send_error_response "500 Internal Server Error" "Executable not found or not executable: $executable"
fi
# Create a temporary file to store the command
# NOTE(review): command_args is written to the file verbatim and the file is
# then executed, so everything after the lume/sshpass prefix is interpreted as
# shell syntax. Confirm that only trusted clients can reach this port.
temp_file=$(mktemp)
echo "$executable $command_args" > "$temp_file"
chmod +x "$temp_file"
# Create a FIFO (named pipe) for capturing output
fifo=$(mktemp -u)
mkfifo "$fifo"
# Execute the command and pipe its output through awk to ensure line-buffering
# (runs in the background; stderr is merged into stdout)
{
log_debug "Executing: $executable $command_args"
"$temp_file" 2>&1 | awk '{ print; fflush() }' > "$fifo"
} &
# Stream the output from the FIFO as an HTTP response
# A 200 status is always sent; the exit status of the command is not reported.
{
echo -e "HTTP/1.1 200 OK\r"
echo -e "Content-Type: text/plain\r"
echo -e "\r"
cat "$fifo"
}

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Shared constants. This file only assigns variables and is meant to be
# sourced by the other Lumier scripts.
# Port configuration
# TUNNEL_PORT: port of the host-side command tunnel
TUNNEL_PORT=8080
# NOTE(review): 8006 is also the port the noVNC proxy listens on in entry.sh,
# and start_vm overwrites VNC_PORT with the port parsed from the VM's VNC URL;
# confirm which meaning is intended here.
VNC_PORT=8006
# Host configuration
TUNNEL_HOST="host.docker.internal"
# Default VM configuration
DEFAULT_RAM_SIZE="8192"
DEFAULT_CPU_CORES="4"
DEFAULT_DISK_SIZE="100"
DEFAULT_VM_NAME="lumier"
DEFAULT_VM_VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest"
# Paths
NOVNC_PATH="/opt/noVNC"
LIFECYCLE_HOOKS_DIR="/run/hooks"
# VM connection details
# Credentials and retry settings used for the SSH connection to the VM
HOST_USER="lume"
HOST_PASSWORD="lume"
SSH_RETRY_ATTEMPTS=20
SSH_RETRY_INTERVAL=5

View File

@@ -0,0 +1,8 @@
# Run the optional setup.sh found in the shared data folder
# ($DATA_FOLDER_PATH). The script is sourced, so it runs in this shell.
setup_script="$DATA_FOLDER_PATH/setup.sh"
if [ ! -f "$setup_script" ]; then
    echo "Setup script not found at: $setup_script"
else
    chmod +x "$setup_script"
    source "$setup_script"
fi

106
libs/lumier/src/lib/utils.sh Executable file
View File

@@ -0,0 +1,106 @@
#!/usr/bin/env bash
# Wait until an SSH login on the given host succeeds.
#   $1  host/IP to connect to
#   $2  user name
#   $3  password (passed to sshpass)
#   $4  seconds between attempts (default 5)
#   $5  maximum number of attempts (default 20, 0 = retry forever)
# Returns 0 as soon as a connection attempt succeeds and 1 once the maximum
# number of attempts has been reached.
#
# The function is safe to call from scripts running under `set -e`: the
# connection attempt is evaluated as an `if` condition and the counter is
# incremented with an assignment, because `((count++))` returns status 1 when
# the old value is 0 and would abort such a caller on the first retry.
wait_for_ssh() {
    local host_ip=$1
    local user=$2
    local password=$3
    local retry_interval=${4:-5} # Default retry interval is 5 seconds
    local max_retries=${5:-20} # Default maximum retries is 20 (0 for infinite)

    echo "Waiting for SSH to become available on $host_ip..."
    local retry_count=0
    while true; do
        # Try to connect via SSH
        if sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host_ip" "exit"; then
            echo "SSH is ready on $host_ip!"
            return 0
        fi

        retry_count=$((retry_count + 1))

        # Give up once the maximum number of attempts is reached
        if [ "$max_retries" -ne 0 ] && [ "$retry_count" -ge "$max_retries" ]; then
            echo "Maximum retries reached. SSH is not available."
            return 1
        fi

        echo "SSH not ready. Retrying in $retry_interval seconds... (Attempt $retry_count)"
        sleep "$retry_interval"
    done
}
# Execute a local script on a remote host through `sshpass ssh ... bash -s`.
#   $1  host
#   $2  user
#   $3  password (passed to sshpass)
#   $4  path of the local script whose content is sent
#   $5  VNC password exported to the remote script as VNC_PASSWORD
#   $6  optional data folder name; when given, VNC_PASSWORD and
#       DATA_FOLDER_PATH ($VM_SHARED_FILES_PATH/<folder>) are exported at the
#       top of the remote script
# Returns 1 when a required argument is missing or the remote command fails.
#
# The lines of the generated script are joined with real newlines ($'\n').
# A "\n" inside double quotes is a literal backslash followed by n, which
# would leave the exports and the first script line on the shebang line,
# where they are part of a comment.
execute_remote_script() {
    local host="$1"
    local user="$2"
    local password="$3"
    local script_path="$4"
    local vnc_password="$5"
    local data_folder="$6"

    # Check if all required arguments are provided
    if [ -z "$host" ] || [ -z "$user" ] || [ -z "$password" ] || [ -z "$script_path" ] || [ -z "$vnc_password" ]; then
        echo "Usage: execute_remote_script <host> <user> <password> <script_path> <vnc_password> [data_folder]"
        return 1
    fi

    echo "VNC password exported to VM: $vnc_password"
    # ${...:-} keeps callers running under `set -u` alive when the variable is unset
    data_folder_path="${VM_SHARED_FILES_PATH:-}/$data_folder"
    echo "Data folder path in VM: $data_folder_path"

    # Build the script: shebang, optional exports, then the script file content
    script_content='#!/usr/bin/env bash'$'\n'
    if [ -n "$data_folder" ]; then
        script_content+="export VNC_PASSWORD='$vnc_password'"$'\n'
        script_content+="export DATA_FOLDER_PATH='$data_folder_path'"$'\n'
    fi
    script_content+="$(<"$script_path")"

    # Feed the script to the remote bash on stdin; testing the command directly
    # (instead of inspecting $? afterwards) also works under `set -e`
    if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host" "bash -s" <<< "$script_content"; then
        echo "Failed to execute script on remote host $host."
        return 1
    fi
}
# Example usage of execute_remote_script:
# execute_remote_script "192.168.1.100" "username" "password" "/path/to/script.sh" "vnc-password" "data-folder"
# Print the value of a string field from JSON text.
#   $1  field name
#   $2  JSON text
# Only values written as "field" : "value" (a double-quoted string) are
# matched; every match is printed on its own line. An empty line is printed
# when the field is not found. Requires a grep with -P (PCRE) support.
#
# The grep call is tested directly as the `if` condition so the fallback is
# also reached when the function is called from a script running under
# `set -e` (a failing assignment followed by a `$?` test would abort there).
extract_json_field() {
    local field_name=$1
    local input=$2
    local result
    if result=$(echo "$input" | grep -oP '"'"$field_name"'"\s*:\s*"\K[^"]+'); then
        echo "$result"
    else
        echo ""
    fi
}
# Print the value of a string field from a JSON file.
#   $1  field name
#   $2  path of the JSON file
# The whole file is read and handed to extract_json_field.
extract_json_field_from_file() {
    local field_name=$1
    local json_file=$2
    local contents
    contents=$(<"$json_file")
    extract_json_field "$field_name" "$contents"
}
# Print the value of a string field from JSON text.
#   $1  field name
#   $2  JSON text
# Thin wrapper that forwards both arguments to extract_json_field.
extract_json_field_from_text() {
    local field_name=$1
    local json_text=$2
    extract_json_field "$field_name" "$json_text"
}

181
libs/lumier/src/lib/vm.sh Executable file
View File

@@ -0,0 +1,181 @@
#!/usr/bin/env bash
# Create (if needed), configure and start the VM named $VM_NAME, then wait
# until it reports an IP address and a VNC URL.
# Reads:   VM_NAME, HOST_STORAGE_PATH, VERSION, CPU_CORES, RAM_SIZE, DISPLAY,
#          HOST_DATA_PATH, HOST_USER, HOST_PASSWORD
# Exports: VNC_PORT and VNC_PASSWORD (parsed from the VM's VNC URL)
# Exits the script with status 1 when the VM does not report an IP address and
# VNC URL within 30 polls (2 seconds apart).
start_vm() {
# Determine storage path for VM
STORAGE_PATH="$HOST_STORAGE_PATH"
if [ -z "$STORAGE_PATH" ]; then
STORAGE_PATH="storage_${VM_NAME}"
fi
# Check if VM exists and its status using JSON format
# (stderr is captured too, so an error message ends up in VM_INFO)
VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>&1)
# Check if VM not found error
if [[ $VM_INFO == *"Virtual machine not found"* ]]; then
# Image name is the part of VERSION after the last '/'
IMAGE_NAME="${VERSION##*/}"
lume pull "$IMAGE_NAME" "$VM_NAME" --storage "$STORAGE_PATH"
else
# Parse the JSON status - check if it contains "status" : "running"
# A VM that is already running is stopped first so the settings below can be applied
if [[ $VM_INFO == *'"status" : "running"'* ]]; then
lume_stop "$VM_NAME" "$STORAGE_PATH"
# lume stop "$VM_NAME" --storage "$STORAGE_PATH"
fi
fi
# Set VM parameters
lume set "$VM_NAME" --cpu "$CPU_CORES" --memory "${RAM_SIZE}MB" --display "$DISPLAY" --storage "$STORAGE_PATH"
# Fetch VM configuration
# NOTE(review): CONFIG_JSON is not read again inside this function.
CONFIG_JSON=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json)
# Setup data directory args if necessary
SHARED_DIR_ARGS=""
if [ -d "/data" ]; then
if [ -n "$HOST_DATA_PATH" ]; then
SHARED_DIR_ARGS="--shared-dir=$HOST_DATA_PATH"
else
echo "Warning: /data volume exists but HOST_DATA_PATH is not set. Cannot mount volume."
fi
fi
# Run VM with VNC and shared directory using curl
# (started in the background; SHARED_DIR_ARGS is intentionally unquoted so an
# empty value adds no argument)
lume_run $SHARED_DIR_ARGS --storage "$STORAGE_PATH" "$VM_NAME" &
# lume run "$VM_NAME" --storage "$STORAGE_PATH" --no-display
# Wait for VM to be running and VNC URL to be available
vm_ip=""
vnc_url=""
max_attempts=30
attempt=0
while [ $attempt -lt $max_attempts ]; do
# Get VM info as JSON
VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null)
# Check if VM has status 'running'
if [[ $VM_INFO == *'"status" : "running"'* ]]; then
# Extract IP address using the existing function from utils.sh
vm_ip=$(extract_json_field "ipAddress" "$VM_INFO")
# Extract VNC URL using the existing function from utils.sh
vnc_url=$(extract_json_field "vncUrl" "$VM_INFO")
# If we have both IP and VNC URL, break the loop
if [ -n "$vm_ip" ] && [ -n "$vnc_url" ]; then
break
fi
fi
sleep 2
attempt=$((attempt + 1))
done
if [ -z "$vm_ip" ] || [ -z "$vnc_url" ]; then
echo "Timed out waiting for VM to start or VNC URL to become available."
lume_stop "$VM_NAME" "$STORAGE_PATH" > /dev/null 2>&1
# lume stop "$VM_NAME" --storage "$STORAGE_PATH" > /dev/null 2>&1
exit 1
fi
# Parse VNC URL to extract password and port
# Password: text between the last ':' that precedes an '@' and that '@'.
# Port: the digits after the final ':' at the end of the URL.
VNC_PASSWORD=$(echo "$vnc_url" | sed -n 's/.*:\(.*\)@.*/\1/p')
VNC_PORT=$(echo "$vnc_url" | sed -n 's/.*:\([0-9]\+\)$/\1/p')
# Wait for SSH to become available
wait_for_ssh "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" 5 20
# Export VNC variables for entry.sh to use
export VNC_PORT
export VNC_PASSWORD
# Execute on-logon.sh if present
# NOTE(review): the Dockerfile copies hooks to /run/hooks, not /run/lifecycle,
# so this file test presumably never succeeds in the shipped image - confirm.
# NOTE(review): DATA_FOLDER is not assigned anywhere in the visible sources.
on_logon_script="/run/lifecycle/on-logon.sh"
if [ -f "$on_logon_script" ]; then
execute_remote_script "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" "$on_logon_script" "$VNC_PASSWORD" "$DATA_FOLDER"
fi
# The VM is still running because the background lume_run call was never killed.
# Its PID is not stored by this function; use lume_stop to stop the VM.
}
# Stop the VM named by $VM_NAME if it is currently running.
#
# Reads:  VM_NAME, HOST_STORAGE_PATH
# Sets:   STORAGE_PATH and VM_INFO (both are assigned as globals, not locals)
# Prints a one-line status message for each outcome: VM missing, stopped now,
# already stopped, or status not recognised.
stop_vm() {
    echo "Stopping VM '$VM_NAME'..."

    # Fall back to a per-VM storage name when no host storage path was given.
    STORAGE_PATH="$HOST_STORAGE_PATH"
    [ -n "$STORAGE_PATH" ] || STORAGE_PATH="storage_${VM_NAME}"

    # A single `lume get` tells us both whether the VM exists and its status.
    VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null)

    # Patterns are tried top to bottom, mirroring the precedence of the checks:
    # "missing" wins over any status text.
    case "$VM_INFO" in
        "" | *"Virtual machine not found"*)
            echo "VM '$VM_NAME' does not exist."
            ;;
        *'"status" : "running"'*)
            lume_stop "$VM_NAME" "$STORAGE_PATH"
            echo "VM '$VM_NAME' was running and is now stopped."
            ;;
        *'"status" : "stopped"'*)
            echo "VM '$VM_NAME' is already stopped."
            ;;
        *)
            echo "Unknown VM status for '$VM_NAME'."
            ;;
    esac
}
# Succeed (exit 0) when $VM_NAME appears anywhere in the output of `lume ls`.
#
# NOTE: despite the name, this only checks that the VM name is present in the
# listing; it does not inspect the VM's status column.
is_vm_running() {
    # -F: match the name literally (a '.' or '*' in the name is not a regex).
    # --: a name beginning with '-' must not be parsed as a grep option.
    lume ls | grep -qF -- "$VM_NAME"
}
# Stop a VM through the Lume HTTP API reachable at host.docker.internal:3000.
#
# Arguments:
#   $1 - VM name (inserted verbatim into the request URL)
#   $2 - storage location, sent as the "storage" field of the JSON body
# Output:  whatever curl prints for the POST request.
# Returns: curl's exit status.
lume_stop() {
    local vm_name="$1"
    local storage="$2"

    # Escape backslashes and double quotes so the value stays a valid JSON
    # string, and keep the whole payload in one quoted word so a storage path
    # containing spaces or glob characters is not split by the shell.
    local storage_json=${storage//\\/\\\\}
    storage_json=${storage_json//\"/\\\"}

    # NOTE: --max-time bounds the entire request, so the larger
    # --connect-timeout can never be reached; both values are kept unchanged.
    curl --connect-timeout 6000 \
        --max-time 5000 \
        -X POST \
        -H "Content-Type: application/json" \
        -d "{\"storage\":\"${storage_json}\"}" \
        "http://host.docker.internal:3000/lume/vms/${vm_name}/stop"
}
# Start a VM through the Lume HTTP API reachable at host.docker.internal:3000.
#
# Usage: lume_run [--shared-dir=PATH] [--storage NAME] [--no-display] [VM_NAME]
#   --shared-dir=PATH  host directory to share with the VM (default: ~/Projects)
#   --storage NAME     storage location sent in the request (default: ssd)
#   --no-display       accepted for compatibility; the request always sends
#                      "noDisplay": true
#   VM_NAME            any non-option argument; the last one wins
#                      (default: lume_vm)
# Output:  a log of the curl invocation, followed by curl's own output.
# Returns: curl's exit status, or 2 when --storage is given without a value.
lume_run() {
    local shared_dir=""
    local storage="ssd"
    local vm_name="lume_vm"

    while [[ $# -gt 0 ]]; do
        case $1 in
            --shared-dir=*)
                shared_dir="${1#*=}"
                shift
                ;;
            --storage)
                # Without this guard `shift 2` fails without shifting when the
                # value is missing, and the loop would never terminate.
                if [[ $# -lt 2 ]]; then
                    echo "[lume_run] --storage requires a value" >&2
                    return 2
                fi
                storage="$2"
                shift 2
                ;;
            --no-display)
                # No-op: the display is always disabled in the request body.
                shift
                ;;
            *)
                # Assume any non-option argument is the VM name
                vm_name="$1"
                shift
                ;;
        esac
    done

    # Default to ~/Projects if not provided. The tilde is sent literally (it is
    # quoted, so this shell does not expand it).
    # NOTE(review): presumably the Lume API on the host resolves it - confirm.
    if [[ -z "$shared_dir" ]]; then
        shared_dir="~/Projects"
    fi

    # Escape backslashes and double quotes so both values stay valid JSON strings.
    local shared_dir_json=${shared_dir//\\/\\\\}
    shared_dir_json=${shared_dir_json//\"/\\\"}
    local storage_json=${storage//\\/\\\\}
    storage_json=${storage_json//\"/\\\"}

    local json_body="{\"noDisplay\": true, \"sharedDirectories\": [{\"hostPath\": \"$shared_dir_json\", \"readOnly\": false}], \"storage\": \"$storage_json\", \"recoveryMode\": false}"

    # Build the command as an argument array instead of a string passed to
    # `eval`: values containing quotes or shell metacharacters can then neither
    # break the command nor be executed as shell code.
    # NOTE: --max-time bounds the entire request, so the larger
    # --connect-timeout can never be reached; both values are kept unchanged.
    local -a curl_args=(
        --connect-timeout 6000
        --max-time 5000
        -X POST
        -H "Content-Type: application/json"
        -d "$json_body"
        "http://host.docker.internal:3000/lume/vms/${vm_name}/run"
    )

    echo "[lume_run] Running:"
    # %q prints each argument shell-quoted, so the logged line can be pasted
    # back into a shell as-is.
    printf 'curl'
    printf ' %q' "${curl_args[@]}"
    printf '\n'

    curl "${curl_args[@]}"
}

View File

@@ -68,13 +68,51 @@ You can then use the script in your MCP configuration like this:
"CUA_AGENT_LOOP": "OMNI",
"CUA_MODEL_PROVIDER": "ANTHROPIC",
"CUA_MODEL_NAME": "claude-3-7-sonnet-20250219",
"ANTHROPIC_API_KEY": "your-api-key"
"CUA_PROVIDER_API_KEY": "your-api-key"
}
}
}
}
```
## Development Guide
If you want to develop with the cua-mcp-server directly without installation, you can use this configuration:
```json
{
"mcpServers": {
"cua-agent": {
"command": "/bin/bash",
"args": ["~/cua/libs/mcp-server/scripts/start_mcp_server.sh"],
"env": {
"CUA_AGENT_LOOP": "UITARS",
"CUA_MODEL_PROVIDER": "OAICOMPAT",
"CUA_MODEL_NAME": "ByteDance-Seed/UI-TARS-1.5-7B",
"CUA_PROVIDER_BASE_URL": "https://****************.us-east-1.aws.endpoints.huggingface.cloud/v1",
"CUA_PROVIDER_API_KEY": "your-api-key"
}
}
}
}
```
This configuration:
- Uses the `start_mcp_server.sh` script, which sets `PYTHONPATH` to the repository's libraries and runs the server module with the repository's `.venv` Python interpreter
- Works with Claude Desktop, Cursor, or any other MCP client
- Automatically uses your development code without requiring installation
Just add this to your MCP client's configuration and it will use your local development version of the server.
### Troubleshooting
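If you get a `/bin/bash: ~/cua/libs/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, the `~` in the path is most likely not being expanded by your MCP client. Replace it with the absolute path to the script (for example, `/Users/<your-username>/cua/libs/mcp-server/scripts/start_mcp_server.sh`).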
To see the MCP server logs written by Claude Desktop:
```
tail -n 20 -f ~/Library/Logs/Claude/mcp*.log
```
## Claude Desktop Integration
To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`):

View File

@@ -1,9 +1,10 @@
import asyncio
import base64
import logging
import os
import sys
import traceback
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union, Tuple
# Configure logging to output to stderr for debug visibility
logging.basicConfig(
@@ -17,7 +18,7 @@ logger = logging.getLogger("mcp-server")
logger.debug("MCP Server module loading...")
try:
from mcp.server.fastmcp import Context, FastMCP
from mcp.server.fastmcp import Context, FastMCP, Image
logger.debug("Successfully imported FastMCP")
except ImportError as e:
@@ -49,16 +50,37 @@ def serve() -> FastMCP:
server = FastMCP("cua-agent")
@server.tool()
async def run_cua_task(ctx: Context, task: str) -> str:
async def screenshot_cua(ctx: Context) -> Image:
"""
Run a Computer-Use Agent (CUA) task and return the results.
Take a screenshot of the current MacOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state.
Args:
ctx: The MCP context
Returns:
An image resource containing the screenshot
"""
global global_computer
if global_computer is None:
global_computer = Computer(verbosity=logging.INFO)
await global_computer.run()
screenshot = await global_computer.interface.screenshot()
return Image(
format="png",
data=screenshot
)
@server.tool()
async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]:
"""
Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results.
Args:
ctx: The MCP context
task: The instruction or task for the agent to perform
Returns:
A string containing the agent's response
A tuple containing the agent's response and the final screenshot
"""
global global_computer
@@ -72,12 +94,7 @@ def serve() -> FastMCP:
# Determine which loop to use
loop_str = os.getenv("CUA_AGENT_LOOP", "OMNI")
if loop_str == "OPENAI":
loop = AgentLoop.OPENAI
elif loop_str == "ANTHROPIC":
loop = AgentLoop.ANTHROPIC
else:
loop = AgentLoop.OMNI
loop = getattr(AgentLoop, loop_str)
# Determine provider
provider_str = os.getenv("CUA_MODEL_PROVIDER", "ANTHROPIC")
@@ -89,6 +106,9 @@ def serve() -> FastMCP:
# Get base URL for provider (if needed)
provider_base_url = os.getenv("CUA_PROVIDER_BASE_URL", None)
# Get api key for provider (if needed)
api_key = os.getenv("CUA_PROVIDER_API_KEY", None)
# Create agent with the specified configuration
agent = ComputerAgent(
computer=global_computer,
@@ -98,6 +118,7 @@ def serve() -> FastMCP:
name=model_name,
provider_base_url=provider_base_url,
),
api_key=api_key,
save_trajectory=False,
only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
verbosity=logging.INFO,
@@ -107,33 +128,34 @@ def serve() -> FastMCP:
full_result = ""
async for result in agent.run(task):
logger.info(f"Agent step complete: {result.get('id', 'unknown')}")
ctx.info(f"Agent step complete: {result.get('id', 'unknown')}")
# Add response ID to output
full_result += f"\n[Response ID: {result.get('id', 'unknown')}]\n"
# Extract and concatenate text responses
if "text" in result:
# Handle both string and dict responses
text_response = result.get("text", "")
if isinstance(text_response, str):
full_result += f"Response: {text_response}\n"
else:
# If it's a dict or other structure, convert to string representation
full_result += f"Response: {str(text_response)}\n"
# Log detailed information
if "tools" in result:
tools_info = result.get("tools")
logger.debug(f"Tools used: {tools_info}")
full_result += f"\nTools used: {tools_info}\n"
if "content" in result:
full_result += f"Response: {result.get('content', '')}\n"
# Process output if available
outputs = result.get("output", [])
for output in outputs:
output_type = output.get("type")
if output_type == "reasoning":
if output_type == "message":
logger.debug(f"Message: {output}")
content = output.get("content", [])
for content_part in content:
if content_part.get("text"):
full_result += f"\nMessage: {content_part.get('text', '')}\n"
elif output_type == "reasoning":
logger.debug(f"Reasoning: {output}")
full_result += f"\nReasoning: {output.get('content', '')}\n"
summary_content = output.get("summary", [])
if summary_content:
for summary_part in summary_content:
if summary_part.get("text"):
full_result += f"\nReasoning: {summary_part.get('text', '')}\n"
else:
full_result += f"\nReasoning: {output.get('text', output.get('content', ''))}\n"
elif output_type == "computer_call":
logger.debug(f"Computer call: {output}")
action = output.get("action", "")
@@ -144,17 +166,25 @@ def serve() -> FastMCP:
full_result += "\n" + "-" * 40 + "\n"
logger.info(f"CUA task completed successfully")
return full_result or "Task completed with no text output."
ctx.info(f"CUA task completed successfully")
return (
full_result or "Task completed with no text output.",
Image(
format="png",
data=await global_computer.interface.screenshot()
)
)
except Exception as e:
error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}"
logger.error(error_msg)
ctx.error(error_msg)
return f"Error during task execution: {str(e)}"
@server.tool()
async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> str:
async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List:
"""
Run multiple CUA tasks in sequence and return the combined results.
Run multiple CUA tasks in a MacOS VM in sequence and return the combined results.
Args:
ctx: The MCP context
@@ -164,13 +194,15 @@ def serve() -> FastMCP:
Combined results from all tasks
"""
results = []
for i, task in enumerate(tasks):
logger.info(f"Running task {i+1}/{len(tasks)}: {task}")
result = await run_cua_task(ctx, task)
results.append(f"Task {i+1}: {task}\nResult: {result}\n")
return "\n".join(results)
ctx.info(f"Running task {i+1}/{len(tasks)}: {task}")
ctx.report_progress(i / len(tasks))
results.extend(await run_cua_task(ctx, task))
ctx.report_progress((i + 1) / len(tasks))
return results
return server

View File

@@ -0,0 +1,14 @@
#!/bin/bash
# Launch the CUA MCP server straight from a source checkout, without
# installing the packages: point PYTHONPATH at the repository's libraries and
# run the server module with the repository's virtualenv interpreter.
set -e
# Set the CUA repository path based on script location
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# The repository root is three directory levels above this script.
CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )"
PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python"
# Fail early with a readable message when the virtualenv is missing. The
# message goes to stderr so it does not mix with the server's stdout.
if [ ! -x "$PYTHON_PATH" ]; then
    echo "Python interpreter not found or not executable: $PYTHON_PATH" >&2
    exit 1
fi
# Set Python path to include all necessary libraries
export PYTHONPATH="${CUA_REPO_DIR}/libs/mcp-server:${CUA_REPO_DIR}/libs/agent:${CUA_REPO_DIR}/libs/computer:${CUA_REPO_DIR}/libs/core:${CUA_REPO_DIR}/libs/pylume"
# Run the MCP server directly as a module. The interpreter path is quoted so a
# checkout located under a directory containing spaces still works, and `exec`
# replaces this shell so signals from the MCP client reach the server directly.
exec "$PYTHON_PATH" -m mcp_server.server

View File

@@ -145,9 +145,8 @@
" await computer.interface.press_key(key)\n",
" \n",
" elif action_type == \"wait\":\n",
" wait_time = action.time\n",
" print(f\"Waiting for {wait_time} seconds\")\n",
" await asyncio.sleep(wait_time)\n",
" print(f\"Waiting for 2 seconds\")\n",
" await asyncio.sleep(2)\n",
" \n",
" elif action_type == \"screenshot\":\n",
" print(\"Taking screenshot\")\n",