diff --git a/.all-contributorsrc b/.all-contributorsrc index d1b3578e..503f0e94 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -151,6 +151,15 @@ "contributions": [ "code" ] + }, + { + "login": "FinnBorge", + "name": "FinnBorge", + "avatar_url": "https://avatars.githubusercontent.com/u/9272726?v=4", + "profile": "https://github.com/FinnBorge", + "contributions": [ + "code" + ] } ] } diff --git a/.gitignore b/.gitignore index ce8445bf..8265a5a1 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,8 @@ dist/ downloads/ eggs/ .eggs/ -lib/ +lib/* +!libs/lumier/src/lib/ lib64/ parts/ sdist/ @@ -242,4 +243,7 @@ trajectories/ .storage/ # Gradio settings -.gradio_settings.json \ No newline at end of file +.gradio_settings.json + +# Lumier Storage +storage/ \ No newline at end of file diff --git a/.vscode/lumier.code-workspace b/.vscode/lumier.code-workspace new file mode 100644 index 00000000..26e12846 --- /dev/null +++ b/.vscode/lumier.code-workspace @@ -0,0 +1,30 @@ +{ + "folders": [ + { + "name": "lumier", + "path": "../libs/lumier" + }, + { + "name": "lume", + "path": "../libs/lume" + } + ], + "settings": { + "files.exclude": { + "**/.git": true, + "**/.svn": true, + "**/.hg": true, + "**/CVS": true, + "**/.DS_Store": true + } + }, + "tasks": { + "version": "2.0.0", + "tasks": [ + ] + }, + "launch": { + "configurations": [ + ] + } +} \ No newline at end of file diff --git a/README.md b/README.md index b0630760..04e7f054 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,13 @@ If you only need the virtualization capabilities: /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` +Optionally, if you don't want Lume to run as a background service: +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh) --no-background-service" +``` + +**Note:** If you choose this option, you'll need to manually start the Lume API service whenever needed by 
running `lume serve` in your terminal. This applies to Option 2 after completing step 1. + For Lume usage instructions, refer to the [Lume documentation](./libs/lume/README.md). ### Option 2: Full Computer-Use Agent Capabilities @@ -62,17 +69,12 @@ If you want to use AI agents with virtualized environments: lume pull macos-sequoia-cua:latest ``` -3. Start Lume daemon service: - ```bash - lume serve - ``` - -4. Install the Python libraries: +3. Install the Python libraries: ```bash pip install cua-computer cua-agent[all] ``` -5. Use the libraries in your Python code: +4. Use the libraries in your Python code: ```python from computer import Computer from agent import ComputerAgent, LLM, AgentLoop, LLMProvider @@ -80,7 +82,7 @@ If you want to use AI agents with virtualized environments: async with Computer(verbosity=logging.DEBUG) as macos_computer: agent = ComputerAgent( computer=macos_computer, - loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC + loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC model=LLM(provider=LLMProvider.OPENAI) # or LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit") ) @@ -95,7 +97,7 @@ If you want to use AI agents with virtualized environments: Explore the [Agent Notebook](./notebooks/) for a ready-to-run example. -6. Optionally, you can use the Agent with a Gradio UI: +5. Optionally, you can use the Agent with a Gradio UI: ```python from utils import load_dotenv_files @@ -228,6 +230,7 @@ Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonica Rahim Nathwani
Rahim Nathwani

💻 Matt Speck
Matt Speck

💻 + FinnBorge
FinnBorge

💻 diff --git a/libs/agent/README.md b/libs/agent/README.md index bc4bce32..07f3d3fd 100644 --- a/libs/agent/README.md +++ b/libs/agent/README.md @@ -50,10 +50,10 @@ async with Computer() as macos_computer: # model=LLM(provider=LLMProvider.ANTHROPIC) # or # loop=AgentLoop.OMNI, - # model=LLM(provider=LLMProvider.OLLAMA, model="gemma3") + # model=LLM(provider=LLMProvider.OLLAMA, name="gemma3") # or # loop=AgentLoop.UITARS, - # model=LLM(provider=LLMProvider.OAICOMPAT, model="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1") + # model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1") ) tasks = [ diff --git a/libs/agent/agent/providers/anthropic/loop.py b/libs/agent/agent/providers/anthropic/loop.py index 0ccdc79a..130a43cb 100644 --- a/libs/agent/agent/providers/anthropic/loop.py +++ b/libs/agent/agent/providers/anthropic/loop.py @@ -279,6 +279,8 @@ class AnthropicLoop(BaseLoop): messages, model=self.model, ) + # Log standardized response for ease of parsing + self._log_api_call("agent_response", request=None, response=openai_compatible_response) await queue.put(openai_compatible_response) if not should_continue: diff --git a/libs/agent/agent/providers/anthropic/tools/computer.py b/libs/agent/agent/providers/anthropic/tools/computer.py index 8425f35f..ecf232bd 100644 --- a/libs/agent/agent/providers/anthropic/tools/computer.py +++ b/libs/agent/agent/providers/anthropic/tools/computer.py @@ -161,15 +161,17 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool): self.logger.info(f"Moving cursor to ({x}, {y})") await self.computer.interface.move_cursor(x, y) elif action == "left_click_drag": - self.logger.info(f"Dragging from ({x}, {y})") - # First move to the position - await self.computer.interface.move_cursor(x, y) - # Then perform drag operation - check if drag_to exists or we need to use other methods - 
try: - await self.computer.interface.drag_to(x, y) - except Exception as e: - self.logger.error(f"Error during drag operation: {str(e)}") - raise ToolError(f"Failed to perform drag: {str(e)}") + # Get the start coordinate from kwargs + start_coordinate = kwargs.get("start_coordinate") + if not start_coordinate: + raise ToolError("start_coordinate is required for left_click_drag action") + + start_x, start_y = start_coordinate + end_x, end_y = x, y + + self.logger.info(f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y})") + await self.computer.interface.move_cursor(start_x, start_y) + await self.computer.interface.drag_to(end_x, end_y) # Wait briefly for any UI changes await asyncio.sleep(0.5) diff --git a/libs/agent/agent/providers/omni/loop.py b/libs/agent/agent/providers/omni/loop.py index b53c120c..18e0375f 100644 --- a/libs/agent/agent/providers/omni/loop.py +++ b/libs/agent/agent/providers/omni/loop.py @@ -670,6 +670,8 @@ class OmniLoop(BaseLoop): parsed_screen=parsed_screen, parser=self.parser ) + # Log standardized response for ease of parsing + self._log_api_call("agent_response", request=None, response=openai_compatible_response) # Yield the response to the caller yield openai_compatible_response diff --git a/libs/agent/agent/providers/openai/loop.py b/libs/agent/agent/providers/openai/loop.py index 8e507a1b..c4e0dfb5 100644 --- a/libs/agent/agent/providers/openai/loop.py +++ b/libs/agent/agent/providers/openai/loop.py @@ -276,6 +276,10 @@ class OpenAILoop(BaseLoop): ) # Don't reset last_response_id to None - keep the previous value if available + + # Log standardized response for ease of parsing + # Since this is the openAI responses format, we don't need to convert it to agent response format + self._log_api_call("agent_response", request=None, response=response) # Process API response await queue.put(response) diff --git a/libs/agent/agent/providers/openai/tools/computer.py b/libs/agent/agent/providers/openai/tools/computer.py index 
ae4fdce8..c5602f4e 100644 --- a/libs/agent/agent/providers/openai/tools/computer.py +++ b/libs/agent/agent/providers/openai/tools/computer.py @@ -44,6 +44,7 @@ Action = Literal[ "double_click", "screenshot", "scroll", + "drag", ] @@ -162,9 +163,14 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool): y = kwargs.get("y") if x is None or y is None: raise ToolError("x and y coordinates are required for scroll action") - scroll_x = kwargs.get("scroll_x", 0) // 20 - scroll_y = kwargs.get("scroll_y", 0) // 20 + scroll_x = kwargs.get("scroll_x", 0) // 50 + scroll_y = kwargs.get("scroll_y", 0) // 50 return await self.handle_scroll(x, y, scroll_x, scroll_y) + elif type == "drag": + path = kwargs.get("path") + if not path or not isinstance(path, list) or len(path) < 2: + raise ToolError("path is required for drag action and must contain at least 2 points") + return await self.handle_drag(path) elif type == "screenshot": return await self.screenshot() elif type == "wait": @@ -240,11 +246,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool): if len(mapped_keys) > 1: # For key combinations (like Ctrl+C) - for k in mapped_keys: - await self.computer.interface.press_key(k) - await asyncio.sleep(0.1) - for k in reversed(mapped_keys): - await self.computer.interface.press_key(k) + await self.computer.interface.hotkey(*mapped_keys) else: # Single key press await self.computer.interface.press_key(mapped_keys[0]) @@ -306,6 +308,41 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool): self.logger.error(f"Error in handle_scroll: {str(e)}") raise ToolError(f"Failed to scroll at ({x}, {y}): {str(e)}") + async def handle_drag(self, path: List[Dict[str, int]]) -> ToolResult: + """Handle mouse drag operation using a path of coordinates. + + Args: + path: List of coordinate points {"x": int, "y": int} defining the drag path + + Returns: + ToolResult with the operation result and screenshot + """ + try: + # Convert from [{"x": x, "y": y}, ...] format to [(x, y), ...] 
format + points = [(p["x"], p["y"]) for p in path] + + # Perform drag action + if len(points) == 2: + await self.computer.interface.move_cursor(points[0][0], points[0][1]) + await self.computer.interface.drag_to(points[1][0], points[1][1]) + else: + await self.computer.interface.drag(points, button="left") + + # Wait for UI to update + await asyncio.sleep(0.5) + + # Take screenshot after action + screenshot = await self.computer.interface.screenshot() + base64_screenshot = base64.b64encode(screenshot).decode("utf-8") + + return ToolResult( + output=f"Dragged from ({path[0]['x']}, {path[0]['y']}) to ({path[-1]['x']}, {path[-1]['y']})", + base64_image=base64_screenshot, + ) + except Exception as e: + self.logger.error(f"Error in handle_drag: {str(e)}") + raise ToolError(f"Failed to perform drag operation: {str(e)}") + async def screenshot(self) -> ToolResult: """Take a screenshot.""" try: diff --git a/libs/agent/agent/providers/uitars/clients/oaicompat.py b/libs/agent/agent/providers/uitars/clients/oaicompat.py index 4567360b..423b1d3a 100644 --- a/libs/agent/agent/providers/uitars/clients/oaicompat.py +++ b/libs/agent/agent/providers/uitars/clients/oaicompat.py @@ -94,8 +94,15 @@ class OAICompatClient(BaseUITarsClient): """ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} - final_messages = [{"role": "system", "content": system}] - + final_messages = [ + { + "role": "system", + "content": [ + { "type": "text", "text": system } + ] + } + ] + # Process messages for item in messages: if isinstance(item, dict): @@ -138,8 +145,13 @@ class OAICompatClient(BaseUITarsClient): message = {"role": "user", "content": [{"type": "text", "text": item}]} final_messages.append(message) - payload = {"model": self.model, "messages": final_messages, "temperature": self.temperature} - payload["max_tokens"] = max_tokens or self.max_tokens + payload = { + "model": self.model, + "messages": final_messages, + "max_tokens": max_tokens or 
self.max_tokens, + "temperature": self.temperature, + "top_p": 0.7, + } try: async with aiohttp.ClientSession() as session: @@ -178,25 +190,21 @@ class OAICompatClient(BaseUITarsClient): response_text = await response.text() logger.debug(f"Response content: {response_text}") + # if 503, then the endpoint is still warming up + if response.status == 503: + logger.error(f"Endpoint is still warming up, please try again later") + raise Exception(f"Endpoint is still warming up: {response_text}") + # Try to parse as JSON if the content type is appropriate if "application/json" in response.headers.get('Content-Type', ''): response_json = await response.json() else: raise Exception(f"Response is not JSON format") - # # Optionally try to parse it anyway - # try: - # import json - # response_json = json.loads(response_text) - # except json.JSONDecodeError as e: - # print(f"Failed to parse response as JSON: {e}") if response.status != 200: - error_msg = response_json.get("error", {}).get( - "message", str(response_json) - ) - logger.error(f"Error in API call: {error_msg}") - raise Exception(f"API error: {error_msg}") - + logger.error(f"Error in API call: {response_text}") + raise Exception(f"API error: {response_text}") + return response_json except Exception as e: diff --git a/libs/agent/agent/providers/uitars/loop.py b/libs/agent/agent/providers/uitars/loop.py index c0ea6c73..848e3504 100644 --- a/libs/agent/agent/providers/uitars/loop.py +++ b/libs/agent/agent/providers/uitars/loop.py @@ -17,10 +17,10 @@ from ...core.types import AgentResponse, LLMProvider from ...core.visualization import VisualizationHelper from computer import Computer -from .utils import add_box_token, parse_actions, parse_action_parameters +from .utils import add_box_token, parse_actions, parse_action_parameters, to_agent_response_format from .tools.manager import ToolManager from .tools.computer import ToolResult -from .prompts import COMPUTER_USE, SYSTEM_PROMPT +from .prompts import COMPUTER_USE, 
SYSTEM_PROMPT, MAC_SPECIFIC_NOTES from .clients.oaicompat import OAICompatClient from .clients.mlxvlm import MLXVLMUITarsClient @@ -197,7 +197,7 @@ class UITARSLoop(BaseLoop): if first_user_idx is not None and instruction: # Create the computer use prompt user_prompt = COMPUTER_USE.format( - instruction=instruction, + instruction='\n'.join([instruction, MAC_SPECIFIC_NOTES]), language="English" ) @@ -453,7 +453,7 @@ class UITARSLoop(BaseLoop): # MAIN LOOP - IMPLEMENTING ABSTRACT METHOD ########################################### - async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[Dict[str, Any], None]: + async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]: """Run the agent loop with provided messages. Args: @@ -520,41 +520,16 @@ class UITARSLoop(BaseLoop): # Update whether an action screenshot was saved this turn action_screenshot_saved = action_screenshot_saved or new_screenshot_saved - - # Parse actions from the raw response - raw_response = response["choices"][0]["message"]["content"] - parsed_actions = parse_actions(raw_response) - # Extract thought content if available - thought = "" - if "Thought:" in raw_response: - thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", raw_response, re.DOTALL) - if thought_match: - thought = thought_match.group(1).strip() + agent_response = await to_agent_response_format( + response, + messages, + model=self.model, + ) + # Log standardized response for ease of parsing + self._log_api_call("agent_response", request=None, response=agent_response) + yield agent_response - # Create standardized thought response format - thought_response = { - "role": "assistant", - "content": thought or raw_response, - "metadata": { - "title": "🧠 UI-TARS Thoughts" - } - } - - # Create action response format - action_response = { - "role": "assistant", - "content": str(parsed_actions), - "metadata": { - "title": "🖱️ UI-TARS Actions", - } - } - - # Yield both responses to the caller 
(thoughts first, then actions) - yield thought_response - if parsed_actions: - yield action_response - # Check if we should continue this conversation running = should_continue @@ -575,7 +550,8 @@ class UITARSLoop(BaseLoop): logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}") yield { - "error": str(e), + "role": "assistant", + "content": f"Error: {str(e)}", "metadata": {"title": "❌ Error"}, } diff --git a/libs/agent/agent/providers/uitars/prompts.py b/libs/agent/agent/providers/uitars/prompts.py index aa24557d..fe16f0d8 100644 --- a/libs/agent/agent/providers/uitars/prompts.py +++ b/libs/agent/agent/providers/uitars/prompts.py @@ -1,5 +1,9 @@ """Prompts for UI-TARS agent.""" +MAC_SPECIFIC_NOTES = """ +(You are operating on macOS, use 'cmd' instead of 'ctrl' for most shortcuts e.g., hotkey(key='cmd c') for copy, hotkey(key='cmd v') for paste, hotkey(key='cmd t') for new tab).) +""" + SYSTEM_PROMPT = "You are a helpful assistant." COMPUTER_USE = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
@@ -56,4 +60,4 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par ## User Instruction {instruction} -""" \ No newline at end of file +""" diff --git a/libs/agent/agent/providers/uitars/tools/computer.py b/libs/agent/agent/providers/uitars/tools/computer.py index 5cf7f67a..4d5f2ce3 100644 --- a/libs/agent/agent/providers/uitars/tools/computer.py +++ b/libs/agent/agent/providers/uitars/tools/computer.py @@ -173,9 +173,13 @@ class ComputerTool(BaseComputerTool): elif action == "hotkey": if "keys" in kwargs: keys = kwargs["keys"] - for key in keys: - await self.computer.interface.press_key(key) + if len(keys) > 1: + await self.computer.interface.hotkey(*keys) + else: + # Single key press + await self.computer.interface.press_key(keys[0]) + # Wait for UI to update await asyncio.sleep(0.3) diff --git a/libs/agent/agent/providers/uitars/utils.py b/libs/agent/agent/providers/uitars/utils.py index 00565b88..cc904115 100644 --- a/libs/agent/agent/providers/uitars/utils.py +++ b/libs/agent/agent/providers/uitars/utils.py @@ -4,9 +4,114 @@ import logging import base64 import re from typing import Any, Dict, List, Optional, Union, Tuple +from datetime import datetime logger = logging.getLogger(__name__) +from ...core.types import AgentResponse + +async def to_agent_response_format( + response: Dict[str, Any], + messages: List[Dict[str, Any]], + model: Optional[str] = None, +) -> AgentResponse: + """Convert raw UI-TARS response to agent response format. 
+ + Args: + response: Raw UI-TARS response + messages: List of messages in standard format + model: Optional model name + + Returns: + AgentResponse: Standardized agent response format + """ + # Create unique IDs for this response + response_id = f"resp_{datetime.now().strftime('%Y%m%d%H%M%S')}_{id(response)}" + reasoning_id = f"rs_{response_id}" + action_id = f"cu_{response_id}" + call_id = f"call_{response_id}" + + # Parse actions from the raw response + content = response["choices"][0]["message"]["content"] + actions = parse_actions(content) + + # Extract thought content if available + reasoning_text = "" + if "Thought:" in content: + thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", content, re.DOTALL) + if thought_match: + reasoning_text = thought_match.group(1).strip() + + # Create output items + output_items = [] + if reasoning_text: + output_items.append({ + "type": "reasoning", + "id": reasoning_id, + "text": reasoning_text + }) + if actions: + for i, action in enumerate(actions): + action_name, tool_args = parse_action_parameters(action) + if action_name == "finished": + output_items.append({ + "type": "message", + "role": "assistant", + "content": [{ + "type": "output_text", + "text": tool_args["content"] + }], + "id": f"action_{i}_{action_id}", + "status": "completed" + }) + else: + if tool_args.get("action") == action_name: + del tool_args["action"] + output_items.append({ + "type": "computer_call", + "id": f"{action}_{i}_{action_id}", + "call_id": f"call_{i}_{action_id}", + "action": { "type": action_name, **tool_args }, + "pending_safety_checks": [], + "status": "completed" + }) + + # Create agent response + agent_response = AgentResponse( + id=response_id, + object="response", + created_at=int(datetime.now().timestamp()), + status="completed", + error=None, + incomplete_details=None, + instructions=None, + max_output_tokens=None, + model=model or response["model"], + output=output_items, + parallel_tool_calls=True, + 
previous_response_id=None, + reasoning={"effort": "medium"}, + store=True, + temperature=0.0, + top_p=0.7, + text={"format": {"type": "text"}}, + tool_choice="auto", + tools=[ + { + "type": "computer_use_preview", + "display_height": 768, + "display_width": 1024, + "environment": "mac", + } + ], + truncation="auto", + usage=response["usage"], + user=None, + metadata={}, + response=response + ) + return agent_response + def add_box_token(input_string: str) -> str: """Add box tokens to the coordinates in the model response. @@ -74,7 +179,13 @@ def parse_action_parameters(action: str) -> Tuple[str, Dict[str, Any]]: """ # Handle "finished" action if action.startswith("finished"): - return "finished", {} + # Parse content if it exists + content_match = re.search(r"content='([^']*)'", action) + if content_match: + content = content_match.group(1) + return "finished", {"content": content} + else: + return "finished", {} # Parse action parameters action_match = re.match(r'(\w+)\((.*)\)', action) diff --git a/libs/agent/agent/ui/gradio/app.py b/libs/agent/agent/ui/gradio/app.py index 354580d7..cd5c14c9 100644 --- a/libs/agent/agent/ui/gradio/app.py +++ b/libs/agent/agent/ui/gradio/app.py @@ -35,6 +35,7 @@ from pathlib import Path from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union import gradio as gr from gradio.components.chatbot import MetadataDict +from typing import cast # Import from agent package from agent.core.types import AgentResponse @@ -332,63 +333,6 @@ def get_ollama_models() -> List[str]: logging.error(f"Error getting Ollama models: {e}") return [] - -def extract_synthesized_text( - result: Union[AgentResponse, Dict[str, Any]], -) -> Tuple[str, MetadataDict]: - """Extract synthesized text from the agent result.""" - synthesized_text = "" - metadata = MetadataDict() - - if "output" in result and result["output"]: - for output in result["output"]: - if output.get("type") == "reasoning": - metadata["title"] = "🧠 Reasoning" - content = 
output.get("content", "") - if content: - synthesized_text += f"{content}\n" - elif output.get("type") == "message": - # Handle message type outputs - can contain rich content - content = output.get("content", []) - - # Content is usually an array of content blocks - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "output_text": - text_value = block.get("text", "") - if text_value: - synthesized_text += f"{text_value}\n" - - elif output.get("type") == "computer_call": - action = output.get("action", {}) - action_type = action.get("type", "") - - # Create a descriptive text about the action - if action_type == "click": - button = action.get("button", "") - x = action.get("x", "") - y = action.get("y", "") - synthesized_text += f"Clicked {button} at position ({x}, {y}).\n" - elif action_type == "type": - text = action.get("text", "") - synthesized_text += f"Typed: {text}.\n" - elif action_type == "keypress": - # Extract key correctly from either keys array or key field - if isinstance(action.get("keys"), list): - key = ", ".join(action.get("keys")) - else: - key = action.get("key", "") - - synthesized_text += f"Pressed key: {key}\n" - else: - synthesized_text += f"Performed {action_type} action.\n" - - metadata["status"] = "done" - metadata["title"] = f"🛠️ {synthesized_text.strip().splitlines()[-1]}" - - return synthesized_text.strip(), metadata - - def create_computer_instance(verbosity: int = logging.INFO) -> Computer: """Create or get the global Computer instance.""" global global_computer @@ -457,66 +401,6 @@ def create_agent( return global_agent - -def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> Tuple[str, MetadataDict]: - """Process agent results for the Gradio UI.""" - # Extract text content - text_obj = result.get("text", {}) - metadata = result.get("metadata", {}) - - # Create a properly typed MetadataDict - metadata_dict = MetadataDict() - metadata_dict["title"] = 
metadata.get("title", "") - metadata_dict["status"] = "done" - metadata = metadata_dict - - # For OpenAI's Computer-Use Agent, text field is an object with format property - if ( - text_obj - and isinstance(text_obj, dict) - and "format" in text_obj - and not text_obj.get("value", "") - ): - content, metadata = extract_synthesized_text(result) - else: - if not text_obj: - text_obj = result - - # For other types of results, try to get text directly - if isinstance(text_obj, dict): - if "value" in text_obj: - content = text_obj["value"] - elif "text" in text_obj: - content = text_obj["text"] - elif "content" in text_obj: - content = text_obj["content"] - else: - content = "" - else: - content = str(text_obj) if text_obj else "" - - # If still no content but we have outputs, create a summary - if not content and "output" in result and result["output"]: - output = result["output"] - for out in output: - if out.get("type") == "reasoning": - content = out.get("content", "") - if content: - break - elif out.get("type") == "computer_call": - action = out.get("action", {}) - action_type = action.get("type", "") - if action_type: - content = f"Performing action: {action_type}" - break - - # Clean up the text - ensure content is a string - if not isinstance(content, str): - content = str(content) if content else "" - - return content, metadata - - def create_gradio_ui( provider_name: str = "openai", model_name: str = "gpt-4o", @@ -921,17 +805,64 @@ def create_gradio_ui( # Stream responses from the agent async for result in global_agent.run(last_user_message): - # Process result - content, metadata = process_agent_result(result) - - # Skip empty content - if content or metadata.get("title"): - history.append( - gr.ChatMessage( - role="assistant", content=content, metadata=metadata + print(f"DEBUG - Agent response ------- START") + from pprint import pprint + pprint(result) + print(f"DEBUG - Agent response ------- END") + + def generate_gradio_messages(): + if 
result.get("content"): + yield gr.ChatMessage( + role="assistant", + content=result.get("content", ""), + metadata=cast(MetadataDict, result.get("metadata", {})) ) - ) - yield history + else: + outputs = result.get("output", []) + for output in outputs: + if output.get("type") == "message": + content = output.get("content", []) + for content_part in content: + if content_part.get("text"): + yield gr.ChatMessage( + role=output.get("role", "assistant"), + content=content_part.get("text", ""), + metadata=content_part.get("metadata", {}) + ) + elif output.get("type") == "reasoning": + # if it's openAI, we only have access to a summary of the reasoning + summary_content = output.get("summary", []) + if summary_content: + for summary_part in summary_content: + if summary_part.get("type") == "summary_text": + yield gr.ChatMessage( + role="assistant", + content=summary_part.get("text", "") + ) + else: + summary_content = output.get("text", "") + if summary_content: + yield gr.ChatMessage( + role="assistant", + content=summary_content, + ) + elif output.get("type") == "computer_call": + action = output.get("action", {}) + action_type = action.get("type", "") + if action_type: + action_title = f"🛠️ Performing {action_type}" + if action.get("x") and action.get("y"): + action_title += f" at ({action['x']}, {action['y']})" + yield gr.ChatMessage( + role="assistant", + content=f"```json\n{json.dumps(action)}\n```", + metadata={"title": action_title} + ) + + for message in generate_gradio_messages(): + history.append(message) + yield history + except Exception as e: import traceback diff --git a/libs/computer-server/computer_server/handlers/base.py b/libs/computer-server/computer_server/handlers/base.py index 818d367c..08d57ad5 100644 --- a/libs/computer-server/computer_server/handlers/base.py +++ b/libs/computer-server/computer_server/handlers/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Optional, Dict, Any +from typing import Optional, Dict, 
Any, List, Tuple class BaseAccessibilityHandler(ABC): """Abstract base class for OS-specific accessibility handlers.""" @@ -59,6 +59,17 @@ class BaseAutomationHandler(ABC): duration: How long the drag should take in seconds """ pass + + @abstractmethod + async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> Dict[str, Any]: + """Drag the cursor from current position to specified coordinates. + + Args: + path: A list of tuples of x and y coordinates to drag to + button: The mouse button to use ('left', 'middle', 'right') + duration: How long the drag should take in seconds + """ + pass # Keyboard Actions @abstractmethod diff --git a/libs/computer-server/computer_server/handlers/macos.py b/libs/computer-server/computer_server/handlers/macos.py index 180f083a..abdedc41 100644 --- a/libs/computer-server/computer_server/handlers/macos.py +++ b/libs/computer-server/computer_server/handlers/macos.py @@ -1,7 +1,7 @@ import pyautogui import base64 from io import BytesIO -from typing import Optional, Dict, Any, List +from typing import Optional, Dict, Any, List, Tuple from ctypes import byref, c_void_p, POINTER from AppKit import NSWorkspace # type: ignore import AppKit @@ -563,6 +563,39 @@ class MacOSAutomationHandler(BaseAutomationHandler): except Exception as e: return {"success": False, "error": str(e)} + async def drag( + self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: + try: + if not path or len(path) < 2: + return {"success": False, "error": "Path must contain at least 2 points"} + + # Move to the first point + start_x, start_y = path[0] + pyautogui.moveTo(start_x, start_y) + + # Press the mouse button + pyautogui.mouseDown(button=button) + + # Calculate time between points to distribute duration evenly + step_duration = duration / (len(path) - 1) if len(path) > 1 else duration + + # Move through each subsequent point + for x, y in path[1:]: + pyautogui.moveTo(x, y, 
duration=step_duration) + + # Release the mouse button + pyautogui.mouseUp(button=button) + + return {"success": True} + except Exception as e: + # Make sure to release the mouse button if an error occurs + try: + pyautogui.mouseUp(button=button) + except: + pass + return {"success": False, "error": str(e)} + # Keyboard Actions async def type_text(self, text: str) -> Dict[str, Any]: try: diff --git a/libs/computer-server/computer_server/main.py b/libs/computer-server/computer_server/main.py index c95918d8..d7f66f89 100644 --- a/libs/computer-server/computer_server/main.py +++ b/libs/computer-server/computer_server/main.py @@ -65,6 +65,7 @@ async def websocket_endpoint(websocket: WebSocket): "type_text": manager.automation_handler.type_text, "press_key": manager.automation_handler.press_key, "drag_to": manager.automation_handler.drag_to, + "drag": manager.automation_handler.drag, "hotkey": manager.automation_handler.hotkey, "get_cursor_position": manager.automation_handler.get_cursor_position, "get_screen_size": manager.automation_handler.get_screen_size, diff --git a/libs/computer/computer/computer.py b/libs/computer/computer/computer.py index f4d9d9bf..ddb68f9e 100644 --- a/libs/computer/computer/computer.py +++ b/libs/computer/computer/computer.py @@ -29,7 +29,7 @@ class Computer: display: Union[Display, Dict[str, int], str] = "1024x768", memory: str = "8GB", cpu: str = "4", - os: OSType = "macos", + os_type: OSType = "macos", name: str = "", image: str = "macos-sequoia-cua:latest", shared_directories: Optional[List[str]] = None, @@ -68,6 +68,7 @@ class Computer: self.image = image self.port = port self.host = host + self.os_type = os_type # Store telemetry preference self._telemetry_enabled = telemetry_enabled @@ -129,8 +130,8 @@ class Computer: self.shared_paths = [] if shared_directories: for path in shared_directories: - abs_path = os.path.abspath(os.path.expanduser(path)) # type: ignore[attr-defined] - if not os.path.exists(abs_path): # type: 
ignore[attr-defined] + abs_path = os.path.abspath(os.path.expanduser(path)) + if not os.path.exists(abs_path): raise ValueError(f"Shared directory does not exist: {path}") self.shared_paths.append(abs_path) self._pylume_context = None @@ -188,7 +189,7 @@ class Computer: self._interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( - os=self.os, ip_address=ip_address # type: ignore[arg-type] + os=self.os_type, ip_address=ip_address # type: ignore[arg-type] ), ) @@ -288,13 +289,13 @@ class Computer: try: # Initialize the interface using the factory with the specified OS - self.logger.info(f"Initializing interface for {self.os} at {ip_address}") + self.logger.info(f"Initializing interface for {self.os_type} at {ip_address}") from .interface.base import BaseComputerInterface self._interface = cast( BaseComputerInterface, InterfaceFactory.create_interface_for_os( - os=self.os, ip_address=ip_address # type: ignore[arg-type] + os=self.os_type, ip_address=ip_address # type: ignore[arg-type] ), ) diff --git a/libs/computer/computer/interface/base.py b/libs/computer/computer/interface/base.py index 31106c14..8fcbd21c 100644 --- a/libs/computer/computer/interface/base.py +++ b/libs/computer/computer/interface/base.py @@ -79,6 +79,17 @@ class BaseComputerInterface(ABC): """ pass + @abstractmethod + async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None: + """Drag the cursor along a path of coordinates. 
+ + Args: + path: List of (x, y) coordinate tuples defining the drag path + button: The mouse button to use ('left', 'middle', 'right') + duration: Total time in seconds that the drag operation should take + """ + pass + # Keyboard Actions @abstractmethod async def type_text(self, text: str) -> None: diff --git a/libs/computer/computer/interface/macos.py b/libs/computer/computer/interface/macos.py index a3b99f7d..2460086c 100644 --- a/libs/computer/computer/interface/macos.py +++ b/libs/computer/computer/interface/macos.py @@ -328,6 +328,11 @@ class MacOSComputerInterface(BaseComputerInterface): "drag_to", {"x": x, "y": y, "button": button, "duration": duration} ) + async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None: + await self._send_command( + "drag", {"path": path, "button": button, "duration": duration} + ) + # Keyboard Actions async def type_text(self, text: str) -> None: await self._send_command("type_text", {"text": text}) diff --git a/libs/computer/computer/interface/models.py b/libs/computer/computer/interface/models.py index b586a9f7..e8ec1b47 100644 --- a/libs/computer/computer/interface/models.py +++ b/libs/computer/computer/interface/models.py @@ -7,6 +7,9 @@ NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'u # Special key literals SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del'] +# Modifier key literals +ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option'] + # Function key literals FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12'] @@ -35,6 +38,14 @@ class Key(Enum): BACKSPACE = 'backspace' DELETE = 'del' + # Modifier keys + ALT = 'alt' + CTRL = 'ctrl' + SHIFT = 'shift' + WIN = 'win' + COMMAND = 'command' + OPTION = 'option' + # Function keys F1 = 'f1' F2 = 'f2' @@ -73,14 +84,27 @@ class Key(Enum): 'escape': cls.ESCAPE, 'esc': cls.ESC, 'delete': cls.DELETE, - 'del': cls.DELETE + 
'del': cls.DELETE, + # Modifier key mappings + 'alt': cls.ALT, + 'ctrl': cls.CTRL, + 'control': cls.CTRL, + 'shift': cls.SHIFT, + 'win': cls.WIN, + 'windows': cls.WIN, + 'super': cls.WIN, + 'command': cls.COMMAND, + 'cmd': cls.COMMAND, + '⌘': cls.COMMAND, + 'option': cls.OPTION, + '⌥': cls.OPTION, } normalized = key.lower().strip() return key_mapping.get(normalized, key) # Combined key type -KeyType = Union[Key, NavigationKey, SpecialKey, FunctionKey, str] +KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str] class AccessibilityWindow(TypedDict): """Information about a window in the accessibility tree.""" diff --git a/libs/lume/README.md b/libs/lume/README.md index 3d9c0524..b7112b07 100644 --- a/libs/lume/README.md +++ b/libs/lume/README.md @@ -147,6 +147,14 @@ Install with a single command: /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` +By default, Lume is installed as a background service that starts automatically on login. If you prefer to start the Lume API service manually when needed, you can use the `--no-background-service` option: + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh) --no-background-service" +``` + +**Note:** With this option, you'll need to manually start the Lume API service by running `lume serve` in your terminal whenever you need to use tools or libraries that rely on the Lume API (such as the Computer-Use Agent). + You can also download the `lume.pkg.tar.gz` archive from the [latest release](https://github.com/trycua/lume/releases), extract it, and install the package manually. 
## Prebuilt Images diff --git a/libs/lume/scripts/install.sh b/libs/lume/scripts/install.sh index aa0529c6..4c1efdc9 100755 --- a/libs/lume/scripts/install.sh +++ b/libs/lume/scripts/install.sh @@ -20,24 +20,32 @@ INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}" GITHUB_REPO="trycua/cua" LATEST_RELEASE_URL="https://api.github.com/repos/$GITHUB_REPO/releases/latest" +# Option to skip background service setup (default: install it) +INSTALL_BACKGROUND_SERVICE=true + # Parse command line arguments while [ "$#" -gt 0 ]; do case "$1" in --install-dir=*) INSTALL_DIR="${1#*=}" ;; + --no-background-service|--skip-background-service) + INSTALL_BACKGROUND_SERVICE=false + ;; --help) echo "${BOLD}${BLUE}Lume Installer${NORMAL}" echo "Usage: $0 [OPTIONS]" echo "" echo "Options:" - echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)" - echo " --help Display this help message" + echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)" + echo " --no-background-service Do not setup the Lume background service (LaunchAgent)" + echo " --help Display this help message" echo "" echo "Examples:" - echo " $0 # Install to $DEFAULT_INSTALL_DIR" - echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)" - echo " INSTALL_DIR=/opt/lume $0 # Install to /opt/lume (legacy env var support)" + echo " $0 # Install to $DEFAULT_INSTALL_DIR and setup background service" + echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)" + echo " $0 --no-background-service # Install without setting up the background service" + echo " INSTALL_DIR=/opt/lume $0 # Install to /opt/lume (legacy env var support)" exit 0 ;; *) @@ -173,11 +181,25 @@ install_binary() { # Check if the installation directory is in PATH if [ -n "${PATH##*$INSTALL_DIR*}" ]; then + SHELL_NAME=$(basename "$SHELL") echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}" - 
echo "To add it, run one of these commands based on your shell:" - echo " For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile" - echo " For zsh: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc" - echo " For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish" + case "$SHELL_NAME" in + zsh) + echo "To add it, run:" + echo " echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zprofile" + ;; + bash) + echo "To add it, run:" + echo " echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile" + ;; + fish) + echo "To add it, run:" + echo " echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish" + ;; + *) + echo "Add $INSTALL_DIR to your PATH in your shell profile file." + ;; + esac fi } @@ -188,11 +210,97 @@ main() { create_temp_dir download_release install_binary - + echo "" echo "${GREEN}${BOLD}Lume has been successfully installed!${NORMAL}" echo "Run ${BOLD}lume${NORMAL} to get started." + + if [ "$INSTALL_BACKGROUND_SERVICE" = true ]; then + # --- Setup background service (LaunchAgent) for Lume --- + SERVICE_NAME="com.trycua.lume_daemon" + PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist" + LUME_BIN="$INSTALL_DIR/lume" + + echo "" + echo "Setting up LaunchAgent to run lume daemon on login..." + + # Create LaunchAgents directory if it doesn't exist + mkdir -p "$HOME/Library/LaunchAgents" + + # Unload existing service if present + if [ -f "$PLIST_PATH" ]; then + echo "Existing LaunchAgent found. Unloading..." 
+ launchctl unload "$PLIST_PATH" 2>/dev/null || true + fi + + # Create the plist file + cat < "$PLIST_PATH" + + + + + Label + $SERVICE_NAME + ProgramArguments + + $LUME_BIN + serve + + RunAtLoad + + KeepAlive + + WorkingDirectory + $HOME + EnvironmentVariables + + PATH + /usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin + HOME + $HOME + + StandardOutPath + /tmp/lume_daemon.log + StandardErrorPath + /tmp/lume_daemon.error.log + ProcessType + Interactive + SessionType + Aqua + + +EOF + + # Set permissions + chmod 644 "$PLIST_PATH" + touch /tmp/lume_daemon.log /tmp/lume_daemon.error.log + chmod 644 /tmp/lume_daemon.log /tmp/lume_daemon.error.log + + # Load the LaunchAgent + echo "Loading LaunchAgent..." + launchctl unload "$PLIST_PATH" 2>/dev/null || true + launchctl load "$PLIST_PATH" + + echo "${GREEN}Lume daemon LaunchAgent installed and loaded. It will start automatically on login!${NORMAL}" + echo "To check status: launchctl list | grep $SERVICE_NAME" + echo "To view logs: tail -f /tmp/lume_daemon.log" + echo "" + echo "To remove the lume daemon service, run:" + echo " launchctl unload \"$PLIST_PATH\"" + echo " rm \"$PLIST_PATH\"" + else + SERVICE_NAME="com.trycua.lume_daemon" + PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist" + if [ -f "$PLIST_PATH" ]; then + echo "Removing existing Lume background service (LaunchAgent)..." + launchctl unload "$PLIST_PATH" 2>/dev/null || true + rm "$PLIST_PATH" + echo "Lume background service (LaunchAgent) removed." + else + echo "Skipping Lume background service (LaunchAgent) setup as requested (use --no-background-service)." + fi + fi } # Run the installation -main \ No newline at end of file +main diff --git a/libs/lume/src/Commands/Create.swift b/libs/lume/src/Commands/Create.swift index b4f02633..db042c69 100644 --- a/libs/lume/src/Commands/Create.swift +++ b/libs/lume/src/Commands/Create.swift @@ -40,7 +40,7 @@ struct Create: AsyncParsableCommand { ) var ipsw: String? 
- @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() { diff --git a/libs/lume/src/Commands/Delete.swift b/libs/lume/src/Commands/Delete.swift index c3cd3653..7d78ca6d 100644 --- a/libs/lume/src/Commands/Delete.swift +++ b/libs/lume/src/Commands/Delete.swift @@ -12,7 +12,7 @@ struct Delete: AsyncParsableCommand { @Flag(name: .long, help: "Force deletion without confirmation") var force = false - @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() {} diff --git a/libs/lume/src/Commands/Get.swift b/libs/lume/src/Commands/Get.swift index 5ff34113..aad56136 100644 --- a/libs/lume/src/Commands/Get.swift +++ b/libs/lume/src/Commands/Get.swift @@ -12,7 +12,7 @@ struct Get: AsyncParsableCommand { @Option(name: [.long, .customShort("f")], help: "Output format (json|text)") var format: FormatOption = .text - @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() { diff --git a/libs/lume/src/Commands/List.swift b/libs/lume/src/Commands/List.swift index 6361f899..89a6dc6e 100644 --- a/libs/lume/src/Commands/List.swift +++ b/libs/lume/src/Commands/List.swift @@ -10,15 +10,22 @@ struct List: AsyncParsableCommand { @Option(name: [.long, .customShort("f")], help: "Output format (json|text)") var format: FormatOption = .text + @Option(name: .long, help: "Filter by storage location name") + var storage: String? 
+ init() { } @MainActor func run() async throws { let manager = LumeController() - let vms = try manager.list() + let vms = try manager.list(storage: self.storage) if vms.isEmpty && self.format == .text { - print("No virtual machines found") + if let storageName = self.storage { + print("No virtual machines found in storage '\(storageName)'") + } else { + print("No virtual machines found") + } } else { try VMDetailsPrinter.printStatus(vms, format: self.format) } diff --git a/libs/lume/src/Commands/Pull.swift b/libs/lume/src/Commands/Pull.swift index 074e0fac..cd843381 100644 --- a/libs/lume/src/Commands/Pull.swift +++ b/libs/lume/src/Commands/Pull.swift @@ -19,7 +19,7 @@ struct Pull: AsyncParsableCommand { @Option(help: "Organization to pull from. Defaults to trycua") var organization: String = "trycua" - @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() {} diff --git a/libs/lume/src/Commands/Run.swift b/libs/lume/src/Commands/Run.swift index bc659769..273e8ba7 100644 --- a/libs/lume/src/Commands/Run.swift +++ b/libs/lume/src/Commands/Run.swift @@ -48,7 +48,7 @@ struct Run: AsyncParsableCommand { @Option(help: "For MacOS VMs only, boot into the VM in recovery mode") var recoveryMode: Bool = false - @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? private var parsedSharedDirectories: [SharedDirectory] { diff --git a/libs/lume/src/Commands/Set.swift b/libs/lume/src/Commands/Set.swift index 73bfe0c9..e2420a68 100644 --- a/libs/lume/src/Commands/Set.swift +++ b/libs/lume/src/Commands/Set.swift @@ -21,7 +21,7 @@ struct Set: AsyncParsableCommand { @Option(help: "New display resolution in format WIDTHxHEIGHT.") var display: VMDisplayResolution? 
- @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() { diff --git a/libs/lume/src/Commands/Stop.swift b/libs/lume/src/Commands/Stop.swift index 933019e5..3b921114 100644 --- a/libs/lume/src/Commands/Stop.swift +++ b/libs/lume/src/Commands/Stop.swift @@ -9,7 +9,7 @@ struct Stop: AsyncParsableCommand { @Argument(help: "Name of the virtual machine", completion: .custom(completeVMName)) var name: String - @Option(name: .customLong("storage"), help: "VM storage location to use") + @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location") var storage: String? init() { diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 714cf1cb..a7a68212 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -643,7 +643,7 @@ class ImageContainerRegistry: @unchecked Sendable { image: String, name: String?, locationName: String? = nil - ) async throws { + ) async throws -> VMDirectory { guard !image.isEmpty else { throw ValidationError("Image name cannot be empty") } @@ -652,7 +652,16 @@ class ImageContainerRegistry: @unchecked Sendable { // Use provided name or derive from image let vmName = name ?? image.split(separator: ":").first.map(String.init) ?? 
"" - let vmDir = try home.getVMDirectory(vmName, storage: locationName) + + // Determine if locationName is a direct path or a named storage location + let vmDir: VMDirectory + if let locationName = locationName, locationName.contains("/") || locationName.contains("\\") { + // Direct path + vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: locationName) + } else { + // Named storage or default location + vmDir = try home.getVMDirectory(vmName, storage: locationName) + } // Optimize network early in the process optimizeNetworkSettings() @@ -991,6 +1000,7 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info( "Run 'lume run \(vmName)' to reduce the disk image file size by using macOS sparse file system" ) + return vmDir } // Helper function to clean up a specific cache entry @@ -3024,7 +3034,8 @@ class ImageContainerRegistry: @unchecked Sendable { // Replace original with optimized version try FileManager.default.removeItem(at: reassembledFile) - try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile) + try FileManager.default.moveItem( + at: optimizedFile, to: reassembledFile) Logger.info("Using sparse-optimized file for verification") } else { Logger.info( diff --git a/libs/lume/src/FileSystem/Home.swift b/libs/lume/src/FileSystem/Home.swift index b8b4ae54..d83b39b0 100644 --- a/libs/lume/src/FileSystem/Home.swift +++ b/libs/lume/src/FileSystem/Home.swift @@ -92,6 +92,28 @@ final class Home { let baseDir = Path(location.expandedPath) return VMDirectory(baseDir.directory(name)) } + + /// Gets a VM directory from a direct file path + /// + /// - Parameters: + /// - name: Name of the VM directory + /// - storagePath: Direct file system path where the VM is located + /// - Returns: A VMDirectory instance + /// - Throws: HomeError if path is invalid + func getVMDirectoryFromPath(_ name: String, storagePath: String) throws -> VMDirectory { + let baseDir = Path(storagePath) + + // Create the directory if it doesn't exist + if 
!fileExists(at: storagePath) { + Logger.info("Creating storage directory", metadata: ["path": storagePath]) + try createVMLocation(at: storagePath) + } else if !isValidDirectory(at: storagePath) { + // Path exists but isn't a valid directory + throw HomeError.invalidHomeDirectory + } + + return VMDirectory(baseDir.directory(name)) + } /// Returns all initialized VM directories across all locations /// - Returns: An array of VMDirectory instances with location info diff --git a/libs/lume/src/FileSystem/VMDirectory.swift b/libs/lume/src/FileSystem/VMDirectory.swift index a902e34b..3335107d 100644 --- a/libs/lume/src/FileSystem/VMDirectory.swift +++ b/libs/lume/src/FileSystem/VMDirectory.swift @@ -8,7 +8,7 @@ import Foundation /// - Handling disk operations /// - Managing VM state and locking /// - Providing access to VM-related paths -struct VMDirectory { +struct VMDirectory: Sendable { // MARK: - Constants private enum FileNames { @@ -26,8 +26,6 @@ struct VMDirectory { let configPath: Path let sessionsPath: Path - private let fileManager: FileManager - /// The name of the VM directory var name: String { dir.name } @@ -36,10 +34,8 @@ struct VMDirectory { /// Creates a new VMDirectory instance /// - Parameters: /// - dir: The base directory path for the VM - /// - fileManager: FileManager instance to use for file operations - init(_ dir: Path, fileManager: FileManager = .default) { + init(_ dir: Path) { self.dir = dir - self.fileManager = fileManager self.nvramPath = dir.file(FileNames.nvram) self.diskPath = dir.file(FileNames.disk) self.configPath = dir.file(FileNames.config) @@ -52,7 +48,25 @@ struct VMDirectory { extension VMDirectory { /// Checks if the VM directory is fully initialized with all required files func initialized() -> Bool { - configPath.exists() && diskPath.exists() && nvramPath.exists() + // Add detailed logging for debugging + let configExists = configPath.exists() + let diskExists = diskPath.exists() + let nvramExists = nvramPath.exists() + + 
Logger.info( + "VM directory initialization check", + metadata: [ + "directory": dir.path, + "config_path": configPath.path, + "config_exists": "\(configExists)", + "disk_path": diskPath.path, + "disk_exists": "\(diskExists)", + "nvram_path": nvramPath.path, + "nvram_exists": "\(nvramExists)" + ] + ) + + return configExists && diskExists && nvramExists } /// Checks if the VM directory exists @@ -70,7 +84,7 @@ extension VMDirectory { func setDisk(_ size: UInt64) throws { do { if !diskPath.exists() { - guard fileManager.createFile(atPath: diskPath.path, contents: nil) else { + guard FileManager.default.createFile(atPath: diskPath.path, contents: nil) else { throw VMDirectoryError.fileCreationFailed(diskPath.path) } } @@ -96,7 +110,7 @@ extension VMDirectory { do { let data = try encoder.encode(config) - guard fileManager.createFile(atPath: configPath.path, contents: data) else { + guard FileManager.default.createFile(atPath: configPath.path, contents: data) else { throw VMDirectoryError.fileCreationFailed(configPath.path) } } catch { @@ -108,7 +122,7 @@ extension VMDirectory { /// - Returns: The loaded configuration /// - Throws: VMDirectoryError if the load operation fails func loadConfig() throws -> VMConfig { - guard let data = fileManager.contents(atPath: configPath.path) else { + guard let data = FileManager.default.contents(atPath: configPath.path) else { throw VMDirectoryError.configNotFound } @@ -137,7 +151,7 @@ extension VMDirectory { do { let data = try encoder.encode(session) - guard fileManager.createFile(atPath: sessionsPath.path, contents: data) else { + guard FileManager.default.createFile(atPath: sessionsPath.path, contents: data) else { throw VMDirectoryError.fileCreationFailed(sessionsPath.path) } } catch { @@ -149,7 +163,7 @@ extension VMDirectory { /// - Returns: The loaded VNC session /// - Throws: VMDirectoryError if the load operation fails func loadSession() throws -> VNCSession { - guard let data = fileManager.contents(atPath: 
sessionsPath.path) else { + guard let data = FileManager.default.contents(atPath: sessionsPath.path) else { throw VMDirectoryError.sessionNotFound } @@ -163,7 +177,7 @@ extension VMDirectory { /// Removes the VNC session information from disk func clearSession() { - try? fileManager.removeItem(atPath: sessionsPath.path) + try? FileManager.default.removeItem(atPath: sessionsPath.path) } } @@ -176,6 +190,6 @@ extension VMDirectory: CustomStringConvertible { extension VMDirectory { func delete() throws { - try fileManager.removeItem(atPath: dir.path) + try FileManager.default.removeItem(atPath: dir.path) } } diff --git a/libs/lume/src/LumeController.swift b/libs/lume/src/LumeController.swift index ecdcec49..03db4999 100644 --- a/libs/lume/src/LumeController.swift +++ b/libs/lume/src/LumeController.swift @@ -48,15 +48,72 @@ final class LumeController { /// Lists all virtual machines in the system @MainActor - public func list() throws -> [VMDetails] { + public func list(storage: String? = nil) throws -> [VMDetails] { do { - let vmLocations = try home.getAllVMDirectories() - let statuses = try vmLocations.map { vmWithLoc in - let vm = try self.get( - name: vmWithLoc.directory.name, storage: vmWithLoc.locationName) - return vm.details + if let storage = storage { + // If storage is specified, only return VMs from that location + if storage.contains("/") || storage.contains("\\") { + // Direct path - check if it exists + if !FileManager.default.fileExists(atPath: storage) { + // Return empty array if the path doesn't exist + return [] + } + + // Try to get all VMs from the specified path + // We need to check which subdirectories are valid VM dirs + let directoryURL = URL(fileURLWithPath: storage) + let contents = try FileManager.default.contentsOfDirectory( + at: directoryURL, + includingPropertiesForKeys: [.isDirectoryKey], + options: .skipsHiddenFiles + ) + + let statuses = try contents.compactMap { subdir -> VMDetails? 
in + guard let isDirectory = try subdir.resourceValues(forKeys: [.isDirectoryKey]).isDirectory, + isDirectory else { + return nil + } + + let vmName = subdir.lastPathComponent + // Check if it's a valid VM directory + let vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: storage) + if !vmDir.initialized() { + return nil + } + + do { + let vm = try self.get(name: vmName, storage: storage) + return vm.details + } catch { + // Skip invalid VM directories + return nil + } + } + return statuses + } else { + // Named storage + let vmsWithLoc = try home.getAllVMDirectories() + let statuses = try vmsWithLoc.compactMap { vmWithLoc -> VMDetails? in + // Only include VMs from the specified location + if vmWithLoc.locationName != storage { + return nil + } + let vm = try self.get( + name: vmWithLoc.directory.name, storage: vmWithLoc.locationName) + return vm.details + } + return statuses + } + } else { + // No storage filter - get all VMs + let vmsWithLoc = try home.getAllVMDirectories() + let statuses = try vmsWithLoc.compactMap { vmWithLoc -> VMDetails? in + let vm = try self.get( + name: vmWithLoc.directory.name, storage: vmWithLoc.locationName) + return vm.details + } + return statuses } - return statuses } catch { Logger.error("Failed to list VMs", metadata: ["error": error.localizedDescription]) throw error @@ -133,20 +190,42 @@ final class LumeController { public func get(name: String, storage: String? 
= nil) throws -> VM { let normalizedName = normalizeVMName(name: name) do { - // Try to find the VM and get its actual location - let actualLocation = try self.validateVMExists( - normalizedName, storage: storage) + let vm: VM + if let storagePath = storage, storagePath.contains("/") || storagePath.contains("\\") { + // Storage is a direct path + let vmDir = try home.getVMDirectoryFromPath(normalizedName, storagePath: storagePath) + guard vmDir.initialized() else { + // Throw a specific error if the directory exists but isn't a valid VM + if vmDir.exists() { + throw VMError.notInitialized(normalizedName) + } else { + throw VMError.notFound(normalizedName) + } + } + // Pass the path as the storage context + vm = try self.loadVM(vmDir: vmDir, storage: storagePath) + } else { + // Storage is nil or a named location + let actualLocation = try self.validateVMExists( + normalizedName, storage: storage) - // Load the VM from its actual location - let vm = try self.loadVM(name: normalizedName, storage: actualLocation) + let vmDir = try home.getVMDirectory(normalizedName, storage: actualLocation) + // loadVM will re-check initialized, but good practice to keep validateVMExists result. + vm = try self.loadVM(vmDir: vmDir, storage: actualLocation) + } return vm } catch { - Logger.error("Failed to get VM", metadata: ["error": error.localizedDescription]) + Logger.error( + "Failed to get VM", + metadata: [ + "vmName": normalizedName, "storage": storage ?? "default", + "error": error.localizedDescription, + ]) + // Re-throw the original error to preserve its type throw error } } - /// Factory for creating the appropriate VM type based on the OS @MainActor public func create( name: String, @@ -329,58 +408,84 @@ final class LumeController { "Running VM", metadata: [ "name": normalizedName, - "location": storage ?? "default", "no_display": "\(noDisplay)", "shared_directories": "\(sharedDirectories.map( { $0.string } ).joined(separator: ", "))", "mount": mount?.path ?? 
"none", "vnc_port": "\(vncPort)", "recovery_mode": "\(recoveryMode)", - "storage_param": storage ?? "default", + "storage_param": storage ?? "default", // Log the original param "usb_storage_devices": "\(usbMassStoragePaths?.count ?? 0)", ]) do { - // Check if this is an image reference (contains a tag) - let components = name.split(separator: ":") - if components.count == 2 { - do { - _ = try self.validateVMExists(normalizedName, storage: storage) - } catch { - // If the VM doesn't exist, try to pull the image + // Check if name is an image ref to auto-pull + let components = normalizedName.split(separator: ":") + if components.count == 2 { // Check if it looks like image:tag + // Attempt to validate if VM exists first, suppressing the error + // This avoids pulling if the VM already exists, even if name looks like an image ref + let vmExists = (try? self.validateVMExists(normalizedName, storage: storage)) != nil + if !vmExists { + Logger.info( + "VM not found, attempting to pull image based on name", + metadata: ["imageRef": normalizedName]) + // Use the potentially new VM name derived from the image ref + let potentialVMName = String(components[0]) try await pullImage( - image: name, - name: nil, + image: normalizedName, // Full image ref + name: potentialVMName, // Name derived from image registry: registry, organization: organization, storage: storage ) + // Important: After pull, the effective name might have changed + // We proceed assuming the user wants to run the VM derived from image name + // normalizedName = potentialVMName // Re-assign normalizedName if pull logic creates it + // Note: Current pullImage doesn't return the final VM name, + // so we assume it matches the name derived from the image. + // This might need refinement if pullImage behaviour changes. 
} } - // Find VM and get its actual location - let actualLocation = try validateVMExists(normalizedName, storage: storage) + // Determine effective storage path or name AND get the VMDirectory + let effectiveStorage: String? + let vmDir: VMDirectory - // Log if we found the VM in a different location than default - if actualLocation != storage && actualLocation != nil { + if let storagePath = storage, storagePath.contains("/") || storagePath.contains("\\") { + // Storage is a direct path + vmDir = try home.getVMDirectoryFromPath(normalizedName, storagePath: storagePath) + guard vmDir.initialized() else { + if vmDir.exists() { + throw VMError.notInitialized(normalizedName) + } else { + throw VMError.notFound(normalizedName) + } + } + effectiveStorage = storagePath // Use the path string + Logger.info("Using direct storage path", metadata: ["path": storagePath]) + } else { + // Storage is nil or a named location - validate and get the actual name + let actualLocationName = try validateVMExists(normalizedName, storage: storage) + vmDir = try home.getVMDirectory(normalizedName, storage: actualLocationName) // Get VMDir for named location + effectiveStorage = actualLocationName // Use the named location string Logger.info( - "Found VM in location", + "Using named storage location", metadata: [ - "name": normalizedName, - "location": actualLocation ?? "default", + "requested": storage ?? "default", + "actual": actualLocationName ?? 
"default", ]) } + // Validate parameters using the located VMDirectory try validateRunParameters( - name: normalizedName, + vmDir: vmDir, // Pass vmDir sharedDirectories: sharedDirectories, mount: mount, - storage: actualLocation, usbMassStoragePaths: usbMassStoragePaths ) - // Use the actual VM location that we found - let vm = try get(name: normalizedName, storage: actualLocation) + // Load the VM directly using the located VMDirectory and storage context + let vm = try self.loadVM(vmDir: vmDir, storage: effectiveStorage) SharedVM.shared.setVM(name: normalizedName, vm: vm) try await vm.run( @@ -488,7 +593,7 @@ final class LumeController { let imageContainerRegistry = ImageContainerRegistry( registry: registry, organization: organization) - try await imageContainerRegistry.pull( + let _ = try await imageContainerRegistry.pull( image: actualImage, name: vmName, locationName: storage) @@ -752,15 +857,17 @@ final class LumeController { } @MainActor - private func loadVM(name: String, storage: String? = nil) throws -> VM { - let vmDir = try home.getVMDirectory(name, storage: storage) + private func loadVM(vmDir: VMDirectory, storage: String?) throws -> VM { + // vmDir is now passed directly guard vmDir.initialized() else { - throw VMError.notInitialized(name) + throw VMError.notInitialized(vmDir.name) // Use name from vmDir } let config: VMConfig = try vmDir.loadConfig() + // Pass the provided storage (which could be a path or named location) let vmDirContext = VMDirContext( - dir: vmDir, config: config, home: home, storage: storage) + dir: vmDir, config: config, home: home, storage: storage + ) let imageLoader = config.os.lowercased() == "macos" ? imageLoaderFactory.createImageLoader() : nil @@ -808,11 +915,22 @@ final class LumeController { public func validateVMExists(_ name: String, storage: String? = nil) throws -> String? 
{ // If location is specified, only check that location if let storage = storage { - let vmDir = try home.getVMDirectory(name, storage: storage) - guard vmDir.initialized() else { - throw VMError.notFound(name) + // Check if storage is a path by looking for directory separator + if storage.contains("/") || storage.contains("\\") { + // Treat as direct path + let vmDir = try home.getVMDirectoryFromPath(name, storagePath: storage) + guard vmDir.initialized() else { + throw VMError.notFound(name) + } + return storage // Return the path as the location identifier + } else { + // Treat as named storage + let vmDir = try home.getVMDirectory(name, storage: storage) + guard vmDir.initialized() else { + throw VMError.notFound(name) + } + return storage } - return storage } // If no location specified, try to find the VM in any location @@ -826,6 +944,51 @@ final class LumeController { throw VMError.notFound(name) } + private func validateRunParameters( + vmDir: VMDirectory, // Changed signature: accept VMDirectory + sharedDirectories: [SharedDirectory]?, + mount: Path?, + usbMassStoragePaths: [Path]? = nil + ) throws { + // VM existence is confirmed by having vmDir, no need for validateVMExists + if let dirs = sharedDirectories { + try self.validateSharedDirectories(dirs) + } + + // Validate USB mass storage paths + if let usbPaths = usbMassStoragePaths { + for path in usbPaths { + if !FileManager.default.fileExists(atPath: path.path) { + throw ValidationError("USB mass storage image not found: \(path.path)") + } + } + + if #available(macOS 15.0, *) { + // USB mass storage is supported + } else { + Logger.info( + "USB mass storage devices require macOS 15.0 or later. They will be ignored.") + } + } + + // Load config directly from vmDir + let vmConfig = try vmDir.loadConfig() + switch vmConfig.os.lowercased() { + case "macos": + if mount != nil { + throw ValidationError( + "Mounting disk images is not supported for macOS VMs. 
If you are looking to mount a IPSW, please use the --ipsw option in the create command." + ) + } + case "linux": + if let mount = mount, !FileManager.default.fileExists(atPath: mount.path) { + throw ValidationError("Mount file not found: \(mount.path)") + } + default: + break + } + } + private func validatePullParameters( image: String, name: String, @@ -846,51 +1009,31 @@ final class LumeController { throw ValidationError("Organization cannot be empty") } - let vmDir = try home.getVMDirectory(name, storage: storage) - if vmDir.exists() { - throw VMError.alreadyExists(name) - } - } - - private func validateRunParameters( - name: String, sharedDirectories: [SharedDirectory]?, mount: Path?, - storage: String? = nil, usbMassStoragePaths: [Path]? = nil - ) throws { - _ = try self.validateVMExists(name, storage: storage) - if let dirs = sharedDirectories { - try self.validateSharedDirectories(dirs) - } - - // Validate USB mass storage paths - if let usbPaths = usbMassStoragePaths { - for path in usbPaths { - if !FileManager.default.fileExists(atPath: path.path) { - throw ValidationError("USB mass storage image not found: \(path.path)") + // Determine if storage is a path or a named storage location + let vmDir: VMDirectory + if let storage = storage, storage.contains("/") || storage.contains("\\") { + // Create the base directory if it doesn't exist + if !FileManager.default.fileExists(atPath: storage) { + Logger.info("Creating VM storage directory", metadata: ["path": storage]) + do { + try FileManager.default.createDirectory( + atPath: storage, + withIntermediateDirectories: true + ) + } catch { + throw HomeError.directoryCreationFailed(path: storage) } } - - if #available(macOS 15.0, *) { - // USB mass storage is supported - } else { - Logger.info( - "USB mass storage devices require macOS 15.0 or later. 
They will be ignored.") - } + + // Use getVMDirectoryFromPath for direct paths + vmDir = try home.getVMDirectoryFromPath(name, storagePath: storage) + } else { + // Use getVMDirectory for named storage locations + vmDir = try home.getVMDirectory(name, storage: storage) } - - let vmConfig = try home.getVMDirectory(name, storage: storage).loadConfig() - switch vmConfig.os.lowercased() { - case "macos": - if mount != nil { - throw ValidationError( - "Mounting disk images is not supported for macOS VMs. If you are looking to mount a IPSW, please use the --ipsw option in the create command." - ) - } - case "linux": - if let mount = mount, !FileManager.default.fileExists(atPath: mount.path) { - throw ValidationError("Mount file not found: \(mount.path)") - } - default: - break + + if vmDir.exists() { + throw VMError.alreadyExists(name) } } diff --git a/libs/lume/src/Server/Handlers.swift b/libs/lume/src/Server/Handlers.swift index c968359a..bf289350 100644 --- a/libs/lume/src/Server/Handlers.swift +++ b/libs/lume/src/Server/Handlers.swift @@ -6,10 +6,10 @@ import Virtualization extension Server { // MARK: - VM Management Handlers - func handleListVMs() async throws -> HTTPResponse { + func handleListVMs(storage: String? = nil) async throws -> HTTPResponse { do { let vmController = LumeController() - let vms = try vmController.list() + let vms = try vmController.list(storage: storage) return try .json(vms) } catch { return .badRequest(message: error.localizedDescription) diff --git a/libs/lume/src/Server/Requests.swift b/libs/lume/src/Server/Requests.swift index da0bf681..5cde19d2 100644 --- a/libs/lume/src/Server/Requests.swift +++ b/libs/lume/src/Server/Requests.swift @@ -109,7 +109,7 @@ struct PushRequest: Codable { let tags: [String] // List of tags to push var registry: String // Registry URL var organization: String // Organization/user in the registry - let storage: String? // Optional VM storage location + let storage: String? 
// Optional VM storage location or direct path var chunkSizeMb: Int // Chunk size // dryRun and reassemble are less common for API, default to false? // verbose is usually handled by server logging diff --git a/libs/lume/src/Server/Server.swift b/libs/lume/src/Server/Server.swift index 71db4a75..782efa70 100644 --- a/libs/lume/src/Server/Server.swift +++ b/libs/lume/src/Server/Server.swift @@ -79,9 +79,11 @@ final class Server { routes = [ Route( method: "GET", path: "/lume/vms", - handler: { [weak self] _ in + handler: { [weak self] request in guard let self else { throw HTTPError.internalError } - return try await self.handleListVMs() + // Extract storage from query params if present + let storage = self.extractQueryParam(request: request, name: "storage") + return try await self.handleListVMs(storage: storage) }), Route( method: "GET", path: "/lume/vms/:name", @@ -177,8 +179,21 @@ final class Server { return HTTPResponse(statusCode: .badRequest, body: "Missing VM name") } - // Extract storage from query params if present - let storage = self.extractQueryParam(request: request, name: "storage") + Logger.info("Processing stop VM request", metadata: ["method": request.method, "path": request.path]) + + // Extract storage from the request body + var storage: String? = nil + if let bodyData = request.body, !bodyData.isEmpty { + do { + if let json = try JSONSerialization.jsonObject(with: bodyData) as? [String: Any], + let bodyStorage = json["storage"] as? 
String { + storage = bodyStorage + Logger.info("Extracted storage from request body", metadata: ["storage": bodyStorage]) + } + } catch { + Logger.error("Failed to parse request body JSON", metadata: ["error": error.localizedDescription]) + } + } return try await self.handleStopVM(name: name, storage: storage) }), diff --git a/libs/lumier/.dockerignore b/libs/lumier/.dockerignore new file mode 100644 index 00000000..3e0f9c98 --- /dev/null +++ b/libs/lumier/.dockerignore @@ -0,0 +1,24 @@ +# Ignore macOS system files and trash +.DS_Store +.Trashes +**/.Trashes +**/.* + +# Ignore Python cache +__pycache__/ +*.pyc +*.pyo + +# Ignore virtual environments +.venv/ +venv/ + +# Ignore editor/project files +.vscode/ +.idea/ +*.swp + +# Ignore test artifacts +test-results/ + +# Ignore anything else you don't want in the Docker build context diff --git a/libs/lumier/Dockerfile b/libs/lumier/Dockerfile new file mode 100644 index 00000000..710eb80b --- /dev/null +++ b/libs/lumier/Dockerfile @@ -0,0 +1,74 @@ +# Base image using Debian for arm64 architecture (optimized for Apple Silicon) +FROM debian:bullseye-slim AS lumier-base + +# Set environment variables for Lume API server configuration +ENV LUME_API_HOST="host.docker.internal" +ENV LUME_API_PORT="8080" + +# Default VM configuration (can be overridden at runtime) +ENV VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest" +ENV RAM_SIZE="8192" +ENV CPU_CORES="4" +ENV DISK_SIZE="100" +ENV DISPLAY="1024x768" +ENV VM_NAME="lumier" +ENV HOST_DATA_PATH="" +ENV LUMIER_DEBUG="0" + +# Install necessary tools and noVNC dependencies +RUN apt-get update && \ + apt-get install -y \ + netcat-traditional \ + curl \ + sshpass \ + wget \ + unzip \ + git \ + python3 \ + python3-pip \ + python3-numpy \ + procps && \ + rm -rf /var/lib/apt/lists/* + +# Add a dummy environment variable to invalidate cache +ENV CACHEBUST=1 + +# Download and install noVNC without caching +RUN wget https://github.com/trycua/noVNC/archive/refs/heads/master.zip -O 
master1.zip && \ + unzip master1.zip && \ + mv noVNC-master /opt/noVNC && \ + rm master1.zip + +# Set environment variables for noVNC +ENV NOVNC_PATH="/opt/noVNC" + +# Create directory structure +RUN mkdir -p /run/bin /run/lib /run/config /run/hooks + +# Copy scripts to the container +COPY src/bin/tunnel.sh /run/bin/ +COPY src/bin/tunnel-script.sh /usr/local/bin/lume +COPY src/bin/tunnel-script.sh /usr/local/bin/sshpass +COPY src/config/constants.sh /run/config/ +COPY src/bin/entry.sh /run/bin/entry.sh + +# Copy library files if they exist +COPY src/lib/ /run/lib/ +COPY src/hooks/ /run/hooks/ + +# Make scripts executable +RUN chmod +x /usr/local/bin/lume \ + /usr/local/bin/sshpass \ + /run/bin/* \ + /run/hooks/* 2>/dev/null || true + +# Expose ports for noVNC and Lume API +EXPOSE 8080 +EXPOSE 8006 + +# VOLUME setup +VOLUME [ "/storage" ] +VOLUME [ "/data" ] + +# Default entrypoint +ENTRYPOINT ["/run/bin/entry.sh"] \ No newline at end of file diff --git a/libs/lumier/README.md b/libs/lumier/README.md new file mode 100644 index 00000000..65803e39 --- /dev/null +++ b/libs/lumier/README.md @@ -0,0 +1,175 @@ +
+

+
+ + + + Shows my svg + +
+ + [![Swift 6](https://img.shields.io/badge/Swift_6-F54A2A?logo=swift&logoColor=white&labelColor=F54A2A)](#) + [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) + [![Homebrew](https://img.shields.io/badge/Homebrew-FBB040?logo=homebrew&logoColor=fff)](#install) + [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85) +

+
+ +**Lumier** provides a Docker-based interface for the `lume` CLI, allowing you to easily run macOS virtual machines inside a container with VNC access. It creates a secure tunnel to execute lume commands on your host machine while providing a containerized environment for your applications. + +## Requirements + +Before using Lumier, make sure you have: + +1. [lume](https://github.com/trycua/cua/blob/main/libs/lume/README.md) installed on your host machine +2. Docker installed on your host machine +3. `socat` installed for the tunnel (install with Homebrew: `brew install socat`) + +## Installation + +You can use Lumier directly from its directory or install it to your system: + +```bash +# Option 1: Install to your user's bin directory (recommended) +./install.sh + +# Option 2: Install to a custom directory +./install.sh --install-dir=/usr/local/bin # May require sudo + +# Option 3: View installation options +./install.sh --help +``` + +After installation, you can run `lumier` from anywhere in your terminal. + +If you get a "command not found" error, make sure the installation directory is in your PATH. The installer will warn you if it isn't and provide instructions to add it. + +## Usage + +There are two ways to use Lumier: with the provided script or directly with Docker. 
+ +### Option 1: Using the Lumier Script + +Lumier provides a simple CLI interface to manage VMs in Docker with full Docker compatibility: + +```bash +# Show help and available commands +lumier help + +# Start the tunnel to connect to lume +lumier tunnel start + +# Check if the tunnel is running +lumier tunnel status + +# Stop the tunnel +lumier tunnel stop + +# Build the Docker image (optional, happens automatically on first run) +lumier build + +# Run a VM with default settings +lumier run -it --rm + +# Run a VM with custom settings using Docker's -e flag +lumier run -it --rm \ + --name lumier-vm \ + -p 8006:8006 \ + -v $(pwd)/storage:/storage \ + -v $(pwd)/shared:/data \ + -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ + -e CPU_CORES=4 \ + -e RAM_SIZE=8192 + +# Note: +# The lumier script now automatically detects the real host paths for ./storage and ./shared +# and passes them to the container as HOST_STORAGE_PATH and HOST_DATA_PATH. +# You do NOT need to specify these environment variables manually. +# The VM name is always set from the container name. +``` + +### Option 2: Using Docker Directly + +You can also use Docker commands directly without the lumier utility: + +```bash +# 1. Start the tunnel manually +cd libs/lumier +socat TCP-LISTEN:8080,reuseaddr,fork EXEC:"$PWD/src/bin/tunnel.sh" & +TUNNEL_PID=$! + +# 2. Build the Docker image +docker build -t lumier:latest . + +# 3. Run the container +docker run -it --rm \ + --name lumier-vm \ + -p 8006:8006 \ + -v $(pwd)/storage:/storage \ + -v $(pwd)/shared:/data \ + -e VM_NAME=lumier-vm \ + -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \ + -e CPU_CORES=4 \ + -e RAM_SIZE=8192 \ + -e HOST_STORAGE_PATH=$(pwd)/storage \ + -e HOST_DATA_PATH=$(pwd)/shared \ + lumier:latest + +# 4. 
Stop the tunnel when you're done +kill $TUNNEL_PID + +# Alternatively, find and kill the tunnel process +# First, find the process +lsof -i TCP:8080 +# Then kill it by PID +kill <PID> +``` + +Note that when using Docker directly, you're responsible for: +- Starting and managing the tunnel +- Building the Docker image +- Providing the correct environment variables + +## Available Environment Variables + +These variables can be set using Docker's `-e` flag: + +- `VM_NAME`: Set the VM name (default: lumier) +- `VERSION`: Set the VM image (default: ghcr.io/trycua/macos-sequoia-vanilla:latest) +- `CPU_CORES`: Set the number of CPU cores (default: 4) +- `RAM_SIZE`: Set the memory size in MB (default: 8192) +- `DISPLAY`: Set the display resolution (default: 1024x768) +- `HOST_DATA_PATH`: Path on the host to share with the VM +- `LUMIER_DEBUG`: Enable debug mode (set to 1) + +## Project Structure + +The project is organized as follows: + +``` +lumier/ +├── Dockerfile # Main Docker image definition +├── README.md # This file +├── lumier # Main CLI script +├── install.sh # Installation script +├── src/ # Source code +│ ├── bin/ # Executable scripts +│ │ ├── entry.sh # Docker entrypoint +│ │ ├── server.sh # Tunnel server manager +│ │ └── tunnel.sh # Tunnel request handler +│ ├── config/ # Configuration +│ │ └── constants.sh # Shared constants +│ ├── hooks/ # Lifecycle hooks +│ │ └── on-logon.sh # Run after VM boots +│ └── lib/ # Shared library code +│ ├── utils.sh # Utility functions +│ └── vm.sh # VM management functions +└── mount/ # Default shared directory +``` + +## VNC Access + +When a VM is running, you can access it via VNC through: +http://localhost:8006/vnc.html + +The password is displayed in the console output when the VM starts. 
\ No newline at end of file diff --git a/libs/lumier/install.sh b/libs/lumier/install.sh new file mode 100755 index 00000000..bd9e3b6b --- /dev/null +++ b/libs/lumier/install.sh @@ -0,0 +1,176 @@ +#!/bin/bash +set -e + +# Lumier Installer +# This script installs Lumier to your system + +# Define colors for output +BOLD=$(tput bold) +NORMAL=$(tput sgr0) +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +BLUE=$(tput setaf 4) +YELLOW=$(tput setaf 3) + +# Default installation directory (user-specific, doesn't require sudo) +DEFAULT_INSTALL_DIR="$HOME/.local/bin" +INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}" + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Parse command line arguments +while [ "$#" -gt 0 ]; do + case "$1" in + --install-dir=*) + INSTALL_DIR="${1#*=}" + ;; + --help) + echo "${BOLD}${BLUE}Lumier Installer${NORMAL}" + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)" + echo " --help Display this help message" + echo "" + echo "Examples:" + echo " $0 # Install to $DEFAULT_INSTALL_DIR" + echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)" + echo " INSTALL_DIR=/opt/lumier $0 # Install to /opt/lumier (legacy env var support)" + exit 0 + ;; + *) + echo "${RED}Unknown option: $1${NORMAL}" + echo "Use --help for usage information" + exit 1 + ;; + esac + shift +done + +echo "${BOLD}${BLUE}Lumier Installer${NORMAL}" +echo "This script will install Lumier to your system." + +# Check if we're running with appropriate permissions +check_permissions() { + # System directories that typically require root privileges + SYSTEM_DIRS=("/usr/local/bin" "/usr/bin" "/bin" "/opt") + + NEEDS_ROOT=false + for DIR in "${SYSTEM_DIRS[@]}"; do + if [[ "$INSTALL_DIR" == "$DIR"* ]] && [ ! 
-w "$INSTALL_DIR" ]; then + NEEDS_ROOT=true + break + fi + done + + if [ "$NEEDS_ROOT" = true ]; then + echo "${YELLOW}Warning: Installing to $INSTALL_DIR may require root privileges.${NORMAL}" + echo "Consider these alternatives:" + echo " • Install to a user-writable location: $0 --install-dir=$HOME/.local/bin" + echo " • Create the directory with correct permissions first:" + echo " sudo mkdir -p $INSTALL_DIR && sudo chown $(whoami) $INSTALL_DIR" + echo "" + + # Check if we already have write permission (might have been set up previously) + if [ ! -w "$INSTALL_DIR" ] && [ ! -w "$(dirname "$INSTALL_DIR")" ]; then + echo "${RED}Error: You don't have write permission to $INSTALL_DIR${NORMAL}" + echo "Please choose a different installation directory or ensure you have the proper permissions." + exit 1 + fi + fi +} + +# Detect OS and architecture +detect_platform() { + OS=$(uname -s | tr '[:upper:]' '[:lower:]') + ARCH=$(uname -m) + + if [ "$OS" != "darwin" ]; then + echo "${RED}Error: Currently only macOS is supported.${NORMAL}" + exit 1 + fi + + if [ "$ARCH" != "arm64" ]; then + echo "${RED}Error: Lumier only supports macOS on Apple Silicon (ARM64).${NORMAL}" + exit 1 + fi + + PLATFORM="darwin-arm64" + echo "Detected platform: ${BOLD}$PLATFORM${NORMAL}" +} + +# Check dependencies +check_dependencies() { + echo "Checking dependencies..." + + # Check if lume is installed + if ! command -v lume &> /dev/null; then + echo "${RED}Error: Lume is required but not installed.${NORMAL}" + echo "Please install Lume first: https://github.com/trycua/cua/blob/main/libs/lume/README.md" + exit 1 + fi + + # Check if socat is installed + if ! command -v socat &> /dev/null; then + echo "${YELLOW}Warning: socat is required but not installed.${NORMAL}" + echo "Installing socat with Homebrew..." + + # Check if Homebrew is installed + if ! 
command -v brew &> /dev/null; then + echo "${RED}Error: Homebrew is required to install socat.${NORMAL}" + echo "Please install Homebrew first: https://brew.sh/" + echo "Or install socat manually, then run this script again." + exit 1 + fi + + # Install socat + brew install socat + fi + + # Check if Docker is installed + if ! command -v docker &> /dev/null; then + echo "${YELLOW}Warning: Docker is required but not installed.${NORMAL}" + echo "Please install Docker: https://docs.docker.com/get-docker/" + echo "Continuing with installation, but Lumier will not work without Docker." + fi + + echo "${GREEN}All dependencies are satisfied.${NORMAL}" +} + +# Copy the lumier script directly +copy_lumier() { + echo "Copying lumier script to $INSTALL_DIR..." + cp "$SCRIPT_DIR/lumier" "$INSTALL_DIR/lumier" + chmod +x "$INSTALL_DIR/lumier" +} + +# Main installation flow +main() { + check_permissions + detect_platform + check_dependencies + + echo "Installing Lumier to $INSTALL_DIR..." + + # Create install directory if it doesn't exist + mkdir -p "$INSTALL_DIR" + + # Copy the lumier script + copy_lumier + + echo "${GREEN}Installation complete!${NORMAL}" + echo "Lumier has been installed to ${BOLD}$INSTALL_DIR/lumier${NORMAL}" + + # Check if the installation directory is in PATH + if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then + echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}" + echo "To add it, run one of these commands based on your shell:" + echo " For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile" + echo " For zsh: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc" + echo " For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish" + fi +} + +# Run the installation +main \ No newline at end of file diff --git a/libs/lumier/lumier b/libs/lumier/lumier new file mode 100755 index 00000000..1c3912cb --- /dev/null +++ b/libs/lumier/lumier @@ -0,0 +1,200 @@ +#!/usr/bin/env bash + +# Exit on errors, undefined variables, 
and propagate errors in pipes +set -eo pipefail + +# Always use the current working directory as the build context +SCRIPT_DIR="$(pwd)" +PORT=8080 +DEBUG=${LUMIER_DEBUG:-0} + +usage() { + cat </dev/null | grep LISTEN > /dev/null; then + return 0 # Tunnel is active + else + return 1 # Tunnel is not active + fi +} + +# Start the tunnel if needed +ensure_tunnel() { + if ! is_tunnel_active; then + echo "Tunnel is not active. Starting tunnel..." + "$SCRIPT_DIR/src/bin/server.sh" start + sleep 2 # Wait for the tunnel to start + + if ! is_tunnel_active; then + echo "Failed to start tunnel. Make sure 'lume' is installed on your host." + exit 1 + fi + else + echo "Tunnel is already active." + fi +} + +# Build the Docker image with cache busting +build_image() { + local image_name="${LUMIER_IMAGE:-lumier:latest}" + echo "Building Lumier Docker image: $image_name" + echo "SCRIPT_DIR=$SCRIPT_DIR" + echo "Checking for Dockerfile at: $SCRIPT_DIR/Dockerfile" + ls -l "$SCRIPT_DIR/Dockerfile" || echo "Dockerfile not found at $SCRIPT_DIR/Dockerfile" + + # Pass any additional arguments to docker build with cache busting + docker build --build-arg CACHEBUST=$(date +%s) -t "$image_name" "$SCRIPT_DIR" "$@" + + echo "Lumier image built successfully: $image_name" +} + +# Run the Docker container +run_container() { + local image_name="${LUMIER_IMAGE:-lumier:latest}" + + # Ensure the Docker image exists + if ! docker image inspect "$image_name" &>/dev/null; then + echo "Docker image '$image_name' not found. Building it..." 
+ build_image + fi + + # Ensure the tunnel is running + ensure_tunnel + + # Automatically resolve and pass host paths for storage and data + STORAGE_PATH="${HOST_STORAGE_PATH:-$(realpath ./storage)}" + DATA_PATH="${HOST_DATA_PATH:-$(realpath ./shared)}" + + # Only add -e if not already present in args + DOCKER_ARGS=( ) + add_env_var() { + local var="$1"; local val="$2"; local flag="-e $var=" + for arg in "$@"; do + [[ "$arg" == *"$flag"* ]] && return 0 + done + DOCKER_ARGS+=( -e "$var=$val" ) + } + add_env_var HOST_STORAGE_PATH "$STORAGE_PATH" + add_env_var HOST_DATA_PATH "$DATA_PATH" + + # Detect --name argument and set VM_NAME if not already present + local container_name="" + local prev_arg="" + for arg in "$@"; do + if [[ "$prev_arg" == "--name" ]]; then + container_name="$arg" + break + elif [[ "$arg" == --name=* ]]; then + container_name="${arg#--name=}" + break + fi + prev_arg="$arg" + done + # Only add -e VM_NAME if not already present and container_name is set + local vm_name_set=false + for arg in "$@"; do + if [[ "$arg" == "-e" ]] && [[ "$2" == VM_NAME=* ]]; then + vm_name_set=true + break + elif [[ "$arg" == "-eVM_NAME="* ]]; then + vm_name_set=true + break + elif [[ "$arg" == "-e"* ]] && [[ "$arg" == *"VM_NAME="* ]]; then + vm_name_set=true + break + fi + done + if [[ -n "$container_name" && "$vm_name_set" != true ]]; then + DOCKER_ARGS+=( -e "VM_NAME=$container_name" ) + fi + + echo "Running Lumier container with image: $image_name" + if [[ "$*" == *"-p 8006:8006"* || "$*" == *"-p"*"8006:8006"* ]]; then + docker run "${DOCKER_ARGS[@]}" "$@" "$image_name" + else + docker run "${DOCKER_ARGS[@]}" -p 8006:8006 "$@" "$image_name" + fi +} + +# Main command handling +case "${1:-help}" in + run) + shift + run_container "$@" + ;; + tunnel) + # Handle tunnel subcommands + case "${2:-}" in + start) + "$SCRIPT_DIR/src/bin/server.sh" start + ;; + stop) + "$SCRIPT_DIR/src/bin/server.sh" stop + ;; + status) + "$SCRIPT_DIR/src/bin/server.sh" status + ;; + *) + echo 
"Unknown tunnel subcommand: $2" + usage + exit 1 + ;; + esac + ;; + + build) + shift + build_image "$@" + ;; + help) + usage + ;; + *) + echo "Unknown command: $1" + usage + exit 1 + ;; +esac \ No newline at end of file diff --git a/libs/lumier/mount/server.py b/libs/lumier/mount/server.py new file mode 100644 index 00000000..464c26ad --- /dev/null +++ b/libs/lumier/mount/server.py @@ -0,0 +1,10 @@ +from flask import Flask + +app = Flask(__name__) + +@app.route('/') +def hello_world(): + return 'Hello, World, from VM!' + +if __name__ == '__main__': + app.run(debug=True, host="0.0.0.0", port=5001) \ No newline at end of file diff --git a/libs/lumier/mount/setup.sh b/libs/lumier/mount/setup.sh new file mode 100755 index 00000000..8897896e --- /dev/null +++ b/libs/lumier/mount/setup.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +echo "Creating helloworld.txt on the Desktop..." +if [ ! -f ~/Desktop/helloworld.txt ]; then + echo "Hello, World!" > ~/Desktop/helloworld.txt + echo "helloworld.txt created successfully." +else + echo "helloworld.txt already exists." 
+fi \ No newline at end of file diff --git a/libs/lumier/src/bin/entry.sh b/libs/lumier/src/bin/entry.sh new file mode 100755 index 00000000..66a375ad --- /dev/null +++ b/libs/lumier/src/bin/entry.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +# Exit on errors, undefined variables, and propagate errors in pipes +set -euo pipefail + +# Source configuration files +CONFIG_DIR="/run/config" +LIB_DIR="/run/lib" + +# Source constants if available +if [ -f "${CONFIG_DIR}/constants.sh" ]; then + source "${CONFIG_DIR}/constants.sh" +fi + +# Import utilities +for lib in "${LIB_DIR}"/*.sh; do + if [ -f "$lib" ]; then + source "$lib" + fi +done + +# Set VM_NAME to env or fallback to container name (from --name) +if [ -z "${VM_NAME:-}" ]; then + VM_NAME="$(cat /etc/hostname)" + export VM_NAME +fi + +# Set HOST_STORAGE_PATH to /storage/$VM_NAME if not set +if [ -z "${HOST_STORAGE_PATH:-}" ]; then + HOST_STORAGE_PATH="/storage/$VM_NAME" + export HOST_STORAGE_PATH +fi + +# Optionally check for mountpoints +if mountpoint -q /storage; then + echo "/storage is mounted" +fi +if mountpoint -q /data; then + echo "/data is mounted" +fi + +# Log startup info +echo "Lumier VM is starting..." + +# Cleanup function to ensure VM and noVNC proxy shutdown on container stop +cleanup() { + set +e # Don't exit on error in cleanup + echo "[cleanup] Caught signal, shutting down..." + echo "[cleanup] Stopping VM..." + stop_vm + # Now gently stop noVNC proxy if running + # if [ -n "${NOVNC_PID:-}" ] && kill -0 "$NOVNC_PID" 2>/dev/null; then + # echo "[cleanup] Stopping noVNC proxy (PID $NOVNC_PID)..." + # kill -TERM "$NOVNC_PID" + # # Wait up to 5s for noVNC to exit + # for i in {1..5}; do + # if ! kill -0 "$NOVNC_PID" 2>/dev/null; then + # echo "[cleanup] noVNC proxy stopped." + # break + # fi + # sleep 1 + # done + # # Escalate if still running + # if kill -0 "$NOVNC_PID" 2>/dev/null; then + # echo "[cleanup] noVNC proxy did not exit, killing..." 
+ # kill -KILL "$NOVNC_PID" 2>/dev/null + # fi + # fi + echo "[cleanup] Done. Exiting." + exit 0 +} +trap cleanup SIGTERM SIGINT + +# Start the VM +start_vm + +# Start noVNC for VNC access +NOVNC_PID="" +if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then + echo "Starting noVNC proxy with optimized color settings..." + ${NOVNC_PATH}/utils/novnc_proxy --vnc host.docker.internal:${VNC_PORT} --listen 8006 --web ${NOVNC_PATH} > /dev/null 2>&1 & + NOVNC_PID=$! + disown $NOVNC_PID + echo "noVNC interface available at: http://localhost:8006/vnc.html?password=${VNC_PASSWORD}&autoconnect=true&logging=debug" +fi + +# Run any post-startup hooks +if [ -d "/run/hooks" ]; then + for hook in /run/hooks/*; do + if [ -x "$hook" ]; then + echo "Running hook: $(basename "$hook")" + "$hook" + fi + done +fi + +echo "Lumier is running. Press Ctrl+C to stop." +tail -f /dev/null \ No newline at end of file diff --git a/libs/lumier/src/bin/server.sh b/libs/lumier/src/bin/server.sh new file mode 100755 index 00000000..5849d667 --- /dev/null +++ b/libs/lumier/src/bin/server.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +# Exit on errors, undefined variables, and propagate errors in pipes +set -euo pipefail + +# Source constants if available +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +if [ -f "${SCRIPT_DIR}/../config/constants.sh" ]; then + source "${SCRIPT_DIR}/../config/constants.sh" +fi + +# Use the tunnel port from constants if available, otherwise default to 8080 +PORT="${TUNNEL_PORT:-8080}" +TUNNEL_SCRIPT="${SCRIPT_DIR}/tunnel.sh" + +# Function to check if the tunnel is active +is_tunnel_active() { + if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then + return 0 # Tunnel is active + else + return 1 # Tunnel is not active + fi +} + +# Function to start the tunnel +start_tunnel() { + echo "Starting tunnel on port $PORT..." + if is_tunnel_active; then + echo "Tunnel is already running on port $PORT." 
+ return 0 + fi + + # Start socat in the background + socat TCP-LISTEN:$PORT,reuseaddr,fork EXEC:"$TUNNEL_SCRIPT" & + SOCAT_PID=$! + + # Check if the tunnel started successfully + sleep 1 + if ! is_tunnel_active; then + echo "Failed to start tunnel on port $PORT." + return 1 + fi + + echo "Tunnel started successfully on port $PORT (PID: $SOCAT_PID)." + return 0 +} + +# Function to stop the tunnel +stop_tunnel() { + echo "Stopping tunnel on port $PORT..." + if ! is_tunnel_active; then + echo "No tunnel running on port $PORT." + return 0 + fi + + # Find and kill the socat process + local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}') + if [ -n "$pid" ]; then + kill $pid + echo "Tunnel stopped (PID: $pid)." + return 0 + else + echo "Failed to find process using port $PORT." + return 1 + fi +} + +# Function to check tunnel status +status_tunnel() { + if is_tunnel_active; then + local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}') + echo "Tunnel is active on port $PORT (PID: $pid)." + return 0 + else + echo "No tunnel running on port $PORT." 
+ return 1 + fi +} + +# Parse command line arguments +case "${1:-}" in + start) + start_tunnel + ;; + stop) + stop_tunnel + ;; + restart) + stop_tunnel + start_tunnel + ;; + status) + status_tunnel + ;; + *) + echo "Usage: $0 {start|stop|restart|status}" + exit 1 + ;; +esac \ No newline at end of file diff --git a/libs/lumier/src/bin/tunnel-script.sh b/libs/lumier/src/bin/tunnel-script.sh new file mode 100755 index 00000000..529839ea --- /dev/null +++ b/libs/lumier/src/bin/tunnel-script.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# Source constants if running in container context +if [ -f "/run/config/constants.sh" ]; then + source "/run/config/constants.sh" +fi + +# Define server address with fallback +SERVER="${TUNNEL_HOST:-host.docker.internal}:${TUNNEL_PORT:-8080}" + +# Extract the base name of the command and arguments +command=$(basename "$0") +subcommand="$1" +shift +args="$@" + +command="$command $subcommand $args" + +# Concatenate command and any stdin data +full_data="$command" +if [ ! 
-t 0 ]; then + stdin_data=$(cat) + if [ -n "$stdin_data" ]; then + # Format full_data to include stdin data + full_data="$full_data << 'EOF' + $stdin_data +EOF" + fi +fi + +# Trim leading/trailing whitespace and newlines +full_data=$(echo -e "$full_data" | sed 's/^[ \t\n]*//;s/[ \t\n]*$//') + +# Log command if debug is enabled +if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then + echo "Executing lume command: $full_data" >&2 + echo "Sending to: $SERVER" >&2 +fi + +# Use curl with -N to disable output buffering and -s for silent mode +curl -N -s -X POST \ + -H "Content-Type: application/octet-stream" \ + --data-binary @- \ + "http://$SERVER" <<< "$full_data" \ No newline at end of file diff --git a/libs/lumier/src/bin/tunnel.sh b/libs/lumier/src/bin/tunnel.sh new file mode 100755 index 00000000..6de14282 --- /dev/null +++ b/libs/lumier/src/bin/tunnel.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +# Exit on errors, undefined variables, and propagate errors in pipes +set -euo pipefail + +# Source constants if available +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +if [ -f "${SCRIPT_DIR}/../config/constants.sh" ]; then + source "${SCRIPT_DIR}/../config/constants.sh" +fi + +# Handle errors and cleanup +cleanup() { + local exit_code=$? 
+ # Clean up any temporary files if they exist + [ -n "${temp_file:-}" ] && [ -f "$temp_file" ] && rm "$temp_file" + [ -n "${fifo:-}" ] && [ -p "$fifo" ] && rm "$fifo" + exit $exit_code +} +trap cleanup EXIT INT TERM + +log_debug() { + if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then + echo "[DEBUG] $*" >&2 + fi +} + +send_error_response() { + local status_code=$1 + local message=$2 + echo "HTTP/1.1 $status_code" + echo "Content-Type: text/plain" + echo "" + echo "$message" + exit 1 +} + +# Read the HTTP request line +read -r request_line +log_debug "Request: $request_line" + +# Read headers and look for Content-Length +content_length=0 +while IFS= read -r header; do + [[ $header == $'\r' ]] && break # End of headers + log_debug "Header: $header" + if [[ "$header" =~ ^Content-Length:\ ([0-9]+) ]]; then + content_length="${BASH_REMATCH[1]}" + fi +done + +# Read the body using the content length +command="" +if [ "$content_length" -gt 0 ]; then + command=$(dd bs=1 count="$content_length" 2>/dev/null) + log_debug "Received command: $command" +fi + +# Determine the executable and arguments based on the command +if [[ "$command" == lume* ]]; then + executable="$(which lume || echo "/usr/local/bin/lume")" + command_args="${command#lume}" # Remove 'lume' from the command +elif [[ "$command" == sshpass* ]]; then + executable="$(which sshpass || echo "/usr/local/bin/sshpass")" + command_args="${command#sshpass}" +else + send_error_response "400 Bad Request" "Unsupported command: $command" +fi + +# Check if executable exists +if [ ! 
-x "$executable" ]; then + send_error_response "500 Internal Server Error" "Executable not found or not executable: $executable" +fi + +# Create a temporary file to store the command +temp_file=$(mktemp) +echo "$executable $command_args" > "$temp_file" +chmod +x "$temp_file" + +# Create a FIFO (named pipe) for capturing output +fifo=$(mktemp -u) +mkfifo "$fifo" + +# Execute the command and pipe its output through awk to ensure line-buffering +{ + log_debug "Executing: $executable $command_args" + "$temp_file" 2>&1 | awk '{ print; fflush() }' > "$fifo" +} & + +# Stream the output from the FIFO as an HTTP response +{ + echo -e "HTTP/1.1 200 OK\r" + echo -e "Content-Type: text/plain\r" + echo -e "\r" + cat "$fifo" +} \ No newline at end of file diff --git a/libs/lumier/src/config/constants.sh b/libs/lumier/src/config/constants.sh new file mode 100644 index 00000000..766c4373 --- /dev/null +++ b/libs/lumier/src/config/constants.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +# Port configuration +TUNNEL_PORT=8080 +VNC_PORT=8006 + +# Host configuration +TUNNEL_HOST="host.docker.internal" + +# Default VM configuration +DEFAULT_RAM_SIZE="8192" +DEFAULT_CPU_CORES="4" +DEFAULT_DISK_SIZE="100" +DEFAULT_VM_NAME="lumier" +DEFAULT_VM_VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest" + +# Paths +NOVNC_PATH="/opt/noVNC" +LIFECYCLE_HOOKS_DIR="/run/hooks" + +# VM connection details +HOST_USER="lume" +HOST_PASSWORD="lume" +SSH_RETRY_ATTEMPTS=20 +SSH_RETRY_INTERVAL=5 \ No newline at end of file diff --git a/libs/lumier/src/hooks/on-logon.sh b/libs/lumier/src/hooks/on-logon.sh new file mode 100755 index 00000000..faa817c0 --- /dev/null +++ b/libs/lumier/src/hooks/on-logon.sh @@ -0,0 +1,8 @@ +setup_script="$DATA_FOLDER_PATH/setup.sh" + +if [ -f "$setup_script" ]; then + chmod +x "$setup_script" + source "$setup_script" +else + echo "Setup script not found at: $setup_script" +fi \ No newline at end of file diff --git a/libs/lumier/src/lib/utils.sh b/libs/lumier/src/lib/utils.sh new file 
mode 100755 index 00000000..7d599669 --- /dev/null +++ b/libs/lumier/src/lib/utils.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +# Function to wait for SSH to become available +wait_for_ssh() { + local host_ip=$1 + local user=$2 + local password=$3 + local retry_interval=${4:-5} # Default retry interval is 5 seconds + local max_retries=${5:-20} # Default maximum retries is 20 (0 for infinite) + + echo "Waiting for SSH to become available on $host_ip..." + + local retry_count=0 + while true; do + # Try to connect via SSH + sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host_ip" "exit" + + # Check the exit status of the SSH command + if [ $? -eq 0 ]; then + echo "SSH is ready on $host_ip!" + return 0 + fi + + # Increment retry count + ((retry_count++)) + + # Exit if maximum retries are reached + if [ $max_retries -ne 0 ] && [ $retry_count -ge $max_retries ]; then + echo "Maximum retries reached. SSH is not available." + return 1 + fi + + echo "SSH not ready. Retrying in $retry_interval seconds... 
(Attempt $retry_count)" + sleep $retry_interval + done +} + +# Function to execute a script on a remote server using sshpass +execute_remote_script() { + local host="$1" + local user="$2" + local password="$3" + local script_path="$4" + local vnc_password="$5" + local data_folder="$6" + + # Check if all required arguments are provided + if [ -z "$host" ] || [ -z "$user" ] || [ -z "$password" ] || [ -z "$script_path" ] || [ -z "$vnc_password" ]; then + echo "Usage: execute_remote_script [data_folder]" + return 1 + fi + + echo "VNC password exported to VM: $vnc_password" + + data_folder_path="$VM_SHARED_FILES_PATH/$data_folder" + echo "Data folder path in VM: $data_folder_path" + + # Read the script content and prepend the shebang + script_content="#!/usr/bin/env bash\n" + if [ -n "$data_folder" ]; then + script_content+="export VNC_PASSWORD='$vnc_password'\n" + script_content+="export DATA_FOLDER_PATH='$data_folder_path'\n" + fi + script_content+="$(<"$script_path")" + + # Use a here-document to send the script content + sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host" "bash -s" <&1) + + # Check if VM not found error + if [[ $VM_INFO == *"Virtual machine not found"* ]]; then + IMAGE_NAME="${VERSION##*/}" + lume pull "$IMAGE_NAME" "$VM_NAME" --storage "$STORAGE_PATH" + else + # Parse the JSON status - check if it contains "status" : "running" + if [[ $VM_INFO == *'"status" : "running"'* ]]; then + lume_stop "$VM_NAME" "$STORAGE_PATH" + # lume stop "$VM_NAME" --storage "$STORAGE_PATH" + fi + fi + + # Set VM parameters + lume set "$VM_NAME" --cpu "$CPU_CORES" --memory "${RAM_SIZE}MB" --display "$DISPLAY" --storage "$STORAGE_PATH" + + # Fetch VM configuration + CONFIG_JSON=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json) + + # Setup data directory args if necessary + SHARED_DIR_ARGS="" + if [ -d "/data" ]; then + if [ -n "$HOST_DATA_PATH" ]; then + SHARED_DIR_ARGS="--shared-dir=$HOST_DATA_PATH" + else + echo "Warning: /data volume exists but 
HOST_DATA_PATH is not set. Cannot mount volume." + fi + fi + + # Run VM with VNC and shared directory using curl + lume_run $SHARED_DIR_ARGS --storage "$STORAGE_PATH" "$VM_NAME" & + # lume run "$VM_NAME" --storage "$STORAGE_PATH" --no-display + + # Wait for VM to be running and VNC URL to be available + vm_ip="" + vnc_url="" + max_attempts=30 + attempt=0 + + while [ $attempt -lt $max_attempts ]; do + # Get VM info as JSON + VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null) + + # Check if VM has status 'running' + if [[ $VM_INFO == *'"status" : "running"'* ]]; then + # Extract IP address using the existing function from utils.sh + vm_ip=$(extract_json_field "ipAddress" "$VM_INFO") + # Extract VNC URL using the existing function from utils.sh + vnc_url=$(extract_json_field "vncUrl" "$VM_INFO") + + # If we have both IP and VNC URL, break the loop + if [ -n "$vm_ip" ] && [ -n "$vnc_url" ]; then + break + fi + fi + + sleep 2 + attempt=$((attempt + 1)) + done + + if [ -z "$vm_ip" ] || [ -z "$vnc_url" ]; then + echo "Timed out waiting for VM to start or VNC URL to become available." + lume_stop "$VM_NAME" "$STORAGE_PATH" > /dev/null 2>&1 + # lume stop "$VM_NAME" --storage "$STORAGE_PATH" > /dev/null 2>&1 + exit 1 + fi + + + # Parse VNC URL to extract password and port + VNC_PASSWORD=$(echo "$vnc_url" | sed -n 's/.*:\(.*\)@.*/\1/p') + VNC_PORT=$(echo "$vnc_url" | sed -n 's/.*:\([0-9]\+\)$/\1/p') + + # Wait for SSH to become available + wait_for_ssh "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" 5 20 + + # Export VNC variables for entry.sh to use + export VNC_PORT + export VNC_PASSWORD + + # Execute on-logon.sh if present + on_logon_script="/run/lifecycle/on-logon.sh" + if [ -f "$on_logon_script" ]; then + execute_remote_script "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" "$on_logon_script" "$VNC_PASSWORD" "$DATA_FOLDER" + fi + + # The VM is still running because we never killed lume run. 
+ # If you want to stop the VM at some point, you can kill $LUME_PID or use lume_stop. +} + +stop_vm() { + echo "Stopping VM '$VM_NAME'..." + STORAGE_PATH="$HOST_STORAGE_PATH" + if [ -z "$STORAGE_PATH" ]; then + STORAGE_PATH="storage_${VM_NAME}" + fi + # Check if the VM exists and is running (use lume get for speed) + VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null) + if [[ -z "$VM_INFO" || $VM_INFO == *"Virtual machine not found"* ]]; then + echo "VM '$VM_NAME' does not exist." + elif [[ $VM_INFO == *'"status" : "running"'* ]]; then + lume_stop "$VM_NAME" "$STORAGE_PATH" + echo "VM '$VM_NAME' was running and is now stopped." + elif [[ $VM_INFO == *'"status" : "stopped"'* ]]; then + echo "VM '$VM_NAME' is already stopped." + else + echo "Unknown VM status for '$VM_NAME'." + fi +} + +is_vm_running() { + lume ls | grep -q "$VM_NAME" +} + +# Stop VM with storage location specified using curl +lume_stop() { + local vm_name="$1" + local storage="$2" + curl --connect-timeout 6000 \ + --max-time 5000 \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"storage":"'$storage'"}' \ + "http://host.docker.internal:3000/lume/vms/${vm_name}/stop" +} + +# Run VM with VNC client started and shared directory using curl +lume_run() { + # Parse args + local shared_dir="" + local storage="ssd" + local vm_name="lume_vm" + local no_display=true + while [[ $# -gt 0 ]]; do + case $1 in + --shared-dir=*) + shared_dir="${1#*=}" + shift + ;; + --storage) + storage="$2" + shift 2 + ;; + --no-display) + no_display=true + shift + ;; + *) + # Assume last arg is VM name if not an option + vm_name="$1" + shift + ;; + esac + done + + # Default to ~/Projects if not provided + if [[ -z "$shared_dir" ]]; then + shared_dir="~/Projects" + fi + + local json_body="{\"noDisplay\": true, \"sharedDirectories\": [{\"hostPath\": \"$shared_dir\", \"readOnly\": false}], \"storage\": \"$storage\", \"recoveryMode\": false}" + local curl_cmd="curl --connect-timeout 6000 \ + 
--max-time 5000 \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '$json_body' \ + http://host.docker.internal:3000/lume/vms/$vm_name/run" + echo "[lume_run] Running:" + echo "$curl_cmd" + eval "$curl_cmd" +} \ No newline at end of file diff --git a/libs/mcp-server/README.md b/libs/mcp-server/README.md index a4307616..736ab364 100644 --- a/libs/mcp-server/README.md +++ b/libs/mcp-server/README.md @@ -68,13 +68,51 @@ You can then use the script in your MCP configuration like this: "CUA_AGENT_LOOP": "OMNI", "CUA_MODEL_PROVIDER": "ANTHROPIC", "CUA_MODEL_NAME": "claude-3-7-sonnet-20250219", - "ANTHROPIC_API_KEY": "your-api-key" + "CUA_PROVIDER_API_KEY": "your-api-key" } } } } ``` +## Development Guide + +If you want to develop with the cua-mcp-server directly without installation, you can use this configuration: + +```json +{ + "mcpServers": { + "cua-agent": { + "command": "/bin/bash", + "args": ["~/cua/libs/mcp-server/scripts/start_mcp_server.sh"], + "env": { + "CUA_AGENT_LOOP": "UITARS", + "CUA_MODEL_PROVIDER": "OAICOMPAT", + "CUA_MODEL_NAME": "ByteDance-Seed/UI-TARS-1.5-7B", + "CUA_PROVIDER_BASE_URL": "https://****************.us-east-1.aws.endpoints.huggingface.cloud/v1", + "CUA_PROVIDER_API_KEY": "your-api-key" + } + } + } +} +``` + +This configuration: +- Uses the start_mcp_server.sh script which automatically sets up the Python path and runs the server module +- Works with Claude Desktop, Cursor, or any other MCP client +- Automatically uses your development code without requiring installation + +Just add this to your MCP client's configuration and it will use your local development version of the server. + +### Troubleshooting + +If you get a `/bin/bash: ~/cua/libs/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, try changing the path to the script to be absolute instead of relative. 
+ +To see the logs: +``` +tail -n 20 -f ~/Library/Logs/Claude/mcp*.log +``` + ## Claude Desktop Integration To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`): diff --git a/libs/mcp-server/mcp_server/server.py b/libs/mcp-server/mcp_server/server.py index f6692f9a..03971cb6 100644 --- a/libs/mcp-server/mcp_server/server.py +++ b/libs/mcp-server/mcp_server/server.py @@ -1,9 +1,10 @@ import asyncio +import base64 import logging import os import sys import traceback -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, Tuple # Configure logging to output to stderr for debug visibility logging.basicConfig( @@ -17,7 +18,7 @@ logger = logging.getLogger("mcp-server") logger.debug("MCP Server module loading...") try: - from mcp.server.fastmcp import Context, FastMCP + from mcp.server.fastmcp import Context, FastMCP, Image logger.debug("Successfully imported FastMCP") except ImportError as e: @@ -49,16 +50,37 @@ def serve() -> FastMCP: server = FastMCP("cua-agent") @server.tool() - async def run_cua_task(ctx: Context, task: str) -> str: + async def screenshot_cua(ctx: Context) -> Image: """ - Run a Computer-Use Agent (CUA) task and return the results. + Take a screenshot of the current MacOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state. + + Args: + ctx: The MCP context + + Returns: + An image resource containing the screenshot + """ + global global_computer + if global_computer is None: + global_computer = Computer(verbosity=logging.INFO) + await global_computer.run() + screenshot = await global_computer.interface.screenshot() + return Image( + format="png", + data=screenshot + ) + + @server.tool() + async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]: + """ + Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results. 
Args: ctx: The MCP context task: The instruction or task for the agent to perform Returns: - A string containing the agent's response + A tuple containing the agent's response and the final screenshot """ global global_computer @@ -72,12 +94,7 @@ def serve() -> FastMCP: # Determine which loop to use loop_str = os.getenv("CUA_AGENT_LOOP", "OMNI") - if loop_str == "OPENAI": - loop = AgentLoop.OPENAI - elif loop_str == "ANTHROPIC": - loop = AgentLoop.ANTHROPIC - else: - loop = AgentLoop.OMNI + loop = getattr(AgentLoop, loop_str) # Determine provider provider_str = os.getenv("CUA_MODEL_PROVIDER", "ANTHROPIC") @@ -89,6 +106,9 @@ def serve() -> FastMCP: # Get base URL for provider (if needed) provider_base_url = os.getenv("CUA_PROVIDER_BASE_URL", None) + # Get api key for provider (if needed) + api_key = os.getenv("CUA_PROVIDER_API_KEY", None) + # Create agent with the specified configuration agent = ComputerAgent( computer=global_computer, @@ -98,6 +118,7 @@ def serve() -> FastMCP: name=model_name, provider_base_url=provider_base_url, ), + api_key=api_key, save_trajectory=False, only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")), verbosity=logging.INFO, @@ -107,33 +128,34 @@ def serve() -> FastMCP: full_result = "" async for result in agent.run(task): logger.info(f"Agent step complete: {result.get('id', 'unknown')}") + ctx.info(f"Agent step complete: {result.get('id', 'unknown')}") # Add response ID to output full_result += f"\n[Response ID: {result.get('id', 'unknown')}]\n" - - # Extract and concatenate text responses - if "text" in result: - # Handle both string and dict responses - text_response = result.get("text", "") - if isinstance(text_response, str): - full_result += f"Response: {text_response}\n" - else: - # If it's a dict or other structure, convert to string representation - full_result += f"Response: {str(text_response)}\n" - - # Log detailed information - if "tools" in result: - tools_info = result.get("tools") - logger.debug(f"Tools used: 
{tools_info}") - full_result += f"\nTools used: {tools_info}\n" + + if "content" in result: + full_result += f"Response: {result.get('content', '')}\n" # Process output if available outputs = result.get("output", []) for output in outputs: output_type = output.get("type") - if output_type == "reasoning": + if output_type == "message": + logger.debug(f"Message: {output}") + content = output.get("content", []) + for content_part in content: + if content_part.get("text"): + full_result += f"\nMessage: {content_part.get('text', '')}\n" + elif output_type == "reasoning": logger.debug(f"Reasoning: {output}") - full_result += f"\nReasoning: {output.get('content', '')}\n" + + summary_content = output.get("summary", []) + if summary_content: + for summary_part in summary_content: + if summary_part.get("text"): + full_result += f"\nReasoning: {summary_part.get('text', '')}\n" + else: + full_result += f"\nReasoning: {output.get('text', output.get('content', ''))}\n" elif output_type == "computer_call": logger.debug(f"Computer call: {output}") action = output.get("action", "") @@ -144,17 +166,25 @@ def serve() -> FastMCP: full_result += "\n" + "-" * 40 + "\n" logger.info(f"CUA task completed successfully") - return full_result or "Task completed with no text output." + ctx.info(f"CUA task completed successfully") + return ( + full_result or "Task completed with no text output.", + Image( + format="png", + data=await global_computer.interface.screenshot() + ) + ) except Exception as e: error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}" logger.error(error_msg) + ctx.error(error_msg) return f"Error during task execution: {str(e)}" @server.tool() - async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> str: + async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List: """ - Run multiple CUA tasks in sequence and return the combined results. + Run multiple CUA tasks in a MacOS VM in sequence and return the combined results. 
Args: ctx: The MCP context @@ -164,13 +194,15 @@ def serve() -> FastMCP: Combined results from all tasks """ results = [] - for i, task in enumerate(tasks): logger.info(f"Running task {i+1}/{len(tasks)}: {task}") - result = await run_cua_task(ctx, task) - results.append(f"Task {i+1}: {task}\nResult: {result}\n") - - return "\n".join(results) + ctx.info(f"Running task {i+1}/{len(tasks)}: {task}") + + ctx.report_progress(i / len(tasks)) + results.extend(await run_cua_task(ctx, task)) + ctx.report_progress((i + 1) / len(tasks)) + + return results return server diff --git a/libs/mcp-server/scripts/start_mcp_server.sh b/libs/mcp-server/scripts/start_mcp_server.sh new file mode 100755 index 00000000..17fd9dab --- /dev/null +++ b/libs/mcp-server/scripts/start_mcp_server.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +set -e + +# Set the CUA repository path based on script location +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )" +PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python" + +# Set Python path to include all necessary libraries +export PYTHONPATH="${CUA_REPO_DIR}/libs/mcp-server:${CUA_REPO_DIR}/libs/agent:${CUA_REPO_DIR}/libs/computer:${CUA_REPO_DIR}/libs/core:${CUA_REPO_DIR}/libs/pylume" + +# Run the MCP server directly as a module +$PYTHON_PATH -m mcp_server.server \ No newline at end of file diff --git a/notebooks/blog/build-your-own-operator-on-macos-1.ipynb b/notebooks/blog/build-your-own-operator-on-macos-1.ipynb index 22db332d..70c0e6ea 100644 --- a/notebooks/blog/build-your-own-operator-on-macos-1.ipynb +++ b/notebooks/blog/build-your-own-operator-on-macos-1.ipynb @@ -145,9 +145,8 @@ " await computer.interface.press_key(key)\n", " \n", " elif action_type == \"wait\":\n", - " wait_time = action.time\n", - " print(f\"Waiting for {wait_time} seconds\")\n", - " await asyncio.sleep(wait_time)\n", + " print(f\"Waiting for 2 seconds\")\n", + " await asyncio.sleep(2)\n", " \n", " elif 
action_type == \"screenshot\":\n", " print(\"Taking screenshot\")\n",