diff --git a/.all-contributorsrc b/.all-contributorsrc
index d1b3578e..503f0e94 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -151,6 +151,15 @@
"contributions": [
"code"
]
+ },
+ {
+ "login": "FinnBorge",
+ "name": "FinnBorge",
+ "avatar_url": "https://avatars.githubusercontent.com/u/9272726?v=4",
+ "profile": "https://github.com/FinnBorge",
+ "contributions": [
+ "code"
+ ]
}
]
}
diff --git a/.gitignore b/.gitignore
index ce8445bf..8265a5a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,8 @@ dist/
downloads/
eggs/
.eggs/
-lib/
+lib/*
+!libs/lumier/src/lib/
lib64/
parts/
sdist/
@@ -242,4 +243,7 @@ trajectories/
.storage/
# Gradio settings
-.gradio_settings.json
\ No newline at end of file
+.gradio_settings.json
+
+# Lumier Storage
+storage/
\ No newline at end of file
diff --git a/.vscode/lumier.code-workspace b/.vscode/lumier.code-workspace
new file mode 100644
index 00000000..26e12846
--- /dev/null
+++ b/.vscode/lumier.code-workspace
@@ -0,0 +1,30 @@
+{
+ "folders": [
+ {
+ "name": "lumier",
+ "path": "../libs/lumier"
+ },
+ {
+ "name": "lume",
+ "path": "../libs/lume"
+ }
+ ],
+ "settings": {
+ "files.exclude": {
+ "**/.git": true,
+ "**/.svn": true,
+ "**/.hg": true,
+ "**/CVS": true,
+ "**/.DS_Store": true
+ }
+ },
+ "tasks": {
+ "version": "2.0.0",
+ "tasks": [
+ ]
+ },
+ "launch": {
+ "configurations": [
+ ]
+ }
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index b0630760..04e7f054 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,13 @@ If you only need the virtualization capabilities:
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
```
+Optionally, if you don't want Lume to run as a background service:
+```bash
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh) --no-background-service"
+```
+
+**Note:** If you choose this option, you'll need to start the Lume API service manually by running `lume serve` in your terminal whenever it's needed. For Option 2 below, run `lume serve` after completing step 1.
+
For Lume usage instructions, refer to the [Lume documentation](./libs/lume/README.md).
### Option 2: Full Computer-Use Agent Capabilities
@@ -62,17 +69,12 @@ If you want to use AI agents with virtualized environments:
lume pull macos-sequoia-cua:latest
```
-3. Start Lume daemon service:
- ```bash
- lume serve
- ```
-
-4. Install the Python libraries:
+3. Install the Python libraries:
```bash
pip install cua-computer cua-agent[all]
```
-5. Use the libraries in your Python code:
+4. Use the libraries in your Python code:
```python
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
@@ -80,7 +82,7 @@ If you want to use AI agents with virtualized environments:
async with Computer(verbosity=logging.DEBUG) as macos_computer:
agent = ComputerAgent(
computer=macos_computer,
- loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC
+                loop=AgentLoop.OPENAI, # or AgentLoop.UITARS, AgentLoop.OMNI, or AgentLoop.ANTHROPIC
model=LLM(provider=LLMProvider.OPENAI) # or LLM(provider=LLMProvider.MLXVLM, name="mlx-community/UI-TARS-1.5-7B-4bit")
)
@@ -95,7 +97,7 @@ If you want to use AI agents with virtualized environments:
Explore the [Agent Notebook](./notebooks/) for a ready-to-run example.
-6. Optionally, you can use the Agent with a Gradio UI:
+5. Optionally, you can use the Agent with a Gradio UI:
```python
from utils import load_dotenv_files
@@ -228,6 +230,7 @@ Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonica
+
+**Lumier** provides a Docker-based interface for the `lume` CLI, allowing you to easily run macOS virtual machines inside a container with VNC access. It creates a secure tunnel to execute lume commands on your host machine while providing a containerized environment for your applications.
+
+## Requirements
+
+Before using Lumier, make sure you have:
+
+1. [lume](https://github.com/trycua/cua/blob/main/libs/lume/README.md) installed on your host machine
+2. Docker installed on your host machine
+3. `socat` installed for the tunnel (install it with Homebrew: `brew install socat`); a quick check for all three follows below
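+
+A quick way to confirm the three requirements are available (a minimal check; it only verifies the tools are on your `PATH`):
+
+```bash
+# Each command should print a path; a missing tool prints nothing and exits non-zero
+command -v lume
+command -v docker
+command -v socat
+```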
+
+## Installation
+
+You can use Lumier directly from its directory or install it to your system:
+
+```bash
+# Option 1: Install to your user's bin directory (recommended)
+./install.sh
+
+# Option 2: Install to a custom directory
+./install.sh --install-dir=/usr/local/bin # May require sudo
+
+# Option 3: View installation options
+./install.sh --help
+```
+
+After installation, you can run `lumier` from anywhere in your terminal.
+
+If you get a "command not found" error, make sure the installation directory is in your PATH. The installer will warn you if it isn't and provide instructions to add it.
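+
+For example, if you installed to the default location and use zsh, the suggested fix looks like this (adjust the file for your shell; this mirrors the hint printed by `install.sh`):
+
+```bash
+# Add the default install directory to PATH (zsh)
+echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.zshrc
+source ~/.zshrc
+```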
+
+## Usage
+
+There are two ways to use Lumier: with the provided script or directly with Docker.
+
+### Option 1: Using the Lumier Script
+
+Lumier provides a simple CLI interface to manage VMs in Docker with full Docker compatibility:
+
+```bash
+# Show help and available commands
+lumier help
+
+# Start the tunnel to connect to lume
+lumier tunnel start
+
+# Check if the tunnel is running
+lumier tunnel status
+
+# Stop the tunnel
+lumier tunnel stop
+
+# Build the Docker image (optional, happens automatically on first run)
+lumier build
+
+# Run a VM with default settings
+lumier run -it --rm
+
+# Run a VM with custom settings using Docker's -e flag
+lumier run -it --rm \
+ --name lumier-vm \
+ -p 8006:8006 \
+ -v $(pwd)/storage:/storage \
+ -v $(pwd)/shared:/data \
+ -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
+ -e CPU_CORES=4 \
+ -e RAM_SIZE=8192
+
+# Note:
+# The lumier script automatically detects the real host paths for ./storage and ./shared
+# and passes them to the container as HOST_STORAGE_PATH and HOST_DATA_PATH.
+# You do NOT need to specify these environment variables manually.
+# The VM name is always set from the container name.
+```
+
+### Option 2: Using Docker Directly
+
+You can also use Docker commands directly without the lumier utility:
+
+```bash
+# 1. Start the tunnel manually
+cd libs/lumier
+socat TCP-LISTEN:8080,reuseaddr,fork EXEC:"$PWD/src/bin/tunnel.sh" &
+TUNNEL_PID=$!
+
+# 2. Build the Docker image
+docker build -t lumier:latest .
+
+# 3. Run the container
+docker run -it --rm \
+ --name lumier-vm \
+ -p 8006:8006 \
+ -v $(pwd)/storage:/storage \
+ -v $(pwd)/shared:/data \
+ -e VM_NAME=lumier-vm \
+ -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
+ -e CPU_CORES=4 \
+ -e RAM_SIZE=8192 \
+ -e HOST_STORAGE_PATH=$(pwd)/storage \
+ -e HOST_DATA_PATH=$(pwd)/shared \
+ lumier:latest
+
+# 4. Stop the tunnel when you're done
+kill $TUNNEL_PID
+
+# Alternatively, find and kill the tunnel process
+# First, find the process
+lsof -i TCP:8080
+# Then kill it by PID
+kill <PID>
+```
+
+Note that when using Docker directly, you're responsible for:
+- Starting and managing the tunnel
+- Building the Docker image
+- Providing the correct environment variables
+
+## Available Environment Variables
+
+These variables can be set using Docker's `-e` flag (a combined example follows the list):
+
+- `VM_NAME`: Set the VM name (default: lumier)
+- `VERSION`: Set the VM image (default: ghcr.io/trycua/macos-sequoia-vanilla:latest)
+- `CPU_CORES`: Set the number of CPU cores (default: 4)
+- `RAM_SIZE`: Set the memory size in MB (default: 8192)
+- `DISPLAY`: Set the display resolution (default: 1024x768)
+- `HOST_DATA_PATH`: Path on the host to share with the VM (set automatically by the `lumier` script)
+- `HOST_STORAGE_PATH`: Path on the host used for VM storage (set automatically by the `lumier` script)
+- `LUMIER_DEBUG`: Enable debug mode (set to 1)
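+
+For example, these variables can be combined in a single `run` invocation (the values below are illustrative):
+
+```bash
+# Run with a larger virtual display and debug logging enabled
+lumier run -it --rm \
+  --name lumier-vm \
+  -p 8006:8006 \
+  -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
+  -e CPU_CORES=4 \
+  -e RAM_SIZE=8192 \
+  -e DISPLAY=1920x1080 \
+  -e LUMIER_DEBUG=1
+```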
+
+## Project Structure
+
+The project is organized as follows:
+
+```
+lumier/
+├── Dockerfile # Main Docker image definition
+├── README.md # This file
+├── lumier # Main CLI script
+├── install.sh # Installation script
+├── src/ # Source code
+│ ├── bin/ # Executable scripts
+│ │ ├── entry.sh # Docker entrypoint
+│ │ ├── server.sh # Tunnel server manager
+│ │ └── tunnel.sh # Tunnel request handler
+│ ├── config/ # Configuration
+│ │ └── constants.sh # Shared constants
+│ ├── hooks/ # Lifecycle hooks
+│ │ └── on-logon.sh # Run after VM boots
+│ └── lib/ # Shared library code
+│ ├── utils.sh # Utility functions
+│ └── vm.sh # VM management functions
+└── mount/ # Default shared directory
+```
+
+## VNC Access
+
+When a VM is running, you can access it via VNC through:
+http://localhost:8006/vnc.html
+
+The password is displayed in the console output when the VM starts.
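+
+The container's console output also prints a direct link of the following form, which embeds the generated password and connects automatically (the exact query string may vary):
+
+```
+http://localhost:8006/vnc.html?password=<vnc-password>&autoconnect=true
+```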
\ No newline at end of file
diff --git a/libs/lumier/install.sh b/libs/lumier/install.sh
new file mode 100755
index 00000000..bd9e3b6b
--- /dev/null
+++ b/libs/lumier/install.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+set -e
+
+# Lumier Installer
+# This script installs Lumier to your system
+
+# Define colors for output
+BOLD=$(tput bold)
+NORMAL=$(tput sgr0)
+RED=$(tput setaf 1)
+GREEN=$(tput setaf 2)
+BLUE=$(tput setaf 4)
+YELLOW=$(tput setaf 3)
+
+# Default installation directory (user-specific, doesn't require sudo)
+DEFAULT_INSTALL_DIR="$HOME/.local/bin"
+INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"
+
+# Script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Parse command line arguments
+while [ "$#" -gt 0 ]; do
+ case "$1" in
+ --install-dir=*)
+ INSTALL_DIR="${1#*=}"
+ ;;
+ --help)
+ echo "${BOLD}${BLUE}Lumier Installer${NORMAL}"
+ echo "Usage: $0 [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --install-dir=DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
+ echo " --help Display this help message"
+ echo ""
+ echo "Examples:"
+ echo " $0 # Install to $DEFAULT_INSTALL_DIR"
+ echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)"
+ echo " INSTALL_DIR=/opt/lumier $0 # Install to /opt/lumier (legacy env var support)"
+ exit 0
+ ;;
+ *)
+ echo "${RED}Unknown option: $1${NORMAL}"
+ echo "Use --help for usage information"
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+echo "${BOLD}${BLUE}Lumier Installer${NORMAL}"
+echo "This script will install Lumier to your system."
+
+# Check if we're running with appropriate permissions
+check_permissions() {
+ # System directories that typically require root privileges
+ SYSTEM_DIRS=("/usr/local/bin" "/usr/bin" "/bin" "/opt")
+
+ NEEDS_ROOT=false
+ for DIR in "${SYSTEM_DIRS[@]}"; do
+ if [[ "$INSTALL_DIR" == "$DIR"* ]] && [ ! -w "$INSTALL_DIR" ]; then
+ NEEDS_ROOT=true
+ break
+ fi
+ done
+
+ if [ "$NEEDS_ROOT" = true ]; then
+ echo "${YELLOW}Warning: Installing to $INSTALL_DIR may require root privileges.${NORMAL}"
+ echo "Consider these alternatives:"
+ echo " • Install to a user-writable location: $0 --install-dir=$HOME/.local/bin"
+ echo " • Create the directory with correct permissions first:"
+ echo " sudo mkdir -p $INSTALL_DIR && sudo chown $(whoami) $INSTALL_DIR"
+ echo ""
+
+ # Check if we already have write permission (might have been set up previously)
+ if [ ! -w "$INSTALL_DIR" ] && [ ! -w "$(dirname "$INSTALL_DIR")" ]; then
+ echo "${RED}Error: You don't have write permission to $INSTALL_DIR${NORMAL}"
+ echo "Please choose a different installation directory or ensure you have the proper permissions."
+ exit 1
+ fi
+ fi
+}
+
+# Detect OS and architecture
+detect_platform() {
+ OS=$(uname -s | tr '[:upper:]' '[:lower:]')
+ ARCH=$(uname -m)
+
+ if [ "$OS" != "darwin" ]; then
+ echo "${RED}Error: Currently only macOS is supported.${NORMAL}"
+ exit 1
+ fi
+
+ if [ "$ARCH" != "arm64" ]; then
+ echo "${RED}Error: Lumier only supports macOS on Apple Silicon (ARM64).${NORMAL}"
+ exit 1
+ fi
+
+ PLATFORM="darwin-arm64"
+ echo "Detected platform: ${BOLD}$PLATFORM${NORMAL}"
+}
+
+# Check dependencies
+check_dependencies() {
+ echo "Checking dependencies..."
+
+ # Check if lume is installed
+ if ! command -v lume &> /dev/null; then
+ echo "${RED}Error: Lume is required but not installed.${NORMAL}"
+ echo "Please install Lume first: https://github.com/trycua/cua/blob/main/libs/lume/README.md"
+ exit 1
+ fi
+
+ # Check if socat is installed
+ if ! command -v socat &> /dev/null; then
+ echo "${YELLOW}Warning: socat is required but not installed.${NORMAL}"
+ echo "Installing socat with Homebrew..."
+
+ # Check if Homebrew is installed
+ if ! command -v brew &> /dev/null; then
+ echo "${RED}Error: Homebrew is required to install socat.${NORMAL}"
+ echo "Please install Homebrew first: https://brew.sh/"
+ echo "Or install socat manually, then run this script again."
+ exit 1
+ fi
+
+ # Install socat
+ brew install socat
+ fi
+
+ # Check if Docker is installed
+ if ! command -v docker &> /dev/null; then
+ echo "${YELLOW}Warning: Docker is required but not installed.${NORMAL}"
+ echo "Please install Docker: https://docs.docker.com/get-docker/"
+ echo "Continuing with installation, but Lumier will not work without Docker."
+ fi
+
+ echo "${GREEN}All dependencies are satisfied.${NORMAL}"
+}
+
+# Copy the lumier script directly
+copy_lumier() {
+ echo "Copying lumier script to $INSTALL_DIR..."
+ cp "$SCRIPT_DIR/lumier" "$INSTALL_DIR/lumier"
+ chmod +x "$INSTALL_DIR/lumier"
+}
+
+# Main installation flow
+main() {
+ check_permissions
+ detect_platform
+ check_dependencies
+
+ echo "Installing Lumier to $INSTALL_DIR..."
+
+ # Create install directory if it doesn't exist
+ mkdir -p "$INSTALL_DIR"
+
+ # Copy the lumier script
+ copy_lumier
+
+ echo "${GREEN}Installation complete!${NORMAL}"
+ echo "Lumier has been installed to ${BOLD}$INSTALL_DIR/lumier${NORMAL}"
+
+ # Check if the installation directory is in PATH
+ if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then
+ echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}"
+ echo "To add it, run one of these commands based on your shell:"
+ echo " For bash: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
+ echo " For zsh: echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zshrc"
+ echo " For fish: echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
+ fi
+}
+
+# Run the installation
+main
\ No newline at end of file
diff --git a/libs/lumier/lumier b/libs/lumier/lumier
new file mode 100755
index 00000000..1c3912cb
--- /dev/null
+++ b/libs/lumier/lumier
@@ -0,0 +1,200 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -eo pipefail
+
+# Always use the current working directory as the build context
+SCRIPT_DIR="$(pwd)"
+PORT=8080
+DEBUG=${LUMIER_DEBUG:-0}
+
+usage() {
+    cat <<EOF
+Lumier - Docker interface for the lume CLI
+
+Usage: lumier COMMAND [ARGS...]
+
+Commands:
+  run [docker run args]         Run the Lumier container (extra args are passed to docker run)
+  build [docker build args]     Build the Lumier Docker image
+  tunnel {start|stop|status}    Manage the tunnel used to reach lume on the host
+  help                          Show this help message
+EOF
+}
+
+# Check whether the tunnel to the host is listening
+is_tunnel_active() {
+    if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then
+ return 0 # Tunnel is active
+ else
+ return 1 # Tunnel is not active
+ fi
+}
+
+# Start the tunnel if needed
+ensure_tunnel() {
+ if ! is_tunnel_active; then
+ echo "Tunnel is not active. Starting tunnel..."
+ "$SCRIPT_DIR/src/bin/server.sh" start
+ sleep 2 # Wait for the tunnel to start
+
+ if ! is_tunnel_active; then
+ echo "Failed to start tunnel. Make sure 'lume' is installed on your host."
+ exit 1
+ fi
+ else
+ echo "Tunnel is already active."
+ fi
+}
+
+# Build the Docker image with cache busting
+build_image() {
+ local image_name="${LUMIER_IMAGE:-lumier:latest}"
+ echo "Building Lumier Docker image: $image_name"
+ echo "SCRIPT_DIR=$SCRIPT_DIR"
+ echo "Checking for Dockerfile at: $SCRIPT_DIR/Dockerfile"
+ ls -l "$SCRIPT_DIR/Dockerfile" || echo "Dockerfile not found at $SCRIPT_DIR/Dockerfile"
+
+ # Pass any additional arguments to docker build with cache busting
+ docker build --build-arg CACHEBUST=$(date +%s) -t "$image_name" "$SCRIPT_DIR" "$@"
+
+ echo "Lumier image built successfully: $image_name"
+}
+
+# Run the Docker container
+run_container() {
+ local image_name="${LUMIER_IMAGE:-lumier:latest}"
+
+ # Ensure the Docker image exists
+ if ! docker image inspect "$image_name" &>/dev/null; then
+ echo "Docker image '$image_name' not found. Building it..."
+ build_image
+ fi
+
+ # Ensure the tunnel is running
+ ensure_tunnel
+
+ # Automatically resolve and pass host paths for storage and data
+ STORAGE_PATH="${HOST_STORAGE_PATH:-$(realpath ./storage)}"
+ DATA_PATH="${HOST_DATA_PATH:-$(realpath ./shared)}"
+
+ # Only add -e if not already present in args
+ DOCKER_ARGS=( )
+    add_env_var() {
+        local var="$1"; local val="$2"; shift 2
+        # Remaining args are the caller's docker arguments; skip if the variable is already set there
+        local arg
+        for arg in "$@"; do
+            [[ "$arg" == *"$var="* ]] && return 0
+        done
+        DOCKER_ARGS+=( -e "$var=$val" )
+    }
+    add_env_var HOST_STORAGE_PATH "$STORAGE_PATH" "$@"
+    add_env_var HOST_DATA_PATH "$DATA_PATH" "$@"
+
+ # Detect --name argument and set VM_NAME if not already present
+ local container_name=""
+ local prev_arg=""
+ for arg in "$@"; do
+ if [[ "$prev_arg" == "--name" ]]; then
+ container_name="$arg"
+ break
+ elif [[ "$arg" == --name=* ]]; then
+ container_name="${arg#--name=}"
+ break
+ fi
+ prev_arg="$arg"
+ done
+ # Only add -e VM_NAME if not already present and container_name is set
+    local vm_name_set=false
+    local prev=""
+    for arg in "$@"; do
+        # Match both the split form "-e VM_NAME=..." and the joined form "-eVM_NAME=..."
+        if [[ "$prev" == "-e" && "$arg" == VM_NAME=* ]]; then
+            vm_name_set=true
+            break
+        elif [[ "$arg" == "-e"*"VM_NAME="* ]]; then
+            vm_name_set=true
+            break
+        fi
+        prev="$arg"
+    done
+ if [[ -n "$container_name" && "$vm_name_set" != true ]]; then
+ DOCKER_ARGS+=( -e "VM_NAME=$container_name" )
+ fi
+
+ echo "Running Lumier container with image: $image_name"
+ if [[ "$*" == *"-p 8006:8006"* || "$*" == *"-p"*"8006:8006"* ]]; then
+ docker run "${DOCKER_ARGS[@]}" "$@" "$image_name"
+ else
+ docker run "${DOCKER_ARGS[@]}" -p 8006:8006 "$@" "$image_name"
+ fi
+}
+
+# Main command handling
+case "${1:-help}" in
+ run)
+ shift
+ run_container "$@"
+ ;;
+ tunnel)
+ # Handle tunnel subcommands
+ case "${2:-}" in
+ start)
+ "$SCRIPT_DIR/src/bin/server.sh" start
+ ;;
+ stop)
+ "$SCRIPT_DIR/src/bin/server.sh" stop
+ ;;
+ status)
+ "$SCRIPT_DIR/src/bin/server.sh" status
+ ;;
+ *)
+ echo "Unknown tunnel subcommand: $2"
+ usage
+ exit 1
+ ;;
+ esac
+ ;;
+
+ build)
+ shift
+ build_image "$@"
+ ;;
+ help)
+ usage
+ ;;
+ *)
+ echo "Unknown command: $1"
+ usage
+ exit 1
+ ;;
+esac
\ No newline at end of file
diff --git a/libs/lumier/mount/server.py b/libs/lumier/mount/server.py
new file mode 100644
index 00000000..464c26ad
--- /dev/null
+++ b/libs/lumier/mount/server.py
@@ -0,0 +1,10 @@
+from flask import Flask
+
+app = Flask(__name__)
+
+@app.route('/')
+def hello_world():
+ return 'Hello, World, from VM!'
+
+if __name__ == '__main__':
+ app.run(debug=True, host="0.0.0.0", port=5001)
\ No newline at end of file
diff --git a/libs/lumier/mount/setup.sh b/libs/lumier/mount/setup.sh
new file mode 100755
index 00000000..8897896e
--- /dev/null
+++ b/libs/lumier/mount/setup.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+echo "Creating helloworld.txt on the Desktop..."
+if [ ! -f ~/Desktop/helloworld.txt ]; then
+ echo "Hello, World!" > ~/Desktop/helloworld.txt
+ echo "helloworld.txt created successfully."
+else
+ echo "helloworld.txt already exists."
+fi
\ No newline at end of file
diff --git a/libs/lumier/src/bin/entry.sh b/libs/lumier/src/bin/entry.sh
new file mode 100755
index 00000000..66a375ad
--- /dev/null
+++ b/libs/lumier/src/bin/entry.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -euo pipefail
+
+# Source configuration files
+CONFIG_DIR="/run/config"
+LIB_DIR="/run/lib"
+
+# Source constants if available
+if [ -f "${CONFIG_DIR}/constants.sh" ]; then
+ source "${CONFIG_DIR}/constants.sh"
+fi
+
+# Import utilities
+for lib in "${LIB_DIR}"/*.sh; do
+ if [ -f "$lib" ]; then
+ source "$lib"
+ fi
+done
+
+# Set VM_NAME to env or fallback to container name (from --name)
+if [ -z "${VM_NAME:-}" ]; then
+ VM_NAME="$(cat /etc/hostname)"
+ export VM_NAME
+fi
+
+# Set HOST_STORAGE_PATH to /storage/$VM_NAME if not set
+if [ -z "${HOST_STORAGE_PATH:-}" ]; then
+ HOST_STORAGE_PATH="/storage/$VM_NAME"
+ export HOST_STORAGE_PATH
+fi
+
+# Optionally check for mountpoints
+if mountpoint -q /storage; then
+ echo "/storage is mounted"
+fi
+if mountpoint -q /data; then
+ echo "/data is mounted"
+fi
+
+# Log startup info
+echo "Lumier VM is starting..."
+
+# Cleanup function to ensure VM and noVNC proxy shutdown on container stop
+cleanup() {
+ set +e # Don't exit on error in cleanup
+ echo "[cleanup] Caught signal, shutting down..."
+ echo "[cleanup] Stopping VM..."
+ stop_vm
+ # Now gently stop noVNC proxy if running
+ # if [ -n "${NOVNC_PID:-}" ] && kill -0 "$NOVNC_PID" 2>/dev/null; then
+ # echo "[cleanup] Stopping noVNC proxy (PID $NOVNC_PID)..."
+ # kill -TERM "$NOVNC_PID"
+ # # Wait up to 5s for noVNC to exit
+ # for i in {1..5}; do
+ # if ! kill -0 "$NOVNC_PID" 2>/dev/null; then
+ # echo "[cleanup] noVNC proxy stopped."
+ # break
+ # fi
+ # sleep 1
+ # done
+ # # Escalate if still running
+ # if kill -0 "$NOVNC_PID" 2>/dev/null; then
+ # echo "[cleanup] noVNC proxy did not exit, killing..."
+ # kill -KILL "$NOVNC_PID" 2>/dev/null
+ # fi
+ # fi
+ echo "[cleanup] Done. Exiting."
+ exit 0
+}
+trap cleanup SIGTERM SIGINT
+
+# Start the VM
+start_vm
+
+# Start noVNC for VNC access
+NOVNC_PID=""
+if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then
+ echo "Starting noVNC proxy with optimized color settings..."
+ ${NOVNC_PATH}/utils/novnc_proxy --vnc host.docker.internal:${VNC_PORT} --listen 8006 --web ${NOVNC_PATH} > /dev/null 2>&1 &
+ NOVNC_PID=$!
+ disown $NOVNC_PID
+ echo "noVNC interface available at: http://localhost:8006/vnc.html?password=${VNC_PASSWORD}&autoconnect=true&logging=debug"
+fi
+
+# Run any post-startup hooks
+if [ -d "/run/hooks" ]; then
+ for hook in /run/hooks/*; do
+ if [ -x "$hook" ]; then
+ echo "Running hook: $(basename "$hook")"
+ "$hook"
+ fi
+ done
+fi
+
+echo "Lumier is running. Press Ctrl+C to stop."
+tail -f /dev/null
\ No newline at end of file
diff --git a/libs/lumier/src/bin/server.sh b/libs/lumier/src/bin/server.sh
new file mode 100755
index 00000000..5849d667
--- /dev/null
+++ b/libs/lumier/src/bin/server.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -euo pipefail
+
+# Source constants if available
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [ -f "${SCRIPT_DIR}/../config/constants.sh" ]; then
+ source "${SCRIPT_DIR}/../config/constants.sh"
+fi
+
+# Use the tunnel port from constants if available, otherwise default to 8080
+PORT="${TUNNEL_PORT:-8080}"
+TUNNEL_SCRIPT="${SCRIPT_DIR}/tunnel.sh"
+
+# Function to check if the tunnel is active
+is_tunnel_active() {
+ if lsof -i TCP:$PORT 2>/dev/null | grep LISTEN > /dev/null; then
+ return 0 # Tunnel is active
+ else
+ return 1 # Tunnel is not active
+ fi
+}
+
+# Function to start the tunnel
+start_tunnel() {
+ echo "Starting tunnel on port $PORT..."
+ if is_tunnel_active; then
+ echo "Tunnel is already running on port $PORT."
+ return 0
+ fi
+
+ # Start socat in the background
+ socat TCP-LISTEN:$PORT,reuseaddr,fork EXEC:"$TUNNEL_SCRIPT" &
+ SOCAT_PID=$!
+
+ # Check if the tunnel started successfully
+ sleep 1
+ if ! is_tunnel_active; then
+ echo "Failed to start tunnel on port $PORT."
+ return 1
+ fi
+
+ echo "Tunnel started successfully on port $PORT (PID: $SOCAT_PID)."
+ return 0
+}
+
+# Function to stop the tunnel
+stop_tunnel() {
+ echo "Stopping tunnel on port $PORT..."
+ if ! is_tunnel_active; then
+ echo "No tunnel running on port $PORT."
+ return 0
+ fi
+
+ # Find and kill the socat process
+ local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}')
+ if [ -n "$pid" ]; then
+ kill $pid
+ echo "Tunnel stopped (PID: $pid)."
+ return 0
+ else
+ echo "Failed to find process using port $PORT."
+ return 1
+ fi
+}
+
+# Function to check tunnel status
+status_tunnel() {
+ if is_tunnel_active; then
+ local pid=$(lsof -i TCP:$PORT | grep LISTEN | awk '{print $2}')
+ echo "Tunnel is active on port $PORT (PID: $pid)."
+ return 0
+ else
+ echo "No tunnel running on port $PORT."
+ return 1
+ fi
+}
+
+# Parse command line arguments
+case "${1:-}" in
+ start)
+ start_tunnel
+ ;;
+ stop)
+ stop_tunnel
+ ;;
+ restart)
+ stop_tunnel
+ start_tunnel
+ ;;
+ status)
+ status_tunnel
+ ;;
+ *)
+ echo "Usage: $0 {start|stop|restart|status}"
+ exit 1
+ ;;
+esac
\ No newline at end of file
diff --git a/libs/lumier/src/bin/tunnel-script.sh b/libs/lumier/src/bin/tunnel-script.sh
new file mode 100755
index 00000000..529839ea
--- /dev/null
+++ b/libs/lumier/src/bin/tunnel-script.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# Source constants if running in container context
+if [ -f "/run/config/constants.sh" ]; then
+ source "/run/config/constants.sh"
+fi
+
+# Define server address with fallback
+SERVER="${TUNNEL_HOST:-host.docker.internal}:${TUNNEL_PORT:-8080}"
+
+# Extract the base name of the command and arguments
+command=$(basename "$0")
+subcommand="$1"
+shift
+args="$@"
+
+command="$command $subcommand $args"
+
+# Concatenate command and any stdin data
+full_data="$command"
+if [ ! -t 0 ]; then
+ stdin_data=$(cat)
+ if [ -n "$stdin_data" ]; then
+ # Format full_data to include stdin data
+ full_data="$full_data << 'EOF'
+ $stdin_data
+EOF"
+ fi
+fi
+
+# Trim leading/trailing whitespace and newlines
+full_data=$(echo -e "$full_data" | sed 's/^[ \t\n]*//;s/[ \t\n]*$//')
+
+# Log command if debug is enabled
+if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then
+ echo "Executing lume command: $full_data" >&2
+ echo "Sending to: $SERVER" >&2
+fi
+
+# Use curl with -N to disable output buffering and -s for silent mode
+curl -N -s -X POST \
+ -H "Content-Type: application/octet-stream" \
+ --data-binary @- \
+ "http://$SERVER" <<< "$full_data"
\ No newline at end of file
diff --git a/libs/lumier/src/bin/tunnel.sh b/libs/lumier/src/bin/tunnel.sh
new file mode 100755
index 00000000..6de14282
--- /dev/null
+++ b/libs/lumier/src/bin/tunnel.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+
+# Exit on errors, undefined variables, and propagate errors in pipes
+set -euo pipefail
+
+# Source constants if available
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [ -f "${SCRIPT_DIR}/../config/constants.sh" ]; then
+ source "${SCRIPT_DIR}/../config/constants.sh"
+fi
+
+# Handle errors and cleanup
+cleanup() {
+ local exit_code=$?
+ # Clean up any temporary files if they exist
+ [ -n "${temp_file:-}" ] && [ -f "$temp_file" ] && rm "$temp_file"
+ [ -n "${fifo:-}" ] && [ -p "$fifo" ] && rm "$fifo"
+ exit $exit_code
+}
+trap cleanup EXIT INT TERM
+
+log_debug() {
+ if [ "${LUMIER_DEBUG:-0}" -eq 1 ]; then
+ echo "[DEBUG] $*" >&2
+ fi
+}
+
+send_error_response() {
+ local status_code=$1
+ local message=$2
+ echo "HTTP/1.1 $status_code"
+ echo "Content-Type: text/plain"
+ echo ""
+ echo "$message"
+ exit 1
+}
+
+# Read the HTTP request line
+read -r request_line
+log_debug "Request: $request_line"
+
+# Read headers and look for Content-Length
+content_length=0
+while IFS= read -r header; do
+ [[ $header == $'\r' ]] && break # End of headers
+ log_debug "Header: $header"
+ if [[ "$header" =~ ^Content-Length:\ ([0-9]+) ]]; then
+ content_length="${BASH_REMATCH[1]}"
+ fi
+done
+
+# Read the body using the content length
+command=""
+if [ "$content_length" -gt 0 ]; then
+ command=$(dd bs=1 count="$content_length" 2>/dev/null)
+ log_debug "Received command: $command"
+fi
+
+# Determine the executable and arguments based on the command
+if [[ "$command" == lume* ]]; then
+ executable="$(which lume || echo "/usr/local/bin/lume")"
+ command_args="${command#lume}" # Remove 'lume' from the command
+elif [[ "$command" == sshpass* ]]; then
+ executable="$(which sshpass || echo "/usr/local/bin/sshpass")"
+ command_args="${command#sshpass}"
+else
+ send_error_response "400 Bad Request" "Unsupported command: $command"
+fi
+
+# Check if executable exists
+if [ ! -x "$executable" ]; then
+ send_error_response "500 Internal Server Error" "Executable not found or not executable: $executable"
+fi
+
+# Create a temporary file to store the command
+temp_file=$(mktemp)
+echo "$executable $command_args" > "$temp_file"
+chmod +x "$temp_file"
+
+# Create a FIFO (named pipe) for capturing output
+fifo=$(mktemp -u)
+mkfifo "$fifo"
+
+# Execute the command and pipe its output through awk to ensure line-buffering
+{
+ log_debug "Executing: $executable $command_args"
+ "$temp_file" 2>&1 | awk '{ print; fflush() }' > "$fifo"
+} &
+
+# Stream the output from the FIFO as an HTTP response
+{
+ echo -e "HTTP/1.1 200 OK\r"
+ echo -e "Content-Type: text/plain\r"
+ echo -e "\r"
+ cat "$fifo"
+}
\ No newline at end of file
diff --git a/libs/lumier/src/config/constants.sh b/libs/lumier/src/config/constants.sh
new file mode 100644
index 00000000..766c4373
--- /dev/null
+++ b/libs/lumier/src/config/constants.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+# Port configuration
+TUNNEL_PORT=8080
+VNC_PORT=8006
+
+# Host configuration
+TUNNEL_HOST="host.docker.internal"
+
+# Default VM configuration
+DEFAULT_RAM_SIZE="8192"
+DEFAULT_CPU_CORES="4"
+DEFAULT_DISK_SIZE="100"
+DEFAULT_VM_NAME="lumier"
+DEFAULT_VM_VERSION="ghcr.io/trycua/macos-sequoia-vanilla:latest"
+
+# Paths
+NOVNC_PATH="/opt/noVNC"
+LIFECYCLE_HOOKS_DIR="/run/hooks"
+
+# VM connection details
+HOST_USER="lume"
+HOST_PASSWORD="lume"
+SSH_RETRY_ATTEMPTS=20
+SSH_RETRY_INTERVAL=5
\ No newline at end of file
diff --git a/libs/lumier/src/hooks/on-logon.sh b/libs/lumier/src/hooks/on-logon.sh
new file mode 100755
index 00000000..faa817c0
--- /dev/null
+++ b/libs/lumier/src/hooks/on-logon.sh
@@ -0,0 +1,8 @@
+setup_script="$DATA_FOLDER_PATH/setup.sh"
+
+if [ -f "$setup_script" ]; then
+ chmod +x "$setup_script"
+ source "$setup_script"
+else
+ echo "Setup script not found at: $setup_script"
+fi
\ No newline at end of file
diff --git a/libs/lumier/src/lib/utils.sh b/libs/lumier/src/lib/utils.sh
new file mode 100755
index 00000000..7d599669
--- /dev/null
+++ b/libs/lumier/src/lib/utils.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+# Function to wait for SSH to become available
+wait_for_ssh() {
+ local host_ip=$1
+ local user=$2
+ local password=$3
+ local retry_interval=${4:-5} # Default retry interval is 5 seconds
+ local max_retries=${5:-20} # Default maximum retries is 20 (0 for infinite)
+
+ echo "Waiting for SSH to become available on $host_ip..."
+
+ local retry_count=0
+ while true; do
+        # Try to connect via SSH; guard the command so a failed attempt
+        # doesn't abort the caller when it runs under "set -e"
+        if sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host_ip" "exit"; then
+            echo "SSH is ready on $host_ip!"
+            return 0
+        fi
+
+ # Increment retry count
+        retry_count=$((retry_count + 1))
+
+ # Exit if maximum retries are reached
+ if [ $max_retries -ne 0 ] && [ $retry_count -ge $max_retries ]; then
+ echo "Maximum retries reached. SSH is not available."
+ return 1
+ fi
+
+ echo "SSH not ready. Retrying in $retry_interval seconds... (Attempt $retry_count)"
+ sleep $retry_interval
+ done
+}
+
+# Function to execute a script on a remote server using sshpass
+execute_remote_script() {
+ local host="$1"
+ local user="$2"
+ local password="$3"
+ local script_path="$4"
+ local vnc_password="$5"
+ local data_folder="$6"
+
+ # Check if all required arguments are provided
+ if [ -z "$host" ] || [ -z "$user" ] || [ -z "$password" ] || [ -z "$script_path" ] || [ -z "$vnc_password" ]; then
+        echo "Usage: execute_remote_script <host> <user> <password> <script_path> <vnc_password> [data_folder]"
+ return 1
+ fi
+
+ echo "VNC password exported to VM: $vnc_password"
+
+ data_folder_path="$VM_SHARED_FILES_PATH/$data_folder"
+ echo "Data folder path in VM: $data_folder_path"
+
+ # Read the script content and prepend the shebang
+ script_content="#!/usr/bin/env bash\n"
+ if [ -n "$data_folder" ]; then
+ script_content+="export VNC_PASSWORD='$vnc_password'\n"
+ script_content+="export DATA_FOLDER_PATH='$data_folder_path'\n"
+ fi
+ script_content+="$(<"$script_path")"
+
+ # Use a here-document to send the script content
+    sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user@$host" "bash -s" <<EOF
+$(echo -e "$script_content")
+EOF
+}
+
+start_vm() {
+    STORAGE_PATH="$HOST_STORAGE_PATH"
+
+    # Get current VM info (stderr included so "Virtual machine not found" can be detected)
+    VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>&1)
+
+ # Check if VM not found error
+ if [[ $VM_INFO == *"Virtual machine not found"* ]]; then
+ IMAGE_NAME="${VERSION##*/}"
+ lume pull "$IMAGE_NAME" "$VM_NAME" --storage "$STORAGE_PATH"
+ else
+ # Parse the JSON status - check if it contains "status" : "running"
+ if [[ $VM_INFO == *'"status" : "running"'* ]]; then
+ lume_stop "$VM_NAME" "$STORAGE_PATH"
+ # lume stop "$VM_NAME" --storage "$STORAGE_PATH"
+ fi
+ fi
+
+ # Set VM parameters
+ lume set "$VM_NAME" --cpu "$CPU_CORES" --memory "${RAM_SIZE}MB" --display "$DISPLAY" --storage "$STORAGE_PATH"
+
+ # Fetch VM configuration
+ CONFIG_JSON=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json)
+
+ # Setup data directory args if necessary
+ SHARED_DIR_ARGS=""
+ if [ -d "/data" ]; then
+ if [ -n "$HOST_DATA_PATH" ]; then
+ SHARED_DIR_ARGS="--shared-dir=$HOST_DATA_PATH"
+ else
+ echo "Warning: /data volume exists but HOST_DATA_PATH is not set. Cannot mount volume."
+ fi
+ fi
+
+ # Run VM with VNC and shared directory using curl
+ lume_run $SHARED_DIR_ARGS --storage "$STORAGE_PATH" "$VM_NAME" &
+ # lume run "$VM_NAME" --storage "$STORAGE_PATH" --no-display
+
+ # Wait for VM to be running and VNC URL to be available
+ vm_ip=""
+ vnc_url=""
+ max_attempts=30
+ attempt=0
+
+ while [ $attempt -lt $max_attempts ]; do
+ # Get VM info as JSON
+ VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null)
+
+ # Check if VM has status 'running'
+ if [[ $VM_INFO == *'"status" : "running"'* ]]; then
+ # Extract IP address using the existing function from utils.sh
+ vm_ip=$(extract_json_field "ipAddress" "$VM_INFO")
+ # Extract VNC URL using the existing function from utils.sh
+ vnc_url=$(extract_json_field "vncUrl" "$VM_INFO")
+
+ # If we have both IP and VNC URL, break the loop
+ if [ -n "$vm_ip" ] && [ -n "$vnc_url" ]; then
+ break
+ fi
+ fi
+
+ sleep 2
+ attempt=$((attempt + 1))
+ done
+
+ if [ -z "$vm_ip" ] || [ -z "$vnc_url" ]; then
+ echo "Timed out waiting for VM to start or VNC URL to become available."
+ lume_stop "$VM_NAME" "$STORAGE_PATH" > /dev/null 2>&1
+ # lume stop "$VM_NAME" --storage "$STORAGE_PATH" > /dev/null 2>&1
+ exit 1
+ fi
+
+
+ # Parse VNC URL to extract password and port
+ VNC_PASSWORD=$(echo "$vnc_url" | sed -n 's/.*:\(.*\)@.*/\1/p')
+ VNC_PORT=$(echo "$vnc_url" | sed -n 's/.*:\([0-9]\+\)$/\1/p')
+
+ # Wait for SSH to become available
+ wait_for_ssh "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" 5 20
+
+ # Export VNC variables for entry.sh to use
+ export VNC_PORT
+ export VNC_PASSWORD
+
+ # Execute on-logon.sh if present
+ on_logon_script="/run/lifecycle/on-logon.sh"
+ if [ -f "$on_logon_script" ]; then
+ execute_remote_script "$vm_ip" "$HOST_USER" "$HOST_PASSWORD" "$on_logon_script" "$VNC_PASSWORD" "$DATA_FOLDER"
+ fi
+
+ # The VM is still running because we never killed lume run.
+ # If you want to stop the VM at some point, you can kill $LUME_PID or use lume_stop.
+}
+
+stop_vm() {
+ echo "Stopping VM '$VM_NAME'..."
+ STORAGE_PATH="$HOST_STORAGE_PATH"
+ if [ -z "$STORAGE_PATH" ]; then
+ STORAGE_PATH="storage_${VM_NAME}"
+ fi
+ # Check if the VM exists and is running (use lume get for speed)
+ VM_INFO=$(lume get "$VM_NAME" --storage "$STORAGE_PATH" -f json 2>/dev/null)
+ if [[ -z "$VM_INFO" || $VM_INFO == *"Virtual machine not found"* ]]; then
+ echo "VM '$VM_NAME' does not exist."
+ elif [[ $VM_INFO == *'"status" : "running"'* ]]; then
+ lume_stop "$VM_NAME" "$STORAGE_PATH"
+ echo "VM '$VM_NAME' was running and is now stopped."
+ elif [[ $VM_INFO == *'"status" : "stopped"'* ]]; then
+ echo "VM '$VM_NAME' is already stopped."
+ else
+ echo "Unknown VM status for '$VM_NAME'."
+ fi
+}
+
+is_vm_running() {
+ lume ls | grep -q "$VM_NAME"
+}
+
+# Stop VM with storage location specified using curl
+lume_stop() {
+ local vm_name="$1"
+ local storage="$2"
+ curl --connect-timeout 6000 \
+ --max-time 5000 \
+ -X POST \
+ -H "Content-Type: application/json" \
+ -d '{"storage":"'$storage'"}' \
+ "http://host.docker.internal:3000/lume/vms/${vm_name}/stop"
+}
+
+# Run VM with VNC client started and shared directory using curl
+lume_run() {
+ # Parse args
+ local shared_dir=""
+ local storage="ssd"
+ local vm_name="lume_vm"
+ local no_display=true
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --shared-dir=*)
+ shared_dir="${1#*=}"
+ shift
+ ;;
+ --storage)
+ storage="$2"
+ shift 2
+ ;;
+ --no-display)
+ no_display=true
+ shift
+ ;;
+ *)
+ # Assume last arg is VM name if not an option
+ vm_name="$1"
+ shift
+ ;;
+ esac
+ done
+
+ # Default to ~/Projects if not provided
+ if [[ -z "$shared_dir" ]]; then
+ shared_dir="~/Projects"
+ fi
+
+ local json_body="{\"noDisplay\": true, \"sharedDirectories\": [{\"hostPath\": \"$shared_dir\", \"readOnly\": false}], \"storage\": \"$storage\", \"recoveryMode\": false}"
+ local curl_cmd="curl --connect-timeout 6000 \
+ --max-time 5000 \
+ -X POST \
+ -H 'Content-Type: application/json' \
+ -d '$json_body' \
+ http://host.docker.internal:3000/lume/vms/$vm_name/run"
+ echo "[lume_run] Running:"
+ echo "$curl_cmd"
+ eval "$curl_cmd"
+}
\ No newline at end of file
diff --git a/libs/mcp-server/README.md b/libs/mcp-server/README.md
index a4307616..736ab364 100644
--- a/libs/mcp-server/README.md
+++ b/libs/mcp-server/README.md
@@ -68,13 +68,51 @@ You can then use the script in your MCP configuration like this:
"CUA_AGENT_LOOP": "OMNI",
"CUA_MODEL_PROVIDER": "ANTHROPIC",
"CUA_MODEL_NAME": "claude-3-7-sonnet-20250219",
- "ANTHROPIC_API_KEY": "your-api-key"
+ "CUA_PROVIDER_API_KEY": "your-api-key"
}
}
}
}
```
+## Development Guide
+
+If you want to develop with the cua-mcp-server directly without installation, you can use this configuration:
+
+```json
+{
+ "mcpServers": {
+ "cua-agent": {
+ "command": "/bin/bash",
+ "args": ["~/cua/libs/mcp-server/scripts/start_mcp_server.sh"],
+ "env": {
+ "CUA_AGENT_LOOP": "UITARS",
+ "CUA_MODEL_PROVIDER": "OAICOMPAT",
+ "CUA_MODEL_NAME": "ByteDance-Seed/UI-TARS-1.5-7B",
+ "CUA_PROVIDER_BASE_URL": "https://****************.us-east-1.aws.endpoints.huggingface.cloud/v1",
+ "CUA_PROVIDER_API_KEY": "your-api-key"
+ }
+ }
+ }
+}
+```
+
+This configuration:
+- Uses the start_mcp_server.sh script which automatically sets up the Python path and runs the server module
+- Works with Claude Desktop, Cursor, or any other MCP client
+- Automatically uses your development code without requiring installation
+
+Just add this to your MCP client's configuration and it will use your local development version of the server.
+
+### Troubleshooting
+
+If you get a `/bin/bash: ~/cua/libs/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, try changing the path to the script to be absolute instead of relative.
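+
+For example, with the repository checked out under your home directory, the entry would look like this (the username and checkout location below are placeholders; substitute your own absolute path):
+
+```json
+"args": ["/Users/<your-username>/cua/libs/mcp-server/scripts/start_mcp_server.sh"]
+```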
+
+To see the logs:
+```bash
+tail -n 20 -f ~/Library/Logs/Claude/mcp*.log
+```
+
## Claude Desktop Integration
To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`):
diff --git a/libs/mcp-server/mcp_server/server.py b/libs/mcp-server/mcp_server/server.py
index f6692f9a..03971cb6 100644
--- a/libs/mcp-server/mcp_server/server.py
+++ b/libs/mcp-server/mcp_server/server.py
@@ -1,9 +1,10 @@
import asyncio
+import base64
import logging
import os
import sys
import traceback
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, Tuple
# Configure logging to output to stderr for debug visibility
logging.basicConfig(
@@ -17,7 +18,7 @@ logger = logging.getLogger("mcp-server")
logger.debug("MCP Server module loading...")
try:
- from mcp.server.fastmcp import Context, FastMCP
+ from mcp.server.fastmcp import Context, FastMCP, Image
logger.debug("Successfully imported FastMCP")
except ImportError as e:
@@ -49,16 +50,37 @@ def serve() -> FastMCP:
server = FastMCP("cua-agent")
@server.tool()
- async def run_cua_task(ctx: Context, task: str) -> str:
+ async def screenshot_cua(ctx: Context) -> Image:
"""
- Run a Computer-Use Agent (CUA) task and return the results.
+        Take a screenshot of the current macOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state.
+
+ Args:
+ ctx: The MCP context
+
+ Returns:
+ An image resource containing the screenshot
+ """
+ global global_computer
+ if global_computer is None:
+ global_computer = Computer(verbosity=logging.INFO)
+ await global_computer.run()
+ screenshot = await global_computer.interface.screenshot()
+ return Image(
+ format="png",
+ data=screenshot
+ )
+
+ @server.tool()
+ async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]:
+ """
+        Run a Computer-Use Agent (CUA) task in a macOS VM and return the results.
Args:
ctx: The MCP context
task: The instruction or task for the agent to perform
Returns:
- A string containing the agent's response
+ A tuple containing the agent's response and the final screenshot
"""
global global_computer
@@ -72,12 +94,7 @@ def serve() -> FastMCP:
# Determine which loop to use
loop_str = os.getenv("CUA_AGENT_LOOP", "OMNI")
- if loop_str == "OPENAI":
- loop = AgentLoop.OPENAI
- elif loop_str == "ANTHROPIC":
- loop = AgentLoop.ANTHROPIC
- else:
- loop = AgentLoop.OMNI
+ loop = getattr(AgentLoop, loop_str)
# Determine provider
provider_str = os.getenv("CUA_MODEL_PROVIDER", "ANTHROPIC")
@@ -89,6 +106,9 @@ def serve() -> FastMCP:
# Get base URL for provider (if needed)
provider_base_url = os.getenv("CUA_PROVIDER_BASE_URL", None)
+ # Get api key for provider (if needed)
+ api_key = os.getenv("CUA_PROVIDER_API_KEY", None)
+
# Create agent with the specified configuration
agent = ComputerAgent(
computer=global_computer,
@@ -98,6 +118,7 @@ def serve() -> FastMCP:
name=model_name,
provider_base_url=provider_base_url,
),
+ api_key=api_key,
save_trajectory=False,
only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
verbosity=logging.INFO,
@@ -107,33 +128,34 @@ def serve() -> FastMCP:
full_result = ""
async for result in agent.run(task):
logger.info(f"Agent step complete: {result.get('id', 'unknown')}")
+ ctx.info(f"Agent step complete: {result.get('id', 'unknown')}")
# Add response ID to output
full_result += f"\n[Response ID: {result.get('id', 'unknown')}]\n"
-
- # Extract and concatenate text responses
- if "text" in result:
- # Handle both string and dict responses
- text_response = result.get("text", "")
- if isinstance(text_response, str):
- full_result += f"Response: {text_response}\n"
- else:
- # If it's a dict or other structure, convert to string representation
- full_result += f"Response: {str(text_response)}\n"
-
- # Log detailed information
- if "tools" in result:
- tools_info = result.get("tools")
- logger.debug(f"Tools used: {tools_info}")
- full_result += f"\nTools used: {tools_info}\n"
+
+ if "content" in result:
+ full_result += f"Response: {result.get('content', '')}\n"
# Process output if available
outputs = result.get("output", [])
for output in outputs:
output_type = output.get("type")
- if output_type == "reasoning":
+ if output_type == "message":
+ logger.debug(f"Message: {output}")
+ content = output.get("content", [])
+ for content_part in content:
+ if content_part.get("text"):
+ full_result += f"\nMessage: {content_part.get('text', '')}\n"
+ elif output_type == "reasoning":
logger.debug(f"Reasoning: {output}")
- full_result += f"\nReasoning: {output.get('content', '')}\n"
+
+ summary_content = output.get("summary", [])
+ if summary_content:
+ for summary_part in summary_content:
+ if summary_part.get("text"):
+ full_result += f"\nReasoning: {summary_part.get('text', '')}\n"
+ else:
+ full_result += f"\nReasoning: {output.get('text', output.get('content', ''))}\n"
elif output_type == "computer_call":
logger.debug(f"Computer call: {output}")
action = output.get("action", "")
@@ -144,17 +166,25 @@ def serve() -> FastMCP:
full_result += "\n" + "-" * 40 + "\n"
logger.info(f"CUA task completed successfully")
- return full_result or "Task completed with no text output."
+ ctx.info(f"CUA task completed successfully")
+ return (
+ full_result or "Task completed with no text output.",
+ Image(
+ format="png",
+ data=await global_computer.interface.screenshot()
+ )
+ )
except Exception as e:
error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}"
logger.error(error_msg)
+ ctx.error(error_msg)
return f"Error during task execution: {str(e)}"
@server.tool()
- async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> str:
+ async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List:
"""
- Run multiple CUA tasks in sequence and return the combined results.
+        Run multiple CUA tasks in a macOS VM in sequence and return the combined results.
Args:
ctx: The MCP context
@@ -164,13 +194,15 @@ def serve() -> FastMCP:
Combined results from all tasks
"""
results = []
-
for i, task in enumerate(tasks):
logger.info(f"Running task {i+1}/{len(tasks)}: {task}")
- result = await run_cua_task(ctx, task)
- results.append(f"Task {i+1}: {task}\nResult: {result}\n")
-
- return "\n".join(results)
+ ctx.info(f"Running task {i+1}/{len(tasks)}: {task}")
+
+ ctx.report_progress(i / len(tasks))
+ results.extend(await run_cua_task(ctx, task))
+ ctx.report_progress((i + 1) / len(tasks))
+
+ return results
return server
diff --git a/libs/mcp-server/scripts/start_mcp_server.sh b/libs/mcp-server/scripts/start_mcp_server.sh
new file mode 100755
index 00000000..17fd9dab
--- /dev/null
+++ b/libs/mcp-server/scripts/start_mcp_server.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+set -e
+
+# Set the CUA repository path based on script location
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )"
+PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python"
+
+# Set Python path to include all necessary libraries
+export PYTHONPATH="${CUA_REPO_DIR}/libs/mcp-server:${CUA_REPO_DIR}/libs/agent:${CUA_REPO_DIR}/libs/computer:${CUA_REPO_DIR}/libs/core:${CUA_REPO_DIR}/libs/pylume"
+
+# Run the MCP server directly as a module
+$PYTHON_PATH -m mcp_server.server
\ No newline at end of file
diff --git a/notebooks/blog/build-your-own-operator-on-macos-1.ipynb b/notebooks/blog/build-your-own-operator-on-macos-1.ipynb
index 22db332d..70c0e6ea 100644
--- a/notebooks/blog/build-your-own-operator-on-macos-1.ipynb
+++ b/notebooks/blog/build-your-own-operator-on-macos-1.ipynb
@@ -145,9 +145,8 @@
" await computer.interface.press_key(key)\n",
" \n",
" elif action_type == \"wait\":\n",
- " wait_time = action.time\n",
- " print(f\"Waiting for {wait_time} seconds\")\n",
- " await asyncio.sleep(wait_time)\n",
+ " print(f\"Waiting for 2 seconds\")\n",
+ " await asyncio.sleep(2)\n",
" \n",
" elif action_type == \"screenshot\":\n",
" print(\"Taking screenshot\")\n",