diff --git a/libs/python/mcp-server/mcp_server/server.py b/libs/python/mcp-server/mcp_server/server.py
index 66aaceba..489cf684 100644
--- a/libs/python/mcp-server/mcp_server/server.py
+++ b/libs/python/mcp-server/mcp_server/server.py
@@ -20,7 +20,9 @@ logger = logging.getLogger("mcp-server")
 logger.debug("MCP Server module loading...")
 
 try:
-    from mcp.server.fastmcp import Context, FastMCP, Image
+    from mcp.server.fastmcp import Context, FastMCP
+    # Use the canonical Image type
+    from mcp.server.fastmcp.utilities.types import Image
     logger.debug("Successfully imported FastMCP")
 except ImportError as e:
@@ -47,34 +49,25 @@ def get_env_bool(key: str, default: bool = False) -> bool:
 
 async def _maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None:
     """Call a context helper if it exists, awaiting the result when necessary."""
-
     method = getattr(ctx, method_name, None)
     if not callable(method):
         return
-
     result = method(*args, **kwargs)
     if inspect.isawaitable(result):
         await result
-
 
 def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
     """Normalise message content to a list of structured parts."""
-
     if isinstance(content, list):
         return content
-
     if content is None:
         return []
-
     return [{"type": "output_text", "text": str(content)}]
-
 
 def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str:
     """Extract textual content for inclusion in the aggregated result string."""
-
     if isinstance(content, str):
         return content
-
     texts: List[str] = []
     for part in content or []:
         if not isinstance(part, dict):
@@ -83,10 +76,8 @@ def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str
             texts.append(str(part["text"]))
     return "\n".join(texts)
-
 
 def _serialise_tool_content(content: Any) -> str:
     """Convert tool outputs into a string for aggregation."""
-
     if isinstance(content, str):
         return content
     if isinstance(content, list):
@@ -100,47 +91,30 @@ def _serialise_tool_content(content: Any) -> str:
         return ""
     return str(content)
-
-
 def serve() -> FastMCP:
     """Create and configure the MCP server."""
-    server = FastMCP("cua-agent")
+    # NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it.
+    server = FastMCP(name="cua-agent")
 
-    @server.tool()
-    async def screenshot_cua(ctx: Context) -> Image:
+    @server.tool(structured_output=False)
+    async def screenshot_cua(ctx: Context) -> Any:
         """
-        Take a screenshot of the current MacOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state.
-
-        Args:
-            ctx: The MCP context
-
-        Returns:
-            An image resource containing the screenshot
+        Take a screenshot of the current MacOS VM screen and return the image.
         """
         global global_computer
 
         if global_computer is None:
             global_computer = Computer(verbosity=logging.INFO)
             await global_computer.run()
 
         screenshot = await global_computer.interface.screenshot()
-        return Image(
-            format="png",
-            data=screenshot
-        )
+        # Returning Image object is fine when structured_output=False
+        return Image(format="png", data=screenshot)
 
-    @server.tool()
-    async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]:
+    @server.tool(structured_output=False)
+    async def run_cua_task(ctx: Context, task: str) -> Any:
         """
-        Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results.
-
-        Args:
-            ctx: The MCP context
-            task: The instruction or task for the agent to perform
-
-        Returns:
-            A tuple containing the agent's response and the final screenshot
+        Run a Computer-Use Agent (CUA) task in a MacOS VM and return (combined text, final screenshot).
         """
         global global_computer
-
         try:
             logger.info(f"Starting CUA task: {task}")
@@ -149,9 +123,8 @@ def serve() -> FastMCP:
                 global_computer = Computer(verbosity=logging.INFO)
                 await global_computer.run()
 
-            # Get model name - this now determines the loop and provider
+            # Get model name
             model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022")
-
             logger.info(f"Using model: {model_name}")
 
             # Create agent with the new v0.4.x API
@@ -159,23 +132,21 @@ def serve() -> FastMCP:
                 model=model_name,
                 only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
                 verbosity=logging.INFO,
-                tools=[global_computer]
+                tools=[global_computer],
             )
-            # Create messages in the new v0.4.x format
             messages = [{"role": "user", "content": task}]
-            
+
             # Collect all results
             aggregated_messages: List[str] = []
 
             async for result in agent.run(messages):
-                logger.info(f"Agent processing step")
-                ctx.info(f"Agent processing step")
+                logger.info("Agent processing step")
+                ctx.info("Agent processing step")
 
-                # Process output if available
                 outputs = result.get("output", [])
                 for output in outputs:
                     output_type = output.get("type")
-                    
+
                     if output_type == "message":
                         logger.debug("Streaming assistant message: %s", output)
                         content = _normalise_message_content(output.get("content"))
@@ -224,7 +195,7 @@ def serve() -> FastMCP:
 
             screenshot_image = Image(
                 format="png",
-                data=await global_computer.interface.screenshot()
+                data=await global_computer.interface.screenshot(),
             )
 
             return (
@@ -242,27 +213,20 @@ def serve() -> FastMCP:
                     screenshot = await global_computer.interface.screenshot()
                     return (
                         f"Error during task execution: {str(e)}",
-                        Image(format="png", data=screenshot)
+                        Image(format="png", data=screenshot),
                     )
-            except:
+            except Exception:
                 pass
             # If we can't get a screenshot, return a placeholder
             return (
                 f"Error during task execution: {str(e)}",
-                Image(format="png", data=b"")
+                Image(format="png", data=b""),
             )
 
-    @server.tool()
-    async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List[Tuple[str, Image]]:
+    @server.tool(structured_output=False)
+    async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> Any:
         """
-        Run multiple CUA tasks in a MacOS VM in sequence and return the combined results.
-
-        Args:
-            ctx: The MCP context
-            tasks: List of tasks to run in sequence
-
-        Returns:
-            Combined results from all tasks
+        Run multiple CUA tasks in sequence and return a list of (combined text, screenshot).
         """
         total_tasks = len(tasks)
         if total_tasks == 0:
@@ -278,7 +242,7 @@ def serve() -> FastMCP:
             task_result = await run_cua_task(ctx, task)
             results.append(task_result)
             ctx.report_progress((i + 1) / total_tasks)
-            
+
         return results
 
     return server
@@ -286,7 +250,6 @@ def serve() -> FastMCP:
 
 server = serve()
 
-
 def main():
     """Run the MCP server."""
     try:
@@ -297,6 +260,5 @@ def main():
         traceback.print_exc(file=sys.stderr)
         sys.exit(1)
 
-
 if __name__ == "__main__":
     main()
diff --git a/libs/python/mcp-server/scripts/start_mcp_server.sh b/libs/python/mcp-server/scripts/start_mcp_server.sh
index 13257351..95343ac9 100755
--- a/libs/python/mcp-server/scripts/start_mcp_server.sh
+++ b/libs/python/mcp-server/scripts/start_mcp_server.sh
@@ -1,14 +1,43 @@
-#!/bin/bash
+#!/usr/bin/env bash
+set -Eeuo pipefail
 
-set -e
-
-# Set the CUA repository path based on script location
+# --- Resolve repo root from this script's location ---
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
-CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )"
-PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python"
+CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../../.." &> /dev/null && pwd )"
 
-# Set Python path to include all necessary libraries
-export PYTHONPATH="${CUA_REPO_DIR}/libs/python/mcp-server:${CUA_REPO_DIR}/libs/python/agent:${CUA_REPO_DIR}/libs/python/computer:${CUA_REPO_DIR}/libs/python/core:${CUA_REPO_DIR}/libs/python/pylume"
+# --- Choose a Python interpreter (prefer repo-root venv) ---
+CANDIDATES=(
+  "$CUA_REPO_DIR/.venv/bin/python"
+  "$CUA_REPO_DIR/libs/.venv/bin/python"
+  "$(command -v python3 || true)"
+  "$(command -v python || true)"
+)
 
-# Run the MCP server directly as a module
-$PYTHON_PATH -m mcp_server.server
\ No newline at end of file
+PYTHON_PATH=""
+for p in "${CANDIDATES[@]}"; do
+  if [[ -n "$p" && -x "$p" ]]; then
+    PYTHON_PATH="$p"
+    break
+  fi
+done
+
+if [[ -z "${PYTHON_PATH}" ]]; then
+  >&2 echo "[cua-mcp] ERROR: No suitable Python found. Tried:"
+  for p in "${CANDIDATES[@]}"; do >&2 echo "  - $p"; done
+  >&2 echo "[cua-mcp] Tip: create venv: python3 -m venv $CUA_REPO_DIR/.venv && \"$CUA_REPO_DIR/.venv/bin/pip\" install -e \"$CUA_REPO_DIR/libs/python/mcp-server\""
+  exit 127
+fi
+
+# --- Export PYTHONPATH so module imports work during dev ---
+export PYTHONPATH="$CUA_REPO_DIR/libs/python/mcp-server:$CUA_REPO_DIR/libs/python/agent:$CUA_REPO_DIR/libs/python/computer:$CUA_REPO_DIR/libs/python/core:$CUA_REPO_DIR/libs/python/pylume"
+
+# --- Helpful startup log for Claude's mcp.log ---
+>&2 echo "[cua-mcp] using python: $PYTHON_PATH"
+>&2 echo "[cua-mcp] repo dir   : $CUA_REPO_DIR"
+>&2 echo "[cua-mcp] PYTHONPATH : $PYTHONPATH"
+if [[ -n "${CUA_MODEL_NAME:-}" ]]; then
+  >&2 echo "[cua-mcp] CUA_MODEL_NAME=$CUA_MODEL_NAME"
+fi
+
+# --- Run the MCP server module ---
+exec "$PYTHON_PATH" -m mcp_server.server