Fix MCP server startup and tool schema issues in server.py and start_mcp_server.sh

Changes
- Refactored `server.py`:
  - Delayed server instantiation to avoid double-import issues and the
    resulting RuntimeWarning.
  - Reworked `run_cua_task` and `run_multi_cua_tasks` to return combined text
    plus final screenshots in a form FastMCP can serialise without Pydantic
    schema errors (see the sketch after this list).
  - Added helper functions for normalising message content, extracting text,
    and serialising tool outputs.
  - Improved logging and error handling for clearer debugging.

- Updated `start_mcp_server.sh`:
  - Made interpreter selection robust: prefers the repo-local `.venv`, falls
    back to `python3`/`python` on the PATH, and fails with a clear error if
    none is found.
  - Set `PYTHONPATH` to cover all `libs/python/*` packages.
  - Added debug output for the Python binary, repo dir, and `PYTHONPATH`.
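
A minimal sketch of the tool pattern described above for `server.py`, condensed
for illustration (the `demo_task` name and the placeholder screenshot bytes are
not from the commit; in the real module the server is constructed inside
`serve()` rather than at module level):

```python
from typing import Any

from mcp.server.fastmcp import Context, FastMCP
from mcp.server.fastmcp.utilities.types import Image  # canonical Image type

server = FastMCP(name="cua-agent")


@server.tool(structured_output=False)
async def demo_task(ctx: Context, task: str) -> Any:
    """Return combined text plus a screenshot Image.

    With structured output disabled, FastMCP turns the Image into an image
    content block instead of trying to generate a Pydantic schema for it,
    which is what previously raised PydanticSchemaGenerationError.
    """
    png_bytes = b""  # placeholder; the real tools return an actual screenshot
    return f"ran: {task}", Image(format="png", data=png_bytes)
```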

Why
- Fixed `PydanticSchemaGenerationError` when returning `Image` objects from
  tool functions.
- Prevented `FastMCP.__init__()` errors by passing only supported constructor
  arguments (see the sketch after this list).
- Removed duplicate module loading at startup that triggered RuntimeWarnings.
- Improved clarity of server startup logs for easier integration with Claude Desktop.
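
A compressed view of the startup path these items refer to, condensed from the
diff below (bodies and error handling elided; only the constructor call and the
entry-point shape are taken from the commit):

```python
from mcp.server.fastmcp import FastMCP


def serve() -> FastMCP:
    """Create and configure the MCP server."""
    # Pass only arguments the installed FastMCP supports; forwarding extras
    # such as model_config is what triggered the __init__() errors.
    server = FastMCP(name="cua-agent")
    # ... @server.tool(structured_output=False) registrations go here ...
    return server


server = serve()


def main() -> None:
    """Entry point: run the server over stdio (the transport Claude Desktop uses)."""
    server.run()


if __name__ == "__main__":
    main()
```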

Outcome
The MCP server now starts cleanly, registers all tools, and stays connected
with Claude Desktop without schema errors or double-import issues.
Adam
2025-09-30 18:52:04 -04:00
parent ee615390e9
commit 37864faef2
2 changed files with 66 additions and 75 deletions

server.py

@@ -20,7 +20,9 @@ logger = logging.getLogger("mcp-server")
logger.debug("MCP Server module loading...")
try:
from mcp.server.fastmcp import Context, FastMCP, Image
from mcp.server.fastmcp import Context, FastMCP
# Use the canonical Image type
from mcp.server.fastmcp.utilities.types import Image
logger.debug("Successfully imported FastMCP")
except ImportError as e:
@@ -47,34 +49,25 @@ def get_env_bool(key: str, default: bool = False) -> bool:
async def _maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None:
"""Call a context helper if it exists, awaiting the result when necessary."""
method = getattr(ctx, method_name, None)
if not callable(method):
return
result = method(*args, **kwargs)
if inspect.isawaitable(result):
await result
def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
"""Normalise message content to a list of structured parts."""
if isinstance(content, list):
return content
if content is None:
return []
return [{"type": "output_text", "text": str(content)}]
def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str:
"""Extract textual content for inclusion in the aggregated result string."""
if isinstance(content, str):
return content
texts: List[str] = []
for part in content or []:
if not isinstance(part, dict):
@@ -83,10 +76,8 @@ def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str
texts.append(str(part["text"]))
return "\n".join(texts)
def _serialise_tool_content(content: Any) -> str:
"""Convert tool outputs into a string for aggregation."""
if isinstance(content, str):
return content
if isinstance(content, list):
@@ -100,47 +91,30 @@ def _serialise_tool_content(content: Any) -> str:
return ""
return str(content)
def serve() -> FastMCP:
"""Create and configure the MCP server."""
server = FastMCP("cua-agent")
# NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it.
server = FastMCP(name="cua-agent")
@server.tool()
async def screenshot_cua(ctx: Context) -> Image:
@server.tool(structured_output=False)
async def screenshot_cua(ctx: Context) -> Any:
"""
Take a screenshot of the current MacOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state.
Args:
ctx: The MCP context
Returns:
An image resource containing the screenshot
Take a screenshot of the current MacOS VM screen and return the image.
"""
global global_computer
if global_computer is None:
global_computer = Computer(verbosity=logging.INFO)
await global_computer.run()
screenshot = await global_computer.interface.screenshot()
return Image(
format="png",
data=screenshot
)
# Returning Image object is fine when structured_output=False
return Image(format="png", data=screenshot)
@server.tool()
async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]:
@server.tool(structured_output=False)
async def run_cua_task(ctx: Context, task: str) -> Any:
"""
Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results.
Args:
ctx: The MCP context
task: The instruction or task for the agent to perform
Returns:
A tuple containing the agent's response and the final screenshot
Run a Computer-Use Agent (CUA) task in a MacOS VM and return (combined text, final screenshot).
"""
global global_computer
try:
logger.info(f"Starting CUA task: {task}")
@@ -149,9 +123,8 @@ def serve() -> FastMCP:
global_computer = Computer(verbosity=logging.INFO)
await global_computer.run()
# Get model name - this now determines the loop and provider
# Get model name
model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022")
logger.info(f"Using model: {model_name}")
# Create agent with the new v0.4.x API
@@ -159,23 +132,21 @@ def serve() -> FastMCP:
model=model_name,
only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
verbosity=logging.INFO,
tools=[global_computer]
tools=[global_computer],
)
# Create messages in the new v0.4.x format
messages = [{"role": "user", "content": task}]
# Collect all results
aggregated_messages: List[str] = []
async for result in agent.run(messages):
logger.info(f"Agent processing step")
ctx.info(f"Agent processing step")
logger.info("Agent processing step")
ctx.info("Agent processing step")
# Process output if available
outputs = result.get("output", [])
for output in outputs:
output_type = output.get("type")
if output_type == "message":
logger.debug("Streaming assistant message: %s", output)
content = _normalise_message_content(output.get("content"))
@@ -224,7 +195,7 @@ def serve() -> FastMCP:
screenshot_image = Image(
format="png",
data=await global_computer.interface.screenshot()
data=await global_computer.interface.screenshot(),
)
return (
@@ -242,27 +213,20 @@ def serve() -> FastMCP:
screenshot = await global_computer.interface.screenshot()
return (
f"Error during task execution: {str(e)}",
Image(format="png", data=screenshot)
Image(format="png", data=screenshot),
)
except:
except Exception:
pass
# If we can't get a screenshot, return a placeholder
return (
f"Error during task execution: {str(e)}",
Image(format="png", data=b"")
Image(format="png", data=b""),
)
@server.tool()
async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List[Tuple[str, Image]]:
@server.tool(structured_output=False)
async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> Any:
"""
Run multiple CUA tasks in a MacOS VM in sequence and return the combined results.
Args:
ctx: The MCP context
tasks: List of tasks to run in sequence
Returns:
Combined results from all tasks
Run multiple CUA tasks in sequence and return a list of (combined text, screenshot).
"""
total_tasks = len(tasks)
if total_tasks == 0:
@@ -278,7 +242,7 @@ def serve() -> FastMCP:
task_result = await run_cua_task(ctx, task)
results.append(task_result)
ctx.report_progress((i + 1) / total_tasks)
return results
return server
@@ -286,7 +250,6 @@ def serve() -> FastMCP:
server = serve()
def main():
"""Run the MCP server."""
try:
@@ -297,6 +260,5 @@ def main():
traceback.print_exc(file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

start_mcp_server.sh

@@ -1,14 +1,43 @@
#!/bin/bash
#!/usr/bin/env bash
set -Eeuo pipefail
set -e
# Set the CUA repository path based on script location
# --- Resolve repo root from this script's location ---
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )"
PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python"
CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../../.." &> /dev/null && pwd )"
# Set Python path to include all necessary libraries
export PYTHONPATH="${CUA_REPO_DIR}/libs/python/mcp-server:${CUA_REPO_DIR}/libs/python/agent:${CUA_REPO_DIR}/libs/python/computer:${CUA_REPO_DIR}/libs/python/core:${CUA_REPO_DIR}/libs/python/pylume"
# --- Choose a Python interpreter (prefer repo-root venv) ---
CANDIDATES=(
"$CUA_REPO_DIR/.venv/bin/python"
"$CUA_REPO_DIR/libs/.venv/bin/python"
"$(command -v python3 || true)"
"$(command -v python || true)"
)
# Run the MCP server directly as a module
$PYTHON_PATH -m mcp_server.server
PYTHON_PATH=""
for p in "${CANDIDATES[@]}"; do
if [[ -n "$p" && -x "$p" ]]; then
PYTHON_PATH="$p"
break
fi
done
if [[ -z "${PYTHON_PATH}" ]]; then
>&2 echo "[cua-mcp] ERROR: No suitable Python found. Tried:"
for p in "${CANDIDATES[@]}"; do >&2 echo " - $p"; done
>&2 echo "[cua-mcp] Tip: create venv: python3 -m venv $CUA_REPO_DIR/.venv && \"$CUA_REPO_DIR/.venv/bin/pip\" install -e \"$CUA_REPO_DIR/libs/python/mcp-server\""
exit 127
fi
# --- Export PYTHONPATH so module imports work during dev ---
export PYTHONPATH="$CUA_REPO_DIR/libs/python/mcp-server:$CUA_REPO_DIR/libs/python/agent:$CUA_REPO_DIR/libs/python/computer:$CUA_REPO_DIR/libs/python/core:$CUA_REPO_DIR/libs/python/pylume"
# --- Helpful startup log for Claude's mcp.log ---
>&2 echo "[cua-mcp] using python: $PYTHON_PATH"
>&2 echo "[cua-mcp] repo dir : $CUA_REPO_DIR"
>&2 echo "[cua-mcp] PYTHONPATH : $PYTHONPATH"
if [[ -n "${CUA_MODEL_NAME:-}" ]]; then
>&2 echo "[cua-mcp] CUA_MODEL_NAME=$CUA_MODEL_NAME"
fi
# --- Run the MCP server module ---
exec "$PYTHON_PATH" -m mcp_server.server