Fix MCP server startup and tool schema issues in server.py and start_mcp_server.sh

Changes
- Refactored `server.py`:
  - Delayed server instantiation to avoid double-import issues and the
    resulting RuntimeWarning.
  - Reworked `run_cua_task` and `run_multi_cua_tasks` to return combined text
    plus final screenshots in a form FastMCP can serialise without Pydantic
    schema errors (see the sketch after this list).
  - Added helper functions for normalising message content, extracting text,
    and serialising tool outputs.
  - Improved logging and error handling for clearer debugging.

- Updated `start_mcp_server.sh`:
  - Made interpreter selection robust: prefers the repo-local `.venv`, falls
    back to `python3`/`python` on the PATH, and fails with a clear error if
    none is found.
  - Set `PYTHONPATH` to cover all `libs/python/*` packages.
  - Added debug output for the Python binary, repo dir, and `PYTHONPATH`.
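
A minimal sketch of the tool pattern described above for `server.py`, condensed
for illustration (the `demo_task` name and the placeholder screenshot bytes are
not from the commit; in the real module the server is constructed inside
`serve()` rather than at module level):

```python
from typing import Any

from mcp.server.fastmcp import Context, FastMCP
from mcp.server.fastmcp.utilities.types import Image  # canonical Image type

server = FastMCP(name="cua-agent")


@server.tool(structured_output=False)
async def demo_task(ctx: Context, task: str) -> Any:
    """Return combined text plus a screenshot Image.

    With structured output disabled, FastMCP turns the Image into an image
    content block instead of trying to generate a Pydantic schema for it,
    which is what previously raised PydanticSchemaGenerationError.
    """
    png_bytes = b""  # placeholder; the real tools return an actual screenshot
    return f"ran: {task}", Image(format="png", data=png_bytes)
```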

Why
- Fixed `PydanticSchemaGenerationError` when returning `Image` objects from
  tool functions.
- Prevented `FastMCP.__init__()` errors by passing only supported constructor
  arguments (see the sketch after this list).
- Removed duplicate module loading at startup that triggered RuntimeWarnings.
- Improved clarity of server startup logs for easier integration with Claude Desktop.
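
A compressed view of the startup path these items refer to, condensed from the
diff below (bodies and error handling elided; only the constructor call and the
entry-point shape are taken from the commit):

```python
from mcp.server.fastmcp import FastMCP


def serve() -> FastMCP:
    """Create and configure the MCP server."""
    # Pass only arguments the installed FastMCP supports; forwarding extras
    # such as model_config is what triggered the __init__() errors.
    server = FastMCP(name="cua-agent")
    # ... @server.tool(structured_output=False) registrations go here ...
    return server


server = serve()


def main() -> None:
    """Entry point: run the server over stdio (the transport Claude Desktop uses)."""
    server.run()


if __name__ == "__main__":
    main()
```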

Outcome
The MCP server now starts cleanly, registers all tools, and stays connected
with Claude Desktop without schema errors or double-import issues.
Adam
2025-09-30 18:52:04 -04:00
parent ee615390e9
commit 37864faef2
2 changed files with 66 additions and 75 deletions

server.py

@@ -20,7 +20,9 @@ logger = logging.getLogger("mcp-server")
logger.debug("MCP Server module loading...")
try:
from mcp.server.fastmcp import Context, FastMCP, Image
from mcp.server.fastmcp import Context, FastMCP
# Use the canonical Image type
from mcp.server.fastmcp.utilities.types import Image
logger.debug("Successfully imported FastMCP")
except ImportError as e:
@@ -47,34 +49,25 @@ def get_env_bool(key: str, default: bool = False) -> bool:
async def _maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None:
"""Call a context helper if it exists, awaiting the result when necessary."""
method = getattr(ctx, method_name, None)
if not callable(method):
return
result = method(*args, **kwargs)
if inspect.isawaitable(result):
await result
def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
"""Normalise message content to a list of structured parts."""
if isinstance(content, list):
return content
if content is None:
return []
return [{"type": "output_text", "text": str(content)}]
def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str:
"""Extract textual content for inclusion in the aggregated result string."""
if isinstance(content, str):
return content
texts: List[str] = []
for part in content or []:
if not isinstance(part, dict):
@@ -83,10 +76,8 @@ def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str
texts.append(str(part["text"]))
return "\n".join(texts)
def _serialise_tool_content(content: Any) -> str:
"""Convert tool outputs into a string for aggregation."""
if isinstance(content, str):
return content
if isinstance(content, list):
@@ -100,47 +91,30 @@ def _serialise_tool_content(content: Any) -> str:
return ""
return str(content)
def serve() -> FastMCP:
"""Create and configure the MCP server."""
server = FastMCP("cua-agent")
# NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it.
server = FastMCP(name="cua-agent")
@server.tool()
async def screenshot_cua(ctx: Context) -> Image:
@server.tool(structured_output=False)
async def screenshot_cua(ctx: Context) -> Any:
"""
Take a screenshot of the current MacOS VM screen and return the image. Use this before running a CUA task to get a snapshot of the current state.
Args:
ctx: The MCP context
Returns:
An image resource containing the screenshot
Take a screenshot of the current MacOS VM screen and return the image.
"""
global global_computer
if global_computer is None:
global_computer = Computer(verbosity=logging.INFO)
await global_computer.run()
screenshot = await global_computer.interface.screenshot()
return Image(
format="png",
data=screenshot
)
# Returning Image object is fine when structured_output=False
return Image(format="png", data=screenshot)
@server.tool()
async def run_cua_task(ctx: Context, task: str) -> Tuple[str, Image]:
@server.tool(structured_output=False)
async def run_cua_task(ctx: Context, task: str) -> Any:
"""
Run a Computer-Use Agent (CUA) task in a MacOS VM and return the results.
Args:
ctx: The MCP context
task: The instruction or task for the agent to perform
Returns:
A tuple containing the agent's response and the final screenshot
Run a Computer-Use Agent (CUA) task in a MacOS VM and return (combined text, final screenshot).
"""
global global_computer
try:
logger.info(f"Starting CUA task: {task}")
@@ -149,9 +123,8 @@ def serve() -> FastMCP:
global_computer = Computer(verbosity=logging.INFO)
await global_computer.run()
# Get model name - this now determines the loop and provider
# Get model name
model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-3-5-sonnet-20241022")
logger.info(f"Using model: {model_name}")
# Create agent with the new v0.4.x API
@@ -159,23 +132,21 @@ def serve() -> FastMCP:
model=model_name,
only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
verbosity=logging.INFO,
tools=[global_computer]
tools=[global_computer],
)
# Create messages in the new v0.4.x format
messages = [{"role": "user", "content": task}]
# Collect all results
aggregated_messages: List[str] = []
async for result in agent.run(messages):
logger.info(f"Agent processing step")
ctx.info(f"Agent processing step")
logger.info("Agent processing step")
ctx.info("Agent processing step")
# Process output if available
outputs = result.get("output", [])
for output in outputs:
output_type = output.get("type")
if output_type == "message":
logger.debug("Streaming assistant message: %s", output)
content = _normalise_message_content(output.get("content"))
@@ -224,7 +195,7 @@ def serve() -> FastMCP:
screenshot_image = Image(
format="png",
data=await global_computer.interface.screenshot()
data=await global_computer.interface.screenshot(),
)
return (
@@ -242,27 +213,20 @@ def serve() -> FastMCP:
screenshot = await global_computer.interface.screenshot()
return (
f"Error during task execution: {str(e)}",
Image(format="png", data=screenshot)
Image(format="png", data=screenshot),
)
except:
except Exception:
pass
# If we can't get a screenshot, return a placeholder
return (
f"Error during task execution: {str(e)}",
Image(format="png", data=b"")
Image(format="png", data=b""),
)
@server.tool()
async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> List[Tuple[str, Image]]:
@server.tool(structured_output=False)
async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> Any:
"""
Run multiple CUA tasks in a MacOS VM in sequence and return the combined results.
Args:
ctx: The MCP context
tasks: List of tasks to run in sequence
Returns:
Combined results from all tasks
Run multiple CUA tasks in sequence and return a list of (combined text, screenshot).
"""
total_tasks = len(tasks)
if total_tasks == 0:
@@ -278,7 +242,7 @@ def serve() -> FastMCP:
task_result = await run_cua_task(ctx, task)
results.append(task_result)
ctx.report_progress((i + 1) / total_tasks)
return results
return server
@@ -286,7 +250,6 @@ def serve() -> FastMCP:
server = serve()
def main():
"""Run the MCP server."""
try:
@@ -297,6 +260,5 @@ def main():
traceback.print_exc(file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

start_mcp_server.sh

@@ -1,14 +1,43 @@
#!/bin/bash
#!/usr/bin/env bash
set -Eeuo pipefail
set -e
# Set the CUA repository path based on script location
# --- Resolve repo root from this script's location ---
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../.." &> /dev/null && pwd )"
PYTHON_PATH="${CUA_REPO_DIR}/.venv/bin/python"
CUA_REPO_DIR="$( cd "$SCRIPT_DIR/../../../.." &> /dev/null && pwd )"
# Set Python path to include all necessary libraries
export PYTHONPATH="${CUA_REPO_DIR}/libs/python/mcp-server:${CUA_REPO_DIR}/libs/python/agent:${CUA_REPO_DIR}/libs/python/computer:${CUA_REPO_DIR}/libs/python/core:${CUA_REPO_DIR}/libs/python/pylume"
# --- Choose a Python interpreter (prefer repo-root venv) ---
CANDIDATES=(
"$CUA_REPO_DIR/.venv/bin/python"
"$CUA_REPO_DIR/libs/.venv/bin/python"
"$(command -v python3 || true)"
"$(command -v python || true)"
)
# Run the MCP server directly as a module
$PYTHON_PATH -m mcp_server.server
PYTHON_PATH=""
for p in "${CANDIDATES[@]}"; do
if [[ -n "$p" && -x "$p" ]]; then
PYTHON_PATH="$p"
break
fi
done
if [[ -z "${PYTHON_PATH}" ]]; then
>&2 echo "[cua-mcp] ERROR: No suitable Python found. Tried:"
for p in "${CANDIDATES[@]}"; do >&2 echo " - $p"; done
>&2 echo "[cua-mcp] Tip: create venv: python3 -m venv $CUA_REPO_DIR/.venv && \"$CUA_REPO_DIR/.venv/bin/pip\" install -e \"$CUA_REPO_DIR/libs/python/mcp-server\""
exit 127
fi
# --- Export PYTHONPATH so module imports work during dev ---
export PYTHONPATH="$CUA_REPO_DIR/libs/python/mcp-server:$CUA_REPO_DIR/libs/python/agent:$CUA_REPO_DIR/libs/python/computer:$CUA_REPO_DIR/libs/python/core:$CUA_REPO_DIR/libs/python/pylume"
# --- Helpful startup log for Claude's mcp.log ---
>&2 echo "[cua-mcp] using python: $PYTHON_PATH"
>&2 echo "[cua-mcp] repo dir : $CUA_REPO_DIR"
>&2 echo "[cua-mcp] PYTHONPATH : $PYTHONPATH"
if [[ -n "${CUA_MODEL_NAME:-}" ]]; then
>&2 echo "[cua-mcp] CUA_MODEL_NAME=$CUA_MODEL_NAME"
fi
# --- Run the MCP server module ---
exec "$PYTHON_PATH" -m mcp_server.server