hatchet/frontend/docs/pages/api/mcp.ts

/**
 * MCP (Model Context Protocol) server for Hatchet documentation.
 *
 * Implements the Streamable HTTP transport (stateless mode) so that
 * AI editors like Cursor, Claude Code, and Claude Desktop can query
 * Hatchet docs as MCP resources.
 *
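 * Endpoint: POST   /api/mcp   (JSON-RPC 2.0, single message or batch)
 *           GET    /api/mcp   (SSE keep-alive stream when the client accepts
 *                              text/event-stream, otherwise server metadata)
 *           DELETE /api/mcp   (session termination; no-op for this server)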
 */
import type { NextApiRequest, NextApiResponse } from "next";
import fs from "node:fs";
import path from "node:path";
import { PostHog } from "posthog-node";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/** A JSON-RPC 2.0 message received from an MCP client. */
interface JsonRpcRequest {
  jsonrpc: "2.0";
  /** Missing or null for notifications; routeRequest produces no response for those. */
  id?: string | number | null;
  /** Method name used by routeRequest to pick a handler, e.g. "tools/call". */
  method: string;
  /** Method-specific parameters; handlers read the keys they need from it. */
  params?: Record<string, unknown>;
}

/** A JSON-RPC 2.0 response; the handlers in this file set either `result` or `error`. */
interface JsonRpcResponse {
  jsonrpc: "2.0";
  /** Echoes the id of the request being answered. */
  id: string | number | null;
  /** Success payload; its shape depends on the method that was called. */
  result?: unknown;
  /** Failure payload carrying a JSON-RPC error code and a human-readable message. */
  error?: { code: number; message: string; data?: unknown };
}

/** Resource descriptor returned to clients by the `resources/list` handler. */
interface McpResource {
  uri: string;
  name: string;
  description: string;
  mimeType: string;
}

/** Internal catalogue entry for one markdown file discovered by collectDocs. */
interface DocEntry {
  /** Resource URI of the form `hatchet://docs/<path>`. */
  uri: string;
  /** Page title from llms.txt, or the file slug when no title is known. */
  name: string;
  description: string;
  /** Location of the markdown file on disk; read when the resource is served. */
  filePath: string;
}

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
// Protocol revision reported in the `initialize` result and in the GET metadata response.
const PROTOCOL_VERSION = "2024-11-05";
// Server identity reported alongside the protocol version.
const SERVER_NAME = "hatchet-docs";
const SERVER_VERSION = "1.0.0";

// Documentation artifacts, all resolved against the process working directory.
// Directory of per-page markdown files that make up the resource catalogue.
const LLMS_DIR = path.join(process.cwd(), "public", "llms");
// Link index that collectDocs parses to obtain page titles.
const LLMS_TXT_PATH = path.join(process.cwd(), "public", "llms.txt");
// Serialized MiniSearch index loaded by getSearchIndex.
const SEARCH_INDEX_PATH = path.join(
  process.cwd(),
  "public",
  "llms-search-index.json",
);

// ---------------------------------------------------------------------------
// PostHog server-side analytics
// ---------------------------------------------------------------------------
/** PostHog client shared by every request served by this module instance. */
let posthogClient: PostHog | null = null;

/**
 * Returns the shared PostHog client, constructing it on first use.
 *
 * @returns The client, or `null` when `NEXT_PUBLIC_POSTHOG_KEY` is not set
 *          (analytics are then skipped by the caller).
 */
function getPostHog(): PostHog | null {
  if (posthogClient === null) {
    const apiKey = process.env.NEXT_PUBLIC_POSTHOG_KEY;
    if (!apiKey) {
      return null;
    }

    const host =
      process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.i.posthog.com";
    posthogClient = new PostHog(apiKey, {
      host,
      flushAt: 10,
      flushInterval: 5000,
    });
  }

  return posthogClient;
}

/**
 * Records one `mcp_request` analytics event for an incoming JSON-RPC call.
 *
 * Does nothing when no PostHog client is available.
 *
 * @param req        HTTP request; its `mcp-session-id` and `user-agent` headers are read.
 * @param method     JSON-RPC method name being handled.
 * @param properties Extra event properties; they override `method` / `user_agent`
 *                   when keys collide because they are spread last.
 */
function trackMcpEvent(
  req: NextApiRequest,
  method: string,
  properties?: Record<string, unknown>,
): void {
  const client = getPostHog();
  if (client === null) {
    return;
  }

  const { headers } = req;
  // Requests without a client-supplied session id share one "anonymous" identity.
  const sessionId = (headers["mcp-session-id"] as string) || "anonymous";
  const eventProperties = {
    method,
    user_agent: headers["user-agent"] || "",
    ...properties,
  };

  client.capture({
    distinctId: `mcp:${sessionId}`,
    event: "mcp_request",
    properties: eventProperties,
  });
}

// ---------------------------------------------------------------------------
// MiniSearch index (pre-generated by scripts/generate-llms.ts)
// ---------------------------------------------------------------------------
import MiniSearch from "minisearch";
import {
  MINISEARCH_OPTIONS,
  SEARCH_OPTIONS,
  rerankResults,
  expandSynonyms,
} from "@/lib/search-config";

/** Document type parameter for the MiniSearch index loaded from SEARCH_INDEX_PATH. */
interface SearchDoc {
  id: string;
  title: string;
  content: string;
}

/** Deserialized search index, kept after the first successful load. */
let cachedMiniSearch: MiniSearch<SearchDoc> | null = null;

/**
 * Loads the pre-generated MiniSearch index from disk, memoizing the result.
 *
 * @returns The index, or `null` if the file cannot be read or deserialized.
 *          A failure is not cached, so the next call tries again.
 */
function getSearchIndex(): MiniSearch<SearchDoc> | null {
  if (cachedMiniSearch === null) {
    try {
      const serialized = fs.readFileSync(SEARCH_INDEX_PATH, "utf-8");
      cachedMiniSearch = MiniSearch.loadJSON<SearchDoc>(
        serialized,
        MINISEARCH_OPTIONS,
      );
    } catch {
      // Callers fall back to keyword matching when no index is available.
      return null;
    }
  }

  return cachedMiniSearch;
}

// ---------------------------------------------------------------------------
// Build the resource catalogue from public/llms/
// ---------------------------------------------------------------------------
/** Resource catalogue, built once and reused for the lifetime of the module. */
let cachedDocs: DocEntry[] | null = null;

/**
 * Builds (and memoizes) the catalogue of documentation resources.
 *
 * Titles come from the links in `llms.txt`; entries come from a recursive
 * walk of `LLMS_DIR`, one per `.md` file. A missing directory or missing
 * `llms.txt` yields fewer entries / slug-based titles rather than an error.
 *
 * @returns One `DocEntry` per markdown file, in traversal order.
 */
function collectDocs(): DocEntry[] {
  if (cachedDocs) return cachedDocs;

  const entries: DocEntry[] = [];
  // URIs already catalogued; gives O(1) duplicate checks instead of rescanning
  // `entries` for every file.
  const seenUris = new Set<string>();

  // Parse llms.txt to map each doc path to its human-readable title.
  const titleMap = new Map<string, string>();
  if (fs.existsSync(LLMS_TXT_PATH)) {
    const llmsTxt = fs.readFileSync(LLMS_TXT_PATH, "utf-8");
    const linkPattern =
      /- \[([^\]]+)\]\(https:\/\/docs\.hatchet\.run\/([^)]+)\)/g;
    let m: RegExpExecArray | null;
    while ((m = linkPattern.exec(llmsTxt)) !== null) {
      titleMap.set(m[2], m[1]);
    }
  }

  // Recursively visits `dir`; `prefix` is the doc path of `dir` relative to
  // LLMS_DIR ("" at the root).
  function walk(dir: string, prefix: string): void {
    if (!fs.existsSync(dir)) return;
    const items = fs.readdirSync(dir, { withFileTypes: true });
    for (const item of items) {
      if (item.isDirectory()) {
        walk(
          path.join(dir, item.name),
          prefix ? `${prefix}/${item.name}` : item.name,
        );
        continue;
      }
      if (!item.name.endsWith(".md")) continue;

      const slug = item.name.replace(/\.md$/, "");
      const docPath = prefix ? `${prefix}/${slug}` : slug;

      // Skip duplicates (e.g. home.md vs home/index.md).
      // NOTE(review): this only fires when `<prefix>.md` was catalogued before
      // `<prefix>/index.md` is visited, so it depends on directory listing
      // order — confirm the order readdirSync yields in the deployment.
      if (slug === "index" && seenUris.has(`hatchet://docs/${prefix}`)) {
        continue;
      }

      const lookupKey = slug === "index" ? `${prefix}/index` : docPath;
      const title = titleMap.get(lookupKey) || titleMap.get(docPath) || slug;

      const uri = `hatchet://docs/${docPath}`;
      if (seenUris.has(uri)) continue;
      seenUris.add(uri);

      entries.push({
        uri,
        name: title,
        description: `Hatchet documentation: ${title}`,
        filePath: path.join(dir, item.name),
      });
    }
  }

  walk(LLMS_DIR, "");
  cachedDocs = entries;
  return entries;
}

// ---------------------------------------------------------------------------
// MCP method handlers
// ---------------------------------------------------------------------------
/**
 * Answers the MCP `initialize` handshake.
 *
 * @param id Request id to echo back.
 * @returns The protocol version, the advertised capabilities (resources
 *          without list-change notifications, plus tools) and server identity.
 */
function handleInitialize(id: string | number | null): JsonRpcResponse {
  const capabilities = {
    resources: { listChanged: false },
    tools: {},
  };
  const serverInfo = { name: SERVER_NAME, version: SERVER_VERSION };

  const result = {
    protocolVersion: PROTOCOL_VERSION,
    capabilities,
    serverInfo,
  };

  return { jsonrpc: "2.0", id, result };
}

/**
 * Answers `resources/list` with every catalogued documentation page.
 *
 * @param id Request id to echo back.
 * @returns A response whose `result.resources` lists each page as markdown.
 */
function handleResourcesList(id: string | number | null): JsonRpcResponse {
  const resources: McpResource[] = [];
  for (const { uri, name, description } of collectDocs()) {
    // filePath is internal and deliberately not exposed to clients.
    resources.push({ uri, name, description, mimeType: "text/markdown" });
  }

  return { jsonrpc: "2.0", id, result: { resources } };
}

/**
 * Answers `resources/read` with the markdown content of one catalogued page.
 *
 * @param id     Request id to echo back.
 * @param params Request params; `uri` selects the resource.
 * @returns The page content, or an error: -32602 when `uri` is missing or
 *          unknown, -32603 when the file cannot be read.
 */
function handleResourcesRead(
  id: string | number | null,
  params: Record<string, unknown>,
): JsonRpcResponse {
  const fail = (code: number, message: string): JsonRpcResponse => ({
    jsonrpc: "2.0",
    id,
    error: { code, message },
  });

  const uri = params.uri as string | undefined;
  if (!uri) {
    return fail(-32602, "Missing required parameter: uri");
  }

  // Only URIs present in the catalogue are served; the client never supplies a path.
  const doc = collectDocs().find((entry) => entry.uri === uri);
  if (doc === undefined) {
    return fail(-32602, `Resource not found: ${uri}`);
  }

  let text: string;
  try {
    text = fs.readFileSync(doc.filePath, "utf-8");
  } catch {
    return fail(-32603, `Failed to read resource: ${uri}`);
  }

  return {
    jsonrpc: "2.0",
    id,
    result: {
      contents: [{ uri: doc.uri, mimeType: "text/markdown", text }],
    },
  };
}

/**
 * Answers `tools/list` with the tools this server advertises.
 *
 * @param id Request id to echo back.
 * @returns A response listing the `search_docs` tool and its input schema.
 */
function handleToolsList(id: string | number | null): JsonRpcResponse {
  const queryProperty = {
    type: "string",
    description:
      "Search query (keywords to match against page titles and content)",
  };
  const maxResultsProperty = {
    type: "number",
    description: "Maximum number of results to return (default: 10)",
  };

  const searchDocsTool = {
    name: "search_docs",
    description:
      "Search Hatchet documentation by keyword. Returns matching page titles and URIs.",
    inputSchema: {
      type: "object",
      properties: {
        query: queryProperty,
        max_results: maxResultsProperty,
      },
      required: ["query"],
    },
  };

  return { jsonrpc: "2.0", id, result: { tools: [searchDocsTool] } };
}

/**
 * Answers `tools/call` by dispatching on the tool name.
 *
 * Recognizes `search_docs` and `get_full_docs`.
 *
 * @param id     Request id to echo back.
 * @param params Request params; `name` selects the tool and `arguments`
 *               (defaulting to an empty object) is passed to it.
 * @returns The tool's response, or a -32602 error for an unrecognized name.
 */
function handleToolsCall(
  id: string | number | null,
  params: Record<string, unknown>,
): JsonRpcResponse {
  const toolName = params.name as string | undefined;
  const args = (params.arguments || {}) as Record<string, unknown>;

  switch (toolName) {
    case "search_docs":
      return handleSearchDocs(id, args);
    case "get_full_docs":
      return handleGetFullDocs(id);
    default:
      return {
        jsonrpc: "2.0",
        id,
        error: { code: -32602, message: `Unknown tool: ${toolName}` },
      };
  }
}

/** A ranked search hit before it is formatted for the client. */
type SearchHit = { uri: string; title: string; score: number };

/** Result count used when `max_results` is absent or unusable. */
const DEFAULT_MAX_RESULTS = 10;

/**
 * Coerces the client-supplied `max_results` into a positive integer.
 * Numeric strings are accepted; anything non-finite or below 1 falls back to
 * the default so a negative value can never turn into `slice(0, -n)`.
 */
function normalizeMaxResults(raw: unknown): number {
  const value = typeof raw === "string" ? Number(raw) : raw;
  if (typeof value !== "number" || !Number.isFinite(value) || value < 1) {
    return DEFAULT_MAX_RESULTS;
  }
  return Math.floor(value);
}

/** Runs the query against the MiniSearch index, keeping the best hit per page. */
function searchWithIndex(
  index: MiniSearch<SearchDoc>,
  query: string,
  maxResults: number,
): SearchHit[] {
  // Expand synonyms so alternate phrasings (e.g. "delay" → "schedule sleep")
  // still surface the right pages, then rerank against the original query.
  const expanded = expandSynonyms(query);
  const reranked = rerankResults(index.search(expanded, SEARCH_OPTIONS), query);

  const seenPages = new Set<string>();
  const hits: SearchHit[] = [];
  for (const r of reranked) {
    if (hits.length >= maxResults) break;
    // Index ids may carry a "#section" suffix; results are reported per page.
    const pageUri = r.id.replace(/#.*$/, "");
    if (seenPages.has(pageUri)) continue;
    seenPages.add(pageUri);
    hits.push({
      uri: pageUri,
      title: (r.pageTitle as string) || (r.title as string) || r.id,
      score: r.score,
    });
  }
  return hits;
}

/** Fallback ranking by keyword occurrences in page titles and URIs. */
function searchByKeyword(
  docs: DocEntry[],
  keywords: string[],
  maxResults: number,
): SearchHit[] {
  const scored: SearchHit[] = [];
  for (const doc of docs) {
    const nameLower = doc.name.toLowerCase();
    const uriLower = doc.uri.toLowerCase();
    let score = 0;
    for (const kw of keywords) {
      if (nameLower.includes(kw)) score += 10;
      if (uriLower.includes(kw)) score += 5;
    }
    if (score > 0) {
      scored.push({ uri: doc.uri, title: doc.name, score });
    }
  }
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, maxResults);
}

/** Returns ~80 characters of context around the first keyword found in the doc, or "". */
function extractSnippet(doc: DocEntry, keywords: string[]): string {
  try {
    const content = fs.readFileSync(doc.filePath, "utf-8");
    const contentLower = content.toLowerCase();
    for (const kw of keywords) {
      const idx = contentLower.indexOf(kw);
      if (idx === -1) continue;
      const start = Math.max(0, idx - 80);
      const end = Math.min(content.length, idx + kw.length + 80);
      return (
        (start > 0 ? "..." : "") +
        content.slice(start, end).trim() +
        (end < content.length ? "..." : "")
      );
    }
  } catch {
    // Snippets are best-effort: an unreadable file still yields a result line.
  }
  return "";
}

/**
 * Implements the `search_docs` tool.
 *
 * Uses the MiniSearch index when it can be loaded and falls back to keyword
 * matching on titles/URIs otherwise. Each hit is rendered as a numbered
 * markdown line with an optional content snippet.
 *
 * @param id   Request id to echo back.
 * @param args Tool arguments: `query` (required, non-blank string) and
 *             `max_results` (optional positive number, default 10).
 * @returns A text result, or a -32602 error when `query` is missing, blank or
 *          not a string.
 */
function handleSearchDocs(
  id: string | number | null,
  args: Record<string, unknown>,
): JsonRpcResponse {
  // Arguments come from the client untyped: a non-string query must be
  // rejected rather than crash on `.trim()`.
  const query = typeof args.query === "string" ? args.query.trim() : "";
  if (!query) {
    return {
      jsonrpc: "2.0",
      id,
      error: { code: -32602, message: "Missing required argument: query" },
    };
  }
  const maxResults = normalizeMaxResults(args.max_results);

  const miniSearch = getSearchIndex();
  const docCatalogue = collectDocs();

  // Lookup from URI to DocEntry for snippet extraction.
  const docByUri = new Map<string, DocEntry>();
  for (const d of docCatalogue) {
    docByUri.set(d.uri, d);
  }

  // `query` is trimmed and non-empty, so splitting never yields empty keywords.
  const keywords = query.toLowerCase().split(/\s+/);

  const results = miniSearch
    ? searchWithIndex(miniSearch, query, maxResults)
    : searchByKeyword(docCatalogue, keywords, maxResults);

  const formatted = results.map((r, i) => {
    const doc = docByUri.get(r.uri);
    const snippet = doc ? extractSnippet(doc, keywords) : "";
    return `${i + 1}. **${r.title}** (${r.uri})\n   ${snippet}`;
  });

  const text =
    results.length === 0
      ? `No results found for "${query}".`
      : formatted.join("\n\n");

  return {
    jsonrpc: "2.0",
    id,
    result: {
      content: [{ type: "text", text }],
    },
  };
}

/**
 * Implements the `get_full_docs` tool: returns `public/llms-full.txt` verbatim.
 *
 * @param id Request id to echo back.
 * @returns A single text content item, or a -32603 error if the file cannot
 *          be read.
 */
function handleGetFullDocs(id: string | number | null): JsonRpcResponse {
  const fullDocsPath = path.join(process.cwd(), "public", "llms-full.txt");

  try {
    const text = fs.readFileSync(fullDocsPath, "utf-8");
    return {
      jsonrpc: "2.0",
      id,
      result: { content: [{ type: "text", text }] },
    };
  } catch {
    return {
      jsonrpc: "2.0",
      id,
      error: {
        code: -32603,
        message: "Failed to read full documentation file",
      },
    };
  }
}

// ---------------------------------------------------------------------------
// Agent instruction tools
// ---------------------------------------------------------------------------
/** Directory holding the MDX sources of the agent-instruction pages. */
const PAGES_DIR = path.join(process.cwd(), "pages", "agent-instructions");

/**
 * Reads an agent-instruction page by slug.
 *
 * Prefers the generated markdown under `LLMS_DIR/agent-instructions` and
 * falls back to the MDX source in `PAGES_DIR`.
 *
 * @param slug Page name without extension.
 * @returns The file content, or `null` when neither file exists.
 */
function readAgentPage(slug: string): string | null {
  const candidates = [
    path.join(LLMS_DIR, "agent-instructions", `${slug}.md`),
    path.join(PAGES_DIR, `${slug}.mdx`),
  ];

  for (const candidate of candidates) {
    if (fs.existsSync(candidate)) {
      return fs.readFileSync(candidate, "utf-8");
    }
  }

  return null;
}

// ---------------------------------------------------------------------------
// Notifications (no response needed)
// ---------------------------------------------------------------------------
// Notification methods this server recognizes. routeRequest consults this set
// for id-less messages, although unrecognized notifications are ignored too.
const NOTIFICATION_METHODS = new Set([
  "notifications/initialized",
  "notifications/cancelled",
  "notifications/progress",
]);

// ---------------------------------------------------------------------------
// Route JSON-RPC request to handler
// ---------------------------------------------------------------------------
/**
 * Dispatches one JSON-RPC message to the matching MCP handler.
 *
 * @param rpcReq  The decoded message. It originates from the HTTP body, so its
 *                static type is not a runtime guarantee and is re-checked here.
 * @param httpReq The HTTP request, used only for analytics.
 * @returns The response to send, or `null` for notifications (messages without
 *          an id), which must not be answered. Malformed messages yield a
 *          -32600 "Invalid Request" error instead of throwing.
 */
function routeRequest(
  rpcReq: JsonRpcRequest,
  httpReq: NextApiRequest,
): JsonRpcResponse | null {
  // A batch item or body may be null, a primitive or a nested array;
  // destructuring null/undefined would throw and fail the whole HTTP request.
  const message: unknown = rpcReq;
  if (
    typeof message !== "object" ||
    message === null ||
    Array.isArray(message)
  ) {
    return {
      jsonrpc: "2.0",
      id: null,
      error: { code: -32600, message: "Invalid Request" },
    };
  }

  const { id, method, params } = rpcReq;

  // Notifications have no id and expect no response
  if (id === undefined || id === null) {
    if (NOTIFICATION_METHODS.has(method)) return null;
    // Unknown notification — ignore
    return null;
  }

  // A request that carries an id but no usable method name is malformed, not
  // an unknown method.
  if (typeof method !== "string") {
    return {
      jsonrpc: "2.0",
      id,
      error: { code: -32600, message: "Invalid Request" },
    };
  }

  // Track MCP usage, attaching the tool name / query or resource URI when present.
  const trackProps: Record<string, unknown> = {};
  if (method === "tools/call" && params?.name) {
    trackProps.tool = params.name;
    const args = params.arguments as Record<string, unknown> | undefined;
    if (args?.query) trackProps.tool_query = args.query;
  }
  if (method === "resources/read" && params?.uri) {
    trackProps.resource_uri = params.uri;
  }
  trackMcpEvent(httpReq, method, trackProps);

  switch (method) {
    case "initialize":
      return handleInitialize(id);
    case "resources/list":
      return handleResourcesList(id);
    case "resources/read":
      return handleResourcesRead(id, params || {});
    case "tools/list":
      return handleToolsList(id);
    case "tools/call":
      return handleToolsCall(id, params || {});
    case "ping":
      return { jsonrpc: "2.0", id, result: {} };
    default:
      return {
        jsonrpc: "2.0",
        id,
        error: { code: -32601, message: `Method not found: ${method}` },
      };
  }
}

// ---------------------------------------------------------------------------
// Next.js API handler
// ---------------------------------------------------------------------------

// Next.js API route configuration for this endpoint.
export const config = {
  // Disable the response size limit so large documentation payloads can be
  // returned. Only `responseLimit` is set here; no request-body setting is changed.
  api: { responseLimit: false },
};

/** Sends a JSON-RPC error that cannot be tied to a specific request id. */
function sendRpcError(
  res: NextApiResponse,
  status: number,
  code: number,
  message: string,
): void {
  const payload: JsonRpcResponse = {
    jsonrpc: "2.0",
    id: null,
    error: { code, message },
  };
  res.status(status).json(payload);
}

/**
 * Next.js API route entry point for the MCP endpoint.
 *
 * - OPTIONS: CORS preflight (204).
 * - GET: an SSE keep-alive stream when the client accepts `text/event-stream`,
 *   otherwise a JSON description of the server.
 * - DELETE: session termination; a no-op because the server is stateless.
 * - POST: a JSON-RPC 2.0 message or batch. Requests get 200 with the
 *   response(s); input consisting only of notifications gets 204.
 *   Unparseable bodies get 400/-32700, non-object bodies and empty batches
 *   400/-32600, and unexpected handler failures 500/-32603.
 *
 * @param req Incoming request.
 * @param res Response to write to.
 */
export default function handler(
  req: NextApiRequest,
  res: NextApiResponse,
): void {
  // CORS headers for cross-origin MCP clients
  res.setHeader("Access-Control-Allow-Origin", "*");
  res.setHeader("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS");
  res.setHeader(
    "Access-Control-Allow-Headers",
    "Content-Type, Accept, Mcp-Session-Id",
  );
  res.setHeader("Access-Control-Expose-Headers", "Mcp-Session-Id");

  if (req.method === "OPTIONS") {
    res.status(204).end();
    return;
  }

  // -----------------------------------------------------------------------
  // GET — Streamable HTTP SSE endpoint for server-to-client notifications.
  // mcp-remote establishes this connection first before sending POST.
  // For a stateless server we just keep the stream open.
  // -----------------------------------------------------------------------
  if (req.method === "GET") {
    const accept = (req.headers.accept || "").toLowerCase();

    if (accept.includes("text/event-stream")) {
      // SSE stream — required by MCP Streamable HTTP transport
      res.writeHead(200, {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache, no-transform",
        Connection: "keep-alive",
      });

      // Send a keep-alive comment so the client knows the connection is alive
      res.write(": connected\n\n");

      // Keep the connection open; the client closes when it's done
      const keepAlive = setInterval(() => {
        res.write(": ping\n\n");
      }, 15_000);

      // Stop pinging and release the response once the client disconnects.
      req.on("close", () => {
        clearInterval(keepAlive);
        res.end();
      });
      return;
    }

    // Plain GET returns server metadata (useful for browser discovery)
    res.status(200).json({
      name: SERVER_NAME,
      version: SERVER_VERSION,
      protocolVersion: PROTOCOL_VERSION,
      description:
        "MCP server for Hatchet documentation. Send JSON-RPC 2.0 POST requests to interact.",
    });
    return;
  }

  // -----------------------------------------------------------------------
  // DELETE — session termination (no-op for stateless server)
  // -----------------------------------------------------------------------
  if (req.method === "DELETE") {
    res.status(200).end();
    return;
  }

  if (req.method !== "POST") {
    res.status(405).json({ error: "Method not allowed" });
    return;
  }

  // -----------------------------------------------------------------------
  // POST — JSON-RPC 2.0 request handling
  // -----------------------------------------------------------------------
  let body: unknown = req.body;

  // The body arrives as a raw string when it was not parsed as JSON upstream
  // (e.g. a non-JSON Content-Type). Parse it here instead of silently treating
  // it as a notification.
  if (typeof body === "string") {
    try {
      body = JSON.parse(body);
    } catch {
      sendRpcError(res, 400, -32700, "Parse error");
      return;
    }
  }

  // A JSON-RPC payload is an object or an array of objects; anything else
  // (missing body, number, boolean, null) cannot be routed.
  if (typeof body !== "object" || body === null) {
    sendRpcError(res, 400, -32600, "Invalid Request");
    return;
  }

  // An empty batch is an invalid request rather than "nothing to do".
  if (Array.isArray(body) && body.length === 0) {
    sendRpcError(res, 400, -32600, "Invalid Request");
    return;
  }

  let payload: JsonRpcResponse | JsonRpcResponse[] | null;
  try {
    if (Array.isArray(body)) {
      // Batch: collect the responses of every non-notification message.
      const responses: JsonRpcResponse[] = [];
      for (const item of body) {
        const response = routeRequest(item as JsonRpcRequest, req);
        if (response) responses.push(response);
      }
      payload = responses.length > 0 ? responses : null;
    } else {
      payload = routeRequest(body as JsonRpcRequest, req);
    }
  } catch (err: unknown) {
    // Report unexpected failures in JSON-RPC form so clients can parse them.
    console.error("MCP request handling failed:", err);
    sendRpcError(res, 500, -32603, "Internal error");
    return;
  }

  if (payload === null) {
    // Notifications only — no response body
    res.status(204).end();
    return;
  }

  res.status(200).json(payload);
}