From 2e51871ad5b2ccafd1d0406252f06acee7789da2 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 7 Oct 2025 00:05:34 +0200
Subject: [PATCH] feat(ui): add button to enable Agentic MCP (#6400)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/endpoints/openai/mcp.go |   9 +
 core/http/static/chat.js          | 337 +++++++++++++++++++-----------
 core/http/views/chat.html         |  34 +++
 3 files changed, 258 insertions(+), 122 deletions(-)
diff --git a/core/http/endpoints/openai/mcp.go b/core/http/endpoints/openai/mcp.go
index 11897ae55..5ef17fc9f 100644
--- a/core/http/endpoints/openai/mcp.go
+++ b/core/http/endpoints/openai/mcp.go
@@ -3,6 +3,7 @@ package openai
 import (
 	"encoding/json"
 	"errors"
+	"fmt"
 	"strings"
 	"sync"
 	"time"
@@ -49,6 +50,10 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 			return fiber.ErrBadRequest
 		}
 
+		if config.MCP.Servers == "" && config.MCP.Stdio == "" {
+			return fmt.Errorf("no MCP servers configured")
+		}
+
 		allTools := []*mcp.MCPTool{}
 
 		// Get MCP config from model config
@@ -78,6 +83,10 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 			//	defer tool.Close()
 		}
 
+		if len(cogitoTools) == 0 {
+			return fmt.Errorf("no tools found in the specified MCP servers")
+		}
+
 		fragment := cogito.NewEmptyFragment()
 
 		for _, message := range input.Messages {
diff --git a/core/http/static/chat.js b/core/http/static/chat.js
index 3de1e0724..9b10a626e 100644
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -42,6 +42,32 @@ function toggleLoader(show) {
   }
 }
 
+function processThinkingTags(content) {
+  const thinkingRegex = /<thinking>(.*?)<\/thinking>|<think>(.*?)<\/think>/gs;
+  const parts = content.split(thinkingRegex);
+  
+  let regularContent = "";
+  let thinkingContent = "";
+  
+  for (let i = 0; i < parts.length; i++) {
+    if (i % 3 === 0) {
+      // Regular content
+      regularContent += parts[i];
+    } else if (i % 3 === 1) {
+      // <thinking> content
+      thinkingContent = parts[i];
+    } else if (i % 3 === 2) {
+      // <think> content
+      thinkingContent = parts[i];
+    }
+  }
+  
+  return {
+    regularContent: regularContent.trim(),
+    thinkingContent: thinkingContent.trim()
+  };
+}
+
 function submitSystemPrompt(event) {
   event.preventDefault();
   localStorage.setItem("system_prompt", document.getElementById("systemPrompt").value);
@@ -193,6 +219,7 @@ function readInputAudio() {
 
 async function promptGPT(systemPrompt, input) {
   const model = document.getElementById("chat-model").value;
+  const mcpMode = Alpine.store("chat").mcpMode;
   toggleLoader(true);
 
   messages = Alpine.store("chat").messages();
@@ -254,147 +281,212 @@ async function promptGPT(systemPrompt, input) {
   document.getElementById("input_file").value = null;
   document.getElementById("fileName").innerHTML = "";
 
-  // Source: https://stackoverflow.com/a/75751803/11386095
-  const response = await fetch("v1/chat/completions", {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify({
-      model: model,
-      messages: messages,
-      stream: true,
-    }),
-  });
+  // Choose endpoint based on MCP mode
+  const endpoint = mcpMode ? "mcp/v1/chat/completions" : "v1/chat/completions";
+  const requestBody = {
+    model: model,
+    messages: messages,
+  };
+  
+  // Only add stream parameter for regular chat (MCP doesn't support streaming)
+  if (!mcpMode) {
+    requestBody.stream = true;
+  }
+  
+  let response;
+  try {
+    // Create AbortController for timeout handling
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), mcpMode ? 300000 : 30000); // 5 minutes for MCP, 30 seconds for regular
+    
+    response = await fetch(endpoint, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+      },
+      body: JSON.stringify(requestBody),
+      signal: controller.signal
+    });
+    
+    clearTimeout(timeoutId);
+  } catch (error) {
+    if (error.name === 'AbortError') {
+      Alpine.store("chat").add(
+        "assistant",
+        `<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
+      );
+    } else {
+      Alpine.store("chat").add(
+        "assistant",
+        `<span class='error'>Network Error: ${error.message}</span>`,
+      );
+    }
+    toggleLoader(false);
+    return;
+  }
 
   if (!response.ok) {
     Alpine.store("chat").add(
       "assistant",
-      `<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
+      `<span class='error'>Error: POST ${endpoint} ${response.status}</span>`,
     );
+    toggleLoader(false);
     return;
   }
 
-  const reader = response.body
-    ?.pipeThrough(new TextDecoderStream())
-    .getReader();
-
-  if (!reader) {
-    Alpine.store("chat").add(
-      "assistant",
-      `<span class='error'>Error: Failed to decode API response</span>`,
-    );
-    return;
-  }
-
-  // Function to add content to the chat and handle DOM updates efficiently
-  const addToChat = (token) => {
-    const chatStore = Alpine.store("chat");
-    chatStore.add("assistant", token);
-    // Efficiently scroll into view without triggering multiple reflows
-    // const messages = document.getElementById('messages');
-    // messages.scrollTop = messages.scrollHeight;
-  };
-
-  let buffer = "";
-  let contentBuffer = [];
-  let thinkingContent = "";
-  let isThinking = false;
-  let lastThinkingMessageIndex = -1;
-
-  try {
-    while (true) {
-      const { value, done } = await reader.read();
-      if (done) break;
-
-      buffer += value;
-
-      let lines = buffer.split("\n");
-      buffer = lines.pop(); // Retain any incomplete line in the buffer
-
-      lines.forEach((line) => {
-        if (line.length === 0 || line.startsWith(":")) return;
-        if (line === "data: [DONE]") {
-          return;
+  if (mcpMode) {
+    // Handle MCP non-streaming response
+    try {
+      const data = await response.json();
+      // MCP endpoint returns content in choices[0].text, not choices[0].message.content
+      const content = data.choices[0]?.text || "";
+      
+      if (content) {
+        // Process thinking tags using shared function
+        const { regularContent, thinkingContent } = processThinkingTags(content);
+        
+        // Add thinking content if present
+        if (thinkingContent) {
+          Alpine.store("chat").add("thinking", thinkingContent);
         }
+        
+        // Add regular content if present
+        if (regularContent) {
+          Alpine.store("chat").add("assistant", regularContent);
+        }
+      }
+      
+      // Highlight all code blocks
+      hljs.highlightAll();
+    } catch (error) {
+      Alpine.store("chat").add(
+        "assistant",
+        `<span class='error'>Error: Failed to parse MCP response</span>`,
+      );
+    }
+  } else {
+    // Handle regular streaming response
+    const reader = response.body
+      ?.pipeThrough(new TextDecoderStream())
+      .getReader();
 
-        if (line.startsWith("data: ")) {
-          try {
-            const jsonData = JSON.parse(line.substring(6));
-            const token = jsonData.choices[0].delta.content;
+    if (!reader) {
+      Alpine.store("chat").add(
+        "assistant",
+        `<span class='error'>Error: Failed to decode API response</span>`,
+      );
+      return;
+    }
 
-            if (token) {
-              // Check for thinking tags
-              if (token.includes("<thinking>") || token.includes("<think>")) {
-                isThinking = true;
-                thinkingContent = "";
-                lastThinkingMessageIndex = -1;
-                return;
-              }
-              if (token.includes("</thinking>") || token.includes("</think>")) {
-                isThinking = false;
-                if (thinkingContent.trim()) {
-                  // Only add the final thinking message if we don't already have one
-                  if (lastThinkingMessageIndex === -1) {
-                    Alpine.store("chat").add("thinking", thinkingContent);
-                  }
-                }
-                return;
-              }
+    // Function to add content to the chat and handle DOM updates efficiently
+    const addToChat = (token) => {
+      const chatStore = Alpine.store("chat");
+      chatStore.add("assistant", token);
+      // Efficiently scroll into view without triggering multiple reflows
+      // const messages = document.getElementById('messages');
+      // messages.scrollTop = messages.scrollHeight;
+    };
 
-              // Handle content based on thinking state
-              if (isThinking) {
-                thinkingContent += token;
-                // Update the last thinking message or create a new one
-                if (lastThinkingMessageIndex === -1) {
-                  // Create new thinking message
-                  Alpine.store("chat").add("thinking", thinkingContent);
-                  lastThinkingMessageIndex = Alpine.store("chat").history.length - 1;
-                } else {
-                  // Update existing thinking message
-                  const chatStore = Alpine.store("chat");
-                  const lastMessage = chatStore.history[lastThinkingMessageIndex];
-                  if (lastMessage && lastMessage.role === "thinking") {
-                    lastMessage.content = thinkingContent;
-                    lastMessage.html = DOMPurify.sanitize(marked.parse(thinkingContent));
-                  }
-                }
-              } else {
-                contentBuffer.push(token);
-              }
-            }
-          } catch (error) {
-            console.error("Failed to parse line:", line, error);
+    let buffer = "";
+    let contentBuffer = [];
+    let thinkingContent = "";
+    let isThinking = false;
+    let lastThinkingMessageIndex = -1;
+
+    try {
+      while (true) {
+        const { value, done } = await reader.read();
+        if (done) break;
+
+        buffer += value;
+
+        let lines = buffer.split("\n");
+        buffer = lines.pop(); // Retain any incomplete line in the buffer
+
+        lines.forEach((line) => {
+          if (line.length === 0 || line.startsWith(":")) return;
+          if (line === "data: [DONE]") {
+            return;
           }
-        }
-      });
 
-      // Efficiently update the chat in batch
+          if (line.startsWith("data: ")) {
+            try {
+              const jsonData = JSON.parse(line.substring(6));
+              const token = jsonData.choices[0].delta.content;
+
+              if (token) {
+                // Check for thinking tags
+                if (token.includes("<thinking>") || token.includes("<think>")) {
+                  isThinking = true;
+                  thinkingContent = "";
+                  lastThinkingMessageIndex = -1;
+                  return;
+                }
+                if (token.includes("</thinking>") || token.includes("</think>")) {
+                  isThinking = false;
+                  if (thinkingContent.trim()) {
+                    // Only add the final thinking message if we don't already have one
+                    if (lastThinkingMessageIndex === -1) {
+                      Alpine.store("chat").add("thinking", thinkingContent);
+                    }
+                  }
+                  return;
+                }
+
+                // Handle content based on thinking state
+                if (isThinking) {
+                  thinkingContent += token;
+                  // Update the last thinking message or create a new one
+                  if (lastThinkingMessageIndex === -1) {
+                    // Create new thinking message
+                    Alpine.store("chat").add("thinking", thinkingContent);
+                    lastThinkingMessageIndex = Alpine.store("chat").history.length - 1;
+                  } else {
+                    // Update existing thinking message
+                    const chatStore = Alpine.store("chat");
+                    const lastMessage = chatStore.history[lastThinkingMessageIndex];
+                    if (lastMessage && lastMessage.role === "thinking") {
+                      lastMessage.content = thinkingContent;
+                      lastMessage.html = DOMPurify.sanitize(marked.parse(thinkingContent));
+                    }
+                  }
+                } else {
+                  contentBuffer.push(token);
+                }
+              }
+            } catch (error) {
+              console.error("Failed to parse line:", line, error);
+            }
+          }
+        });
+
+        // Efficiently update the chat in batch
+        if (contentBuffer.length > 0) {
+          addToChat(contentBuffer.join(""));
+          contentBuffer = [];
+        }
+      }
+
+      // Final content flush if any data remains
       if (contentBuffer.length > 0) {
         addToChat(contentBuffer.join(""));
-        contentBuffer = [];
       }
-    }
+      if (thinkingContent.trim() && lastThinkingMessageIndex === -1) {
+        Alpine.store("chat").add("thinking", thinkingContent);
+      }
 
-    // Final content flush if any data remains
-    if (contentBuffer.length > 0) {
-      addToChat(contentBuffer.join(""));
+      // Highlight all code blocks once at the end
+      hljs.highlightAll();
+    } catch (error) {
+      Alpine.store("chat").add(
+        "assistant",
+        `<span class='error'>Error: Failed to process stream</span>`,
+      );
+    } finally {
+      // Perform any cleanup if necessary
+      reader.releaseLock();
     }
-    if (thinkingContent.trim() && lastThinkingMessageIndex === -1) {
-      Alpine.store("chat").add("thinking", thinkingContent);
-    }
-
-    // Highlight all code blocks once at the end
-    hljs.highlightAll();
-  } catch (error) {
-    console.error("An error occurred while reading the stream:", error);
-    Alpine.store("chat").add(
-      "assistant",
-      `<span class='error'>Error: Failed to process stream</span>`,
-    );
-  } finally {
-    // Perform any cleanup if necessary
-    reader.releaseLock();
   }
 
   // Remove class "loader" from the element with "loader" id
@@ -431,6 +523,7 @@ document.addEventListener("alpine:init", () => {
     history: [],
     languages: [undefined],
     systemPrompt: "",
+    mcpMode: false,
     clear() {
       this.history.length = 0;
     },
diff --git a/core/http/views/chat.html b/core/http/views/chat.html
index 3683e1bcf..28eccd906 100644
--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
@@ -156,6 +156,39 @@ SOFTWARE.
 
             <!-- Settings tab -->
             <div x-show="activeTab === 'settings'" x-data="{ showPromptForm: false }" class="space-y-3">           
+              {{ if $model }}
+              {{ $galleryConfig:= index $allGalleryConfigs $model}}
+              {{ if $galleryConfig }}
+              {{ $modelConfig := "" }}
+              {{ range .ModelsConfig }}
+                {{ if eq .Name $model }}
+                  {{ $modelConfig = . }}
+                {{ end }}
+              {{ end }}
+              {{ if and $modelConfig (or (ne $modelConfig.MCP.Servers "") (ne $modelConfig.MCP.Stdio "")) }}
+              <!-- MCP Toggle -->
+              <div class="flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700">
+                <span><i class="fa-solid fa-plug mr-2"></i> Agentic MCP Mode</span>
+                <label class="relative inline-flex items-center cursor-pointer">
+                  <input type="checkbox" id="mcp-toggle" class="sr-only peer" x-model="$store.chat.mcpMode">
+                  <div class="w-11 h-6 bg-gray-600 peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-blue-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></div>
+                </label>
+              </div>
+              
+              <!-- MCP Mode Notification -->
+              <div x-show="$store.chat.mcpMode" class="p-3 bg-blue-900/20 border border-blue-700/50 rounded text-blue-100 text-xs">
+                <div class="flex items-start space-x-2">
+                  <i class="fa-solid fa-info-circle text-blue-400 mt-0.5"></i>
+                  <div>
+                    <p class="font-medium text-blue-200 mb-1">Non-streaming Mode Active</p>
+                    <p class="text-blue-300">Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.</p>
+                  </div>
+                </div>
+              </div>
+              {{ end }}
+              {{ end }}
+              {{ end }}
+              
               <button 
                 @click="showPromptForm = !showPromptForm" 
                 class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
@@ -457,6 +490,7 @@ SOFTWARE.
           history: [],
           languages: [undefined],
           systemPrompt: "",
+          mcpMode: false,
           clear() {
             this.history.length = 0;
           },