feat(ui): add button to enable Agentic MCP (#6400)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-10-07 00:05:34 +02:00
committed by GitHub
parent 8067d25710
commit 2e51871ad5
3 changed files with 258 additions and 122 deletions

View File

@@ -3,6 +3,7 @@ package openai
import (
"encoding/json"
"errors"
"fmt"
"strings"
"sync"
"time"
@@ -49,6 +50,10 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
return fiber.ErrBadRequest
}
if config.MCP.Servers == "" && config.MCP.Stdio == "" {
return fmt.Errorf("no MCP servers configured")
}
allTools := []*mcp.MCPTool{}
// Get MCP config from model config
@@ -78,6 +83,10 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
// defer tool.Close()
}
if len(cogitoTools) == 0 {
return fmt.Errorf("no tools found in the specified MCP servers")
}
fragment := cogito.NewEmptyFragment()
for _, message := range input.Messages {

View File

@@ -42,6 +42,32 @@ function toggleLoader(show) {
}
}
/**
 * Split a complete model response into user-visible text and "thinking" text.
 *
 * Every `<thinking>…</thinking>` or `<think>…</think>` section is removed from
 * the regular content and collected separately. When several sections are
 * present they are all kept, joined by a newline, in order of appearance.
 *
 * @param {string} content - Raw response text, possibly containing thinking tags.
 * @returns {{regularContent: string, thinkingContent: string}} Trimmed regular
 *   text (with thinking sections stripped) and trimmed thinking text (empty
 *   string when no thinking section is present).
 */
function processThinkingTags(content) {
  // `s` lets `.` span newlines inside a thinking section; `g` visits every section.
  const thinkingRegex = /<thinking>(.*?)<\/thinking>|<think>(.*?)<\/think>/gs;
  const thinkingBlocks = [];

  const regularContent = String(content ?? "").replace(
    thinkingRegex,
    (_match, thinkingBody, thinkBody) => {
      // Only one alternative participates in a match; the other capture group
      // is `undefined`, so pick whichever one is actually set.
      const body = (thinkingBody ?? thinkBody ?? "").trim();
      if (body) {
        thinkingBlocks.push(body);
      }
      // Drop the whole tagged section from the regular content.
      return "";
    },
  );

  return {
    regularContent: regularContent.trim(),
    thinkingContent: thinkingBlocks.join("\n"),
  };
}
function submitSystemPrompt(event) {
event.preventDefault();
localStorage.setItem("system_prompt", document.getElementById("systemPrompt").value);
@@ -193,6 +219,7 @@ function readInputAudio() {
async function promptGPT(systemPrompt, input) {
const model = document.getElementById("chat-model").value;
const mcpMode = Alpine.store("chat").mcpMode;
toggleLoader(true);
messages = Alpine.store("chat").messages();
@@ -254,147 +281,212 @@ async function promptGPT(systemPrompt, input) {
document.getElementById("input_file").value = null;
document.getElementById("fileName").innerHTML = "";
// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
messages: messages,
stream: true,
}),
});
// Choose endpoint based on MCP mode
const endpoint = mcpMode ? "mcp/v1/chat/completions" : "v1/chat/completions";
const requestBody = {
model: model,
messages: messages,
};
// Only add stream parameter for regular chat (MCP doesn't support streaming)
if (!mcpMode) {
requestBody.stream = true;
}
let response;
try {
// Create AbortController for timeout handling
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), mcpMode ? 300000 : 30000); // 5 minutes for MCP, 30 seconds for regular
response = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
"Accept": "application/json",
},
body: JSON.stringify(requestBody),
signal: controller.signal
});
clearTimeout(timeoutId);
} catch (error) {
if (error.name === 'AbortError') {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
);
} else {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Network Error: ${error.message}</span>`,
);
}
toggleLoader(false);
return;
}
if (!response.ok) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
`<span class='error'>Error: POST ${endpoint} ${response.status}</span>`,
);
toggleLoader(false);
return;
}
const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();
if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
let buffer = "";
let contentBuffer = [];
let thinkingContent = "";
let isThinking = false;
let lastThinkingMessageIndex = -1;
try {
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += value;
let lines = buffer.split("\n");
buffer = lines.pop(); // Retain any incomplete line in the buffer
lines.forEach((line) => {
if (line.length === 0 || line.startsWith(":")) return;
if (line === "data: [DONE]") {
return;
if (mcpMode) {
// Handle MCP non-streaming response
try {
const data = await response.json();
// MCP endpoint returns content in choices[0].text, not choices[0].message.content
const content = data.choices[0]?.text || "";
if (content) {
// Process thinking tags using shared function
const { regularContent, thinkingContent } = processThinkingTags(content);
// Add thinking content if present
if (thinkingContent) {
Alpine.store("chat").add("thinking", thinkingContent);
}
// Add regular content if present
if (regularContent) {
Alpine.store("chat").add("assistant", regularContent);
}
}
// Highlight all code blocks
hljs.highlightAll();
} catch (error) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to parse MCP response</span>`,
);
}
} else {
// Handle regular streaming response
const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
const token = jsonData.choices[0].delta.content;
if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}
if (token) {
// Check for thinking tags
if (token.includes("<thinking>") || token.includes("<think>")) {
isThinking = true;
thinkingContent = "";
lastThinkingMessageIndex = -1;
return;
}
if (token.includes("</thinking>") || token.includes("</think>")) {
isThinking = false;
if (thinkingContent.trim()) {
// Only add the final thinking message if we don't already have one
if (lastThinkingMessageIndex === -1) {
Alpine.store("chat").add("thinking", thinkingContent);
}
}
return;
}
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
// Handle content based on thinking state
if (isThinking) {
thinkingContent += token;
// Update the last thinking message or create a new one
if (lastThinkingMessageIndex === -1) {
// Create new thinking message
Alpine.store("chat").add("thinking", thinkingContent);
lastThinkingMessageIndex = Alpine.store("chat").history.length - 1;
} else {
// Update existing thinking message
const chatStore = Alpine.store("chat");
const lastMessage = chatStore.history[lastThinkingMessageIndex];
if (lastMessage && lastMessage.role === "thinking") {
lastMessage.content = thinkingContent;
lastMessage.html = DOMPurify.sanitize(marked.parse(thinkingContent));
}
}
} else {
contentBuffer.push(token);
}
}
} catch (error) {
console.error("Failed to parse line:", line, error);
let buffer = "";
let contentBuffer = [];
let thinkingContent = "";
let isThinking = false;
let lastThinkingMessageIndex = -1;
try {
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += value;
let lines = buffer.split("\n");
buffer = lines.pop(); // Retain any incomplete line in the buffer
lines.forEach((line) => {
if (line.length === 0 || line.startsWith(":")) return;
if (line === "data: [DONE]") {
return;
}
}
});
// Efficiently update the chat in batch
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
const token = jsonData.choices[0].delta.content;
if (token) {
// Check for thinking tags
if (token.includes("<thinking>") || token.includes("<think>")) {
isThinking = true;
thinkingContent = "";
lastThinkingMessageIndex = -1;
return;
}
if (token.includes("</thinking>") || token.includes("</think>")) {
isThinking = false;
if (thinkingContent.trim()) {
// Only add the final thinking message if we don't already have one
if (lastThinkingMessageIndex === -1) {
Alpine.store("chat").add("thinking", thinkingContent);
}
}
return;
}
// Handle content based on thinking state
if (isThinking) {
thinkingContent += token;
// Update the last thinking message or create a new one
if (lastThinkingMessageIndex === -1) {
// Create new thinking message
Alpine.store("chat").add("thinking", thinkingContent);
lastThinkingMessageIndex = Alpine.store("chat").history.length - 1;
} else {
// Update existing thinking message
const chatStore = Alpine.store("chat");
const lastMessage = chatStore.history[lastThinkingMessageIndex];
if (lastMessage && lastMessage.role === "thinking") {
lastMessage.content = thinkingContent;
lastMessage.html = DOMPurify.sanitize(marked.parse(thinkingContent));
}
}
} else {
contentBuffer.push(token);
}
}
} catch (error) {
console.error("Failed to parse line:", line, error);
}
}
});
// Efficiently update the chat in batch
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
contentBuffer = [];
}
}
// Final content flush if any data remains
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
contentBuffer = [];
}
}
if (thinkingContent.trim() && lastThinkingMessageIndex === -1) {
Alpine.store("chat").add("thinking", thinkingContent);
}
// Final content flush if any data remains
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
// Highlight all code blocks once at the end
hljs.highlightAll();
} catch (error) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
}
if (thinkingContent.trim() && lastThinkingMessageIndex === -1) {
Alpine.store("chat").add("thinking", thinkingContent);
}
// Highlight all code blocks once at the end
hljs.highlightAll();
} catch (error) {
console.error("An error occurred while reading the stream:", error);
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
}
// Remove class "loader" from the element with "loader" id
@@ -431,6 +523,7 @@ document.addEventListener("alpine:init", () => {
history: [],
languages: [undefined],
systemPrompt: "",
mcpMode: false,
clear() {
this.history.length = 0;
},

View File

@@ -156,6 +156,39 @@ SOFTWARE.
<!-- Settings tab -->
<div x-show="activeTab === 'settings'" x-data="{ showPromptForm: false }" class="space-y-3">
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
{{ $modelConfig := "" }}
{{ range .ModelsConfig }}
{{ if eq .Name $model }}
{{ $modelConfig = . }}
{{ end }}
{{ end }}
{{ if and $modelConfig (or (ne $modelConfig.MCP.Servers "") (ne $modelConfig.MCP.Stdio "")) }}
<!-- MCP Toggle -->
<div class="flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700">
<span><i class="fa-solid fa-plug mr-2"></i> Agentic MCP Mode</span>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" id="mcp-toggle" class="sr-only peer" x-model="$store.chat.mcpMode">
<div class="w-11 h-6 bg-gray-600 peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-blue-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></div>
</label>
</div>
<!-- MCP Mode Notification -->
<div x-show="$store.chat.mcpMode" class="p-3 bg-blue-900/20 border border-blue-700/50 rounded text-blue-100 text-xs">
<div class="flex items-start space-x-2">
<i class="fa-solid fa-info-circle text-blue-400 mt-0.5"></i>
<div>
<p class="font-medium text-blue-200 mb-1">Non-streaming Mode Active</p>
<p class="text-blue-300">Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.</p>
</div>
</div>
</div>
{{ end }}
{{ end }}
{{ end }}
<button
@click="showPromptForm = !showPromptForm"
class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
@@ -457,6 +490,7 @@ SOFTWARE.
history: [],
languages: [undefined],
systemPrompt: "",
mcpMode: false,
clear() {
this.history.length = 0;
},