mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-06 02:29:54 -06:00
feat(mcp): add LocalAI endpoint to stream live results of the agent (#7274)
* feat(mcp): add LocalAI endpoint to stream live results of the agent Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Refactoring Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * MCP UX integration Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Enhance UX Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Support also non-SSE Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
a09d49da43
commit
47b546afdc
@@ -205,7 +205,7 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
opcache = services.NewOpCache(application.GalleryService())
|
||||
}
|
||||
|
||||
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
|
||||
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator())
|
||||
routes.RegisterOpenAIRoutes(e, requestExtractor, application)
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
|
||||
|
||||
323
core/http/endpoints/localai/mcp.go
Normal file
323
core/http/endpoints/localai/mcp.go
Normal file
@@ -0,0 +1,323 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/cogito"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// MCP SSE Event Types
|
||||
type MCPReasoningEvent struct {
|
||||
Type string `json:"type"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type MCPToolCallEvent struct {
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
Arguments map[string]interface{} `json:"arguments"`
|
||||
Reasoning string `json:"reasoning"`
|
||||
}
|
||||
|
||||
type MCPToolResultEvent struct {
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
Result string `json:"result"`
|
||||
}
|
||||
|
||||
type MCPStatusEvent struct {
|
||||
Type string `json:"type"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
type MCPAssistantEvent struct {
|
||||
Type string `json:"type"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type MCPErrorEvent struct {
|
||||
Type string `json:"type"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
|
||||
// @Summary Stream MCP chat completions with reasoning, tool calls, and results
|
||||
// @Param request body schema.OpenAIRequest true "query params"
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/mcp/chat/completions [post]
|
||||
func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
ctx := c.Request().Context()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
// Handle Correlation
|
||||
id := c.Request().Header.Get("X-Correlation-ID")
|
||||
if id == "" {
|
||||
id = fmt.Sprintf("mcp-%d", time.Now().UnixNano())
|
||||
}
|
||||
|
||||
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
||||
if !ok || config == nil {
|
||||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
if config.MCP.Servers == "" && config.MCP.Stdio == "" {
|
||||
return fmt.Errorf("no MCP servers configured")
|
||||
}
|
||||
|
||||
// Get MCP config from model config
|
||||
remote, stdio, err := config.MCP.MCPConfigFromYAML()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get MCP config: %w", err)
|
||||
}
|
||||
|
||||
// Check if we have tools in cache, or we have to have an initial connection
|
||||
sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get MCP sessions: %w", err)
|
||||
}
|
||||
|
||||
if len(sessions) == 0 {
|
||||
return fmt.Errorf("no working MCP servers found")
|
||||
}
|
||||
|
||||
// Build fragment from messages
|
||||
fragment := cogito.NewEmptyFragment()
|
||||
for _, message := range input.Messages {
|
||||
fragment = fragment.AddMessage(message.Role, message.StringContent)
|
||||
}
|
||||
|
||||
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
|
||||
apiKey := ""
|
||||
if len(appConfig.ApiKeys) > 0 {
|
||||
apiKey = appConfig.ApiKeys[0]
|
||||
}
|
||||
|
||||
ctxWithCancellation, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
// TODO: instead of connecting to the API, we should just wire this internally
|
||||
// and act like completion.go.
|
||||
// We can do this as cogito expects an interface and we can create one that
|
||||
// we satisfy to just call internally ComputeChoices
|
||||
defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
|
||||
|
||||
// Build cogito options using the consolidated method
|
||||
cogitoOpts := config.BuildCogitoOptions()
|
||||
cogitoOpts = append(
|
||||
cogitoOpts,
|
||||
cogito.WithContext(ctxWithCancellation),
|
||||
cogito.WithMCPs(sessions...),
|
||||
)
|
||||
// Check if streaming is requested
|
||||
toStream := input.Stream
|
||||
|
||||
if !toStream {
|
||||
// Non-streaming mode: execute synchronously and return JSON response
|
||||
cogitoOpts = append(
|
||||
cogitoOpts,
|
||||
cogito.WithStatusCallback(func(s string) {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Status: %s", config.Name, s)
|
||||
}),
|
||||
cogito.WithReasoningCallback(func(s string) {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Reasoning: %s", config.Name, s)
|
||||
}),
|
||||
cogito.WithToolCallBack(func(t *cogito.ToolChoice) bool {
|
||||
log.Debug().Str("model", config.Name).Str("tool", t.Name).Str("reasoning", t.Reasoning).Interface("arguments", t.Arguments).Msg("[model agent] Tool call")
|
||||
return true
|
||||
}),
|
||||
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
|
||||
log.Debug().Str("model", config.Name).Str("tool", t.Name).Str("result", t.Result).Interface("tool_arguments", t.ToolArguments).Msg("[model agent] Tool call result")
|
||||
}),
|
||||
)
|
||||
|
||||
f, err := cogito.ExecuteTools(
|
||||
defaultLLM, fragment,
|
||||
cogitoOpts...,
|
||||
)
|
||||
if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err = defaultLLM.Ask(ctxWithCancellation, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
|
||||
Object: "chat.completion",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(200, resp)
|
||||
}
|
||||
|
||||
// Streaming mode: use SSE
|
||||
// Set up SSE headers
|
||||
c.Response().Header().Set("Content-Type", "text/event-stream")
|
||||
c.Response().Header().Set("Cache-Control", "no-cache")
|
||||
c.Response().Header().Set("Connection", "keep-alive")
|
||||
c.Response().Header().Set("X-Correlation-ID", id)
|
||||
|
||||
// Create channel for streaming events
|
||||
events := make(chan interface{})
|
||||
ended := make(chan error, 1)
|
||||
|
||||
// Set up callbacks for streaming
|
||||
statusCallback := func(s string) {
|
||||
events <- MCPStatusEvent{
|
||||
Type: "status",
|
||||
Message: s,
|
||||
}
|
||||
}
|
||||
|
||||
reasoningCallback := func(s string) {
|
||||
events <- MCPReasoningEvent{
|
||||
Type: "reasoning",
|
||||
Content: s,
|
||||
}
|
||||
}
|
||||
|
||||
toolCallCallback := func(t *cogito.ToolChoice) bool {
|
||||
events <- MCPToolCallEvent{
|
||||
Type: "tool_call",
|
||||
Name: t.Name,
|
||||
Arguments: t.Arguments,
|
||||
Reasoning: t.Reasoning,
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
toolCallResultCallback := func(t cogito.ToolStatus) {
|
||||
events <- MCPToolResultEvent{
|
||||
Type: "tool_result",
|
||||
Name: t.Name,
|
||||
Result: t.Result,
|
||||
}
|
||||
}
|
||||
|
||||
cogitoOpts = append(cogitoOpts,
|
||||
cogito.WithStatusCallback(statusCallback),
|
||||
cogito.WithReasoningCallback(reasoningCallback),
|
||||
cogito.WithToolCallBack(toolCallCallback),
|
||||
cogito.WithToolCallResultCallback(toolCallResultCallback),
|
||||
)
|
||||
|
||||
// Execute tools in a goroutine
|
||||
go func() {
|
||||
defer close(events)
|
||||
|
||||
f, err := cogito.ExecuteTools(
|
||||
defaultLLM, fragment,
|
||||
cogitoOpts...,
|
||||
)
|
||||
if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
|
||||
events <- MCPErrorEvent{
|
||||
Type: "error",
|
||||
Message: fmt.Sprintf("Failed to execute tools: %v", err),
|
||||
}
|
||||
ended <- err
|
||||
return
|
||||
}
|
||||
|
||||
// Get final response
|
||||
f, err = defaultLLM.Ask(ctxWithCancellation, f)
|
||||
if err != nil {
|
||||
events <- MCPErrorEvent{
|
||||
Type: "error",
|
||||
Message: fmt.Sprintf("Failed to get response: %v", err),
|
||||
}
|
||||
ended <- err
|
||||
return
|
||||
}
|
||||
|
||||
// Stream final assistant response
|
||||
content := f.LastMessage().Content
|
||||
events <- MCPAssistantEvent{
|
||||
Type: "assistant",
|
||||
Content: content,
|
||||
}
|
||||
|
||||
ended <- nil
|
||||
}()
|
||||
|
||||
// Stream events to client
|
||||
LOOP:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// Context was cancelled (client disconnected or request cancelled)
|
||||
log.Debug().Msgf("Request context cancelled, stopping stream")
|
||||
cancel()
|
||||
break LOOP
|
||||
case event := <-events:
|
||||
if event == nil {
|
||||
// Channel closed
|
||||
break LOOP
|
||||
}
|
||||
eventData, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Failed to marshal event: %v", err)
|
||||
continue
|
||||
}
|
||||
log.Debug().Msgf("Sending event: %s", string(eventData))
|
||||
_, err = fmt.Fprintf(c.Response().Writer, "data: %s\n\n", string(eventData))
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Sending event failed: %v", err)
|
||||
cancel()
|
||||
return err
|
||||
}
|
||||
c.Response().Flush()
|
||||
case err := <-ended:
|
||||
if err == nil {
|
||||
// Send done signal
|
||||
fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
|
||||
c.Response().Flush()
|
||||
break LOOP
|
||||
}
|
||||
log.Error().Msgf("Stream ended with error: %v", err)
|
||||
errorEvent := MCPErrorEvent{
|
||||
Type: "error",
|
||||
Message: err.Error(),
|
||||
}
|
||||
errorData, marshalErr := json.Marshal(errorEvent)
|
||||
if marshalErr != nil {
|
||||
fmt.Fprintf(c.Response().Writer, "data: {\"type\":\"error\",\"message\":\"Internal error\"}\n\n")
|
||||
} else {
|
||||
fmt.Fprintf(c.Response().Writer, "data: %s\n\n", string(errorData))
|
||||
}
|
||||
fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
|
||||
c.Response().Flush()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Stream ended")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -90,40 +90,27 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
|
||||
// we satisfy to just call internally ComputeChoices
|
||||
defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
|
||||
|
||||
cogitoOpts := []cogito.Option{
|
||||
// Build cogito options using the consolidated method
|
||||
cogitoOpts := config.BuildCogitoOptions()
|
||||
|
||||
cogitoOpts = append(
|
||||
cogitoOpts,
|
||||
cogito.WithContext(ctxWithCancellation),
|
||||
cogito.WithMCPs(sessions...),
|
||||
cogito.WithStatusCallback(func(s string) {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Status: %s", config.Name, s)
|
||||
}),
|
||||
cogito.WithContext(ctxWithCancellation),
|
||||
cogito.WithMCPs(sessions...),
|
||||
cogito.WithIterations(3), // default to 3 iterations
|
||||
cogito.WithMaxAttempts(3), // default to 3 attempts
|
||||
cogito.WithForceReasoning(),
|
||||
}
|
||||
|
||||
if config.Agent.EnableReasoning {
|
||||
cogitoOpts = append(cogitoOpts, cogito.EnableToolReasoner)
|
||||
}
|
||||
|
||||
if config.Agent.EnablePlanning {
|
||||
cogitoOpts = append(cogitoOpts, cogito.EnableAutoPlan)
|
||||
}
|
||||
|
||||
if config.Agent.EnableMCPPrompts {
|
||||
cogitoOpts = append(cogitoOpts, cogito.EnableMCPPrompts)
|
||||
}
|
||||
|
||||
if config.Agent.EnablePlanReEvaluator {
|
||||
cogitoOpts = append(cogitoOpts, cogito.EnableAutoPlanReEvaluator)
|
||||
}
|
||||
|
||||
if config.Agent.MaxIterations != 0 {
|
||||
cogitoOpts = append(cogitoOpts, cogito.WithIterations(config.Agent.MaxIterations))
|
||||
}
|
||||
|
||||
if config.Agent.MaxAttempts != 0 {
|
||||
cogitoOpts = append(cogitoOpts, cogito.WithMaxAttempts(config.Agent.MaxAttempts))
|
||||
}
|
||||
cogito.WithReasoningCallback(func(s string) {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Reasoning: %s", config.Name, s)
|
||||
}),
|
||||
cogito.WithToolCallBack(func(t *cogito.ToolChoice) bool {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", t.Name, t.Reasoning, t.Arguments)
|
||||
return true
|
||||
}),
|
||||
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, tool arguments: %+v", t.Name, t.Result, t.ToolArguments)
|
||||
}),
|
||||
)
|
||||
|
||||
f, err := cogito.ExecuteTools(
|
||||
defaultLLM, fragment,
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
echoswagger "github.com/swaggo/echo-swagger"
|
||||
@@ -18,7 +19,8 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
galleryService *services.GalleryService,
|
||||
opcache *services.OpCache) {
|
||||
opcache *services.OpCache,
|
||||
evaluator *templates.Evaluator) {
|
||||
|
||||
router.GET("/swagger/*", echoswagger.WrapHandler) // default
|
||||
|
||||
@@ -133,4 +135,23 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
|
||||
|
||||
// MCP Stream endpoint
|
||||
if evaluator != nil {
|
||||
mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
|
||||
mcpStreamMiddleware := []echo.MiddlewareFunc{
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
if err := requestExtractor.SetOpenAIRequest(c); err != nil {
|
||||
return err
|
||||
}
|
||||
return next(c)
|
||||
}
|
||||
},
|
||||
}
|
||||
router.POST("/v1/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
|
||||
router.POST("/mcp/v1/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -267,7 +267,15 @@ function processAndSendMessage(inputValue) {
|
||||
const input = document.getElementById("input");
|
||||
if (input) input.value = "";
|
||||
const systemPrompt = localStorage.getItem("system_prompt");
|
||||
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
|
||||
Alpine.nextTick(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Reset token tracking before starting new request
|
||||
requestStartTime = Date.now();
|
||||
@@ -379,16 +387,14 @@ async function promptGPT(systemPrompt, input) {
|
||||
document.getElementById("fileName").innerHTML = "";
|
||||
|
||||
// Choose endpoint based on MCP mode
|
||||
const endpoint = mcpMode ? "mcp/v1/chat/completions" : "v1/chat/completions";
|
||||
const endpoint = mcpMode ? "v1/mcp/chat/completions" : "v1/chat/completions";
|
||||
const requestBody = {
|
||||
model: model,
|
||||
messages: messages,
|
||||
};
|
||||
|
||||
// Only add stream parameter for regular chat (MCP doesn't support streaming)
|
||||
if (!mcpMode) {
|
||||
requestBody.stream = true;
|
||||
}
|
||||
// Add stream parameter for both regular chat and MCP (MCP now supports SSE streaming)
|
||||
requestBody.stream = true;
|
||||
|
||||
let response;
|
||||
try {
|
||||
@@ -444,64 +450,441 @@ async function promptGPT(systemPrompt, input) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle streaming response (both regular and MCP mode now use SSE)
|
||||
if (mcpMode) {
|
||||
// Handle MCP non-streaming response
|
||||
// Handle MCP SSE streaming with new event types
|
||||
const reader = response.body
|
||||
?.pipeThrough(new TextDecoderStream())
|
||||
.getReader();
|
||||
|
||||
if (!reader) {
|
||||
Alpine.store("chat").add(
|
||||
"assistant",
|
||||
`<span class='error'>Error: Failed to decode MCP API response</span>`,
|
||||
);
|
||||
toggleLoader(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Store reader globally so stop button can cancel it
|
||||
currentReader = reader;
|
||||
|
||||
let buffer = "";
|
||||
let assistantContent = "";
|
||||
let assistantContentBuffer = [];
|
||||
let thinkingContent = "";
|
||||
let isThinking = false;
|
||||
let lastAssistantMessageIndex = -1;
|
||||
let lastThinkingMessageIndex = -1;
|
||||
let lastThinkingScrollTime = 0;
|
||||
const THINKING_SCROLL_THROTTLE = 200; // Throttle scrolling to every 200ms
|
||||
|
||||
try {
|
||||
const data = await response.json();
|
||||
|
||||
// Update token usage if present
|
||||
if (data.usage) {
|
||||
Alpine.store("chat").updateTokenUsage(data.usage);
|
||||
}
|
||||
|
||||
// MCP endpoint returns content in choices[0].message.content (chat completion format)
|
||||
// Fallback to choices[0].text for backward compatibility (completion format)
|
||||
const content = data.choices[0]?.message?.content || data.choices[0]?.text || "";
|
||||
|
||||
if (!content && (!data.choices || data.choices.length === 0)) {
|
||||
Alpine.store("chat").add(
|
||||
"assistant",
|
||||
`<span class='error'>Error: Empty response from MCP endpoint</span>`,
|
||||
);
|
||||
toggleLoader(false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (content) {
|
||||
// Count tokens for rate calculation (MCP mode - full content at once)
|
||||
// Prefer actual token count from API if available
|
||||
if (data.usage && data.usage.completion_tokens) {
|
||||
tokensReceived = data.usage.completion_tokens;
|
||||
} else {
|
||||
tokensReceived += Math.ceil(content.length / 4);
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
buffer += value;
|
||||
|
||||
let lines = buffer.split("\n");
|
||||
buffer = lines.pop(); // Retain any incomplete line in the buffer
|
||||
|
||||
lines.forEach((line) => {
|
||||
if (line.length === 0 || line.startsWith(":")) return;
|
||||
if (line === "data: [DONE]") {
|
||||
return;
|
||||
}
|
||||
|
||||
if (line.startsWith("data: ")) {
|
||||
try {
|
||||
const eventData = JSON.parse(line.substring(6));
|
||||
|
||||
// Handle different event types
|
||||
switch (eventData.type) {
|
||||
case "reasoning":
|
||||
if (eventData.content) {
|
||||
const chatStore = Alpine.store("chat");
|
||||
// Insert reasoning before assistant message if it exists
|
||||
if (lastAssistantMessageIndex >= 0 && chatStore.history[lastAssistantMessageIndex]?.role === "assistant") {
|
||||
chatStore.history.splice(lastAssistantMessageIndex, 0, {
|
||||
role: "reasoning",
|
||||
content: eventData.content,
|
||||
html: DOMPurify.sanitize(marked.parse(eventData.content)),
|
||||
image: [],
|
||||
audio: [],
|
||||
expanded: false // Reasoning is always collapsed
|
||||
});
|
||||
lastAssistantMessageIndex++; // Adjust index since we inserted
|
||||
// Scroll smoothly after adding reasoning
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
} else {
|
||||
// No assistant message yet, just add normally
|
||||
chatStore.add("reasoning", eventData.content);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case "tool_call":
|
||||
if (eventData.name) {
|
||||
// Store as JSON for better formatting
|
||||
const toolCallData = {
|
||||
name: eventData.name,
|
||||
arguments: eventData.arguments || {},
|
||||
reasoning: eventData.reasoning || ""
|
||||
};
|
||||
Alpine.store("chat").add("tool_call", JSON.stringify(toolCallData, null, 2));
|
||||
// Scroll smoothly after adding tool call
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
break;
|
||||
|
||||
case "tool_result":
|
||||
if (eventData.name) {
|
||||
// Store as JSON for better formatting
|
||||
const toolResultData = {
|
||||
name: eventData.name,
|
||||
result: eventData.result || ""
|
||||
};
|
||||
Alpine.store("chat").add("tool_result", JSON.stringify(toolResultData, null, 2));
|
||||
// Scroll smoothly after adding tool result
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
break;
|
||||
|
||||
case "status":
|
||||
// Status messages can be logged but not necessarily displayed
|
||||
console.log("[MCP Status]", eventData.message);
|
||||
break;
|
||||
|
||||
case "assistant":
|
||||
if (eventData.content) {
|
||||
assistantContent += eventData.content;
|
||||
const contentChunk = eventData.content;
|
||||
|
||||
// Count tokens for rate calculation
|
||||
tokensReceived += Math.ceil(contentChunk.length / 4);
|
||||
updateTokensPerSecond();
|
||||
|
||||
// Check for thinking tags in the chunk (incremental detection)
|
||||
if (contentChunk.includes("<thinking>") || contentChunk.includes("<think>")) {
|
||||
isThinking = true;
|
||||
thinkingContent = "";
|
||||
lastThinkingMessageIndex = -1;
|
||||
}
|
||||
|
||||
if (contentChunk.includes("</thinking>") || contentChunk.includes("</think>")) {
|
||||
isThinking = false;
|
||||
// When closing tag is detected, process the accumulated thinking content
|
||||
if (thinkingContent.trim()) {
|
||||
// Extract just the thinking part from the accumulated content
|
||||
const thinkingMatch = thinkingContent.match(/<(?:thinking|redacted_reasoning)>(.*?)<\/(?:thinking|redacted_reasoning)>/s);
|
||||
if (thinkingMatch && thinkingMatch[1]) {
|
||||
const extractedThinking = thinkingMatch[1];
|
||||
const chatStore = Alpine.store("chat");
|
||||
const isMCPMode = chatStore.mcpMode || false;
|
||||
const shouldExpand = !isMCPMode; // Expanded in non-MCP mode, collapsed in MCP mode
|
||||
if (lastThinkingMessageIndex === -1) {
|
||||
// Insert thinking before the last assistant message if it exists
|
||||
if (lastAssistantMessageIndex >= 0 && chatStore.history[lastAssistantMessageIndex]?.role === "assistant") {
|
||||
// Insert before assistant message
|
||||
chatStore.history.splice(lastAssistantMessageIndex, 0, {
|
||||
role: "thinking",
|
||||
content: extractedThinking,
|
||||
html: DOMPurify.sanitize(marked.parse(extractedThinking)),
|
||||
image: [],
|
||||
audio: [],
|
||||
expanded: shouldExpand
|
||||
});
|
||||
lastThinkingMessageIndex = lastAssistantMessageIndex;
|
||||
lastAssistantMessageIndex++; // Adjust index since we inserted
|
||||
} else {
|
||||
// No assistant message yet, just add normally
|
||||
chatStore.add("thinking", extractedThinking);
|
||||
lastThinkingMessageIndex = chatStore.history.length - 1;
|
||||
}
|
||||
} else {
|
||||
// Update existing thinking message
|
||||
const lastMessage = chatStore.history[lastThinkingMessageIndex];
|
||||
if (lastMessage && lastMessage.role === "thinking") {
|
||||
lastMessage.content = extractedThinking;
|
||||
lastMessage.html = DOMPurify.sanitize(marked.parse(extractedThinking));
|
||||
}
|
||||
}
|
||||
// Scroll when thinking is finalized in non-MCP mode
|
||||
if (!isMCPMode) {
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 50);
|
||||
}
|
||||
}
|
||||
thinkingContent = "";
|
||||
}
|
||||
}
|
||||
|
||||
// Handle content based on thinking state
|
||||
if (isThinking) {
|
||||
thinkingContent += contentChunk;
|
||||
const chatStore = Alpine.store("chat");
|
||||
const isMCPMode = chatStore.mcpMode || false;
|
||||
const shouldExpand = !isMCPMode; // Expanded in non-MCP mode, collapsed in MCP mode
|
||||
// Update the last thinking message or create a new one (incremental)
|
||||
if (lastThinkingMessageIndex === -1) {
|
||||
// Insert thinking before the last assistant message if it exists
|
||||
if (lastAssistantMessageIndex >= 0 && chatStore.history[lastAssistantMessageIndex]?.role === "assistant") {
|
||||
// Insert before assistant message
|
||||
chatStore.history.splice(lastAssistantMessageIndex, 0, {
|
||||
role: "thinking",
|
||||
content: thinkingContent,
|
||||
html: DOMPurify.sanitize(marked.parse(thinkingContent)),
|
||||
image: [],
|
||||
audio: [],
|
||||
expanded: shouldExpand
|
||||
});
|
||||
lastThinkingMessageIndex = lastAssistantMessageIndex;
|
||||
lastAssistantMessageIndex++; // Adjust index since we inserted
|
||||
} else {
|
||||
// No assistant message yet, just add normally
|
||||
chatStore.add("thinking", thinkingContent);
|
||||
lastThinkingMessageIndex = chatStore.history.length - 1;
|
||||
}
|
||||
} else {
|
||||
// Update existing thinking message
|
||||
const lastMessage = chatStore.history[lastThinkingMessageIndex];
|
||||
if (lastMessage && lastMessage.role === "thinking") {
|
||||
lastMessage.content = thinkingContent;
|
||||
lastMessage.html = DOMPurify.sanitize(marked.parse(thinkingContent));
|
||||
}
|
||||
}
|
||||
// Scroll when thinking is updated in non-MCP mode (throttled)
|
||||
if (!isMCPMode) {
|
||||
const now = Date.now();
|
||||
if (now - lastThinkingScrollTime > THINKING_SCROLL_THROTTLE) {
|
||||
lastThinkingScrollTime = now;
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Regular assistant content - buffer it for batch processing
|
||||
assistantContentBuffer.push(contentChunk);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case "error":
|
||||
Alpine.store("chat").add(
|
||||
"assistant",
|
||||
`<span class='error'>MCP Error: ${eventData.message}</span>`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Failed to parse MCP event:", line, error);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Efficiently update assistant message in batch
|
||||
if (assistantContentBuffer.length > 0) {
|
||||
const regularContent = assistantContentBuffer.join("");
|
||||
|
||||
// Process any thinking tags that might be in the accumulated content
|
||||
// This handles cases where tags are split across chunks
|
||||
const { regularContent: processedRegular, thinkingContent: processedThinking } = processThinkingTags(regularContent);
|
||||
|
||||
// Update or create assistant message with processed regular content
|
||||
if (lastAssistantMessageIndex === -1) {
|
||||
if (processedRegular && processedRegular.trim()) {
|
||||
Alpine.store("chat").add("assistant", processedRegular);
|
||||
lastAssistantMessageIndex = Alpine.store("chat").history.length - 1;
|
||||
}
|
||||
} else {
|
||||
const chatStore = Alpine.store("chat");
|
||||
const lastMessage = chatStore.history[lastAssistantMessageIndex];
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
lastMessage.content = (lastMessage.content || "") + (processedRegular || "");
|
||||
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
|
||||
}
|
||||
}
|
||||
|
||||
// Add any extracted thinking content from the processed buffer BEFORE assistant message
|
||||
if (processedThinking && processedThinking.trim()) {
|
||||
const chatStore = Alpine.store("chat");
|
||||
const isMCPMode = chatStore.mcpMode || false;
|
||||
const shouldExpand = !isMCPMode; // Expanded in non-MCP mode, collapsed in MCP mode
|
||||
// Insert thinking before assistant message if it exists
|
||||
if (lastAssistantMessageIndex >= 0 && chatStore.history[lastAssistantMessageIndex]?.role === "assistant") {
|
||||
chatStore.history.splice(lastAssistantMessageIndex, 0, {
|
||||
role: "thinking",
|
||||
content: processedThinking,
|
||||
html: DOMPurify.sanitize(marked.parse(processedThinking)),
|
||||
image: [],
|
||||
audio: [],
|
||||
expanded: shouldExpand
|
||||
});
|
||||
lastAssistantMessageIndex++; // Adjust index since we inserted
|
||||
} else {
|
||||
// No assistant message yet, just add normally
|
||||
chatStore.add("thinking", processedThinking);
|
||||
}
|
||||
}
|
||||
|
||||
assistantContentBuffer = [];
|
||||
}
|
||||
updateTokensPerSecond();
|
||||
}
|
||||
|
||||
// Final assistant content flush if any data remains
|
||||
if (assistantContentBuffer.length > 0) {
|
||||
const regularContent = assistantContentBuffer.join("");
|
||||
// Process any remaining thinking tags that might be in the buffer
|
||||
const { regularContent: processedRegular, thinkingContent: processedThinking } = processThinkingTags(regularContent);
|
||||
|
||||
// Process thinking tags using shared function
|
||||
const { regularContent, thinkingContent } = processThinkingTags(content);
|
||||
const chatStore = Alpine.store("chat");
|
||||
|
||||
// Add thinking content if present
|
||||
if (thinkingContent) {
|
||||
// First, add any extracted thinking content BEFORE assistant message
|
||||
if (processedThinking && processedThinking.trim()) {
|
||||
const isMCPMode = chatStore.mcpMode || false;
|
||||
const shouldExpand = !isMCPMode; // Expanded in non-MCP mode, collapsed in MCP mode
|
||||
// Insert thinking before assistant message if it exists
|
||||
if (lastAssistantMessageIndex >= 0 && chatStore.history[lastAssistantMessageIndex]?.role === "assistant") {
|
||||
chatStore.history.splice(lastAssistantMessageIndex, 0, {
|
||||
role: "thinking",
|
||||
content: processedThinking,
|
||||
html: DOMPurify.sanitize(marked.parse(processedThinking)),
|
||||
image: [],
|
||||
audio: [],
|
||||
expanded: shouldExpand
|
||||
});
|
||||
lastAssistantMessageIndex++; // Adjust index since we inserted
|
||||
} else {
|
||||
// No assistant message yet, just add normally
|
||||
chatStore.add("thinking", processedThinking);
|
||||
}
|
||||
}
|
||||
|
||||
// Then update or create assistant message
|
||||
if (lastAssistantMessageIndex !== -1) {
|
||||
const lastMessage = chatStore.history[lastAssistantMessageIndex];
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
lastMessage.content = (lastMessage.content || "") + (processedRegular || "");
|
||||
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
|
||||
}
|
||||
} else if (processedRegular && processedRegular.trim()) {
|
||||
chatStore.add("assistant", processedRegular);
|
||||
lastAssistantMessageIndex = chatStore.history.length - 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Final thinking content flush if any data remains (from incremental detection)
|
||||
if (thinkingContent.trim() && lastThinkingMessageIndex === -1) {
|
||||
// Extract thinking content if tags are present
|
||||
const thinkingMatch = thinkingContent.match(/<(?:thinking|redacted_reasoning)>(.*?)<\/(?:thinking|redacted_reasoning)>/s);
|
||||
if (thinkingMatch && thinkingMatch[1]) {
|
||||
const chatStore = Alpine.store("chat");
|
||||
const isMCPMode = chatStore.mcpMode || false;
|
||||
const shouldExpand = !isMCPMode; // Expanded in non-MCP mode, collapsed in MCP mode
|
||||
// Insert thinking before assistant message if it exists
|
||||
if (lastAssistantMessageIndex >= 0 && chatStore.history[lastAssistantMessageIndex]?.role === "assistant") {
|
||||
chatStore.history.splice(lastAssistantMessageIndex, 0, {
|
||||
role: "thinking",
|
||||
content: thinkingMatch[1],
|
||||
html: DOMPurify.sanitize(marked.parse(thinkingMatch[1])),
|
||||
image: [],
|
||||
audio: [],
|
||||
expanded: shouldExpand
|
||||
});
|
||||
} else {
|
||||
// No assistant message yet, just add normally
|
||||
chatStore.add("thinking", thinkingMatch[1]);
|
||||
}
|
||||
} else {
|
||||
Alpine.store("chat").add("thinking", thinkingContent);
|
||||
}
|
||||
|
||||
// Add regular content if present
|
||||
if (regularContent) {
|
||||
Alpine.store("chat").add("assistant", regularContent);
|
||||
}
|
||||
}
|
||||
|
||||
// Highlight all code blocks
|
||||
// Final pass: process the entire assistantContent to catch any missed thinking tags
|
||||
// This ensures we don't miss tags that were split across chunks
|
||||
if (assistantContent.trim()) {
|
||||
const { regularContent: finalRegular, thinkingContent: finalThinking } = processThinkingTags(assistantContent);
|
||||
|
||||
// Update assistant message with final processed content (without thinking tags)
|
||||
if (finalRegular && finalRegular.trim()) {
|
||||
if (lastAssistantMessageIndex !== -1) {
|
||||
const chatStore = Alpine.store("chat");
|
||||
const lastMessage = chatStore.history[lastAssistantMessageIndex];
|
||||
if (lastMessage && lastMessage.role === "assistant") {
|
||||
lastMessage.content = finalRegular;
|
||||
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
|
||||
}
|
||||
} else {
|
||||
Alpine.store("chat").add("assistant", finalRegular);
|
||||
}
|
||||
}
|
||||
|
||||
// Add any extracted thinking content (only if not already added)
|
||||
if (finalThinking && finalThinking.trim()) {
|
||||
const hasThinking = Alpine.store("chat").history.some(msg =>
|
||||
msg.role === "thinking" && msg.content.trim() === finalThinking.trim()
|
||||
);
|
||||
if (!hasThinking) {
|
||||
Alpine.store("chat").add("thinking", finalThinking);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Highlight all code blocks once at the end
|
||||
hljs.highlightAll();
|
||||
} catch (error) {
|
||||
// Don't show error if request was aborted by user
|
||||
if (error.name !== 'AbortError' || currentAbortController) {
|
||||
if (error.name !== 'AbortError' || !currentAbortController) {
|
||||
Alpine.store("chat").add(
|
||||
"assistant",
|
||||
`<span class='error'>Error: Failed to parse MCP response</span>`,
|
||||
`<span class='error'>Error: Failed to process MCP stream</span>`,
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
// Perform any cleanup if necessary
|
||||
if (reader) {
|
||||
reader.releaseLock();
|
||||
}
|
||||
currentReader = null;
|
||||
currentAbortController = null;
|
||||
}
|
||||
} else {
|
||||
@@ -539,6 +922,8 @@ async function promptGPT(systemPrompt, input) {
|
||||
let thinkingContent = "";
|
||||
let isThinking = false;
|
||||
let lastThinkingMessageIndex = -1;
|
||||
let lastThinkingScrollTime = 0;
|
||||
const THINKING_SCROLL_THROTTLE = 200; // Throttle scrolling to every 200ms
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
@@ -606,6 +991,20 @@ async function promptGPT(systemPrompt, input) {
|
||||
lastMessage.html = DOMPurify.sanitize(marked.parse(thinkingContent));
|
||||
}
|
||||
}
|
||||
// Scroll when thinking is updated (throttled)
|
||||
const now = Date.now();
|
||||
if (now - lastThinkingScrollTime > THINKING_SCROLL_THROTTLE) {
|
||||
lastThinkingScrollTime = now;
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
} else {
|
||||
contentBuffer.push(token);
|
||||
}
|
||||
@@ -620,6 +1019,16 @@ async function promptGPT(systemPrompt, input) {
|
||||
if (contentBuffer.length > 0) {
|
||||
addToChat(contentBuffer.join(""));
|
||||
contentBuffer = [];
|
||||
// Scroll when assistant content is updated (this will also show thinking messages above)
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 50);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -654,8 +1063,17 @@ async function promptGPT(systemPrompt, input) {
|
||||
// Remove class "loader" from the element with "loader" id
|
||||
toggleLoader(false);
|
||||
|
||||
// scroll to the bottom of the chat
|
||||
document.getElementById('messages').scrollIntoView(false)
|
||||
// scroll to the bottom of the chat consistently
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
|
||||
// set focus to the input
|
||||
document.getElementById("input").focus();
|
||||
}
|
||||
@@ -784,7 +1202,13 @@ document.addEventListener("alpine:init", () => {
|
||||
audio: audio || []
|
||||
});
|
||||
}
|
||||
document.getElementById('messages').scrollIntoView(false);
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
const parser = new DOMParser();
|
||||
const html = parser.parseFromString(
|
||||
this.history[this.history.length - 1].html,
|
||||
@@ -812,3 +1236,4 @@ document.addEventListener("alpine:init", () => {
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -111,14 +111,36 @@ SOFTWARE.
|
||||
},
|
||||
add(role, content, image, audio) {
|
||||
const N = this.history.length - 1;
|
||||
// For thinking messages, always create a new message
|
||||
if (role === "thinking") {
|
||||
// For thinking, reasoning, tool_call, and tool_result messages, always create a new message
|
||||
if (role === "thinking" || role === "reasoning" || role === "tool_call" || role === "tool_result") {
|
||||
let c = "";
|
||||
const lines = content.split("\n");
|
||||
lines.forEach((line) => {
|
||||
c += DOMPurify.sanitize(marked.parse(line));
|
||||
});
|
||||
this.history.push({ role, content, html: c, image, audio });
|
||||
if (role === "tool_call" || role === "tool_result") {
|
||||
// For tool calls and results, try to parse as JSON and format nicely
|
||||
try {
|
||||
const parsed = typeof content === 'string' ? JSON.parse(content) : content;
|
||||
// Format JSON with proper indentation
|
||||
const formatted = JSON.stringify(parsed, null, 2);
|
||||
c = DOMPurify.sanitize('<pre><code class="language-json">' + formatted + '</code></pre>');
|
||||
} catch (e) {
|
||||
// If not JSON, treat as markdown
|
||||
const lines = content.split("\n");
|
||||
lines.forEach((line) => {
|
||||
c += DOMPurify.sanitize(marked.parse(line));
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// For thinking and reasoning, format as markdown
|
||||
const lines = content.split("\n");
|
||||
lines.forEach((line) => {
|
||||
c += DOMPurify.sanitize(marked.parse(line));
|
||||
});
|
||||
}
|
||||
// Set expanded state: thinking is expanded by default in non-MCP mode, collapsed in MCP mode
|
||||
// Reasoning, tool_call, and tool_result are always collapsed by default
|
||||
const isMCPMode = this.mcpMode || false;
|
||||
const shouldExpand = (role === "thinking" && !isMCPMode) || false;
|
||||
this.history.push({ role, content, html: c, image, audio, expanded: shouldExpand });
|
||||
|
||||
}
|
||||
// For other messages, merge if same role
|
||||
else if (this.history.length && this.history[N].role === role) {
|
||||
@@ -147,7 +169,16 @@ SOFTWARE.
|
||||
audio: audio || []
|
||||
});
|
||||
}
|
||||
document.getElementById('messages').scrollIntoView(false);
|
||||
// Scroll to bottom consistently for all messages (use #chat as it's the scrollable container)
|
||||
setTimeout(() => {
|
||||
const chatContainer = document.getElementById('chat');
|
||||
if (chatContainer) {
|
||||
chatContainer.scrollTo({
|
||||
top: chatContainer.scrollHeight,
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
const parser = new DOMParser();
|
||||
const html = parser.parseFromString(
|
||||
this.history[this.history.length - 1].html,
|
||||
@@ -160,9 +191,33 @@ SOFTWARE.
|
||||
if (this.languages.includes(language)) return;
|
||||
const script = document.createElement("script");
|
||||
script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
|
||||
script.onload = () => {
|
||||
// Re-highlight after language script loads
|
||||
if (window.hljs) {
|
||||
const container = document.getElementById('messages');
|
||||
if (container) {
|
||||
container.querySelectorAll('pre code.language-json').forEach(block => {
|
||||
window.hljs.highlightElement(block);
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
document.head.appendChild(script);
|
||||
this.languages.push(language);
|
||||
});
|
||||
// Highlight code blocks immediately if hljs is available
|
||||
if (window.hljs) {
|
||||
setTimeout(() => {
|
||||
const container = document.getElementById('messages');
|
||||
if (container) {
|
||||
container.querySelectorAll('pre code.language-json').forEach(block => {
|
||||
if (!block.classList.contains('hljs')) {
|
||||
window.hljs.highlightElement(block);
|
||||
}
|
||||
});
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
},
|
||||
messages() {
|
||||
return this.history.map((message) => ({
|
||||
@@ -484,9 +539,108 @@ SOFTWARE.
|
||||
<li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file text-[#38BDF8]"></i> icon.</li>
|
||||
</ul>
|
||||
</p>
|
||||
<div id="messages" class="max-w-3xl mx-auto">
|
||||
<template x-for="message in history">
|
||||
<div :class="message.role === 'user' ? 'flex items-start space-x-2 my-2 justify-end' : 'flex items-start space-x-2 my-2'">
|
||||
<div id="messages" class="max-w-3xl mx-auto space-y-2">
|
||||
<template x-for="(message, index) in history" :key="index">
|
||||
<div>
|
||||
<!-- Reasoning/Thinking messages appear first (before assistant) - collapsible in MCP mode -->
|
||||
<template x-if="message.role === 'reasoning' || message.role === 'thinking'">
|
||||
<div class="flex items-start space-x-2 mb-1">
|
||||
<div class="flex flex-col flex-1">
|
||||
<div class="p-2 flex-1 rounded-lg bg-[#38BDF8]/10 text-[#94A3B8] border border-[#38BDF8]/30">
|
||||
<button
|
||||
@click="message.expanded = !message.expanded"
|
||||
class="w-full flex items-center justify-between text-left hover:bg-[#38BDF8]/20 rounded p-2 transition-colors"
|
||||
>
|
||||
<div class="flex items-center space-x-2">
|
||||
<i :class="message.role === 'thinking' ? 'fa-solid fa-brain' : 'fa-solid fa-lightbulb'" class="text-[#38BDF8]"></i>
|
||||
<span class="text-xs font-semibold text-[#38BDF8]" x-text="message.role === 'thinking' ? 'Thinking' : 'Reasoning'"></span>
|
||||
<span class="text-xs text-[#94A3B8]" x-show="message.content && message.content.length > 0" x-text="'(' + Math.ceil(message.content.length / 100) + ' lines)'"></span>
|
||||
</div>
|
||||
<i
|
||||
class="fa-solid text-[#38BDF8] transition-transform text-xs"
|
||||
:class="message.expanded ? 'fa-chevron-up' : 'fa-chevron-down'"
|
||||
></i>
|
||||
</button>
|
||||
<div
|
||||
x-show="message.expanded"
|
||||
x-transition
|
||||
class="mt-2 pt-2 border-t border-[#38BDF8]/20"
|
||||
>
|
||||
<div class="text-[#E5E7EB] text-sm max-h-96 overflow-auto" x-html="message.html"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<!-- Tool calls (collapsible) -->
|
||||
<template x-if="message.role === 'tool_call'">
|
||||
<div class="flex items-start space-x-2 mb-1">
|
||||
<div class="flex flex-col flex-1">
|
||||
<div class="p-2 flex-1 rounded-lg bg-[#8B5CF6]/10 text-[#94A3B8] border border-[#8B5CF6]/30">
|
||||
<button
|
||||
@click="message.expanded = !message.expanded"
|
||||
class="w-full flex items-center justify-between text-left hover:bg-[#8B5CF6]/20 rounded p-2 transition-colors"
|
||||
>
|
||||
<div class="flex items-center space-x-2">
|
||||
<i class="fa-solid fa-wrench text-[#8B5CF6]"></i>
|
||||
<span class="text-xs font-semibold text-[#8B5CF6]">Tool Call</span>
|
||||
<span class="text-xs text-[#94A3B8]" x-text="getToolName(message.content)"></span>
|
||||
</div>
|
||||
<i
|
||||
class="fa-solid text-[#8B5CF6] transition-transform text-xs"
|
||||
:class="message.expanded ? 'fa-chevron-up' : 'fa-chevron-down'"
|
||||
></i>
|
||||
</button>
|
||||
<div
|
||||
x-show="message.expanded"
|
||||
x-transition
|
||||
class="mt-2 pt-2 border-t border-[#8B5CF6]/20"
|
||||
>
|
||||
<div class="text-[#E5E7EB] text-xs max-h-96 overflow-auto overflow-x-auto tool-call-content"
|
||||
x-html="message.html"
|
||||
x-effect="if (message.expanded && window.hljs) { setTimeout(() => { $el.querySelectorAll('pre code.language-json').forEach(block => { if (!block.classList.contains('hljs')) window.hljs.highlightElement(block); }); }, 50); }"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<!-- Tool results (collapsible) -->
|
||||
<template x-if="message.role === 'tool_result'">
|
||||
<div class="flex items-start space-x-2 mb-1">
|
||||
<div class="flex flex-col flex-1">
|
||||
<div class="p-2 flex-1 rounded-lg bg-[#10B981]/10 text-[#94A3B8] border border-[#10B981]/30">
|
||||
<button
|
||||
@click="message.expanded = !message.expanded"
|
||||
class="w-full flex items-center justify-between text-left hover:bg-[#10B981]/20 rounded p-2 transition-colors"
|
||||
>
|
||||
<div class="flex items-center space-x-2">
|
||||
<i class="fa-solid fa-check-circle text-[#10B981]"></i>
|
||||
<span class="text-xs font-semibold text-[#10B981]">Tool Result</span>
|
||||
<span class="text-xs text-[#94A3B8]" x-text="getToolName(message.content) || 'Success'"></span>
|
||||
</div>
|
||||
<i
|
||||
class="fa-solid text-[#10B981] transition-transform text-xs"
|
||||
:class="message.expanded ? 'fa-chevron-up' : 'fa-chevron-down'"
|
||||
></i>
|
||||
</button>
|
||||
<div
|
||||
x-show="message.expanded"
|
||||
x-transition
|
||||
class="mt-2 pt-2 border-t border-[#10B981]/20"
|
||||
>
|
||||
<div class="text-[#E5E7EB] text-xs max-h-96 overflow-auto overflow-x-auto tool-result-content"
|
||||
x-html="formatToolResult(message.content)"
|
||||
x-effect="if (message.expanded && window.hljs) { setTimeout(() => { $el.querySelectorAll('pre code.language-json').forEach(block => { if (!block.classList.contains('hljs')) window.hljs.highlightElement(block); }); }, 50); }"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<!-- User and Assistant messages -->
|
||||
<div :class="message.role === 'user' ? 'flex items-start space-x-2 justify-end' : 'flex items-start space-x-2'">
|
||||
{{ if .Model }}
|
||||
{{ $galleryConfig:= index $allGalleryConfigs .Model}}
|
||||
<template x-if="message.role === 'user'">
|
||||
@@ -514,20 +668,7 @@ SOFTWARE.
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="message.role === 'thinking'">
|
||||
<div class="flex items-center space-x-2 w-full">
|
||||
<div class="flex flex-col flex-1">
|
||||
<div class="p-3 flex-1 rounded-lg bg-[#38BDF8]/10 text-[#94A3B8] border border-[#38BDF8]/30">
|
||||
<div class="flex items-center space-x-2 mb-2">
|
||||
<i class="fa-solid fa-brain text-[#38BDF8]"></i>
|
||||
<span class="text-xs font-semibold text-[#38BDF8]">Thinking</span>
|
||||
</div>
|
||||
<div class="mt-1 text-[#E5E7EB]" x-html="message.html"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="message.role != 'user' && message.role != 'thinking'">
|
||||
<template x-if="message.role != 'user' && message.role != 'thinking' && message.role != 'reasoning' && message.role != 'tool_call' && message.role != 'tool_result'">
|
||||
<div class="flex items-center space-x-2">
|
||||
{{ if $galleryConfig }}
|
||||
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[#38BDF8]/20">{{end}}
|
||||
@@ -566,6 +707,7 @@ SOFTWARE.
|
||||
:class="message.role === 'user' ? 'fa-user text-[#38BDF8]' : 'fa-robot text-[#8B5CF6]'"
|
||||
></i>
|
||||
{{ end }}
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
@@ -775,6 +917,83 @@ SOFTWARE.
|
||||
console.error('Failed to copy: ', err);
|
||||
});
|
||||
};
|
||||
|
||||
// Format tool result for better display
|
||||
window.formatToolResult = (content) => {
|
||||
if (!content) return '';
|
||||
try {
|
||||
// Try to parse as JSON
|
||||
const parsed = JSON.parse(content);
|
||||
|
||||
// If it has a 'result' field, try to parse that too
|
||||
if (parsed.result && typeof parsed.result === 'string') {
|
||||
try {
|
||||
const resultParsed = JSON.parse(parsed.result);
|
||||
parsed.result = resultParsed;
|
||||
} catch (e) {
|
||||
// Keep as string if not JSON
|
||||
}
|
||||
}
|
||||
|
||||
// Format the JSON nicely
|
||||
const formatted = JSON.stringify(parsed, null, 2);
|
||||
return DOMPurify.sanitize('<pre class="bg-[#101827] p-3 rounded border border-[#10B981]/20 overflow-x-auto"><code class="language-json">' + formatted + '</code></pre>');
|
||||
} catch (e) {
|
||||
// If not JSON, try to format as markdown or plain text
|
||||
try {
|
||||
// Check if it's a markdown code block
|
||||
if (content.includes('```')) {
|
||||
return DOMPurify.sanitize(marked.parse(content));
|
||||
}
|
||||
// Otherwise, try to parse as JSON one more time with error handling
|
||||
const lines = content.split('\n');
|
||||
let jsonStart = -1;
|
||||
let jsonEnd = -1;
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
if (lines[i].trim().startsWith('{') || lines[i].trim().startsWith('[')) {
|
||||
jsonStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (jsonStart >= 0) {
|
||||
for (let i = lines.length - 1; i >= jsonStart; i--) {
|
||||
if (lines[i].trim().endsWith('}') || lines[i].trim().endsWith(']')) {
|
||||
jsonEnd = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (jsonEnd >= jsonStart) {
|
||||
const jsonStr = lines.slice(jsonStart, jsonEnd + 1).join('\n');
|
||||
try {
|
||||
const parsed = JSON.parse(jsonStr);
|
||||
const formatted = JSON.stringify(parsed, null, 2);
|
||||
return DOMPurify.sanitize('<pre class="bg-[#101827] p-3 rounded border border-[#10B981]/20 overflow-x-auto"><code class="language-json">' + formatted + '</code></pre>');
|
||||
} catch (e2) {
|
||||
// Fall through to markdown
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fall back to markdown
|
||||
return DOMPurify.sanitize(marked.parse(content));
|
||||
} catch (e2) {
|
||||
// Last resort: plain text
|
||||
return DOMPurify.sanitize('<pre class="bg-[#101827] p-3 rounded border border-[#10B981]/20 overflow-x-auto text-xs">' + content.replace(/</g, '<').replace(/>/g, '>') + '</pre>');
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Get tool name from content
|
||||
window.getToolName = (content) => {
|
||||
if (!content || typeof content !== 'string') return '';
|
||||
try {
|
||||
const parsed = JSON.parse(content);
|
||||
return parsed.name || '';
|
||||
} catch (e) {
|
||||
// Try to extract name from string
|
||||
const nameMatch = content.match(/"name"\s*:\s*"([^"]+)"/);
|
||||
return nameMatch ? nameMatch[1] : '';
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
// Context size is now initialized in the Alpine store initialization above
|
||||
@@ -904,6 +1123,76 @@ SOFTWARE.
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Prevent JSON overflow in tool calls and results */
|
||||
.tool-call-content pre,
|
||||
.tool-result-content pre {
|
||||
overflow-x: auto;
|
||||
overflow-y: auto;
|
||||
max-width: 100%;
|
||||
word-wrap: break-word;
|
||||
white-space: pre-wrap;
|
||||
background: #101827 !important;
|
||||
border: 1px solid #1E293B;
|
||||
border-radius: 6px;
|
||||
padding: 12px;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.tool-call-content code,
|
||||
.tool-result-content code {
|
||||
word-wrap: break-word;
|
||||
white-space: pre-wrap;
|
||||
overflow-wrap: break-word;
|
||||
background: transparent !important;
|
||||
color: #E5E7EB;
|
||||
font-family: 'ui-monospace', 'Monaco', 'Consolas', monospace;
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
/* Dark theme syntax highlighting for JSON */
|
||||
.tool-call-content .hljs,
|
||||
.tool-result-content .hljs {
|
||||
background: #101827 !important;
|
||||
color: #E5E7EB !important;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-keyword,
|
||||
.tool-result-content .hljs-keyword {
|
||||
color: #8B5CF6 !important;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-string,
|
||||
.tool-result-content .hljs-string {
|
||||
color: #10B981 !important;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-number,
|
||||
.tool-result-content .hljs-number {
|
||||
color: #38BDF8 !important;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-literal,
|
||||
.tool-result-content .hljs-literal {
|
||||
color: #F59E0B !important;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-punctuation,
|
||||
.tool-result-content .hljs-punctuation {
|
||||
color: #94A3B8 !important;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-property,
|
||||
.tool-result-content .hljs-property {
|
||||
color: #38BDF8 !important;
|
||||
}
|
||||
|
||||
.tool-call-content .hljs-attr,
|
||||
.tool-result-content .hljs-attr {
|
||||
color: #8B5CF6 !important;
|
||||
}
|
||||
</style>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
Reference in New Issue
Block a user