fix: highly inconsistent agent responses when calling the MCP server via cogito - response body "Invalid http method" (#7790)

* fix: resolve duplicate MCP route registration causing a ~50% failure rate

Fixes #7772

The issue was caused by duplicate registration of the MCP endpoint
/mcp/v1/chat/completions in both openai.go and localai.go, leading to
nondeterministic routing: a request could be served by either handler,
and the two handlers had incompatible behaviors.
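
For illustration (not part of this change), a minimal Echo v4 sketch of
the conflicting pattern; the handler bodies are hypothetical stand-ins
for the two real endpoints:

```go
package main

import (
	"net/http"

	"github.com/labstack/echo/v4"
)

func main() {
	e := echo.New()

	// Stand-in for the removed openai.MCPCompletionEndpoint (synchronous only).
	e.POST("/mcp/v1/chat/completions", func(c echo.Context) error {
		return c.JSON(http.StatusOK, map[string]string{"handler": "openai"})
	})

	// Stand-in for localai's streaming-capable handler, bound to the SAME
	// method and path. Which handler ends up serving the route depends on
	// router internals and registration order, not on anything visible at
	// the call site - exactly the ambiguity this commit removes.
	e.POST("/mcp/v1/chat/completions", func(c echo.Context) error {
		return c.JSON(http.StatusOK, map[string]string{"handler": "localai"})
	})

	e.Logger.Fatal(e.Start(":1323"))
}
```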

Changes:
- Removed duplicate MCP route registration from openai.go
- Kept the localai.MCPStreamEndpoint as the canonical handler
- Added all three MCP route patterns for backward compatibility (see the test sketch after this list):
  * /v1/mcp/chat/completions
  * /mcp/v1/chat/completions
  * /mcp/chat/completions
- Added comments to clarify route ownership and prevent future conflicts
- Fixed formatting in ui_api.go

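A test sketch (not part of this change; `okHandler` is a hypothetical
stand-in for the real handler) showing how to check that all three
patterns dispatch to one shared handler:

```go
package routes_test

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/labstack/echo/v4"
)

func TestMCPRoutesShareOneHandler(t *testing.T) {
	e := echo.New()
	okHandler := func(c echo.Context) error { return c.String(http.StatusOK, "ok") }

	patterns := []string{
		"/v1/mcp/chat/completions",
		"/mcp/v1/chat/completions",
		"/mcp/chat/completions",
	}
	// Register the single shared handler on every pattern.
	for _, p := range patterns {
		e.POST(p, okHandler)
	}
	// Every pattern must answer POST identically.
	for _, p := range patterns {
		req := httptest.NewRequest(http.MethodPost, p, nil)
		rec := httptest.NewRecorder()
		e.ServeHTTP(rec, req)
		if rec.Code != http.StatusOK {
			t.Fatalf("POST %s: got %d, want 200", p, rec.Code)
		}
	}
}
```
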
The localai.MCPStreamEndpoint handler (renamed to MCPEndpoint in this
change) is more feature-complete: it supports both streaming and
non-streaming modes, while the removed openai.MCPCompletionEndpoint only
supported synchronous requests.
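
For reference, a hedged client-side sketch, assuming a local instance on
127.0.0.1:8080 and a model named "my-model" (both hypothetical), and
assuming the endpoint honors the OpenAI-style "stream" field:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical deployment address and model name; adjust to your setup.
	url := "http://127.0.0.1:8080/v1/mcp/chat/completions"
	body := []byte(`{"model":"my-model","messages":[{"role":"user","content":"hi"}],"stream":false}`)

	// Non-streaming: a single JSON body comes back, like a regular chat completion.
	resp, err := http.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))

	// With "stream": true the same route answers with SSE events instead
	// (reasoning, tool calls, results); per the handler's doc comment that
	// framing is NOT OpenAI-compatible, so a plain OpenAI SSE client should
	// not assume it can parse them.
}
```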

This eliminates the ~50% failure rate where the cogito library would
receive "Invalid http method" errors when internal HTTP requests were
routed to the wrong handler.
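
A quick consistency probe (hypothetical address and model) one might run
to confirm the fix; before this change roughly half of these calls
failed, afterwards none should:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	url := "http://127.0.0.1:8080/mcp/v1/chat/completions"
	body := []byte(`{"model":"my-model","messages":[{"role":"user","content":"ping"}]}`)

	failures := 0
	const n = 20
	for i := 0; i < n; i++ {
		resp, err := http.Post(url, "application/json", bytes.NewReader(body))
		if err != nil {
			failures++
			continue
		}
		if resp.StatusCode != http.StatusOK {
			failures++
		}
		resp.Body.Close()
	}
	fmt.Printf("%d/%d requests failed\n", failures, n)
}
```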

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Signed-off-by: majiayu000 <1835304752@qq.com>

* Address feedback from review

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: majiayu000 <1835304752@qq.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Commit: 4cd95b8a9d (parent: 8c504113a2)
Author: lif
Date: 2026-01-03 22:43:23 +08:00
Committed by: GitHub
5 changed files with 7 additions and 171 deletions


@@ -53,12 +53,12 @@ type MCPErrorEvent struct {
	Message string `json:"message"`
}

-// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
+// MCPEndpoint is the endpoint for MCP chat completions. Supports SSE mode, but it is not compatible with the OpenAI APIs.
// @Summary Stream MCP chat completions with reasoning, tool calls, and results
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/mcp/chat/completions [post]
-func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		ctx := c.Request().Context()
		created := int(time.Now().Unix())


@@ -1,148 +0,0 @@
package openai

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"time"

	"github.com/labstack/echo/v4"
	"github.com/mudler/LocalAI/core/config"
	mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
	"github.com/mudler/LocalAI/core/http/middleware"

	"github.com/google/uuid"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/core/templates"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/cogito"
	"github.com/mudler/xlog"
)

// MCPCompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
// @Summary Generate completions for a given prompt and model.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /mcp/v1/completions [post]
func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	// We do not support streaming mode (Yet?)
	return func(c echo.Context) error {
		created := int(time.Now().Unix())
		ctx := c.Request().Context()

		// Handle Correlation
		id := c.Request().Header.Get("X-Correlation-ID")
		if id == "" {
			id = uuid.New().String()
		}

		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
		if !ok || input.Model == "" {
			return echo.ErrBadRequest
		}

		config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || config == nil {
			return echo.ErrBadRequest
		}

		if config.MCP.Servers == "" && config.MCP.Stdio == "" {
			return fmt.Errorf("no MCP servers configured")
		}

		// Get MCP config from model config
		remote, stdio, err := config.MCP.MCPConfigFromYAML()
		if err != nil {
			return fmt.Errorf("failed to get MCP config: %w", err)
		}

		// Check if we have tools in cache, or we have to have an initial connection
		sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
		if err != nil {
			return fmt.Errorf("failed to get MCP sessions: %w", err)
		}

		if len(sessions) == 0 {
			return fmt.Errorf("no working MCP servers found")
		}

		fragment := cogito.NewEmptyFragment()
		for _, message := range input.Messages {
			fragment = fragment.AddMessage(message.Role, message.StringContent)
		}

		_, port, err := net.SplitHostPort(appConfig.APIAddress)
		if err != nil {
			return err
		}

		apiKey := ""
		if appConfig.ApiKeys != nil {
			apiKey = appConfig.ApiKeys[0]
		}

		ctxWithCancellation, cancel := context.WithCancel(ctx)
		defer cancel()

		// TODO: instead of connecting to the API, we should just wire this internally
		// and act like completion.go.
		// We can do this as cogito expects an interface and we can create one that
		// we satisfy to just call internally ComputeChoices
		defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)

		// Build cogito options using the consolidated method
		cogitoOpts := config.BuildCogitoOptions()
		cogitoOpts = append(
			cogitoOpts,
			cogito.WithContext(ctxWithCancellation),
			cogito.WithMCPs(sessions...),
			cogito.WithStatusCallback(func(s string) {
				xlog.Debug("[model agent] Status", "model", config.Name, "status", s)
			}),
			cogito.WithReasoningCallback(func(s string) {
				xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s)
			}),
			cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision {
				xlog.Debug("[model agent] Tool call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments)
				return cogito.ToolCallDecision{
					Approved: true,
				}
			}),
			cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
				xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments)
			}),
		)

		f, err := cogito.ExecuteTools(
			defaultLLM, fragment,
			cogitoOpts...,
		)
		if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
			return err
		}

		f, err = defaultLLM.Ask(ctx, f)
		if err != nil {
			return err
		}

		resp := &schema.OpenAIResponse{
			ID:      id,
			Created: created,
			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
			Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
			Object:  "text_completion",
		}

		jsonResult, _ := json.Marshal(resp)
		xlog.Debug("Response", "response", string(jsonResult))

		// Return the prediction in the response body
		return c.JSON(200, resp)
	}
}


@@ -137,9 +137,10 @@ func RegisterLocalAIRoutes(router *echo.Echo,
		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))

-	// MCP Stream endpoint
+	// MCP endpoint - supports both streaming and non-streaming modes
+	// Note: streaming mode is NOT compatible with the OpenAI APIs: it streams a richer set of states (reasoning, tool calls, results).
	if evaluator != nil {
-		mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
+		mcpStreamHandler := localai.MCPEndpoint(cl, ml, evaluator, appConfig)
		mcpStreamMiddleware := []echo.MiddlewareFunc{
			requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
			requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -154,6 +155,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
		}
		router.POST("/v1/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
		router.POST("/mcp/v1/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
		router.POST("/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
	}

	// Agent job routes


@@ -79,24 +79,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
app.POST("/completions", completionHandler, completionMiddleware...)
app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
// MCPcompletion
mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
mcpCompletionMiddleware := []echo.MiddlewareFunc{
traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
if err := re.SetOpenAIRequest(c); err != nil {
return err
}
return next(c)
}
},
}
app.POST("/mcp/v1/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
app.POST("/mcp/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
// embeddings
embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
embeddingMiddleware := []echo.MiddlewareFunc{


@@ -954,7 +954,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
		if !appConfig.EnableTracing {
			return c.JSON(503, map[string]any{
				"error": "Tracing disabled",
-				})
+			})
		}
		traces := middleware.GetTraces()
		return c.JSON(200, map[string]interface{}{