Mirror of https://github.com/mudler/LocalAI.git (synced 2026-01-06 10:39:55 -06:00)
fix(api): SSE streaming format to comply with specification (#7182)
* Initial plan

* Fix SSE streaming format to comply with specification

- Replace json.Encoder with json.Marshal for explicit formatting
- Use explicit \n\n for all SSE messages (instead of relying on implicit newlines)
- Change %v to %s format specifier for proper string formatting
- Fix error message streaming to include proper SSE format
- Ensure consistency between chat.go and completion.go endpoints

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Add proper error handling for JSON marshal failures in streaming

- Handle json.Marshal errors explicitly in error response paths
- Add fallback simple error message if marshal fails
- Prevents sending 'data: <nil>' on marshal failures
- Addresses code review feedback

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix SSE streaming format to comply with specification

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix finish_reason field to use pointer for proper null handling

- Change FinishReason from string to *string in Choice schema
- Streaming chunks now omit finish_reason (null) instead of empty string
- Final chunks properly set finish_reason to "stop", "tool_calls", etc.
- Remove empty content from initial streaming chunks (only send role)
- Final streaming chunk sends empty delta with finish_reason
- Addresses OpenAI API compliance issues causing client failures

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Improve code consistency for string pointer creation

- Use consistent pattern: declare variable then take address
- Remove inline anonymous function for better readability
- Addresses code review feedback

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Move common finish reasons to constants

- Create constants.go with FinishReasonStop, FinishReasonToolCalls, FinishReasonFunctionCall
- Replace all string literals with constants in chat.go, completion.go, realtime.go
- Improves code maintainability and prevents typos

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Make it build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix finish_reason to always be present with null or string value

- Remove omitempty from FinishReason field in Choice struct
- Explicitly set FinishReason to nil for all streaming chunks
- Ensures finish_reason appears as null in JSON for streaming chunks
- Final chunks still properly set finish_reason to "stop", "tool_calls", etc.
- Complies with OpenAI API specification example

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
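For readers skimming the diff below, the heart of the change is how each streaming chunk is serialized and framed on the wire. The following is a minimal, self-contained sketch of the pattern the commit adopts; the `chunk` struct and `writeChunk` helper are illustrative stand-ins, not LocalAI's actual `schema.OpenAIResponse` plumbing.

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"os"
)

// chunk is a stand-in for schema.OpenAIResponse; FinishReason is a *string so
// it serializes as null on intermediate chunks and as "stop" on the final one.
type chunk struct {
	Object       string  `json:"object"`
	Content      string  `json:"content"`
	FinishReason *string `json:"finish_reason"`
}

// writeChunk frames one SSE event: marshal explicitly, fall back to a simple
// error payload if marshalling fails, and always terminate with "\n\n".
func writeChunk(w *bufio.Writer, c chunk) {
	data, err := json.Marshal(c)
	if err != nil {
		w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
		return
	}
	fmt.Fprintf(w, "data: %s\n\n", data)
}

func main() {
	w := bufio.NewWriter(os.Stdout)
	defer w.Flush()

	writeChunk(w, chunk{Object: "chat.completion.chunk", Content: "Hel"})
	writeChunk(w, chunk{Object: "chat.completion.chunk", Content: "lo"})
	stop := "stop"
	writeChunk(w, chunk{Object: "chat.completion.chunk", FinishReason: &stop})
	w.WriteString("data: [DONE]\n\n")
}
```

Each event is written as `data: <json>` followed by a blank line, and the stream is closed with `data: [DONE]`, which is the framing SSE clients expect.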
core/http/endpoints/openai/chat.go

@@ -2,7 +2,6 @@ package openai
 
 import (
 "bufio"
-"bytes"
 "context"
 "encoding/json"
 "fmt"
@@ -91,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 ID: id,
 Created: created,
 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
+Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
 Object: "chat.completion.chunk",
 }
 responses <- initialMessage
@@ -111,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 ID: id,
 Created: created,
 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
+Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}},
 Object: "chat.completion.chunk",
 Usage: usage,
 }
@@ -145,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 ID: id,
 Created: created,
 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
+Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
 Object: "chat.completion.chunk",
 }
 responses <- initialMessage
@@ -169,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 ID: id,
 Created: created,
 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}},
 Object: "chat.completion.chunk",
 Usage: usage,
 }
@@ -197,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 },
 },
 },
-}}},
+},
+Index: 0,
+FinishReason: nil,
+}},
 Object: "chat.completion.chunk",
 }
 responses <- initialMessage
@@ -220,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 },
 },
 },
-}}},
+},
+Index: 0,
+FinishReason: nil,
+}},
 Object: "chat.completion.chunk",
 }
 }
@@ -427,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 if len(ev.Choices[0].Delta.ToolCalls) > 0 {
 toolsCalled = true
 }
-var buf bytes.Buffer
-enc := json.NewEncoder(&buf)
-enc.Encode(ev)
-log.Debug().Msgf("Sending chunk: %s", buf.String())
-_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
+respData, err := json.Marshal(ev)
+if err != nil {
+log.Debug().Msgf("Failed to marshal response: %v", err)
+input.Cancel()
+continue
+}
+log.Debug().Msgf("Sending chunk: %s", string(respData))
+_, err = fmt.Fprintf(w, "data: %s\n\n", string(respData))
 if err != nil {
 log.Debug().Msgf("Sending chunk failed: %v", err)
 input.Cancel()
@@ -443,22 +451,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 }
 log.Error().Msgf("Stream ended with error: %v", err)
 
+stopReason := FinishReasonStop
 resp := &schema.OpenAIResponse{
 ID: id,
 Created: created,
 Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
 Choices: []schema.Choice{
 {
-FinishReason: "stop",
+FinishReason: &stopReason,
 Index: 0,
 Delta: &schema.Message{Content: "Internal error: " + err.Error()},
 }},
 Object: "chat.completion.chunk",
 Usage: *usage,
 }
-respData, _ := json.Marshal(resp)
-
-w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+respData, marshalErr := json.Marshal(resp)
+if marshalErr != nil {
+log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
+// Send a simple error message as fallback
+w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
+} else {
+w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+}
 w.WriteString("data: [DONE]\n\n")
 w.Flush()
 
@@ -466,11 +480,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 }
 }
 
-finishReason := "stop"
+finishReason := FinishReasonStop
 if toolsCalled && len(input.Tools) > 0 {
-finishReason = "tool_calls"
+finishReason = FinishReasonToolCalls
 } else if toolsCalled {
-finishReason = "function_call"
+finishReason = FinishReasonFunctionCall
 }
 
 resp := &schema.OpenAIResponse{
@@ -479,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
 Choices: []schema.Choice{
 {
-FinishReason: finishReason,
+FinishReason: &finishReason,
 Index: 0,
-Delta: &schema.Message{Content: &textContentToReturn},
+Delta: &schema.Message{},
 }},
 Object: "chat.completion.chunk",
 Usage: *usage,
@@ -502,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 tokenCallback := func(s string, c *[]schema.Choice) {
 if !shouldUseFn {
 // no function is called, just reply and use stop as finish reason
-*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+stopReason := FinishReasonStop
+*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 return
 }
 
@@ -520,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 return
 }
 
+stopReason := FinishReasonStop
 *c = append(*c, schema.Choice{
-FinishReason: "stop",
+FinishReason: &stopReason,
 Message: &schema.Message{Role: "assistant", Content: &result}})
 default:
+toolCallsReason := FinishReasonToolCalls
 toolChoice := schema.Choice{
-FinishReason: "tool_calls",
+FinishReason: &toolCallsReason,
 Message: &schema.Message{
 Role: "assistant",
 },
@@ -549,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 )
 } else {
 // otherwise we return more choices directly (deprecated)
+functionCallReason := FinishReasonFunctionCall
 *c = append(*c, schema.Choice{
-FinishReason: "function_call",
+FinishReason: &functionCallReason,
 Message: &schema.Message{
 Role: "assistant",
 Content: &textContentToReturn,
core/http/endpoints/openai/completion.go

@@ -2,7 +2,6 @@ package openai
 
 import (
 "bufio"
-"bytes"
 "encoding/json"
 "errors"
 "fmt"
@@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
 Choices: []schema.Choice{
 {
-Index: 0,
-Text: s,
+Index: 0,
+Text: s,
+FinishReason: nil,
 },
 },
 Object: "text_completion",
@@ -140,24 +140,49 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 log.Debug().Msgf("No choices in the response, skipping")
 continue
 }
-var buf bytes.Buffer
-enc := json.NewEncoder(&buf)
-enc.Encode(ev)
+respData, err := json.Marshal(ev)
+if err != nil {
+log.Debug().Msgf("Failed to marshal response: %v", err)
+continue
+}
 
-log.Debug().Msgf("Sending chunk: %s", buf.String())
-fmt.Fprintf(w, "data: %v\n", buf.String())
+log.Debug().Msgf("Sending chunk: %s", string(respData))
+fmt.Fprintf(w, "data: %s\n\n", string(respData))
 w.Flush()
 case err := <-ended:
 if err == nil {
 break LOOP
 }
 log.Error().Msgf("Stream ended with error: %v", err)
-fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error())
+
+stopReason := FinishReasonStop
+errorResp := schema.OpenAIResponse{
+ID: id,
+Created: created,
+Model: input.Model,
+Choices: []schema.Choice{
+{
+Index: 0,
+FinishReason: &stopReason,
+Text: "Internal error: " + err.Error(),
+},
+},
+Object: "text_completion",
+}
+errorData, marshalErr := json.Marshal(errorResp)
+if marshalErr != nil {
+log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
+// Send a simple error message as fallback
+fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n")
+} else {
+fmt.Fprintf(w, "data: %s\n\n", string(errorData))
+}
 w.Flush()
 break LOOP
 }
 }
 
+stopReason := FinishReasonStop
 resp := &schema.OpenAIResponse{
 ID: id,
 Created: created,
@@ -165,7 +190,7 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 Choices: []schema.Choice{
 {
 Index: 0,
-FinishReason: "stop",
+FinishReason: &stopReason,
 },
 },
 Object: "text_completion",
@@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 
 r, tokenUsage, err := ComputeChoices(
 input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
-*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
+stopReason := FinishReasonStop
+*c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k})
 }, nil)
 if err != nil {
 return err
core/http/endpoints/openai/constants.go (new file, +8)

@@ -0,0 +1,8 @@
+package openai
+
+// Finish reason constants for OpenAI API responses
+const (
+FinishReasonStop = "stop"
+FinishReasonToolCalls = "tool_calls"
+FinishReasonFunctionCall = "function_call"
+)
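The remaining hunks all apply the same idiom: since `FinishReason` on `schema.Choice` is now a `*string` without `omitempty`, call sites bind one of the constants above to a local variable and take its address, while intermediate streaming chunks pass `nil` so the field is emitted as `null`. A hedged sketch of that behavior, using a simplified `Choice` struct rather than the real schema package:

```go
package main

import (
	"encoding/json"
	"fmt"
)

const FinishReasonStop = "stop"

// Choice mirrors the shape of schema.Choice for this example only.
type Choice struct {
	Index        int     `json:"index"`
	FinishReason *string `json:"finish_reason"` // no omitempty: emits null when unset
}

func main() {
	// Intermediate streaming chunk: finish_reason stays null.
	streaming, _ := json.Marshal(Choice{Index: 0, FinishReason: nil})
	fmt.Println(string(streaming)) // {"index":0,"finish_reason":null}

	// Final chunk: declare the variable, then take its address.
	stopReason := FinishReasonStop
	final, _ := json.Marshal(Choice{Index: 0, FinishReason: &stopReason})
	fmt.Println(string(final)) // {"index":0,"finish_reason":"stop"}
}
```

Declaring the local variable before taking its address is the consistency pattern the commit message calls out, since Go does not allow taking the address of a constant directly.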
core/http/endpoints/openai/realtime.go

@@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
 if !shouldUseFn {
 // no function is called, just reply and use stop as finish reason
-*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+stopReason := FinishReasonStop
+*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 return
 }
 
@@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 }
 
 if len(input.Tools) > 0 {
-toolChoice.FinishReason = "tool_calls"
+toolCallsReason := FinishReasonToolCalls
+toolChoice.FinishReason = &toolCallsReason
 }
 
 for _, ss := range results {
@@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 )
 } else {
 // otherwise we return more choices directly
+functionCallReason := FinishReasonFunctionCall
 *c = append(*c, schema.Choice{
-FinishReason: "function_call",
+FinishReason: &functionCallReason,
 Message: &schema.Message{
 Role: "assistant",
 Content: &textContentToReturn,
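To observe the resulting stream end to end, a small client can POST a streaming chat request and print the raw SSE lines; with this change, intermediate chunks should carry `"finish_reason":null` and the final chunk a concrete reason such as `"stop"`, followed by `data: [DONE]`. The base URL, port, and model name below are placeholders for a local setup, not values taken from this commit:

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Model name and base URL are placeholders; adjust to your local setup.
	body := []byte(`{"model": "my-model", "stream": true, "messages": [{"role": "user", "content": "Hello"}]}`)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue // skip the blank separator lines between SSE events
		}
		fmt.Println(line)
		if line == "data: [DONE]" {
			break
		}
	}
}
```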