fix(api): SSE streaming format to comply with specification (#7182)

* Initial plan

* Fix SSE streaming format to comply with specification

- Replace json.Encoder with json.Marshal for explicit formatting
- Use explicit \n\n for all SSE messages (instead of relying on implicit newlines)
- Change %v to %s format specifier for proper string formatting
- Fix error message streaming to include proper SSE format
- Ensure consistency between the chat.go and completion.go endpoints (the shared write pattern is sketched below)
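
For illustration, a standalone sketch of this write pattern; the chunk type and writer here are hypothetical stand-ins, not the actual schema.OpenAIResponse plumbing:

    package main

    import (
        "bufio"
        "encoding/json"
        "fmt"
        "os"
    )

    // chunk stands in for the real response struct (schema.OpenAIResponse).
    type chunk struct {
        ID     string `json:"id"`
        Object string `json:"object"`
    }

    func main() {
        w := bufio.NewWriter(os.Stdout)
        defer w.Flush()

        respData, err := json.Marshal(chunk{ID: "chatcmpl-123", Object: "chat.completion.chunk"})
        if err != nil {
            return // marshal failures are handled explicitly, not written out as "<nil>"
        }

        // One SSE event: "data: <json>" terminated by a blank line; %s, not %v.
        fmt.Fprintf(w, "data: %s\n\n", respData)
        fmt.Fprint(w, "data: [DONE]\n\n")
    }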

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Add proper error handling for JSON marshal failures in streaming

- Handle json.Marshal errors explicitly in error response paths
- Add a simple fallback error message if the marshal fails (sketched below)
- Prevents sending 'data: <nil>' on marshal failures
- Addresses code review feedback
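
A condensed sketch of that fallback path (writeEvent is a hypothetical helper name; bufio, encoding/json and fmt are assumed imported as in the endpoints):

    // writeEvent marshals resp and emits it as a single SSE event; if the
    // marshal fails it sends a minimal JSON error instead of "data: <nil>".
    func writeEvent(w *bufio.Writer, resp any) {
        respData, err := json.Marshal(resp)
        if err != nil {
            w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
        } else {
            fmt.Fprintf(w, "data: %s\n\n", respData)
        }
        w.Flush()
    }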

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix SSE streaming format to comply with specification

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix finish_reason field to use pointer for proper null handling

- Change FinishReason from string to *string in Choice schema
- Streaming chunks now omit finish_reason (null) instead of empty string
- Final chunks properly set finish_reason to "stop", "tool_calls", etc.
- Remove empty content from initial streaming chunks (only send role)
- Final streaming chunk sends an empty delta with finish_reason set (see the example after this list)
- Addresses OpenAI API compliance issues causing client failures
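
Roughly, with simplified stand-ins for the schema types (field tags here reflect the end state of the PR, where finish_reason is always emitted):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type message struct {
        Role    string  `json:"role,omitempty"`
        Content *string `json:"content,omitempty"`
    }

    type choice struct {
        Index        int      `json:"index"`
        Delta        *message `json:"delta,omitempty"`
        FinishReason *string  `json:"finish_reason"` // was a plain string before this fix
    }

    func main() {
        // Initial chunk: role only, no content, finish_reason left nil.
        first, _ := json.Marshal(choice{Index: 0, Delta: &message{Role: "assistant"}})
        fmt.Println(string(first)) // {"index":0,"delta":{"role":"assistant"},"finish_reason":null}

        // Final chunk: empty delta, finish_reason set through a pointer.
        stop := "stop"
        last, _ := json.Marshal(choice{Index: 0, Delta: &message{}, FinishReason: &stop})
        fmt.Println(string(last)) // {"index":0,"delta":{},"finish_reason":"stop"}
    }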

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Improve code consistency for string pointer creation

- Use a consistent pattern: declare a variable, then take its address (illustrated below)
- Remove the inline anonymous function for better readability
- Addresses code review feedback
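
In code terms (the "before" form is paraphrased; FinishReasonStop and schema.Choice appear in the diff below):

    // Before (roughly): an inline helper just to obtain a *string
    //   FinishReason: func(s string) *string { return &s }(FinishReasonStop),

    // After: declare the value, then take its address
    stopReason := FinishReasonStop
    choice := schema.Choice{FinishReason: &stopReason, Index: 0}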

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Move common finish reasons to constants

- Create constants.go with FinishReasonStop, FinishReasonToolCalls, FinishReasonFunctionCall
- Replace all string literals with constants in chat.go, completion.go, realtime.go
- Improves code maintainability and prevents typos

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Make it build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix finish_reason to always be present with null or string value

- Remove omitempty from the FinishReason field in the Choice struct (effect shown in the example below)
- Explicitly set FinishReason to nil for all streaming chunks
- Ensures finish_reason appears as null in JSON for streaming chunks
- Final chunks still properly set finish_reason to "stop", "tool_calls", etc.
- Complies with OpenAI API specification example
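
The effect of dropping omitempty on a *string field, shown with hypothetical minimal structs:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type withOmitempty struct {
        FinishReason *string `json:"finish_reason,omitempty"`
    }

    type withoutOmitempty struct {
        FinishReason *string `json:"finish_reason"`
    }

    func main() {
        a, _ := json.Marshal(withOmitempty{})    // nil pointer: key dropped entirely
        b, _ := json.Marshal(withoutOmitempty{}) // nil pointer: key kept, value is null
        fmt.Println(string(a)) // {}
        fmt.Println(string(b)) // {"finish_reason":null}
    }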

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Commit 34bc1bda1e (parent 01cd58a739), authored by Copilot and committed by GitHub on 2025-11-09 22:00:27 +01:00.
8 changed files with 95 additions and 47 deletions

File: chat.go

@@ -2,7 +2,6 @@ package openai
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
@@ -91,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -111,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
Usage: usage,
}
@@ -145,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -169,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
Usage: usage,
}
@@ -197,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
},
},
},
}}},
},
Index: 0,
FinishReason: nil,
}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -220,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
},
},
},
}}},
},
Index: 0,
FinishReason: nil,
}},
Object: "chat.completion.chunk",
}
}
@@ -427,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
if len(ev.Choices[0].Delta.ToolCalls) > 0 {
toolsCalled = true
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
respData, err := json.Marshal(ev)
if err != nil {
log.Debug().Msgf("Failed to marshal response: %v", err)
input.Cancel()
continue
}
log.Debug().Msgf("Sending chunk: %s", string(respData))
_, err = fmt.Fprintf(w, "data: %s\n\n", string(respData))
if err != nil {
log.Debug().Msgf("Sending chunk failed: %v", err)
input.Cancel()
@@ -443,22 +451,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
log.Error().Msgf("Stream ended with error: %v", err)
stopReason := FinishReasonStop
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: "stop",
FinishReason: &stopReason,
Index: 0,
Delta: &schema.Message{Content: "Internal error: " + err.Error()},
}},
Object: "chat.completion.chunk",
Usage: *usage,
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
respData, marshalErr := json.Marshal(resp)
if marshalErr != nil {
log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
// Send a simple error message as fallback
w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
} else {
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
}
w.WriteString("data: [DONE]\n\n")
w.Flush()
@@ -466,11 +480,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
}
finishReason := "stop"
finishReason := FinishReasonStop
if toolsCalled && len(input.Tools) > 0 {
finishReason = "tool_calls"
finishReason = FinishReasonToolCalls
} else if toolsCalled {
finishReason = "function_call"
finishReason = FinishReasonFunctionCall
}
resp := &schema.OpenAIResponse{
@@ -479,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: finishReason,
FinishReason: &finishReason,
Index: 0,
Delta: &schema.Message{Content: &textContentToReturn},
Delta: &schema.Message{},
}},
Object: "chat.completion.chunk",
Usage: *usage,
@@ -502,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
tokenCallback := func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
@@ -520,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return
}
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{
FinishReason: "stop",
FinishReason: &stopReason,
Message: &schema.Message{Role: "assistant", Content: &result}})
default:
toolCallsReason := FinishReasonToolCalls
toolChoice := schema.Choice{
FinishReason: "tool_calls",
FinishReason: &toolCallsReason,
Message: &schema.Message{
Role: "assistant",
},
@@ -549,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
)
} else {
// otherwise we return more choices directly (deprecated)
functionCallReason := FinishReasonFunctionCall
*c = append(*c, schema.Choice{
FinishReason: "function_call",
FinishReason: &functionCallReason,
Message: &schema.Message{
Role: "assistant",
Content: &textContentToReturn,

File: completion.go

@@ -2,7 +2,6 @@ package openai
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
@@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
Text: s,
Index: 0,
Text: s,
FinishReason: nil,
},
},
Object: "text_completion",
@@ -140,24 +140,49 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
log.Debug().Msgf("No choices in the response, skipping")
continue
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
respData, err := json.Marshal(ev)
if err != nil {
log.Debug().Msgf("Failed to marshal response: %v", err)
continue
}
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", string(respData))
fmt.Fprintf(w, "data: %s\n\n", string(respData))
w.Flush()
case err := <-ended:
if err == nil {
break LOOP
}
log.Error().Msgf("Stream ended with error: %v", err)
fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error())
stopReason := FinishReasonStop
errorResp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model,
Choices: []schema.Choice{
{
Index: 0,
FinishReason: &stopReason,
Text: "Internal error: " + err.Error(),
},
},
Object: "text_completion",
}
errorData, marshalErr := json.Marshal(errorResp)
if marshalErr != nil {
log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
// Send a simple error message as fallback
fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n")
} else {
fmt.Fprintf(w, "data: %s\n\n", string(errorData))
}
w.Flush()
break LOOP
}
}
stopReason := FinishReasonStop
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
@@ -165,7 +190,7 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
Choices: []schema.Choice{
{
Index: 0,
FinishReason: "stop",
FinishReason: &stopReason,
},
},
Object: "text_completion",
@@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
r, tokenUsage, err := ComputeChoices(
input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k})
}, nil)
if err != nil {
return err

File: constants.go (new file)

@@ -0,0 +1,8 @@
package openai
// Finish reason constants for OpenAI API responses
const (
FinishReasonStop = "stop"
FinishReasonToolCalls = "tool_calls"
FinishReasonFunctionCall = "function_call"
)

File: realtime.go

@@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
@@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
}
if len(input.Tools) > 0 {
toolChoice.FinishReason = "tool_calls"
toolCallsReason := FinishReasonToolCalls
toolChoice.FinishReason = &toolCallsReason
}
for _, ss := range results {
@@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
)
} else {
// otherwise we return more choices directly
functionCallReason := FinishReasonFunctionCall
*c = append(*c, schema.Choice{
FinishReason: "function_call",
FinishReason: &functionCallReason,
Message: &schema.Message{
Role: "assistant",
Content: &textContentToReturn,