fix(api): SSE streaming format to comply with specification (#7182)

* Initial plan

* Fix SSE streaming format to comply with specification

- Replace json.Encoder with json.Marshal for explicit formatting
- Use explicit \n\n for all SSE messages (instead of relying on implicit newlines)
- Change %v to %s format specifier for proper string formatting
- Fix error message streaming to include proper SSE format
- Ensure consistency between the chat.go and completion.go endpoints (the shared write pattern is sketched below)
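
For illustration, a standalone sketch of this write pattern; the chunk type and writer here are hypothetical stand-ins, not the actual schema.OpenAIResponse plumbing:

    package main

    import (
        "bufio"
        "encoding/json"
        "fmt"
        "os"
    )

    // chunk stands in for the real response struct (schema.OpenAIResponse).
    type chunk struct {
        ID     string `json:"id"`
        Object string `json:"object"`
    }

    func main() {
        w := bufio.NewWriter(os.Stdout)
        defer w.Flush()

        respData, err := json.Marshal(chunk{ID: "chatcmpl-123", Object: "chat.completion.chunk"})
        if err != nil {
            return // marshal failures are handled explicitly, not written out as "<nil>"
        }

        // One SSE event: "data: <json>" terminated by a blank line; %s, not %v.
        fmt.Fprintf(w, "data: %s\n\n", respData)
        fmt.Fprint(w, "data: [DONE]\n\n")
    }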

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Add proper error handling for JSON marshal failures in streaming

- Handle json.Marshal errors explicitly in error response paths
- Add a simple fallback error message if the marshal fails (sketched below)
- Prevents sending 'data: <nil>' on marshal failures
- Addresses code review feedback
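
A condensed sketch of that fallback path (writeEvent is a hypothetical helper name; bufio, encoding/json and fmt are assumed imported as in the endpoints):

    // writeEvent marshals resp and emits it as a single SSE event; if the
    // marshal fails it sends a minimal JSON error instead of "data: <nil>".
    func writeEvent(w *bufio.Writer, resp any) {
        respData, err := json.Marshal(resp)
        if err != nil {
            w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
        } else {
            fmt.Fprintf(w, "data: %s\n\n", respData)
        }
        w.Flush()
    }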

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix SSE streaming format to comply with specification

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix finish_reason field to use pointer for proper null handling

- Change FinishReason from string to *string in Choice schema
- Streaming chunks now omit finish_reason (null) instead of empty string
- Final chunks properly set finish_reason to "stop", "tool_calls", etc.
- Remove empty content from initial streaming chunks (only send role)
- Final streaming chunk sends an empty delta with finish_reason set (see the example after this list)
- Addresses OpenAI API compliance issues causing client failures
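
Roughly, with simplified stand-ins for the schema types (field tags here reflect the end state of the PR, where finish_reason is always emitted):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type message struct {
        Role    string  `json:"role,omitempty"`
        Content *string `json:"content,omitempty"`
    }

    type choice struct {
        Index        int      `json:"index"`
        Delta        *message `json:"delta,omitempty"`
        FinishReason *string  `json:"finish_reason"` // was a plain string before this fix
    }

    func main() {
        // Initial chunk: role only, no content, finish_reason left nil.
        first, _ := json.Marshal(choice{Index: 0, Delta: &message{Role: "assistant"}})
        fmt.Println(string(first)) // {"index":0,"delta":{"role":"assistant"},"finish_reason":null}

        // Final chunk: empty delta, finish_reason set through a pointer.
        stop := "stop"
        last, _ := json.Marshal(choice{Index: 0, Delta: &message{}, FinishReason: &stop})
        fmt.Println(string(last)) // {"index":0,"delta":{},"finish_reason":"stop"}
    }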

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Improve code consistency for string pointer creation

- Use a consistent pattern: declare a variable, then take its address (illustrated below)
- Remove the inline anonymous function for better readability
- Addresses code review feedback
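
In code terms (the "before" form is paraphrased; FinishReasonStop and schema.Choice appear in the diff below):

    // Before (roughly): an inline helper just to obtain a *string
    //   FinishReason: func(s string) *string { return &s }(FinishReasonStop),

    // After: declare the value, then take its address
    stopReason := FinishReasonStop
    choice := schema.Choice{FinishReason: &stopReason, Index: 0}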

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Move common finish reasons to constants

- Create constants.go with FinishReasonStop, FinishReasonToolCalls, FinishReasonFunctionCall
- Replace all string literals with constants in chat.go, completion.go, realtime.go
- Improves code maintainability and prevents typos

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Make it build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix finish_reason to always be present with null or string value

- Remove omitempty from the FinishReason field in the Choice struct (effect shown in the example below)
- Explicitly set FinishReason to nil for all streaming chunks
- Ensures finish_reason appears as null in JSON for streaming chunks
- Final chunks still properly set finish_reason to "stop", "tool_calls", etc.
- Complies with OpenAI API specification example
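
The effect of dropping omitempty on a *string field, shown with hypothetical minimal structs:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type withOmitempty struct {
        FinishReason *string `json:"finish_reason,omitempty"`
    }

    type withoutOmitempty struct {
        FinishReason *string `json:"finish_reason"`
    }

    func main() {
        a, _ := json.Marshal(withOmitempty{})    // nil pointer: key dropped entirely
        b, _ := json.Marshal(withoutOmitempty{}) // nil pointer: key kept, value is null
        fmt.Println(string(a)) // {}
        fmt.Println(string(b)) // {"finish_reason":null}
    }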

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Commit 34bc1bda1e (parent 01cd58a739), authored by Copilot and committed by GitHub on 2025-11-09 22:00:27 +01:00.
8 changed files with 95 additions and 47 deletions

File: chat.go

@@ -2,7 +2,6 @@ package openai
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
@@ -91,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -111,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
Usage: usage,
}
@@ -145,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -169,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
Usage: usage,
}
@@ -197,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
},
},
},
}}},
},
Index: 0,
FinishReason: nil,
}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -220,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
},
},
},
}}},
},
Index: 0,
FinishReason: nil,
}},
Object: "chat.completion.chunk",
}
}
@@ -427,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
if len(ev.Choices[0].Delta.ToolCalls) > 0 {
toolsCalled = true
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
respData, err := json.Marshal(ev)
if err != nil {
log.Debug().Msgf("Failed to marshal response: %v", err)
input.Cancel()
continue
}
log.Debug().Msgf("Sending chunk: %s", string(respData))
_, err = fmt.Fprintf(w, "data: %s\n\n", string(respData))
if err != nil {
log.Debug().Msgf("Sending chunk failed: %v", err)
input.Cancel()
@@ -443,22 +451,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
log.Error().Msgf("Stream ended with error: %v", err)
stopReason := FinishReasonStop
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: "stop",
FinishReason: &stopReason,
Index: 0,
Delta: &schema.Message{Content: "Internal error: " + err.Error()},
}},
Object: "chat.completion.chunk",
Usage: *usage,
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
respData, marshalErr := json.Marshal(resp)
if marshalErr != nil {
log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
// Send a simple error message as fallback
w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
} else {
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
}
w.WriteString("data: [DONE]\n\n")
w.Flush()
@@ -466,11 +480,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
}
finishReason := "stop"
finishReason := FinishReasonStop
if toolsCalled && len(input.Tools) > 0 {
finishReason = "tool_calls"
finishReason = FinishReasonToolCalls
} else if toolsCalled {
finishReason = "function_call"
finishReason = FinishReasonFunctionCall
}
resp := &schema.OpenAIResponse{
@@ -479,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: finishReason,
FinishReason: &finishReason,
Index: 0,
Delta: &schema.Message{Content: &textContentToReturn},
Delta: &schema.Message{},
}},
Object: "chat.completion.chunk",
Usage: *usage,
@@ -502,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
tokenCallback := func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
@@ -520,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return
}
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{
FinishReason: "stop",
FinishReason: &stopReason,
Message: &schema.Message{Role: "assistant", Content: &result}})
default:
toolCallsReason := FinishReasonToolCalls
toolChoice := schema.Choice{
FinishReason: "tool_calls",
FinishReason: &toolCallsReason,
Message: &schema.Message{
Role: "assistant",
},
@@ -549,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
)
} else {
// otherwise we return more choices directly (deprecated)
functionCallReason := FinishReasonFunctionCall
*c = append(*c, schema.Choice{
FinishReason: "function_call",
FinishReason: &functionCallReason,
Message: &schema.Message{
Role: "assistant",
Content: &textContentToReturn,

File: completion.go

@@ -2,7 +2,6 @@ package openai
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
@@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
Text: s,
Index: 0,
Text: s,
FinishReason: nil,
},
},
Object: "text_completion",
@@ -140,24 +140,49 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
log.Debug().Msgf("No choices in the response, skipping")
continue
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
respData, err := json.Marshal(ev)
if err != nil {
log.Debug().Msgf("Failed to marshal response: %v", err)
continue
}
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", string(respData))
fmt.Fprintf(w, "data: %s\n\n", string(respData))
w.Flush()
case err := <-ended:
if err == nil {
break LOOP
}
log.Error().Msgf("Stream ended with error: %v", err)
fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error())
stopReason := FinishReasonStop
errorResp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model,
Choices: []schema.Choice{
{
Index: 0,
FinishReason: &stopReason,
Text: "Internal error: " + err.Error(),
},
},
Object: "text_completion",
}
errorData, marshalErr := json.Marshal(errorResp)
if marshalErr != nil {
log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
// Send a simple error message as fallback
fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n")
} else {
fmt.Fprintf(w, "data: %s\n\n", string(errorData))
}
w.Flush()
break LOOP
}
}
stopReason := FinishReasonStop
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
@@ -165,7 +190,7 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
Choices: []schema.Choice{
{
Index: 0,
FinishReason: "stop",
FinishReason: &stopReason,
},
},
Object: "text_completion",
@@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
r, tokenUsage, err := ComputeChoices(
input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k})
}, nil)
if err != nil {
return err

File: constants.go (new file)

@@ -0,0 +1,8 @@
package openai
// Finish reason constants for OpenAI API responses
const (
FinishReasonStop = "stop"
FinishReasonToolCalls = "tool_calls"
FinishReasonFunctionCall = "function_call"
)

File: realtime.go

@@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
@@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
}
if len(input.Tools) > 0 {
toolChoice.FinishReason = "tool_calls"
toolCallsReason := FinishReasonToolCalls
toolChoice.FinishReason = &toolCallsReason
}
for _, ss := range results {
@@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
)
} else {
// otherwise we return more choices directly
functionCallReason := FinishReasonFunctionCall
*c = append(*c, schema.Choice{
FinishReason: "function_call",
FinishReason: &functionCallReason,
Message: &schema.Message{
Role: "assistant",
Content: &textContentToReturn,