Mirror of https://github.com/mudler/LocalAI.git, synced 2026-01-17 16:09:38 -06:00
feat(function): Add tool streaming, XML Tool Call Parsing Support (#7865)
* feat(function): Add XML Tool Call Parsing Support

  Extend the function parsing system in LocalAI to support XML-style tool calls, similar to how JSON tool calls are currently parsed. This will allow models that return XML format (like <tool_call><function=name><parameter=key>value</parameter></function></tool_call>) to be properly parsed alongside text content.

* thinking before tool calls, more strict support for corner cases with no tools
* Support streaming tools
* Iterative JSON
* Iterative parsing
* Consume JSON marker
* Fixup
* add tests
* Fix pending TODOs
* Don't run other parsing with ParseRegex

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Committed by GitHub
parent 9d3da0bed5
commit 21c84f432f

Changed files: AGENTS.md (46)
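For orientation, the XML shape described in the commit message looks like the snippet below. This is only a toy illustration built with the standard library regexp package: the function name ("get_weather") and parameter ("city") are invented, and this is not LocalAI's actual parser, which lives in pkg/functions.

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Example model output in the XML style this PR adds support for:
	// free text followed by a <tool_call> block.
	output := `Let me check the weather.
<tool_call><function=get_weather><parameter=city>Paris</parameter></function></tool_call>`

	fnRe := regexp.MustCompile(`<function=([^>]+)>`)
	paramRe := regexp.MustCompile(`<parameter=([^>]+)>([^<]*)</parameter>`)

	// Extract the function name and its parameters from the tool-call block.
	fmt.Println("function:", fnRe.FindStringSubmatch(output)[1])
	for _, m := range paramRe.FindAllStringSubmatch(output, -1) {
		fmt.Printf("parameter %s = %s\n", m[1], m[2])
	}
}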
AGENTS.md

@@ -77,3 +77,49 @@ When fixing compilation errors after upstream changes:
- HTTP server uses `server_routes` with HTTP handlers
- Both use the same `server_context` and task queue infrastructure
- gRPC methods: `LoadModel`, `Predict`, `PredictStream`, `Embedding`, `Rerank`, `TokenizeString`, `GetMetrics`, `Health`

## Tool Call Parsing Maintenance

When working on JSON/XML tool call parsing functionality, always check llama.cpp for reference implementation and updates:

### Checking for XML Parsing Changes

1. **Review XML Format Definitions**: Check `llama.cpp/common/chat-parser-xml-toolcall.h` for `xml_tool_call_format` struct changes
2. **Review Parsing Logic**: Check `llama.cpp/common/chat-parser-xml-toolcall.cpp` for parsing algorithm updates
3. **Review Format Presets**: Check `llama.cpp/common/chat-parser.cpp` for new XML format presets (search for `xml_tool_call_format form`)
4. **Review Model Lists**: Check `llama.cpp/common/chat.h` for `COMMON_CHAT_FORMAT_*` enum values that use XML parsing:
   - `COMMON_CHAT_FORMAT_GLM_4_5`
   - `COMMON_CHAT_FORMAT_MINIMAX_M2`
   - `COMMON_CHAT_FORMAT_KIMI_K2`
   - `COMMON_CHAT_FORMAT_QWEN3_CODER_XML`
   - `COMMON_CHAT_FORMAT_APRIEL_1_5`
   - `COMMON_CHAT_FORMAT_XIAOMI_MIMO`
   - Any new formats added

### Model Configuration Options

Always check `llama.cpp` for new model configuration options that should be supported in LocalAI:

1. **Check Server Context**: Review `llama.cpp/tools/server/server-context.cpp` for new parameters
2. **Check Chat Params**: Review `llama.cpp/common/chat.h` for `common_chat_params` struct changes
3. **Check Server Options**: Review `llama.cpp/tools/server/server.cpp` for command-line argument changes
4. **Examples of options to check**:
   - `ctx_shift` - Context shifting support
   - `parallel_tool_calls` - Parallel tool calling
   - `reasoning_format` - Reasoning format options
   - Any new flags or parameters

### Implementation Guidelines

1. **Feature Parity**: Always aim for feature parity with llama.cpp's implementation
2. **Test Coverage**: Add tests for new features matching llama.cpp's behavior
3. **Documentation**: Update relevant documentation when adding new formats or options
4. **Backward Compatibility**: Ensure changes don't break existing functionality

### Files to Monitor

- `llama.cpp/common/chat-parser-xml-toolcall.h` - Format definitions
- `llama.cpp/common/chat-parser-xml-toolcall.cpp` - Parsing logic
- `llama.cpp/common/chat-parser.cpp` - Format presets and model-specific handlers
- `llama.cpp/common/chat.h` - Format enums and parameter structures
- `llama.cpp/tools/server/server-context.cpp` - Server configuration options

@@ -66,10 +66,111 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
	}

	processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.ModelConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) error {
		result := ""
		lastEmittedCount := 0
		_, tokenUsage, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
			result += s
			// TODO: Change generated BNF grammar to be compliant with the schema so we can
			// stream the result token by token here.
			// Try incremental XML parsing for streaming support using iterative parser
			// This allows emitting partial tool calls as they're being generated
			cleanedResult := functions.CleanupLLMResult(result, config.FunctionsConfig)

			// Determine XML format from config
			var xmlFormat *functions.XMLToolCallFormat
			if config.FunctionsConfig.XMLFormat != nil {
				xmlFormat = config.FunctionsConfig.XMLFormat
			} else if config.FunctionsConfig.XMLFormatPreset != "" {
				xmlFormat = functions.GetXMLFormatPreset(config.FunctionsConfig.XMLFormatPreset)
			}

			// Use iterative parser for streaming (partial parsing enabled)
			// Try XML parsing first
			partialResults, parseErr := functions.ParseXMLIterative(cleanedResult, xmlFormat, true)
			if parseErr == nil && len(partialResults) > 0 {
				// Emit new XML tool calls that weren't emitted before
				if len(partialResults) > lastEmittedCount {
					for i := lastEmittedCount; i < len(partialResults); i++ {
						toolCall := partialResults[i]
						initialMessage := schema.OpenAIResponse{
							ID:      id,
							Created: created,
							Model:   req.Model,
							Choices: []schema.Choice{{
								Delta: &schema.Message{
									Role: "assistant",
									ToolCalls: []schema.ToolCall{
										{
											Index: i,
											ID:    id,
											Type:  "function",
											FunctionCall: schema.FunctionCall{
												Name: toolCall.Name,
											},
										},
									},
								},
								Index:        0,
								FinishReason: nil,
							}},
							Object: "chat.completion.chunk",
						}
						select {
						case responses <- initialMessage:
						default:
						}
					}
					lastEmittedCount = len(partialResults)
				}
			} else {
				// Try JSON tool call parsing for streaming
				// Check if the result looks like JSON tool calls
				jsonResults, jsonErr := functions.ParseJSONIterative(cleanedResult, true)
				if jsonErr == nil && len(jsonResults) > 0 {
					// Check if these are tool calls (have "name" and optionally "arguments")
					for _, jsonObj := range jsonResults {
						if name, ok := jsonObj["name"].(string); ok && name != "" {
							// This looks like a tool call
							args := "{}"
							if argsVal, ok := jsonObj["arguments"]; ok {
								if argsStr, ok := argsVal.(string); ok {
									args = argsStr
								} else {
									argsBytes, _ := json.Marshal(argsVal)
									args = string(argsBytes)
								}
							}
							// Emit tool call
							initialMessage := schema.OpenAIResponse{
								ID:      id,
								Created: created,
								Model:   req.Model,
								Choices: []schema.Choice{{
									Delta: &schema.Message{
										Role: "assistant",
										ToolCalls: []schema.ToolCall{
											{
												Index: lastEmittedCount,
												ID:    id,
												Type:  "function",
												FunctionCall: schema.FunctionCall{
													Name:      name,
													Arguments: args,
												},
											},
										},
									},
									Index:        0,
									FinishReason: nil,
								}},
								Object: "chat.completion.chunk",
							}
							select {
							case responses <- initialMessage:
							default:
							}
							lastEmittedCount++
						}
					}
				}
			}
			return true
		})
		if err != nil {
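With this change the endpoint emits `chat.completion.chunk` objects whose `delta.tool_calls` entries carry the function name (and, on the JSON path, the arguments string) as soon as the iterative parser can extract them. The sketch below shows one way a client might accumulate those deltas from the SSE stream. It is a minimal example, not part of this PR: the base URL, model name, and the trimmed-down chunk structs are assumptions, and error handling is reduced to the essentials.

package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

// Minimal mirror of the chunk fields used below; field names follow the
// OpenAI-compatible wire format that LocalAI serializes to.
type chunk struct {
	Choices []struct {
		Delta struct {
			Content   string `json:"content"`
			ToolCalls []struct {
				Index    int    `json:"index"`
				ID       string `json:"id"`
				Function struct {
					Name      string `json:"name"`
					Arguments string `json:"arguments"`
				} `json:"function"`
			} `json:"tool_calls"`
		} `json:"delta"`
	} `json:"choices"`
}

func main() {
	// Assumed local endpoint and model name; adjust for your setup.
	body := `{"model":"my-model","stream":true,"messages":[{"role":"user","content":"What is the weather in Paris?"}]}`
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewBufferString(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	names := map[int]string{} // tool-call index -> function name
	calls := map[int]string{} // tool-call index -> accumulated arguments

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := strings.TrimPrefix(scanner.Text(), "data: ")
		if line == "" || line == "[DONE]" {
			continue
		}
		var c chunk
		if err := json.Unmarshal([]byte(line), &c); err != nil {
			continue // ignore non-JSON keep-alive lines
		}
		for _, choice := range c.Choices {
			for _, tc := range choice.Delta.ToolCalls {
				if tc.Function.Name != "" {
					names[tc.Index] = tc.Function.Name
				}
				calls[tc.Index] += tc.Function.Arguments
			}
		}
	}
	for idx, name := range names {
		fmt.Printf("tool call %d: %s(%s)\n", idx, name, calls[idx])
	}
}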
pkg/functions/iterative_parser.go (new file, 1395 additions): file diff suppressed because it is too large

pkg/functions/json_stack_parser.go (new file, 431 additions):
@@ -0,0 +1,431 @@
package functions

import (
	"encoding/json"
	"errors"
	"regexp"
	"strings"
	"unicode"
)

// JSONStackElementType represents the type of JSON stack element
type JSONStackElementType int

const (
	JSONStackElementObject JSONStackElementType = iota
	JSONStackElementKey
	JSONStackElementArray
)

// JSONStackElement represents an element in the JSON parsing stack
type JSONStackElement struct {
	Type JSONStackElementType
	Key  string
}

// JSONErrorLocator tracks JSON parsing state and errors
type JSONErrorLocator struct {
	position         int
	foundError       bool
	lastToken        string
	exceptionMessage string
	stack            []JSONStackElement
}

// parseJSONWithStack parses JSON with stack tracking, matching llama.cpp's common_json_parse.
// Returns the parsed JSON value, whether it was healed, the marker string that was appended
// during healing, and any error.
func parseJSONWithStack(input string, healingMarker string) (any, bool, string, error) {
	if healingMarker == "" {
		// No healing marker, just try to parse normally
		var result any
		if err := json.Unmarshal([]byte(input), &result); err != nil {
			return nil, false, "", err
		}
		return result, false, "", nil
	}

	// Try to parse complete JSON first
	var result any
	if err := json.Unmarshal([]byte(input), &result); err == nil {
		return result, false, "", nil
	}

	// Parsing failed, need to track stack and heal
	errLoc := &JSONErrorLocator{
		position:   0,
		foundError: false,
		stack:      make([]JSONStackElement, 0),
	}

	// Parse with stack tracking to find where error occurs
	errorPos, err := parseJSONWithStackTracking(input, errLoc)
	if err == nil && !errLoc.foundError {
		// No error found, should have parsed successfully
		var result any
		if err := json.Unmarshal([]byte(input), &result); err != nil {
			return nil, false, "", err
		}
		return result, false, "", nil
	}

	if !errLoc.foundError || len(errLoc.stack) == 0 {
		// Can't heal without stack information
		return nil, false, "", errors.New("incomplete JSON")
	}

	// Build closing braces/brackets from stack
	closing := ""
	for i := len(errLoc.stack) - 1; i >= 0; i-- {
		el := errLoc.stack[i]
		if el.Type == JSONStackElementObject {
			closing += "}"
		} else if el.Type == JSONStackElementArray {
			closing += "]"
		}
		// Keys don't add closing characters
	}

	// Get the partial input up to error position
	partialInput := input
	if errorPos > 0 && errorPos < len(input) {
		partialInput = input[:errorPos]
	}

	// Find last non-space character
	lastNonSpacePos := strings.LastIndexFunc(partialInput, func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	if lastNonSpacePos == -1 {
		return nil, false, "", errors.New("cannot heal a truncated JSON that stopped in an unknown location")
	}
	lastNonSpaceChar := rune(partialInput[lastNonSpacePos])

	// Check if we stopped on a number
	wasMaybeNumber := func() bool {
		if len(partialInput) > 0 && unicode.IsSpace(rune(partialInput[len(partialInput)-1])) {
			return false
		}
		return unicode.IsDigit(lastNonSpaceChar) ||
			lastNonSpaceChar == '.' ||
			lastNonSpaceChar == 'e' ||
			lastNonSpaceChar == 'E' ||
			lastNonSpaceChar == '-'
	}

	// Check for partial unicode escape sequences
	partialUnicodeRegex := regexp.MustCompile(`\\u(?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F])?)?)?)?$`)
	unicodeMarkerPadding := "udc00"
	lastUnicodeMatch := partialUnicodeRegex.FindStringSubmatch(partialInput)
	if lastUnicodeMatch != nil {
		// Pad the escape sequence
		unicodeMarkerPadding = strings.Repeat("0", 6-len(lastUnicodeMatch[0]))
		// Check if it's a high surrogate
		if len(lastUnicodeMatch[0]) >= 4 {
			seq := lastUnicodeMatch[0]
			if seq[0] == '\\' && seq[1] == 'u' {
				third := strings.ToLower(string(seq[2]))
				if third == "d" {
					fourth := strings.ToLower(string(seq[3]))
					if fourth == "8" || fourth == "9" || fourth == "a" || fourth == "b" {
						// High surrogate, add low surrogate
						unicodeMarkerPadding += "\\udc00"
					}
				}
			}
		}
	}

	canParse := func(str string) bool {
		var test any
		return json.Unmarshal([]byte(str), &test) == nil
	}

	// Heal based on stack top element type
	healedJSON := partialInput
	jsonDumpMarker := ""
	topElement := errLoc.stack[len(errLoc.stack)-1]

	if topElement.Type == JSONStackElementKey {
		// We're inside an object value
		if lastNonSpaceChar == ':' && canParse(healedJSON+"1"+closing) {
			jsonDumpMarker = "\"" + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if canParse(healedJSON + ": 1" + closing) {
			jsonDumpMarker = ":\"" + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if lastNonSpaceChar == '{' && canParse(healedJSON+closing) {
			jsonDumpMarker = "\"" + healingMarker
			healedJSON += jsonDumpMarker + "\": 1" + closing
		} else if canParse(healedJSON + "\"" + closing) {
			jsonDumpMarker = healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if len(healedJSON) > 0 && healedJSON[len(healedJSON)-1] == '\\' && canParse(healedJSON+"\\\""+closing) {
			jsonDumpMarker = "\\" + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if canParse(healedJSON + unicodeMarkerPadding + "\"" + closing) {
			jsonDumpMarker = unicodeMarkerPadding + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else {
			// Find last colon and cut back
			lastColon := strings.LastIndex(healedJSON, ":")
			if lastColon == -1 {
				return nil, false, "", errors.New("cannot heal a truncated JSON that stopped in an unknown location")
			}
			jsonDumpMarker = "\"" + healingMarker
			healedJSON = healedJSON[:lastColon+1] + jsonDumpMarker + "\"" + closing
		}
	} else if topElement.Type == JSONStackElementArray {
		// We're inside an array
		if (lastNonSpaceChar == ',' || lastNonSpaceChar == '[') && canParse(healedJSON+"1"+closing) {
			jsonDumpMarker = "\"" + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if canParse(healedJSON + "\"" + closing) {
			jsonDumpMarker = healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if len(healedJSON) > 0 && healedJSON[len(healedJSON)-1] == '\\' && canParse(healedJSON+"\\\""+closing) {
			jsonDumpMarker = "\\" + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if canParse(healedJSON + unicodeMarkerPadding + "\"" + closing) {
			jsonDumpMarker = unicodeMarkerPadding + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else if !wasMaybeNumber() && canParse(healedJSON+", 1"+closing) {
			jsonDumpMarker = ",\"" + healingMarker
			healedJSON += jsonDumpMarker + "\"" + closing
		} else {
			lastBracketOrComma := strings.LastIndexAny(healedJSON, "[,")
			if lastBracketOrComma == -1 {
				return nil, false, "", errors.New("cannot heal a truncated JSON array stopped in an unknown location")
			}
			jsonDumpMarker = "\"" + healingMarker
			healedJSON = healedJSON[:lastBracketOrComma+1] + jsonDumpMarker + "\"" + closing
		}
	} else if topElement.Type == JSONStackElementObject {
		// We're inside an object (expecting a key)
		if (lastNonSpaceChar == '{' && canParse(healedJSON+closing)) ||
			(lastNonSpaceChar == ',' && canParse(healedJSON+"\"\": 1"+closing)) {
			jsonDumpMarker = "\"" + healingMarker
			healedJSON += jsonDumpMarker + "\": 1" + closing
		} else if !wasMaybeNumber() && canParse(healedJSON+",\"\": 1"+closing) {
			jsonDumpMarker = ",\"" + healingMarker
			healedJSON += jsonDumpMarker + "\": 1" + closing
		} else if canParse(healedJSON + "\": 1" + closing) {
			jsonDumpMarker = healingMarker
			healedJSON += jsonDumpMarker + "\": 1" + closing
		} else if len(healedJSON) > 0 && healedJSON[len(healedJSON)-1] == '\\' && canParse(healedJSON+"\\\": 1"+closing) {
			jsonDumpMarker = "\\" + healingMarker
			healedJSON += jsonDumpMarker + "\": 1" + closing
		} else if canParse(healedJSON + unicodeMarkerPadding + "\": 1" + closing) {
			jsonDumpMarker = unicodeMarkerPadding + healingMarker
			healedJSON += jsonDumpMarker + "\": 1" + closing
		} else {
			lastColon := strings.LastIndex(healedJSON, ":")
			if lastColon == -1 {
				return nil, false, "", errors.New("cannot heal a truncated JSON object stopped in an unknown location")
			}
			jsonDumpMarker = "\"" + healingMarker
			healedJSON = healedJSON[:lastColon+1] + jsonDumpMarker + "\"" + closing
		}
	} else {
		return nil, false, "", errors.New("cannot heal a truncated JSON object stopped in an unknown location")
	}

	// Try to parse the healed JSON
	var healedValue any
	if err := json.Unmarshal([]byte(healedJSON), &healedValue); err != nil {
		return nil, false, "", err
	}

	// Remove healing marker from result
	cleaned := removeHealingMarkerFromJSONAny(healedValue, healingMarker)
	return cleaned, true, jsonDumpMarker, nil
}

// parseJSONWithStackTracking parses JSON while tracking the stack structure.
// Returns the error position and any error encountered.
// This implements stack tracking similar to llama.cpp's json_error_locator.
func parseJSONWithStackTracking(input string, errLoc *JSONErrorLocator) (int, error) {
	// First, try to parse to get exact error position
	decoder := json.NewDecoder(strings.NewReader(input))
	var test any
	err := decoder.Decode(&test)
	if err != nil {
		errLoc.foundError = true
		errLoc.exceptionMessage = err.Error()

		var errorPos int
		if syntaxErr, ok := err.(*json.SyntaxError); ok {
			errorPos = int(syntaxErr.Offset)
			errLoc.position = errorPos
		} else {
			// Fallback: use end of input
			errorPos = len(input)
			errLoc.position = errorPos
		}

		// Now build the stack by parsing up to the error position
		// This matches llama.cpp's approach of tracking stack during SAX parsing
		partialInput := input
		if errorPos > 0 && errorPos < len(input) {
			partialInput = input[:errorPos]
		}

		// Track stack by parsing character by character up to error
		pos := 0
		inString := false
		escape := false
		keyStart := -1
		keyEnd := -1

		for pos < len(partialInput) {
			ch := partialInput[pos]

			if escape {
				escape = false
				pos++
				continue
			}

			if ch == '\\' {
				escape = true
				pos++
				continue
			}

			if ch == '"' {
				if !inString {
					// Starting a string
					inString = true
					// Check if we're in an object context (expecting a key)
					if len(errLoc.stack) > 0 {
						top := errLoc.stack[len(errLoc.stack)-1]
						if top.Type == JSONStackElementObject {
							// This could be a key
							keyStart = pos + 1 // Start after quote
						}
					}
				} else {
					// Ending a string
					inString = false
					if keyStart != -1 {
						// This was potentially a key, extract it
						keyEnd = pos
						key := partialInput[keyStart:keyEnd]

						// Look ahead to see if next non-whitespace is ':'
						nextPos := pos + 1
						for nextPos < len(partialInput) && unicode.IsSpace(rune(partialInput[nextPos])) {
							nextPos++
						}
						if nextPos < len(partialInput) && partialInput[nextPos] == ':' {
							// This is a key, add it to stack
							errLoc.stack = append(errLoc.stack, JSONStackElement{Type: JSONStackElementKey, Key: key})
						}
						keyStart = -1
						keyEnd = -1
					}
				}
				pos++
				continue
			}

			if inString {
				pos++
				continue
			}

			// Handle stack operations (outside strings)
			if ch == '{' {
				errLoc.stack = append(errLoc.stack, JSONStackElement{Type: JSONStackElementObject})
			} else if ch == '}' {
				// Pop object and any key on top (keys are popped when value starts, but handle here too)
				for len(errLoc.stack) > 0 {
					top := errLoc.stack[len(errLoc.stack)-1]
					errLoc.stack = errLoc.stack[:len(errLoc.stack)-1]
					if top.Type == JSONStackElementObject {
						break
					}
				}
			} else if ch == '[' {
				errLoc.stack = append(errLoc.stack, JSONStackElement{Type: JSONStackElementArray})
			} else if ch == ']' {
				// Pop array
				for len(errLoc.stack) > 0 {
					top := errLoc.stack[len(errLoc.stack)-1]
					errLoc.stack = errLoc.stack[:len(errLoc.stack)-1]
					if top.Type == JSONStackElementArray {
						break
					}
				}
			} else if ch == ':' {
				// Colon means we're starting a value, pop the key if it's on stack
				if len(errLoc.stack) > 0 && errLoc.stack[len(errLoc.stack)-1].Type == JSONStackElementKey {
					errLoc.stack = errLoc.stack[:len(errLoc.stack)-1]
				}
			}
			// Note: commas and whitespace don't affect stack structure

			pos++
		}

		return errorPos, err
	}

	// No error, parse was successful - build stack anyway for completeness
	// (though we shouldn't need healing in this case)
	pos := 0
	inString := false
	escape := false

	for pos < len(input) {
		ch := input[pos]

		if escape {
			escape = false
			pos++
			continue
		}

		if ch == '\\' {
			escape = true
			pos++
			continue
		}

		if ch == '"' {
			inString = !inString
			pos++
			continue
		}

		if inString {
			pos++
			continue
		}

		if ch == '{' {
			errLoc.stack = append(errLoc.stack, JSONStackElement{Type: JSONStackElementObject})
		} else if ch == '}' {
			for len(errLoc.stack) > 0 {
				top := errLoc.stack[len(errLoc.stack)-1]
				errLoc.stack = errLoc.stack[:len(errLoc.stack)-1]
				if top.Type == JSONStackElementObject {
					break
				}
			}
		} else if ch == '[' {
			errLoc.stack = append(errLoc.stack, JSONStackElement{Type: JSONStackElementArray})
		} else if ch == ']' {
			for len(errLoc.stack) > 0 {
				top := errLoc.stack[len(errLoc.stack)-1]
				errLoc.stack = errLoc.stack[:len(errLoc.stack)-1]
				if top.Type == JSONStackElementArray {
					break
				}
			}
		}

		pos++
	}

	return len(input), nil
}
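To make the healing behavior concrete, the sketch below exercises parseJSONWithStack on a truncated tool-call payload. It is a minimal example that would live in a _test.go file inside the functions package, not part of this commit; the input string and marker value are arbitrary, and because the final value also passes through removeHealingMarkerFromJSONAny (defined elsewhere in the package), the result is printed rather than asserted.

package functions

import "fmt"

// Example_healTruncatedJSON is a minimal sketch showing how the healer
// completes a JSON object that was cut off mid-string.
func Example_healTruncatedJSON() {
	// Truncated mid-value: the "city" string and both braces are unterminated.
	input := `{"name": "get_weather", "arguments": {"city": "Par`

	value, healed, marker, err := parseJSONWithStack(input, "$$MARKER$$")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	// healed is true because closing quotes and braces had to be synthesized;
	// marker records what was appended before the synthesized closers.
	fmt.Println(healed, marker)
	fmt.Printf("%#v\n", value)
}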