Fix JSON path parsing for unnecessarily quoted field names

Fixes dolthub/dolt#9556

The issue was in dolt's JSON path parser where unnecessarily quoted
simple field names like $."a" were being rejected, despite MySQL
accepting them. This caused compatibility issues with Django's
compile_json_path() function which always quotes keys.

Changes:
- Fixed lexer in json_location.go to properly handle quoted field names
- Adjusted token position when encountering opening quotes
- Corrected empty string detection logic for quoted keys
- Added comprehensive tests matching the customer's use case

The fix ensures MySQL compatibility for:
- $.a (unquoted field names)
- $."a" (unnecessarily quoted simple field names)
- $."a key" (necessarily quoted field names with spaces)
- $."a"."b" (nested quoted field names)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
elianddb
2025-07-23 17:05:09 +00:00
parent 8eca9c72b8
commit e8167be7df
2 changed files with 46 additions and 1 deletions

View File

@@ -275,6 +275,7 @@ func jsonPathElementsFromMySQLJsonPath(pathBytes []byte) (jsonLocation, error) {
}
case lexStateKey:
if pathBytes[i] == '"' {
tok = i // Point tok to the opening quote
state = lexStateQuotedKey
i += 1
} else if pathBytes[i] == '.' || pathBytes[i] == '[' {
@@ -292,7 +293,7 @@ func jsonPathElementsFromMySQLJsonPath(pathBytes []byte) (jsonLocation, error) {
}
case lexStateQuotedKey:
if pathBytes[i] == '"' {
if tok+1 == i-1 {
if tok+1 == i {
return jsonLocation{}, fmt.Errorf("Invalid JSON path expression. Expected field name after '.' at character %v of %s", i, string(pathBytes))
}
pathKey := unescapeKey(pathBytes[tok+1 : i])

View File

@@ -294,3 +294,47 @@ SQL
[ "$status" -eq 0 ]
[[ "$output" =~ "Blob" ]] || false
}
# Tests for dolthub/dolt#9556: JSON path handling with unnecessary quotes
# This tests Django compatibility where compile_json_path() always quotes keys
@test "json: JSON_SET with quoted field names" {
dolt sql <<SQL
CREATE TABLE test_data (data JSON);
INSERT INTO test_data VALUES ('{}');
SQL
# Test unquoted JSON paths (should work)
run dolt sql -q "UPDATE test_data SET data = JSON_SET(data, '$.a', 'b');"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT data FROM test_data;" -r csv
[ "$status" -eq 0 ]
[ "${lines[1]}" = '"{""a"":""b""}"' ]
# Test necessarily quoted JSON paths (should work)
run dolt sql -q "UPDATE test_data SET data = JSON_SET(data, '$.\"a key\"', 'b');"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT data FROM test_data;" -r csv
[ "$status" -eq 0 ]
[ "${lines[1]}" = '"{""a"":""b"",""a key"":""b""}"' ]
# Test unnecessarily quoted JSON paths (this was the failing case)
run dolt sql -q "UPDATE test_data SET data = JSON_SET(data, '$.\"c\"', 'test');"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT data FROM test_data;" -r csv
[ "$status" -eq 0 ]
[ "${lines[1]}" = '"{""a"":""b"",""a key"":""b"",""c"":""test""}"' ]
# Test nested unnecessarily quoted paths (first create the parent object)
run dolt sql -q "UPDATE test_data SET data = JSON_SET(data, '$.\"d\"', JSON_OBJECT());"
[ "$status" -eq 0 ]
run dolt sql -q "UPDATE test_data SET data = JSON_SET(data, '$.\"d\".\"e\"', 'nested');"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT data FROM test_data;" -r csv
[ "$status" -eq 0 ]
[ "${lines[1]}" = '"{""a"":""b"",""a key"":""b"",""c"":""test"",""d"":{""e"":""nested""}}"' ]
}