diff --git a/go/go.mod b/go/go.mod index a63f5221b0..0ffe6aedba 100644 --- a/go/go.mod +++ b/go/go.mod @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash v1.1.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.18.2-0.20240429214844-6feb67867355 + github.com/dolthub/go-mysql-server v0.18.2-0.20240430015631-3d60d20186c8 github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 diff --git a/go/go.sum b/go/go.sum index 1e65e20880..fb8e9f1210 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= -github.com/dolthub/go-mysql-server v0.18.2-0.20240429214844-6feb67867355 h1:Dylx0T0J40z3momZ0pDlUm0PWEvPWrcOVkeZ9jFXtVQ= -github.com/dolthub/go-mysql-server v0.18.2-0.20240429214844-6feb67867355/go.mod h1:T6EEu2iQoasR13Ovtp44yDn+rXQOBgh3BACPZMxSF/8= +github.com/dolthub/go-mysql-server v0.18.2-0.20240430015631-3d60d20186c8 h1:Xm6syv6978frTakO8OAvmcwXDEKq1Eij7rJFr6F+BNQ= +github.com/dolthub/go-mysql-server v0.18.2-0.20240430015631-3d60d20186c8/go.mod h1:T6EEu2iQoasR13Ovtp44yDn+rXQOBgh3BACPZMxSF/8= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514= diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go 
b/go/libraries/doltcore/merge/merge_prolly_rows.go index 7a569e8588..96596909f5 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -1998,7 +1998,11 @@ func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAd return nil, true, nil } - mergedBytes, err := json.Marshal(mergedDoc.ToInterface()) + mergedVal, err := mergedDoc.ToInterface() + if err != nil { + return nil, true, err + } + mergedBytes, err := json.Marshal(mergedVal) if err != nil { return nil, true, err } diff --git a/go/libraries/doltcore/merge/violations_fk_prolly.go b/go/libraries/doltcore/merge/violations_fk_prolly.go index cb99f8b570..417c3838d8 100644 --- a/go/libraries/doltcore/merge/violations_fk_prolly.go +++ b/go/libraries/doltcore/merge/violations_fk_prolly.go @@ -396,7 +396,7 @@ type FkCVMeta struct { var _ sql.JSONWrapper = FkCVMeta{} -func (m FkCVMeta) ToInterface() interface{} { +func (m FkCVMeta) ToInterface() (interface{}, error) { return map[string]interface{}{ "Columns": m.Columns, "ForeignKey": m.ForeignKey, @@ -407,7 +407,7 @@ func (m FkCVMeta) ToInterface() interface{} { "ReferencedIndex": m.ReferencedIndex, "ReferencedTable": m.ReferencedTable, "Table": m.Table, - } + }, nil } // PrettyPrint is a custom pretty print function to match the old format's diff --git a/go/libraries/doltcore/merge/violations_unique_prolly.go b/go/libraries/doltcore/merge/violations_unique_prolly.go index b302f630d4..2fe65e41f1 100644 --- a/go/libraries/doltcore/merge/violations_unique_prolly.go +++ b/go/libraries/doltcore/merge/violations_unique_prolly.go @@ -54,11 +54,11 @@ type UniqCVMeta struct { Name string `json:"Name"` } -func (m UniqCVMeta) ToInterface() interface{} { +func (m UniqCVMeta) ToInterface() (interface{}, error) { return map[string]interface{}{ "Columns": m.Columns, "Name": m.Name, - } + }, nil } var _ sql.JSONWrapper = UniqCVMeta{} @@ -149,10 +149,10 @@ func newNotNullViolationMeta(violations 
[]string, value val.Tuple) (prolly.Const }, nil } -func (m NullViolationMeta) ToInterface() interface{} { +func (m NullViolationMeta) ToInterface() (interface{}, error) { return map[string]interface{}{ "Columns": m.Columns, - } + }, nil } func (m NullViolationMeta) Unmarshall(ctx *sql.Context) (val types.JSONDocument, err error) { @@ -193,9 +193,9 @@ func (m CheckCVMeta) Unmarshall(_ *sql.Context) (val types.JSONDocument, err err return types.JSONDocument{Val: m}, nil } -func (m CheckCVMeta) ToInterface() interface{} { +func (m CheckCVMeta) ToInterface() (interface{}, error) { return map[string]interface{}{ "Name": m.Name, "Expression": m.Expression, - } + }, nil } diff --git a/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_alltypes_test.go b/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_alltypes_test.go index 37f640f20a..e486b8836e 100644 --- a/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_alltypes_test.go +++ b/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_alltypes_test.go @@ -15,6 +15,7 @@ package binlogreplication import ( + "encoding/json" "fmt" "math/rand" "strings" @@ -521,11 +522,16 @@ func assertValues(t *testing.T, assertionIndex int, row map[string]interface{}) if typeDesc.TypeDefinition == "json" { // LD_1 and DOLT storage formats return JSON strings slightly differently; DOLT removes spaces // while LD_1 add whitespace, so for json comparison, we sanitize by removing whitespace. 
- actualValue = strings.ReplaceAll(actualValue, " ", "") + var actual interface{} + json.Unmarshal([]byte(actualValue), &actual) + var expected interface{} + json.Unmarshal([]byte(expectedValue.(string)), &expected) + require.EqualValues(t, expected, actual, + "Failed on assertion %d for for column %q", assertionIndex, typeDesc.ColumnName()) + } else { + require.EqualValues(t, expectedValue, actualValue, + "Failed on assertion %d for for column %q", assertionIndex, typeDesc.ColumnName()) } - - require.EqualValues(t, expectedValue, actualValue, - "Failed on assertion %d for for column %q", assertionIndex, typeDesc.ColumnName()) } } diff --git a/go/libraries/doltcore/sqle/json/noms_json_value.go b/go/libraries/doltcore/sqle/json/noms_json_value.go index 914a859eaf..c75ecf6b81 100644 --- a/go/libraries/doltcore/sqle/json/noms_json_value.go +++ b/go/libraries/doltcore/sqle/json/noms_json_value.go @@ -46,7 +46,10 @@ func NomsJSONFromJSONValue(ctx context.Context, vrw types.ValueReadWriter, val s return noms, nil } - sqlVal := val.ToInterface() + sqlVal, err := val.ToInterface() + if err != nil { + return NomsJSON{}, err + } v, err := marshalJSON(ctx, vrw, sqlVal) if err != nil { @@ -133,17 +136,17 @@ func marshalJSONObject(ctx context.Context, vrw types.ValueReadWriter, obj map[s return types.NewMap(ctx, vrw, vals...) } -func (v NomsJSON) ToInterface() interface{} { +func (v NomsJSON) ToInterface() (interface{}, error) { nomsVal, err := types.JSON(v).Inner() if err != nil { - panic(err) + return nil, err } val, err := unmarshalJSON(context.Background(), nomsVal) if err != nil { - panic(err) + return nil, err } - return val + return val, nil } // Unmarshall implements the sql.JSONValue interface. 
diff --git a/go/libraries/doltcore/sqle/schema_table.go b/go/libraries/doltcore/sqle/schema_table.go index a8d3eb5a51..b181b565f0 100644 --- a/go/libraries/doltcore/sqle/schema_table.go +++ b/go/libraries/doltcore/sqle/schema_table.go @@ -300,6 +300,9 @@ func getSchemaFragmentsOfType(ctx *sql.Context, tbl *WritableDoltTable, fragType // Extract Created Time from JSON column createdTime, err := getCreatedTime(ctx, sqlRow[extraIdx].(sql.JSONWrapper)) + if err != nil { + return nil, err + } frags = append(frags, schemaFragment{ name: sqlRow[nameIdx].(string), @@ -327,9 +330,12 @@ func loadDefaultSqlMode() (string, error) { } func getCreatedTime(ctx *sql.Context, extraCol sql.JSONWrapper) (int64, error) { - doc := extraCol.ToInterface() + doc, err := extraCol.ToInterface() + if err != nil { + return 0, err + } - err := fmt.Errorf("value %v does not contain creation time", doc) + err = fmt.Errorf("value %v does not contain creation time", doc) obj, ok := doc.(map[string]interface{}) if !ok { diff --git a/go/libraries/doltcore/sqle/schema_table_test.go b/go/libraries/doltcore/sqle/schema_table_test.go index a435329832..8272429ff8 100644 --- a/go/libraries/doltcore/sqle/schema_table_test.go +++ b/go/libraries/doltcore/sqle/schema_table_test.go @@ -27,7 +27,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/json" "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" ) @@ -137,16 +136,9 @@ func TestSchemaTableMigrationV1(t *testing.T) { require.NoError(t, err) // convert the JSONDocument to a string for comparison if row[3] != nil { - // Annoying difference in representation between storage versions here - jsonDoc, ok := row[3].(gmstypes.JSONDocument) + jsonDoc, ok := row[3].(sql.JSONWrapper) if ok { - row[3], err = jsonDoc.JSONString() - row[3] = strings.ReplaceAll(row[3].(string), " ", "") // remove spaces - } - - nomsJson, ok := 
row[3].(json.NomsJSON) - if ok { - row[3], err = nomsJson.JSONString() + row[3], err = gmstypes.StringifyJSON(jsonDoc) row[3] = strings.ReplaceAll(row[3].(string), " ", "") // remove spaces } diff --git a/go/libraries/doltcore/sqle/statspro/dolt_stats.go b/go/libraries/doltcore/sqle/statspro/dolt_stats.go index 8f75b9229e..4a0e8e7ea4 100644 --- a/go/libraries/doltcore/sqle/statspro/dolt_stats.go +++ b/go/libraries/doltcore/sqle/statspro/dolt_stats.go @@ -136,15 +136,23 @@ func NewDoltStats() *DoltStats { return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}} } -func (s *DoltStats) ToInterface() interface{} { - ret := s.Statistic.ToInterface().(map[string]interface{}) +func (s *DoltStats) ToInterface() (interface{}, error) { + statVal, err := s.Statistic.ToInterface() + if err != nil { + return nil, err + } + ret := statVal.(map[string]interface{}) var hist sql.Histogram for _, b := range s.Hist { hist = append(hist, b) } - ret["statistic"].(map[string]interface{})["buckets"] = hist.ToInterface() - return ret + histVal, err := hist.ToInterface() + if err != nil { + return nil, err + } + ret["statistic"].(map[string]interface{})["buckets"] = histVal + return ret, nil } func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) { diff --git a/go/libraries/doltcore/table/typed/json/writer.go b/go/libraries/doltcore/table/typed/json/writer.go index 0c1d1b7d8c..ac51f92cdc 100644 --- a/go/libraries/doltcore/table/typed/json/writer.go +++ b/go/libraries/doltcore/table/typed/json/writer.go @@ -220,6 +220,8 @@ func (j *RowWriter) jsonDataForSqlSchema(row sql.Row) ([]byte, error) { // This is kind of silly: we are unmarshalling JSON just to marshall it back again // But it makes marshalling much simpler + // Reset val so we don't unmarshall into the old value. 
+ val = nil err = json.Unmarshal([]byte(str), &val) if err != nil { return nil, err diff --git a/go/store/prolly/tree/blob_builder.go b/go/store/prolly/tree/blob_builder.go index 2c8b60c619..c2578cf0fc 100644 --- a/go/store/prolly/tree/blob_builder.go +++ b/go/store/prolly/tree/blob_builder.go @@ -20,11 +20,13 @@ import ( "errors" "io" - "github.com/dolthub/go-mysql-server/sql/types" + "github.com/dolthub/go-mysql-server/sql" + sqltypes "github.com/dolthub/go-mysql-server/sql/types" "github.com/goccy/go-json" "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/prolly/message" + "github.com/dolthub/dolt/go/store/types" ) const DefaultFixedChunkLength = 4000 @@ -272,19 +274,28 @@ func NewJSONDoc(addr hash.Hash, ns NodeStore) *JSONDoc { return &JSONDoc{ImmutableTree{Addr: addr, ns: ns}} } -func (b *JSONDoc) ToJSONDocument(ctx context.Context) (types.JSONDocument, error) { +func (b *JSONDoc) ToJSONDocument(ctx context.Context) (sqltypes.JSONDocument, error) { buf, err := b.bytes(ctx) if err != nil { - return types.JSONDocument{}, err + return sqltypes.JSONDocument{}, err } - var doc types.JSONDocument + var doc sqltypes.JSONDocument err = json.Unmarshal(buf, &doc.Val) if err != nil { - return types.JSONDocument{}, err + return sqltypes.JSONDocument{}, err } return doc, err } +func (b *JSONDoc) ToLazyJSONDocument(ctx context.Context) (sql.JSONWrapper, error) { + buf, err := b.bytes(ctx) + if err != nil { + return sqltypes.JSONDocument{}, err + } + buf = types.UnescapeHTMLCodepoints(buf) + return sqltypes.NewLazyJSONDocument(buf), nil +} + func (b *JSONDoc) ToString(ctx context.Context) (string, error) { buf, err := b.bytes(ctx) if err != nil { diff --git a/go/store/prolly/tree/prolly_fields.go b/go/store/prolly/tree/prolly_fields.go index b8e760154f..4874692c93 100644 --- a/go/store/prolly/tree/prolly_fields.go +++ b/go/store/prolly/tree/prolly_fields.go @@ -24,6 +24,7 @@ import ( "math" "time" + "github.com/dolthub/go-mysql-server/sql" 
"github.com/dolthub/go-mysql-server/sql/types" "github.com/shopspring/decimal" @@ -127,7 +128,7 @@ func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns No var h hash.Hash h, ok = td.GetJSONAddr(i, tup) if ok { - v, err = NewJSONDoc(h, ns).ToJSONDocument(ctx) + v, err = NewJSONDoc(h, ns).ToLazyJSONDocument(ctx) } case val.StringAddrEnc: var h hash.Hash @@ -409,5 +410,5 @@ func convJson(v interface{}) (buf []byte, err error) { if err != nil { return nil, err } - return json.Marshal(v.(types.JSONDocument).Val) + return types.MarshallJson(v.(sql.JSONWrapper)) } diff --git a/go/store/prolly/tree/prolly_fields_test.go b/go/store/prolly/tree/prolly_fields_test.go index e0fefc441d..fbe3967d62 100644 --- a/go/store/prolly/tree/prolly_fields_test.go +++ b/go/store/prolly/tree/prolly_fields_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/expression/function/spatial" "github.com/dolthub/go-mysql-server/sql/types" "github.com/shopspring/decimal" @@ -195,7 +196,19 @@ func testRoundTripProllyFields(t *testing.T, test prollyFieldTest) { v, err := GetField(context.Background(), desc, 0, tup, ns) assert.NoError(t, err) - assert.Equal(t, test.value, v) + jsonType := val.Type{Enc: val.JSONAddrEnc} + if test.typ == jsonType { + getJson := func(field interface{}) interface{} { + jsonWrapper, ok := field.(sql.JSONWrapper) + require.Equal(t, ok, true) + val, err := jsonWrapper.ToInterface() + require.NoError(t, err) + return val + } + assert.Equal(t, getJson(test.value), getJson(v)) + } else { + assert.Equal(t, test.value, v) + } } func mustParseGeometryType(t *testing.T, s string) (v interface{}) { diff --git a/go/store/types/json.go b/go/store/types/json.go index f1b0e54427..85a308c838 100644 --- a/go/store/types/json.go +++ b/go/store/types/json.go @@ -18,6 +18,7 @@ import ( "context" "errors" "fmt" + "slices" "strings" "github.com/dolthub/dolt/go/store/d" @@ -376,3 +377,50 
@@ func compareJSONNumber(a Float, b Value) (int, error) {
 		return 1, nil
 	}
 }
+
+// UnescapeHTMLCodepoints replaces escaped HTML characters in serialized JSON with their unescaped equivalents.
+// Due to an oversight, the representation of JSON in storage escapes these characters, and we unescape them
+// before displaying them to the user. Only the exact sequences \u003c, \u003e and \u0026 are rewritten; every
+// other byte is preserved. The rewrite is done in place, so the result aliases (and overwrites) |path|.
+func UnescapeHTMLCodepoints(path []byte) []byte {
+	nextToRead := path
+	written := 0
+	for index := findNextEscapedUnicodeCodepoint(nextToRead); index != -1; index = findNextEscapedUnicodeCodepoint(nextToRead) {
+		written += copy(path[written:], nextToRead[:index]) // keep the bytes before the escape
+		newChar := byte(0)
+		if len(nextToRead) >= index+6 { // a truncated escape is too short to match and is kept as-is
+			switch digits := nextToRead[index+2 : index+6]; {
+			case slices.Equal(digits, []byte("003c")):
+				newChar = '<'
+			case slices.Equal(digits, []byte("003e")):
+				newChar = '>'
+			case slices.Equal(digits, []byte("0026")):
+				newChar = '&'
+			}
+		}
+		if newChar == 0 {
+			// Not an HTML escape: keep the "\u" prefix and resume scanning right after it.
+			written += copy(path[written:], nextToRead[index:index+2])
+			nextToRead = nextToRead[index+2:]
+			continue
+		}
+		path[written] = newChar
+		written++
+		nextToRead = nextToRead[index+6:]
+	}
+	written += copy(path[written:], nextToRead)
+	return path[:written]
+}
+
+// findNextEscapedUnicodeCodepoint returns the index of the first unescaped "\u" in |path|, or -1 if there is none.
+func findNextEscapedUnicodeCodepoint(path []byte) int {
+	for index := 0; index+1 < len(path); index++ {
+		if path[index] == '\\' {
+			if path[index+1] == 'u' {
+				return index
+			}
+			index++ // a backslash consumes the byte after it, so the "u" in "\\u" never starts an escape
+		}
+	}
+	return -1
+}
diff --git a/go/store/types/json_test.go b/go/store/types/json_test.go
new file mode 100644
index 0000000000..f1c8f88e4d
--- /dev/null
+++ b/go/store/types/json_test.go
@@ -0,0 +1,70 @@
+// Copyright 2024 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUnescapeHTMLCodepoints(t *testing.T) { + testCases := []struct { + name string + input []byte + expected []byte + }{ + { + name: "Unescape <", + input: []byte("\\u003c"), + expected: []byte("<"), + }, + { + name: "Unescape >", + input: []byte("\\u003e"), + expected: []byte(">"), + }, + { + name: "Unescape &", + input: []byte("\\u0026"), + expected: []byte("&"), + }, + { + name: "Don't unescape other codepoints", + input: []byte("\\u00ff"), + expected: []byte("\\u00ff"), + }, + { + name: "Escape multiple codepoints", + input: []byte("\\u003c\\u003e\\u0026"), + expected: []byte("<>&"), + }, + { + name: "Don't escape if the \\ is escaped", + input: []byte("\\\\u003c"), + expected: []byte("\\\\u003c"), + }, + { + name: "Escape codepoints w/ surrounding text", + input: []byte("A\\u003cB"), + expected: []byte("A