mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-12 11:29:01 -05:00
Updating journal to allow chunk records larger than 1MB. Adding tests for JSON cases that triggered this.
This commit is contained in:
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/dolthub/go-mysql-server/sql/plan"
|
||||
"github.com/dolthub/go-mysql-server/sql/types"
|
||||
"github.com/dolthub/vitess/go/vt/sqlparser"
|
||||
"github.com/hashicorp/go-uuid"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtablefunctions"
|
||||
)
|
||||
@@ -4769,6 +4770,46 @@ var LargeJsonObjectScriptTests = []queries.ScriptTest{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
// JSON chunking can't currently break chunks in a JSON value, so large string values can
|
||||
// generate chunks that are larger than typical chunks.
|
||||
Name: "JSON with large string (> 1MB)",
|
||||
SetUpScript: []string{
|
||||
"create table t (pk int primary key, j1 JSON)",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
// NOTE: This doesn't trigger the same error that we see with sql-server
|
||||
// because the Golang enginetests use an in-memory chunk store, and
|
||||
// not the filesystem journaling chunk store.
|
||||
Query: fmt.Sprintf(`insert into t (pk, j1) VALUES (1, '{"large_value": "%s"}');`, generateStringData(1024*1024*3)),
|
||||
Expected: []sql.Row{{types.OkResult{RowsAffected: 1}}},
|
||||
},
|
||||
{
|
||||
Skip: true,
|
||||
// TODO: The JSON is coming back truncated for some reason and failing this test.
|
||||
// When that's fixed, unskip this test, and fix the length value below.
|
||||
Query: `SELECT pk, length(j1) from t;`,
|
||||
Expected: []sql.Row{{1, 123}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// generateStringData generates random string data of length |length|. The data is generated
|
||||
// using UUIDs to avoid data that could be easily compressed.
|
||||
func generateStringData(length int) string {
|
||||
var b strings.Builder
|
||||
for length > 0 {
|
||||
uuid, err := uuid.GenerateUUID()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
uuid = strings.ReplaceAll(uuid, "-", "")
|
||||
b.WriteString(uuid)
|
||||
length -= len(uuid)
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
var DoltTagTestScripts = []queries.ScriptTest{
|
||||
|
||||
@@ -92,9 +92,6 @@ const (
|
||||
journalRecAddrSz = 20
|
||||
journalRecChecksumSz = 4
|
||||
journalRecTimestampSz = 8
|
||||
|
||||
// todo(andy): less arbitrary
|
||||
journalRecMaxSz = 128 * 1024
|
||||
)
|
||||
|
||||
// journalRecordTimestampGenerator returns the current time in Unix epoch seconds. This function is stored in a
|
||||
@@ -248,9 +245,7 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, off int64, cb f
|
||||
}
|
||||
|
||||
l := readUint32(buf)
|
||||
if l > journalRecMaxSz {
|
||||
break
|
||||
} else if buf, err = rdr.Peek(int(l)); err != nil {
|
||||
if buf, err = rdr.Peek(int(l)); err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
|
||||
@@ -38,8 +38,11 @@ const (
|
||||
chunkJournalFileSize = 16 * 1024
|
||||
|
||||
// todo(andy): buffer must be able to hold an entire record,
|
||||
// but we don't have a hard limit on record size right now
|
||||
journalWriterBuffSize = 1024 * 1024
|
||||
// but we don't have a hard limit on record size right now.
|
||||
// JSON data has cases where it won't chunk down as small as other data,
|
||||
// so we have increased this to 5MB. If/when JSON chunking handles those
|
||||
// cases, we could decrease this size to 1MB again.
|
||||
journalWriterBuffSize = 5 * 1024 * 1024
|
||||
|
||||
chunkJournalAddr = chunks.JournalFileID
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -261,3 +261,23 @@ SQL
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = '1,"[{""a"":""<>&""}]"' ]
|
||||
}
|
||||
|
||||
@test "json: insert large string value (> 1MB)" {
|
||||
dolt sql <<SQL
|
||||
CREATE TABLE t (
|
||||
pk int PRIMARY KEY,
|
||||
j1 json
|
||||
);
|
||||
SQL
|
||||
|
||||
dolt sql -f $BATS_TEST_DIRNAME/json-large-value-insert.sql
|
||||
|
||||
# TODO: Retrieving the JSON errors with a JSON truncated message
|
||||
# Unskip this once the JSON truncation issue is fixed and
|
||||
# fill in the expected length below.
|
||||
skip "Function Support is currently disabled"
|
||||
|
||||
run dolt sql -q "SELECT pk, length(j1) FROM t;" -r csv
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${lines[1]}" = '1,???' ]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user