From 30f190ef621608e31037d2f5b6201e7435061993 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 1 Feb 2022 11:21:21 -0800 Subject: [PATCH 001/105] Allow duplicate indexes --- go/libraries/doltcore/schema/index_coll.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/libraries/doltcore/schema/index_coll.go b/go/libraries/doltcore/schema/index_coll.go index ab1b654fb4..83a32225f4 100644 --- a/go/libraries/doltcore/schema/index_coll.go +++ b/go/libraries/doltcore/schema/index_coll.go @@ -141,9 +141,9 @@ func (ixc *indexCollectionImpl) AddIndexByColTags(indexName string, tags []uint6 if !ixc.tagsExist(tags...) { return nil, fmt.Errorf("tags %v do not exist on this table", tags) } - if ixc.hasIndexOnTags(tags...) { - return nil, fmt.Errorf("cannot create a duplicate index on this table") - } + // if ixc.hasIndexOnTags(tags...) { + // return nil, fmt.Errorf("cannot create a duplicate index on this table") + // } index := &indexImpl{ indexColl: ixc, name: indexName, From 1b4f9461e1ea6364818db8d6a3b71900c1db4de9 Mon Sep 17 00:00:00 2001 From: Lars Lehtonen Date: Fri, 4 Feb 2022 10:01:15 -0800 Subject: [PATCH 002/105] libraries/doltcore/doltdb: fix dropped GC error --- go/libraries/doltcore/doltdb/doltdb.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index 8dfc35654f..9fe5a051f6 100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -1171,6 +1171,9 @@ func (ddb *DoltDB) GC(ctx context.Context, uncommitedVals ...hash.Hash) error { } datasets, err := ddb.db.Datasets(ctx) + if err != nil { + return err + } newGen := hash.NewHashSet(uncommitedVals...) 
oldGen := make(hash.HashSet) err = datasets.IterAll(ctx, func(key, value types.Value) error { From 47467c47c6e4561086a0c4f320b29afa90cac615 Mon Sep 17 00:00:00 2001 From: Lars Lehtonen Date: Fri, 4 Feb 2022 10:08:41 -0800 Subject: [PATCH 003/105] libraries/doltcore/doltdb: fix dropped test errors --- go/libraries/doltcore/doltdb/commit_hooks_test.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/go/libraries/doltcore/doltdb/commit_hooks_test.go b/go/libraries/doltcore/doltdb/commit_hooks_test.go index 2e2e41d8bb..1e1cfe5d42 100644 --- a/go/libraries/doltcore/doltdb/commit_hooks_test.go +++ b/go/libraries/doltcore/doltdb/commit_hooks_test.go @@ -23,6 +23,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.uber.org/zap/buffer" "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" @@ -128,13 +129,17 @@ func TestPushOnWriteHook(t *testing.T) { t.Run("replicate to remote", func(t *testing.T) { srcCommit, err := ddb.Commit(context.Background(), valHash, ref.NewBranchRef(defaultBranch), meta) + require.NoError(t, err) + ds, err := ddb.db.GetDataset(ctx, "refs/heads/main") + require.NoError(t, err) + err = hook.Execute(ctx, ds, ddb.db) - assert.NoError(t, err) + require.NoError(t, err) cs, _ = NewCommitSpec(defaultBranch) destCommit, err := destDB.Resolve(context.Background(), cs, nil) - + require.NoError(t, err) srcHash, _ := srcCommit.HashOf() destHash, _ := destCommit.HashOf() assert.Equal(t, srcHash, destHash) @@ -252,12 +257,15 @@ func TestAsyncPushOnWrite(t *testing.T) { meta, err = NewCommitMeta(committerName, committerEmail, "Sample data") if err != nil { - t.Error("Failed to commit") + t.Error("Failed to create CommitMeta") } _, err = ddb.Commit(context.Background(), valHash, ref.NewBranchRef(defaultBranch), meta) + require.NoError(t, err) ds, err := ddb.db.GetDataset(ctx, "refs/heads/main") + require.NoError(t, err) err = hook.Execute(ctx, 
ds, ddb.db) + require.NoError(t, err) } }) } From bdd769960ff4cbc3fdcd9a033b4a3744f4f83c33 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 7 Feb 2022 16:03:58 -0800 Subject: [PATCH 004/105] Added new param to sql.RowIterToRows --- go/cmd/dolt/commands/sql.go | 6 +++--- go/libraries/doltcore/mvdata/engine_table_writer.go | 4 ++-- go/libraries/doltcore/sqle/enginetest/dolt_harness.go | 4 ++-- go/libraries/doltcore/sqle/index/mergeable_indexes_test.go | 4 ++-- go/libraries/doltcore/sqle/show_create_table.go | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go index d8713d13cb..6ec88738be 100644 --- a/go/cmd/dolt/commands/sql.go +++ b/go/cmd/dolt/commands/sql.go @@ -996,7 +996,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch if err != nil { return nil, nil, err } - _, err = sql.RowIterToRows(ctx, ri) + _, err = sql.RowIterToRows(ctx, nil, ri) if err != nil { return nil, nil, err } @@ -1007,7 +1007,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch if err != nil { return nil, nil, err } - _, err = sql.RowIterToRows(ctx, ri) + _, err = sql.RowIterToRows(ctx, nil, ri) if err != nil { return nil, nil, err } @@ -1017,7 +1017,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch if err != nil { return nil, nil, err } - _, err = sql.RowIterToRows(ctx, ri) + _, err = sql.RowIterToRows(ctx, nil, ri) if err != nil { return nil, nil, err } diff --git a/go/libraries/doltcore/mvdata/engine_table_writer.go b/go/libraries/doltcore/mvdata/engine_table_writer.go index ec59624991..9e41fa758d 100644 --- a/go/libraries/doltcore/mvdata/engine_table_writer.go +++ b/go/libraries/doltcore/mvdata/engine_table_writer.go @@ -298,7 +298,7 @@ func (s *SqlEngineTableWriter) createTable() error { return err } - analyzedQueryProcess := analyzer.StripQueryProcess(analyzed.(*plan.QueryProcess)) + analyzedQueryProcess 
:= analyzer.StripPassthroughNodes(analyzed.(*plan.QueryProcess)) ri, err := analyzedQueryProcess.RowIter(s.sqlCtx, nil) if err != nil { @@ -342,7 +342,7 @@ func (s *SqlEngineTableWriter) createInsertImportNode(source chan sql.Row, ignor return nil, err } - analyzed = analyzer.StripQueryProcess(analyzed) + analyzed = analyzer.StripPassthroughNodes(analyzed) // Get the first insert (wrapped with the error handler) plan.Inspect(analyzed, func(node sql.Node) bool { diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go index 84bbf73e46..6c10142aa3 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go @@ -283,7 +283,7 @@ func (d *DoltHarness) SnapshotTable(db sql.VersionedDatabase, name string, asOf _, iter, err := e.Query(ctx, "set @@"+dsess.HeadKey(db.Name())+" = COMMIT('-m', 'test commit');") require.NoError(d.t, err) - _, err = sql.RowIterToRows(ctx, iter) + _, err = sql.RowIterToRows(ctx, nil, iter) require.NoError(d.t, err) headHash, err := ctx.GetSessionVariable(ctx, dsess.HeadKey(db.Name())) @@ -298,7 +298,7 @@ func (d *DoltHarness) SnapshotTable(db sql.VersionedDatabase, name string, asOf _, iter, err = e.Query(ctx, query) require.NoError(d.t, err) - _, err = sql.RowIterToRows(ctx, iter) + _, err = sql.RowIterToRows(ctx, nil, iter) require.NoError(d.t, err) return nil diff --git a/go/libraries/doltcore/sqle/index/mergeable_indexes_test.go b/go/libraries/doltcore/sqle/index/mergeable_indexes_test.go index 5ee423a074..3473864a5b 100644 --- a/go/libraries/doltcore/sqle/index/mergeable_indexes_test.go +++ b/go/libraries/doltcore/sqle/index/mergeable_indexes_test.go @@ -1327,7 +1327,7 @@ func TestMergeableIndexes(t *testing.T) { _, iter, err := engine.Query(sqlCtx, query) require.NoError(t, err) - res, err := sql.RowIterToRows(sqlCtx, iter) + res, err := sql.RowIterToRows(sqlCtx, nil, iter) require.NoError(t, err) if 
assert.Equal(t, len(test.pks), len(res)) { @@ -1543,7 +1543,7 @@ func TestMergeableIndexesNulls(t *testing.T) { _, iter, err := engine.Query(sqlCtx, query) require.NoError(t, err) - res, err := sql.RowIterToRows(sqlCtx, iter) + res, err := sql.RowIterToRows(sqlCtx, nil, iter) require.NoError(t, err) if assert.Equal(t, len(test.pks), len(res)) { for i, pk := range test.pks { diff --git a/go/libraries/doltcore/sqle/show_create_table.go b/go/libraries/doltcore/sqle/show_create_table.go index 4cdc0492e0..fd1715efd7 100644 --- a/go/libraries/doltcore/sqle/show_create_table.go +++ b/go/libraries/doltcore/sqle/show_create_table.go @@ -46,7 +46,7 @@ func GetCreateTableStmt(ctx *sql.Context, engine *sqle.Engine, tableName string) if err != nil { return "", err } - rows, err := sql.RowIterToRows(ctx, rowIter) + rows, err := sql.RowIterToRows(ctx, nil, rowIter) if err != nil { return "", err } From a8516cf95cf0f07c5d39132f97875de13d2cfe64 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 13:04:05 -0800 Subject: [PATCH 005/105] initial --- go/store/nbs/index_transformer.go | 97 +++++++++++++++++++++++++++++++ go/store/nbs/table.go | 1 + 2 files changed, 98 insertions(+) create mode 100644 go/store/nbs/index_transformer.go diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go new file mode 100644 index 0000000000..10644bf20a --- /dev/null +++ b/go/store/nbs/index_transformer.go @@ -0,0 +1,97 @@ +package nbs + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + ErrNotEnoughBytesForLength = errors.New("could not read enough bytes to decode length") +) + +// Transforms a byte stream of table file index and instead of writing +// lengths in ordinal order, it writes offsets in ordinal order. 
+type IndexTransformer struct { + src io.Reader + lengthsIdx int64 // Start index of lengths in table file byte stream + suffixesIdx int64 // Start index of suffixes in table file byte stream + + buff []byte + idx int64 + offset uint64 +} + +// Create an IndexTransform given a src reader, chunkCount, and maximum size of read +func NewIndexTransformer(src io.Reader, chunkCount int, maxReadSize int) IndexTransformer { + tuplesSize := int64(chunkCount) * prefixTupleSize + lengthsSize := int64(chunkCount) * lengthSize + + maxNumOffsetsToRead := maxReadSize / offsetSize + buffSize := maxNumOffsetsToRead * lengthSize + + return IndexTransformer{ + src: src, + buff: make([]byte, buffSize), + lengthsIdx: tuplesSize, + suffixesIdx: tuplesSize + lengthsSize, + } +} + +func (itr IndexTransformer) Read(p []byte) (n int, err error) { + // If we will read outside of lengths, just read. + if itr.idx+int64(len(p)) < itr.lengthsIdx || itr.idx >= itr.suffixesIdx { + n, err = itr.src.Read(p) + itr.idx += int64(n) + return n, err + } + + // If we will read on the boundary between tuples and lengths, + // read up to the start of the lengths. + if itr.idx < itr.lengthsIdx { + b := p[:itr.lengthsIdx-itr.idx] + n, err := itr.src.Read(b) + itr.idx += int64(n) + return n, err + } + + if len(p) < offsetSize { + // ASK: Should this be a panic? + // If this case is true, 0 bytes will be read and no error will be + // returned which is undesirable behavior for io.Reader + + // We could return an error instead, but this feels like developer error + panic("len(p) must be at-least offsetSize") + } + + // Now we can assume we are on a length boundary. + + // Alter size of p so we don't read any suffix bytes + if int64(len(p)) > itr.idx-itr.suffixesIdx { + p = p[itr.idx-itr.suffixesIdx:] + } + + // Read as many lengths, as offsets we can fit into p. 
(Assuming lengthsSize < offsetSize) + + num := n / offsetSize + readSize := num * lengthSize + + b := p[readSize:] + n, err = itr.src.Read(b) + if err != nil { + return n, err + } + + // Copy lengths + copy(itr.buff, b) + + // Calculate offsets + for lStart, oStart := 0, 0; lStart < readSize; lStart, oStart = lStart+lengthSize, oStart+offsetSize { + lengthBytes := itr.buff[lStart : lStart+lengthSize] + length := binary.BigEndian.Uint32(lengthBytes) + itr.offset += uint64(length) + binary.BigEndian.PutUint64(p[oStart:oStart+offsetSize], itr.offset) + } + + return n, nil +} diff --git a/go/store/nbs/table.go b/go/store/nbs/table.go index c909889874..f3d195628f 100644 --- a/go/store/nbs/table.go +++ b/go/store/nbs/table.go @@ -130,6 +130,7 @@ const ( uint32Size = 4 ordinalSize = uint32Size lengthSize = uint32Size + offsetSize = uint64Size magicNumber = "\xff\xb5\xd8\xc2\x24\x63\xee\x50" magicNumberSize = 8 //len(magicNumber) footerSize = uint32Size + uint64Size + magicNumberSize From a6ba7a9057caf4595e39ab8910355b635edca041 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 13:24:30 -0800 Subject: [PATCH 006/105] legibility --- go/store/nbs/index_transformer.go | 40 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go index 10644bf20a..c12d8a02ed 100644 --- a/go/store/nbs/index_transformer.go +++ b/go/store/nbs/index_transformer.go @@ -7,11 +7,11 @@ import ( ) var ( - ErrNotEnoughBytesForLength = errors.New("could not read enough bytes to decode length") + ErrNotEnoughBytes = errors.New("reader did not return enough bytes") ) -// Transforms a byte stream of table file index and instead of writing -// lengths in ordinal order, it writes offsets in ordinal order. 
+// IndexTransformer transforms a table file index byte stream with lengths +// to a table file index byte stream with offsets type IndexTransformer struct { src io.Reader lengthsIdx int64 // Start index of lengths in table file byte stream @@ -38,20 +38,20 @@ func NewIndexTransformer(src io.Reader, chunkCount int, maxReadSize int) IndexTr } } -func (itr IndexTransformer) Read(p []byte) (n int, err error) { +func (tra *IndexTransformer) Read(p []byte) (n int, err error) { // If we will read outside of lengths, just read. - if itr.idx+int64(len(p)) < itr.lengthsIdx || itr.idx >= itr.suffixesIdx { - n, err = itr.src.Read(p) - itr.idx += int64(n) + if tra.idx+int64(len(p)) < tra.lengthsIdx || tra.idx >= tra.suffixesIdx { + n, err = tra.src.Read(p) + tra.idx += int64(n) return n, err } // If we will read on the boundary between tuples and lengths, // read up to the start of the lengths. - if itr.idx < itr.lengthsIdx { - b := p[:itr.lengthsIdx-itr.idx] - n, err := itr.src.Read(b) - itr.idx += int64(n) + if tra.idx < tra.lengthsIdx { + b := p[:tra.lengthsIdx-tra.idx] + n, err := tra.src.Read(b) + tra.idx += int64(n) return n, err } @@ -67,8 +67,8 @@ func (itr IndexTransformer) Read(p []byte) (n int, err error) { // Now we can assume we are on a length boundary. // Alter size of p so we don't read any suffix bytes - if int64(len(p)) > itr.idx-itr.suffixesIdx { - p = p[itr.idx-itr.suffixesIdx:] + if int64(len(p)) > tra.idx-tra.suffixesIdx { + p = p[tra.idx-tra.suffixesIdx:] } // Read as many lengths, as offsets we can fit into p. 
(Assuming lengthsSize < offsetSize) @@ -77,20 +77,24 @@ func (itr IndexTransformer) Read(p []byte) (n int, err error) { readSize := num * lengthSize b := p[readSize:] - n, err = itr.src.Read(b) + n, err = tra.src.Read(b) if err != nil { return n, err } + if n != readSize { + return n, ErrNotEnoughBytes + } + tra.idx += int64(n) // Copy lengths - copy(itr.buff, b) + copy(tra.buff, b) // Calculate offsets for lStart, oStart := 0, 0; lStart < readSize; lStart, oStart = lStart+lengthSize, oStart+offsetSize { - lengthBytes := itr.buff[lStart : lStart+lengthSize] + lengthBytes := tra.buff[lStart : lStart+lengthSize] length := binary.BigEndian.Uint32(lengthBytes) - itr.offset += uint64(length) - binary.BigEndian.PutUint64(p[oStart:oStart+offsetSize], itr.offset) + tra.offset += uint64(length) + binary.BigEndian.PutUint64(p[oStart:oStart+offsetSize], tra.offset) } return n, nil From 7eee59ec0eed398b8ce51c806803062f6e3b552d Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 13:25:49 -0800 Subject: [PATCH 007/105] usability --- go/store/nbs/index_transformer.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go index c12d8a02ed..bb5378a88a 100644 --- a/go/store/nbs/index_transformer.go +++ b/go/store/nbs/index_transformer.go @@ -13,24 +13,24 @@ var ( // IndexTransformer transforms a table file index byte stream with lengths // to a table file index byte stream with offsets type IndexTransformer struct { - src io.Reader + src io.Reader + lengthsIdx int64 // Start index of lengths in table file byte stream suffixesIdx int64 // Start index of suffixes in table file byte stream - - buff []byte - idx int64 - offset uint64 + buff []byte + idx int64 + offset uint64 } // Create an IndexTransform given a src reader, chunkCount, and maximum size of read -func NewIndexTransformer(src io.Reader, chunkCount int, maxReadSize int) IndexTransformer { +func 
NewIndexTransformer(src io.Reader, chunkCount int, maxReadSize int) *IndexTransformer { tuplesSize := int64(chunkCount) * prefixTupleSize lengthsSize := int64(chunkCount) * lengthSize maxNumOffsetsToRead := maxReadSize / offsetSize buffSize := maxNumOffsetsToRead * lengthSize - return IndexTransformer{ + return &IndexTransformer{ src: src, buff: make([]byte, buffSize), lengthsIdx: tuplesSize, From 90945c0650afacebcfb151b8dfdfafce53bd4ba0 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 8 Feb 2022 14:21:13 -0800 Subject: [PATCH 008/105] adding test for SIGINT --- integration-tests/bats/no-repo.bats | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index bd6c01c79c..c558f94791 100755 --- a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -311,3 +311,12 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." [[ "$output" =~ "__DOLT_1__" ]] [[ ! "$output" =~ "__LD_1__" ]] } + +@test "no-repo: dolt login exits when receiving SIGINT" { + dolt login & # run this in the background + sleep 1 # Wait a sec + kill -SIGINT $(ps aux | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) + sleep 1 # Wait another sec + run grep -q 'dolt' <(ps aux) + [ "$output" == "" ] +} From 25509fe501df3025979cda353e988c44ddf8247d Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 8 Feb 2022 14:21:44 -0800 Subject: [PATCH 009/105] adding login fix --- go/cmd/dolt/commands/login.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/go/cmd/dolt/commands/login.go b/go/cmd/dolt/commands/login.go index c666368165..c388c5ef9c 100644 --- a/go/cmd/dolt/commands/login.go +++ b/go/cmd/dolt/commands/login.go @@ -17,11 +17,10 @@ package commands import ( "context" "fmt" - "io" - "time" - "github.com/skratchdot/open-golang/open" "google.golang.org/grpc" + "io" + "time" 
"github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/cmd/dolt/errhand" @@ -45,8 +44,15 @@ var loginDocs = cli.CommandDocumentationContent{ Synopsis: []string{"[{{.LessThan}}creds{{.GreaterThan}}]"}, } +// The LoginCmd doesn't handle its own signals, but should stop cancel global context when receiving SIGINT signal +func (cmd LoginCmd) InstallsSignalHandlers() bool { + return true +} + type LoginCmd struct{} +var _ cli.SignalCommand = SqlCmd{} + // Name is returns the name of the Dolt cli command. This is what is used on the command line to invoke the command func (cmd LoginCmd) Name() string { return "login" @@ -174,6 +180,7 @@ func loginWithCreds(ctx context.Context, dEnv *env.DoltEnv, dc creds.DoltCreds, linePrinter("requesting update") whoAmI, err = grpcClient.WhoAmI(ctx, &remotesapi.WhoAmIRequest{}) if err != nil { + // TODO: if you get cancelled status code stop retrying for i := 0; i < loginRetryInterval; i++ { linePrinter(fmt.Sprintf("Retrying in %d", loginRetryInterval-i)) time.Sleep(time.Second) From 3b709012e9bed4caf7e84d866c389cf8dc32a8ab Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 8 Feb 2022 14:37:11 -0800 Subject: [PATCH 010/105] removing feature flag --- .../doltcore/schema/typeinfo/typeinfo.go | 40 ++++--------------- .../doltcore/schema/typeinfo/typeinfo_test.go | 17 ++------ 2 files changed, 10 insertions(+), 47 deletions(-) diff --git a/go/libraries/doltcore/schema/typeinfo/typeinfo.go b/go/libraries/doltcore/schema/typeinfo/typeinfo.go index ec3b183040..dd0754d5b2 100644 --- a/go/libraries/doltcore/schema/typeinfo/typeinfo.go +++ b/go/libraries/doltcore/schema/typeinfo/typeinfo.go @@ -17,34 +17,13 @@ package typeinfo import ( "context" "fmt" - "math" - "os" - "sync" - "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/vitess/go/sqltypes" + "math" "github.com/dolthub/dolt/go/store/types" ) -const spatialTypesFeatureFlagKey = "DOLT_ENABLE_SPATIAL_TYPES" - -// use SpatialTypesEnabled() to check, don't access 
directly -var spatialTypesFeatureFlag = false - -func init() { - // set the spatial types feature flag to true if the env var is set - if v, ok := os.LookupEnv(spatialTypesFeatureFlagKey); ok && v != "" { - spatialTypesFeatureFlag = true - } -} - -var spatialTypesLock = &sync.RWMutex{} - -func SpatialTypesEnabled() bool { - return spatialTypesFeatureFlag -} - type Identifier string const ( @@ -266,17 +245,6 @@ func FromSqlType(sqlType sql.Type) (TypeInfo, error) { // FromTypeParams constructs a TypeInfo from the given identifier and parameters. func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) { - if SpatialTypesEnabled() { - switch id { - case PointTypeIdentifier: - return PointType, nil - case LinestringTypeIdentifier: - return LinestringType, nil - case PolygonTypeIdentifier: - return PolygonType, nil - } - } - switch id { case BitTypeIdentifier: return CreateBitTypeFromParams(params) @@ -298,6 +266,12 @@ func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) { return CreateIntTypeFromParams(params) case JSONTypeIdentifier: return JSONType, nil + case PointTypeIdentifier: + return PointType, nil + case LinestringTypeIdentifier: + return LinestringType, nil + case PolygonTypeIdentifier: + return PolygonType, nil case SetTypeIdentifier: return CreateSetTypeFromParams(params) case TimeTypeIdentifier: diff --git a/go/libraries/doltcore/schema/typeinfo/typeinfo_test.go b/go/libraries/doltcore/schema/typeinfo/typeinfo_test.go index 702e3385b7..1a7339baef 100644 --- a/go/libraries/doltcore/schema/typeinfo/typeinfo_test.go +++ b/go/libraries/doltcore/schema/typeinfo/typeinfo_test.go @@ -29,15 +29,6 @@ import ( "github.com/dolthub/dolt/go/store/types" ) -func testWithSpatialTypesEnabled(cb func()) { - spatialTypesLock.Lock() - defer spatialTypesLock.Unlock() - - spatialTypesFeatureFlag = true - cb() - spatialTypesFeatureFlag = false -} - func TestTypeInfoSuite(t *testing.T) { typeInfoArrays, validTypeValues := 
generateTypeInfoArrays(t) t.Run("VerifyArray", func(t *testing.T) { @@ -234,11 +225,9 @@ func testTypeInfoGetTypeParams(t *testing.T, tiArrays [][]TypeInfo) { ti.GetTypeIdentifier() == LinestringTypeIdentifier || ti.GetTypeIdentifier() == PolygonTypeIdentifier { t.Run(ti.String(), func(t *testing.T) { - testWithSpatialTypesEnabled(func() { - newTi, err := FromTypeParams(ti.GetTypeIdentifier(), ti.GetTypeParams()) - require.NoError(t, err) - require.True(t, ti.Equals(newTi), "%v\n%v", ti.String(), newTi.String()) - }) + newTi, err := FromTypeParams(ti.GetTypeIdentifier(), ti.GetTypeParams()) + require.NoError(t, err) + require.True(t, ti.Equals(newTi), "%v\n%v", ti.String(), newTi.String()) }) } else { t.Run(ti.String(), func(t *testing.T) { From 68ad3e8a2925b3d1b7fbba8ae0e98a1e9cbba63e Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Mon, 7 Feb 2022 15:52:10 -0800 Subject: [PATCH 011/105] merge stub --- go/store/prolly/tree_merge.go | 13 +++++++++++++ go/store/prolly/tree_merge_test.go | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 go/store/prolly/tree_merge.go create mode 100644 go/store/prolly/tree_merge_test.go diff --git a/go/store/prolly/tree_merge.go b/go/store/prolly/tree_merge.go new file mode 100644 index 0000000000..d4aa764543 --- /dev/null +++ b/go/store/prolly/tree_merge.go @@ -0,0 +1,13 @@ +// Copyright 2021 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
\ No newline at end of file diff --git a/go/store/prolly/tree_merge_test.go b/go/store/prolly/tree_merge_test.go new file mode 100644 index 0000000000..d4aa764543 --- /dev/null +++ b/go/store/prolly/tree_merge_test.go @@ -0,0 +1,13 @@ +// Copyright 2021 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. \ No newline at end of file From 427d22fb47323b000ff5db19a9176cfcd72ae050 Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Tue, 8 Feb 2022 23:02:21 +0000 Subject: [PATCH 012/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/login.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/login.go b/go/cmd/dolt/commands/login.go index c388c5ef9c..c0022e454e 100644 --- a/go/cmd/dolt/commands/login.go +++ b/go/cmd/dolt/commands/login.go @@ -17,11 +17,12 @@ package commands import ( "context" "fmt" - "github.com/skratchdot/open-golang/open" - "google.golang.org/grpc" "io" "time" + "github.com/skratchdot/open-golang/open" + "google.golang.org/grpc" + "github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/cmd/dolt/errhand" eventsapi "github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi/v1alpha1" From 7c44706dcb49ecc2aa6648ecd54fddf3c6fb7c48 Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Tue, 8 Feb 2022 23:02:35 +0000 Subject: [PATCH 013/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- 
go/libraries/doltcore/schema/typeinfo/typeinfo.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/libraries/doltcore/schema/typeinfo/typeinfo.go b/go/libraries/doltcore/schema/typeinfo/typeinfo.go index dd0754d5b2..d3f303bdaa 100644 --- a/go/libraries/doltcore/schema/typeinfo/typeinfo.go +++ b/go/libraries/doltcore/schema/typeinfo/typeinfo.go @@ -17,9 +17,10 @@ package typeinfo import ( "context" "fmt" + "math" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/vitess/go/sqltypes" - "math" "github.com/dolthub/dolt/go/store/types" ) From 78bf74a7447a7933486492ffd440cb7900705e9f Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 8 Feb 2022 15:11:59 -0800 Subject: [PATCH 014/105] sigint already cancels request --- go/cmd/dolt/commands/login.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/cmd/dolt/commands/login.go b/go/cmd/dolt/commands/login.go index c388c5ef9c..566f712497 100644 --- a/go/cmd/dolt/commands/login.go +++ b/go/cmd/dolt/commands/login.go @@ -180,7 +180,6 @@ func loginWithCreds(ctx context.Context, dEnv *env.DoltEnv, dc creds.DoltCreds, linePrinter("requesting update") whoAmI, err = grpcClient.WhoAmI(ctx, &remotesapi.WhoAmIRequest{}) if err != nil { - // TODO: if you get cancelled status code stop retrying for i := 0; i < loginRetryInterval; i++ { linePrinter(fmt.Sprintf("Retrying in %d", loginRetryInterval-i)) time.Sleep(time.Second) From 78ab69a1afc59c8f7b790d1cff1ba80da5f81382 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 8 Feb 2022 16:10:15 -0800 Subject: [PATCH 015/105] removing flag from bats test --- integration-tests/bats/sql-spatial-types.bats | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/integration-tests/bats/sql-spatial-types.bats b/integration-tests/bats/sql-spatial-types.bats index 7139a5840c..df336c0c48 100644 --- a/integration-tests/bats/sql-spatial-types.bats +++ b/integration-tests/bats/sql-spatial-types.bats @@ -16,26 +16,26 @@ teardown() { [[ 
"$output" =~ "cannot be made" ]] || false } -@test "sql-spatial-types: can make spatial types with flag" { - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "create table point_tbl (p point)" +@test "sql-spatial-types: can make spatial types" { + run dolt sql -q "create table point_tbl (p point)" [ "$status" -eq 0 ] [ "$output" = "" ] || false } @test "sql-spatial-types: create point table and insert points" { - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "create table point_tbl (p point)" + run dolt sql -q "create table point_tbl (p point)" [ "$status" -eq 0 ] [ "$output" = "" ] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into point_tbl () values (point(1,2))" + run dolt sql -q "insert into point_tbl () values (point(1,2))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into point_tbl () values (point(3,4)), (point(5,6))" + run dolt sql -q "insert into point_tbl () values (point(3,4)), (point(5,6))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into point_tbl () values (point(123.456, 0.789))" + run dolt sql -q "insert into point_tbl () values (point(123.456, 0.789))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "select st_aswkt(p) from point_tbl" + run dolt sql -q "select st_aswkt(p) from point_tbl" [ "$status" -eq 0 ] [[ "$output" =~ "POINT(1 2)" ]] || false [[ "$output" =~ "POINT(3 4)" ]] || false @@ -44,16 +44,16 @@ teardown() { } @test "sql-spatial-types: create linestring table and insert linestrings" { - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "create table line_tbl (l linestring)" + run dolt sql -q "create table line_tbl (l linestring)" [ "$status" -eq 0 ] [ "$output" = "" ] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into line_tbl () values (linestring(point(1,2),point(3,4)))" + run dolt sql -q "insert 
into line_tbl () values (linestring(point(1,2),point(3,4)))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into line_tbl () values (linestring(point(1.2345, 678.9), point(111.222, 333.444), point(55.66, 77.88))), (linestring(point(1.1, 2.2),point(3.3, 4)))" + run dolt sql -q "insert into line_tbl () values (linestring(point(1.2345, 678.9), point(111.222, 333.444), point(55.66, 77.88))), (linestring(point(1.1, 2.2),point(3.3, 4)))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "select st_aswkt(l) from line_tbl" + run dolt sql -q "select st_aswkt(l) from line_tbl" [ "$status" -eq 0 ] [[ "$output" =~ "LINESTRING(1 2,3 4)" ]] || false [[ "$output" =~ "LINESTRING(1.2345 678.9,111.222 333.444,55.66 77.88)" ]] || false @@ -61,19 +61,19 @@ teardown() { } @test "sql-spatial-types: create polygon table and insert polygon" { - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "create table poly_tbl (p polygon)" + run dolt sql -q "create table poly_tbl (p polygon)" [ "$status" -eq 0 ] [ "$output" = "" ] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into poly_tbl () values (polygon(linestring(point(1,2),point(3,4),point(5,6),point(7,8))))" + run dolt sql -q "insert into poly_tbl () values (polygon(linestring(point(1,2),point(3,4),point(5,6),point(7,8))))" [ "$status" -eq 1 ] [[ "$output" =~ "Invalid GIS data" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into poly_tbl () values (polygon(linestring(point(1,2),point(3,4),point(5,6),point(1,2))))" + run dolt sql -q "insert into poly_tbl () values (polygon(linestring(point(1,2),point(3,4),point(5,6),point(1,2))))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "insert into poly_tbl () values (polygon(linestring(point(1,1),point(2,2),point(3,3),point(1,1)))), 
(polygon(linestring(point(0.123,0.456),point(1.22,1.33),point(1.11,0.99),point(0.123,0.456))))" + run dolt sql -q "insert into poly_tbl () values (polygon(linestring(point(1,1),point(2,2),point(3,3),point(1,1)))), (polygon(linestring(point(0.123,0.456),point(1.22,1.33),point(1.11,0.99),point(0.123,0.456))))" [ "$status" -eq 0 ] [[ "$output" =~ "Query OK" ]] || false - DOLT_ENABLE_SPATIAL_TYPES=true run dolt sql -q "select st_aswkt(p) from poly_tbl" + run dolt sql -q "select st_aswkt(p) from poly_tbl" [ "$status" -eq 0 ] [[ "$output" =~ "POLYGON((1 2,3 4,5 6,1 2))" ]] || false [[ "$output" =~ "POLYGON((1 1,2 2,3 3,1 1))" ]] || false From 666107bc74fbb404c62f91505ef6ca3846f194bd Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Tue, 8 Feb 2022 16:37:45 -0800 Subject: [PATCH 016/105] copied 3way merge code from andy/storage-hack, fixed test call --- go/store/prolly/memory_map.go | 6 +- go/store/prolly/mutable_map_write_test.go | 4 +- go/store/prolly/mutator.go | 17 +- go/store/prolly/node.go | 2 +- go/store/prolly/tree_merge.go | 218 +++++++++++++++++++++- go/store/prolly/tree_merge_test.go | 76 +++++++- go/store/prolly/utils_test.go | 2 +- 7 files changed, 304 insertions(+), 21 deletions(-) diff --git a/go/store/prolly/memory_map.go b/go/store/prolly/memory_map.go index 97ac7dd4eb..92aaa21366 100644 --- a/go/store/prolly/memory_map.go +++ b/go/store/prolly/memory_map.go @@ -171,7 +171,7 @@ func (it *memRangeIter) iterate(context.Context) (err error) { } } -func (it *memRangeIter) nextMutation() (key, value val.Tuple) { +func (it *memRangeIter) nextMutation(context.Context) (key, value val.Tuple) { key, value = it.iter.Current() if key == nil { return @@ -180,10 +180,6 @@ func (it *memRangeIter) nextMutation() (key, value val.Tuple) { return } -func (it *memRangeIter) count() int { - return it.iter.Count() -} - func (it *memRangeIter) close() error { return nil } diff --git a/go/store/prolly/mutable_map_write_test.go b/go/store/prolly/mutable_map_write_test.go index 
e9a77e0c0f..72377bfabb 100644 --- a/go/store/prolly/mutable_map_write_test.go +++ b/go/store/prolly/mutable_map_write_test.go @@ -476,10 +476,10 @@ func materializeMap(t *testing.T, mut MutableMap) Map { // ensure edits are provided in order iter := mut.overlay.mutations() - prev, _ := iter.nextMutation() + prev, _ := iter.nextMutation(ctx) require.NotNil(t, prev) for { - next, _ := iter.nextMutation() + next, _ := iter.nextMutation(ctx) if next == nil { break } diff --git a/go/store/prolly/mutator.go b/go/store/prolly/mutator.go index e695150ced..0d2b281086 100644 --- a/go/store/prolly/mutator.go +++ b/go/store/prolly/mutator.go @@ -21,21 +21,18 @@ import ( ) type mutationIter interface { - nextMutation() (key, val val.Tuple) - count() int + nextMutation(ctx context.Context) (key, value val.Tuple) close() error } var _ mutationIter = &memRangeIter{} func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map, error) { - var err error - if edits.count() == 0 { - return m, err + newKey, newValue := edits.nextMutation(ctx) + if newKey == nil { + return m, nil // no mutations } - newKey, newValue := edits.nextMutation() - cur, err := newCursorAtItem(ctx, m.ns, m.root, nodeItem(newKey), m.searchNode) if err != nil { return m, err @@ -65,11 +62,11 @@ func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map, } if oldValue == nil && newValue == nil { - newKey, newValue = edits.nextMutation() + newKey, newValue = edits.nextMutation(ctx) continue // already non-present } if oldValue != nil && compareValues(m, newValue, oldValue) == 0 { - newKey, newValue = edits.nextMutation() + newKey, newValue = edits.nextMutation(ctx) continue // same newValue } @@ -94,7 +91,7 @@ func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map, } } - newKey, newValue = edits.nextMutation() + newKey, newValue = edits.nextMutation(ctx) } m.root, err = chunker.Done(ctx) diff --git a/go/store/prolly/node.go b/go/store/prolly/node.go 
index 166a26925f..69725e89fa 100644 --- a/go/store/prolly/node.go +++ b/go/store/prolly/node.go @@ -79,7 +79,7 @@ func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (nod serial.TupleMapAddKeyFormat(b, serial.TupleFormatV1) serial.TupleMapAddValueFormat(b, serial.TupleFormatV1) serial.TupleMapAddTreeLevel(b, byte(level)) - // todo(andy): tree count + // todo(andy): tree empty b.Finish(serial.TupleMapEnd(b)) return mapNodeFromBytes(b.FinishedBytes()) diff --git a/go/store/prolly/tree_merge.go b/go/store/prolly/tree_merge.go index d4aa764543..7aaf781997 100644 --- a/go/store/prolly/tree_merge.go +++ b/go/store/prolly/tree_merge.go @@ -10,4 +10,220 @@ // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and -// limitations under the License. \ No newline at end of file +// limitations under the License. + +package prolly + +import ( + "bytes" + "context" + "io" + + "golang.org/x/sync/errgroup" + + "github.com/dolthub/dolt/go/store/val" +) + +const patchBufferSize = 1024 + +// TupleMergeFn is a callback that handles 3-way merging of tuples. +// A typical implementation will attempt a cell-wise merge of the tuples, +// or register a conflict if a such a merge is not possible. +type TupleMergeFn func(left, right Diff) (Diff, bool) + +// ThreeWayMerge implements a three-way merge algorithm using |base| as the common ancestor, |right| as +// the source branch, and |left| as the destination branch. Both |left| and |right| are diff'd against +// |base| to compute merge patches, but rather than applying both sets of patches to |base|, patches from +// |right| are applied directly to |left|. This reduces the amount of write work and improves performance. 
+// In the case that a key-value pair was modified on both |left| and |right| with different resulting +// values, the TupleMergeFn is called to perform a cell-wise merge, or to throw a conflict. +func ThreeWayMerge(ctx context.Context, left, right, base Map, cb TupleMergeFn) (final Map, err error) { + ld, err := treeDifferFromMaps(ctx, base, left) + if err != nil { + return Map{}, err + } + + rd, err := treeDifferFromMaps(ctx, base, right) + if err != nil { + return Map{}, err + } + + eg, ctx := errgroup.WithContext(ctx) + buf := newPatchBuffer(patchBufferSize) + + // iterate |ld| and |rd| in parallel, populating |buf| + eg.Go(func() (err error) { + defer func() { + if cerr := buf.close(); err != nil { + err = cerr + } + }() + err = sendPatches(ctx, ld, rd, buf, cb) + return + }) + + // consume patches from |buf| and apply them to |left| + eg.Go(func() error { + final, err = materializeMutations(ctx, left, buf) + return err + }) + + if err = eg.Wait(); err != nil { + return Map{}, err + } + + return final, nil +} + +// patchBuffer implements mutationIter. It consumes Diffs +// from the parallel treeDiffers and transforms them into +// patches for the treeChunker to apply. +type patchBuffer struct { + buf chan patch +} + +var _ mutationIter = patchBuffer{} + +type patch [2]val.Tuple + +func newPatchBuffer(sz int) patchBuffer { + return patchBuffer{buf: make(chan patch, sz)} +} + +func (ps patchBuffer) sendPatch(ctx context.Context, diff Diff) error { + p := patch{diff.Key, diff.To} + select { + case <-ctx.Done(): + return ctx.Err() + case ps.buf <- p: + return nil + } +} + +// nextMutation implements mutationIter. 
+func (ps patchBuffer) nextMutation(ctx context.Context) (key, value val.Tuple) { + var p patch + select { + case p = <-ps.buf: + return p[0], p[1] + case <-ctx.Done(): + return nil, nil + } +} + +func (ps patchBuffer) close() error { + close(ps.buf) + return nil +} + +func sendPatches(ctx context.Context, l, r treeDiffer, buf patchBuffer, cb TupleMergeFn) (err error) { + var ( + left, right Diff + lok, rok = true, true + ) + + left, err = l.Next(ctx) + if err == io.EOF { + err, lok = nil, false + } + if err != nil { + return err + } + + right, err = r.Next(ctx) + if err == io.EOF { + err, rok = nil, false + } + if err != nil { + return err + } + + for lok && rok { + cmp := compareDiffKeys(left, right, l.cmp) + + switch { + case cmp < 0: + // already in left + left, err = l.Next(ctx) + if err == io.EOF { + err, lok = nil, false + } + if err != nil { + return err + } + + case cmp > 0: + err = buf.sendPatch(ctx, right) + if err != nil { + return err + } + + right, err = r.Next(ctx) + if err == io.EOF { + err, rok = nil, false + } + if err != nil { + return err + } + + case cmp == 0: + if !equalDiffVals(left, right) { + resolved, ok := cb(left, right) + if ok { + err = buf.sendPatch(ctx, resolved) + } + if err != nil { + return err + } + } + + left, err = l.Next(ctx) + if err == io.EOF { + err, lok = nil, false + } + if err != nil { + return err + } + + right, err = r.Next(ctx) + if err == io.EOF { + err, rok = nil, false + } + if err != nil { + return err + } + } + } + + for lok { + // already in left + break + } + + for rok { + err = buf.sendPatch(ctx, right) + if err != nil { + return err + } + + right, err = r.Next(ctx) + if err == io.EOF { + err, rok = nil, false + } + if err != nil { + return err + } + } + + return nil +} + +func compareDiffKeys(left, right Diff, cmp compareFn) int { + return cmp(nodeItem(left.Key), nodeItem(right.Key)) +} + +func equalDiffVals(left, right Diff) bool { + // todo(andy): bytes must be comparable + ok := left.Type == right.Type + 
return ok && bytes.Equal(left.To, right.To) +} diff --git a/go/store/prolly/tree_merge_test.go b/go/store/prolly/tree_merge_test.go index d4aa764543..ce04b6abff 100644 --- a/go/store/prolly/tree_merge_test.go +++ b/go/store/prolly/tree_merge_test.go @@ -10,4 +10,78 @@ // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and -// limitations under the License. \ No newline at end of file +// limitations under the License. + +package prolly + +import ( + "context" + "fmt" + "io" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/val" +) + +func Test3WayMapMerge(t *testing.T) { + scales := []int{ + 10, + 100, + 1000, + 10000, + } + + for _, s := range scales { + name := fmt.Sprintf("test proCur map at scale %d", s) + t.Run(name, func(t *testing.T) { + prollyMap, tuples := makeProllyMap(t, s) + + t.Run("merge identical maps", func(t *testing.T) { + testEqualMapMerge(t, prollyMap.(Map)) + }) + t.Run("3way merge inserts", func(t *testing.T) { + for k := 0; k < 10; k++ { + testMapMergeInserts(t, prollyMap.(Map), tuples, s/10) + } + }) + }) + } +} + +func testEqualMapMerge(t *testing.T, m Map) { + ctx := context.Background() + mm, err := ThreeWayMerge(ctx, m, m, m, panicOnConflict) + require.NoError(t, err) + assert.NotNil(t, mm) + //assert.Equal(t, m.Count(), mm.Count()) +} + +func testMapMergeInserts(t *testing.T, final Map, tups [][2]val.Tuple, sz int) { + testRand.Shuffle(len(tups), func(i, j int) { + tups[i], tups[j] = tups[j], tups[i] + }) + + left := makeMapWithDeletes(t, final, tups[:sz]...) + right := makeMapWithDeletes(t, final, tups[sz:sz*2]...) + base := makeMapWithDeletes(t, final, tups[:sz*2]...) 
+ + ctx := context.Background() + final2, err := ThreeWayMerge(ctx, left, right, base, panicOnConflict) + require.NoError(t, err) + assert.Equal(t, final.HashOf(), final2.HashOf()) + + cnt := 0 + err = DiffMaps(ctx, final, final2, func(ctx context.Context, diff Diff) error { + cnt++ + return nil + }) + require.Error(t, io.EOF, err) + assert.Equal(t, 0, cnt) +} + +func panicOnConflict(left, right Diff) (Diff, bool) { + panic("cannot merge cells") +} diff --git a/go/store/prolly/utils_test.go b/go/store/prolly/utils_test.go index f942214e1c..7633720c13 100644 --- a/go/store/prolly/utils_test.go +++ b/go/store/prolly/utils_test.go @@ -93,7 +93,7 @@ func randomTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val func randomCompositeTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val.Tuple) { // preconditions if count%5 != 0 { - panic("expected count divisible by 5") + panic("expected empty divisible by 5") } if len(keyDesc.Types) < 2 { panic("expected composite key") From d4ada1e73a5e00912d7d60e29275606d89c9f504 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 16:58:53 -0800 Subject: [PATCH 017/105] use MultiReader and LimitReaders, use second half of p for scratch work --- go/store/nbs/index_transformer.go | 120 +++++++++----------- go/store/nbs/index_transformer_test.go | 145 +++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 68 deletions(-) create mode 100644 go/store/nbs/index_transformer_test.go diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go index bb5378a88a..c0b44764be 100644 --- a/go/store/nbs/index_transformer.go +++ b/go/store/nbs/index_transformer.go @@ -10,92 +10,76 @@ var ( ErrNotEnoughBytes = errors.New("reader did not return enough bytes") ) -// IndexTransformer transforms a table file index byte stream with lengths -// to a table file index byte stream with offsets -type IndexTransformer struct { - src io.Reader +func NewIndexTransformer(src io.Reader, 
chunkCount int) io.Reader { + tuplesSize := chunkCount * prefixTupleSize + lengthsSize := chunkCount * lengthSize + suffixesSize := chunkCount * addrSuffixSize - lengthsIdx int64 // Start index of lengths in table file byte stream - suffixesIdx int64 // Start index of suffixes in table file byte stream - buff []byte - idx int64 - offset uint64 + tupleReader := io.LimitReader(src, int64(tuplesSize)) + lengthsReader := io.LimitReader(src, int64(lengthsSize)) + suffixesReader := io.LimitReader(src, int64(suffixesSize)) + + return io.MultiReader( + tupleReader, + NewOffsetsReader(lengthsReader), + suffixesReader, + ) } -// Create an IndexTransform given a src reader, chunkCount, and maximum size of read -func NewIndexTransformer(src io.Reader, chunkCount int, maxReadSize int) *IndexTransformer { - tuplesSize := int64(chunkCount) * prefixTupleSize - lengthsSize := int64(chunkCount) * lengthSize +// OffsetsReader transforms a byte stream of table file lengths +// into a byte stream of table file offsets +type OffsetsReader struct { + lengthsReader io.Reader + offset uint64 +} - maxNumOffsetsToRead := maxReadSize / offsetSize - buffSize := maxNumOffsetsToRead * lengthSize - - return &IndexTransformer{ - src: src, - buff: make([]byte, buffSize), - lengthsIdx: tuplesSize, - suffixesIdx: tuplesSize + lengthsSize, +func NewOffsetsReader(lengthsReader io.Reader) *OffsetsReader { + return &OffsetsReader{ + lengthsReader: lengthsReader, } } -func (tra *IndexTransformer) Read(p []byte) (n int, err error) { - // If we will read outside of lengths, just read. - if tra.idx+int64(len(p)) < tra.lengthsIdx || tra.idx >= tra.suffixesIdx { - n, err = tra.src.Read(p) - tra.idx += int64(n) - return n, err - } +func (tra *OffsetsReader) Read(p []byte) (n int, err error) { + // if len(p) < offsetSize { + // // ASK: Should this be a panic? 
+ // // If this case is true, 0 bytes will be read and no error will be + // // returned which is undesirable behavior for io.Reader - // If we will read on the boundary between tuples and lengths, - // read up to the start of the lengths. - if tra.idx < tra.lengthsIdx { - b := p[:tra.lengthsIdx-tra.idx] - n, err := tra.src.Read(b) - tra.idx += int64(n) - return n, err - } + // // We could return an error instead, but this feels like developer error + // panic("len(p) must be at-least offsetSize") + // } - if len(p) < offsetSize { - // ASK: Should this be a panic? - // If this case is true, 0 bytes will be read and no error will be - // returned which is undesirable behavior for io.Reader + // Read as many lengths, as offsets we can fit into p. Which is half. + // Below assumes that lengthSize * 2 = offsetSize - // We could return an error instead, but this feels like developer error - panic("len(p) must be at-least offsetSize") - } + // Strategy is to first read lengths into the second half of p + // Then, while iterating the lengths, compute the current offset, + // and write it to the beginning of p. - // Now we can assume we are on a length boundary. + // Align p + rem := len(p) % offsetSize + p = p[:len(p)-rem] - // Alter size of p so we don't read any suffix bytes - if int64(len(p)) > tra.idx-tra.suffixesIdx { - p = p[tra.idx-tra.suffixesIdx:] - } - - // Read as many lengths, as offsets we can fit into p. 
(Assuming lengthsSize < offsetSize) - - num := n / offsetSize - readSize := num * lengthSize - - b := p[readSize:] - n, err = tra.src.Read(b) + // Read lengths into second half of p + secondHalf := p[len(p)/2:] + n, err = tra.lengthsReader.Read(secondHalf) if err != nil { - return n, err + return 0, err } - if n != readSize { - return n, ErrNotEnoughBytes + if n%lengthSize != 0 { + return 0, ErrNotEnoughBytes } - tra.idx += int64(n) - // Copy lengths - copy(tra.buff, b) - - // Calculate offsets - for lStart, oStart := 0, 0; lStart < readSize; lStart, oStart = lStart+lengthSize, oStart+offsetSize { - lengthBytes := tra.buff[lStart : lStart+lengthSize] + // Iterate lengths in second half of p while writing offsets starting from the beginning. + // On the last iteration, we overwrite the last length with the final offset. + for l, r := 0, 0; r < n; l, r = l+offsetSize, r+lengthSize { + lengthBytes := secondHalf[r : r+lengthSize] length := binary.BigEndian.Uint32(lengthBytes) tra.offset += uint64(length) - binary.BigEndian.PutUint64(p[oStart:oStart+offsetSize], tra.offset) + + offsetBytes := p[l : l+offsetSize] + binary.BigEndian.PutUint64(offsetBytes, tra.offset) } - return n, nil + return n * 2, nil } diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go new file mode 100644 index 0000000000..c171ec513d --- /dev/null +++ b/go/store/nbs/index_transformer_test.go @@ -0,0 +1,145 @@ +package nbs + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math/rand" + "testing" + + "github.com/dolthub/dolt/go/libraries/utils/test" + "github.com/stretchr/testify/require" +) + +// minByteReader is a copy of smallerByteReader from testing/iotest +// but with a minimum read size of min bytes. 
+ +type minByteReader struct { + r io.Reader + min int + + n int + off int +} + +func (r *minByteReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + + r.n = r.min + rand.Intn(r.min*100) + + n := r.n + if n > len(p) { + n = len(p) + } + n, err := r.r.Read(p[0:n]) + if err != nil && err != io.EOF { + err = fmt.Errorf("Read(%d bytes at offset %d): %v", n, r.off, err) + } + r.off += n + return n, err +} + +// Altered from testing/iotest.TestReader to use alignedByteReader +func testReader(r io.Reader, content []byte) error { + if len(content) > 0 { + n, err := r.Read(nil) + if n != 0 || err != nil { + return fmt.Errorf("Read(0) = %d, %v, want 0, nil", n, err) + } + } + + data, err := io.ReadAll(&minByteReader{r: r, min: offsetSize}) + if err != nil { + return err + } + if !bytes.Equal(data, content) { + return fmt.Errorf("ReadAll(varied amounts) = %q\n\twant %q", data, content) + } + + n, err := r.Read(make([]byte, offsetSize)) + if n != 0 || err != io.EOF { + return fmt.Errorf("Read(offsetSize) at EOF = %v, %v, want 0, EOF", n, err) + } + + return nil +} + +func get32Bytes(src []uint32) []byte { + dst := make([]byte, len(src)*uint32Size) + for i, start, end := 0, 0, lengthSize; i < len(src); i, start, end = i+1, end, end+lengthSize { + p := dst[start:end] + binary.BigEndian.PutUint32(p, src[i]) + } + return dst +} + +func get64Bytes(src []uint64) []byte { + dst := make([]byte, len(src)*uint64Size) + for i, start, end := 0, 0, offsetSize; i < len(src); i, start, end = i+1, end, end+offsetSize { + p := dst[start:end] + binary.BigEndian.PutUint64(p, src[i]) + } + return dst +} + +func randomUInt32s(n int) []uint32 { + out := make([]uint32, n) + for i := 0; i < n; i++ { + out[i] = uint32(rand.Intn(1000)) + } + return out +} + +func calcOffsets(arr []uint32) []uint64 { + out := make([]uint64, len(arr)) + out[0] = uint64(arr[0]) + for i := 1; i < len(arr); i++ { + out[i] = out[i-1] + uint64(arr[i]) + } + return out +} + +func 
TestLengthsTransformer(t *testing.T) { + testSize := rand.Intn(100) + 1 + lengths := randomUInt32s(testSize) + offsets := calcOffsets(lengths) + + lengthBytes := get32Bytes(lengths) + offsetBytes := get64Bytes(offsets) + + lengthsReader := bytes.NewReader(lengthBytes) + offsetReader := NewOffsetsReader(lengthsReader) + + err := testReader(offsetReader, offsetBytes) + require.NoError(t, err) +} + +func TestIndexTransformer(t *testing.T) { + chunkCount := rand.Intn(1000) + 1 + lengths := randomUInt32s(chunkCount) + offsets := calcOffsets(lengths) + lengthBytes := get32Bytes(lengths) + offsetBytes := get64Bytes(offsets) + + tupleBytes := test.RandomData(chunkCount * prefixTupleSize) + suffixBytes := test.RandomData(chunkCount * addrSuffixSize) + + var inBytes []byte + inBytes = append(inBytes, tupleBytes...) + inBytes = append(inBytes, lengthBytes...) + inBytes = append(inBytes, suffixBytes...) + + var outBytes []byte + outBytes = append(outBytes, tupleBytes...) + outBytes = append(outBytes, offsetBytes...) + outBytes = append(outBytes, suffixBytes...) 
+ + inReader := bytes.NewBuffer(inBytes) + outReader := NewIndexTransformer(inReader, chunkCount) + + err := testReader(outReader, outBytes) + require.NoError(t, err) +} From 09102a840524b5a8cabffdf2ead047ef98647e0d Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 17:05:08 -0800 Subject: [PATCH 018/105] test names --- go/store/nbs/index_transformer_test.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go index c171ec513d..6cf9d1e8c4 100644 --- a/go/store/nbs/index_transformer_test.go +++ b/go/store/nbs/index_transformer_test.go @@ -110,11 +110,13 @@ func TestLengthsTransformer(t *testing.T) { lengthBytes := get32Bytes(lengths) offsetBytes := get64Bytes(offsets) - lengthsReader := bytes.NewReader(lengthBytes) - offsetReader := NewOffsetsReader(lengthsReader) + t.Run("converts lengths into offsets", func(t *testing.T) { + lengthsReader := bytes.NewReader(lengthBytes) + offsetReader := NewOffsetsReader(lengthsReader) - err := testReader(offsetReader, offsetBytes) - require.NoError(t, err) + err := testReader(offsetReader, offsetBytes) + require.NoError(t, err) + }) } func TestIndexTransformer(t *testing.T) { @@ -137,9 +139,12 @@ func TestIndexTransformer(t *testing.T) { outBytes = append(outBytes, offsetBytes...) outBytes = append(outBytes, suffixBytes...) 
- inReader := bytes.NewBuffer(inBytes) - outReader := NewIndexTransformer(inReader, chunkCount) + t.Run("only converts lengths into offsets", func(t *testing.T) { + inReader := bytes.NewBuffer(inBytes) + outReader := NewIndexTransformer(inReader, chunkCount) + + err := testReader(outReader, outBytes) + require.NoError(t, err) + }) - err := testReader(outReader, outBytes) - require.NoError(t, err) } From b17e55fab7750c5a534e1fcf72d3ca9a9db0f128 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 17:09:50 -0800 Subject: [PATCH 019/105] copyright --- go/store/nbs/index_transformer.go | 14 ++++++++++++++ go/store/nbs/index_transformer_test.go | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go index c0b44764be..59d2c2b8b6 100644 --- a/go/store/nbs/index_transformer.go +++ b/go/store/nbs/index_transformer.go @@ -1,3 +1,17 @@ +// Copyright 2019 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package nbs import ( diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go index 6cf9d1e8c4..0cc379b165 100644 --- a/go/store/nbs/index_transformer_test.go +++ b/go/store/nbs/index_transformer_test.go @@ -1,3 +1,17 @@ +// Copyright 2019 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package nbs import ( From f75c02b0574fae536451930b5a00e6d29178d1c9 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 17:11:02 -0800 Subject: [PATCH 020/105] copyright year --- go/store/nbs/index_transformer.go | 2 +- go/store/nbs/index_transformer_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go index 59d2c2b8b6..c550859e4d 100644 --- a/go/store/nbs/index_transformer.go +++ b/go/store/nbs/index_transformer.go @@ -1,4 +1,4 @@ -// Copyright 2019 Dolthub, Inc. +// Copyright 2022 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go index 0cc379b165..5ce9187cb4 100644 --- a/go/store/nbs/index_transformer_test.go +++ b/go/store/nbs/index_transformer_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 Dolthub, Inc. +// Copyright 2022 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
From 78600516aa54fe7aaa55bd3f0065624708b0f260 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 17:17:10 -0800 Subject: [PATCH 021/105] bad comment --- go/store/nbs/index_transformer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go index 5ce9187cb4..ad4f0c3e35 100644 --- a/go/store/nbs/index_transformer_test.go +++ b/go/store/nbs/index_transformer_test.go @@ -56,7 +56,7 @@ func (r *minByteReader) Read(p []byte) (int, error) { return n, err } -// Altered from testing/iotest.TestReader to use alignedByteReader +// Altered from testing/iotest.TestReader to use minByteReader func testReader(r io.Reader, content []byte) error { if len(content) > 0 { n, err := r.Read(nil) From 781621545d63a9c0eecbcb5ccd5a00588d40f743 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 8 Feb 2022 17:26:37 -0800 Subject: [PATCH 022/105] unused panic comment --- go/store/nbs/index_transformer.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/go/store/nbs/index_transformer.go b/go/store/nbs/index_transformer.go index c550859e4d..59410b7a75 100644 --- a/go/store/nbs/index_transformer.go +++ b/go/store/nbs/index_transformer.go @@ -54,14 +54,6 @@ func NewOffsetsReader(lengthsReader io.Reader) *OffsetsReader { } func (tra *OffsetsReader) Read(p []byte) (n int, err error) { - // if len(p) < offsetSize { - // // ASK: Should this be a panic? - // // If this case is true, 0 bytes will be read and no error will be - // // returned which is undesirable behavior for io.Reader - - // // We could return an error instead, but this feels like developer error - // panic("len(p) must be at-least offsetSize") - // } // Read as many lengths, as offsets we can fit into p. Which is half. 
// Below assumes that lengthSize * 2 = offsetSize From b60de29e4016eac5389bfe5f4ef8ffbaf884ea19 Mon Sep 17 00:00:00 2001 From: druvv Date: Wed, 9 Feb 2022 01:58:44 +0000 Subject: [PATCH 023/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/store/nbs/index_transformer_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go index ad4f0c3e35..e6af3c42f2 100644 --- a/go/store/nbs/index_transformer_test.go +++ b/go/store/nbs/index_transformer_test.go @@ -22,8 +22,9 @@ import ( "math/rand" "testing" - "github.com/dolthub/dolt/go/libraries/utils/test" "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/utils/test" ) // minByteReader is a copy of smallerByteReader from testing/iotest From a9977480d522be021feaa9ba256057de0537f2b6 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Tue, 8 Feb 2022 18:10:36 -0800 Subject: [PATCH 024/105] more 3way merge tests --- go/store/prolly/map_test.go | 6 +- go/store/prolly/tree_diff_test.go | 48 ++++++--- go/store/prolly/tree_merge_test.go | 162 +++++++++++++++++++++++++---- 3 files changed, 173 insertions(+), 43 deletions(-) diff --git a/go/store/prolly/map_test.go b/go/store/prolly/map_test.go index 3694ffbeb8..2f34c29a15 100644 --- a/go/store/prolly/map_test.go +++ b/go/store/prolly/map_test.go @@ -76,7 +76,7 @@ func makeProllyMap(t *testing.T, count int) (orderedMap, [][2]val.Tuple) { ) tuples := randomTuplePairs(count, kd, vd) - om := prollyMapFromTuples(t, count, kd, vd, tuples) + om := prollyMapFromTuples(t, kd, vd, tuples) return om, tuples } @@ -89,12 +89,12 @@ func makeProllySecondaryIndex(t *testing.T, count int) (orderedMap, [][2]val.Tup vd := val.NewTupleDescriptor() tuples := randomCompositeTuplePairs(count, kd, vd) - om := prollyMapFromTuples(t, count, kd, vd, tuples) + om := prollyMapFromTuples(t, kd, vd, tuples) return om, tuples } -func prollyMapFromTuples(t 
*testing.T, count int, kd, vd val.TupleDesc, tuples [][2]val.Tuple) orderedMap { +func prollyMapFromTuples(t *testing.T, kd, vd val.TupleDesc, tuples [][2]val.Tuple) orderedMap { ctx := context.Background() ns := newTestNodeStore() diff --git a/go/store/prolly/tree_diff_test.go b/go/store/prolly/tree_diff_test.go index dc10be23d9..c1bac63413 100644 --- a/go/store/prolly/tree_diff_test.go +++ b/go/store/prolly/tree_diff_test.go @@ -195,27 +195,21 @@ func testUpdateDiffs(t *testing.T, from Map, tups [][2]val.Tuple, numUpdates int tups[i], tups[j] = tups[j], tups[i] }) - oldPairs := tups[:numUpdates] - sort.Slice(oldPairs, func(i, j int) bool { - return from.keyDesc.Compare(oldPairs[i][0], oldPairs[j][0]) < 0 + sub := tups[:numUpdates] + sort.Slice(sub, func(i, j int) bool { + return from.keyDesc.Compare(sub[i][0], sub[j][0]) < 0 }) - kd, vd := from.Descriptors() - newPairs := randomTuplePairs(numUpdates, kd, vd) - require.Equal(t, len(oldPairs), len(newPairs)) - for i := range oldPairs { - // set keys for updates - newPairs[i][0] = oldPairs[i][0] - } - to := makeMapWithUpdates(t, from, newPairs...) + _, vd := from.Descriptors() + updates := makeUpdatesToTuples(vd, sub...) + to := makeMapWithUpdates(t, from, updates...) var cnt int err := DiffMaps(ctx, from, to, func(ctx context.Context, diff Diff) error { assert.Equal(t, ModifiedDiff, diff.Type) - assert.Equal(t, oldPairs[cnt][0], diff.Key) - assert.Equal(t, oldPairs[cnt][1], diff.From) - assert.Equal(t, newPairs[cnt][0], diff.Key) - assert.Equal(t, newPairs[cnt][1], diff.To) + assert.Equal(t, updates[cnt][0], diff.Key) + assert.Equal(t, updates[cnt][1], diff.From) + assert.Equal(t, updates[cnt][2], diff.To) cnt++ return nil }) @@ -247,6 +241,26 @@ func makeMapWithInserts(t *testing.T, m Map, inserts ...[2]val.Tuple) Map { return mm } -func makeMapWithUpdates(t *testing.T, m Map, updates ...[2]val.Tuple) Map { - return makeMapWithInserts(t, m, updates...) 
+func makeMapWithUpdates(t *testing.T, m Map, updates ...[3]val.Tuple) Map { + ctx := context.Background() + mut := m.Mutate() + for _, pair := range updates { + err := mut.Put(ctx, pair[0], pair[2]) + require.NoError(t, err) + } + mm, err := mut.Map(ctx) + require.NoError(t, err) + return mm +} + +func makeUpdatesToTuples(vd val.TupleDesc, tuples ...[2]val.Tuple) (updates [][3]val.Tuple) { + updates = make([][3]val.Tuple, len(tuples)) + + valBuilder := val.NewTupleBuilder(vd) + for i := range updates { + updates[i][0] = tuples[i][0] + updates[i][1] = tuples[i][1] + updates[i][2] = randomTuple(valBuilder) + } + return } diff --git a/go/store/prolly/tree_merge_test.go b/go/store/prolly/tree_merge_test.go index ce04b6abff..b6efccfbed 100644 --- a/go/store/prolly/tree_merge_test.go +++ b/go/store/prolly/tree_merge_test.go @@ -17,7 +17,6 @@ package prolly import ( "context" "fmt" - "io" "testing" "github.com/stretchr/testify/assert" @@ -34,52 +33,169 @@ func Test3WayMapMerge(t *testing.T) { 10000, } + kd := val.NewTupleDescriptor( + val.Type{Enc: val.Uint32Enc, Nullable: false}, + ) + vd := val.NewTupleDescriptor( + val.Type{Enc: val.Uint32Enc, Nullable: true}, + val.Type{Enc: val.Uint32Enc, Nullable: true}, + val.Type{Enc: val.Uint32Enc, Nullable: true}, + ) + for _, s := range scales { name := fmt.Sprintf("test proCur map at scale %d", s) t.Run(name, func(t *testing.T) { - prollyMap, tuples := makeProllyMap(t, s) - t.Run("merge identical maps", func(t *testing.T) { - testEqualMapMerge(t, prollyMap.(Map)) + testEqualMapMerge(t, s) }) t.Run("3way merge inserts", func(t *testing.T) { for k := 0; k < 10; k++ { - testMapMergeInserts(t, prollyMap.(Map), tuples, s/10) + testThreeWayMapMerge(t, kd, vd, s) } }) + // todo(andy): tests conflicts, cell-wise merge }) } } -func testEqualMapMerge(t *testing.T, m Map) { +func testEqualMapMerge(t *testing.T, sz int) { + om, _ := makeProllyMap(t, sz) + m := om.(Map) ctx := context.Background() mm, err := ThreeWayMerge(ctx, m, m, m, 
panicOnConflict) require.NoError(t, err) assert.NotNil(t, mm) - //assert.Equal(t, m.Count(), mm.Count()) + assert.Equal(t, m.HashOf(), mm.HashOf()) } -func testMapMergeInserts(t *testing.T, final Map, tups [][2]val.Tuple, sz int) { - testRand.Shuffle(len(tups), func(i, j int) { - tups[i], tups[j] = tups[j], tups[i] - }) +func testThreeWayMapMerge(t *testing.T, kd, vd val.TupleDesc, sz int) { + baseTuples, leftEdits, rightEdits := makeTuplesAndMutations(kd, vd, sz) + om := prollyMapFromTuples(t, kd, vd, baseTuples) - left := makeMapWithDeletes(t, final, tups[:sz]...) - right := makeMapWithDeletes(t, final, tups[sz:sz*2]...) - base := makeMapWithDeletes(t, final, tups[:sz*2]...) + base := om.(Map) + left := applyMutationSet(t, base, leftEdits) + right := applyMutationSet(t, base, rightEdits) ctx := context.Background() - final2, err := ThreeWayMerge(ctx, left, right, base, panicOnConflict) - require.NoError(t, err) - assert.Equal(t, final.HashOf(), final2.HashOf()) + final, err := ThreeWayMerge(ctx, left, right, base, panicOnConflict) + assert.NoError(t, err) - cnt := 0 - err = DiffMaps(ctx, final, final2, func(ctx context.Context, diff Diff) error { - cnt++ - return nil + for _, add := range leftEdits.adds { + ok, err := final.Has(ctx, add[0]) + assert.NoError(t, err) + assert.True(t, ok) + err = final.Get(ctx, add[0], func(key, value val.Tuple) error { + assert.Equal(t, value, add[1]) + return nil + }) + assert.NoError(t, err) + } + for _, add := range rightEdits.adds { + ok, err := final.Has(ctx, add[0]) + assert.NoError(t, err) + assert.True(t, ok) + err = final.Get(ctx, add[0], func(key, value val.Tuple) error { + assert.Equal(t, value, add[1]) + return nil + }) + assert.NoError(t, err) + } + + for _, del := range leftEdits.deletes { + ok, err := final.Has(ctx, del) + assert.NoError(t, err) + assert.False(t, ok) + } + for _, del := range rightEdits.deletes { + ok, err := final.Has(ctx, del) + assert.NoError(t, err) + assert.False(t, ok) + } + + for _, up := 
range leftEdits.updates { + ok, err := final.Has(ctx, up[0]) + assert.NoError(t, err) + assert.True(t, ok) + err = final.Get(ctx, up[0], func(key, value val.Tuple) error { + assert.Equal(t, value, up[1]) + return nil + }) + assert.NoError(t, err) + } + for _, up := range rightEdits.updates { + ok, err := final.Has(ctx, up[0]) + assert.NoError(t, err) + assert.True(t, ok) + err = final.Get(ctx, up[0], func(key, value val.Tuple) error { + assert.Equal(t, value, up[1]) + return nil + }) + assert.NoError(t, err) + } +} + +type mutationSet struct { + adds [][2]val.Tuple + deletes []val.Tuple + updates [][3]val.Tuple +} + +func makeTuplesAndMutations(kd, vd val.TupleDesc, sz int) (base [][2]val.Tuple, left, right mutationSet) { + mutSz := sz / 10 + totalSz := sz + (mutSz * 2) + tuples := randomTuplePairs(totalSz, kd, vd) + + base = tuples[:sz] + + left = mutationSet{ + adds: tuples[sz : sz+mutSz], + deletes: make([]val.Tuple, mutSz), + } + right = mutationSet{ + adds: tuples[sz+mutSz:], + deletes: make([]val.Tuple, mutSz), + } + + edits := make([][2]val.Tuple, len(base)) + copy(edits, base) + testRand.Shuffle(len(edits), func(i, j int) { + edits[i], edits[j] = edits[j], edits[i] }) - require.Error(t, io.EOF, err) - assert.Equal(t, 0, cnt) + + for i, pair := range edits[:mutSz] { + left.deletes[i] = pair[0] + } + for i, pair := range edits[mutSz : mutSz*2] { + right.deletes[i] = pair[0] + } + + left.updates = makeUpdatesToTuples(vd, edits[mutSz*2:mutSz*3]...) + right.updates = makeUpdatesToTuples(vd, edits[mutSz*3:mutSz*4]...) 
+ + return +} + +func applyMutationSet(t *testing.T, base Map, edits mutationSet) (m Map) { + ctx := context.Background() + mut := base.Mutate() + + var err error + for _, add := range edits.adds { + err = mut.Put(ctx, add[0], add[1]) + require.NoError(t, err) + } + for _, del := range edits.deletes { + err = mut.Delete(ctx, del) + require.NoError(t, err) + } + for _, up := range edits.updates { + err = mut.Put(ctx, up[0], up[1]) + require.NoError(t, err) + } + + m, err = mut.Map(ctx) + require.NoError(t, err) + return } func panicOnConflict(left, right Diff) (Diff, bool) { From 70c819768743c9c738de115c29ad7326a1be05a2 Mon Sep 17 00:00:00 2001 From: James Cor Date: Wed, 9 Feb 2022 10:04:46 -0800 Subject: [PATCH 025/105] fixing tests again --- integration-tests/bats/sql-spatial-types.bats | 6 ------ 1 file changed, 6 deletions(-) diff --git a/integration-tests/bats/sql-spatial-types.bats b/integration-tests/bats/sql-spatial-types.bats index addd9aed34..db024289eb 100644 --- a/integration-tests/bats/sql-spatial-types.bats +++ b/integration-tests/bats/sql-spatial-types.bats @@ -10,12 +10,6 @@ teardown() { teardown_common } -@test "sql-spatial-types: can't make spatial types without enabling feature flag" { - run dolt sql -q "create table point_tbl (p point)" - [ "$status" -eq 1 ] - [[ "$output" =~ "cannot be made" ]] || false -} - @test "sql-spatial-types: can make spatial types" { run dolt sql -q "create table point_tbl (p point)" [ "$status" -eq 0 ] From 1f52b4c7a291dcfa677ee5eb56e25cda9942cf7e Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Wed, 9 Feb 2022 10:08:43 -0800 Subject: [PATCH 026/105] reduce test size and add additional tests --- go/store/nbs/index_transformer_test.go | 30 +++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/go/store/nbs/index_transformer_test.go b/go/store/nbs/index_transformer_test.go index e6af3c42f2..8a50530f3e 100644 --- a/go/store/nbs/index_transformer_test.go +++ 
b/go/store/nbs/index_transformer_test.go @@ -117,8 +117,8 @@ func calcOffsets(arr []uint32) []uint64 { return out } -func TestLengthsTransformer(t *testing.T) { - testSize := rand.Intn(100) + 1 +func TestOffsetReader(t *testing.T) { + testSize := rand.Intn(10) + 1 lengths := randomUInt32s(testSize) offsets := calcOffsets(lengths) @@ -132,10 +132,34 @@ func TestLengthsTransformer(t *testing.T) { err := testReader(offsetReader, offsetBytes) require.NoError(t, err) }) + + t.Run("err not enough bytes when expected", func(t *testing.T) { + lengthsReader := bytes.NewReader(lengthBytes[:len(lengthBytes)-1]) + offsetReader := NewOffsetsReader(lengthsReader) + _, err := io.ReadAll(offsetReader) + require.ErrorAsf(t, err, &ErrNotEnoughBytes, "should return ErrNotEnoughBytes") + }) + + t.Run("fills provided buffer correctly", func(t *testing.T) { + lengthsReader := bytes.NewReader(lengthBytes) + offsetReader := NewOffsetsReader(lengthsReader) + p := make([]byte, offsetSize) + n, err := offsetReader.Read(p) + require.NoError(t, err) + require.Equal(t, offsetSize, n) + }) + + t.Run("works with io.ReadAll", func(t *testing.T) { + lengthsReader := bytes.NewReader(lengthBytes[:lengthSize]) + offsetReader := NewOffsetsReader(lengthsReader) + data, err := io.ReadAll(offsetReader) + require.NoError(t, err) + require.True(t, bytes.Equal(data, offsetBytes[:offsetSize])) + }) } func TestIndexTransformer(t *testing.T) { - chunkCount := rand.Intn(1000) + 1 + chunkCount := rand.Intn(10) + 1 lengths := randomUInt32s(chunkCount) offsets := calcOffsets(lengths) lengthBytes := get32Bytes(lengths) From 02eff200039cc384174d6e0119dfa840acee78fa Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 11:11:31 -0800 Subject: [PATCH 027/105] added dolt insepct utility for investigating table file indexes --- go/cmd/dolt/commands/inspect.go | 167 ++++++++++++++++++++++++++++++++ go/cmd/dolt/commands/roots.go | 4 + go/cmd/dolt/dolt.go | 1 + go/store/nbs/store_test.go | 12 +++ 
go/store/nbs/util.go | 21 ++++ 5 files changed, 205 insertions(+) create mode 100644 go/cmd/dolt/commands/inspect.go diff --git a/go/cmd/dolt/commands/inspect.go b/go/cmd/dolt/commands/inspect.go new file mode 100644 index 0000000000..fb401545e2 --- /dev/null +++ b/go/cmd/dolt/commands/inspect.go @@ -0,0 +1,167 @@ +// Copyright 2021 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "context" + "fmt" + "io" + "math" + "path/filepath" + + "github.com/dolthub/dolt/go/cmd/dolt/cli" + "github.com/dolthub/dolt/go/cmd/dolt/errhand" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/utils/argparser" + "github.com/dolthub/dolt/go/libraries/utils/filesys" + "github.com/dolthub/dolt/go/store/nbs" +) + +const tableFileIndexFlag = "index" + +type InspectCmd struct { +} + +// Name is returns the name of the Dolt cli command. 
This is what is used on the command line to invoke the command +func (cmd InspectCmd) Name() string { + return "inspect" +} + +// Hidden should return true if this command should be hidden from the help text +func (cmd InspectCmd) Hidden() bool { + return true +} + +// RequiresRepo should return false if this interface is implemented, and the command does not have the requirement +// that it be run from within a data repository directory +func (cmd InspectCmd) RequiresRepo() bool { + return true +} + +// Description returns a description of the command +func (cmd InspectCmd) Description() string { + return "Inspects a Dolt Database and collects stats." +} + +// CreateMarkdown creates a markdown file containing the helptext for the command at the given path +func (cmd InspectCmd) CreateMarkdown(wr io.Writer, commandStr string) error { + return nil +} + +func (cmd InspectCmd) ArgParser() *argparser.ArgParser { + ap := argparser.NewArgParser() + ap.SupportsFlag(tableFileIndexFlag, "i", "Measure distribution error in table file chunk indexes.") + return ap +} + +// Exec executes the command +func (cmd InspectCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv) int { + ap := cmd.ArgParser() + help, usage := cli.HelpAndUsagePrinters(cli.GetCommandDocumentation(commandStr, cli.CommandDocumentationContent{}, ap)) + apr := cli.ParseArgsOrDie(ap, args, help) + + var verr errhand.VerboseError + if apr.Contains(tableFileIndexFlag) { + verr = cmd.measureChunkIndexDistribution(ctx, dEnv) + } + + return HandleVErrAndExitCode(verr, usage) +} + +func (cmd InspectCmd) measureChunkIndexDistribution(ctx context.Context, dEnv *env.DoltEnv) errhand.VerboseError { + newGen := filepath.Join(dEnv.GetDoltDir(), dbfactory.DataDir) + oldGen := filepath.Join(newGen, "oldGen") + + itr, err := NewTableFileIter([]string{newGen, oldGen}, dEnv.FS) + if err != nil { + return errhand.VerboseErrorFromError(err) + } + + sumErr, sumCnt := 0.0, 0 + for { + path, _ := 
itr.next() + if path == "" { + break + } + + summary, err := cmd.processTableFile(path, dEnv.FS) + if err != nil { + return errhand.VerboseErrorFromError(err) + } + sumErr += summary.sumErr + sumCnt += int(summary.count) + + cli.Println(summary.format()) + } + cli.Printf("average guess error: %f", sumErr / float64(sumCnt)) + + return nil +} + +func (cmd InspectCmd) processTableFile(path string, fs filesys.Filesys) (sum *chunkIndexSummary, err error) { + var rdr io.ReadCloser + rdr, err = fs.OpenForRead(path) + if err != nil { + return sum, err + } + defer func() { + cerr := rdr.Close() + if err == nil { + err = cerr + } + }() + + var prefixes []uint64 + prefixes, err = nbs.GetTableIndexPrefixes(rdr.(io.ReadSeeker)) + if err != nil { + return sum, err + } + + sum = &chunkIndexSummary{ + file: path, + count: uint32(len(prefixes)), + //errs: make([]float64, 0, len(prefixes)), + } + + for i, prefix := range prefixes { + sum.addPrefix(i, prefix) + } + return +} + +type chunkIndexSummary struct { + file string + count uint32 + //errs []float64 + sumErr float64 + maxErr float64 +} + +func (s *chunkIndexSummary) format() string { + return fmt.Sprintf("file: %s \t count: %d sum error: %f \t max error: %f ", + s.file, s.count, s.sumErr, s.maxErr) +} + +func (s *chunkIndexSummary) addPrefix(i int, prefix uint64) { + g := nbs.GuessPrefixOrdinal(prefix, s.count) + guessErr := math.Abs(float64(i-g)) + + //s.errs = append(s.errs, guessErr) + s.sumErr += guessErr + if guessErr > s.maxErr { + s.maxErr = guessErr + } +} diff --git a/go/cmd/dolt/commands/roots.go b/go/cmd/dolt/commands/roots.go index a17fdbc184..fc0177f961 100644 --- a/go/cmd/dolt/commands/roots.go +++ b/go/cmd/dolt/commands/roots.go @@ -200,6 +200,10 @@ func NewTableFileIter(dirs []string, fs filesys.Filesys) (*TableFileIter, error) } func (itr *TableFileIter) next() (string, time.Time) { + if itr.pos >= len(itr.files) { + return "", time.Time{} + } + curr := itr.files[itr.pos] itr.pos++ diff --git 
a/go/cmd/dolt/dolt.go b/go/cmd/dolt/dolt.go index 212247b915..b4e5a7d5cb 100644 --- a/go/cmd/dolt/dolt.go +++ b/go/cmd/dolt/dolt.go @@ -97,6 +97,7 @@ var doltCommand = cli.NewSubCommandHandler("dolt", "it's git for data", []cli.Co commands.RootsCmd{}, commands.VersionCmd{VersionStr: Version}, commands.DumpCmd{}, + commands.InspectCmd{}, dumpDocsCommand, dumpZshCommand, }) diff --git a/go/store/nbs/store_test.go b/go/store/nbs/store_test.go index 6ef2df6664..967c85cfb7 100644 --- a/go/store/nbs/store_test.go +++ b/go/store/nbs/store_test.go @@ -525,3 +525,15 @@ func TestNBSCommitRetainsAppendix(t *testing.T) { assert.Equal(upstream.GetAppendixTableSpecInfo(0), newUpstream.GetTableSpecInfo(0)) assert.Equal(newUpstream.GetTableSpecInfo(0), newUpstream.GetAppendixTableSpecInfo(0)) } + +func TestGuessPrefixOrdinal(t *testing.T) { + prefixes := make([]uint64, 256) + for i := range prefixes { + prefixes[i] = uint64(i<<56) + } + + for i, pre := range prefixes { + guess := GuessPrefixOrdinal(pre, 256) + assert.Equal(t, i, guess) + } +} diff --git a/go/store/nbs/util.go b/go/store/nbs/util.go index b5eb6726cd..17517d77d7 100644 --- a/go/store/nbs/util.go +++ b/go/store/nbs/util.go @@ -16,6 +16,7 @@ package nbs import ( "io" + "math" "github.com/dolthub/dolt/go/libraries/utils/iohelp" @@ -64,6 +65,26 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er return nil } +func GetTableIndexPrefixes(rd io.ReadSeeker) (prefixes []uint64, err error) { + idx, err := ReadTableIndex(rd) + if err != nil { + return nil, err + } + defer func() { + cerr := idx.Close() + if err == nil { + err = cerr + } + }() + + return idx.prefixes, nil +} + +func GuessPrefixOrdinal(prefix uint64, n uint32) int { + hi := prefix >> 32 + return int((hi * uint64(n)) / uint64(math.MaxUint32)) +} + func readNFrom(rd io.ReadSeeker, offset uint64, length uint32) ([]byte, error) { _, err := rd.Seek(int64(offset), io.SeekStart) From a8306231e47b9f9efb1672b5322f6295179383b0 Mon Sep 17 
00:00:00 2001 From: andrew-wm-arthur Date: Wed, 9 Feb 2022 19:17:43 +0000 Subject: [PATCH 028/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/inspect.go | 8 ++++---- go/store/nbs/store_test.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/go/cmd/dolt/commands/inspect.go b/go/cmd/dolt/commands/inspect.go index fb401545e2..dea312e63c 100644 --- a/go/cmd/dolt/commands/inspect.go +++ b/go/cmd/dolt/commands/inspect.go @@ -106,7 +106,7 @@ func (cmd InspectCmd) measureChunkIndexDistribution(ctx context.Context, dEnv *e cli.Println(summary.format()) } - cli.Printf("average guess error: %f", sumErr / float64(sumCnt)) + cli.Printf("average guess error: %f", sumErr/float64(sumCnt)) return nil } @@ -143,8 +143,8 @@ func (cmd InspectCmd) processTableFile(path string, fs filesys.Filesys) (sum *ch } type chunkIndexSummary struct { - file string - count uint32 + file string + count uint32 //errs []float64 sumErr float64 maxErr float64 @@ -157,7 +157,7 @@ func (s *chunkIndexSummary) format() string { func (s *chunkIndexSummary) addPrefix(i int, prefix uint64) { g := nbs.GuessPrefixOrdinal(prefix, s.count) - guessErr := math.Abs(float64(i-g)) + guessErr := math.Abs(float64(i - g)) //s.errs = append(s.errs, guessErr) s.sumErr += guessErr diff --git a/go/store/nbs/store_test.go b/go/store/nbs/store_test.go index 967c85cfb7..7251f4db74 100644 --- a/go/store/nbs/store_test.go +++ b/go/store/nbs/store_test.go @@ -529,7 +529,7 @@ func TestNBSCommitRetainsAppendix(t *testing.T) { func TestGuessPrefixOrdinal(t *testing.T) { prefixes := make([]uint64, 256) for i := range prefixes { - prefixes[i] = uint64(i<<56) + prefixes[i] = uint64(i << 56) } for i, pre := range prefixes { From 317b6f9bc1f3269b2ced48866b7626bd6a65cd31 Mon Sep 17 00:00:00 2001 From: James Cor Date: Wed, 9 Feb 2022 11:28:03 -0800 Subject: [PATCH 029/105] added dolt log --oneline flag --- go/cmd/dolt/commands/log.go | 102 
+++++++++++++++++++++++++----------- 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index c5f80fe893..86c2a9b51c 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -38,12 +38,15 @@ const ( mergesParam = "merges" minParentsParam = "min-parents" parentsParam = "parents" + decorateParam = "decorate" + oneLineParam = "oneline" ) type logOpts struct { numLines int showParents bool minParents int + oneLine bool } type logNode struct { @@ -91,6 +94,8 @@ func (cmd LogCmd) ArgParser() *argparser.ArgParser { ap.SupportsInt(minParentsParam, "", "parent_count", "The minimum number of parents a commit must have to be included in the log.") ap.SupportsFlag(mergesParam, "", "Equivalent to min-parents == 2, this will limit the log to commits with 2 or more parents.") ap.SupportsFlag(parentsParam, "", "Shows all parents of each commit in the log.") + ap.SupportsFlag(decorateParam, "", "Shows refs next to commits.") + ap.SupportsFlag(oneLineParam, "", "Shows logs in a compact format.") return ap } @@ -118,6 +123,7 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args numLines: apr.GetIntOrDefault(numLinesParam, -1), showParents: apr.Contains(parentsParam), minParents: minParents, + oneLine: apr.Contains(oneLineParam), } // Just dolt log @@ -307,43 +313,77 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return nil } +func logCompact(opts logOpts, commits []logNode) { + pager := outputpager.Start() + defer pager.Stop() + + for _, comm := range commits { + if len(comm.parentHashes) < opts.minParents { + return + } + + chStr := comm.commitHash.String() + if opts.showParents { + for _, h := range comm.parentHashes { + chStr += " " + h.String() + } + } + + pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s \033[0m", chStr[:8]))) + + // TODO: write refs here + + formattedDesc := strings.Replace(comm.commitMeta.Description, 
"\n", " ", -1) + "\n" + pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc))) + } +} + +func logDefault(opts logOpts, commits []logNode) { + pager := outputpager.Start() + defer pager.Stop() + + for _, comm := range commits { + if len(comm.parentHashes) < opts.minParents { + return + } + + chStr := comm.commitHash.String() + if opts.showParents { + for _, h := range comm.parentHashes { + chStr += " " + h.String() + } + } + + pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr))) + + if len(comm.parentHashes) > 1 { + pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:"))) + for _, h := range comm.parentHashes { + pager.Writer.Write([]byte(fmt.Sprintf(" " + h.String()))) + } + } + + pager.Writer.Write([]byte(fmt.Sprintf("\nAuthor: %s <%s>", comm.commitMeta.Name, comm.commitMeta.Email))) + + timeStr := comm.commitMeta.FormatTS() + pager.Writer.Write([]byte(fmt.Sprintf("\nDate: %s", timeStr))) + + formattedDesc := "\n\n\t" + strings.Replace(comm.commitMeta.Description, "\n", "\n\t", -1) + "\n\n" + pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc))) + } +} + func logToStdOut(opts logOpts, commits []logNode) { if cli.ExecuteWithStdioRestored == nil { return } cli.ExecuteWithStdioRestored(func() { - pager := outputpager.Start() - defer pager.Stop() - - for _, comm := range commits { - if len(comm.parentHashes) < opts.minParents { - return - } - - chStr := comm.commitHash.String() - if opts.showParents { - for _, h := range comm.parentHashes { - chStr += " " + h.String() - } - } - - pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr))) - - if len(comm.parentHashes) > 1 { - pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:"))) - for _, h := range comm.parentHashes { - pager.Writer.Write([]byte(fmt.Sprintf(" " + h.String()))) - } - } - - pager.Writer.Write([]byte(fmt.Sprintf("\nAuthor: %s <%s>", comm.commitMeta.Name, comm.commitMeta.Email))) - - timeStr := comm.commitMeta.FormatTS() - pager.Writer.Write([]byte(fmt.Sprintf("\nDate: 
%s", timeStr))) - - formattedDesc := "\n\n\t" + strings.Replace(comm.commitMeta.Description, "\n", "\n\t", -1) + "\n\n" - pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc))) + if opts.oneLine { + logCompact(opts, commits) + } else { + logDefault(opts, commits) } + }) } From 0920e50909af1193b5132df42ec2a2289627f05f Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Wed, 9 Feb 2022 19:49:36 +0000 Subject: [PATCH 030/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/log.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 86c2a9b51c..f767afe340 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -38,15 +38,15 @@ const ( mergesParam = "merges" minParentsParam = "min-parents" parentsParam = "parents" - decorateParam = "decorate" - oneLineParam = "oneline" + decorateParam = "decorate" + oneLineParam = "oneline" ) type logOpts struct { numLines int showParents bool minParents int - oneLine bool + oneLine bool } type logNode struct { @@ -123,7 +123,7 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args numLines: apr.GetIntOrDefault(numLinesParam, -1), showParents: apr.Contains(parentsParam), minParents: minParents, - oneLine: apr.Contains(oneLineParam), + oneLine: apr.Contains(oneLineParam), } // Just dolt log From 4b9361a95117e0bdde75033c34a9743cdcd7f78c Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 12:33:08 -0800 Subject: [PATCH 031/105] fixed oldgen address --- go/cmd/dolt/commands/inspect.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/dolt/commands/inspect.go b/go/cmd/dolt/commands/inspect.go index dea312e63c..2ff5d2ee14 100644 --- a/go/cmd/dolt/commands/inspect.go +++ b/go/cmd/dolt/commands/inspect.go @@ -83,7 +83,7 @@ func (cmd InspectCmd) Exec(ctx context.Context, commandStr string, args []string func (cmd InspectCmd) 
measureChunkIndexDistribution(ctx context.Context, dEnv *env.DoltEnv) errhand.VerboseError { newGen := filepath.Join(dEnv.GetDoltDir(), dbfactory.DataDir) - oldGen := filepath.Join(newGen, "oldGen") + oldGen := filepath.Join(newGen, "oldgen") itr, err := NewTableFileIter([]string{newGen, oldGen}, dEnv.FS) if err != nil { From a1d8750846d7973880793733c4b799585f94a2ed Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 14:06:00 -0800 Subject: [PATCH 032/105] added tuple merge fn test --- go/store/prolly/tree_diff_test.go | 11 ++-- go/store/prolly/tree_merge_test.go | 81 +++++++++++++++++++++++++++--- go/store/prolly/utils_test.go | 25 ++++++--- 3 files changed, 101 insertions(+), 16 deletions(-) diff --git a/go/store/prolly/tree_diff_test.go b/go/store/prolly/tree_diff_test.go index c1bac63413..0e8b4102b2 100644 --- a/go/store/prolly/tree_diff_test.go +++ b/go/store/prolly/tree_diff_test.go @@ -200,8 +200,8 @@ func testUpdateDiffs(t *testing.T, from Map, tups [][2]val.Tuple, numUpdates int return from.keyDesc.Compare(sub[i][0], sub[j][0]) < 0 }) - _, vd := from.Descriptors() - updates := makeUpdatesToTuples(vd, sub...) + kd, vd := from.Descriptors() + updates := makeUpdatesToTuples(kd, vd, sub...) to := makeMapWithUpdates(t, from, updates...) 
var cnt int @@ -253,7 +253,7 @@ func makeMapWithUpdates(t *testing.T, m Map, updates ...[3]val.Tuple) Map { return mm } -func makeUpdatesToTuples(vd val.TupleDesc, tuples ...[2]val.Tuple) (updates [][3]val.Tuple) { +func makeUpdatesToTuples(kd, vd val.TupleDesc, tuples ...[2]val.Tuple) (updates [][3]val.Tuple) { updates = make([][3]val.Tuple, len(tuples)) valBuilder := val.NewTupleBuilder(vd) @@ -262,5 +262,10 @@ func makeUpdatesToTuples(vd val.TupleDesc, tuples ...[2]val.Tuple) (updates [][3 updates[i][1] = tuples[i][1] updates[i][2] = randomTuple(valBuilder) } + + sort.Slice(updates, func(i, j int) bool { + return kd.Compare(updates[i][0], updates[j][0]) < 0 + }) + return } diff --git a/go/store/prolly/tree_merge_test.go b/go/store/prolly/tree_merge_test.go index b6efccfbed..5999c6512c 100644 --- a/go/store/prolly/tree_merge_test.go +++ b/go/store/prolly/tree_merge_test.go @@ -27,9 +27,9 @@ import ( func Test3WayMapMerge(t *testing.T) { scales := []int{ - 10, - 100, - 1000, + //10, + //100, + //1000, 10000, } @@ -53,7 +53,11 @@ func Test3WayMapMerge(t *testing.T) { testThreeWayMapMerge(t, kd, vd, s) } }) - // todo(andy): tests conflicts, cell-wise merge + t.Run("tuple merge fn", func(t *testing.T) { + for k := 0; k < 10; k++ { + testTupleMergeFn(t, kd, vd, s) + } + }) }) } } @@ -134,6 +138,71 @@ func testThreeWayMapMerge(t *testing.T, kd, vd val.TupleDesc, sz int) { } } +func testTupleMergeFn(t *testing.T, kd, vd val.TupleDesc, sz int) { + ctx := context.Background() + tuples := randomTuplePairs(sz, kd, vd) + om := prollyMapFromTuples(t, kd, vd, tuples) + base := om.(Map) + + mutSz := sz / 10 + testRand.Shuffle(len(tuples), func(i, j int) { + tuples[i], tuples[j] = tuples[j], tuples[i] + }) + + // make overlapping edits + left := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...) + right := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...) 
+ + l := base.Mutate() + for _, update := range left { + err := l.Put(ctx, update[0], update[2]) + require.NoError(t, err) + } + leftMap, err := l.Map(ctx) + require.NoError(t, err) + + r := base.Mutate() + for _, update := range right { + err := r.Put(ctx, update[0], update[2]) + require.NoError(t, err) + } + rightMap, err := r.Map(ctx) + require.NoError(t, err) + + idx := 0 + final, err := ThreeWayMerge(ctx, leftMap, rightMap, base, func(l, r Diff) (merged Diff, ok bool) { + assert.Equal(t, l.Key, r.Key) + assert.Equal(t, l.From, r.From) + + assert.Equal(t, l.To, left[idx][2]) + assert.Equal(t, r.To, right[idx][2]) + + // right diff wins + merged, ok = r, true + idx++ + return + }) + require.NoError(t, err) + + for _, update := range left { + err = final.Get(ctx, update[0], func(key, value val.Tuple) error { + assert.Equal(t, key, update[0]) + assert.NotEqual(t, value, update[2]) + return nil + }) + require.NoError(t, err) + } + + for _, update := range right { + err = final.Get(ctx, update[0], func(key, value val.Tuple) error { + assert.Equal(t, key, update[0]) + assert.Equal(t, value, update[2]) + return nil + }) + require.NoError(t, err) + } +} + type mutationSet struct { adds [][2]val.Tuple deletes []val.Tuple @@ -169,8 +238,8 @@ func makeTuplesAndMutations(kd, vd val.TupleDesc, sz int) (base [][2]val.Tuple, right.deletes[i] = pair[0] } - left.updates = makeUpdatesToTuples(vd, edits[mutSz*2:mutSz*3]...) - right.updates = makeUpdatesToTuples(vd, edits[mutSz*3:mutSz*4]...) + left.updates = makeUpdatesToTuples(kd, vd, edits[mutSz*2:mutSz*3]...) + right.updates = makeUpdatesToTuples(kd, vd, edits[mutSz*3:mutSz*4]...) 
return } diff --git a/go/store/prolly/utils_test.go b/go/store/prolly/utils_test.go index 7633720c13..0ca9f47738 100644 --- a/go/store/prolly/utils_test.go +++ b/go/store/prolly/utils_test.go @@ -77,15 +77,26 @@ func randomTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val items[i][1] = randomTuple(valBuilder) } - sortTuplePairs(items, keyDesc) + dupes := make([]int, 0, count) + for { + sortTuplePairs(items, keyDesc) + for i := range items { + if i == 0 { + continue + } + if keyDesc.Compare(items[i][0], items[i-1][0]) == 0 { + dupes = append(dupes, i) + } + } + if len(dupes) == 0 { + break + } - for i := range items { - if i == 0 { - continue - } - if keyDesc.Compare(items[i][0], items[i-1][0]) == 0 { - panic("duplicate key, unlucky!") + // replace duplicates and validate again + for _, d := range dupes { + items[d][0] = randomTuple(keyBuilder) } + dupes = dupes[:0] } return } From dd7093bf0e736981123d8bac630b6542fbb8feed Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 14:09:21 -0800 Subject: [PATCH 033/105] uncommented merge tests, randomize test data for pkg prolly --- go/store/prolly/map_test.go | 3 ++- go/store/prolly/tree_merge_test.go | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/go/store/prolly/map_test.go b/go/store/prolly/map_test.go index 2f34c29a15..51038b8ab5 100644 --- a/go/store/prolly/map_test.go +++ b/go/store/prolly/map_test.go @@ -20,6 +20,7 @@ import ( "io" "math/rand" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -27,7 +28,7 @@ import ( "github.com/dolthub/dolt/go/store/val" ) -var testRand = rand.New(rand.NewSource(1)) +var testRand = rand.New(rand.NewSource(time.Now().UnixNano())) func TestMap(t *testing.T) { scales := []int{ diff --git a/go/store/prolly/tree_merge_test.go b/go/store/prolly/tree_merge_test.go index 5999c6512c..bb46d8eaff 100644 --- a/go/store/prolly/tree_merge_test.go +++ b/go/store/prolly/tree_merge_test.go @@ 
-27,9 +27,9 @@ import ( func Test3WayMapMerge(t *testing.T) { scales := []int{ - //10, - //100, - //1000, + 10, + 100, + 1000, 10000, } From 90d9f8693ac45903ad17115fdde2ed376d1d2467 Mon Sep 17 00:00:00 2001 From: James Cor Date: Wed, 9 Feb 2022 16:32:59 -0800 Subject: [PATCH 034/105] printing references somewhat working --- go/cmd/dolt/commands/log.go | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index f767afe340..e42d8e5d78 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -53,6 +53,8 @@ type logNode struct { commitMeta *doltdb.CommitMeta commitHash hash.Hash parentHashes []hash.Hash + branchNames []string + isHead bool } var logDocs = cli.CommandDocumentationContent{ @@ -158,6 +160,13 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } + // TODO: are branches associate to commits somewhere already? + branches, _ := dEnv.DoltDB.GetBranchesWithHashes(ctx) + branchHashToName := map[hash.Hash][]string{} + for _, b := range branches { + branchHashToName[b.Hash] = append(branchHashToName[b.Hash], b.Ref.String()) + } + h, err := commit.HashOf() if err != nil { @@ -201,7 +210,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - commitsInfo = append(commitsInfo, logNode{meta, cmHash, pHashes}) + commitsInfo = append(commitsInfo, logNode{meta, cmHash, pHashes, branchHashToName[cmHash], cmHash == h}) } logToStdOut(opts, commitsInfo) @@ -299,7 +308,8 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return err } - commitsInfo = append(commitsInfo, logNode{meta, prevHash, ph}) + // TODO: fill this with correct info + commitsInfo = append(commitsInfo, logNode{meta, prevHash, ph, []string{}, false}) numLines-- } @@ -329,9 +339,16 @@ func logCompact(opts logOpts, commits []logNode) { } } - 
pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s \033[0m", chStr[:8]))) + // TODO: use short hash instead + pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s\033[0m", chStr))) - // TODO: write refs here + // TODO: write refs and tags here + pager.Writer.Write([]byte("\033[33m (\033[0m")) + if comm.isHead { + pager.Writer.Write([]byte("\033[94mHEAD -> \033[0m")) + } + pager.Writer.Write([]byte(fmt.Sprintf("\033[92m%s\033[0m", strings.Join(comm.branchNames, ", ")))) + pager.Writer.Write([]byte("\033[33m) \033[0m")) formattedDesc := strings.Replace(comm.commitMeta.Description, "\n", " ", -1) + "\n" pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc))) From 61db59f08e7d6050c66f3b26857ef9a982725ca5 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 16:44:45 -0800 Subject: [PATCH 035/105] removed collation field from val.Type --- go/store/val/codec.go | 21 ++++++--------------- go/store/val/tuple_builder.go | 12 ++++++------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index 0bad18e595..c83f99db9e 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -25,7 +25,6 @@ import ( type Type struct { Enc Encoding - Coll Collation Nullable bool } @@ -49,12 +48,6 @@ const ( timestampSize ByteSize = 15 ) -type Collation uint16 - -const ( - ByteOrderCollation Collation = 0 -) - type Encoding uint8 // Constant Size Encodings @@ -308,12 +301,12 @@ func WriteTime(buf []byte, val time.Time) { copy(buf, m) } -func writeString(buf []byte, val string, coll Collation) { +func writeString(buf []byte, val string) { expectSize(buf, ByteSize(len(val))) copy(buf, val) } -func writeBytes(buf, val []byte, coll Collation) { +func writeBytes(buf, val []byte) { expectSize(buf, ByteSize(len(val))) copy(buf, val) } @@ -371,9 +364,9 @@ func compare(typ Type, left, right []byte) int { // todo(andy): temporary Decimal implementation fallthrough case StringEnc: - return compareString(ReadString(left), 
ReadString(right), typ.Coll) + return compareString(ReadString(left), ReadString(right)) case BytesEnc: - return compareBytes(readBytes(left), readBytes(right), typ.Coll) + return compareBytes(readBytes(left), readBytes(right)) default: panic("unknown encoding") } @@ -501,13 +494,11 @@ func compareTimestamp(l, r time.Time) int { } } -func compareString(l, r string, coll Collation) int { - // todo(andy): collations +func compareString(l, r string) int { return bytes.Compare([]byte(l), []byte(r)) } -func compareBytes(l, r []byte, coll Collation) int { - // todo(andy): collations +func compareBytes(l, r []byte) int { return bytes.Compare(l, r) } diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 18290c6bcb..46edcbbcf0 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -174,7 +174,7 @@ func (tb *TupleBuilder) PutSqlTime(i int, v string) { tb.Desc.expectEncoding(i, TimeEnc) sz := ByteSize(len(v)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeString(tb.fields[i], v, tb.Desc.Types[i].Coll) + writeString(tb.fields[i], v) tb.pos += sz } @@ -192,7 +192,7 @@ func (tb *TupleBuilder) PutDecimal(i int, v string) { // todo(andy): temporary implementation sz := ByteSize(len(v)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeString(tb.fields[i], v, tb.Desc.Types[i].Coll) + writeString(tb.fields[i], v) tb.pos += sz } @@ -201,7 +201,7 @@ func (tb *TupleBuilder) PutString(i int, v string) { tb.Desc.expectEncoding(i, StringEnc) sz := ByteSize(len(v)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeString(tb.fields[i], v, tb.Desc.Types[i].Coll) + writeString(tb.fields[i], v) tb.pos += sz } @@ -210,7 +210,7 @@ func (tb *TupleBuilder) PutBytes(i int, v []byte) { tb.Desc.expectEncoding(i, BytesEnc) sz := ByteSize(len(v)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeBytes(tb.fields[i], v, tb.Desc.Types[i].Coll) + writeBytes(tb.fields[i], v) tb.pos += sz } @@ -223,7 +223,7 @@ func (tb *TupleBuilder) PutJSON(i int, v 
interface{}) { } sz := ByteSize(len(buf)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeBytes(tb.fields[i], buf, tb.Desc.Types[i].Coll) + writeBytes(tb.fields[i], buf) tb.pos += sz } @@ -236,7 +236,7 @@ func (tb *TupleBuilder) PutRaw(i int, buf []byte) { } sz := ByteSize(len(buf)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeBytes(tb.fields[i], buf, tb.Desc.Types[i].Coll) + writeBytes(tb.fields[i], buf) tb.pos += sz } From 6d9024a85942b667df9d3adbda39088a3d920ab7 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 16:47:46 -0800 Subject: [PATCH 036/105] removed (u)int48 encodings --- go/store/val/codec.go | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index c83f99db9e..4d4dcd08ca 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -37,8 +37,6 @@ const ( uint16Size ByteSize = 2 int32Size ByteSize = 4 uint32Size ByteSize = 4 - int48Size ByteSize = 6 - uint48Size ByteSize = 6 int64Size ByteSize = 8 uint64Size ByteSize = 8 float32Size ByteSize = 4 @@ -57,8 +55,6 @@ const ( Uint8Enc Encoding = 2 Int16Enc Encoding = 3 Uint16Enc Encoding = 4 - // Int24Enc Encoding = 5 - // Uint24Enc Encoding = 6 Int32Enc Encoding = 7 Uint32Enc Encoding = 8 Int64Enc Encoding = 9 @@ -67,7 +63,6 @@ const ( Float64Enc Encoding = 12 // todo(andy): experimental encodings - // consolidate into one TimestampEnc Encoding = 14 DateEnc Encoding = 15 DatetimeEnc Encoding = 16 @@ -166,17 +161,6 @@ func ReadUint32(val []byte) uint32 { return binary.LittleEndian.Uint32(val) } -func ReadUint48(val []byte) (u uint64) { - expectSize(val, uint48Size) - var tmp [8]byte - // copy |val| to |tmp| - tmp[5], tmp[4] = val[5], val[4] - tmp[3], tmp[2] = val[3], val[2] - tmp[1], tmp[0] = val[1], val[0] - u = binary.LittleEndian.Uint64(tmp[:]) - return -} - func ReadInt64(val []byte) int64 { expectSize(val, int64Size) return int64(binary.LittleEndian.Uint64(val)) @@ -259,21 +243,6 @@ func WriteUint32(buf 
[]byte, val uint32) { binary.LittleEndian.PutUint32(buf, val) } -func WriteUint48(buf []byte, u uint64) { - const maxUint48 = uint64(1<<48 - 1) - - expectSize(buf, uint48Size) - if u > maxUint48 { - panic("uint is greater than max uint48") - } - var tmp [8]byte - binary.LittleEndian.PutUint64(tmp[:], u) - // copy |tmp| to |buf| - buf[5], buf[4] = tmp[5], tmp[4] - buf[3], buf[2] = tmp[3], tmp[2] - buf[1], buf[0] = tmp[1], tmp[0] -} - func WriteInt64(buf []byte, val int64) { expectSize(buf, int64Size) binary.LittleEndian.PutUint64(buf, uint64(val)) From c1bd4e2469a6f406e5675aa7757c924b793bf188 Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Thu, 10 Feb 2022 00:56:21 +0000 Subject: [PATCH 037/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/log.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index e42d8e5d78..47c00d4a71 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -53,8 +53,8 @@ type logNode struct { commitMeta *doltdb.CommitMeta commitHash hash.Hash parentHashes []hash.Hash - branchNames []string - isHead bool + branchNames []string + isHead bool } var logDocs = cli.CommandDocumentationContent{ From 5aa1e70451052669e080e2c25005ce3983c4ab14 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 16:56:39 -0800 Subject: [PATCH 038/105] reordered codec.go to colocate read, right, and compare functions --- go/store/val/codec.go | 406 +++++++++++++++++++++--------------------- 1 file changed, 203 insertions(+), 203 deletions(-) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index 4d4dcd08ca..a4ff20d4c6 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -50,11 +50,11 @@ type Encoding uint8 // Constant Size Encodings const ( - NullEnc Encoding = 0 - Int8Enc Encoding = 1 - Uint8Enc Encoding = 2 - Int16Enc Encoding = 3 - Uint16Enc Encoding = 4 + NullEnc Encoding = 0 + 
Int8Enc Encoding = 1 + Uint8Enc Encoding = 2 + Int16Enc Encoding = 3 + Uint16Enc Encoding = 4 Int32Enc Encoding = 7 Uint32Enc Encoding = 8 Int64Enc Encoding = 9 @@ -131,56 +131,227 @@ func ReadBool(val []byte) bool { expectSize(val, int8Size) return val[0] == 1 } + +func writeBool(buf []byte, val bool) { + expectSize(buf, 1) + if val { + buf[0] = byte(1) + } else { + buf[0] = byte(0) + } +} + +// false is less that true +func compareBool(l, r bool) int { + if l == r { + return 0 + } + if !l && r { + return -1 + } + return 1 +} + func ReadInt8(val []byte) int8 { expectSize(val, int8Size) return int8(val[0]) } +func WriteInt8(buf []byte, val int8) { + expectSize(buf, int8Size) + buf[0] = byte(val) +} + +func compareInt8(l, r int8) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadUint8(val []byte) uint8 { expectSize(val, uint8Size) return val[0] } +func WriteUint8(buf []byte, val uint8) { + expectSize(buf, uint8Size) + buf[0] = byte(val) +} + +func compareUint8(l, r uint8) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadInt16(val []byte) int16 { expectSize(val, int16Size) return int16(binary.LittleEndian.Uint16(val)) } +func WriteInt16(buf []byte, val int16) { + expectSize(buf, int16Size) + binary.LittleEndian.PutUint16(buf, uint16(val)) +} + +func compareInt16(l, r int16) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadUint16(val []byte) uint16 { expectSize(val, uint16Size) return binary.LittleEndian.Uint16(val) } +func WriteUint16(buf []byte, val uint16) { + expectSize(buf, uint16Size) + binary.LittleEndian.PutUint16(buf, val) +} + +func compareUint16(l, r uint16) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadInt32(val []byte) int32 { expectSize(val, int32Size) return int32(binary.LittleEndian.Uint32(val)) } +func WriteInt32(buf []byte, val int32) { 
+ expectSize(buf, int32Size) + binary.LittleEndian.PutUint32(buf, uint32(val)) +} + +func compareInt32(l, r int32) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadUint32(val []byte) uint32 { expectSize(val, uint32Size) return binary.LittleEndian.Uint32(val) } +func WriteUint32(buf []byte, val uint32) { + expectSize(buf, uint32Size) + binary.LittleEndian.PutUint32(buf, val) +} + +func compareUint32(l, r uint32) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadInt64(val []byte) int64 { expectSize(val, int64Size) return int64(binary.LittleEndian.Uint64(val)) } +func WriteInt64(buf []byte, val int64) { + expectSize(buf, int64Size) + binary.LittleEndian.PutUint64(buf, uint64(val)) +} + +func compareInt64(l, r int64) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadUint64(val []byte) uint64 { expectSize(val, uint64Size) return binary.LittleEndian.Uint64(val) } +func WriteUint64(buf []byte, val uint64) { + expectSize(buf, uint64Size) + binary.LittleEndian.PutUint64(buf, val) +} + +func compareUint64(l, r uint64) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadFloat32(val []byte) float32 { expectSize(val, float32Size) return math.Float32frombits(ReadUint32(val)) } +func WriteFloat32(buf []byte, val float32) { + expectSize(buf, float32Size) + binary.LittleEndian.PutUint32(buf, math.Float32bits(val)) +} + +func compareFloat32(l, r float32) int { + if l == r { + return 0 + } else if l < r { + return -1 + } else { + return 1 + } +} + func ReadFloat64(val []byte) float64 { expectSize(val, float64Size) return math.Float64frombits(ReadUint64(val)) } +func WriteFloat64(buf []byte, val float64) { + expectSize(buf, float64Size) + binary.LittleEndian.PutUint64(buf, math.Float64bits(val)) +} + +func compareFloat64(l, r float64) int { + if l == r { + return 0 + } else if 
l < r { + return -1 + } else { + return 1 + } +} + func ReadDecimal(val []byte) decimal.Decimal { // todo(andy): temporary lossy implementation //return decimal.NewFromFloat(ReadFloat64(val)) @@ -195,74 +366,6 @@ func ReadTime(buf []byte) (t time.Time) { return t } -func ReadString(val []byte) string { - // todo(andy): fix allocation - return string(val) -} - -func readBytes(val []byte) []byte { - return val -} - -func writeBool(buf []byte, val bool) { - expectSize(buf, 1) - if val { - buf[0] = byte(1) - } else { - buf[0] = byte(0) - } -} - -func WriteInt8(buf []byte, val int8) { - expectSize(buf, int8Size) - buf[0] = byte(val) -} - -func WriteUint8(buf []byte, val uint8) { - expectSize(buf, uint8Size) - buf[0] = byte(val) -} - -func WriteInt16(buf []byte, val int16) { - expectSize(buf, int16Size) - binary.LittleEndian.PutUint16(buf, uint16(val)) -} - -func WriteUint16(buf []byte, val uint16) { - expectSize(buf, uint16Size) - binary.LittleEndian.PutUint16(buf, val) -} - -func WriteInt32(buf []byte, val int32) { - expectSize(buf, int32Size) - binary.LittleEndian.PutUint32(buf, uint32(val)) -} - -func WriteUint32(buf []byte, val uint32) { - expectSize(buf, uint32Size) - binary.LittleEndian.PutUint32(buf, val) -} - -func WriteInt64(buf []byte, val int64) { - expectSize(buf, int64Size) - binary.LittleEndian.PutUint64(buf, uint64(val)) -} - -func WriteUint64(buf []byte, val uint64) { - expectSize(buf, uint64Size) - binary.LittleEndian.PutUint64(buf, val) -} - -func WriteFloat32(buf []byte, val float32) { - expectSize(buf, float32Size) - binary.LittleEndian.PutUint32(buf, math.Float32bits(val)) -} - -func WriteFloat64(buf []byte, val float64) { - expectSize(buf, float64Size) - binary.LittleEndian.PutUint64(buf, math.Float64bits(val)) -} - func WriteTime(buf []byte, val time.Time) { expectSize(buf, timestampSize) // todo(andy): fix allocation here @@ -270,16 +373,43 @@ func WriteTime(buf []byte, val time.Time) { copy(buf, m) } +func compareTimestamp(l, r time.Time) int { 
+ if l.Equal(r) { + return 0 + } else if l.Before(r) { + return -1 + } else { + return 1 + } +} + +func ReadString(val []byte) string { + // todo(andy): fix allocation + return string(val) +} + func writeString(buf []byte, val string) { expectSize(buf, ByteSize(len(val))) copy(buf, val) } +func compareString(l, r string) int { + return bytes.Compare([]byte(l), []byte(r)) +} + +func readBytes(val []byte) []byte { + return val +} + func writeBytes(buf, val []byte) { expectSize(buf, ByteSize(len(val))) copy(buf, val) } +func compareBytes(l, r []byte) int { + return bytes.Compare(l, r) +} + func expectSize(buf []byte, sz ByteSize) { if ByteSize(len(buf)) != sz { panic("byte slice is not of expected size") @@ -341,136 +471,6 @@ func compare(typ Type, left, right []byte) int { } } -// false is less that true -func compareBool(l, r bool) int { - if l == r { - return 0 - } - if !l && r { - return -1 - } - return 1 -} - -func compareInt8(l, r int8) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareUint8(l, r uint8) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareInt16(l, r int16) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareUint16(l, r uint16) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareInt32(l, r int32) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareUint32(l, r uint32) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareInt64(l, r int64) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareUint64(l, r uint64) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareFloat32(l, r float32) int { - if l == r { - return 0 - } 
else if l < r { - return -1 - } else { - return 1 - } -} - -func compareFloat64(l, r float64) int { - if l == r { - return 0 - } else if l < r { - return -1 - } else { - return 1 - } -} - -func compareTimestamp(l, r time.Time) int { - if l.Equal(r) { - return 0 - } - if l.Before(r) { - return -1 - } else { - return 1 - } -} - -func compareString(l, r string) int { - return bytes.Compare([]byte(l), []byte(r)) -} - -func compareBytes(l, r []byte) int { - return bytes.Compare(l, r) -} - // rawCmp is an array of indexes used to perform raw Tuple comparisons. // Under certain conditions, Tuple comparisons can be optimized by // directly comparing Tuples as byte slices, rather than accessing From 9a2dae47e6d2e2c36799da10db023c49f707079e Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 17:01:07 -0800 Subject: [PATCH 039/105] removed raw comparison from TupleDescriptor --- go/store/val/codec.go | 80 +++----------------------------- go/store/val/tuple_builder.go | 2 +- go/store/val/tuple_descriptor.go | 17 +------ 3 files changed, 9 insertions(+), 90 deletions(-) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index a4ff20d4c6..7099eb5228 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -19,8 +19,6 @@ import ( "encoding/binary" "math" "time" - - "github.com/shopspring/decimal" ) type Type struct { @@ -352,13 +350,7 @@ func compareFloat64(l, r float64) int { } } -func ReadDecimal(val []byte) decimal.Decimal { - // todo(andy): temporary lossy implementation - //return decimal.NewFromFloat(ReadFloat64(val)) - return decimal.NewFromFloat(ReadFloat64(val)) -} - -func ReadTime(buf []byte) (t time.Time) { +func ReadTimestamp(buf []byte) (t time.Time) { expectSize(buf, timestampSize) if err := t.UnmarshalBinary(buf); err != nil { panic(err) @@ -366,7 +358,7 @@ func ReadTime(buf []byte) (t time.Time) { return t } -func WriteTime(buf []byte, val time.Time) { +func WriteTimestamp(buf []byte, val time.Time) { expectSize(buf, timestampSize) // 
todo(andy): fix allocation here m, _ := val.MarshalBinary() @@ -410,12 +402,6 @@ func compareBytes(l, r []byte) int { return bytes.Compare(l, r) } -func expectSize(buf []byte, sz ByteSize) { - if ByteSize(len(buf)) != sz { - panic("byte slice is not of expected size") - } -} - func compare(typ Type, left, right []byte) int { // order NULLs last if left == nil { @@ -456,7 +442,7 @@ func compare(typ Type, left, right []byte) int { case YearEnc: return compareInt16(ReadInt16(left), ReadInt16(right)) case DateEnc, DatetimeEnc, TimestampEnc: - return compareTimestamp(ReadTime(left), ReadTime(right)) + return compareTimestamp(ReadTimestamp(left), ReadTimestamp(right)) case TimeEnc: panic("unimplemented") case DecimalEnc: @@ -471,62 +457,8 @@ func compare(typ Type, left, right []byte) int { } } -// rawCmp is an array of indexes used to perform raw Tuple comparisons. -// Under certain conditions, Tuple comparisons can be optimized by -// directly comparing Tuples as byte slices, rather than accessing -// and deserializing each field. -// If each of these conditions is met, raw comparisons can be used: -// (1) All fields in the Tuple must be non-nullable. -// (2) All fields in the Tuple must be of constant size -// (eg Ints, Uints, Floats, Time types, etc.) 
-// -type rawCmp []int - -var rawCmpLookup = map[Encoding]rawCmp{ - Int8Enc: {0}, - Uint8Enc: {0}, - Int16Enc: {1, 0}, - Uint16Enc: {1, 0}, - Int32Enc: {3, 2, 1, 0}, - Uint32Enc: {3, 2, 1, 0}, - Int64Enc: {7, 6, 5, 4, 3, 2, 1, 0}, - Uint64Enc: {7, 6, 5, 4, 3, 2, 1, 0}, -} - -func compareRaw(left, right Tuple, mapping rawCmp) int { - var l, r byte - for _, idx := range mapping { - l, r = left[idx], right[idx] - if l != r { - break - } +func expectSize(buf []byte, sz ByteSize) { + if ByteSize(len(buf)) != sz { + panic("byte slice is not of expected size") } - if l > r { - return 1 - } else if l < r { - return -1 - } - return 0 -} - -func maybeGetRawComparison(types ...Type) rawCmp { - var raw []int - offset := 0 - for _, typ := range types { - if typ.Nullable { - return nil - } - - mapping, ok := rawCmpLookup[typ.Enc] - if !ok { - return nil - } - - for i := range mapping { - mapping[i] += offset - } - raw = append(raw, mapping...) - offset += len(mapping) - } - return raw } diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 46edcbbcf0..183772279c 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -165,7 +165,7 @@ func (tb *TupleBuilder) PutFloat64(i int, v float64) { func (tb *TupleBuilder) PutTimestamp(i int, v time.Time) { tb.Desc.expectEncoding(i, DateEnc, DatetimeEnc, TimestampEnc) tb.fields[i] = tb.buf[tb.pos : tb.pos+timestampSize] - WriteTime(tb.fields[i], v) + WriteTimestamp(tb.fields[i], v) tb.pos += timestampSize } diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index 159b6195ea..0cbcdcc886 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -27,12 +27,6 @@ import ( type TupleDesc struct { Types []Type cmp TupleComparator - - // Under certain conditions, Tuple comparisons can be - // optimized by directly comparing Tuples as byte slices, - // rather than accessing and deserializing each field. 
- // See definition of rawCmp for more information. - raw rawCmp } type TupleComparator interface { @@ -71,7 +65,7 @@ func NewTupleDescriptorWithComparator(cmp TupleComparator, types ...Type) (td Tu td.Types = types td.cmp = cmp - td.raw = maybeGetRawComparison(types...) + return } @@ -81,13 +75,6 @@ func TupleDescriptorPrefix(td TupleDesc, count int) TupleDesc { // Compare returns the Comaparison of |left| and |right|. func (td TupleDesc) Compare(left, right Tuple) (cmp int) { - // todo(andy): compare raw is broken - //if td.raw != nil { - // return compareRaw(left, right, td.raw) - //} else { - // return td.cmp(left, right, td) - //} - return td.cmp.Compare(left, right, td) } @@ -240,7 +227,7 @@ func (td TupleDesc) GetTimestamp(i int, tup Tuple) (v time.Time, ok bool) { td.expectEncoding(i, TimestampEnc, DateEnc, DatetimeEnc, YearEnc) b := tup.GetField(i) if b != nil { - v, ok = ReadTime(b), true + v, ok = ReadTimestamp(b), true } return } From 553762ee149e7919186727da5751633b41e10001 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 17:05:25 -0800 Subject: [PATCH 040/105] made codec functions pkg private --- go/store/val/codec.go | 78 ++++++++++++++++---------------- go/store/val/codec_test.go | 48 ++++++++++---------- go/store/val/offsets.go | 4 +- go/store/val/tuple.go | 4 +- go/store/val/tuple_builder.go | 24 +++++----- go/store/val/tuple_descriptor.go | 32 ++++++------- 6 files changed, 95 insertions(+), 95 deletions(-) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index 7099eb5228..ec04d8cf89 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -125,7 +125,7 @@ func sizeFromType(t Type) (ByteSize, bool) { } } -func ReadBool(val []byte) bool { +func readBool(val []byte) bool { expectSize(val, int8Size) return val[0] == 1 } @@ -150,12 +150,12 @@ func compareBool(l, r bool) int { return 1 } -func ReadInt8(val []byte) int8 { +func readInt8(val []byte) int8 { expectSize(val, int8Size) return int8(val[0]) } -func 
WriteInt8(buf []byte, val int8) { +func writeInt8(buf []byte, val int8) { expectSize(buf, int8Size) buf[0] = byte(val) } @@ -170,12 +170,12 @@ func compareInt8(l, r int8) int { } } -func ReadUint8(val []byte) uint8 { +func readUint8(val []byte) uint8 { expectSize(val, uint8Size) return val[0] } -func WriteUint8(buf []byte, val uint8) { +func writeUint8(buf []byte, val uint8) { expectSize(buf, uint8Size) buf[0] = byte(val) } @@ -190,12 +190,12 @@ func compareUint8(l, r uint8) int { } } -func ReadInt16(val []byte) int16 { +func readInt16(val []byte) int16 { expectSize(val, int16Size) return int16(binary.LittleEndian.Uint16(val)) } -func WriteInt16(buf []byte, val int16) { +func writeInt16(buf []byte, val int16) { expectSize(buf, int16Size) binary.LittleEndian.PutUint16(buf, uint16(val)) } @@ -210,12 +210,12 @@ func compareInt16(l, r int16) int { } } -func ReadUint16(val []byte) uint16 { +func readUint16(val []byte) uint16 { expectSize(val, uint16Size) return binary.LittleEndian.Uint16(val) } -func WriteUint16(buf []byte, val uint16) { +func writeUint16(buf []byte, val uint16) { expectSize(buf, uint16Size) binary.LittleEndian.PutUint16(buf, val) } @@ -230,12 +230,12 @@ func compareUint16(l, r uint16) int { } } -func ReadInt32(val []byte) int32 { +func readInt32(val []byte) int32 { expectSize(val, int32Size) return int32(binary.LittleEndian.Uint32(val)) } -func WriteInt32(buf []byte, val int32) { +func writeInt32(buf []byte, val int32) { expectSize(buf, int32Size) binary.LittleEndian.PutUint32(buf, uint32(val)) } @@ -250,12 +250,12 @@ func compareInt32(l, r int32) int { } } -func ReadUint32(val []byte) uint32 { +func readUint32(val []byte) uint32 { expectSize(val, uint32Size) return binary.LittleEndian.Uint32(val) } -func WriteUint32(buf []byte, val uint32) { +func writeUint32(buf []byte, val uint32) { expectSize(buf, uint32Size) binary.LittleEndian.PutUint32(buf, val) } @@ -270,12 +270,12 @@ func compareUint32(l, r uint32) int { } } -func ReadInt64(val []byte) int64 { 
+func readInt64(val []byte) int64 { expectSize(val, int64Size) return int64(binary.LittleEndian.Uint64(val)) } -func WriteInt64(buf []byte, val int64) { +func writeInt64(buf []byte, val int64) { expectSize(buf, int64Size) binary.LittleEndian.PutUint64(buf, uint64(val)) } @@ -290,12 +290,12 @@ func compareInt64(l, r int64) int { } } -func ReadUint64(val []byte) uint64 { +func readUint64(val []byte) uint64 { expectSize(val, uint64Size) return binary.LittleEndian.Uint64(val) } -func WriteUint64(buf []byte, val uint64) { +func writeUint64(buf []byte, val uint64) { expectSize(buf, uint64Size) binary.LittleEndian.PutUint64(buf, val) } @@ -310,12 +310,12 @@ func compareUint64(l, r uint64) int { } } -func ReadFloat32(val []byte) float32 { +func readFloat32(val []byte) float32 { expectSize(val, float32Size) - return math.Float32frombits(ReadUint32(val)) + return math.Float32frombits(readUint32(val)) } -func WriteFloat32(buf []byte, val float32) { +func writeFloat32(buf []byte, val float32) { expectSize(buf, float32Size) binary.LittleEndian.PutUint32(buf, math.Float32bits(val)) } @@ -330,12 +330,12 @@ func compareFloat32(l, r float32) int { } } -func ReadFloat64(val []byte) float64 { +func readFloat64(val []byte) float64 { expectSize(val, float64Size) - return math.Float64frombits(ReadUint64(val)) + return math.Float64frombits(readUint64(val)) } -func WriteFloat64(buf []byte, val float64) { +func writeFloat64(buf []byte, val float64) { expectSize(buf, float64Size) binary.LittleEndian.PutUint64(buf, math.Float64bits(val)) } @@ -350,7 +350,7 @@ func compareFloat64(l, r float64) int { } } -func ReadTimestamp(buf []byte) (t time.Time) { +func readTimestamp(buf []byte) (t time.Time) { expectSize(buf, timestampSize) if err := t.UnmarshalBinary(buf); err != nil { panic(err) @@ -358,7 +358,7 @@ func ReadTimestamp(buf []byte) (t time.Time) { return t } -func WriteTimestamp(buf []byte, val time.Time) { +func writeTimestamp(buf []byte, val time.Time) { expectSize(buf, timestampSize) // 
todo(andy): fix allocation here m, _ := val.MarshalBinary() @@ -375,7 +375,7 @@ func compareTimestamp(l, r time.Time) int { } } -func ReadString(val []byte) string { +func readString(val []byte) string { // todo(andy): fix allocation return string(val) } @@ -420,36 +420,36 @@ func compare(typ Type, left, right []byte) int { switch typ.Enc { case Int8Enc: - return compareInt8(ReadInt8(left), ReadInt8(right)) + return compareInt8(readInt8(left), readInt8(right)) case Uint8Enc: - return compareUint8(ReadUint8(left), ReadUint8(right)) + return compareUint8(readUint8(left), readUint8(right)) case Int16Enc: - return compareInt16(ReadInt16(left), ReadInt16(right)) + return compareInt16(readInt16(left), readInt16(right)) case Uint16Enc: - return compareUint16(ReadUint16(left), ReadUint16(right)) + return compareUint16(readUint16(left), readUint16(right)) case Int32Enc: - return compareInt32(ReadInt32(left), ReadInt32(right)) + return compareInt32(readInt32(left), readInt32(right)) case Uint32Enc: - return compareUint32(ReadUint32(left), ReadUint32(right)) + return compareUint32(readUint32(left), readUint32(right)) case Int64Enc: - return compareInt64(ReadInt64(left), ReadInt64(right)) + return compareInt64(readInt64(left), readInt64(right)) case Uint64Enc: - return compareUint64(ReadUint64(left), ReadUint64(right)) + return compareUint64(readUint64(left), readUint64(right)) case Float32Enc: - return compareFloat32(ReadFloat32(left), ReadFloat32(right)) + return compareFloat32(readFloat32(left), readFloat32(right)) case Float64Enc: - return compareFloat64(ReadFloat64(left), ReadFloat64(right)) + return compareFloat64(readFloat64(left), readFloat64(right)) case YearEnc: - return compareInt16(ReadInt16(left), ReadInt16(right)) + return compareInt16(readInt16(left), readInt16(right)) case DateEnc, DatetimeEnc, TimestampEnc: - return compareTimestamp(ReadTimestamp(left), ReadTimestamp(right)) + return compareTimestamp(readTimestamp(left), readTimestamp(right)) case TimeEnc: 
panic("unimplemented") case DecimalEnc: // todo(andy): temporary Decimal implementation fallthrough case StringEnc: - return compareString(ReadString(left), ReadString(right)) + return compareString(readString(left), readString(right)) case BytesEnc: return compareBytes(readBytes(left), readBytes(right)) default: diff --git a/go/store/val/codec_test.go b/go/store/val/codec_test.go index 3c31558a54..6f8ef6201c 100644 --- a/go/store/val/codec_test.go +++ b/go/store/val/codec_test.go @@ -116,19 +116,19 @@ func TestCompare(t *testing.T) { func encInt(i int64) []byte { buf := make([]byte, 8) - WriteInt64(buf, i) + writeInt64(buf, i) return buf } func encUint(u uint64) []byte { buf := make([]byte, 8) - WriteUint64(buf, u) + writeUint64(buf, u) return buf } func encFloat(f float64) []byte { buf := make([]byte, 8) - WriteFloat64(buf, f) + writeFloat64(buf, f) return buf } @@ -156,7 +156,7 @@ func roundTripBools(t *testing.T) { integers := []bool{true, false} for _, exp := range integers { writeBool(buf, exp) - assert.Equal(t, exp, ReadBool(buf)) + assert.Equal(t, exp, readBool(buf)) zero(buf) } } @@ -166,8 +166,8 @@ func roundTripInts(t *testing.T) { integers := []int64{-1, 0, -1, math.MaxInt8, math.MinInt8} for _, value := range integers { exp := int8(value) - WriteInt8(buf, exp) - assert.Equal(t, exp, ReadInt8(buf)) + writeInt8(buf, exp) + assert.Equal(t, exp, readInt8(buf)) zero(buf) } @@ -175,8 +175,8 @@ func roundTripInts(t *testing.T) { integers = append(integers, math.MaxInt16, math.MaxInt16) for _, value := range integers { exp := int16(value) - WriteInt16(buf, exp) - assert.Equal(t, exp, ReadInt16(buf)) + writeInt16(buf, exp) + assert.Equal(t, exp, readInt16(buf)) zero(buf) } @@ -184,8 +184,8 @@ func roundTripInts(t *testing.T) { integers = append(integers, math.MaxInt32, math.MaxInt32) for _, value := range integers { exp := int32(value) - WriteInt32(buf, exp) - assert.Equal(t, exp, ReadInt32(buf)) + writeInt32(buf, exp) + assert.Equal(t, exp, readInt32(buf)) 
zero(buf) } @@ -193,8 +193,8 @@ func roundTripInts(t *testing.T) { integers = append(integers, math.MaxInt64, math.MaxInt64) for _, value := range integers { exp := int64(value) - WriteInt64(buf, exp) - assert.Equal(t, exp, ReadInt64(buf)) + writeInt64(buf, exp) + assert.Equal(t, exp, readInt64(buf)) zero(buf) } } @@ -204,8 +204,8 @@ func roundTripUints(t *testing.T) { uintegers := []uint64{0, 1, math.MaxUint8} for _, value := range uintegers { exp := uint8(value) - WriteUint8(buf, exp) - assert.Equal(t, exp, ReadUint8(buf)) + writeUint8(buf, exp) + assert.Equal(t, exp, readUint8(buf)) zero(buf) } @@ -213,8 +213,8 @@ func roundTripUints(t *testing.T) { uintegers = append(uintegers, math.MaxUint16) for _, value := range uintegers { exp := uint16(value) - WriteUint16(buf, exp) - assert.Equal(t, exp, ReadUint16(buf)) + writeUint16(buf, exp) + assert.Equal(t, exp, readUint16(buf)) zero(buf) } @@ -222,8 +222,8 @@ func roundTripUints(t *testing.T) { uintegers = append(uintegers, math.MaxUint32) for _, value := range uintegers { exp := uint32(value) - WriteUint32(buf, exp) - assert.Equal(t, exp, ReadUint32(buf)) + writeUint32(buf, exp) + assert.Equal(t, exp, readUint32(buf)) zero(buf) } @@ -231,8 +231,8 @@ func roundTripUints(t *testing.T) { uintegers = append(uintegers, math.MaxUint64) for _, value := range uintegers { exp := uint64(value) - WriteUint64(buf, exp) - assert.Equal(t, exp, ReadUint64(buf)) + writeUint64(buf, exp) + assert.Equal(t, exp, readUint64(buf)) zero(buf) } } @@ -242,8 +242,8 @@ func roundTripFloats(t *testing.T) { floats := []float64{-1, 0, 1, math.MaxFloat32, math.SmallestNonzeroFloat32} for _, value := range floats { exp := float32(value) - WriteFloat32(buf, exp) - assert.Equal(t, exp, ReadFloat32(buf)) + writeFloat32(buf, exp) + assert.Equal(t, exp, readFloat32(buf)) zero(buf) } @@ -251,8 +251,8 @@ func roundTripFloats(t *testing.T) { floats = append(floats, math.MaxFloat64, math.SmallestNonzeroFloat64) for _, value := range floats { exp := 
float64(value) - WriteFloat64(buf, exp) - assert.Equal(t, exp, ReadFloat64(buf)) + writeFloat64(buf, exp) + assert.Equal(t, exp, readFloat64(buf)) zero(buf) } } diff --git a/go/store/val/offsets.go b/go/store/val/offsets.go index d4ec83e88f..d467bb4ccf 100644 --- a/go/store/val/offsets.go +++ b/go/store/val/offsets.go @@ -54,7 +54,7 @@ func (os Offsets) getOffset(i int) ByteSize { return 0 } start := (i - 1) * 2 - off := ReadUint16(os[start : start+2]) + off := readUint16(os[start : start+2]) return ByteSize(off) } @@ -64,7 +64,7 @@ func (os Offsets) Put(i int, off ByteSize) { return } start := (i - 1) * 2 - WriteUint16(os[start:start+2], uint16(off)) + writeUint16(os[start:start+2], uint16(off)) } // isLastIndex returns true if |i| is the last index in |sl|. diff --git a/go/store/val/tuple.go b/go/store/val/tuple.go index b957ca0cab..f87a172d33 100644 --- a/go/store/val/tuple.go +++ b/go/store/val/tuple.go @@ -157,7 +157,7 @@ func (tup Tuple) Count() int { func (tup Tuple) fieldCount() int { sl := tup[tup.size()-numFieldsSize:] - return int(ReadUint16(sl)) + return int(readUint16(sl)) } func (tup Tuple) valueCount() int { @@ -192,5 +192,5 @@ func sizeOf(val []byte) ByteSize { func writeFieldCount(tup Tuple, count int) { sl := tup[len(tup)-int(numFieldsSize):] - WriteUint16(sl, uint16(count)) + writeUint16(sl, uint16(count)) } diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 183772279c..07fb0b4b2e 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -86,7 +86,7 @@ func (tb *TupleBuilder) PutBool(i int, v bool) { func (tb *TupleBuilder) PutInt8(i int, v int8) { tb.Desc.expectEncoding(i, Int8Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+int8Size] - WriteInt8(tb.fields[i], v) + writeInt8(tb.fields[i], v) tb.pos += int8Size } @@ -94,7 +94,7 @@ func (tb *TupleBuilder) PutInt8(i int, v int8) { func (tb *TupleBuilder) PutUint8(i int, v uint8) { tb.Desc.expectEncoding(i, Uint8Enc) tb.fields[i] = tb.buf[tb.pos : 
tb.pos+uint8Size] - WriteUint8(tb.fields[i], v) + writeUint8(tb.fields[i], v) tb.pos += uint8Size } @@ -102,7 +102,7 @@ func (tb *TupleBuilder) PutUint8(i int, v uint8) { func (tb *TupleBuilder) PutInt16(i int, v int16) { tb.Desc.expectEncoding(i, Int16Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size] - WriteInt16(tb.fields[i], v) + writeInt16(tb.fields[i], v) tb.pos += int16Size } @@ -110,7 +110,7 @@ func (tb *TupleBuilder) PutInt16(i int, v int16) { func (tb *TupleBuilder) PutUint16(i int, v uint16) { tb.Desc.expectEncoding(i, Uint16Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+uint16Size] - WriteUint16(tb.fields[i], v) + writeUint16(tb.fields[i], v) tb.pos += uint16Size } @@ -118,7 +118,7 @@ func (tb *TupleBuilder) PutUint16(i int, v uint16) { func (tb *TupleBuilder) PutInt32(i int, v int32) { tb.Desc.expectEncoding(i, Int32Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+int32Size] - WriteInt32(tb.fields[i], v) + writeInt32(tb.fields[i], v) tb.pos += int32Size } @@ -126,7 +126,7 @@ func (tb *TupleBuilder) PutInt32(i int, v int32) { func (tb *TupleBuilder) PutUint32(i int, v uint32) { tb.Desc.expectEncoding(i, Uint32Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+uint32Size] - WriteUint32(tb.fields[i], v) + writeUint32(tb.fields[i], v) tb.pos += uint32Size } @@ -134,7 +134,7 @@ func (tb *TupleBuilder) PutUint32(i int, v uint32) { func (tb *TupleBuilder) PutInt64(i int, v int64) { tb.Desc.expectEncoding(i, Int64Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+int64Size] - WriteInt64(tb.fields[i], v) + writeInt64(tb.fields[i], v) tb.pos += int64Size } @@ -142,7 +142,7 @@ func (tb *TupleBuilder) PutInt64(i int, v int64) { func (tb *TupleBuilder) PutUint64(i int, v uint64) { tb.Desc.expectEncoding(i, Uint64Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+uint64Size] - WriteUint64(tb.fields[i], v) + writeUint64(tb.fields[i], v) tb.pos += uint64Size } @@ -150,7 +150,7 @@ func (tb *TupleBuilder) PutUint64(i int, v uint64) { func (tb *TupleBuilder) PutFloat32(i int, v float32) { 
tb.Desc.expectEncoding(i, Float32Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+float32Size] - WriteFloat32(tb.fields[i], v) + writeFloat32(tb.fields[i], v) tb.pos += float32Size } @@ -158,14 +158,14 @@ func (tb *TupleBuilder) PutFloat32(i int, v float32) { func (tb *TupleBuilder) PutFloat64(i int, v float64) { tb.Desc.expectEncoding(i, Float64Enc) tb.fields[i] = tb.buf[tb.pos : tb.pos+float64Size] - WriteFloat64(tb.fields[i], v) + writeFloat64(tb.fields[i], v) tb.pos += float64Size } func (tb *TupleBuilder) PutTimestamp(i int, v time.Time) { tb.Desc.expectEncoding(i, DateEnc, DatetimeEnc, TimestampEnc) tb.fields[i] = tb.buf[tb.pos : tb.pos+timestampSize] - WriteTimestamp(tb.fields[i], v) + writeTimestamp(tb.fields[i], v) tb.pos += timestampSize } @@ -183,7 +183,7 @@ func (tb *TupleBuilder) PutYear(i int, v int16) { // todo(andy): yearSize, etc? tb.Desc.expectEncoding(i, YearEnc) tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size] - WriteInt16(tb.fields[i], v) + writeInt16(tb.fields[i], v) tb.pos += int16Size } diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index 0cbcdcc886..470c7aaf5d 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -95,7 +95,7 @@ func (td TupleDesc) GetBool(i int, tup Tuple) (v bool, ok bool) { td.expectEncoding(i, Int8Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadBool(b), true + v, ok = readBool(b), true } return } @@ -106,7 +106,7 @@ func (td TupleDesc) GetInt8(i int, tup Tuple) (v int8, ok bool) { td.expectEncoding(i, Int8Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadInt8(b), true + v, ok = readInt8(b), true } return } @@ -117,7 +117,7 @@ func (td TupleDesc) GetUint8(i int, tup Tuple) (v uint8, ok bool) { td.expectEncoding(i, Uint8Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadUint8(b), true + v, ok = readUint8(b), true } return } @@ -128,7 +128,7 @@ func (td TupleDesc) GetInt16(i int, tup Tuple) (v int16, ok bool) { td.expectEncoding(i, Int16Enc) b := 
tup.GetField(i) if b != nil { - v, ok = ReadInt16(b), true + v, ok = readInt16(b), true } return } @@ -139,7 +139,7 @@ func (td TupleDesc) GetUint16(i int, tup Tuple) (v uint16, ok bool) { td.expectEncoding(i, Uint16Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadUint16(b), true + v, ok = readUint16(b), true } return } @@ -150,7 +150,7 @@ func (td TupleDesc) GetInt32(i int, tup Tuple) (v int32, ok bool) { td.expectEncoding(i, Int32Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadInt32(b), true + v, ok = readInt32(b), true } return } @@ -161,7 +161,7 @@ func (td TupleDesc) GetUint32(i int, tup Tuple) (v uint32, ok bool) { td.expectEncoding(i, Uint32Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadUint32(b), true + v, ok = readUint32(b), true } return } @@ -172,7 +172,7 @@ func (td TupleDesc) GetInt64(i int, tup Tuple) (v int64, ok bool) { td.expectEncoding(i, Int64Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadInt64(b), true + v, ok = readInt64(b), true } return } @@ -183,7 +183,7 @@ func (td TupleDesc) GetUint64(i int, tup Tuple) (v uint64, ok bool) { td.expectEncoding(i, Uint64Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadUint64(b), true + v, ok = readUint64(b), true } return } @@ -194,7 +194,7 @@ func (td TupleDesc) GetFloat32(i int, tup Tuple) (v float32, ok bool) { td.expectEncoding(i, Float32Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadFloat32(b), true + v, ok = readFloat32(b), true } return } @@ -205,7 +205,7 @@ func (td TupleDesc) GetFloat64(i int, tup Tuple) (v float64, ok bool) { td.expectEncoding(i, Float64Enc) b := tup.GetField(i) if b != nil { - v, ok = ReadFloat64(b), true + v, ok = readFloat64(b), true } return } @@ -216,7 +216,7 @@ func (td TupleDesc) GetDecimal(i int, tup Tuple) (v string, ok bool) { td.expectEncoding(i, DecimalEnc) b := tup.GetField(i) if b != nil { - v, ok = ReadString(b), true + v, ok = readString(b), true } return } @@ -227,7 +227,7 @@ func (td TupleDesc) GetTimestamp(i int, tup Tuple) (v 
time.Time, ok bool) { td.expectEncoding(i, TimestampEnc, DateEnc, DatetimeEnc, YearEnc) b := tup.GetField(i) if b != nil { - v, ok = ReadTimestamp(b), true + v, ok = readTimestamp(b), true } return } @@ -238,7 +238,7 @@ func (td TupleDesc) GetSqlTime(i int, tup Tuple) (v string, ok bool) { td.expectEncoding(i, TimeEnc) b := tup.GetField(i) if b != nil { - v, ok = ReadString(b), true + v, ok = readString(b), true } return } @@ -249,7 +249,7 @@ func (td TupleDesc) GetYear(i int, tup Tuple) (v int16, ok bool) { td.expectEncoding(i, YearEnc) b := tup.GetField(i) if b != nil { - v, ok = ReadInt16(b), true + v, ok = readInt16(b), true } return } @@ -260,7 +260,7 @@ func (td TupleDesc) GetString(i int, tup Tuple) (v string, ok bool) { td.expectEncoding(i, StringEnc) b := tup.GetField(i) if b != nil { - v = ReadString(b) + v = readString(b) ok = true } return From d62153719104ea44c025121916c4cc677918323f Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 17:32:00 -0800 Subject: [PATCH 041/105] refactored offsets array to slicedBuffer --- go/store/val/offsets.go | 71 ++++++++++++++++++++++------------------- go/store/val/tuple.go | 27 ++++++---------- 2 files changed, 47 insertions(+), 51 deletions(-) diff --git a/go/store/val/offsets.go b/go/store/val/offsets.go index d467bb4ccf..bd8cf8272d 100644 --- a/go/store/val/offsets.go +++ b/go/store/val/offsets.go @@ -14,42 +14,52 @@ package val -// todo(andy): more ergonomic offsets -// type SlicedBuffer struct { -// buf []byte -// offs []uint16 -// } +type slicedBuffer struct { + buf []byte + offs offsets +} -type Offsets []byte +func slicedTupleBuffer(tup Tuple) slicedBuffer { + mask := tup.mask() + offStop := tup.size() - numFieldsSize - mask.size() + bufStop := offStop - offsetsSize(mask.count()) -// OffsetsSize returns the number of bytes needed to + return slicedBuffer{ + buf: tup[:bufStop], + offs: offsets(tup[bufStop:offStop]), + } +} + +// GetBounds returns the ith offset. 
|last| is the byte position +// of the _end_ of the last element. +func (sb slicedBuffer) getBounds(i int) (start, stop ByteSize) { + start = sb.offs.getOffset(i) + if sb.isLastIndex(i) { + stop = ByteSize(len(sb.buf)) + } else { + stop = sb.offs.getOffset(i + 1) + } + return +} + +// isLastIndex returns true if |i| is the last index in |sl|. +func (sb slicedBuffer) isLastIndex(i int) bool { + return len(sb.offs) == i*2 +} + +type offsets []byte + +// offsetsSize returns the number of bytes needed to // store |fieldCount| offsets. -func OffsetsSize(count int) ByteSize { +func offsetsSize(count int) ByteSize { if count == 0 { return 0 } return ByteSize((count - 1) * 2) } -// Count returns the number of offsets stored in |sl|. -func (os Offsets) Count() int { - return (len(os) / 2) + 1 -} - -// GetBounds returns the ith offset. |last| is the byte position -// of the _end_ of the last element. -func (os Offsets) GetBounds(i int, last ByteSize) (start, stop ByteSize) { - start = os.getOffset(i) - if os.isLastIndex(i) { - stop = last - } else { - stop = os.getOffset(i + 1) - } - return -} - // getOffset gets the byte position of the _start_ of element |i|. -func (os Offsets) getOffset(i int) ByteSize { +func (os offsets) getOffset(i int) ByteSize { if i == 0 { return 0 } @@ -58,16 +68,11 @@ func (os Offsets) getOffset(i int) ByteSize { return ByteSize(off) } -// Put writes offset |pos| at index |i|. -func (os Offsets) Put(i int, off ByteSize) { +// putOffset writes offset |pos| at index |i|. +func (os offsets) putOffset(i int, off ByteSize) { if i == 0 { return } start := (i - 1) * 2 writeUint16(os[start:start+2], uint16(off)) } - -// isLastIndex returns true if |i| is the last index in |sl|. 
-func (os Offsets) isLastIndex(i int) bool { - return len(os) == i*2 -} diff --git a/go/store/val/tuple.go b/go/store/val/tuple.go index f87a172d33..61d3208b89 100644 --- a/go/store/val/tuple.go +++ b/go/store/val/tuple.go @@ -30,7 +30,7 @@ const ( // Tuples are byte slices containing field values and a footer. Tuples only // contain Values for non-NULL Fields. Value i contains the data for ith non- // NULL Field. Values are packed contiguously from the front of the Tuple. The -// footer contains offsets, a member mask, and a field count. Offsets enable +// footer contains offsets, a member mask, and a field count. offsets enable // random access to Values. The member mask enables NULL-compaction for Values. // // Tuples read and write Values as byte slices. (De)serialization is delegated @@ -42,10 +42,10 @@ const ( // // Tuple: // +---------+---------+-----+---------+---------+-------------+-------------+ -// | Value 0 | Value 1 | ... | Value K | Offsets | Member Mask | Field Count | +// | Value 0 | Value 1 | ... | Value K | offsets | Member Mask | Field Count | // +---------+---------+-----+---------+---------+-------------+-------------+ // -// Offsets: +// offsets: // The offset array contains a uint16 for each non-NULL field after field 0. // Offset i encodes the distance to the ith Value from the front of the Tuple. 
// The size of the offset array is 2*(K-1) bytes, where K is the number of @@ -91,7 +91,7 @@ func NewTuple(pool pool.BuffPool, values ...[]byte) Tuple { panic("tuple data size exceeds maximum") } - tup, offs, mask := makeTuple(pool, pos, count, len(values)) + tup, offs, mask := allocateTuple(pool, pos, count, len(values)) count = 0 pos = ByteSize(0) @@ -100,7 +100,7 @@ func NewTuple(pool pool.BuffPool, values ...[]byte) Tuple { continue } mask.set(i) - offs.Put(count, pos) + offs.putOffset(count, pos) count++ copy(tup[pos:pos+sizeOf(v)], v) @@ -116,15 +116,15 @@ func CloneTuple(pool pool.BuffPool, tup Tuple) Tuple { return buf } -func makeTuple(pool pool.BuffPool, bufSz ByteSize, values, fields int) (tup Tuple, offs Offsets, ms memberMask) { - offSz := OffsetsSize(values) +func allocateTuple(pool pool.BuffPool, bufSz ByteSize, values, fields int) (tup Tuple, offs offsets, ms memberMask) { + offSz := offsetsSize(values) maskSz := maskSize(fields) countSz := numFieldsSize tup = pool.Get(uint64(bufSz + offSz + maskSz + countSz)) writeFieldCount(tup, fields) - offs = Offsets(tup[bufSz : bufSz+offSz]) + offs = offsets(tup[bufSz : bufSz+offSz]) ms = memberMask(tup[bufSz+offSz : bufSz+offSz+maskSz]) return @@ -141,8 +141,7 @@ func (tup Tuple) GetField(i int) []byte { // index to compensate for NULL fields i = tup.fieldToValue(i) - offs, valStop := tup.offsets() - start, stop := offs.GetBounds(i, valStop) + start, stop := slicedTupleBuffer(tup).getBounds(i) return tup[start:stop] } @@ -170,14 +169,6 @@ func (tup Tuple) mask() memberMask { return memberMask(tup[start:stop]) } -func (tup Tuple) offsets() (offs Offsets, valStop ByteSize) { - mask := tup.mask() - offStop := tup.size() - numFieldsSize - mask.size() - valStop = offStop - OffsetsSize(mask.count()) - offs = Offsets(tup[valStop:offStop]) - return -} - func (tup Tuple) fieldToValue(i int) int { return tup.mask().countPrefix(i) - 1 } From 521d4f27283dadea5c37007abab12dd561ebf58e Mon Sep 17 00:00:00 2001 From: Dhruv 
Sringari Date: Wed, 9 Feb 2022 19:02:41 -0800 Subject: [PATCH 042/105] change tableIndex interface to return errors --- go/store/nbs/block_store_test.go | 4 +- go/store/nbs/chunk_source_adapter.go | 8 +- go/store/nbs/conjoiner_test.go | 4 +- go/store/nbs/mmap_table_reader.go | 8 +- go/store/nbs/persisting_chunk_source.go | 8 +- go/store/nbs/store.go | 10 +- go/store/nbs/table.go | 4 +- go/store/nbs/table_persister.go | 5 +- go/store/nbs/table_reader.go | 145 +++++++++++++++--------- go/store/nbs/table_set.go | 23 +++- go/store/nbs/util.go | 5 +- 11 files changed, 151 insertions(+), 73 deletions(-) diff --git a/go/store/nbs/block_store_test.go b/go/store/nbs/block_store_test.go index 734f2adb45..727e7db2eb 100644 --- a/go/store/nbs/block_store_test.go +++ b/go/store/nbs/block_store_test.go @@ -418,7 +418,9 @@ func TestBlockStoreConjoinOnCommit(t *testing.T) { assertContainAll := func(t *testing.T, store chunks.ChunkStore, srcs ...chunkSource) { rdrs := make(chunkReaderGroup, len(srcs)) for i, src := range srcs { - rdrs[i] = src.Clone() + c, err := src.Clone() + require.NoError(t, err) + rdrs[i] = c } chunkChan := make(chan extractRecord, mustUint32(rdrs.count())) err := rdrs.extract(context.Background(), chunkChan) diff --git a/go/store/nbs/chunk_source_adapter.go b/go/store/nbs/chunk_source_adapter.go index 52469d0f4f..b291f19022 100644 --- a/go/store/nbs/chunk_source_adapter.go +++ b/go/store/nbs/chunk_source_adapter.go @@ -49,6 +49,10 @@ func (csa chunkSourceAdapter) Close() error { return csa.tableReader.Close() } -func (csa chunkSourceAdapter) Clone() chunkSource { - return &chunkSourceAdapter{csa.tableReader.Clone(), csa.h} +func (csa chunkSourceAdapter) Clone() (chunkSource, error) { + tr, err := csa.tableReader.Clone() + if err != nil { + return &chunkSourceAdapter{}, err + } + return &chunkSourceAdapter{tr, csa.h}, nil } diff --git a/go/store/nbs/conjoiner_test.go b/go/store/nbs/conjoiner_test.go index 859ccce8bf..19101836d1 100644 --- 
a/go/store/nbs/conjoiner_test.go +++ b/go/store/nbs/conjoiner_test.go @@ -64,7 +64,9 @@ func makeTestSrcs(t *testing.T, tableSizes []uint32, p tablePersister) (srcs chu } cs, err := p.Persist(context.Background(), mt, nil, &Stats{}) require.NoError(t, err) - srcs = append(srcs, cs.Clone()) + c, err := cs.Clone() + require.NoError(t, err) + srcs = append(srcs, c) } return } diff --git a/go/store/nbs/mmap_table_reader.go b/go/store/nbs/mmap_table_reader.go index fe14fe443c..d23dbe6a2b 100644 --- a/go/store/nbs/mmap_table_reader.go +++ b/go/store/nbs/mmap_table_reader.go @@ -167,8 +167,12 @@ func (mmtr *mmapTableReader) Close() error { return mmtr.tableReader.Close() } -func (mmtr *mmapTableReader) Clone() chunkSource { - return &mmapTableReader{mmtr.tableReader.Clone(), mmtr.fc, mmtr.h} +func (mmtr *mmapTableReader) Clone() (chunkSource, error) { + tr, err := mmtr.tableReader.Clone() + if err != nil { + return &mmapTableReader{}, err + } + return &mmapTableReader{tr, mmtr.fc, mmtr.h}, nil } type cacheReaderAt struct { diff --git a/go/store/nbs/persisting_chunk_source.go b/go/store/nbs/persisting_chunk_source.go index 09335ca9cd..4cf9d0b267 100644 --- a/go/store/nbs/persisting_chunk_source.go +++ b/go/store/nbs/persisting_chunk_source.go @@ -100,9 +100,9 @@ func (ccs *persistingChunkSource) Close() error { return nil } -func (ccs *persistingChunkSource) Clone() chunkSource { +func (ccs *persistingChunkSource) Clone() (chunkSource, error) { // persistingChunkSource does not own |cs| or |mt|. No need to Clone. 
- return ccs + return ccs, nil } func (ccs *persistingChunkSource) has(h addr) (bool, error) { @@ -308,6 +308,6 @@ func (ecs emptyChunkSource) Close() error { return nil } -func (ecs emptyChunkSource) Clone() chunkSource { - return ecs +func (ecs emptyChunkSource) Clone() (chunkSource, error) { + return ecs, nil } diff --git a/go/store/nbs/store.go b/go/store/nbs/store.go index 2e0f79d677..71f69f8d27 100644 --- a/go/store/nbs/store.go +++ b/go/store/nbs/store.go @@ -117,7 +117,10 @@ func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash for _, cs := range css { switch tr := cs.(type) { case *mmapTableReader: - offsetRecSlice, _ := tr.findOffsets(gr) + offsetRecSlice, _, err := tr.findOffsets(gr) + if err != nil { + return err + } if len(offsetRecSlice) > 0 { y, ok := ranges[hash.Hash(tr.h)] @@ -154,7 +157,10 @@ func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash var foundHashes []hash.Hash for h := range hashes { a := addr(h) - e, ok := tableIndex.Lookup(&a) + e, ok, err := tableIndex.Lookup(&a) + if err != nil { + return err + } if ok { foundHashes = append(foundHashes, h) y[h] = Range{Offset: e.Offset(), Length: e.Length()} diff --git a/go/store/nbs/table.go b/go/store/nbs/table.go index c909889874..db9db54dea 100644 --- a/go/store/nbs/table.go +++ b/go/store/nbs/table.go @@ -238,7 +238,7 @@ type chunkReader interface { } type chunkReadPlanner interface { - findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) + findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool, err error) getManyAtOffsets( ctx context.Context, eg *errgroup.Group, @@ -269,7 +269,7 @@ type chunkSource interface { // cannot be |Close|d more than once, so if a |chunkSource| is being // retained in two objects with independent life-cycle, it should be // |Clone|d first. 
- Clone() chunkSource + Clone() (chunkSource, error) } type chunkSources []chunkSource diff --git a/go/store/nbs/table_persister.go b/go/store/nbs/table_persister.go index 20f1cabc3e..467cb3c21f 100644 --- a/go/store/nbs/table_persister.go +++ b/go/store/nbs/table_persister.go @@ -294,7 +294,10 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e // Build up the index one entry at a time. var a addr for i := 0; i < len(ordinals); i++ { - e := index.IndexEntry(uint32(i), &a) + e, err := index.IndexEntry(uint32(i), &a) + if err != nil { + return compactionPlan{}, err + } li := lengthsPos + lengthSize*uint64(ordinals[i]) si := suffixesPos + addrSuffixSize*uint64(ordinals[i]) binary.BigEndian.PutUint32(plan.mergedIndex[li:], e.Length()) diff --git a/go/store/nbs/table_reader.go b/go/store/nbs/table_reader.go index d46b651e5f..7be216a67c 100644 --- a/go/store/nbs/table_reader.go +++ b/go/store/nbs/table_reader.go @@ -199,12 +199,12 @@ func (i mmapTableIndex) Close() error { return nil } -func (i mmapTableIndex) Clone() tableIndex { +func (i mmapTableIndex) Clone() (tableIndex, error) { cnt := atomic.AddInt32(i.refCnt, 1) if cnt == 1 { panic("Clone() called after last Close(). 
This index is no longer valid.") } - return i + return i, nil } func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) { @@ -223,32 +223,32 @@ func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) { return } -func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool) { +func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { prefix := binary.BigEndian.Uint64(h[:]) for idx := i.prefixIdx(prefix); idx < i.chunkCount && i.prefixes[idx] == prefix; idx++ { mi := idx * mmapIndexEntrySize e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) if bytes.Equal(e.suffix(), h[addrPrefixSize:]) { - return e, true + return e, true, nil } } - return mmapIndexEntry{}, false + return mmapIndexEntry{}, false, nil } -func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool { +func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) { mi := idx * mmapIndexEntrySize e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) - return bytes.Equal(e.suffix(), h[addrPrefixSize:]) + return bytes.Equal(e.suffix(), h[addrPrefixSize:]), nil } -func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry { +func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) { mi := idx * mmapIndexEntrySize e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) if a != nil { binary.BigEndian.PutUint64(a[:], i.prefixes[idx]) copy(a[addrPrefixSize:], e.suffix()) } - return e + return e, nil } type mmapIndexEntry []byte @@ -332,14 +332,14 @@ type tableIndex interface { // EntrySuffixMatches returns true if the entry at index |idx| matches // the suffix of the address |h|. Used by |Lookup| after finding // matching indexes based on |Prefixes|. - EntrySuffixMatches(idx uint32, h *addr) bool + EntrySuffixMatches(idx uint32, h *addr) (bool, error) // IndexEntry returns the |indexEntry| at |idx|. Optionally puts the // full address of that entry in |a| if |a| is not |nil|. 
- IndexEntry(idx uint32, a *addr) indexEntry + IndexEntry(idx uint32, a *addr) (indexEntry, error) // Lookup returns an |indexEntry| for the chunk corresponding to the // provided address |h|. Second returns is |true| if an entry exists // and |false| otherwise. - Lookup(h *addr) (indexEntry, bool) + Lookup(h *addr) (indexEntry, bool, error) // Ordinals returns a slice of indexes which maps the |i|th chunk in // the indexed file to its corresponding entry in index. The |i|th // entry in the result is the |i|th chunk in the indexed file, and its @@ -359,7 +359,7 @@ type tableIndex interface { // Clone returns a |tableIndex| with the same contents which can be // |Close|d independently. - Clone() tableIndex + Clone() (tableIndex, error) } var _ tableIndex = mmapTableIndex{} @@ -371,28 +371,37 @@ func parseTableIndex(buff []byte) (onHeapTableIndex, error) { return ReadTableIndex(bytes.NewReader(buff)) } -func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { +func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) { footerSize := int64(magicNumberSize + uint64Size + uint32Size) - _, err := rd.Seek(-footerSize, io.SeekEnd) + _, err = rd.Seek(-footerSize, io.SeekEnd) if err != nil { - return onHeapTableIndex{}, err + return 0, 0, err } footer, err := iohelp.ReadNBytes(rd, int(footerSize)) if err != nil { - return onHeapTableIndex{}, err + return 0, 0, err } if string(footer[uint32Size+uint64Size:]) != magicNumber { - return onHeapTableIndex{}, ErrInvalidTableFile + return 0, 0, ErrInvalidTableFile } - chunkCount := binary.BigEndian.Uint32(footer) - totalUncompressedData := binary.BigEndian.Uint64(footer[uint32Size:]) + chunkCount = binary.BigEndian.Uint32(footer) + totalUncompressedData = binary.BigEndian.Uint64(footer[uint32Size:]) + + return +} + +func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { + footerSize := int64(magicNumberSize + uint64Size + uint32Size) + chunkCount, totalUncompressedData, 
err := ReadTableFooter(rd) + if err != nil { + return onHeapTableIndex{}, err + } - // index suffixesSize := int64(chunkCount) * addrSuffixSize lengthsSize := int64(chunkCount) * lengthSize tuplesSize := int64(chunkCount) * prefixTupleSize @@ -456,7 +465,7 @@ func (ti onHeapTableIndex) TableFileSize() uint64 { if ti.chunkCount == 0 { return footerSize } - len, offset := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1]) + offset, len := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1]) return offset + len + indexSize(ti.chunkCount) + footerSize } @@ -481,9 +490,9 @@ func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) { // EntrySuffixMatches returns true IFF the suffix for prefix entry |idx| // matches the address |a|. -func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool { +func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) { li := uint64(ti.ordinals[idx]) * addrSuffixSize - return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) + return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]), nil } // lookupOrdinal returns the ordinal of |h| if present. 
Returns |ti.chunkCount| @@ -492,7 +501,7 @@ func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 { prefix := h.Prefix() for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ { - if ti.EntrySuffixMatches(idx, h) { + if b, _ := ti.EntrySuffixMatches(idx, h); b { return ti.ordinals[idx] } } @@ -500,22 +509,22 @@ func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 { return ti.chunkCount } -func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry { +func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) { ord := ti.ordinals[idx] if a != nil { binary.BigEndian.PutUint64(a[:], ti.prefixes[idx]) li := uint64(ord) * addrSuffixSize copy(a[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) } - return indexResult{ti.offsets[ord], ti.lengths[ord]} + return indexResult{ti.offsets[ord], ti.lengths[ord]}, nil } -func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool) { +func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { ord := ti.lookupOrdinal(h) if ord == ti.chunkCount { - return indexResult{}, false + return indexResult{}, false, nil } - return indexResult{ti.offsets[ord], ti.lengths[ord]}, true + return indexResult{ti.offsets[ord], ti.lengths[ord]}, true, nil } func (ti onHeapTableIndex) Prefixes() []uint64 { @@ -526,20 +535,20 @@ func (ti onHeapTableIndex) Ordinals() []uint32 { return ti.ordinals } -func (i onHeapTableIndex) ChunkCount() uint32 { - return i.chunkCount +func (ti onHeapTableIndex) ChunkCount() uint32 { + return ti.chunkCount } -func (i onHeapTableIndex) TotalUncompressedData() uint64 { - return i.totalUncompressedData +func (ti onHeapTableIndex) TotalUncompressedData() uint64 { + return ti.totalUncompressedData } -func (i onHeapTableIndex) Close() error { +func (ti onHeapTableIndex) Close() error { return nil } -func (i onHeapTableIndex) Clone() tableIndex { - return i +func (ti onHeapTableIndex) Clone() (tableIndex, error) { + return ti, nil } // 
newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index @@ -584,7 +593,11 @@ func (tr tableReader) hasMany(addrs []hasRecord) (bool, error) { // prefixes are equal, so locate and compare against the corresponding suffix for j := filterIdx; j < filterLen && addr.prefix == tr.prefixes[j]; j++ { - if tr.EntrySuffixMatches(j, addr.a) { + m, err := tr.EntrySuffixMatches(j, addr.a) + if err != nil { + return false, err + } + if m { addrs[i].has = true break } @@ -612,14 +625,17 @@ func (tr tableReader) index() (tableIndex, error) { // returns true iff |h| can be found in this table. func (tr tableReader) has(h addr) (bool, error) { - _, ok := tr.Lookup(&h) - return ok, nil + _, ok, err := tr.Lookup(&h) + return ok, err } // returns the storage associated with |h|, iff present. Returns nil if absent. On success, // the returned byte slice directly references the underlying storage. func (tr tableReader) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) { - e, found := tr.Lookup(&h) + e, found, err := tr.Lookup(&h) + if err != nil { + return nil, err + } if !found { return nil, nil } @@ -746,15 +762,21 @@ func (tr tableReader) getMany( // Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set // of table locations which must be read in order to satisfy the getMany operation. 
- offsetRecords, remaining := tr.findOffsets(reqs) - err := tr.getManyAtOffsets(ctx, eg, offsetRecords, found, stats) + offsetRecords, remaining, err := tr.findOffsets(reqs) + if err != nil { + return false, err + } + err = tr.getManyAtOffsets(ctx, eg, offsetRecords, found, stats) return remaining, err } func (tr tableReader) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) { // Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set // of table locations which must be read in order to satisfy the getMany operation. - offsetRecords, remaining := tr.findOffsets(reqs) - err := tr.getManyCompressedAtOffsets(ctx, eg, offsetRecords, found, stats) + offsetRecords, remaining, err := tr.findOffsets(reqs) + if err != nil { + return false, err + } + err = tr.getManyCompressedAtOffsets(ctx, eg, offsetRecords, found, stats) return remaining, err } @@ -867,7 +889,7 @@ func (tr tableReader) getManyAtOffsetsWithReadFunc( // chunks remaining will be set to false upon return. If some are not here, // then remaining will be true. The result offsetRecSlice is sorted in offset // order. -func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) { +func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool, err error) { filterIdx := uint32(0) filterLen := uint32(len(tr.prefixes)) ors = make(offsetRecSlice, 0, len(reqs)) @@ -896,9 +918,16 @@ func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaini // record all offsets within the table which contain the data required. 
for j := filterIdx; j < filterLen && req.prefix == tr.prefixes[j]; j++ { - if tr.EntrySuffixMatches(j, req.a) { + m, err := tr.EntrySuffixMatches(j, req.a) + if err != nil { + return nil, false, err + } + if m { reqs[i].found = true - entry := tr.IndexEntry(j, nil) + entry, err := tr.IndexEntry(j, nil) + if err != nil { + return nil, false, err + } ors = append(ors, offsetRec{req.a, entry.Offset(), entry.Length()}) break } @@ -906,7 +935,7 @@ func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaini } sort.Sort(ors) - return ors, remaining + return ors, remaining, nil } func canReadAhead(fRec offsetRec, curStart, curEnd, blockSize uint64) (newEnd uint64, canRead bool) { @@ -933,7 +962,10 @@ func canReadAhead(fRec offsetRec, curStart, curEnd, blockSize uint64) (newEnd ui func (tr tableReader) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool, err error) { var offsetRecords offsetRecSlice // Pass #1: Build the set of table locations which must be read in order to find all the elements of |reqs| which are present in this table. 
- offsetRecords, remaining = tr.findOffsets(reqs) + offsetRecords, remaining, err = tr.findOffsets(reqs) + if err != nil { + return 0, false, err + } // Now |offsetRecords| contains all locations within the table which must // be searched (note that there may be duplicates of a particular @@ -997,7 +1029,10 @@ func (tr tableReader) extract(ctx context.Context, chunks chan<- extractRecord) var ors offsetRecSlice for i := uint32(0); i < tr.chunkCount; i++ { a := new(addr) - e := tr.IndexEntry(i, a) + e, err := tr.IndexEntry(i, a) + if err != nil { + return err + } ors = append(ors, offsetRec{a, e.Offset(), e.Length()}) } sort.Sort(ors) @@ -1020,8 +1055,12 @@ func (tr tableReader) Close() error { return tr.tableIndex.Close() } -func (tr tableReader) Clone() tableReader { - return tableReader{tr.tableIndex.Clone(), tr.prefixes, tr.chunkCount, tr.totalUncompressedData, tr.r, tr.blockSize} +func (tr tableReader) Clone() (tableReader, error) { + ti, err := tr.tableIndex.Clone() + if err != nil { + return tableReader{}, err + } + return tableReader{ti, tr.prefixes, tr.chunkCount, tr.totalUncompressedData, tr.r, tr.blockSize}, nil } type readerAdapter struct { diff --git a/go/store/nbs/table_set.go b/go/store/nbs/table_set.go index 29e035470c..ffbceb5269 100644 --- a/go/store/nbs/table_set.go +++ b/go/store/nbs/table_set.go @@ -137,7 +137,10 @@ func (ts tableSet) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRe f := func(css chunkSources) bool { for _, haver := range css { if rp, ok := haver.(chunkReadPlanner); ok { - offsets, remaining := rp.findOffsets(reqs) + offsets, remaining, err := rp.findOffsets(reqs) + if err != nil { + return true + } err = rp.getManyAtOffsets(ctx, eg, offsets, found, stats) if err != nil { return true @@ -165,7 +168,10 @@ func (ts tableSet) getManyCompressed(ctx context.Context, eg *errgroup.Group, re f := func(css chunkSources) bool { for _, haver := range css { if rp, ok := haver.(chunkReadPlanner); ok { - offsets, remaining := 
rp.findOffsets(reqs) + offsets, remaining, err := rp.findOffsets(reqs) + if err != nil { + return true + } if len(offsets) > 0 { err = rp.getManyCompressedAtOffsets(ctx, eg, offsets, found, stats) if err != nil { @@ -428,7 +434,11 @@ func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats) } if cnt > 0 { - merged.novel = append(merged.novel, t.Clone()) + t2, err := t.Clone() + if err != nil { + return tableSet{}, err + } + merged.novel = append(merged.novel, t2) } } @@ -465,7 +475,12 @@ func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats) return } if spec.name == h { - merged.upstream[idx] = existing.Clone() + c, err := existing.Clone() + if err != nil { + ae.SetIfError(err) + return + } + merged.upstream[idx] = c return } } diff --git a/go/store/nbs/util.go b/go/store/nbs/util.go index 17517d77d7..f721f9e713 100644 --- a/go/store/nbs/util.go +++ b/go/store/nbs/util.go @@ -35,7 +35,10 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er seen := make(map[addr]bool) for i := uint32(0); i < idx.ChunkCount(); i++ { var a addr - ie := idx.IndexEntry(i, &a) + ie, err := idx.IndexEntry(i, &a) + if err != nil { + return err + } if _, ok := seen[a]; !ok { seen[a] = true chunkBytes, err := readNFrom(rd, ie.Offset(), ie.Length()) From c78e02b2b3fd678540b6b089e032278d9208f64c Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Wed, 9 Feb 2022 18:42:00 -0800 Subject: [PATCH 043/105] ordinals and prefixes should err --- go/store/nbs/aws_chunk_source.go | 12 +++++-- go/store/nbs/aws_table_persister_test.go | 3 +- go/store/nbs/bs_persister.go | 12 +++++-- go/store/nbs/chunk_source_adapter.go | 6 +++- go/store/nbs/cmp_chunk_table_writer_test.go | 6 ++-- go/store/nbs/dynamo_fake_test.go | 7 +++- go/store/nbs/file_table_persister_test.go | 9 ++++-- go/store/nbs/mem_table_test.go | 9 ++++-- go/store/nbs/mmap_table_reader.go | 6 +++- go/store/nbs/root_tracker_test.go | 12 +++++-- 
go/store/nbs/s3_fake_test.go | 12 +++++-- go/store/nbs/table_persister.go | 10 ++++-- go/store/nbs/table_persister_test.go | 7 ++-- go/store/nbs/table_reader.go | 36 +++++++++++++-------- go/store/nbs/table_reader_test.go | 31 ++++++++++++------ go/store/nbs/table_test.go | 24 +++++++++----- 16 files changed, 147 insertions(+), 55 deletions(-) diff --git a/go/store/nbs/aws_chunk_source.go b/go/store/nbs/aws_chunk_source.go index 88dfa6dfa9..3df210052b 100644 --- a/go/store/nbs/aws_chunk_source.go +++ b/go/store/nbs/aws_chunk_source.go @@ -43,7 +43,11 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead if index, found := indexCache.get(name); found { tra := &awsTableReaderAt{al: al, ddb: ddb, s3: s3, name: name, chunkCount: chunkCount} - return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil + tr, err := newTableReader(index, tra, s3BlockSize) + if err != nil { + return &chunkSourceAdapter{}, err + } + return &chunkSourceAdapter{tr, name}, nil } } @@ -98,7 +102,11 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead indexCache.put(name, ohi) } - return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil + tr, err := newTableReader(index, tra, s3BlockSize) + if err != nil { + return &chunkSourceAdapter{}, err + } + return &chunkSourceAdapter{tr, name}, nil } type awsTableReaderAt struct { diff --git a/go/store/nbs/aws_table_persister_test.go b/go/store/nbs/aws_table_persister_test.go index b8899fc684..2185815d4e 100644 --- a/go/store/nbs/aws_table_persister_test.go +++ b/go/store/nbs/aws_table_persister_test.go @@ -548,6 +548,7 @@ func bytesToChunkSource(t *testing.T, bs ...[]byte) chunkSource { data := buff[:tableSize] ti, err := parseTableIndex(data) require.NoError(t, err) - rdr := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + rdr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + require.NoError(t, err) return 
chunkSourceAdapter{rdr, name} } diff --git a/go/store/nbs/bs_persister.go b/go/store/nbs/bs_persister.go index bf9ddaa553..fb163af85d 100644 --- a/go/store/nbs/bs_persister.go +++ b/go/store/nbs/bs_persister.go @@ -110,7 +110,11 @@ func newBSChunkSource(ctx context.Context, bs blobstore.Blobstore, name addr, ch if index, found := indexCache.get(name); found { bsTRA := &bsTableReaderAt{name.String(), bs} - return &chunkSourceAdapter{newTableReader(index, bsTRA, blockSize), name}, nil + tr, err := newTableReader(index, bsTRA, blockSize) + if err != nil { + return nil, err + } + return &chunkSourceAdapter{tr, name}, nil } } @@ -148,7 +152,11 @@ func newBSChunkSource(ctx context.Context, bs blobstore.Blobstore, name addr, ch indexCache.put(name, index) } - return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil + tr, err := newTableReader(index, tra, s3BlockSize) + if err != nil { + return nil, err + } + return &chunkSourceAdapter{tr, name}, nil } func (bsp *blobstorePersister) PruneTableFiles(ctx context.Context, contents manifestContents) error { diff --git a/go/store/nbs/chunk_source_adapter.go b/go/store/nbs/chunk_source_adapter.go index b291f19022..1e589aecaf 100644 --- a/go/store/nbs/chunk_source_adapter.go +++ b/go/store/nbs/chunk_source_adapter.go @@ -42,7 +42,11 @@ func newReaderFromIndexData(indexCache *indexCache, idxData []byte, name addr, t indexCache.put(name, index) } - return &chunkSourceAdapter{newTableReader(index, tra, blockSize), name}, nil + tr, err := newTableReader(index, tra, blockSize) + if err != nil { + return nil, err + } + return &chunkSourceAdapter{tr, name}, nil } func (csa chunkSourceAdapter) Close() error { diff --git a/go/store/nbs/cmp_chunk_table_writer_test.go b/go/store/nbs/cmp_chunk_table_writer_test.go index 3fd62b9269..6c72a015d7 100644 --- a/go/store/nbs/cmp_chunk_table_writer_test.go +++ b/go/store/nbs/cmp_chunk_table_writer_test.go @@ -37,7 +37,8 @@ func TestCmpChunkTableWriter(t *testing.T) { // Setup a 
TableReader to read compressed chunks out of ti, err := parseTableIndex(buff) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + require.NoError(t, err) hashes := make(hash.HashSet) for _, chnk := range testMDChunks { @@ -74,7 +75,8 @@ func TestCmpChunkTableWriter(t *testing.T) { outputBuff := output.Bytes() outputTI, err := parseTableIndex(outputBuff) require.NoError(t, err) - outputTR := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize) + outputTR, err := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize) + require.NoError(t, err) compareContentsOfTables(t, ctx, hashes, tr, outputTR) } diff --git a/go/store/nbs/dynamo_fake_test.go b/go/store/nbs/dynamo_fake_test.go index dcd81ea0ce..1897cd3939 100644 --- a/go/store/nbs/dynamo_fake_test.go +++ b/go/store/nbs/dynamo_fake_test.go @@ -62,7 +62,12 @@ func (m *fakeDDB) readerForTable(name addr) (chunkReader, error) { return nil, err } - return newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize), nil + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + if err != nil { + return nil, err + } + + return tr, nil } return nil, nil } diff --git a/go/store/nbs/file_table_persister_test.go b/go/store/nbs/file_table_persister_test.go index 1c5765c8cf..0cf4671219 100644 --- a/go/store/nbs/file_table_persister_test.go +++ b/go/store/nbs/file_table_persister_test.go @@ -129,7 +129,8 @@ func TestFSTablePersisterPersist(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(buff) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + require.NoError(t, err) assertChunksInReader(testChunks, tr, assert) } } @@ -229,7 +230,8 @@ func TestFSTablePersisterConjoinAll(t *testing.T) { require.NoError(t, err) ti, err 
:= parseTableIndex(buff) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + require.NoError(t, err) assertChunksInReader(testChunks, tr, assert) } @@ -267,7 +269,8 @@ func TestFSTablePersisterConjoinAllDups(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(buff) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + require.NoError(t, err) assertChunksInReader(testChunks, tr, assert) assert.EqualValues(reps*len(testChunks), mustUint32(tr.count())) } diff --git a/go/store/nbs/mem_table_test.go b/go/store/nbs/mem_table_test.go index f34f173868..6842ccaa3e 100644 --- a/go/store/nbs/mem_table_test.go +++ b/go/store/nbs/mem_table_test.go @@ -152,14 +152,16 @@ func TestMemTableWrite(t *testing.T) { require.NoError(t, err) ti1, err := parseTableIndex(td1) require.NoError(t, err) - tr1 := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize) + tr1, err := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize) + require.NoError(t, err) assert.True(tr1.has(computeAddr(chunks[1]))) td2, _, err := buildTable(chunks[2:]) require.NoError(t, err) ti2, err := parseTableIndex(td2) require.NoError(t, err) - tr2 := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize) + tr2, err := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize) + require.NoError(t, err) assert.True(tr2.has(computeAddr(chunks[2]))) _, data, count, err := mt.write(chunkReaderGroup{tr1, tr2}, &Stats{}) @@ -168,7 +170,8 @@ func TestMemTableWrite(t *testing.T) { ti, err := parseTableIndex(data) require.NoError(t, err) - outReader := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + outReader, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + require.NoError(t, err) 
assert.True(outReader.has(computeAddr(chunks[0]))) assert.False(outReader.has(computeAddr(chunks[1]))) assert.False(outReader.has(computeAddr(chunks[2]))) diff --git a/go/store/nbs/mmap_table_reader.go b/go/store/nbs/mmap_table_reader.go index d23dbe6a2b..454ec9db27 100644 --- a/go/store/nbs/mmap_table_reader.go +++ b/go/store/nbs/mmap_table_reader.go @@ -152,8 +152,12 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index return nil, errors.New("unexpected chunk count") } + tr, err := newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize) + if err != nil { + return nil, err + } return &mmapTableReader{ - newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize), + tr, fc, h, }, nil diff --git a/go/store/nbs/root_tracker_test.go b/go/store/nbs/root_tracker_test.go index 299b3373da..80387fd77e 100644 --- a/go/store/nbs/root_tracker_test.go +++ b/go/store/nbs/root_tracker_test.go @@ -467,7 +467,11 @@ func (ftp fakeTablePersister) Persist(ctx context.Context, mt *memTable, haver c return nil, err } - ftp.sources[name] = newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + s, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + if err != nil { + return emptyChunkSource{}, err + } + ftp.sources[name] = s return chunkSourceAdapter{ftp.sources[name], name}, nil } } @@ -490,7 +494,11 @@ func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSourc return nil, err } - ftp.sources[name] = newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + s, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + if err != nil { + return nil, err + } + ftp.sources[name] = s return chunkSourceAdapter{ftp.sources[name], name}, nil } return emptyChunkSource{}, nil diff --git a/go/store/nbs/s3_fake_test.go b/go/store/nbs/s3_fake_test.go index 4fc50a4593..dc52d0e116 100644 --- a/go/store/nbs/s3_fake_test.go +++ b/go/store/nbs/s3_fake_test.go @@ -81,7 +81,11 @@ func (m 
*fakeS3) readerForTable(name addr) (chunkReader, error) { if err != nil { return nil, err } - return newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize), nil + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize) + if err != nil { + return nil, err + } + return tr, nil } return nil, nil } @@ -100,7 +104,11 @@ func (m *fakeS3) readerForTableWithNamespace(ns string, name addr) (chunkReader, return nil, err } - return newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize), nil + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize) + if err != nil { + return nil, err + } + return tr, nil } return nil, nil } diff --git a/go/store/nbs/table_persister.go b/go/store/nbs/table_persister.go index 467cb3c21f..0f2d611ac4 100644 --- a/go/store/nbs/table_persister.go +++ b/go/store/nbs/table_persister.go @@ -256,8 +256,14 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e return compactionPlan{}, err } - ordinals := index.Ordinals() - prefixes := index.Prefixes() + ordinals, err := index.Ordinals() + if err != nil { + return compactionPlan{}, err + } + prefixes, err := index.Prefixes() + if err != nil { + return compactionPlan{}, err + } // Add all the prefix tuples from this index to the list of all prefixIndexRecs, modifying the ordinals such that all entries from the 1st item in sources come after those in the 0th and so on. 
for j, prefix := range prefixes { diff --git a/go/store/nbs/table_persister_test.go b/go/store/nbs/table_persister_test.go index 59828df484..8d60d2f0c5 100644 --- a/go/store/nbs/table_persister_test.go +++ b/go/store/nbs/table_persister_test.go @@ -47,7 +47,9 @@ func TestPlanCompaction(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(data) require.NoError(t, err) - src := chunkSourceAdapter{newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize), name} + tr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) + require.NoError(t, err) + src := chunkSourceAdapter{tr, name} dataLens = append(dataLens, uint64(len(data))-indexSize(mustUint32(src.count()))-footerSize) sources = append(sources, src) } @@ -67,7 +69,8 @@ func TestPlanCompaction(t *testing.T) { assert.Equal(totalChunks, idx.chunkCount) assert.Equal(totalUnc, idx.totalUncompressedData) - tr := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize) + tr, err := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize) + require.NoError(t, err) for _, content := range tableContents { assertChunksInReader(content, tr, assert) } diff --git a/go/store/nbs/table_reader.go b/go/store/nbs/table_reader.go index 7be216a67c..32f0e07071 100644 --- a/go/store/nbs/table_reader.go +++ b/go/store/nbs/table_reader.go @@ -143,7 +143,7 @@ func (s mmapOrdinalSlice) Len() int { return len(s) } func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset } func (s mmapOrdinalSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (i mmapTableIndex) Ordinals() []uint32 { +func (i mmapTableIndex) Ordinals() ([]uint32, error) { s := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount)) for idx := 0; uint32(idx) < i.chunkCount; idx++ { mi := idx * mmapIndexEntrySize @@ -155,7 +155,7 @@ func (i mmapTableIndex) Ordinals() []uint32 { for j, r := range s { res[r.idx] = uint32(j) } - return res + return res, nil } type mmapTableIndex struct { @@ -167,8 +167,8 @@ 
type mmapTableIndex struct { refCnt *int32 } -func (i mmapTableIndex) Prefixes() []uint64 { - return i.prefixes +func (i mmapTableIndex) Prefixes() ([]uint64, error) { + return i.prefixes, nil } type mmapOrdinal struct { @@ -298,11 +298,15 @@ func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) refCnt := new(int32) *refCnt = 1 + p, err := ti.Prefixes() + if err != nil { + return mmapTableIndex{}, err + } return mmapTableIndex{ ti.chunkCount, ti.totalUncompressedData, ti.TableFileSize(), - ti.Prefixes(), + p, arr, refCnt, }, nil @@ -344,10 +348,10 @@ type tableIndex interface { // the indexed file to its corresponding entry in index. The |i|th // entry in the result is the |i|th chunk in the indexed file, and its // corresponding value in the slice is the index entry that maps to it. - Ordinals() []uint32 + Ordinals() ([]uint32, error) // Prefixes returns the sorted slice of |uint64| |addr| prefixes; each // entry corresponds to an indexed chunk address. - Prefixes() []uint64 + Prefixes() ([]uint64, error) // TableFileSize returns the total size of the indexed table file, in bytes. TableFileSize() uint64 // TotalUncompressedData returns the total uncompressed data size of @@ -527,12 +531,12 @@ func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { return indexResult{ti.offsets[ord], ti.lengths[ord]}, true, nil } -func (ti onHeapTableIndex) Prefixes() []uint64 { - return ti.prefixes +func (ti onHeapTableIndex) Prefixes() ([]uint64, error) { + return ti.prefixes, nil } -func (ti onHeapTableIndex) Ordinals() []uint32 { - return ti.ordinals +func (ti onHeapTableIndex) Ordinals() ([]uint32, error) { + return ti.ordinals, nil } func (ti onHeapTableIndex) ChunkCount() uint32 { @@ -554,15 +558,19 @@ func (ti onHeapTableIndex) Clone() (tableIndex, error) { // newTableReader parses a valid nbs table byte stream and returns a reader. 
buff must end with an NBS index // and footer, though it may contain an unspecified number of bytes before that data. r should allow // retrieving any desired range of bytes from the table. -func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) tableReader { +func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) (tableReader, error) { + p, err := index.Prefixes() + if err != nil { + return tableReader{}, err + } return tableReader{ index, - index.Prefixes(), + p, index.ChunkCount(), index.TotalUncompressedData(), r, blockSize, - } + }, nil } // Scan across (logically) two ordered slices of address prefixes. diff --git a/go/store/nbs/table_reader_test.go b/go/store/nbs/table_reader_test.go index 41439426ef..91ebfa8e5f 100644 --- a/go/store/nbs/table_reader_test.go +++ b/go/store/nbs/table_reader_test.go @@ -45,10 +45,12 @@ func TestParseTableIndex(t *testing.T) { seen := make(map[addr]bool) for i := uint32(0); i < idx.ChunkCount(); i++ { var onheapaddr addr - e := idx.IndexEntry(i, &onheapaddr) + e, err := idx.IndexEntry(i, &onheapaddr) + require.NoError(t, err) if _, ok := seen[onheapaddr]; !ok { seen[onheapaddr] = true - lookupe, ok := idx.Lookup(&onheapaddr) + lookupe, ok, err := idx.Lookup(&onheapaddr) + require.NoError(t, err) assert.True(t, ok) assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr) assert.Equal(t, e.Length(), lookupe.Length()) @@ -72,15 +74,18 @@ func TestMMapIndex(t *testing.T) { seen := make(map[addr]bool) for i := uint32(0); i < idx.ChunkCount(); i++ { var onheapaddr addr - onheapentry := idx.IndexEntry(i, &onheapaddr) + onheapentry, err := idx.IndexEntry(i, &onheapaddr) + require.NoError(t, err) var mmaddr addr - mmentry := mmidx.IndexEntry(i, &mmaddr) + mmentry, err := mmidx.IndexEntry(i, &mmaddr) + require.NoError(t, err) assert.Equal(t, onheapaddr, mmaddr) assert.Equal(t, onheapentry.Offset(), mmentry.Offset()) assert.Equal(t, 
onheapentry.Length(), mmentry.Length()) if _, ok := seen[onheapaddr]; !ok { seen[onheapaddr] = true - mmentry, found := mmidx.Lookup(&onheapaddr) + mmentry, found, err := mmidx.Lookup(&onheapaddr) + require.NoError(t, err) assert.True(t, found) assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr) assert.Equal(t, onheapentry.Length(), mmentry.Length()) @@ -88,13 +93,21 @@ func TestMMapIndex(t *testing.T) { wrongaddr := onheapaddr if wrongaddr[19] != 0 { wrongaddr[19] = 0 - _, found := mmidx.Lookup(&wrongaddr) + _, found, err := mmidx.Lookup(&wrongaddr) + require.NoError(t, err) assert.False(t, found) } } - - assert.Equal(t, idx.Ordinals(), mmidx.Ordinals()) - assert.Equal(t, idx.Prefixes(), mmidx.Prefixes()) + o1, err := idx.Ordinals() + require.NoError(t, err) + o2, err := mmidx.Ordinals() + require.NoError(t, err) + assert.Equal(t, o1, o2) + p1, err := idx.Prefixes() + require.NoError(t, err) + p2, err := mmidx.Prefixes() + require.NoError(t, err) + assert.Equal(t, p1, p2) assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize()) assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData()) } diff --git a/go/store/nbs/table_test.go b/go/store/nbs/table_test.go index a37508e66d..94383af89d 100644 --- a/go/store/nbs/table_test.go +++ b/go/store/nbs/table_test.go @@ -79,7 +79,8 @@ func TestSimple(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + require.NoError(t, err) assertChunksInReader(chunks, tr, assert) @@ -125,7 +126,8 @@ func TestHasMany(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + tr, err := newTableReader(ti, 
tableReaderAtFromBytes(tableData), fileBlockSize) + require.NoError(t, err) addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} hasAddrs := []hasRecord{ @@ -175,7 +177,8 @@ func TestHasManySequentialPrefix(t *testing.T) { ti, err := parseTableIndex(buff) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) + require.NoError(t, err) hasAddrs := make([]hasRecord, 2) // Leave out the first address @@ -203,7 +206,8 @@ func TestGetMany(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + require.NoError(t, err) addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])} getBatch := []getRecord{ @@ -236,7 +240,8 @@ func TestCalcReads(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), 0) + tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), 0) + require.NoError(t, err) addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} getBatch := []getRecord{ {&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false}, @@ -272,7 +277,8 @@ func TestExtract(t *testing.T) { require.NoError(t, err) ti, err := parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + require.NoError(t, err) addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} @@ -310,7 +316,8 @@ func Test65k(t *testing.T) { require.NoError(t, err) ti, err := 
parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + require.NoError(t, err) for i := 0; i < count; i++ { data := dataFn(i) @@ -362,7 +369,8 @@ func doTestNGetMany(t *testing.T, count int) { require.NoError(t, err) ti, err := parseTableIndex(tableData) require.NoError(t, err) - tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) + require.NoError(t, err) getBatch := make([]getRecord, len(data)) for i := 0; i < count; i++ { From 8d22fae965851bc1bf9ac7683d4e4749fe2ab4fb Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Wed, 9 Feb 2022 20:16:50 -0800 Subject: [PATCH 044/105] improved time serialization, added preliminary support for geometry types --- go/store/prolly/shim.go | 2 +- go/store/val/codec.go | 19 ++++++++----------- go/store/val/tuple_builder.go | 26 ++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/go/store/prolly/shim.go b/go/store/prolly/shim.go index 480bb36c66..121b5118b4 100644 --- a/go/store/prolly/shim.go +++ b/go/store/prolly/shim.go @@ -138,7 +138,7 @@ func encodingFromSqlType(typ query.Type) val.Encoding { case query.Type_YEAR: return val.YearEnc case query.Type_GEOMETRY: - return val.BytesEnc + return val.GeometryEnc } switch typ { diff --git a/go/store/val/codec.go b/go/store/val/codec.go index ec04d8cf89..d9a8995d28 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -41,7 +41,7 @@ const ( float64Size ByteSize = 8 // todo(andy): experimental encoding - timestampSize ByteSize = 15 + timestampSize ByteSize = 8 ) type Encoding uint8 @@ -75,9 +75,10 @@ const ( BytesEnc Encoding = 129 // todo(andy): experimental encodings - DecimalEnc Encoding = 130 - JSONEnc Encoding = 131 - TimeEnc Encoding = 132 + DecimalEnc Encoding = 130 + JSONEnc 
Encoding = 131 + TimeEnc Encoding = 132 + GeometryEnc Encoding = 133 // TODO // BitEnc @@ -352,17 +353,13 @@ func compareFloat64(l, r float64) int { func readTimestamp(buf []byte) (t time.Time) { expectSize(buf, timestampSize) - if err := t.UnmarshalBinary(buf); err != nil { - panic(err) - } - return t + t = time.Unix(0, readInt64(buf)).UTC() + return } func writeTimestamp(buf []byte, val time.Time) { expectSize(buf, timestampSize) - // todo(andy): fix allocation here - m, _ := val.MarshalBinary() - copy(buf, m) + writeInt64(buf, val.UnixNano()) } func compareTimestamp(l, r time.Time) int { diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 07fb0b4b2e..13242a61a9 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -20,6 +20,7 @@ import ( "time" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression/function" "github.com/dolthub/dolt/go/store/pool" ) @@ -227,6 +228,15 @@ func (tb *TupleBuilder) PutJSON(i int, v interface{}) { tb.pos += sz } +// PutGeometry writes a []byte to the ith field of the Tuple being built. +func (tb *TupleBuilder) PutGeometry(i int, v []byte) { + tb.Desc.expectEncoding(i, GeometryEnc) + sz := ByteSize(len(v)) + tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] + writeBytes(tb.fields[i], v) + tb.pos += sz +} + // PutRaw writes a []byte to the ith field of the Tuple being built. 
func (tb *TupleBuilder) PutRaw(i int, buf []byte) { if buf == nil { @@ -283,6 +293,9 @@ func (tb *TupleBuilder) PutField(i int, v interface{}) { v = []byte(s) } tb.PutBytes(i, v.([]byte)) + case GeometryEnc: + // todo(andy): remove GMS dependency + tb.PutGeometry(i, convGeometry(v)) case JSONEnc: // todo(andy): remove GMS dependency tb.PutJSON(i, v.(sql.JSONDocument).Val) @@ -342,3 +355,16 @@ func convUint(v interface{}) uint { panic("impossible conversion") } } + +func convGeometry(v interface{}) []byte { + switch t := v.(type) { + case sql.Point: + return function.PointToBytes(t) + case sql.Linestring: + return function.LineToBytes(t) + case sql.Polygon: + return function.PolyToBytes(t) + default: + panic(fmt.Sprintf("unknown geometry %v", v)) + } +} From 1e4f9985bbce309b163059ad3b1390d50fa54bbd Mon Sep 17 00:00:00 2001 From: Maximilian Hoffman Date: Thu, 10 Feb 2022 06:53:47 -0800 Subject: [PATCH 045/105] window enginetests (#2786) * new enginetests for windows * no local refs --- go/go.mod | 4 ++-- go/go.sum | 8 ++++---- .../doltcore/sqle/enginetest/dolt_engine_test.go | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/go/go.mod b/go/go.mod index 1ce04fae98..23095dfdec 100644 --- a/go/go.mod +++ b/go/go.mod @@ -20,7 +20,7 @@ require ( github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 - github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a + github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38 github.com/dustin/go-humanize v1.0.0 github.com/fatih/color v1.9.0 github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568 @@ -69,7 +69,7 @@ require ( ) require ( - github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f + github.com/dolthub/go-mysql-server v0.11.1-0.20220210013403-fce236834a13 github.com/google/flatbuffers v2.0.5+incompatible 
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/prometheus/client_golang v1.11.0 diff --git a/go/go.sum b/go/go.sum index 889462aace..2f7822ae36 100755 --- a/go/go.sum +++ b/go/go.sum @@ -172,8 +172,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f h1:bbLVyFtC7Wm2q1QZZGFUSyclY9nKUTsQ+Sn3ywkwpOs= -github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f/go.mod h1:X2i6+DzsBgl5uDu1dzNayauCEZFUE+qIEriSv4M8v3s= +github.com/dolthub/go-mysql-server v0.11.1-0.20220210013403-fce236834a13 h1:zDkSqJNkKzHhqHqa4ZH2KiT1EvcP/Tq+QL9t6DtnPK0= +github.com/dolthub/go-mysql-server v0.11.1-0.20220210013403-fce236834a13/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= @@ -182,8 +182,8 @@ github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 h1:WRPDbpJWEnPxP github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66/go.mod h1:N5ZIbMGuDUpTpOFQ7HcsN6WSIpTGQjHP+Mz27AfmAgk= github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE= github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY= -github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a 
h1:+61CpK9SwG/QFNE+vn6Fxk00GRQgtR+CA6Nvsr87y8g= -github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig= +github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38 h1:qUbVRsX2CPyjj/uLrPu9L69rGiYRb5vwzw7PC5c/Wh8= +github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go index f5b0bf14d3..7af2812bcf 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go @@ -303,6 +303,22 @@ func TestVersionedViews(t *testing.T) { enginetest.TestVersionedViews(t, newDoltHarness(t)) } +func TestWindowFunctions(t *testing.T) { + enginetest.TestWindowFunctions(t, newDoltHarness(t)) +} + +func TestWindowRowFrames(t *testing.T) { + enginetest.TestWindowRowFrames(t, newDoltHarness(t)) +} + +func TestWindowRangeFrames(t *testing.T) { + enginetest.TestWindowRangeFrames(t, newDoltHarness(t)) +} + +func TestNamedWindows(t *testing.T) { + enginetest.TestNamedWindows(t, newDoltHarness(t)) +} + func TestNaturalJoin(t *testing.T) { enginetest.TestNaturalJoin(t, newDoltHarness(t)) } From 0f1e5fbaa9546c54e126aeabd3897c2701cfb500 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 10:12:17 -0800 Subject: [PATCH 046/105] hopefully stops tests from hanging --- integration-tests/bats/no-repo.bats | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index c558f94791..210555a4bd 100755 --- 
a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -319,4 +319,5 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." sleep 1 # Wait another sec run grep -q 'dolt' <(ps aux) [ "$output" == "" ] + run kill -9 $(ps aux | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) } From 3d78e8b502831f43ee15e634b419b44c8fa67c0b Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 10:13:35 -0800 Subject: [PATCH 047/105] removing hanging test...will cause merge conflict later --- integration-tests/bats/no-repo.bats | 9 --------- 1 file changed, 9 deletions(-) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index c558f94791..bd6c01c79c 100755 --- a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -311,12 +311,3 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." [[ "$output" =~ "__DOLT_1__" ]] [[ ! 
"$output" =~ "__LD_1__" ]] } - -@test "no-repo: dolt login exits when receiving SIGINT" { - dolt login & # run this in the background - sleep 1 # Wait a sec - kill -SIGINT $(ps aux | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) - sleep 1 # Wait another sec - run grep -q 'dolt' <(ps aux) - [ "$output" == "" ] -} From 8273b2509909286bbc3888d1d2f33128d47caf8b Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 10 Feb 2022 10:17:49 -0800 Subject: [PATCH 048/105] added read support for geometry encodings --- go/store/val/codec.go | 12 +++++ go/store/val/tuple_builder.go | 91 ++++++++++++++++++++++++++++++-- go/store/val/tuple_descriptor.go | 44 ++++++++++++++- 3 files changed, 141 insertions(+), 6 deletions(-) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index d9a8995d28..9cb63a9ab7 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -459,3 +459,15 @@ func expectSize(buf []byte, sz ByteSize) { panic("byte slice is not of expected size") } } + +func expectTrue(b bool) { + if !b { + panic("expected true") + } +} + +func expectFalse(b bool) { + if b { + panic("expected false") + } +} diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 13242a61a9..692706ac89 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -295,7 +295,7 @@ func (tb *TupleBuilder) PutField(i int, v interface{}) { tb.PutBytes(i, v.([]byte)) case GeometryEnc: // todo(andy): remove GMS dependency - tb.PutGeometry(i, convGeometry(v)) + tb.PutGeometry(i, serializeGeometry(v)) case JSONEnc: // todo(andy): remove GMS dependency tb.PutJSON(i, v.(sql.JSONDocument).Val) @@ -356,15 +356,96 @@ func convUint(v interface{}) uint { } } -func convGeometry(v interface{}) []byte { +// todo(andy): remove GMS dependency +// have the engine pass serialized bytes + +const ( + sridSize = uint32Size + endianSize = uint8Size + typeSize = uint32Size + ewkbHeaderSize = sridSize + endianSize + 
typeSize +) + +const ( + pointType = uint32(1) + linestringType = uint32(2) + polygonType = uint32(3) + + littleEndian = uint8(1) +) + +type ewkbHeader struct { + srid uint32 + endian uint8 + typ uint32 +} + +func (h ewkbHeader) writeTo(buf []byte) { + expectSize(buf, ewkbHeaderSize) + writeUint32(buf[:sridSize], h.srid) + writeUint8(buf[sridSize:sridSize+endianSize], h.endian) + writeUint32(buf[sridSize+endianSize:ewkbHeaderSize], h.typ) +} + +func readHeaderFrom(buf []byte) (h ewkbHeader) { + expectSize(buf, ewkbHeaderSize) + h.srid = readUint32(buf[:sridSize]) + h.endian = readUint8(buf[sridSize : sridSize+endianSize]) + h.typ = readUint32(buf[sridSize+endianSize : ewkbHeaderSize]) + return +} + +func serializeGeometry(v interface{}) []byte { switch t := v.(type) { case sql.Point: - return function.PointToBytes(t) + return serializePoint(t) case sql.Linestring: - return function.LineToBytes(t) + return serializeLinestring(t) case sql.Polygon: - return function.PolyToBytes(t) + return serializePolygon(t) default: panic(fmt.Sprintf("unknown geometry %v", v)) } } + +func serializePoint(p sql.Point) (buf []byte) { + pb := function.PointToBytes(p) + buf = make([]byte, ewkbHeaderSize+ByteSize(len(pb))) + copy(buf[ewkbHeaderSize:], pb) + + h := ewkbHeader{ + srid: p.SRID, + endian: littleEndian, + typ: pointType, + } + h.writeTo(buf[:ewkbHeaderSize]) + return +} + +func serializeLinestring(l sql.Linestring) (buf []byte) { + lb := function.LineToBytes(l) + buf = make([]byte, ewkbHeaderSize+ByteSize(len(lb))) + copy(buf[ewkbHeaderSize:], lb) + + h := ewkbHeader{ + srid: l.SRID, + endian: littleEndian, + typ: linestringType, + } + h.writeTo(buf[:ewkbHeaderSize]) + return +} + +func serializePolygon(p sql.Polygon) (buf []byte) { + pb := function.PolyToBytes(p) + buf = make([]byte, ewkbHeaderSize+ByteSize(len(pb))) + copy(buf[ewkbHeaderSize:], pb) + + h := ewkbHeader{ + srid: p.SRID, + endian: littleEndian, + typ: polygonType, + } + h.writeTo(buf[:ewkbHeaderSize]) + return 
+}
diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go
index 470c7aaf5d..5fa8fdb624 100644
--- a/go/store/val/tuple_descriptor.go
+++ b/go/store/val/tuple_descriptor.go
@@ -21,6 +21,8 @@ import (
 	"strings"
 	"time"
 
+	"github.com/dolthub/go-mysql-server/sql/expression/function"
+
 	"github.com/dolthub/go-mysql-server/sql"
 )
 
@@ -278,7 +280,7 @@ func (td TupleDesc) GetBytes(i int, tup Tuple) (v []byte, ok bool) {
 	return
 }
 
-// GetBytes reads a []byte from the ith field of the Tuple.
+// GetJSON reads a []byte from the ith field of the Tuple.
 // If the ith field is NULL, |ok| is set to false.
 func (td TupleDesc) GetJSON(i int, tup Tuple) (v interface{}, ok bool) {
 	td.expectEncoding(i, JSONEnc)
@@ -292,6 +294,18 @@ func (td TupleDesc) GetJSON(i int, tup Tuple) (v interface{}, ok bool) {
 	return
 }
 
+// GetGeometry reads a []byte from the ith field of the Tuple.
+// If the ith field is NULL, |ok| is set to false.
+func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
+	td.expectEncoding(i, GeometryEnc)
+	b := tup.GetField(i)
+	if b != nil {
+		v = readBytes(b)
+		ok = true
+	}
+	return
+}
+
 // GetField reads the value from the ith field of the Tuple as an interface{}.
func (td TupleDesc) GetField(i int, tup Tuple) (v interface{}) { var ok bool @@ -334,6 +348,12 @@ func (td TupleDesc) GetField(i int, tup Tuple) (v interface{}) { if ok { v = sql.JSONDocument{Val: js} } + case GeometryEnc: + var geo []byte + geo, ok = td.GetGeometry(i, tup) + if ok { + v = deserializeGeometry(geo) + } default: panic("unknown encoding") } @@ -418,3 +438,25 @@ func (td TupleDesc) Format(tup Tuple) string { sb.WriteString(" )") return sb.String() } + +func deserializeGeometry(buf []byte) (v interface{}) { + var bigEndian, reverse = false, false + + h := readHeaderFrom(buf[:ewkbHeaderSize]) + + var err error + switch h.typ { + case pointType: + v, err = function.WKBToPoint(buf[function.WKBHeaderLength:], bigEndian, h.srid, reverse) + case linestringType: + v, err = function.WKBToLine(buf[function.WKBHeaderLength:], bigEndian, h.srid, reverse) + case polygonType: + v, err = function.WKBToPoly(buf[function.WKBHeaderLength:], bigEndian, h.srid, reverse) + default: + panic(fmt.Sprintf("unknown geometry tryp %d", h.typ)) + } + if err != nil { + panic(err) + } + return +} From 0664adc4d66c81f9fc2553e159a711cc77d0ff61 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 11:03:17 -0800 Subject: [PATCH 049/105] added helper methods and fixing spacing issue, still need remote branches to show up --- go/cmd/dolt/commands/log.go | 42 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 47c00d4a71..0a09f19dcb 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -160,11 +160,11 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - // TODO: are branches associate to commits somewhere already? + // TODO: are branches associated to commits somewhere already? 
branches, _ := dEnv.DoltDB.GetBranchesWithHashes(ctx) branchHashToName := map[hash.Hash][]string{} for _, b := range branches { - branchHashToName[b.Hash] = append(branchHashToName[b.Hash], b.Ref.String()) + branchHashToName[b.Hash] = append(branchHashToName[b.Hash], b.Ref.String()[11:]) // trim out "refs/heads/" } h, err := commit.HashOf() @@ -323,10 +323,17 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return nil } -func logCompact(opts logOpts, commits []logNode) { - pager := outputpager.Start() - defer pager.Stop() +func logRefs (pager *outputpager.Pager, comm logNode) { + // TODO: this doesn't handle remote branches + pager.Writer.Write([]byte("\033[33m (\033[0m")) + if comm.isHead { + pager.Writer.Write([]byte("\033[94mHEAD -> \033[0m")) + } + pager.Writer.Write([]byte(fmt.Sprintf("\033[92m%s\033[0m", strings.Join(comm.branchNames, ", ")))) + pager.Writer.Write([]byte("\033[33m) \033[0m")) +} +func logCompact(pager *outputpager.Pager, opts logOpts, commits []logNode) { for _, comm := range commits { if len(comm.parentHashes) < opts.minParents { return @@ -342,23 +349,15 @@ func logCompact(opts logOpts, commits []logNode) { // TODO: use short hash instead pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s\033[0m", chStr))) - // TODO: write refs and tags here - pager.Writer.Write([]byte("\033[33m (\033[0m")) - if comm.isHead { - pager.Writer.Write([]byte("\033[94mHEAD -> \033[0m")) - } - pager.Writer.Write([]byte(fmt.Sprintf("\033[92m%s\033[0m", strings.Join(comm.branchNames, ", ")))) - pager.Writer.Write([]byte("\033[33m) \033[0m")) + // TODO: write tags here + logRefs(pager, comm) formattedDesc := strings.Replace(comm.commitMeta.Description, "\n", " ", -1) + "\n" pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc))) } } -func logDefault(opts logOpts, commits []logNode) { - pager := outputpager.Start() - defer pager.Stop() - +func logDefault(pager *outputpager.Pager, opts logOpts, commits []logNode) { for _, comm := 
range commits { if len(comm.parentHashes) < opts.minParents { return @@ -371,7 +370,9 @@ func logDefault(opts logOpts, commits []logNode) { } } - pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr))) + pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s\033[0m", chStr))) + + logRefs(pager, comm) if len(comm.parentHashes) > 1 { pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:"))) @@ -395,12 +396,13 @@ func logToStdOut(opts logOpts, commits []logNode) { return } cli.ExecuteWithStdioRestored(func() { + pager := outputpager.Start() + defer pager.Stop() if opts.oneLine { - logCompact(opts, commits) + logCompact(pager, opts, commits) } else { - logDefault(opts, commits) + logDefault(pager, opts, commits) } - }) } From ad5fd5d74ae1cd1d337b7b41c73c6366e29894bb Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 11:06:01 -0800 Subject: [PATCH 050/105] idk what else to do --- integration-tests/bats/no-repo.bats | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index 210555a4bd..d58502df73 100755 --- a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -319,5 +319,6 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." sleep 1 # Wait another sec run grep -q 'dolt' <(ps aux) [ "$output" == "" ] - run kill -9 $(ps aux | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) + run kill -9 $(ps aux | grep 'dolt' | awk {'print $1'}) # Kill process for good if it doesn't pass + run echo "something to get the terminal to stop" } From e71cd6d5920d938bc2dfa0e7cf3fa82981e262ca Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 11:16:13 -0800 Subject: [PATCH 051/105] maybe this? 
--- integration-tests/bats/no-repo.bats | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index d58502df73..a5c28af1e6 100755 --- a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -315,10 +315,9 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." @test "no-repo: dolt login exits when receiving SIGINT" { dolt login & # run this in the background sleep 1 # Wait a sec - kill -SIGINT $(ps aux | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) + kill -SIGINT $(ps | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) sleep 1 # Wait another sec run grep -q 'dolt' <(ps aux) [ "$output" == "" ] - run kill -9 $(ps aux | grep 'dolt' | awk {'print $1'}) # Kill process for good if it doesn't pass - run echo "something to get the terminal to stop" + run kill -9 $(ps | grep 'dolt' | awk {'print $1'}) # Kill process for good if it doesn't pass } From 040eee1129a21f5b360b71a89c9ce6c29b71d7ad Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 10 Feb 2022 11:26:25 -0800 Subject: [PATCH 052/105] moved GetField and PutField methods from pkg val to sqle/index --- .../doltcore/sqle/index/dolt_index.go | 2 +- .../doltcore/sqle/index/prolly_fields.go | 293 ++++++++++++++++++ .../doltcore/sqle/index/prolly_index_iter.go | 8 +- .../index/{shim.go => prolly_row_iter.go} | 17 +- .../sqle/writer/prolly_table_writer.go | 17 +- go/store/val/tuple_builder.go | 204 ------------ go/store/val/tuple_descriptor.go | 83 ----- 7 files changed, 314 insertions(+), 310 deletions(-) create mode 100644 go/libraries/doltcore/sqle/index/prolly_fields.go rename go/libraries/doltcore/sqle/index/{shim.go => prolly_row_iter.go} (88%) diff --git a/go/libraries/doltcore/sqle/index/dolt_index.go b/go/libraries/doltcore/sqle/index/dolt_index.go index 58f06cfc6c..59b24be202 
100644 --- a/go/libraries/doltcore/sqle/index/dolt_index.go +++ b/go/libraries/doltcore/sqle/index/dolt_index.go @@ -478,7 +478,7 @@ func isBindingCut(cut sql.RangeCut) bool { func tupleFromKeys(keys sql.Row, tb *val.TupleBuilder) (val.Tuple, error) { for i, v := range keys { - tb.PutField(i, v) + PutField(tb, i, v) } return tb.BuildPermissive(sharePool), nil } diff --git a/go/libraries/doltcore/sqle/index/prolly_fields.go b/go/libraries/doltcore/sqle/index/prolly_fields.go new file mode 100644 index 0000000000..8e458e419a --- /dev/null +++ b/go/libraries/doltcore/sqle/index/prolly_fields.go @@ -0,0 +1,293 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "encoding/binary" + "fmt" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression/function" + + "github.com/dolthub/dolt/go/store/val" +) + +// GetField reads the value from the ith field of the Tuple as an interface{}. 
+func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}) { + var ok bool + switch td.Types[i].Enc { + case val.Int8Enc: + v, ok = td.GetInt8(i, tup) + case val.Uint8Enc: + v, ok = td.GetUint8(i, tup) + case val.Int16Enc: + v, ok = td.GetInt16(i, tup) + case val.Uint16Enc: + v, ok = td.GetUint16(i, tup) + case val.Int32Enc: + v, ok = td.GetInt32(i, tup) + case val.Uint32Enc: + v, ok = td.GetUint32(i, tup) + case val.Int64Enc: + v, ok = td.GetInt64(i, tup) + case val.Uint64Enc: + v, ok = td.GetUint64(i, tup) + case val.Float32Enc: + v, ok = td.GetFloat32(i, tup) + case val.Float64Enc: + v, ok = td.GetFloat64(i, tup) + case val.DecimalEnc: + v, ok = td.GetDecimal(i, tup) + case val.TimeEnc: + v, ok = td.GetSqlTime(i, tup) + case val.YearEnc: + v, ok = td.GetYear(i, tup) + case val.TimestampEnc, val.DateEnc, val.DatetimeEnc: + v, ok = td.GetTimestamp(i, tup) + case val.StringEnc: + v, ok = td.GetString(i, tup) + case val.BytesEnc: + v, ok = td.GetBytes(i, tup) + case val.JSONEnc: + var js interface{} + js, ok = td.GetJSON(i, tup) + if ok { + v = sql.JSONDocument{Val: js} + } + case val.GeometryEnc: + var geo []byte + geo, ok = td.GetGeometry(i, tup) + if ok { + v = deserializeGeometry(geo) + } + default: + panic("unknown val.encoding") + } + if !ok { + return nil + } + return v +} + +// PutField writes an interface{} to the ith field of the Tuple being built. 
+func PutField(tb *val.TupleBuilder, i int, v interface{}) { + if v == nil { + return // NULL + } + + enc := tb.Desc.Types[i].Enc + switch enc { + case val.Int8Enc: + tb.PutInt8(i, int8(convInt(v))) + case val.Uint8Enc: + tb.PutUint8(i, uint8(convUint(v))) + case val.Int16Enc: + tb.PutInt16(i, int16(convInt(v))) + case val.Uint16Enc: + tb.PutUint16(i, uint16(convUint(v))) + case val.Int32Enc: + tb.PutInt32(i, int32(convInt(v))) + case val.Uint32Enc: + tb.PutUint32(i, uint32(convUint(v))) + case val.Int64Enc: + tb.PutInt64(i, int64(convInt(v))) + case val.Uint64Enc: + tb.PutUint64(i, uint64(convUint(v))) + case val.Float32Enc: + tb.PutFloat32(i, v.(float32)) + case val.Float64Enc: + tb.PutFloat64(i, v.(float64)) + case val.DecimalEnc: + tb.PutDecimal(i, v.(string)) + case val.TimeEnc: + tb.PutSqlTime(i, v.(string)) + case val.YearEnc: + tb.PutYear(i, v.(int16)) + case val.DateEnc, val.DatetimeEnc, val.TimestampEnc: + tb.PutTimestamp(i, v.(time.Time)) + case val.StringEnc: + tb.PutString(i, v.(string)) + case val.BytesEnc: + if s, ok := v.(string); ok { + v = []byte(s) + } + tb.PutBytes(i, v.([]byte)) + case val.GeometryEnc: + // todo(andy): remove GMS dependency + tb.PutGeometry(i, serializeGeometry(v)) + case val.JSONEnc: + // todo(andy): remove GMS dependency + tb.PutJSON(i, v.(sql.JSONDocument).Val) + default: + panic(fmt.Sprintf("unknown encoding %v %v", enc, v)) + } +} + +func deserializeGeometry(buf []byte) (v interface{}) { + return nil +} + +func convInt(v interface{}) int { + switch i := v.(type) { + case int: + return i + case int8: + return int(i) + case uint8: + return int(i) + case int16: + return int(i) + case uint16: + return int(i) + case int32: + return int(i) + case uint32: + return int(i) + case int64: + return int(i) + case uint64: + return int(i) + default: + panic("impossible conversion") + } +} + +func convUint(v interface{}) uint { + switch i := v.(type) { + case uint: + return i + case int: + return uint(i) + case int8: + return uint(i) + 
case uint8: + return uint(i) + case int16: + return uint(i) + case uint16: + return uint(i) + case int32: + return uint(i) + case uint32: + return uint(i) + case int64: + return uint(i) + case uint64: + return uint(i) + default: + panic("impossible conversion") + } +} + +// todo(andy): remove GMS dependency +// have the engine pass serialized bytes + +const ( + sridSize = val.ByteSize(4) + endianSize = val.ByteSize(1) + typeSize = val.ByteSize(4) + ewkbHeaderSize = sridSize + endianSize + typeSize +) + +const ( + pointType = uint32(1) + linestringType = uint32(2) + polygonType = uint32(3) + + littleEndian = uint8(1) +) + +type ewkbHeader struct { + srid uint32 + endian uint8 + typ uint32 +} + +func (h ewkbHeader) writeTo(buf []byte) { + expectSize(buf, ewkbHeaderSize) + binary.LittleEndian.PutUint32(buf[:sridSize], h.srid) + buf[sridSize] = h.endian + binary.LittleEndian.PutUint32(buf[sridSize+endianSize:ewkbHeaderSize], h.typ) +} + +func readHeaderFrom(buf []byte) (h ewkbHeader) { + expectSize(buf, ewkbHeaderSize) + h.srid = binary.LittleEndian.Uint32(buf[:sridSize]) + h.endian = uint8(buf[sridSize]) + h.typ = binary.LittleEndian.Uint32(buf[sridSize+endianSize : ewkbHeaderSize]) + return +} + +func serializeGeometry(v interface{}) []byte { + switch t := v.(type) { + case sql.Point: + return serializePoint(t) + case sql.Linestring: + return serializeLinestring(t) + case sql.Polygon: + return serializePolygon(t) + default: + panic(fmt.Sprintf("unknown geometry %v", v)) + } +} + +func serializePoint(p sql.Point) (buf []byte) { + pb := function.PointToBytes(p) + buf = make([]byte, ewkbHeaderSize+val.ByteSize(len(pb))) + copy(buf[ewkbHeaderSize:], pb) + + h := ewkbHeader{ + srid: p.SRID, + endian: littleEndian, + typ: pointType, + } + h.writeTo(buf[:ewkbHeaderSize]) + return +} + +func serializeLinestring(l sql.Linestring) (buf []byte) { + lb := function.LineToBytes(l) + buf = make([]byte, ewkbHeaderSize+val.ByteSize(len(lb))) + copy(buf[ewkbHeaderSize:], lb) + + h := 
ewkbHeader{ + srid: l.SRID, + endian: littleEndian, + typ: linestringType, + } + h.writeTo(buf[:ewkbHeaderSize]) + return +} + +func serializePolygon(p sql.Polygon) (buf []byte) { + pb := function.PolyToBytes(p) + buf = make([]byte, ewkbHeaderSize+val.ByteSize(len(pb))) + copy(buf[ewkbHeaderSize:], pb) + + h := ewkbHeader{ + srid: p.SRID, + endian: littleEndian, + typ: polygonType, + } + h.writeTo(buf[:ewkbHeaderSize]) + return +} + +func expectSize(buf []byte, sz val.ByteSize) { + if val.ByteSize(len(buf)) != sz { + panic("byte slice is not of expected size") + } +} diff --git a/go/libraries/doltcore/sqle/index/prolly_index_iter.go b/go/libraries/doltcore/sqle/index/prolly_index_iter.go index c3971c1e37..7d17aa7965 100644 --- a/go/libraries/doltcore/sqle/index/prolly_index_iter.go +++ b/go/libraries/doltcore/sqle/index/prolly_index_iter.go @@ -144,13 +144,13 @@ func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) { if rowIdx == -1 { continue } - r[rowIdx] = keyDesc.GetField(keyIdx, key) + r[rowIdx] = GetField(keyDesc, keyIdx, key) } for valIdx, rowIdx := range p.valMap { if rowIdx == -1 { continue } - r[rowIdx] = valDesc.GetField(valIdx, value) + r[rowIdx] = GetField(valDesc, valIdx, value) } return @@ -252,7 +252,7 @@ func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql. if from == -1 { continue } - r[to] = p.keyDesc.GetField(from, key) + r[to] = GetField(p.keyDesc, from, key) } for to := range p.valMap { @@ -260,7 +260,7 @@ func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql. 
if from == -1 { continue } - r[to] = p.valDesc.GetField(from, value) + r[to] = GetField(p.valDesc, from, value) } return diff --git a/go/libraries/doltcore/sqle/index/shim.go b/go/libraries/doltcore/sqle/index/prolly_row_iter.go similarity index 88% rename from go/libraries/doltcore/sqle/index/shim.go rename to go/libraries/doltcore/sqle/index/prolly_row_iter.go index 9d06f6df2b..013d7c2256 100644 --- a/go/libraries/doltcore/sqle/index/shim.go +++ b/go/libraries/doltcore/sqle/index/prolly_row_iter.go @@ -22,12 +22,11 @@ import ( "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/store/pool" "github.com/dolthub/dolt/go/store/prolly" "github.com/dolthub/dolt/go/store/val" ) -type sqlRowIter struct { +type prollyRowIter struct { ctx context.Context iter prolly.MapRangeIter @@ -38,7 +37,7 @@ type sqlRowIter struct { rowLen int } -var _ sql.RowIter = sqlRowIter{} +var _ sql.RowIter = prollyRowIter{} func NewProllyRowIter(ctx context.Context, sch schema.Schema, rows prolly.Map, rng prolly.Range, projections []string) (sql.RowIter, error) { if schema.IsKeyless(sch) { @@ -71,7 +70,7 @@ func rowIterFromMapIter( kd, vd := m.Descriptors() - return sqlRowIter{ + return prollyRowIter{ ctx: ctx, iter: iter, keyDesc: kd, @@ -110,7 +109,7 @@ func projectionMappings(sch schema.Schema, projs []string) (keyMap, valMap val.O return } -func (it sqlRowIter) Next(ctx *sql.Context) (sql.Row, error) { +func (it prollyRowIter) Next(ctx *sql.Context) (sql.Row, error) { key, value, err := it.iter.Next(it.ctx) if err != nil { return nil, err @@ -122,20 +121,18 @@ func (it sqlRowIter) Next(ctx *sql.Context) (sql.Row, error) { if rowIdx == -1 { continue } - row[rowIdx] = it.keyDesc.GetField(keyIdx, key) + row[rowIdx] = GetField(it.keyDesc, keyIdx, key) } for valIdx, rowIdx := range it.valProj { if rowIdx == -1 { continue } - row[rowIdx] = it.valDesc.GetField(valIdx, value) + row[rowIdx] = GetField(it.valDesc, valIdx, 
value) } return row, nil } -func (it sqlRowIter) Close(ctx *sql.Context) error { +func (it prollyRowIter) Close(ctx *sql.Context) error { return nil } - -var shimPool = pool.NewBuffPool() diff --git a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go index cdb0f68690..44f5447a7a 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go +++ b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go @@ -24,6 +24,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" "github.com/dolthub/dolt/go/libraries/doltcore/schema" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/globalstate" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" "github.com/dolthub/dolt/go/store/pool" "github.com/dolthub/dolt/go/store/prolly" "github.com/dolthub/dolt/go/store/val" @@ -284,7 +285,7 @@ func (m prollyIndexWriter) Map(ctx context.Context) (prolly.Map, error) { func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error { for to := range m.keyMap { from := m.keyMap.MapOrdinal(to) - m.keyBld.PutField(to, sqlRow[from]) + index.PutField(m.keyBld, to, sqlRow[from]) } k := m.keyBld.Build(sharePool) @@ -297,7 +298,7 @@ func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error { for to := range m.valMap { from := m.valMap.MapOrdinal(to) - m.valBld.PutField(to, sqlRow[from]) + index.PutField(m.valBld, to, sqlRow[from]) } v := m.valBld.Build(sharePool) @@ -307,7 +308,7 @@ func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error { func (m prollyIndexWriter) Delete(ctx *sql.Context, sqlRow sql.Row) error { for to := range m.keyMap { from := m.keyMap.MapOrdinal(to) - m.keyBld.PutField(to, sqlRow[from]) + index.PutField(m.keyBld, to, sqlRow[from]) } k := m.keyBld.Build(sharePool) @@ -317,7 +318,7 @@ func (m prollyIndexWriter) Delete(ctx *sql.Context, sqlRow sql.Row) error { func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, 
newRow sql.Row) error { for to := range m.keyMap { from := m.keyMap.MapOrdinal(to) - m.keyBld.PutField(to, oldRow[from]) + index.PutField(m.keyBld, to, oldRow[from]) } oldKey := m.keyBld.Build(sharePool) @@ -329,7 +330,7 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R for to := range m.keyMap { from := m.keyMap.MapOrdinal(to) - m.keyBld.PutField(to, newRow[from]) + index.PutField(m.keyBld, to, newRow[from]) } newKey := m.keyBld.Build(sharePool) @@ -342,7 +343,7 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R for to := range m.valMap { from := m.valMap.MapOrdinal(to) - m.valBld.PutField(to, newRow[from]) + index.PutField(m.valBld, to, newRow[from]) } v := m.valBld.Build(sharePool) @@ -356,13 +357,13 @@ func (m prollyIndexWriter) primaryKeyError(ctx context.Context, key val.Tuple) e kd := m.keyBld.Desc for from := range m.keyMap { to := m.keyMap.MapOrdinal(from) - existing[to] = kd.GetField(from, key) + existing[to] = index.GetField(kd, from, key) } vd := m.valBld.Desc for from := range m.valMap { to := m.valMap.MapOrdinal(from) - existing[to] = vd.GetField(from, value) + existing[to] = index.GetField(vd, from, value) } return }) diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 692706ac89..5cfd231e8d 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -16,12 +16,8 @@ package val import ( "encoding/json" - "fmt" "time" - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/expression/function" - "github.com/dolthub/dolt/go/store/pool" ) @@ -249,203 +245,3 @@ func (tb *TupleBuilder) PutRaw(i int, buf []byte) { writeBytes(tb.fields[i], buf) tb.pos += sz } - -// PutField writes an interface{} to the ith field of the Tuple being built. 
-func (tb *TupleBuilder) PutField(i int, v interface{}) { - if v == nil { - return // NULL - } - - enc := tb.Desc.Types[i].Enc - switch enc { - case Int8Enc: - tb.PutInt8(i, int8(convInt(v))) - case Uint8Enc: - tb.PutUint8(i, uint8(convUint(v))) - case Int16Enc: - tb.PutInt16(i, int16(convInt(v))) - case Uint16Enc: - tb.PutUint16(i, uint16(convUint(v))) - case Int32Enc: - tb.PutInt32(i, int32(convInt(v))) - case Uint32Enc: - tb.PutUint32(i, uint32(convUint(v))) - case Int64Enc: - tb.PutInt64(i, int64(convInt(v))) - case Uint64Enc: - tb.PutUint64(i, uint64(convUint(v))) - case Float32Enc: - tb.PutFloat32(i, v.(float32)) - case Float64Enc: - tb.PutFloat64(i, v.(float64)) - case DecimalEnc: - tb.PutDecimal(i, v.(string)) - case TimeEnc: - tb.PutSqlTime(i, v.(string)) - case YearEnc: - tb.PutYear(i, v.(int16)) - case DateEnc, DatetimeEnc, TimestampEnc: - tb.PutTimestamp(i, v.(time.Time)) - case StringEnc: - tb.PutString(i, v.(string)) - case BytesEnc: - if s, ok := v.(string); ok { - v = []byte(s) - } - tb.PutBytes(i, v.([]byte)) - case GeometryEnc: - // todo(andy): remove GMS dependency - tb.PutGeometry(i, serializeGeometry(v)) - case JSONEnc: - // todo(andy): remove GMS dependency - tb.PutJSON(i, v.(sql.JSONDocument).Val) - default: - panic(fmt.Sprintf("unknown encoding %v %v", enc, v)) - } -} - -func convInt(v interface{}) int { - switch i := v.(type) { - case int: - return i - case int8: - return int(i) - case uint8: - return int(i) - case int16: - return int(i) - case uint16: - return int(i) - case int32: - return int(i) - case uint32: - return int(i) - case int64: - return int(i) - case uint64: - return int(i) - default: - panic("impossible conversion") - } -} - -func convUint(v interface{}) uint { - switch i := v.(type) { - case uint: - return i - case int: - return uint(i) - case int8: - return uint(i) - case uint8: - return uint(i) - case int16: - return uint(i) - case uint16: - return uint(i) - case int32: - return uint(i) - case uint32: - return uint(i) - 
case int64: - return uint(i) - case uint64: - return uint(i) - default: - panic("impossible conversion") - } -} - -// todo(andy): remove GMS dependency -// have the engine pass serialized bytes - -const ( - sridSize = uint32Size - endianSize = uint8Size - typeSize = uint32Size - ewkbHeaderSize = sridSize + endianSize + typeSize -) - -const ( - pointType = uint32(1) - linestringType = uint32(2) - polygonType = uint32(3) - - littleEndian = uint8(1) -) - -type ewkbHeader struct { - srid uint32 - endian uint8 - typ uint32 -} - -func (h ewkbHeader) writeTo(buf []byte) { - expectSize(buf, ewkbHeaderSize) - writeUint32(buf[:sridSize], h.srid) - writeUint8(buf[sridSize:sridSize+endianSize], h.endian) - writeUint32(buf[sridSize+endianSize:ewkbHeaderSize], h.typ) -} - -func readHeaderFrom(buf []byte) (h ewkbHeader) { - expectSize(buf, ewkbHeaderSize) - h.srid = readUint32(buf[:sridSize]) - h.endian = readUint8(buf[sridSize : sridSize+endianSize]) - h.typ = readUint32(buf[sridSize+endianSize : ewkbHeaderSize]) - return -} - -func serializeGeometry(v interface{}) []byte { - switch t := v.(type) { - case sql.Point: - return serializePoint(t) - case sql.Linestring: - return serializeLinestring(t) - case sql.Polygon: - return serializePolygon(t) - default: - panic(fmt.Sprintf("unknown geometry %v", v)) - } -} - -func serializePoint(p sql.Point) (buf []byte) { - pb := function.PointToBytes(p) - buf = make([]byte, ewkbHeaderSize+ByteSize(len(pb))) - copy(buf[ewkbHeaderSize:], pb) - - h := ewkbHeader{ - srid: p.SRID, - endian: littleEndian, - typ: pointType, - } - h.writeTo(buf[:ewkbHeaderSize]) - return -} - -func serializeLinestring(l sql.Linestring) (buf []byte) { - lb := function.LineToBytes(l) - buf = make([]byte, ewkbHeaderSize+ByteSize(len(lb))) - copy(buf[ewkbHeaderSize:], lb) - - h := ewkbHeader{ - srid: l.SRID, - endian: littleEndian, - typ: linestringType, - } - h.writeTo(buf[:ewkbHeaderSize]) - return -} - -func serializePolygon(p sql.Polygon) (buf []byte) { - pb := 
function.PolyToBytes(p) - buf = make([]byte, ewkbHeaderSize+ByteSize(len(pb))) - copy(buf[ewkbHeaderSize:], pb) - - h := ewkbHeader{ - srid: p.SRID, - endian: littleEndian, - typ: polygonType, - } - h.writeTo(buf[:ewkbHeaderSize]) - return -} diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index 5fa8fdb624..3689a8106d 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -20,10 +20,6 @@ import ( "strconv" "strings" "time" - - "github.com/dolthub/go-mysql-server/sql/expression/function" - - "github.com/dolthub/go-mysql-server/sql" ) type TupleDesc struct { @@ -306,63 +302,6 @@ func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) { return } -// GetField reads the value from the ith field of the Tuple as an interface{}. -func (td TupleDesc) GetField(i int, tup Tuple) (v interface{}) { - var ok bool - switch td.Types[i].Enc { - case Int8Enc: - v, ok = td.GetInt8(i, tup) - case Uint8Enc: - v, ok = td.GetUint8(i, tup) - case Int16Enc: - v, ok = td.GetInt16(i, tup) - case Uint16Enc: - v, ok = td.GetUint16(i, tup) - case Int32Enc: - v, ok = td.GetInt32(i, tup) - case Uint32Enc: - v, ok = td.GetUint32(i, tup) - case Int64Enc: - v, ok = td.GetInt64(i, tup) - case Uint64Enc: - v, ok = td.GetUint64(i, tup) - case Float32Enc: - v, ok = td.GetFloat32(i, tup) - case Float64Enc: - v, ok = td.GetFloat64(i, tup) - case DecimalEnc: - v, ok = td.GetDecimal(i, tup) - case TimeEnc: - v, ok = td.GetSqlTime(i, tup) - case YearEnc: - v, ok = td.GetYear(i, tup) - case TimestampEnc, DateEnc, DatetimeEnc: - v, ok = td.GetTimestamp(i, tup) - case StringEnc: - v, ok = td.GetString(i, tup) - case BytesEnc: - v, ok = td.GetBytes(i, tup) - case JSONEnc: - var js interface{} - js, ok = td.GetJSON(i, tup) - if ok { - v = sql.JSONDocument{Val: js} - } - case GeometryEnc: - var geo []byte - geo, ok = td.GetGeometry(i, tup) - if ok { - v = deserializeGeometry(geo) - } - default: - panic("unknown encoding") - } - if !ok 
{ - return nil - } - return v -} - func (td TupleDesc) expectEncoding(i int, encodings ...Encoding) { for _, enc := range encodings { if enc == td.Types[i].Enc { @@ -438,25 +377,3 @@ func (td TupleDesc) Format(tup Tuple) string { sb.WriteString(" )") return sb.String() } - -func deserializeGeometry(buf []byte) (v interface{}) { - var bigEndian, reverse = false, false - - h := readHeaderFrom(buf[:ewkbHeaderSize]) - - var err error - switch h.typ { - case pointType: - v, err = function.WKBToPoint(buf[function.WKBHeaderLength:], bigEndian, h.srid, reverse) - case linestringType: - v, err = function.WKBToLine(buf[function.WKBHeaderLength:], bigEndian, h.srid, reverse) - case polygonType: - v, err = function.WKBToPoly(buf[function.WKBHeaderLength:], bigEndian, h.srid, reverse) - default: - panic(fmt.Sprintf("unknown geometry tryp %d", h.typ)) - } - if err != nil { - panic(err) - } - return -} From b553708ea5aa842c1bc36d1e19218e78cc8a4036 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 12:16:39 -0800 Subject: [PATCH 053/105] adding decoration options --- go/cmd/dolt/commands/log.go | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 0a09f19dcb..e657d96af3 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -46,6 +46,7 @@ type logOpts struct { numLines int showParents bool minParents int + decoration string oneLine bool } @@ -96,7 +97,7 @@ func (cmd LogCmd) ArgParser() *argparser.ArgParser { ap.SupportsInt(minParentsParam, "", "parent_count", "The minimum number of parents a commit must have to be included in the log.") ap.SupportsFlag(mergesParam, "", "Equivalent to min-parents == 2, this will limit the log to commits with 2 or more parents.") ap.SupportsFlag(parentsParam, "", "Shows all parents of each commit in the log.") - ap.SupportsFlag(decorateParam, "", "Shows refs next to commits.") + 
ap.SupportsString(decorateParam, "", "decorate_fmt","Shows refs next to commits.") ap.SupportsFlag(oneLineParam, "", "Shows logs in a compact format.") return ap } @@ -121,11 +122,13 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args minParents = 2 } + decorateOption := apr.GetValueOrDefault(decorateParam, "auto") opts := logOpts{ numLines: apr.GetIntOrDefault(numLinesParam, -1), showParents: apr.Contains(parentsParam), minParents: minParents, oneLine: apr.Contains(oneLineParam), + decoration: decorateOption, } // Just dolt log @@ -161,10 +164,14 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o } // TODO: are branches associated to commits somewhere already? - branches, _ := dEnv.DoltDB.GetBranchesWithHashes(ctx) branchHashToName := map[hash.Hash][]string{} + branches, _ := dEnv.DoltDB.GetBranchesWithHashes(ctx) for _, b := range branches { - branchHashToName[b.Hash] = append(branchHashToName[b.Hash], b.Ref.String()[11:]) // trim out "refs/heads/" + refName := b.Ref.String() + if opts.decoration != "long" { + refName = refName[11:] // trim out "refs/heads/" + } + branchHashToName[b.Hash] = append(branchHashToName[b.Hash], refName) } h, err := commit.HashOf() @@ -210,7 +217,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - commitsInfo = append(commitsInfo, logNode{meta, cmHash, pHashes, branchHashToName[cmHash], cmHash == h}) + commitsInfo = append(commitsInfo, logNode{commitMeta: meta, commitHash: cmHash, parentHashes: pHashes, branchNames: branchHashToName[cmHash], isHead: cmHash == h}) } logToStdOut(opts, commitsInfo) @@ -308,8 +315,7 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return err } - // TODO: fill this with correct info - commitsInfo = append(commitsInfo, logNode{meta, prevHash, ph, []string{}, false}) + commitsInfo = append(commitsInfo, logNode{commitMeta: meta, commitHash: prevHash, parentHashes: 
ph}) numLines-- } @@ -325,7 +331,7 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d func logRefs (pager *outputpager.Pager, comm logNode) { // TODO: this doesn't handle remote branches - pager.Writer.Write([]byte("\033[33m (\033[0m")) + pager.Writer.Write([]byte("\033[33m(\033[0m")) if comm.isHead { pager.Writer.Write([]byte("\033[94mHEAD -> \033[0m")) } @@ -347,10 +353,13 @@ func logCompact(pager *outputpager.Pager, opts logOpts, commits []logNode) { } // TODO: use short hash instead - pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s\033[0m", chStr))) + // Write commit hash + pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s \033[0m", chStr))) // TODO: write tags here - logRefs(pager, comm) + if opts.decoration != "no" { + logRefs(pager, comm) + } formattedDesc := strings.Replace(comm.commitMeta.Description, "\n", " ", -1) + "\n" pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc))) @@ -370,9 +379,13 @@ func logDefault(pager *outputpager.Pager, opts logOpts, commits []logNode) { } } - pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s\033[0m", chStr))) + // Write commit hash + pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr))) - logRefs(pager, comm) + // Show decoration + if opts.decoration != "no" { + logRefs(pager, comm) + } if len(comm.parentHashes) > 1 { pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:"))) From 766076f84b2e7d255b838df9dcb98bd06bc45656 Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Thu, 10 Feb 2022 20:22:45 +0000 Subject: [PATCH 054/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/log.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index e657d96af3..0394ad35ed 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -46,7 +46,7 @@ type logOpts struct { numLines int showParents bool minParents int - decoration 
string + decoration string oneLine bool } @@ -97,7 +97,7 @@ func (cmd LogCmd) ArgParser() *argparser.ArgParser { ap.SupportsInt(minParentsParam, "", "parent_count", "The minimum number of parents a commit must have to be included in the log.") ap.SupportsFlag(mergesParam, "", "Equivalent to min-parents == 2, this will limit the log to commits with 2 or more parents.") ap.SupportsFlag(parentsParam, "", "Shows all parents of each commit in the log.") - ap.SupportsString(decorateParam, "", "decorate_fmt","Shows refs next to commits.") + ap.SupportsString(decorateParam, "", "decorate_fmt", "Shows refs next to commits.") ap.SupportsFlag(oneLineParam, "", "Shows logs in a compact format.") return ap } @@ -329,7 +329,7 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return nil } -func logRefs (pager *outputpager.Pager, comm logNode) { +func logRefs(pager *outputpager.Pager, comm logNode) { // TODO: this doesn't handle remote branches pager.Writer.Write([]byte("\033[33m(\033[0m")) if comm.isHead { From 068c1daa6bd1300ca482a05c35a5bfe323786a98 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 12:26:55 -0800 Subject: [PATCH 055/105] apparently ps works differently on different OSes?? --- integration-tests/bats/no-repo.bats | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index a5c28af1e6..36e4a978e5 100755 --- a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -314,10 +314,11 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." @test "no-repo: dolt login exits when receiving SIGINT" { dolt login & # run this in the background + PID=$! 
# capture background PID sleep 1 # Wait a sec - kill -SIGINT $(ps | grep 'dolt' | awk {'print $1'}) # This looks for process dolt and sends it a SIGINT (CTRL + C) + kill -SIGINT $PID # Send SIGINT (CTRL + C) to PID sleep 1 # Wait another sec - run grep -q 'dolt' <(ps aux) + run grep -q 'dolt' <(ps) # Ensure no process named dolt is running [ "$output" == "" ] - run kill -9 $(ps | grep 'dolt' | awk {'print $1'}) # Kill process for good if it doesn't pass + run kill -SIGINT $PID # Kill process if it doesn't pass } From beeb3d0de086865041ba2d67c71186c89be6f151 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 12:58:38 -0800 Subject: [PATCH 056/105] fixing test --- integration-tests/bats/log.bats | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/integration-tests/bats/log.bats b/integration-tests/bats/log.bats index c2dbe2944e..c32df27b31 100755 --- a/integration-tests/bats/log.bats +++ b/integration-tests/bats/log.bats @@ -18,26 +18,26 @@ teardown() { @test "log: log respects branches" { dolt branch branch1 - dolt commit --allow-empty -m "commit 1 main" - dolt commit --allow-empty -m "commit 2 main" - dolt commit --allow-empty -m "commit 3 main" + dolt commit --allow-empty -m "commit 1 MAIN" + dolt commit --allow-empty -m "commit 2 MAIN" + dolt commit --allow-empty -m "commit 3 MAIN" run dolt log [ $status -eq 0 ] - [[ "$output" =~ "main" ]] || false - [[ ! "$output" =~ "branch1" ]] || false + [[ "$output" =~ "MAIN" ]] || false + [[ ! "$output" =~ "BRANCH1" ]] || false dolt checkout branch1 - dolt commit --allow-empty -m "commit 1 branch1" - dolt commit --allow-empty -m "commit 2 branch1" - dolt commit --allow-empty -m "commit 3 branch1" + dolt commit --allow-empty -m "commit 1 BRANCH1" + dolt commit --allow-empty -m "commit 2 BRANCH1" + dolt commit --allow-empty -m "commit 3 BRANCH1" run dolt log [ $status -eq 0 ] - [[ ! "$output" =~ "main" ]] || false - [[ "$output" =~ "branch1" ]] || false + [[ ! 
"$output" =~ "MAIN" ]] || false + [[ "$output" =~ "BRANCH1" ]] || false dolt checkout main run dolt log [ $status -eq 0 ] - [[ "$output" =~ "main" ]] || false - [[ ! "$output" =~ "branch1" ]] || false + [[ "$output" =~ "MAIN" ]] || false + [[ ! "$output" =~ "BRANCH1" ]] || false } @test "log: with -n specified" { From aeea5da0cdf78cc5a5b85254127e07ef1fa35376 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 12:59:00 -0800 Subject: [PATCH 057/105] prevent printing when no associated branch --- go/cmd/dolt/commands/log.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index e657d96af3..2ed798586b 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -330,6 +330,11 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d } func logRefs (pager *outputpager.Pager, comm logNode) { + // Do nothing if no associate branches + if len(comm.branchNames) == 0 { + return + } + // TODO: this doesn't handle remote branches pager.Writer.Write([]byte("\033[33m(\033[0m")) if comm.isHead { From f516b747683086bd40f1e9f4840f06076eb08d80 Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Thu, 10 Feb 2022 21:03:44 +0000 Subject: [PATCH 058/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/log.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 4f6967fb89..042f78ebee 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -329,7 +329,7 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return nil } -func logRefs (pager *outputpager.Pager, comm logNode) { +func logRefs(pager *outputpager.Pager, comm logNode) { // Do nothing if no associate branches if len(comm.branchNames) == 0 { return From 231cedcd71974c1ec022a9a50f981ad9a35547c2 Mon Sep 17 00:00:00 2001 From: James Cor Date: 
Thu, 10 Feb 2022 13:08:24 -0800 Subject: [PATCH 059/105] it's not short and long; it's short and full; thanks git --- go/cmd/dolt/commands/log.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 4f6967fb89..713d94edee 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -168,7 +168,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o branches, _ := dEnv.DoltDB.GetBranchesWithHashes(ctx) for _, b := range branches { refName := b.Ref.String() - if opts.decoration != "long" { + if opts.decoration != "full" { refName = refName[11:] // trim out "refs/heads/" } branchHashToName[b.Hash] = append(branchHashToName[b.Hash], refName) From d65270f6528f81223493ede748b406bcd2e3b81a Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 13:23:58 -0800 Subject: [PATCH 060/105] hard kill? --- integration-tests/bats/no-repo.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/bats/no-repo.bats b/integration-tests/bats/no-repo.bats index 36e4a978e5..bb2f300ff3 100755 --- a/integration-tests/bats/no-repo.bats +++ b/integration-tests/bats/no-repo.bats @@ -320,5 +320,5 @@ NOT_VALID_REPO_ERROR="The current directory is not a valid dolt repository." 
sleep 1 # Wait another sec run grep -q 'dolt' <(ps) # Ensure no process named dolt is running [ "$output" == "" ] - run kill -SIGINT $PID # Kill process if it doesn't pass + run kill -9 $PID # Kill process if it doesn't pass } From 10cfe95c93d14d564b559da3684352301bcb21df Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 10 Feb 2022 14:36:09 -0800 Subject: [PATCH 061/105] refactored Node to use SlicedBuffer --- go/store/prolly/meta_tuple.go | 2 +- go/store/prolly/node.go | 205 ++++++++++-------- go/store/prolly/node_cursor.go | 2 +- go/store/prolly/node_test.go | 60 ++++- go/store/prolly/tree_chunker_test.go | 6 +- go/store/val/{offsets.go => sliced_buffer.go} | 34 ++- go/store/val/tuple.go | 4 +- 7 files changed, 198 insertions(+), 115 deletions(-) rename go/store/val/{offsets.go => sliced_buffer.go} (71%) diff --git a/go/store/prolly/meta_tuple.go b/go/store/prolly/meta_tuple.go index 16f81cca81..cb80902ec1 100644 --- a/go/store/prolly/meta_tuple.go +++ b/go/store/prolly/meta_tuple.go @@ -44,7 +44,7 @@ func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error) } func writeNewChild(ctx context.Context, ns NodeStore, level uint64, keys, values []nodeItem) (Node, metaPair, error) { - child := makeMapNode(ns.Pool(), level, keys, values) + child := buildMapNode(ns.Pool(), level, keys, values) ref, err := ns.Write(ctx, child) if err != nil { diff --git a/go/store/prolly/node.go b/go/store/prolly/node.go index 166a26925f..8f6d70c743 100644 --- a/go/store/prolly/node.go +++ b/go/store/prolly/node.go @@ -23,25 +23,68 @@ import ( "github.com/dolthub/dolt/go/gen/fb/serial" "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/pool" + "github.com/dolthub/dolt/go/store/val" ) const ( maxVectorOffset = uint64(math.MaxUint16) refSize = hash.ByteLen + + // These constants are mirrored from serial.TupleMap.KeyOffsetsLength() + // and serial.TupleMap.ValueOffsetsLength() respectively. 
+ // They are only as stable as the flatbuffers schemas that define them. + keyOffsetsVOffset = 6 + valueOffsetsVOffset = 10 ) func init() { - //emptyNode = makeMapNode(sharedPool, 0, nil, nil) -} - -type Node struct { - buf serial.TupleMap - cnt int + emptyNode = buildMapNode(sharedPool, 0, nil, nil) } var emptyNode Node -func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (node Node) { +type Node struct { + keys, values val.SlicedBuffer + refs refBuffer + count, level int + + buf serial.TupleMap +} + +func mapNodeFromBytes(bb []byte) Node { + buf := serial.GetRootAsTupleMap(bb, 0) + return mapNodeFromFlatbuffer(*buf) +} + +func mapNodeFromFlatbuffer(buf serial.TupleMap) Node { + keys := val.SlicedBuffer{ + Buf: buf.KeyTuplesBytes(), + Offs: getKeyOffsetsVector(buf), + } + values := val.SlicedBuffer{ + Buf: buf.ValueTuplesBytes(), + Offs: getValueOffsetsVector(buf), + } + refs := refBuffer{ + buf: buf.RefArrayBytes(), + } + + count := buf.KeyOffsetsLength() + 1 + if len(keys.Buf) == 0 { + count = 0 + } + + return Node{ + keys: keys, + values: values, + refs: refs, + count: count, + level: int(buf.TreeLevel()), + buf: buf, + } +} + +func buildMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (node Node) { var ( keyTups, keyOffs fb.UOffsetT valTups, valOffs fb.UOffsetT @@ -85,6 +128,57 @@ func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (nod return mapNodeFromBytes(b.FinishedBytes()) } +func (nd Node) hashOf() hash.Hash { + return hash.Of(nd.bytes()) +} + +func (nd Node) getKey(i int) nodeItem { + return nd.keys.GetSlice(i) +} + +func (nd Node) getValue(i int) nodeItem { + if nd.leafNode() { + return nd.values.GetSlice(i) + } else { + r := nd.getRef(i) + return r[:] + } +} + +func (nd Node) getRef(i int) hash.Hash { + return nd.refs.getRef(i) +} + +func (nd Node) nodeCount() int { + return nd.count +} + +// todo(andy): should we support this? 
+//func (nd Node) cumulativeCount() uint64 { +// return nd.buf.TreeCount() +//} + +func (nd Node) leafNode() bool { + return nd.level == 0 +} + +func (nd Node) empty() bool { + return nd.bytes() == nil || nd.nodeCount() == 0 +} + +func (nd Node) bytes() []byte { + return nd.buf.Table().Bytes +} + +type refBuffer struct { + buf []byte +} + +func (rb refBuffer) getRef(i int) hash.Hash { + start, stop := i*refSize, (i+1)*refSize + return hash.New(rb.buf[start:stop]) +} + func getMapBuilder(pool pool.BuffPool, sz int) *fb.Builder { // todo(andy): initialize builder buffer from pool return fb.NewBuilder(sz) @@ -138,87 +232,22 @@ func writeItemOffsets(b *fb.Builder, items []nodeItem, sz int) (cnt int) { return } -func mapNodeFromBytes(bb []byte) Node { - buf := serial.GetRootAsTupleMap(bb, 0) - // first key offset omitted - cnt := buf.KeyOffsetsLength() + 1 - if len(buf.KeyTuplesBytes()) == 0 { - cnt = 0 - } - return Node{ - buf: *buf, - cnt: cnt, - } +func getKeyOffsetsVector(buf serial.TupleMap) []byte { + sz := buf.KeyOffsetsLength() * 2 + tab := buf.Table() + vec := tab.Offset(keyOffsetsVOffset) + start := int(tab.Vector(fb.UOffsetT(vec))) + stop := start + sz + + return tab.Bytes[start:stop] } -func (nd Node) hashOf() hash.Hash { - return hash.Of(nd.bytes()) -} - -func (nd Node) getKey(i int) nodeItem { - keys := nd.buf.KeyTuplesBytes() - - start, stop := uint16(0), uint16(len(keys)) - if i > 0 { - start = nd.buf.KeyOffsets(i - 1) - } - if i < nd.buf.KeyOffsetsLength() { - stop = nd.buf.KeyOffsets(i) - } - - return keys[start:stop] -} - -func (nd Node) getValue(i int) nodeItem { - if nd.leafNode() { - return nd.getValueTuple(i) - } else { - r := nd.getRef(i) - return r[:] - } -} - -func (nd Node) getValueTuple(i int) nodeItem { - values := nd.buf.ValueTuplesBytes() - - start, stop := uint16(0), uint16(len(values)) - if i > 0 { - start = nd.buf.ValueOffsets(i - 1) - } - if i < nd.buf.ValueOffsetsLength() { - stop = nd.buf.ValueOffsets(i) - } - - return 
values[start:stop] -} - -func (nd Node) getRef(i int) hash.Hash { - refs := nd.buf.RefArrayBytes() - start, stop := i*refSize, (i+1)*refSize - return hash.New(refs[start:stop]) -} - -func (nd Node) level() int { - return int(nd.buf.TreeLevel()) -} - -func (nd Node) nodeCount() int { - return nd.cnt -} - -// todo(andy): should we support this? -//func (nd Node) cumulativeCount() uint64 { -// return nd.buf.TreeCount() -//} - -func (nd Node) leafNode() bool { - return nd.level() == 0 -} - -func (nd Node) empty() bool { - return nd.bytes() == nil || nd.nodeCount() == 0 -} - -func (nd Node) bytes() []byte { - return nd.buf.Table().Bytes +func getValueOffsetsVector(buf serial.TupleMap) []byte { + sz := buf.ValueOffsetsLength() * 2 + tab := buf.Table() + vec := tab.Offset(valueOffsetsVOffset) + start := int(tab.Vector(fb.UOffsetT(vec))) + stop := start + sz + + return tab.Bytes[start:stop] } diff --git a/go/store/prolly/node_cursor.go b/go/store/prolly/node_cursor.go index 3a3163b90b..161056fe56 100644 --- a/go/store/prolly/node_cursor.go +++ b/go/store/prolly/node_cursor.go @@ -203,7 +203,7 @@ func (cur *nodeCursor) isLeaf() bool { } func (cur *nodeCursor) level() uint64 { - return uint64(cur.nd.level()) + return uint64(cur.nd.level) } func (cur *nodeCursor) seek(ctx context.Context, item nodeItem, cb compareFn) (err error) { diff --git a/go/store/prolly/node_test.go b/go/store/prolly/node_test.go index c3147ba05d..8a6ab93e01 100644 --- a/go/store/prolly/node_test.go +++ b/go/store/prolly/node_test.go @@ -18,6 +18,10 @@ import ( "encoding/binary" "math/rand" "testing" + "unsafe" + + "github.com/dolthub/dolt/go/gen/fb/serial" + "github.com/dolthub/dolt/go/store/val" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -51,8 +55,33 @@ func TestRoundTripNodeItems(t *testing.T) { } } +func TestGetKeyValueOffsetsVectors(t *testing.T) { + for trial := 0; trial < 100; trial++ { + keys, values := randomNodeItemPairs(t, (rand.Int()%101)+50) + 
require.True(t, sumSize(keys)+sumSize(values) < maxVectorOffset) + nd := newLeafNode(keys, values) + + ko1, vo1 := offsetsFromSlicedBuffers(nd.keys, nd.values) + ko2, vo2 := offsetsFromFlatbuffer(nd.buf) + + assert.Equal(t, len(ko1), len(ko2)) + assert.Equal(t, len(ko1), len(keys)-1) + assert.Equal(t, ko1, ko2) + + assert.Equal(t, len(vo1), len(vo2)) + assert.Equal(t, len(vo1), len(values)-1) + assert.Equal(t, vo1, vo2) + + } +} + +func TestNodeSize(t *testing.T) { + sz := unsafe.Sizeof(Node{}) + assert.Equal(t, 168, int(sz)) +} + func newLeafNode(keys, values []nodeItem) Node { - return makeMapNode(sharedPool, 0, keys, values) + return buildMapNode(sharedPool, 0, keys, values) } func randomNodeItemPairs(t *testing.T, count int) (keys, values []nodeItem) { @@ -89,3 +118,32 @@ func sumSize(items []nodeItem) (sz uint64) { } return } + +func offsetsFromFlatbuffer(buf serial.TupleMap) (ko, vo []uint16) { + ko = make([]uint16, buf.KeyOffsetsLength()) + for i := range ko { + ko[i] = buf.KeyOffsets(i) + } + + vo = make([]uint16, buf.ValueOffsetsLength()) + for i := range vo { + vo[i] = buf.ValueOffsets(i) + } + + return +} + +func offsetsFromSlicedBuffers(keys, values val.SlicedBuffer) (ko, vo []uint16) { + ko = deserializeOffsets(keys.Offs) + vo = deserializeOffsets(values.Offs) + return +} + +func deserializeOffsets(buf []byte) (offs []uint16) { + offs = make([]uint16, len(buf)/2) + for i := range offs { + start, stop := i*2, (i+1)*2 + offs[i] = binary.LittleEndian.Uint16(buf[start:stop]) + } + return +} diff --git a/go/store/prolly/tree_chunker_test.go b/go/store/prolly/tree_chunker_test.go index 0d88bd5697..c7cd1ed7f9 100644 --- a/go/store/prolly/tree_chunker_test.go +++ b/go/store/prolly/tree_chunker_test.go @@ -32,7 +32,7 @@ func roundTripTreeItems(t *testing.T) { root, items, ns := randomTree(t, 1000) assert.NotNil(t, root) assert.True(t, root.nodeCount() > 0) - assert.True(t, root.level() > 0) + assert.True(t, root.level > 0) //assert.Equal(t, uint64(1000), 
root.cumulativeCount()) assert.Equal(t, countTree(t, ns, root), 1000) validateTreeItems(t, ns, root, items) @@ -40,7 +40,7 @@ func roundTripTreeItems(t *testing.T) { root, items, ns = randomTree(t, 10_000) assert.NotNil(t, root) assert.True(t, root.nodeCount() > 0) - assert.True(t, root.level() > 0) + assert.True(t, root.level > 0) //assert.Equal(t, uint64(10_000), root.cumulativeCount()) assert.Equal(t, countTree(t, ns, root), 10_000) validateTreeItems(t, ns, root, items) @@ -48,7 +48,7 @@ func roundTripTreeItems(t *testing.T) { root, items, ns = randomTree(t, 100_000) assert.NotNil(t, root) assert.True(t, root.nodeCount() > 0) - assert.True(t, root.level() > 0) + assert.True(t, root.level > 0) //assert.Equal(t, uint64(100_000), root.cumulativeCount()) assert.Equal(t, countTree(t, ns, root), 100_000) validateTreeItems(t, ns, root, items) diff --git a/go/store/val/offsets.go b/go/store/val/sliced_buffer.go similarity index 71% rename from go/store/val/offsets.go rename to go/store/val/sliced_buffer.go index bd8cf8272d..000399ad6c 100644 --- a/go/store/val/offsets.go +++ b/go/store/val/sliced_buffer.go @@ -14,37 +14,35 @@ package val -type slicedBuffer struct { - buf []byte - offs offsets +type SlicedBuffer struct { + Buf []byte + Offs offsets } -func slicedTupleBuffer(tup Tuple) slicedBuffer { +func slicedTupleBuffer(tup Tuple) SlicedBuffer { mask := tup.mask() offStop := tup.size() - numFieldsSize - mask.size() bufStop := offStop - offsetsSize(mask.count()) - return slicedBuffer{ - buf: tup[:bufStop], - offs: offsets(tup[bufStop:offStop]), + return SlicedBuffer{ + Buf: tup[:bufStop], + Offs: offsets(tup[bufStop:offStop]), } } -// GetBounds returns the ith offset. |last| is the byte position -// of the _end_ of the last element. 
-func (sb slicedBuffer) getBounds(i int) (start, stop ByteSize) { - start = sb.offs.getOffset(i) - if sb.isLastIndex(i) { - stop = ByteSize(len(sb.buf)) - } else { - stop = sb.offs.getOffset(i + 1) +// GetSlice returns the ith slice of |sb.Buf|. +func (sb SlicedBuffer) GetSlice(i int) []byte { + start := sb.Offs.getOffset(i) + stop := ByteSize(len(sb.Buf)) + if !sb.isLastIndex(i) { + stop = sb.Offs.getOffset(i + 1) } - return + return sb.Buf[start:stop] } // isLastIndex returns true if |i| is the last index in |sl|. -func (sb slicedBuffer) isLastIndex(i int) bool { - return len(sb.offs) == i*2 +func (sb SlicedBuffer) isLastIndex(i int) bool { + return len(sb.Offs) == i*2 } type offsets []byte diff --git a/go/store/val/tuple.go b/go/store/val/tuple.go index 61d3208b89..3d856e248d 100644 --- a/go/store/val/tuple.go +++ b/go/store/val/tuple.go @@ -141,9 +141,7 @@ func (tup Tuple) GetField(i int) []byte { // index to compensate for NULL fields i = tup.fieldToValue(i) - start, stop := slicedTupleBuffer(tup).getBounds(i) - - return tup[start:stop] + return slicedTupleBuffer(tup).GetSlice(i) } func (tup Tuple) size() ByteSize { From d4760c30448db4083715e188b8424706f9d30700 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 14:54:44 -0800 Subject: [PATCH 062/105] fixed colors and have tags printing --- go/cmd/dolt/commands/log.go | 37 +++++++++++++++++++------- go/libraries/doltcore/doltdb/doltdb.go | 24 +++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 0960c06d1c..fc0f0afa32 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -122,6 +122,7 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args minParents = 2 } + // TODO: need to handle invalid decorate options decorateOption := apr.GetValueOrDefault(decorateParam, "auto") opts := logOpts{ numLines: apr.GetIntOrDefault(numLinesParam, -1), @@ -163,15 +164,33 @@ 
func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - // TODO: are branches associated to commits somewhere already? - branchHashToName := map[hash.Hash][]string{} - branches, _ := dEnv.DoltDB.GetBranchesWithHashes(ctx) + // TODO: easier way to get these associations? + cHashToRefs := map[hash.Hash][]string{} + branches, err := dEnv.DoltDB.GetBranchesWithHashes(ctx) + if err != nil { + cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information.")) + return 1 + } for _, b := range branches { refName := b.Ref.String() if opts.decoration != "full" { refName = refName[11:] // trim out "refs/heads/" } - branchHashToName[b.Hash] = append(branchHashToName[b.Hash], refName) + refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m) + cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName) + } + tags, err := dEnv.DoltDB.GetTagsWithHashes(ctx) + if err != nil { + cli.PrintErrln(color.HiRedString("Fatal error: cannot get Tag information.")) + return 1 + } + for _, t := range tags { + refName := t.Ref.String() + if opts.decoration != "full" { + refName = refName[11:] // trim out "refs/heads/" + } + refName = fmt.Sprintf("\033[33;1mtag: %s\033[0m", refName) // tags names are bright yellow (33;1m) + cHashToRefs[t.Hash] = append(cHashToRefs[t.Hash], refName) } h, err := commit.HashOf() @@ -217,7 +236,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - commitsInfo = append(commitsInfo, logNode{commitMeta: meta, commitHash: cmHash, parentHashes: pHashes, branchNames: branchHashToName[cmHash], isHead: cmHash == h}) + commitsInfo = append(commitsInfo, logNode{commitMeta: meta, commitHash: cmHash, parentHashes: pHashes, branchNames: cHashToRefs[cmHash], isHead: cmHash == h}) } logToStdOut(opts, commitsInfo) @@ -338,9 +357,9 @@ func logRefs(pager *outputpager.Pager, comm logNode) { // TODO: this doesn't handle remote branches 
pager.Writer.Write([]byte("\033[33m(\033[0m")) if comm.isHead { - pager.Writer.Write([]byte("\033[94mHEAD -> \033[0m")) + pager.Writer.Write([]byte("\033[36;1mHEAD -> \033[0m")) } - pager.Writer.Write([]byte(fmt.Sprintf("\033[92m%s\033[0m", strings.Join(comm.branchNames, ", ")))) + pager.Writer.Write([]byte(strings.Join(comm.branchNames, "\033[33m, \033[0m"))) // Separate with Dim Yellow comma pager.Writer.Write([]byte("\033[33m) \033[0m")) } @@ -359,7 +378,7 @@ func logCompact(pager *outputpager.Pager, opts logOpts, commits []logNode) { // TODO: use short hash instead // Write commit hash - pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33m%s \033[0m", chStr))) + pager.Writer.Write([]byte(fmt.Sprintf("\033[33m%s \033[0m", chStr))) // TODO: write tags here if opts.decoration != "no" { @@ -385,7 +404,7 @@ func logDefault(pager *outputpager.Pager, opts logOpts, commits []logNode) { } // Write commit hash - pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr))) + pager.Writer.Write([]byte(fmt.Sprintf("\033[33mcommit %s \033[0m", chStr))) // Use Dim Yellow (33m) // Show decoration if opts.decoration != "no" { diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index 8dfc35654f..09be8866be 100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -811,6 +811,30 @@ func (ddb *DoltDB) GetTags(ctx context.Context) ([]ref.DoltRef, error) { return ddb.GetRefsOfType(ctx, tagsRefFilter) } +type TagWithHash struct { + Ref ref.DoltRef + Hash hash.Hash +} + +func (ddb *DoltDB) GetTagsWithHashes(ctx context.Context) ([]TagWithHash, error) { + var refs []TagWithHash + err := ddb.VisitRefsOfType(ctx, tagsRefFilter, func(r ref.DoltRef, v types.Value) error { + if tr, ok := r.(ref.TagRef); ok { + tag, err := ddb.ResolveTag(ctx, tr) + if err != nil { + return err + } + h, err := tag.Commit.HashOf() + if err != nil { + return err + } + refs = append(refs, TagWithHash{r, h}) + } + 
return nil + }) + return refs, err +} + var workspacesRefFilter = map[ref.RefType]struct{}{ref.WorkspaceRefType: {}} // GetWorkspaces returns a list of all workspaces in the database. From 57f6d85380896e5472c0e2519b545ae0308a556d Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 16:11:33 -0800 Subject: [PATCH 063/105] this is really messing with bats tests :( --- integration-tests/bats/sql.bats | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration-tests/bats/sql.bats b/integration-tests/bats/sql.bats index 69ec58ebda..44753abd05 100755 --- a/integration-tests/bats/sql.bats +++ b/integration-tests/bats/sql.bats @@ -1530,6 +1530,8 @@ SQL head_commit=$(get_head_commit) run dolt sql -q "SELECT COUNT(*) from dolt_diff_mytable where dolt_diff_mytable.to_commit IN ('$head_commit', '00200202')" + echo $head_commit + echo $output [ "$status" -eq 0 ] [[ "$output" =~ "| COUNT(*) |" ]] || false [[ "$output" =~ "| 2 |" ]] || false @@ -1633,7 +1635,7 @@ SQL } get_head_commit() { - dolt log -n 1 | grep -m 1 commit | cut -c 15-46 + dolt log -n 1 | grep -m 1 commit | cut -c 13-44 } @test "sql: sql -q query vertical format check" { From b6d66c3dae25485bc79eb4ae42e678c16756f30c Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 16:55:47 -0800 Subject: [PATCH 064/105] fixing more tests --- go/cmd/dolt/commands/log.go | 2 +- integration-tests/bats/log.bats | 30 +++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index fc0f0afa32..43362b0d5c 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -187,7 +187,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o for _, t := range tags { refName := t.Ref.String() if opts.decoration != "full" { - refName = refName[11:] // trim out "refs/heads/" + refName = refName[10:] // trim out "refs/tags/" } refName = fmt.Sprintf("\033[33;1mtag: 
%s\033[0m", refName) // tags names are bright yellow (33;1m) cHashToRefs[t.Hash] = append(cHashToRefs[t.Hash], refName) diff --git a/integration-tests/bats/log.bats b/integration-tests/bats/log.bats index c32df27b31..14764ceb87 100755 --- a/integration-tests/bats/log.bats +++ b/integration-tests/bats/log.bats @@ -311,6 +311,34 @@ teardown() { [[ "$output" =~ $regex ]] || false } +@test "log: --oneline removes all new lines" { + dolt commit --allow-empty -m "commit 1" + dolt commit --allow-empty -m "commit 2" + res=$(dolt log --oneline | wc -l) + [ "$res" -eq 3 ] # don't forget initial commit +} + +@test "log: --decorate=short shows trimmed branches and tags" { + dolt tag tag_v0 + run dolt log --decorate=short + [[ "$output" =~ "main" ]] || false + [[ "$output" =~ "tag_v0" ]] || false +} + +@test "log: --decorate=full shows full branches and tags" { + dolt tag tag_v0 + run dolt log --decorate=full + [[ "$output" =~ "refs/heads/main" ]] || false + [[ "$output" =~ "refs/tags/tag_v0" ]] || false +} + +@test "log: --decorate=no doesn't show branches or tags" { + dolt tag tag_v0 + run dolt log --decorate=no + [[ !("$output" =~ "main") ]] || false + [[ !("$output" =~ "tag_v0") ]] || false +} + @test "log: check pager" { skiponwindows "Need to install expect and make this script work on windows." 
dolt commit --allow-empty -m "commit 1" @@ -332,4 +360,4 @@ teardown() { run expect $BATS_TEST_DIRNAME/log.expect [ "$status" -eq 0 ] -} +} \ No newline at end of file From d1f84d63e66d9af7a2e496b67d67d11c1323a253 Mon Sep 17 00:00:00 2001 From: James Cor Date: Thu, 10 Feb 2022 17:01:28 -0800 Subject: [PATCH 065/105] fixing all things that call get_head_commits --- integration-tests/bats/git-dolt.bats | 2 +- integration-tests/bats/sql-commit.bats | 2 +- integration-tests/bats/sql-merge.bats | 2 +- integration-tests/bats/sql-reset.bats | 2 +- integration-tests/bats/status.bats | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/integration-tests/bats/git-dolt.bats b/integration-tests/bats/git-dolt.bats index a9c1d53b7f..bb03519283 100755 --- a/integration-tests/bats/git-dolt.bats +++ b/integration-tests/bats/git-dolt.bats @@ -192,5 +192,5 @@ EOF } get_head_commit() { - dolt log -n 1 | grep -m 1 commit | cut -c 15-46 + dolt log -n 1 | grep -m 1 commit | cut -c 13-44 } diff --git a/integration-tests/bats/sql-commit.bats b/integration-tests/bats/sql-commit.bats index 91025a15e8..8afe981728 100644 --- a/integration-tests/bats/sql-commit.bats +++ b/integration-tests/bats/sql-commit.bats @@ -258,5 +258,5 @@ SQL } get_head_commit() { - dolt log -n 1 | grep -m 1 commit | cut -c 15-46 + dolt log -n 1 | grep -m 1 commit | cut -c 13-44 } diff --git a/integration-tests/bats/sql-merge.bats b/integration-tests/bats/sql-merge.bats index be744e17a6..2ddb859b5b 100644 --- a/integration-tests/bats/sql-merge.bats +++ b/integration-tests/bats/sql-merge.bats @@ -742,7 +742,7 @@ SQL } get_head_commit() { - dolt log -n 1 | grep -m 1 commit | cut -c 15-46 + dolt log -n 1 | grep -m 1 commit | cut -c 13-44 } get_working_hash() { diff --git a/integration-tests/bats/sql-reset.bats b/integration-tests/bats/sql-reset.bats index 03ea8e71e9..2387a71900 100644 --- a/integration-tests/bats/sql-reset.bats +++ b/integration-tests/bats/sql-reset.bats @@ -280,6 +280,6 @@ SQL } 
get_head_commit() { - dolt log -n 1 | grep -m 1 commit | cut -c 15-46 + dolt log -n 1 | grep -m 1 commit | cut -c 13-44 } diff --git a/integration-tests/bats/status.bats b/integration-tests/bats/status.bats index 5e5dedd246..519d3ff253 100644 --- a/integration-tests/bats/status.bats +++ b/integration-tests/bats/status.bats @@ -11,7 +11,7 @@ teardown() { } get_head_commit() { - dolt log -n 1 | grep -m 1 commit | cut -c 15-46 + dolt log -n 1 | grep -m 1 commit | cut -c 13-44 } @test "status: dolt version --feature" { From 3527acb4b5f6244ba608921ece24f8f5621d55a0 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 10 Feb 2022 17:44:04 -0800 Subject: [PATCH 066/105] refactored serialization for JSON, geometry types --- .../doltcore/schema/typeinfo/linestring.go | 7 +- .../doltcore/schema/typeinfo/point.go | 7 +- .../doltcore/schema/typeinfo/polygon.go | 9 +- .../doltcore/sqle/index/prolly_fields.go | 144 +++++------------- .../doltcore/sqle/index/prolly_index_iter.go | 28 +++- .../doltcore/sqle/index/prolly_row_iter.go | 10 +- .../sqle/writer/prolly_table_writer.go | 12 +- go/store/geometry/read_geometry.go | 81 ++++++++++ go/store/geometry/write_geometry.go | 104 +++++++++++++ go/store/types/linestring.go | 22 +-- go/store/types/point.go | 59 ++----- go/store/types/polygon.go | 24 +-- go/store/types/value_decoder.go | 19 +-- go/store/val/tuple_builder.go | 11 +- go/store/val/tuple_descriptor.go | 7 +- 15 files changed, 327 insertions(+), 217 deletions(-) create mode 100644 go/store/geometry/read_geometry.go create mode 100644 go/store/geometry/write_geometry.go diff --git a/go/libraries/doltcore/schema/typeinfo/linestring.go b/go/libraries/doltcore/schema/typeinfo/linestring.go index f51a34b542..d77fe9dcdb 100644 --- a/go/libraries/doltcore/schema/typeinfo/linestring.go +++ b/go/libraries/doltcore/schema/typeinfo/linestring.go @@ -20,6 +20,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/dolt/go/store/geometry" 
"github.com/dolthub/dolt/go/store/types" ) @@ -108,9 +109,9 @@ func (ti *linestringType) Equals(other TypeInfo) bool { // FormatValue implements TypeInfo interface. func (ti *linestringType) FormatValue(v types.Value) (*string, error) { if val, ok := v.(types.Linestring); ok { - buf := make([]byte, types.EWKBHeaderSize+types.LengthSize+types.PointDataSize*len(val.Points)) - types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize]) - types.WriteEWKBLineData(val, buf[types.EWKBHeaderSize:]) + buf := make([]byte, geometry.EWKBHeaderSize+types.LengthSize+geometry.PointSize*len(val.Points)) + types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize]) + types.WriteEWKBLineData(val, buf[geometry.EWKBHeaderSize:]) resStr := string(buf) return &resStr, nil } diff --git a/go/libraries/doltcore/schema/typeinfo/point.go b/go/libraries/doltcore/schema/typeinfo/point.go index 0ddfec4154..9fb2c70bea 100644 --- a/go/libraries/doltcore/schema/typeinfo/point.go +++ b/go/libraries/doltcore/schema/typeinfo/point.go @@ -20,6 +20,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/dolt/go/store/geometry" "github.com/dolthub/dolt/go/store/types" ) @@ -102,9 +103,9 @@ func (ti *pointType) Equals(other TypeInfo) bool { // FormatValue implements TypeInfo interface. 
func (ti *pointType) FormatValue(v types.Value) (*string, error) { if val, ok := v.(types.Point); ok { - buf := make([]byte, types.EWKBHeaderSize+types.PointDataSize) - types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize]) - types.WriteEWKBPointData(val, buf[types.EWKBHeaderSize:]) + buf := make([]byte, geometry.EWKBHeaderSize+geometry.PointSize) + types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize]) + types.WriteEWKBPointData(val, buf[geometry.EWKBHeaderSize:]) resStr := string(buf) return &resStr, nil } diff --git a/go/libraries/doltcore/schema/typeinfo/polygon.go b/go/libraries/doltcore/schema/typeinfo/polygon.go index 5c572f28b3..867ecb20ca 100644 --- a/go/libraries/doltcore/schema/typeinfo/polygon.go +++ b/go/libraries/doltcore/schema/typeinfo/polygon.go @@ -20,6 +20,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/dolt/go/store/geometry" "github.com/dolthub/dolt/go/store/types" ) @@ -108,13 +109,13 @@ func (ti *polygonType) Equals(other TypeInfo) bool { // FormatValue implements TypeInfo interface. 
func (ti *polygonType) FormatValue(v types.Value) (*string, error) { if val, ok := v.(types.Polygon); ok { - size := types.EWKBHeaderSize + types.LengthSize + size := geometry.EWKBHeaderSize + types.LengthSize for _, l := range val.Lines { - size += types.LengthSize + types.PointDataSize*len(l.Points) + size += types.LengthSize + geometry.PointSize*len(l.Points) } buf := make([]byte, size) - types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize]) - types.WriteEWKBPolyData(val, buf[types.EWKBHeaderSize:]) + types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize]) + types.WriteEWKBPolyData(val, buf[geometry.EWKBHeaderSize:]) resStr := string(buf) return &resStr, nil } diff --git a/go/libraries/doltcore/sqle/index/prolly_fields.go b/go/libraries/doltcore/sqle/index/prolly_fields.go index 8e458e419a..e37197046f 100644 --- a/go/libraries/doltcore/sqle/index/prolly_fields.go +++ b/go/libraries/doltcore/sqle/index/prolly_fields.go @@ -15,18 +15,18 @@ package index import ( - "encoding/binary" + "encoding/json" "fmt" "time" "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/expression/function" + geo "github.com/dolthub/dolt/go/store/geometry" "github.com/dolthub/dolt/go/store/val" ) // GetField reads the value from the ith field of the Tuple as an interface{}. 
-func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}) { +func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}, err error) { var ok bool switch td.Types[i].Enc { case val.Int8Enc: @@ -62,30 +62,32 @@ func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}) { case val.BytesEnc: v, ok = td.GetBytes(i, tup) case val.JSONEnc: - var js interface{} - js, ok = td.GetJSON(i, tup) + var buf []byte + buf, ok = td.GetJSON(i, tup) if ok { - v = sql.JSONDocument{Val: js} + var doc sql.JSONDocument + err = json.Unmarshal(buf, &doc.Val) + v = doc } case val.GeometryEnc: - var geo []byte - geo, ok = td.GetGeometry(i, tup) + var buf []byte + buf, ok = td.GetGeometry(i, tup) if ok { - v = deserializeGeometry(geo) + v = deserializeGeometry(buf) } default: panic("unknown val.encoding") } - if !ok { - return nil + if !ok || err != nil { + return nil, err } - return v + return v, err } // PutField writes an interface{} to the ith field of the Tuple being built. -func PutField(tb *val.TupleBuilder, i int, v interface{}) { +func PutField(tb *val.TupleBuilder, i int, v interface{}) error { if v == nil { - return // NULL + return nil // NULL } enc := tb.Desc.Types[i].Enc @@ -126,17 +128,16 @@ func PutField(tb *val.TupleBuilder, i int, v interface{}) { } tb.PutBytes(i, v.([]byte)) case val.GeometryEnc: - // todo(andy): remove GMS dependency tb.PutGeometry(i, serializeGeometry(v)) case val.JSONEnc: - // todo(andy): remove GMS dependency - tb.PutJSON(i, v.(sql.JSONDocument).Val) + buf, err := json.Marshal(v.(sql.JSONDocument).Val) + if err != nil { + return err + } + tb.PutJSON(i, buf) default: panic(fmt.Sprintf("unknown encoding %v %v", enc, v)) } -} - -func deserializeGeometry(buf []byte) (v interface{}) { return nil } @@ -192,102 +193,31 @@ func convUint(v interface{}) uint { } } -// todo(andy): remove GMS dependency -// have the engine pass serialized bytes - -const ( - sridSize = val.ByteSize(4) - endianSize = val.ByteSize(1) - typeSize = 
val.ByteSize(4) - ewkbHeaderSize = sridSize + endianSize + typeSize -) - -const ( - pointType = uint32(1) - linestringType = uint32(2) - polygonType = uint32(3) - - littleEndian = uint8(1) -) - -type ewkbHeader struct { - srid uint32 - endian uint8 - typ uint32 -} - -func (h ewkbHeader) writeTo(buf []byte) { - expectSize(buf, ewkbHeaderSize) - binary.LittleEndian.PutUint32(buf[:sridSize], h.srid) - buf[sridSize] = h.endian - binary.LittleEndian.PutUint32(buf[sridSize+endianSize:ewkbHeaderSize], h.typ) -} - -func readHeaderFrom(buf []byte) (h ewkbHeader) { - expectSize(buf, ewkbHeaderSize) - h.srid = binary.LittleEndian.Uint32(buf[:sridSize]) - h.endian = uint8(buf[sridSize]) - h.typ = binary.LittleEndian.Uint32(buf[sridSize+endianSize : ewkbHeaderSize]) +func deserializeGeometry(buf []byte) (v interface{}) { + srid, _, typ := geo.ParseEWKBHeader(buf) + buf = buf[geo.EWKBHeaderSize:] + switch typ { + case geo.PointType: + v = geo.DeserializePoint(buf, srid) + case geo.LinestringType: + v = geo.DeserializeLinestring(buf, srid) + case geo.PolygonType: + v = geo.DeserializePolygon(srid, buf) + default: + panic(fmt.Sprintf("unknown geometry type %d", typ)) + } return } func serializeGeometry(v interface{}) []byte { switch t := v.(type) { case sql.Point: - return serializePoint(t) + return geo.SerializePoint(t) case sql.Linestring: - return serializeLinestring(t) + return geo.SerializeLinestring(t) case sql.Polygon: - return serializePolygon(t) + return geo.SerializePolygon(t) default: panic(fmt.Sprintf("unknown geometry %v", v)) } } - -func serializePoint(p sql.Point) (buf []byte) { - pb := function.PointToBytes(p) - buf = make([]byte, ewkbHeaderSize+val.ByteSize(len(pb))) - copy(buf[ewkbHeaderSize:], pb) - - h := ewkbHeader{ - srid: p.SRID, - endian: littleEndian, - typ: pointType, - } - h.writeTo(buf[:ewkbHeaderSize]) - return -} - -func serializeLinestring(l sql.Linestring) (buf []byte) { - lb := function.LineToBytes(l) - buf = make([]byte, 
ewkbHeaderSize+val.ByteSize(len(lb))) - copy(buf[ewkbHeaderSize:], lb) - - h := ewkbHeader{ - srid: l.SRID, - endian: littleEndian, - typ: linestringType, - } - h.writeTo(buf[:ewkbHeaderSize]) - return -} - -func serializePolygon(p sql.Polygon) (buf []byte) { - pb := function.PolyToBytes(p) - buf = make([]byte, ewkbHeaderSize+val.ByteSize(len(pb))) - copy(buf[ewkbHeaderSize:], pb) - - h := ewkbHeader{ - srid: p.SRID, - endian: littleEndian, - typ: polygonType, - } - h.writeTo(buf[:ewkbHeaderSize]) - return -} - -func expectSize(buf []byte, sz val.ByteSize) { - if val.ByteSize(len(buf)) != sz { - panic("byte slice is not of expected size") - } -} diff --git a/go/libraries/doltcore/sqle/index/prolly_index_iter.go b/go/libraries/doltcore/sqle/index/prolly_index_iter.go index 7d17aa7965..0a793e7c5f 100644 --- a/go/libraries/doltcore/sqle/index/prolly_index_iter.go +++ b/go/libraries/doltcore/sqle/index/prolly_index_iter.go @@ -137,20 +137,26 @@ func (p prollyIndexIter) queueRows(ctx context.Context) error { } } -func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) { +func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) (err error) { keyDesc, valDesc := p.primary.Descriptors() for keyIdx, rowIdx := range p.keyMap { if rowIdx == -1 { continue } - r[rowIdx] = GetField(keyDesc, keyIdx, key) + r[rowIdx], err = GetField(keyDesc, keyIdx, key) + if err != nil { + return err + } } for valIdx, rowIdx := range p.valMap { if rowIdx == -1 { continue } - r[rowIdx] = GetField(valDesc, valIdx, value) + r[rowIdx], err = GetField(valDesc, valIdx, value) + if err != nil { + return err + } } return @@ -241,18 +247,23 @@ func (p prollyCoveringIndexIter) Next(ctx *sql.Context) (sql.Row, error) { } r := make(sql.Row, len(p.keyMap)) - p.writeRowFromTuples(k, v, r) + if err := p.writeRowFromTuples(k, v, r); err != nil { + return nil, err + } return r, nil } -func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.Row) { +func (p 
prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.Row) (err error) { for to := range p.keyMap { from := p.keyMap.MapOrdinal(to) if from == -1 { continue } - r[to] = GetField(p.keyDesc, from, key) + r[to], err = GetField(p.keyDesc, from, key) + if err != nil { + return err + } } for to := range p.valMap { @@ -260,7 +271,10 @@ func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql. if from == -1 { continue } - r[to] = GetField(p.valDesc, from, value) + r[to], err = GetField(p.valDesc, from, value) + if err != nil { + return err + } } return diff --git a/go/libraries/doltcore/sqle/index/prolly_row_iter.go b/go/libraries/doltcore/sqle/index/prolly_row_iter.go index 013d7c2256..51b79f1b98 100644 --- a/go/libraries/doltcore/sqle/index/prolly_row_iter.go +++ b/go/libraries/doltcore/sqle/index/prolly_row_iter.go @@ -121,13 +121,19 @@ func (it prollyRowIter) Next(ctx *sql.Context) (sql.Row, error) { if rowIdx == -1 { continue } - row[rowIdx] = GetField(it.keyDesc, keyIdx, key) + row[rowIdx], err = GetField(it.keyDesc, keyIdx, key) + if err != nil { + return nil, err + } } for valIdx, rowIdx := range it.valProj { if rowIdx == -1 { continue } - row[rowIdx] = GetField(it.valDesc, valIdx, value) + row[rowIdx], err = GetField(it.valDesc, valIdx, value) + if err != nil { + return nil, err + } } return row, nil diff --git a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go index 44f5447a7a..e609077061 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go +++ b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go @@ -351,26 +351,30 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R } func (m prollyIndexWriter) primaryKeyError(ctx context.Context, key val.Tuple) error { - existing := make(sql.Row, len(m.keyMap)+len(m.valMap)) + dupe := make(sql.Row, len(m.keyMap)+len(m.valMap)) _ = m.mut.Get(ctx, key, func(key, value 
val.Tuple) (err error) { kd := m.keyBld.Desc for from := range m.keyMap { to := m.keyMap.MapOrdinal(from) - existing[to] = index.GetField(kd, from, key) + if dupe[to], err = index.GetField(kd, from, key); err != nil { + return err + } } vd := m.valBld.Desc for from := range m.valMap { to := m.valMap.MapOrdinal(from) - existing[to] = index.GetField(vd, from, value) + if dupe[to], err = index.GetField(vd, from, value); err != nil { + return err + } } return }) s := m.keyBld.Desc.Format(key) - return sql.NewUniqueKeyErr(s, true, existing) + return sql.NewUniqueKeyErr(s, true, dupe) } func ordinalMappingsFromSchema(from sql.Schema, to schema.Schema) (km, vm val.OrdinalMapping) { diff --git a/go/store/geometry/read_geometry.go b/go/store/geometry/read_geometry.go new file mode 100644 index 0000000000..5fafff5d03 --- /dev/null +++ b/go/store/geometry/read_geometry.go @@ -0,0 +1,81 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package geometry + +import ( + "encoding/binary" + "math" + + "github.com/dolthub/go-mysql-server/sql" +) + +// ParseEWKBHeader converts the header potion of a EWKB byte array to srid, endianness, and geometry type +func ParseEWKBHeader(buf []byte) (srid uint32, bigEndian bool, typ uint32) { + srid = binary.LittleEndian.Uint32(buf[0:SRIDSize]) // First 4 bytes is SRID always in little endian + bigEndian = buf[SRIDSize] == 0 // Next byte is endianness + typ = binary.LittleEndian.Uint32(buf[SRIDSize+EndianSize : EWKBHeaderSize]) // Next 4 bytes is type + return +} + +func ParseEWKBPoint(buf []byte) (x, y float64) { + x = math.Float64frombits(binary.LittleEndian.Uint64(buf[:PointSize/2])) + y = math.Float64frombits(binary.LittleEndian.Uint64(buf[PointSize/2:])) + return +} + +func DeserializePoint(buf []byte, srid uint32) (p sql.Point) { + p.SRID = srid + p.X, p.Y = ParseEWKBPoint(buf) + return +} + +func DeserializeLinestring(buf []byte, srid uint32) (l sql.Linestring) { + l.SRID = srid + l.Points = readPointSlice(buf, srid) + return +} + +func DeserializePolygon(srid uint32, buf []byte) (p sql.Polygon) { + p.SRID = srid + p.Lines = readLineSlice(buf, srid) + return +} + +func readCount(buf []byte) uint32 { + return binary.LittleEndian.Uint32(buf) +} + +func readPointSlice(buf []byte, srid uint32) (points []sql.Point) { + points = make([]sql.Point, readCount(buf)) + buf = buf[CountSize:] + for i := range points { + points[i].SRID = srid + points[i].X, points[i].Y = ParseEWKBPoint(buf) + buf = buf[PointSize:] + } + return +} + +func readLineSlice(buf []byte, srid uint32) (lines []sql.Linestring) { + lines = make([]sql.Linestring, readCount(buf)) + buf = buf[CountSize:] + for i := range lines { + lines[i].SRID = srid + lines[i].Points = readPointSlice(buf, srid) + sz := len(lines[i].Points) * PointSize + buf = buf[sz:] + } + return +} diff --git a/go/store/geometry/write_geometry.go b/go/store/geometry/write_geometry.go new file mode 100644 index 
0000000000..84a06d1afb --- /dev/null +++ b/go/store/geometry/write_geometry.go @@ -0,0 +1,104 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package geometry + +import ( + "encoding/binary" + "math" + + "github.com/dolthub/go-mysql-server/sql" +) + +const ( + SRIDSize = 4 + EndianSize = 1 + TypeSize = 4 + EWKBHeaderSize = SRIDSize + EndianSize + TypeSize + + PointSize = 16 + CountSize = 4 +) + +const ( + PointType = 1 + LinestringType = 2 + PolygonType = 3 +) + +func allocateBuffer(numPoints, numCounts int) []byte { + return make([]byte, EWKBHeaderSize+PointSize*numPoints+CountSize*numCounts) +} + +func WriteEWKBHeader(buf []byte, srid, typ uint32) { + binary.LittleEndian.PutUint32(buf[0:SRIDSize], srid) + buf[SRIDSize] = 1 + binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], typ) +} + +func WriteEWKBPointData(buf []byte, x, y float64) { + binary.LittleEndian.PutUint64(buf[:PointSize/2], math.Float64bits(x)) + binary.LittleEndian.PutUint64(buf[PointSize/2:], math.Float64bits(y)) +} + +func SerializePoint(p sql.Point) (buf []byte) { + buf = allocateBuffer(1, 0) + WriteEWKBHeader(buf[:EWKBHeaderSize], p.SRID, PointType) + WriteEWKBPointData(buf[EWKBHeaderSize:], p.X, p.Y) + return +} + +func SerializeLinestring(l sql.Linestring) (buf []byte) { + buf = allocateBuffer(len(l.Points), 1) + WriteEWKBHeader(buf[:EWKBHeaderSize], l.SRID, LinestringType) + writePointSlice(buf[EWKBHeaderSize:], l.Points) + 
return +} + +func SerializePolygon(p sql.Polygon) (buf []byte) { + buf = allocateBuffer(countPoints(p), len(p.Lines)+1) + WriteEWKBHeader(buf[:EWKBHeaderSize], p.SRID, PolygonType) + writeLineSlice(buf[EWKBHeaderSize:], p.Lines) + return +} + +func writeCount(buf []byte, count uint32) { + binary.LittleEndian.PutUint32(buf, count) +} + +func writePointSlice(buf []byte, points []sql.Point) { + writeCount(buf, uint32(len(points))) + buf = buf[CountSize:] + for _, p := range points { + WriteEWKBPointData(buf, p.X, p.Y) + buf = buf[PointSize:] + } +} + +func writeLineSlice(buf []byte, lines []sql.Linestring) { + writeCount(buf, uint32(len(lines))) + buf = buf[CountSize:] + for _, l := range lines { + writePointSlice(buf, l.Points) + sz := len(l.Points) * PointSize + buf = buf[sz:] + } +} + +func countPoints(p sql.Polygon) (cnt int) { + for _, line := range p.Lines { + cnt += len(line.Points) + } + return +} diff --git a/go/store/types/linestring.go b/go/store/types/linestring.go index a0957a0e5d..dd853cde50 100644 --- a/go/store/types/linestring.go +++ b/go/store/types/linestring.go @@ -22,6 +22,8 @@ import ( "strconv" "strings" + "github.com/dolthub/dolt/go/store/geometry" + "github.com/dolthub/dolt/go/store/hash" ) @@ -134,7 +136,7 @@ func WriteEWKBLineData(l Linestring, buf []byte) { binary.LittleEndian.PutUint32(buf[:LengthSize], uint32(len(l.Points))) // Append each point for i, p := range l.Points { - WriteEWKBPointData(p, buf[LengthSize+PointDataSize*i:LengthSize+PointDataSize*(i+1)]) + WriteEWKBPointData(p, buf[LengthSize+geometry.PointSize*i:LengthSize+geometry.PointSize*(i+1)]) } } @@ -145,11 +147,11 @@ func (v Linestring) writeTo(w nomsWriter, nbf *NomsBinFormat) error { } // Allocate buffer for linestring - buf := make([]byte, EWKBHeaderSize+LengthSize+PointDataSize*len(v.Points)) + buf := make([]byte, geometry.EWKBHeaderSize+LengthSize+geometry.PointSize*len(v.Points)) // Write header and data to buffer WriteEWKBHeader(v, buf) - WriteEWKBLineData(v, 
buf[EWKBHeaderSize:]) + WriteEWKBLineData(v, buf[geometry.EWKBHeaderSize:]) w.writeString(string(buf)) return nil @@ -164,7 +166,7 @@ func ParseEWKBLine(buf []byte, srid uint32) Linestring { // Parse points points := make([]Point, numPoints) for i := uint32(0); i < numPoints; i++ { - points[i] = ParseEWKBPoint(buf[LengthSize+PointDataSize*i:LengthSize+PointDataSize*(i+1)], srid) + points[i] = ParseEWKBPoint(buf[LengthSize+geometry.PointSize*i:LengthSize+geometry.PointSize*(i+1)], srid) } return Linestring{SRID: srid, Points: points} @@ -172,20 +174,20 @@ func ParseEWKBLine(buf []byte, srid uint32) Linestring { func readLinestring(nbf *NomsBinFormat, b *valueDecoder) (Linestring, error) { buf := []byte(b.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - if geomType != LinestringID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.LinestringType { return Linestring{}, errors.New("not a linestring") } - return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil } func (v Linestring) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) { buf := []byte(b.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - if geomType != LinestringID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.LinestringType { return nil, errors.New("not a linestring") } - return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil } func (v Linestring) skip(nbf *NomsBinFormat, b *binaryNomsReader) { diff --git a/go/store/types/point.go b/go/store/types/point.go index bf27a7d51e..a3d5a06580 100644 --- a/go/store/types/point.go +++ b/go/store/types/point.go @@ -16,24 +16,13 @@ package types import ( "context" - "encoding/binary" "errors" "fmt" - "math" "strconv" - "github.com/dolthub/dolt/go/store/hash" -) + "github.com/dolthub/dolt/go/store/geometry" -const ( - SRIDSize = 4 - EndianSize = 1 
- TypeSize = 4 - EWKBHeaderSize = SRIDSize + EndianSize + TypeSize - PointDataSize = 16 - PointID = 1 - LinestringID = 2 - PolygonID = 3 + "github.com/dolthub/dolt/go/store/hash" ) // Point is a Noms Value wrapper around the primitive string type (for now). @@ -93,29 +82,21 @@ func (v Point) valueReadWriter() ValueReadWriter { // WriteEWKBHeader writes the SRID, endianness, and type to the byte buffer // This function assumes v is a valid spatial type func WriteEWKBHeader(v interface{}, buf []byte) { - // Write endianness byte (always little endian) - buf[SRIDSize] = 1 - - // Parse data switch v := v.(type) { case Point: // Write SRID and type - binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID) - binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], PointID) + geometry.WriteEWKBHeader(buf, v.SRID, geometry.PointType) case Linestring: - binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID) - binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], LinestringID) + geometry.WriteEWKBHeader(buf, v.SRID, geometry.LinestringType) case Polygon: - binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID) - binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], PolygonID) + geometry.WriteEWKBHeader(buf, v.SRID, geometry.PolygonType) } } // WriteEWKBPointData converts a Point into a byte array in EWKB format // Very similar to function in GMS func WriteEWKBPointData(p Point, buf []byte) { - binary.LittleEndian.PutUint64(buf[:PointDataSize/2], math.Float64bits(p.X)) - binary.LittleEndian.PutUint64(buf[PointDataSize/2:], math.Float64bits(p.Y)) + geometry.WriteEWKBPointData(buf, p.X, p.Y) } func (v Point) writeTo(w nomsWriter, nbf *NomsBinFormat) error { @@ -126,49 +107,39 @@ func (v Point) writeTo(w nomsWriter, nbf *NomsBinFormat) error { } // Allocate buffer for point 4 + 1 + 4 + 16 - buf := make([]byte, EWKBHeaderSize+PointDataSize) + buf := make([]byte, geometry.EWKBHeaderSize+geometry.PointSize) // Write header and data to 
buffer WriteEWKBHeader(v, buf) - WriteEWKBPointData(v, buf[EWKBHeaderSize:]) + WriteEWKBPointData(v, buf[geometry.EWKBHeaderSize:]) w.writeString(string(buf)) return nil } -// ParseEWKBHeader converts the header potion of a EWKB byte array to srid, endianness, and geometry type -func ParseEWKBHeader(buf []byte) (uint32, bool, uint32) { - srid := binary.LittleEndian.Uint32(buf[0:SRIDSize]) // First 4 bytes is SRID always in little endian - isBig := buf[SRIDSize] == 0 // Next byte is endianness - geomType := binary.LittleEndian.Uint32(buf[SRIDSize+EndianSize : EWKBHeaderSize]) // Next 4 bytes is type - return srid, isBig, geomType -} - // ParseEWKBPoint converts the data portion of a WKB point to Point // Very similar logic to the function in GMS func ParseEWKBPoint(buf []byte, srid uint32) Point { - // Read floats x and y - x := math.Float64frombits(binary.LittleEndian.Uint64(buf[:PointDataSize/2])) - y := math.Float64frombits(binary.LittleEndian.Uint64(buf[PointDataSize/2:])) + x, y := geometry.ParseEWKBPoint(buf) return Point{SRID: srid, X: x, Y: y} } func readPoint(nbf *NomsBinFormat, b *valueDecoder) (Point, error) { buf := []byte(b.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) // Assume it's always little endian - if geomType != PointID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) // Assume it's always little endian + if geomType != geometry.PointType { return Point{}, errors.New("not a point") } - return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil } func (v Point) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) { buf := []byte(b.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) // Assume it's always little endian - if geomType != PointID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) // Assume it's always little endian + if geomType != geometry.PointType { return Point{}, errors.New("not a point") } - return 
ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil } func (v Point) skip(nbf *NomsBinFormat, b *binaryNomsReader) { diff --git a/go/store/types/polygon.go b/go/store/types/polygon.go index 4baf4db7bc..ce108ded56 100644 --- a/go/store/types/polygon.go +++ b/go/store/types/polygon.go @@ -22,6 +22,8 @@ import ( "strconv" "strings" + "github.com/dolthub/dolt/go/store/geometry" + "github.com/dolthub/dolt/go/store/hash" ) @@ -128,7 +130,7 @@ func WriteEWKBPolyData(p Polygon, buf []byte) { // Write each line start, stop := 0, LengthSize for _, l := range p.Lines { - start, stop = stop, stop+LengthSize+PointDataSize*len(l.Points) + start, stop = stop, stop+LengthSize+geometry.PointSize*len(l.Points) WriteEWKBLineData(l, buf[start:stop]) } } @@ -142,15 +144,15 @@ func (v Polygon) writeTo(w nomsWriter, nbf *NomsBinFormat) error { // Calculate space for polygon buffer size := 0 for _, l := range v.Lines { - size += LengthSize + PointDataSize*len(l.Points) + size += LengthSize + geometry.PointSize*len(l.Points) } // Allocate buffer for poly - buf := make([]byte, EWKBHeaderSize+LengthSize+size) + buf := make([]byte, geometry.EWKBHeaderSize+LengthSize+size) // Write header and data to buffer WriteEWKBHeader(v, buf) - WriteEWKBPolyData(v, buf[EWKBHeaderSize:]) + WriteEWKBPolyData(v, buf[geometry.EWKBHeaderSize:]) w.writeString(string(buf)) return nil @@ -167,7 +169,7 @@ func ParseEWKBPoly(buf []byte, srid uint32) Polygon { lines := make([]Linestring, numLines) for i := uint32(0); i < numLines; i++ { lines[i] = ParseEWKBLine(buf[s:], srid) - s += LengthSize * PointDataSize * len(lines[i].Points) + s += LengthSize * geometry.PointSize * len(lines[i].Points) } return Polygon{SRID: srid, Lines: lines} @@ -175,20 +177,20 @@ func ParseEWKBPoly(buf []byte, srid uint32) Polygon { func readPolygon(nbf *NomsBinFormat, b *valueDecoder) (Polygon, error) { buf := []byte(b.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - 
if geomType != PolygonID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.PolygonType { return Polygon{}, errors.New("not a polygon") } - return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil } func (v Polygon) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) { buf := []byte(b.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - if geomType != PolygonID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.PolygonType { return nil, errors.New("not a polygon") } - return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil } func (v Polygon) skip(nbf *NomsBinFormat, b *binaryNomsReader) { diff --git a/go/store/types/value_decoder.go b/go/store/types/value_decoder.go index 9da6e17e93..d80e54c17c 100644 --- a/go/store/types/value_decoder.go +++ b/go/store/types/value_decoder.go @@ -29,6 +29,7 @@ import ( "github.com/shopspring/decimal" "github.com/dolthub/dolt/go/store/d" + "github.com/dolthub/dolt/go/store/geometry" ) var ErrUnknownType = errors.New("unknown type $@") @@ -374,27 +375,27 @@ func (r *valueDecoder) readValue(nbf *NomsBinFormat) (Value, error) { case PointKind: r.skipKind() buf := []byte(r.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - if geomType != PointID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.PointType { return nil, ErrUnknownType } - return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil case LinestringKind: r.skipKind() buf := []byte(r.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - if geomType != LinestringID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.LinestringType { return nil, ErrUnknownType } - return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil + return 
ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil case PolygonKind: r.skipKind() buf := []byte(r.ReadString()) - srid, _, geomType := ParseEWKBHeader(buf) - if geomType != PolygonID { + srid, _, geomType := geometry.ParseEWKBHeader(buf) + if geomType != geometry.PolygonType { return nil, ErrUnknownType } - return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil + return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil case TypeKind: r.skipKind() return r.readType() diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index 5cfd231e8d..9a61c7f8bf 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -15,7 +15,6 @@ package val import ( - "encoding/json" "time" "github.com/dolthub/dolt/go/store/pool" @@ -212,15 +211,11 @@ func (tb *TupleBuilder) PutBytes(i int, v []byte) { } // PutJSON writes a []byte to the ith field of the Tuple being built. -func (tb *TupleBuilder) PutJSON(i int, v interface{}) { +func (tb *TupleBuilder) PutJSON(i int, v []byte) { tb.Desc.expectEncoding(i, JSONEnc) - buf, err := json.Marshal(v) - if err != nil { - panic(err) - } - sz := ByteSize(len(buf)) + sz := ByteSize(len(v)) tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] - writeBytes(tb.fields[i], buf) + writeBytes(tb.fields[i], v) tb.pos += sz } diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index 3689a8106d..899d0c8500 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -15,7 +15,6 @@ package val import ( - "encoding/json" "fmt" "strconv" "strings" @@ -278,13 +277,11 @@ func (td TupleDesc) GetBytes(i int, tup Tuple) (v []byte, ok bool) { // GetJSON reads a []byte from the ith field of the Tuple. // If the ith field is NULL, |ok| is set to false. 
-func (td TupleDesc) GetJSON(i int, tup Tuple) (v interface{}, ok bool) { +func (td TupleDesc) GetJSON(i int, tup Tuple) (v []byte, ok bool) { td.expectEncoding(i, JSONEnc) b := tup.GetField(i) if b != nil { - if err := json.Unmarshal(b, &v); err != nil { - panic(err) - } + v = readBytes(b) ok = true } return From 81510021b3b30f0d02a11c2768899b16e3b871e9 Mon Sep 17 00:00:00 2001 From: Hydrocharged Date: Fri, 11 Feb 2022 04:04:56 +0000 Subject: [PATCH 067/105] [ga-bump-dep] Bump dependency in Dolt by Hydrocharged --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 23095dfdec..c7106b8ec5 100644 --- a/go/go.mod +++ b/go/go.mod @@ -69,7 +69,7 @@ require ( ) require ( - github.com/dolthub/go-mysql-server v0.11.1-0.20220210013403-fce236834a13 + github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68 github.com/google/flatbuffers v2.0.5+incompatible github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/prometheus/client_golang v1.11.0 diff --git a/go/go.sum b/go/go.sum index 2f7822ae36..bcdaa34d8f 100755 --- a/go/go.sum +++ b/go/go.sum @@ -172,8 +172,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220210013403-fce236834a13 h1:zDkSqJNkKzHhqHqa4ZH2KiT1EvcP/Tq+QL9t6DtnPK0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220210013403-fce236834a13/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= +github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68 h1:NGssc7CjBlz0FIoGU7JwNNl0XLED71WoECzSGpWPla4= +github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68/go.mod 
h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= From af5f929d61b20d0ea837332b7a38997d27a7d1bd Mon Sep 17 00:00:00 2001 From: Daylon Wilkins Date: Fri, 11 Feb 2022 02:03:26 -0800 Subject: [PATCH 068/105] Fixed gms bump issues --- go/cmd/dolt/commands/engine/sqlengine.go | 3 ++- go/go.mod | 2 +- go/go.sum | 2 ++ go/libraries/doltcore/sqle/database.go | 12 +++++++-- .../doltcore/sqle/database_provider.go | 12 ++++----- .../sqle/enginetest/dolt_engine_test.go | 8 ++++++ .../doltcore/sqle/enginetest/dolt_harness.go | 26 ++++++++++++------- .../sqle/logictest/dolt/doltharness.go | 2 +- 8 files changed, 47 insertions(+), 20 deletions(-) diff --git a/go/cmd/dolt/commands/engine/sqlengine.go b/go/cmd/dolt/commands/engine/sqlengine.go index b8b3c8fd14..cee0a03828 100644 --- a/go/cmd/dolt/commands/engine/sqlengine.go +++ b/go/cmd/dolt/commands/engine/sqlengine.go @@ -167,7 +167,8 @@ func (se *SqlEngine) NewContext(ctx context.Context) (*sql.Context, error) { } func (se *SqlEngine) NewDoltSession(ctx context.Context, mysqlSess *sql.BaseSession) (*dsess.DoltSession, error) { - return se.dsessFactory(ctx, mysqlSess, se.engine.Analyzer.Catalog.AllDatabases()) + tempCtx := sql.NewContext(ctx, sql.WithSession(mysqlSess)) + return se.dsessFactory(ctx, mysqlSess, se.engine.Analyzer.Catalog.AllDatabases(tempCtx)) } // GetReturnFormat() returns the printing format the engine is associated with. 
diff --git a/go/go.mod b/go/go.mod index c7106b8ec5..758764d55a 100644 --- a/go/go.mod +++ b/go/go.mod @@ -69,7 +69,7 @@ require ( ) require ( - github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68 + github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491 github.com/google/flatbuffers v2.0.5+incompatible github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/prometheus/client_golang v1.11.0 diff --git a/go/go.sum b/go/go.sum index bcdaa34d8f..43966f8bd6 100755 --- a/go/go.sum +++ b/go/go.sum @@ -174,6 +174,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68 h1:NGssc7CjBlz0FIoGU7JwNNl0XLED71WoECzSGpWPla4= github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= +github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491 h1:GU4lnDP8j9MLcjvkNKeUDhvA+819CazYZa0QfHcBRv8= +github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go index 6be21ec6ff..39e3d21a61 100644 --- a/go/libraries/doltcore/sqle/database.go +++ b/go/libraries/doltcore/sqle/database.go @@ -22,6 +22,7 @@ import ( "time" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/grant_tables" "github.com/dolthub/vitess/go/vt/proto/query" 
"gopkg.in/src-d/go-errors.v1" @@ -59,8 +60,15 @@ type SqlDatabase interface { func DbsAsDSQLDBs(dbs []sql.Database) []SqlDatabase { dsqlDBs := make([]SqlDatabase, 0, len(dbs)) for _, db := range dbs { - sqlDb, ok := db.(SqlDatabase) - if !ok { + var sqlDb SqlDatabase + if sqlDatabase, ok := db.(SqlDatabase); ok { + sqlDb = sqlDatabase + } else if privDatabase, ok := db.(grant_tables.PrivilegedDatabase); ok { + if sqlDatabase, ok := privDatabase.Unwrap().(SqlDatabase); ok { + sqlDb = sqlDatabase + } + } + if sqlDb == nil { continue } switch v := sqlDb.(type) { diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index 3ca476f9ca..741fae568f 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -98,7 +98,7 @@ func (p DoltDatabaseProvider) WithDbFactoryUrl(url string) DoltDatabaseProvider return p } -func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error) { +func (p DoltDatabaseProvider) Database(ctx *sql.Context, name string) (db sql.Database, err error) { name = strings.ToLower(name) var ok bool p.mu.RLock() @@ -108,7 +108,7 @@ func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error) return db, nil } - db, _, ok, err = p.databaseForRevision(context.Background(), name) + db, _, ok, err = p.databaseForRevision(ctx, name) if err != nil { return nil, err } @@ -128,12 +128,12 @@ func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error) } -func (p DoltDatabaseProvider) HasDatabase(name string) bool { - _, err := p.Database(name) +func (p DoltDatabaseProvider) HasDatabase(ctx *sql.Context, name string) bool { + _, err := p.Database(ctx, name) return err == nil } -func (p DoltDatabaseProvider) AllDatabases() (all []sql.Database) { +func (p DoltDatabaseProvider) AllDatabases(ctx *sql.Context) (all []sql.Database) { p.mu.RLock() defer p.mu.RUnlock() @@ -286,7 +286,7 @@ func (p 
DoltDatabaseProvider) RevisionDbState(ctx context.Context, revDB string) return init, nil } -func (p DoltDatabaseProvider) Function(name string) (sql.Function, error) { +func (p DoltDatabaseProvider) Function(ctx *sql.Context, name string) (sql.Function, error) { fn, ok := p.functions[strings.ToLower(name)] if !ok { return nil, sql.ErrFunctionNotFound.New(name) diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go index 7af2812bcf..0e6d44a86d 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go @@ -213,6 +213,14 @@ func TestScripts(t *testing.T) { enginetest.TestScripts(t, newDoltHarness(t).WithSkippedQueries(skipped)) } +func TestUserPrivileges(t *testing.T) { + enginetest.TestUserPrivileges(t, newDoltHarness(t)) +} + +func TestUserAuthentication(t *testing.T) { + enginetest.TestUserAuthentication(t, newDoltHarness(t)) +} + func TestComplexIndexQueries(t *testing.T) { enginetest.TestComplexIndexQueries(t, newDoltHarness(t)) } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go index 425c5cac83..0b842b7f40 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go @@ -51,6 +51,7 @@ type DoltHarness struct { var _ enginetest.Harness = (*DoltHarness)(nil) var _ enginetest.SkippingHarness = (*DoltHarness)(nil) +var _ enginetest.ClientHarness = (*DoltHarness)(nil) var _ enginetest.IndexHarness = (*DoltHarness)(nil) var _ enginetest.VersionedDBHarness = (*DoltHarness)(nil) var _ enginetest.ForeignKeyHarness = (*DoltHarness)(nil) @@ -144,32 +145,36 @@ func (d *DoltHarness) Parallelism() int { } func (d *DoltHarness) NewContext() *sql.Context { - return sql.NewContext( - context.Background(), - sql.WithSession(d.session)) + return sql.NewContext(context.Background(), 
sql.WithSession(d.session)) +} + +func (d *DoltHarness) NewContextWithClient(client sql.Client) *sql.Context { + return sql.NewContext(context.Background(), sql.WithSession(d.newSessionWithClient(client))) } func (d *DoltHarness) NewSession() *sql.Context { + d.session = d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"}) + return d.NewContext() +} + +func (d *DoltHarness) newSessionWithClient(client sql.Client) *dsess.DoltSession { states := make([]dsess.InitialDbState, len(d.databases)) for i, db := range d.databases { states[i] = getDbState(d.t, db, d.env) } dbs := dsqleDBsAsSqlDBs(d.databases) pro := d.NewDatabaseProvider(dbs...) - localConfig := d.env.Config.WriteableConfig() - var err error - d.session, err = dsess.NewDoltSession( + dSession, err := dsess.NewDoltSession( enginetest.NewContext(d), - enginetest.NewBaseSession(), + sql.NewBaseSessionWithClientServer("address", client, 1), pro.(dsess.RevisionDatabaseProvider), localConfig, states..., ) require.NoError(d.t, err) - - return d.NewContext() + return dSession } func (d *DoltHarness) SupportsNativeIndexCreation() bool { @@ -224,6 +229,9 @@ func (d *DoltHarness) NewReadOnlyDatabases(names ...string) (dbs []sql.ReadOnlyD } func (d *DoltHarness) NewDatabaseProvider(dbs ...sql.Database) sql.MutableDatabaseProvider { + if d.env == nil { + d.env = dtestutils.CreateTestEnv() + } mrEnv, err := env.DoltEnvAsMultiEnv(context.Background(), d.env) require.NoError(d.t, err) pro := sqle.NewDoltDatabaseProvider(d.env.Config, mrEnv.FileSystem(), dbs...) 
diff --git a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go index f42ee7eab0..b385ca0755 100644 --- a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go +++ b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go @@ -133,7 +133,7 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error { ctx := dsql.NewTestSQLCtx(context.Background()) h.sess = ctx.Session.(*dsess.DoltSession) - dbs := h.engine.Analyzer.Catalog.AllDatabases() + dbs := h.engine.Analyzer.Catalog.AllDatabases(ctx) dsqlDBs := make([]dsql.Database, len(dbs)) for i, db := range dbs { dsqlDB := db.(dsql.Database) From e6378a86f9bd812b80e01887783ae46de8646c9b Mon Sep 17 00:00:00 2001 From: Hydrocharged Date: Fri, 11 Feb 2022 11:10:52 +0000 Subject: [PATCH 069/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 758764d55a..7628192742 100644 --- a/go/go.mod +++ b/go/go.mod @@ -69,7 +69,7 @@ require ( ) require ( - github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491 + github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e github.com/google/flatbuffers v2.0.5+incompatible github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/prometheus/client_golang v1.11.0 diff --git a/go/go.sum b/go/go.sum index 43966f8bd6..237c4aeebb 100755 --- a/go/go.sum +++ b/go/go.sum @@ -172,10 +172,10 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68 
h1:NGssc7CjBlz0FIoGU7JwNNl0XLED71WoECzSGpWPla4= -github.com/dolthub/go-mysql-server v0.11.1-0.20220211040258-98fbe52dfe68/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491 h1:GU4lnDP8j9MLcjvkNKeUDhvA+819CazYZa0QfHcBRv8= github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= +github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e h1:zcZ1TlGyrv8kA4NJBPIWIb+0M/YC4XD9nJw+y7OSwyk= +github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= From 824ebfad460707ba276283f383188ca2aadb7297 Mon Sep 17 00:00:00 2001 From: Hydrocharged Date: Fri, 11 Feb 2022 11:47:11 +0000 Subject: [PATCH 070/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/go.sum | 2 -- 1 file changed, 2 deletions(-) diff --git a/go/go.sum b/go/go.sum index 237c4aeebb..50b77762dd 100755 --- a/go/go.sum +++ b/go/go.sum @@ -172,8 +172,6 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491 h1:GU4lnDP8j9MLcjvkNKeUDhvA+819CazYZa0QfHcBRv8= -github.com/dolthub/go-mysql-server v0.11.1-0.20220211104023-552e86a7b491/go.mod 
h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e h1:zcZ1TlGyrv8kA4NJBPIWIb+0M/YC4XD9nJw+y7OSwyk= github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= From 0befaf68814ccbe0c0baf08799f388504a85b382 Mon Sep 17 00:00:00 2001 From: jennifersp <44716627+jennifersp@users.noreply.github.com> Date: Fri, 11 Feb 2022 10:48:21 -0800 Subject: [PATCH 071/105] add bats test for query statements on different db (#2797) --- integration-tests/bats/auto_increment.bats | 13 +++++++++ integration-tests/bats/foreign-keys.bats | 34 ++++++++++++++++++++++ integration-tests/bats/index.bats | 14 +++++++++ 3 files changed, 61 insertions(+) diff --git a/integration-tests/bats/auto_increment.bats b/integration-tests/bats/auto_increment.bats index a9a9e7b186..2189e16d36 100644 --- a/integration-tests/bats/auto_increment.bats +++ b/integration-tests/bats/auto_increment.bats @@ -682,3 +682,16 @@ SQL [[ "${lines[2]}" =~ "2" ]] || false } +@test "auto_increment: alter table add constraint for different database" { + skip "add constraint for different database fix in progress" + dolt sql < Date: Fri, 11 Feb 2022 10:53:31 -0800 Subject: [PATCH 072/105] Fix auto increment imports (#2777) --- .../bats/import-create-tables.bats | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/integration-tests/bats/import-create-tables.bats b/integration-tests/bats/import-create-tables.bats index 037b510b89..22dd7d7d39 100755 --- a/integration-tests/bats/import-create-tables.bats +++ b/integration-tests/bats/import-create-tables.bats @@ -704,3 +704,40 @@ DELIM run dolt sql -r csv -q "select * from keyless" [ "${lines[1]}" = "0,42,2" ] } + +@test "import-create-tables: auto-increment table" { + cat < schema.sql +CREATE TABLE test ( + pk int PRIMARY 
KEY AUTO_INCREMENT, + v1 int +); +SQL + + cat < data.csv +pk,v1 +1,1 +2,2 +3,3 +4,4 +DELIM + + run dolt table import -s schema.sql -c test data.csv + [ "$status" -eq 0 ] + [[ "$output" =~ "Rows Processed: 4, Additions: 4, Modifications: 0, Had No Effect: 0" ]] || false + [[ "$output" =~ "Import completed successfully." ]] || false + + run dolt sql -r csv -q "select * from test order by pk ASC" + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 5 ] + [ "${lines[1]}" = 1,1 ] + [ "${lines[2]}" = 2,2 ] + [ "${lines[3]}" = 3,3 ] + [ "${lines[4]}" = 4,4 ] + + dolt sql -q "insert into test values (NULL, 5)" + + run dolt sql -r csv -q "select * from test where pk = 5" + [ "$status" -eq 0 ] + [ "${#lines[@]}" -eq 2 ] + [ "${lines[1]}" = 5,5 ] +} From 93204015c7da852c70ec7c520756579b5a16c01a Mon Sep 17 00:00:00 2001 From: James Cor Date: Fri, 11 Feb 2022 12:05:52 -0800 Subject: [PATCH 073/105] use GetPath on ref instead of slicing --- go/cmd/dolt/commands/log.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 43362b0d5c..1925331ab6 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -174,7 +174,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o for _, b := range branches { refName := b.Ref.String() if opts.decoration != "full" { - refName = refName[11:] // trim out "refs/heads/" + refName = b.Ref.GetPath() // trim out "refs/heads/" } refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m) cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName) @@ -187,7 +187,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o for _, t := range tags { refName := t.Ref.String() if opts.decoration != "full" { - refName = refName[10:] // trim out "refs/tags/" + refName = t.Ref.GetPath() // trim out "refs/tags/" } refName = fmt.Sprintf("\033[33;1mtag: %s\033[0m", refName) // tags 
names are bright yellow (33;1m) cHashToRefs[t.Hash] = append(cHashToRefs[t.Hash], refName) From 0bbb3cd9f351e2aff013651a4f91dfbbe5d6a514 Mon Sep 17 00:00:00 2001 From: James Cor Date: Fri, 11 Feb 2022 12:08:22 -0800 Subject: [PATCH 074/105] formatting --- go/cmd/dolt/commands/log.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 1925331ab6..87edcd3779 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -164,8 +164,9 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - // TODO: easier way to get these associations? cHashToRefs := map[hash.Hash][]string{} + + // Get all branches branches, err := dEnv.DoltDB.GetBranchesWithHashes(ctx) if err != nil { cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information.")) @@ -179,6 +180,10 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m) cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName) } + + // TODO: Get all remote branches + + // Get all tags tags, err := dEnv.DoltDB.GetTagsWithHashes(ctx) if err != nil { cli.PrintErrln(color.HiRedString("Fatal error: cannot get Tag information.")) @@ -236,7 +241,11 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - commitsInfo = append(commitsInfo, logNode{commitMeta: meta, commitHash: cmHash, parentHashes: pHashes, branchNames: cHashToRefs[cmHash], isHead: cmHash == h}) + commitsInfo = append(commitsInfo, logNode{commitMeta: meta, + commitHash: cmHash, + parentHashes: pHashes, + branchNames: cHashToRefs[cmHash], + isHead: cmHash == h}) } logToStdOut(opts, commitsInfo) @@ -334,7 +343,9 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return err } - commitsInfo = append(commitsInfo, 
logNode{commitMeta: meta, commitHash: prevHash, parentHashes: ph}) + commitsInfo = append(commitsInfo, logNode{commitMeta: meta, + commitHash: prevHash, + parentHashes: ph}) numLines-- } From 6f1e7ef88a0c253385449776eb0e025affb04340 Mon Sep 17 00:00:00 2001 From: James Cor Date: Fri, 11 Feb 2022 12:20:49 -0800 Subject: [PATCH 075/105] adding remotes and some comments --- go/cmd/dolt/commands/log.go | 19 ++++++++++++++++--- go/libraries/doltcore/doltdb/doltdb.go | 17 +++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 87edcd3779..5272e9cdde 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -175,13 +175,26 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o for _, b := range branches { refName := b.Ref.String() if opts.decoration != "full" { - refName = b.Ref.GetPath() // trim out "refs/heads/" + refName = b.Ref.GetPath() // trim out "refs/remotes/" } - refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m) + refName = fmt.Sprintf("\033[31;1m%s\033[0m", refName) // branch names are bright red (31;1m) cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName) } - // TODO: Get all remote branches + // Get all remote branches + remotes, err := dEnv.DoltDB.GetRemotesWithHashes(ctx) + if err != nil { + cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information.")) + return 1 + } + for _, r := range remotes { + refName := r.Ref.String() + if opts.decoration != "full" { + refName = r.Ref.GetPath() // trim out "refs/heads/" + } + refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // remote names are bright red (32;1m) + cHashToRefs[r.Hash] = append(cHashToRefs[r.Hash], refName) + } // Get all tags tags, err := dEnv.DoltDB.GetTagsWithHashes(ctx) diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index 09be8866be..29dc3b15cc 
100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -816,6 +816,7 @@ type TagWithHash struct { Hash hash.Hash } +// GetTagsWithHashes returns a list of objects containing TagRefs with their associated Commit's hash func (ddb *DoltDB) GetTagsWithHashes(ctx context.Context) ([]TagWithHash, error) { var refs []TagWithHash err := ddb.VisitRefsOfType(ctx, tagsRefFilter, func(r ref.DoltRef, v types.Value) error { @@ -849,6 +850,22 @@ func (ddb *DoltDB) GetRemoteRefs(ctx context.Context) ([]ref.DoltRef, error) { return ddb.GetRefsOfType(ctx, remotesRefFilter) } +type RemoteWithHash struct { + Ref ref.DoltRef + Hash hash.Hash +} + +func (ddb *DoltDB) GetRemotesWithHashes(ctx context.Context) ([]RemoteWithHash, error) { + var refs []RemoteWithHash + err := ddb.VisitRefsOfType(ctx, remotesRefFilter, func(r ref.DoltRef, v types.Value) error { + if tr, ok := v.(types.Ref); ok { + refs = append(refs, RemoteWithHash{r, tr.TargetHash()}) + } + return nil + }) + return refs, err +} + // GetHeadRefs returns a list of all refs that point to a Commit func (ddb *DoltDB) GetHeadRefs(ctx context.Context) ([]ref.DoltRef, error) { return ddb.GetRefsOfType(ctx, ref.HeadRefTypes) From 282b25a55521811ea795c01f62ef7a315e04d7ea Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Fri, 11 Feb 2022 20:21:52 +0000 Subject: [PATCH 076/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/log.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 5272e9cdde..f1308fb84e 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -255,10 +255,10 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o } commitsInfo = append(commitsInfo, logNode{commitMeta: meta, - commitHash: cmHash, + commitHash: cmHash, parentHashes: pHashes, - branchNames: cHashToRefs[cmHash], - isHead: cmHash 
== h}) + branchNames: cHashToRefs[cmHash], + isHead: cmHash == h}) } logToStdOut(opts, commitsInfo) @@ -357,7 +357,7 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d } commitsInfo = append(commitsInfo, logNode{commitMeta: meta, - commitHash: prevHash, + commitHash: prevHash, parentHashes: ph}) numLines-- From f4464796ce96266716064c40131b86007ec48820 Mon Sep 17 00:00:00 2001 From: jennifersp <44716627+jennifersp@users.noreply.github.com> Date: Fri, 11 Feb 2022 12:31:50 -0800 Subject: [PATCH 077/105] add bats test for DROP TABLE for out-of-scope database (#2784) --- integration-tests/bats/drop-create.bats | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/integration-tests/bats/drop-create.bats b/integration-tests/bats/drop-create.bats index 68e5c838de..3fe6749eaa 100755 --- a/integration-tests/bats/drop-create.bats +++ b/integration-tests/bats/drop-create.bats @@ -344,3 +344,28 @@ SQL [[ "$output" =~ "| > | a | b | c |" ]] || false } +@test "drop-create: drop table from different database" { + skip "fix not merged yet" + dolt sql < Date: Fri, 11 Feb 2022 13:40:09 -0800 Subject: [PATCH 078/105] more test --- go/cmd/dolt/commands/log.go | 19 +++++++++------ integration-tests/bats/log.bats | 43 +++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 5272e9cdde..2fefefeb46 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -97,7 +97,7 @@ func (cmd LogCmd) ArgParser() *argparser.ArgParser { ap.SupportsInt(minParentsParam, "", "parent_count", "The minimum number of parents a commit must have to be included in the log.") ap.SupportsFlag(mergesParam, "", "Equivalent to min-parents == 2, this will limit the log to commits with 2 or more parents.") ap.SupportsFlag(parentsParam, "", "Shows all parents of each commit in the log.") - ap.SupportsString(decorateParam, "", "decorate_fmt", 
"Shows refs next to commits.") + ap.SupportsString(decorateParam, "", "decorate_fmt", "Shows refs next to commits. Valid options are short, full, no, and auto") ap.SupportsFlag(oneLineParam, "", "Shows logs in a compact format.") return ap } @@ -122,8 +122,13 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args minParents = 2 } - // TODO: need to handle invalid decorate options decorateOption := apr.GetValueOrDefault(decorateParam, "auto") + switch decorateOption { + case "short", "full", "auto", "no": + default: + cli.PrintErrln(color.HiRedString("fatal : invalid --decorate option: " + decorateOption)) + return 1 + } opts := logOpts{ numLines: apr.GetIntOrDefault(numLinesParam, -1), showParents: apr.Contains(parentsParam), @@ -175,9 +180,9 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o for _, b := range branches { refName := b.Ref.String() if opts.decoration != "full" { - refName = b.Ref.GetPath() // trim out "refs/remotes/" + refName = b.Ref.GetPath() // trim out "refs/heads/" } - refName = fmt.Sprintf("\033[31;1m%s\033[0m", refName) // branch names are bright red (31;1m) + refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m) cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName) } @@ -190,9 +195,9 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o for _, r := range remotes { refName := r.Ref.String() if opts.decoration != "full" { - refName = r.Ref.GetPath() // trim out "refs/heads/" + refName = r.Ref.GetPath() // trim out "refs/remotes/" } - refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // remote names are bright red (32;1m) + refName = fmt.Sprintf("\033[31;1m%s\033[0m", refName) // remote names are bright red (31;1m) cHashToRefs[r.Hash] = append(cHashToRefs[r.Hash], refName) } @@ -378,7 +383,6 @@ func logRefs(pager *outputpager.Pager, comm logNode) { return } - // TODO: this doesn't handle remote branches 
pager.Writer.Write([]byte("\033[33m(\033[0m")) if comm.isHead { pager.Writer.Write([]byte("\033[36;1mHEAD -> \033[0m")) @@ -404,7 +408,6 @@ func logCompact(pager *outputpager.Pager, opts logOpts, commits []logNode) { // Write commit hash pager.Writer.Write([]byte(fmt.Sprintf("\033[33m%s \033[0m", chStr))) - // TODO: write tags here if opts.decoration != "no" { logRefs(pager, comm) } diff --git a/integration-tests/bats/log.bats b/integration-tests/bats/log.bats index 14764ceb87..b02c001108 100755 --- a/integration-tests/bats/log.bats +++ b/integration-tests/bats/log.bats @@ -311,34 +311,67 @@ teardown() { [[ "$output" =~ $regex ]] || false } -@test "log: --oneline removes all new lines" { - dolt commit --allow-empty -m "commit 1" - dolt commit --allow-empty -m "commit 2" +@test "log: --oneline only shows commit message in one line" { + dolt commit --allow-empty -m "a message 1" + dolt commit --allow-empty -m "a message 2" + run dolt log --oneline + [[ !("$output" =~ "Author") ]] || false + [[ !("$output" =~ "Date") ]] || false + [[ !("$output" =~ "commit") ]] || false res=$(dolt log --oneline | wc -l) [ "$res" -eq 3 ] # don't forget initial commit + dolt commit --allow-empty -m "a message 3" + res=$(dolt log --oneline | wc -l) + [ "$res" -eq 4 ] # exactly 1 line is added } @test "log: --decorate=short shows trimmed branches and tags" { dolt tag tag_v0 run dolt log --decorate=short + [[ "$output" =~ "commit" ]] || false + [[ "$output" =~ "Author" ]] || false + [[ "$output" =~ "Date" ]] || false [[ "$output" =~ "main" ]] || false - [[ "$output" =~ "tag_v0" ]] || false + [[ "$output" =~ "tag: tag_v0" ]] || false + [[ !("$output" =~ "/refs/heads/") ]] || false + [[ !("$output" =~ "/refs/tags/") ]] || false } @test "log: --decorate=full shows full branches and tags" { dolt tag tag_v0 run dolt log --decorate=full + [[ "$output" =~ "commit" ]] || false + [[ "$output" =~ "Author" ]] || false + [[ "$output" =~ "Date" ]] || false [[ "$output" =~ "refs/heads/main" ]] || false 
- [[ "$output" =~ "refs/tags/tag_v0" ]] || false + [[ "$output" =~ "tag: refs/tags/tag_v0" ]] || false } @test "log: --decorate=no doesn't show branches or tags" { dolt tag tag_v0 run dolt log --decorate=no + [[ "$output" =~ "commit" ]] || false + [[ "$output" =~ "Author" ]] || false + [[ "$output" =~ "Date" ]] || false [[ !("$output" =~ "main") ]] || false [[ !("$output" =~ "tag_v0") ]] || false } +@test "log: decorate and oneline work together" { + dolt commit --allow-empty -m "a message 1" + dolt commit --allow-empty -m "a message 2" + run dolt log --oneline --decorate=full + [[ !("$output" =~ "commit") ]] || false + [[ !("$output" =~ "Author") ]] || false + [[ !("$output" =~ "Date") ]] || false + [[ "$output" =~ "refs/heads/main" ]] || false + res=$(dolt log --oneline --decorate=full | wc -l) + [ "$res" -eq 3 ] # don't forget initial commit + dolt commit --allow-empty -m "a message 3" + res=$(dolt log --oneline | wc -l) + [ "$res" -eq 4 ] # exactly 1 line is added +} + @test "log: check pager" { skiponwindows "Need to install expect and make this script work on windows." 
dolt commit --allow-empty -m "commit 1" From b90dbd8135a182dc8dfdf2d97dd71be77563a810 Mon Sep 17 00:00:00 2001 From: James Cor Date: Fri, 11 Feb 2022 14:04:21 -0800 Subject: [PATCH 079/105] adding tests for error --- go/cmd/dolt/commands/log.go | 2 +- integration-tests/bats/log.bats | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 341b8b0e19..666e7b8c9e 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -126,7 +126,7 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args switch decorateOption { case "short", "full", "auto", "no": default: - cli.PrintErrln(color.HiRedString("fatal : invalid --decorate option: " + decorateOption)) + cli.PrintErrln(color.HiRedString("fatal: invalid --decorate option: " + decorateOption)) return 1 } opts := logOpts{ diff --git a/integration-tests/bats/log.bats b/integration-tests/bats/log.bats index b02c001108..bc30f6f43b 100755 --- a/integration-tests/bats/log.bats +++ b/integration-tests/bats/log.bats @@ -372,6 +372,12 @@ teardown() { [ "$res" -eq 4 ] # exactly 1 line is added } +@test "log: --decorate=notanoption throws error" { + run dolt log --decorate=notanoption + [ "$status" -eq 1 ] + [[ "$output" =~ "fatal: invalid --decorate option" ]] || false +} + @test "log: check pager" { skiponwindows "Need to install expect and make this script work on windows." 
dolt commit --allow-empty -m "commit 1" From 3c606a9489691042a2c8fc3f0aff8506c108fc18 Mon Sep 17 00:00:00 2001 From: zachmu Date: Fri, 11 Feb 2022 22:15:28 +0000 Subject: [PATCH 080/105] [ga-bump-release] Update Dolt version to 0.36.2 and release v0.36.2 --- go/cmd/dolt/dolt.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/dolt/dolt.go b/go/cmd/dolt/dolt.go index b4e5a7d5cb..6d225a949b 100644 --- a/go/cmd/dolt/dolt.go +++ b/go/cmd/dolt/dolt.go @@ -51,7 +51,7 @@ import ( ) const ( - Version = "0.36.1" + Version = "0.36.2" ) var dumpDocsCommand = &commands.DumpDocsCmd{} From bdedb5155d4cacda3d40d7e6356e655ad6a6b1f3 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 11 Feb 2022 14:32:51 -0800 Subject: [PATCH 081/105] Killed blake2 dependency (not usable with gcc >= 11) and the perf rig that used it --- go/go.mod | 5 +- go/go.sum | 8 +- .../sqle/enginetest/dolt_engine_test.go | 14 ++- go/store/perf/hash-perf-rig/README.md | 21 ---- go/store/perf/hash-perf-rig/main.go | 96 ------------------- 5 files changed, 15 insertions(+), 129 deletions(-) delete mode 100644 go/store/perf/hash-perf-rig/README.md delete mode 100644 go/store/perf/hash-perf-rig/main.go diff --git a/go/go.mod b/go/go.mod index 25f226e3ca..8fbef59e34 100644 --- a/go/go.mod +++ b/go/go.mod @@ -14,14 +14,13 @@ require ( github.com/bcicen/jstream v1.0.0 github.com/boltdb/bolt v1.3.1 github.com/cenkalti/backoff v2.2.1+incompatible - github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf github.com/denisbrodbeck/machineid v1.0.1 github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi v0.0.0-20201005193433-3ee972b1d078 github.com/dolthub/fslock v0.0.3 github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 - github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a + github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38 
github.com/dustin/go-humanize v1.0.0 github.com/fatih/color v1.9.0 github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568 @@ -133,4 +132,6 @@ replace ( github.com/oliveagle/jsonpath => github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 ) +replace github.com/dolthub/go-mysql-server => ../../go-mysql-server + go 1.17 diff --git a/go/go.sum b/go/go.sum index 763e1b259f..833a0d6249 100755 --- a/go/go.sum +++ b/go/go.sum @@ -142,8 +142,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf h1:5ZeQB3mThuz5C2MSER6T5GdtXTF9CMMk42F9BOyRsEQ= -github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf/go.mod h1:BO2rLUAZMrpgh6GBVKi0Gjdqw2MgCtJrtmUdDeZRKjY= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= @@ -172,8 +170,6 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220205073531-abbcc57d6d1e h1:PloDu3xEk+Mhg4RVWzz1gA/0MqQ+YLB38gjmcEHRnr4= -github.com/dolthub/go-mysql-server 
v0.11.1-0.20220205073531-abbcc57d6d1e/go.mod h1:X2i6+DzsBgl5uDu1dzNayauCEZFUE+qIEriSv4M8v3s= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= @@ -182,8 +178,8 @@ github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 h1:WRPDbpJWEnPxP github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66/go.mod h1:N5ZIbMGuDUpTpOFQ7HcsN6WSIpTGQjHP+Mz27AfmAgk= github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE= github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY= -github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a h1:+61CpK9SwG/QFNE+vn6Fxk00GRQgtR+CA6Nvsr87y8g= -github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig= +github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38 h1:qUbVRsX2CPyjj/uLrPu9L69rGiYRb5vwzw7PC5c/Wh8= +github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go index 093946c172..e2ceb5cca3 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go @@ -72,14 +72,20 @@ func TestSingleScript(t *testing.T) { var 
scripts = []enginetest.ScriptTest{ { - Name: "CrossDB Queries", + Name: "insert into sparse auto_increment table", SetUpScript: []string{ - "create table mytable (i bigint primary key, s varchar(200));", + "create table auto (pk int primary key auto_increment)", + "insert into auto values (10), (20), (30)", + "insert into auto values (NULL)", + "insert into auto values (40)", + "insert into auto values (0)", }, Assertions: []enginetest.ScriptTestAssertion{ { - Query: "ALTER TABLE mytable ADD COLUMN s2 TEXT COMMENT 'hello' AFTER i", - Expected: nil, + Query: "select * from auto order by 1", + Expected: []sql.Row{ + {10}, {20}, {30}, {31}, {40}, {41}, + }, }, }, }, diff --git a/go/store/perf/hash-perf-rig/README.md b/go/store/perf/hash-perf-rig/README.md deleted file mode 100644 index d350cbf618..0000000000 --- a/go/store/perf/hash-perf-rig/README.md +++ /dev/null @@ -1,21 +0,0 @@ -This is a performance test rig for the two main types of hashing we do in NOMS - buzhash and sha1. There's also support for sha256, sha512, and blake2b hash functions for comparison. - -As of May 9, these are the numbers I get on a macbook pro 3.1 GHz Intel Core i7. - -- no hashing : 3500 MB/s -- sha1 only : 470 MB/s -- sha256 only : 185 MB/s -- sha512 only : 299 MB/s -- blake2b only : 604 MB/s -- bh only : 139 MB/s -- sha1 and bh : 110 MB/s -- sha256 and bh : 80 MB/s -- sha512 and bh : 96 MB/s -- blake2b and bh: 115 MB/s - -I think that in the no hashing case there is some compiler optimization going -on because I note that if all I do is add a loop that reads out bytes one by -one from the slice, it drops to 1000MB/s. - -One outcome of this is that there's no sense going to sha256 - we should just -jump straight to sha512. 
\ No newline at end of file diff --git a/go/store/perf/hash-perf-rig/main.go b/go/store/perf/hash-perf-rig/main.go deleted file mode 100644 index 4eaec7ba66..0000000000 --- a/go/store/perf/hash-perf-rig/main.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2019 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file incorporates work covered by the following copyright and -// permission notice: -// -// Copyright 2016 Attic Labs, Inc. All rights reserved. -// Licensed under the Apache License, version 2.0: -// http://www.apache.org/licenses/LICENSE-2.0 - -package main - -import ( - "crypto/sha1" - "crypto/sha256" - "crypto/sha512" - "fmt" - "hash" - "io" - "os" - "time" - - "github.com/codahale/blake2" - humanize "github.com/dustin/go-humanize" - flag "github.com/juju/gnuflag" - "github.com/silvasur/buzhash" -) - -func main() { - useSHA := flag.String("use-sha", "", "=no hashing, 1=sha1, 256=sha256, 512=sha512, blake=blake2b") - useBH := flag.Bool("use-bh", false, "whether we buzhash the bytes") - flag.Parse(true) - - flag.Usage = func() { - fmt.Printf("%s \n", os.Args[0]) - flag.PrintDefaults() - } - - if len(flag.Args()) < 1 { - flag.Usage() - return - } - - p := flag.Args()[0] - bh := buzhash.NewBuzHash(64 * 8) - f, _ := os.Open(p) - defer f.Close() - t0 := time.Now() - buf := make([]byte, 4*1024) - l := uint64(0) - - var h hash.Hash - if *useSHA == "1" { - h = sha1.New() - } else if *useSHA == "256" { - h = sha256.New() - } else if 
*useSHA == "512" { - h = sha512.New() - } else if *useSHA == "blake" { - h = blake2.NewBlake2B() - } - - for { - n, err := f.Read(buf) - l += uint64(n) - if err == io.EOF { - break - } - s := buf[:n] - if h != nil { - h.Write(s) - } - if *useBH { - bh.Write(s) - } - } - - t1 := time.Now() - d := t1.Sub(t0) - fmt.Printf("Read %s in %s (%s/s)\n", humanize.Bytes(l), d, humanize.Bytes(uint64(float64(l)/d.Seconds()))) - digest := []byte{} - if h != nil { - fmt.Printf("%x\n", h.Sum(digest)) - } -} From af9bef4c9f0ecd20f51568c46e172f5c00fccaba Mon Sep 17 00:00:00 2001 From: Vinai Rachakonda Date: Mon, 14 Feb 2022 10:31:41 -0800 Subject: [PATCH 082/105] Add Generational Chunk Store for Noms Show (#2774) --- go/store/spec/spec.go | 44 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/go/store/spec/spec.go b/go/store/spec/spec.go index 67e81b392a..eb7b3470fa 100644 --- a/go/store/spec/spec.go +++ b/go/store/spec/spec.go @@ -40,6 +40,7 @@ import ( "github.com/aws/aws-sdk-go/service/dynamodb" "github.com/aws/aws-sdk-go/service/s3" + "github.com/dolthub/dolt/go/libraries/utils/filesys" "github.com/dolthub/dolt/go/store/chunks" "github.com/dolthub/dolt/go/store/d" "github.com/dolthub/dolt/go/store/datas" @@ -456,9 +457,27 @@ func (sp Spec) createDatabase(ctx context.Context) datas.Database { case "gs": return datas.NewDatabase(parseGCSSpec(ctx, sp.Href(), sp.Options)) case "nbs": - os.Mkdir(sp.DatabaseName, 0777) - cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28) + // If the database is the oldgen database return a standard NBS store. + if strings.Contains(sp.DatabaseName, "oldgen") { + return getStandardLocalStore(ctx, sp.DatabaseName) + } + + oldgenDb := filepath.Join(sp.DatabaseName, "oldgen") + + err := validateDir(oldgenDb) + // If we can't validate that an oldgen db exists just use a standard local store. 
+ if err != nil { + return getStandardLocalStore(ctx, sp.DatabaseName) + } + + newGenSt, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28) d.PanicIfError(err) + + oldGenSt, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), oldgenDb, 1<<28) + d.PanicIfError(err) + + cs := nbs.NewGenerationalCS(oldGenSt, newGenSt) + return datas.NewDatabase(cs) case "mem": storage := &chunks.MemoryStorage{} @@ -474,6 +493,27 @@ func (sp Spec) createDatabase(ctx context.Context) datas.Database { } } +func getStandardLocalStore(ctx context.Context, dbName string) datas.Database { + os.Mkdir(dbName, 0777) + + cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), dbName, 1<<28) + d.PanicIfError(err) + + return datas.NewDatabase(cs) +} + +func validateDir(path string) error { + info, err := os.Stat(path) + + if err != nil { + return err + } else if !info.IsDir() { + return filesys.ErrIsFile + } + + return nil +} + func parseDatabaseSpec(spec string) (protocol, name string, err error) { if len(spec) == 0 { err = fmt.Errorf("empty spec") From 66555debfaf49711bafdb49b124e2a144d07890e Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 14 Feb 2022 11:27:02 -0800 Subject: [PATCH 083/105] Upgraded gms --- go/go.mod | 2 +- go/go.sum | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/go/go.mod b/go/go.mod index 66dbb85320..98e9307372 100644 --- a/go/go.mod +++ b/go/go.mod @@ -68,7 +68,7 @@ require ( ) require ( - github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e + github.com/dolthub/go-mysql-server v0.11.1-0.20220214192607-a43d3762ebb4 github.com/google/flatbuffers v2.0.5+incompatible github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/prometheus/client_golang v1.11.0 diff --git a/go/go.sum b/go/go.sum index 8a16e67308..833a0d6249 100755 --- a/go/go.sum +++ b/go/go.sum @@ -170,7 +170,6 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod 
h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220211113841-bb16284a110e/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= From 34321e96af1251d73b8a11d33360f1329de5b4bf Mon Sep 17 00:00:00 2001 From: James Cor Date: Mon, 14 Feb 2022 11:27:51 -0800 Subject: [PATCH 084/105] adding new line for in line structs --- go/cmd/dolt/commands/log.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index 666e7b8c9e..c8c4ebdc5f 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -259,7 +259,8 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o return 1 } - commitsInfo = append(commitsInfo, logNode{commitMeta: meta, + commitsInfo = append(commitsInfo, logNode{ + commitMeta: meta, commitHash: cmHash, parentHashes: pHashes, branchNames: cHashToRefs[cmHash], @@ -361,7 +362,8 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d return err } - commitsInfo = append(commitsInfo, logNode{commitMeta: meta, + commitsInfo = append(commitsInfo, logNode{ + commitMeta: meta, commitHash: prevHash, parentHashes: ph}) From 297752f022079ab955b42b2b1d9964c81286fab5 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 14 Feb 2022 11:28:56 -0800 Subject: [PATCH 085/105] Removed local 
override --- go/go.mod | 2 -- 1 file changed, 2 deletions(-) diff --git a/go/go.mod b/go/go.mod index 98e9307372..720de96fe8 100644 --- a/go/go.mod +++ b/go/go.mod @@ -132,6 +132,4 @@ replace ( github.com/oliveagle/jsonpath => github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 ) -replace github.com/dolthub/go-mysql-server => ../../go-mysql-server - go 1.17 From 339e0ec7012288f76d0f9ccb69eafe0d3b7e6c51 Mon Sep 17 00:00:00 2001 From: JCOR11599 Date: Mon, 14 Feb 2022 19:28:57 +0000 Subject: [PATCH 086/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/cmd/dolt/commands/log.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/log.go b/go/cmd/dolt/commands/log.go index c8c4ebdc5f..a9950fac87 100644 --- a/go/cmd/dolt/commands/log.go +++ b/go/cmd/dolt/commands/log.go @@ -260,7 +260,7 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o } commitsInfo = append(commitsInfo, logNode{ - commitMeta: meta, + commitMeta: meta, commitHash: cmHash, parentHashes: pHashes, branchNames: cHashToRefs[cmHash], @@ -363,7 +363,7 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d } commitsInfo = append(commitsInfo, logNode{ - commitMeta: meta, + commitMeta: meta, commitHash: prevHash, parentHashes: ph}) From 42a9a5b98ddf69526bdfefa0627d580f802147b0 Mon Sep 17 00:00:00 2001 From: zachmu Date: Mon, 14 Feb 2022 19:47:22 +0000 Subject: [PATCH 087/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/go.sum | 2 ++ 1 file changed, 2 insertions(+) diff --git a/go/go.sum b/go/go.sum index 833a0d6249..483957b3d8 100755 --- a/go/go.sum +++ b/go/go.sum @@ -170,6 +170,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 
h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= +github.com/dolthub/go-mysql-server v0.11.1-0.20220214192607-a43d3762ebb4 h1:u/hEZaHVHXIzCQr4g0zQvW6NiYUReByizSEz/AKwkZI= +github.com/dolthub/go-mysql-server v0.11.1-0.20220214192607-a43d3762ebb4/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= From 6b792be8bbdd663c045bb2ba1997871c737db6c4 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Mon, 14 Feb 2022 12:00:30 -0800 Subject: [PATCH 088/105] pr feedback --- go/store/prolly/map_test.go | 4 ++-- go/store/prolly/node.go | 4 ++-- go/store/prolly/tree_merge.go | 8 ++++---- go/store/prolly/utils_test.go | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go/store/prolly/map_test.go b/go/store/prolly/map_test.go index 51038b8ab5..c5ff03f08b 100644 --- a/go/store/prolly/map_test.go +++ b/go/store/prolly/map_test.go @@ -20,7 +20,6 @@ import ( "io" "math/rand" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -28,7 +27,8 @@ import ( "github.com/dolthub/dolt/go/store/val" ) -var testRand = rand.New(rand.NewSource(time.Now().UnixNano())) +// todo(andy): randomize test seed +var testRand = rand.New(rand.NewSource(1)) func TestMap(t *testing.T) { scales := []int{ diff --git a/go/store/prolly/node.go b/go/store/prolly/node.go index 69725e89fa..023fec6e48 100644 --- a/go/store/prolly/node.go +++ b/go/store/prolly/node.go @@ -31,7 +31,7 @@ const ( ) func init() { - //emptyNode = makeMapNode(sharedPool, 0, nil, nil) + emptyNode = makeMapNode(sharedPool, 0, nil, nil) } type Node struct { @@ -79,7 
+79,7 @@ func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (nod serial.TupleMapAddKeyFormat(b, serial.TupleFormatV1) serial.TupleMapAddValueFormat(b, serial.TupleFormatV1) serial.TupleMapAddTreeLevel(b, byte(level)) - // todo(andy): tree empty + // todo(andy): tree count b.Finish(serial.TupleMapEnd(b)) return mapNodeFromBytes(b.FinishedBytes()) diff --git a/go/store/prolly/tree_merge.go b/go/store/prolly/tree_merge.go index 7aaf781997..86bb49638c 100644 --- a/go/store/prolly/tree_merge.go +++ b/go/store/prolly/tree_merge.go @@ -28,7 +28,7 @@ const patchBufferSize = 1024 // TupleMergeFn is a callback that handles 3-way merging of tuples. // A typical implementation will attempt a cell-wise merge of the tuples, -// or register a conflict if a such a merge is not possible. +// or register a conflict if such a merge is not possible. type TupleMergeFn func(left, right Diff) (Diff, bool) // ThreeWayMerge implements a three-way merge algorithm using |base| as the common ancestor, |right| as @@ -54,7 +54,7 @@ func ThreeWayMerge(ctx context.Context, left, right, base Map, cb TupleMergeFn) // iterate |ld| and |rd| in parallel, populating |buf| eg.Go(func() (err error) { defer func() { - if cerr := buf.close(); err != nil { + if cerr := buf.close(); err == nil { err = cerr } }() @@ -195,9 +195,9 @@ func sendPatches(ctx context.Context, l, r treeDiffer, buf patchBuffer, cb Tuple } } - for lok { + if lok { // already in left - break + return nil } for rok { diff --git a/go/store/prolly/utils_test.go b/go/store/prolly/utils_test.go index 0ca9f47738..e9ba40c3b9 100644 --- a/go/store/prolly/utils_test.go +++ b/go/store/prolly/utils_test.go @@ -51,7 +51,7 @@ func countOrderedMap(t *testing.T, om orderedMap) (cnt int) { require.NoError(t, err) cnt++ } - return + return cnt } func keyDescFromMap(om orderedMap) val.TupleDesc { @@ -98,7 +98,7 @@ func randomTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val } dupes = dupes[:0] } - return + 
return items } func randomCompositeTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val.Tuple) { From ebc1f090b6288b0a51e388f4832b7f1cac52933b Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 14 Feb 2022 12:17:24 -0800 Subject: [PATCH 089/105] Removed deleted file from copyright check script --- go/utils/copyrightshdrs/main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/utils/copyrightshdrs/main.go b/go/utils/copyrightshdrs/main.go index d486d508f9..fd8f6c3f28 100644 --- a/go/utils/copyrightshdrs/main.go +++ b/go/utils/copyrightshdrs/main.go @@ -240,7 +240,6 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{ {Path: "store/nomdl/parser.go", NomsPath: "go/nomdl/parser.go", HadCopyrightNotice: true}, {Path: "store/nomdl/parser_test.go", NomsPath: "go/nomdl/parser_test.go", HadCopyrightNotice: true}, {Path: "store/perf/codec-perf-rig/main.go", NomsPath: "go/perf/codec-perf-rig/main.go", HadCopyrightNotice: true}, - {Path: "store/perf/hash-perf-rig/main.go", NomsPath: "go/perf/hash-perf-rig/main.go", HadCopyrightNotice: true}, {Path: "store/perf/suite/suite.go", NomsPath: "go/perf/suite/suite.go", HadCopyrightNotice: true}, {Path: "store/perf/suite/suite_test.go", NomsPath: "go/perf/suite/suite_test.go", HadCopyrightNotice: true}, {Path: "store/sloppy/sloppy.go", NomsPath: "go/sloppy/sloppy.go", HadCopyrightNotice: true}, From cdf3347d3939bb9bdc639b959907a1ed74af2e15 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Mon, 14 Feb 2022 12:44:05 -0800 Subject: [PATCH 090/105] pr feedback --- .../doltcore/sqle/index/prolly_fields_test.go | 198 ++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 go/libraries/doltcore/sqle/index/prolly_fields_test.go diff --git a/go/libraries/doltcore/sqle/index/prolly_fields_test.go b/go/libraries/doltcore/sqle/index/prolly_fields_test.go new file mode 100644 index 0000000000..70e10a2ea3 --- /dev/null +++ b/go/libraries/doltcore/sqle/index/prolly_fields_test.go @@ -0,0 +1,198 @@ +// 
Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "encoding/json" + "math" + "testing" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression/function" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/pool" + "github.com/dolthub/dolt/go/store/val" +) + +type prollyFieldTest struct { + name string + value interface{} + typ val.Type +} + +func TestRoundTripProllyFields(t *testing.T) { + tests := []prollyFieldTest{ + { + name: "null", + typ: val.Type{ + Enc: val.Int8Enc, + Nullable: true, + }, + value: nil, + }, + { + name: "int8", + typ: val.Type{Enc: val.Int8Enc}, + value: int8(-42), + }, + { + name: "uint8", + typ: val.Type{Enc: val.Uint8Enc}, + value: uint8(42), + }, + { + name: "int16", + typ: val.Type{Enc: val.Int16Enc}, + value: int16(-42), + }, + { + name: "uint16", + typ: val.Type{Enc: val.Uint16Enc}, + value: uint16(42), + }, + { + name: "int32", + typ: val.Type{Enc: val.Int32Enc}, + value: int32(-42), + }, + { + name: "uint32", + typ: val.Type{Enc: val.Uint32Enc}, + value: uint32(42), + }, + { + name: "int64", + typ: val.Type{Enc: val.Int64Enc}, + value: int64(-42), + }, + { + name: "uint64", + typ: val.Type{Enc: val.Uint64Enc}, + value: uint64(42), + }, + { + name: "float32", + typ: val.Type{Enc: val.Float32Enc}, + value: float32(math.Pi), + }, + { + name: "float64", + typ: 
val.Type{Enc: val.Float64Enc}, + value: float64(-math.Pi), + }, + { + name: "string", + typ: val.Type{Enc: val.StringEnc}, + value: "lorem ipsum", + }, + { + name: "bytes", + typ: val.Type{Enc: val.BytesEnc}, + value: []byte("lorem ipsum"), + }, + { + name: "year", + typ: val.Type{Enc: val.YearEnc}, + value: int16(2022), + }, + { + name: "date", + typ: val.Type{Enc: val.DateEnc}, + value: time.Now().UTC(), + }, + { + name: "datetime", + typ: val.Type{Enc: val.DatetimeEnc}, + value: time.Now().UTC(), + }, + { + name: "timestamp", + typ: val.Type{Enc: val.TimestampEnc}, + value: time.Now().UTC(), + }, + { + name: "json", + typ: val.Type{Enc: val.JSONEnc}, + value: mustParseJson(t, `{"a": 1, "b": false}`), + }, + { + name: "point", + typ: val.Type{Enc: val.GeometryEnc}, + value: mustParseGeometryType(t, "POINT(1 2)"), + }, + { + name: "linestring", + typ: val.Type{Enc: val.GeometryEnc}, + value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"), + }, + { + name: "polygon", + typ: val.Type{Enc: val.GeometryEnc}, + value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + testRoundTripProllyFields(t, test) + }) + } +} + +var testPool = pool.NewBuffPool() + +func testRoundTripProllyFields(t *testing.T, test prollyFieldTest) { + desc := val.NewTupleDescriptor(test.typ) + builder := val.NewTupleBuilder(desc) + + err := PutField(builder, 0, test.value) + assert.NoError(t, err) + + tup := builder.Build(testPool) + + v, err := GetField(desc, 0, tup) + assert.NoError(t, err) + assert.Equal(t, test.value, v) +} + +func mustParseGeometryType(t *testing.T, s string) (v interface{}) { + // Determine type, and get data + geomType, data, err := function.ParseWKTHeader(s) + require.NoError(t, err) + + srid, order := uint32(0), false + switch geomType { + case "point": + v, err = function.WKTToPoint(data, srid, order) + case "linestring": + v, err = function.WKTToLine(data, srid, order) + case 
"polygon": + v, err = function.WKTToPoly(data, srid, order) + default: + panic("unknown geometry type") + } + require.NoError(t, err) + return +} + +func mustParseJson(t *testing.T, s string) sql.JSONDocument { + var v interface{} + err := json.Unmarshal([]byte(s), &v) + require.NoError(t, err) + return sql.JSONDocument{Val: v} +} From 03536159c5ecee2d5b9101ed32bba909149331ba Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 14 Feb 2022 12:53:52 -0800 Subject: [PATCH 091/105] better onHeapTableIndex --- go/store/nbs/table_index.go | 732 ++++++++++++++++++++++++++++++ go/store/nbs/table_index_test.go | 90 ++++ go/store/nbs/table_persister.go | 9 +- go/store/nbs/table_reader.go | 416 ----------------- go/store/nbs/table_reader_test.go | 83 ---- go/store/nbs/util.go | 2 +- 6 files changed, 828 insertions(+), 504 deletions(-) create mode 100644 go/store/nbs/table_index.go create mode 100644 go/store/nbs/table_index_test.go diff --git a/go/store/nbs/table_index.go b/go/store/nbs/table_index.go new file mode 100644 index 0000000000..e4f4821dc3 --- /dev/null +++ b/go/store/nbs/table_index.go @@ -0,0 +1,732 @@ +package nbs + +import ( + "bytes" + "encoding/binary" + "io" + "os" + "sort" + "sync/atomic" + + "github.com/dolthub/dolt/go/libraries/utils/iohelp" + "github.com/dolthub/mmap-go" +) + +type tableIndex interface { + // ChunkCount returns the total number of chunks in the indexed file. + ChunkCount() uint32 + // EntrySuffixMatches returns true if the entry at index |idx| matches + // the suffix of the address |h|. Used by |Lookup| after finding + // matching indexes based on |Prefixes|. + EntrySuffixMatches(idx uint32, h *addr) (bool, error) + // IndexEntry returns the |indexEntry| at |idx|. Optionally puts the + // full address of that entry in |a| if |a| is not |nil|. + IndexEntry(idx uint32, a *addr) (indexEntry, error) + // Lookup returns an |indexEntry| for the chunk corresponding to the + // provided address |h|. 
Second returns is |true| if an entry exists + // and |false| otherwise. + Lookup(h *addr) (indexEntry, bool, error) + // Ordinals returns a slice of indexes which maps the |i|th chunk in + // the indexed file to its corresponding entry in index. The |i|th + // entry in the result is the |i|th chunk in the indexed file, and its + // corresponding value in the slice is the index entry that maps to it. + Ordinals() ([]uint32, error) + // Prefixes returns the sorted slice of |uint64| |addr| prefixes; each + // entry corresponds to an indexed chunk address. + Prefixes() ([]uint64, error) + // TableFileSize returns the total size of the indexed table file, in bytes. + TableFileSize() uint64 + // TotalUncompressedData returns the total uncompressed data size of + // the table file. Used for informational statistics only. + TotalUncompressedData() uint64 + + // Close releases any resources used by this tableIndex. + Close() error + + // Clone returns a |tableIndex| with the same contents which can be + // |Close|d independently. + Clone() (tableIndex, error) +} + +//var _ tableIndex = mmapTableIndex{} +// +//// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index +//// and footer, though it may contain an unspecified number of bytes before that data. +//// |tableIndex| doesn't keep alive any references to |buff|. 
+//func parseTableIndex(buff []byte) (onHeapTableIndex, error) { +// return ReadTableIndex(bytes.NewReader(buff)) +//} +// +//func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { +// footerSize := int64(magicNumberSize + uint64Size + uint32Size) +// _, err := rd.Seek(-footerSize, io.SeekEnd) +// +// if err != nil { +// return onHeapTableIndex{}, err +// } +// +// footer, err := iohelp.ReadNBytes(rd, int(footerSize)) +// +// if err != nil { +// return onHeapTableIndex{}, err +// } +// +// if string(footer[uint32Size+uint64Size:]) != magicNumber { +// return onHeapTableIndex{}, ErrInvalidTableFile +// } +// +// chunkCount := binary.BigEndian.Uint32(footer) +// totalUncompressedData := binary.BigEndian.Uint64(footer[uint32Size:]) +// +// // index +// suffixesSize := int64(chunkCount) * addrSuffixSize +// lengthsSize := int64(chunkCount) * lengthSize +// tuplesSize := int64(chunkCount) * prefixTupleSize +// indexSize := suffixesSize + lengthsSize + tuplesSize +// +// _, err = rd.Seek(-(indexSize + footerSize), io.SeekEnd) +// if err != nil { +// return onHeapTableIndex{}, ErrInvalidTableFile +// } +// +// prefixes, ordinals, err := streamComputePrefixes(chunkCount, rd) +// if err != nil { +// return onHeapTableIndex{}, ErrInvalidTableFile +// } +// lengths, offsets, err := streamComputeOffsets(chunkCount, rd) +// if err != nil { +// return onHeapTableIndex{}, ErrInvalidTableFile +// } +// suffixes, err := iohelp.ReadNBytes(rd, int(suffixesSize)) +// if err != nil { +// return onHeapTableIndex{}, ErrInvalidTableFile +// } +// +// return onHeapTableIndex{ +// chunkCount, totalUncompressedData, +// prefixes, offsets, +// lengths, ordinals, +// suffixes, +// }, nil +//} +// +//type onHeapTableIndex struct { +// chunkCount uint32 +// totalUncompressedData uint64 +// prefixes, offsets []uint64 +// lengths, ordinals []uint32 +// suffixes []byte +//} +// +//func (ti onHeapTableIndex) ChunkCount() uint32 { +// return ti.chunkCount +//} +// +//// EntrySuffixMatches 
returns true IFF the suffix for prefix entry |idx| +//// matches the address |a|. +//func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool { +// li := uint64(ti.ordinals[idx]) * addrSuffixSize +// return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) +//} +// +//func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry { +// ord := ti.ordinals[idx] +// if a != nil { +// binary.BigEndian.PutUint64(a[:], ti.prefixes[idx]) +// li := uint64(ord) * addrSuffixSize +// copy(a[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) +// } +// return indexResult{ti.offsets[ord], ti.lengths[ord]} +//} +// +//func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool) { +// ord := ti.lookupOrdinal(h) +// if ord == ti.chunkCount { +// return indexResult{}, false +// } +// return indexResult{ti.offsets[ord], ti.lengths[ord]}, true +//} +// +//func (ti onHeapTableIndex) Ordinals() []uint32 { +// return ti.ordinals +//} +// +//func (ti onHeapTableIndex) Prefixes() []uint64 { +// return ti.prefixes +//} +// +//// TableFileSize returns the size of the table file that this index references. +//// This assumes that the index follows immediately after the last chunk in the +//// file and that the last chunk in the file is in the index. 
+//func (ti onHeapTableIndex) TableFileSize() uint64 { +// if ti.chunkCount == 0 { +// return footerSize +// } +// len, offset := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1]) +// return offset + len + indexSize(ti.chunkCount) + footerSize +//} +// +//func (ti onHeapTableIndex) TotalUncompressedData() uint64 { +// return ti.totalUncompressedData +//} +// +//func (ti onHeapTableIndex) Close() error { +// return nil +//} +// +//func (ti onHeapTableIndex) Clone() tableIndex { +// return ti +//} +// +//func (ti onHeapTableIndex) prefixIdxToOrdinal(idx uint32) uint32 { +// return ti.ordinals[idx] +//} +// +//// prefixIdx returns the first position in |tr.prefixes| whose value == +//// |prefix|. Returns |tr.chunkCount| if absent +//func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) { +// // NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in +// // an extremely tight loop and inlining the code was a significant perf improvement. +// idx, j := 0, ti.chunkCount +// for idx < j { +// h := idx + (j-idx)/2 // avoid overflow when computing h +// // i ≤ h < j +// if ti.prefixes[h] < prefix { +// idx = h + 1 // preserves f(i-1) == false +// } else { +// j = h // preserves f(j) == true +// } +// } +// +// return +//} +// +//// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount| +//// if absent. 
+//func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 { +// prefix := h.Prefix() +// +// for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ { +// if ti.EntrySuffixMatches(idx, h) { +// return ti.ordinals[idx] +// } +// } +// +// return ti.chunkCount +//} +// +//func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) { +// lengths = make([]uint32, count) +// offsets = make([]uint64, count) +// +// lengths[0] = binary.BigEndian.Uint32(buff) +// +// for i := uint64(1); i < uint64(count); i++ { +// lengths[i] = binary.BigEndian.Uint32(buff[i*lengthSize:]) +// offsets[i] = offsets[i-1] + uint64(lengths[i-1]) +// } +// return +//} +// +//func streamComputeOffsets(count uint32, rd io.Reader) (lengths []uint32, offsets []uint64, err error) { +// lengths = make([]uint32, count) +// offsets = make([]uint64, count) +// buff := make([]byte, lengthSize) +// +// n, err := rd.Read(buff) +// if err != nil { +// return nil, nil, err +// } +// if n != lengthSize { +// return nil, nil, ErrNotEnoughBytes +// } +// lengths[0] = binary.BigEndian.Uint32(buff) +// +// for i := uint64(1); i < uint64(count); i++ { +// n, err := rd.Read(buff) +// if err != nil { +// return nil, nil, err +// } +// if n != lengthSize { +// return nil, nil, ErrNotEnoughBytes +// } +// lengths[i] = binary.BigEndian.Uint32(buff) +// offsets[i] = offsets[i-1] + uint64(lengths[i-1]) +// } +// +// return +//} +// +//func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []uint32) { +// prefixes = make([]uint64, count) +// ordinals = make([]uint32, count) +// +// for i := uint64(0); i < uint64(count); i++ { +// idx := i * prefixTupleSize +// prefixes[i] = binary.BigEndian.Uint64(buff[idx:]) +// ordinals[i] = binary.BigEndian.Uint32(buff[idx+addrPrefixSize:]) +// } +// return +//} +// +//func streamComputePrefixes(count uint32, rd io.Reader) (prefixes []uint64, ordinals []uint32, err error) { +// prefixes = make([]uint64, 
count) +// ordinals = make([]uint32, count) +// buff := make([]byte, prefixTupleSize) +// +// for i := uint64(0); i < uint64(count); i++ { +// n, err := rd.Read(buff) +// if err != nil { +// return nil, nil, err +// } +// if n != prefixTupleSize { +// return nil, nil, ErrNotEnoughBytes +// } +// prefixes[i] = binary.BigEndian.Uint64(buff) +// ordinals[i] = binary.BigEndian.Uint32(buff[addrPrefixSize:]) +// } +// +// return +//} + +func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) { + footerSize := int64(magicNumberSize + uint64Size + uint32Size) + _, err = rd.Seek(-footerSize, io.SeekEnd) + + if err != nil { + return 0, 0, err + } + + footer, err := iohelp.ReadNBytes(rd, int(footerSize)) + + if err != nil { + return 0, 0, err + } + + if string(footer[uint32Size+uint64Size:]) != magicNumber { + return 0, 0, ErrInvalidTableFile + } + + chunkCount = binary.BigEndian.Uint32(footer) + totalUncompressedData = binary.BigEndian.Uint64(footer[uint32Size:]) + + return +} + +// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index +// and footer, though it may contain an unspecified number of bytes before that data. +// |tableIndex| doesn't keep alive any references to |buff|. +// Does not allocate new memory except for offsets, computes on buff in place. 
+func parseTableIndex(buff []byte) (onHeapTableIndex, error) { + chunkCount, totalUncompressedData, err := ReadTableFooter(bytes.NewReader(buff)) + if err != nil { + return onHeapTableIndex{}, err + } + + iS := indexSize(chunkCount) + buff = buff[:len(buff)-footerSize] + // Trim away any extra bytes + buff = buff[uint64(len(buff))-iS:] + + return NewOnHeapTableIndex(buff, chunkCount, totalUncompressedData) +} + +// ReadTableIndex loads an index into memory from an io.ReadSeeker +// Caution: Allocates new memory for entire index +func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { + chunkCount, totalUncompressedData, err := ReadTableFooter(rd) + if err != nil { + return onHeapTableIndex{}, err + } + iS := int64(indexSize(chunkCount)) + _, err = rd.Seek(-(iS + footerSize), io.SeekEnd) + if err != nil { + return onHeapTableIndex{}, ErrInvalidTableFile + } + buff := make([]byte, iS) + _, err = io.ReadFull(rd, buff) + if err != nil { + return onHeapTableIndex{}, err + } + + return NewOnHeapTableIndex(buff, chunkCount, totalUncompressedData) +} + +type onHeapTableIndex struct { + tableFileSize uint64 + // Tuple bytes + tupleB []byte + // Offset bytes + offsetB []byte + // Suffix bytes + suffixB []byte + chunkCount uint32 + totalUncompressedData uint64 +} + +var _ tableIndex = &onHeapTableIndex{} + +// NewOnHeapTableIndex creates a table index given a buffer of just the table index (no footer) +func NewOnHeapTableIndex(b []byte, chunkCount uint32, totalUncompressedData uint64) (onHeapTableIndex, error) { + tuples := b[:prefixTupleSize*chunkCount] + lengths := b[prefixTupleSize*chunkCount : prefixTupleSize*chunkCount+lengthSize*chunkCount] + suffixes := b[prefixTupleSize*chunkCount+lengthSize*chunkCount:] + + lR := bytes.NewReader(lengths) + offsets := make([]byte, chunkCount*offsetSize) + _, err := io.ReadFull(NewOffsetsReader(lR), offsets) + if err != nil { + return onHeapTableIndex{}, err + } + + return onHeapTableIndex{ + tupleB: tuples, + offsetB: 
offsets, + suffixB: suffixes, + chunkCount: chunkCount, + totalUncompressedData: totalUncompressedData, + }, nil +} + +func (ti onHeapTableIndex) ChunkCount() uint32 { + return ti.chunkCount +} + +func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) { + ord := ti.ordinalAt(idx) + o := ord * addrSuffixSize + b := ti.suffixB[o : o+addrSuffixSize] + return bytes.Equal(h[addrPrefixSize:], b), nil +} + +func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) (entry indexEntry, err error) { + prefix, ord := ti.tupleAt(idx) + + if a != nil { + binary.BigEndian.PutUint64(a[:], prefix) + + o := int64(addrSuffixSize * ord) + b := ti.suffixB[o : o+addrSuffixSize] + copy(a[addrPrefixSize:], b) + } + + return ti.getIndexEntry(ord), nil +} + +func (ti onHeapTableIndex) getIndexEntry(ord uint32) indexEntry { + var prevOff uint64 + if ord == 0 { + prevOff = 0 + } else { + prevOff = ti.offsetAt(ord - 1) + } + ordOff := ti.offsetAt(ord) + length := uint32(ordOff - prevOff) + return indexResult{ + o: prevOff, + l: length, + } +} + +func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { + ord, err := ti.lookupOrdinal(h) + if err != nil { + return indexResult{}, false, err + } + if ord == ti.chunkCount { + return indexResult{}, false, nil + } + return ti.getIndexEntry(ord), true, nil +} + +// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount| +// if absent. +func (ti onHeapTableIndex) lookupOrdinal(h *addr) (uint32, error) { + prefix := h.Prefix() + + for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixAt(idx) == prefix; idx++ { + m, err := ti.EntrySuffixMatches(idx, h) + if err != nil { + return ti.chunkCount, err + } + if m { + return ti.ordinalAt(idx), nil + } + } + + return ti.chunkCount, nil +} + +// prefixIdx returns the first position in |tr.prefixes| whose value == +// |prefix|. 
Returns |tr.chunkCount| if absent +func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) { + // NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in + // an extremely tight loop and inlining the code was a significant perf improvement. + idx, j := 0, ti.chunkCount + for idx < j { + h := idx + (j-idx)/2 // avoid overflow when computing h + // i ≤ h < j + if ti.prefixAt(h) < prefix { + idx = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + + return +} + +func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) { + off := int64(prefixTupleSize * idx) + b := ti.tupleB[off : off+prefixTupleSize] + + prefix = binary.BigEndian.Uint64(b[:]) + ord = binary.BigEndian.Uint32(b[addrPrefixSize:]) + return prefix, ord +} + +func (ti onHeapTableIndex) prefixAt(idx uint32) uint64 { + off := int64(prefixTupleSize * idx) + b := ti.tupleB[off : off+addrPrefixSize] + return binary.BigEndian.Uint64(b) +} + +func (ti onHeapTableIndex) ordinalAt(idx uint32) uint32 { + off := int64(prefixTupleSize*idx) + addrPrefixSize + b := ti.tupleB[off : off+ordinalSize] + return binary.BigEndian.Uint32(b) +} + +func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 { + off := int64(offsetSize * ord) + b := ti.offsetB[off : off+offsetSize] + return binary.BigEndian.Uint64(b) +} + +func (ti onHeapTableIndex) Ordinals() ([]uint32, error) { + o := make([]uint32, ti.chunkCount) + for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize { + b := ti.tupleB[off+addrPrefixSize : off+prefixTupleSize] + o[i] = binary.BigEndian.Uint32(b) + } + return o, nil +} + +func (ti onHeapTableIndex) Prefixes() ([]uint64, error) { + p := make([]uint64, ti.chunkCount) + for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize { + b := ti.tupleB[off : off+addrPrefixSize] + p[i] = binary.BigEndian.Uint64(b) + } + return p, nil +} + +// TableFileSize returns the size of 
the table file that this index references. +// This assumes that the index follows immediately after the last chunk in the +// file and that the last chunk in the file is in the index. +func (ti onHeapTableIndex) TableFileSize() uint64 { + if ti.chunkCount == 0 { + return footerSize + } + entry := ti.getIndexEntry(ti.chunkCount - 1) + offset, len := entry.Offset(), uint64(entry.Length()) + return offset + len + indexSize(ti.chunkCount) + footerSize +} + +func (ti onHeapTableIndex) TotalUncompressedData() uint64 { + return ti.totalUncompressedData +} + +func (ti onHeapTableIndex) Close() error { + return nil +} + +func (ti onHeapTableIndex) Clone() (tableIndex, error) { + return ti, nil +} + +// mmap table index + +type mmapIndexEntry []byte + +const mmapIndexEntryOffsetStart = addrSuffixSize +const mmapIndexEntryLengthStart = addrSuffixSize + uint64Size + +func (e mmapIndexEntry) suffix() []byte { + return e[:addrSuffixSize] +} + +func (e mmapIndexEntry) Offset() uint64 { + return binary.BigEndian.Uint64(e[mmapIndexEntryOffsetStart:]) +} + +func (e mmapIndexEntry) Length() uint32 { + return binary.BigEndian.Uint32(e[mmapIndexEntryLengthStart:]) +} + +func mmapOffheapSize(chunks int) int { + pageSize := 4096 + esz := addrSuffixSize + uint64Size + lengthSize + min := esz * chunks + if min%pageSize == 0 { + return min + } else { + return (min/pageSize + 1) * pageSize + } +} + +// An mmapIndexEntry is an addrSuffix, a BigEndian uint64 for the offset and a +// BigEnding uint32 for the chunk size. 
+const mmapIndexEntrySize = addrSuffixSize + uint64Size + lengthSize + +type mmapOrdinal struct { + idx int + offset uint64 +} +type mmapOrdinalSlice []mmapOrdinal + +func (s mmapOrdinalSlice) Len() int { return len(s) } +func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset } +func (s mmapOrdinalSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +type mmapTableIndex struct { + chunkCount uint32 + totalUncompressedData uint64 + fileSz uint64 + prefixes []uint64 + data mmap.MMap + refCnt *int32 +} + +func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) { + flags := 0 + if f == nil { + flags = mmap.ANON + } + arr, err := mmap.MapRegion(f, mmapOffheapSize(int(ti.chunkCount)), mmap.RDWR, flags, 0) + if err != nil { + return mmapTableIndex{}, err + } + var a addr + for i := uint32(0); i < ti.chunkCount; i++ { + idx := i * mmapIndexEntrySize + si := addrSuffixSize * ti.ordinalAt(i) + copy(arr[idx:], ti.suffixB[si:si+addrSuffixSize]) + + e, err := ti.IndexEntry(i, &a) + if err != nil { + return mmapTableIndex{}, err + } + binary.BigEndian.PutUint64(arr[idx+mmapIndexEntryOffsetStart:], e.Offset()) + binary.BigEndian.PutUint32(arr[idx+mmapIndexEntryLengthStart:], e.Length()) + } + + refCnt := new(int32) + *refCnt = 1 + p, err := ti.Prefixes() + if err != nil { + return mmapTableIndex{}, err + } + return mmapTableIndex{ + ti.chunkCount, + ti.totalUncompressedData, + ti.TableFileSize(), + p, + arr, + refCnt, + }, nil +} + +func (i mmapTableIndex) ChunkCount() uint32 { + return i.chunkCount +} + +func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) { + mi := idx * mmapIndexEntrySize + e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) + return bytes.Equal(e.suffix(), h[addrPrefixSize:]), nil +} + +func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) { + mi := idx * mmapIndexEntrySize + e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) + if a != nil { + 
binary.BigEndian.PutUint64(a[:], i.prefixes[idx]) + copy(a[addrPrefixSize:], e.suffix()) + } + return e, nil +} + +func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { + prefix := binary.BigEndian.Uint64(h[:]) + for idx := i.prefixIdx(prefix); idx < i.chunkCount && i.prefixes[idx] == prefix; idx++ { + mi := idx * mmapIndexEntrySize + e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) + if bytes.Equal(e.suffix(), h[addrPrefixSize:]) { + return e, true, nil + } + } + return mmapIndexEntry{}, false, nil +} + +func (i mmapTableIndex) Ordinals() ([]uint32, error) { + s := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount)) + for idx := 0; uint32(idx) < i.chunkCount; idx++ { + mi := idx * mmapIndexEntrySize + e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) + s[idx] = mmapOrdinal{idx, e.Offset()} + } + sort.Sort(s) + res := make([]uint32, i.chunkCount) + for j, r := range s { + res[r.idx] = uint32(j) + } + return res, nil +} + +func (i mmapTableIndex) Prefixes() ([]uint64, error) { + return i.prefixes, nil +} + +func (i mmapTableIndex) TableFileSize() uint64 { + return i.fileSz +} + +func (i mmapTableIndex) TotalUncompressedData() uint64 { + return i.totalUncompressedData +} + +func (i mmapTableIndex) Close() error { + cnt := atomic.AddInt32(i.refCnt, -1) + if cnt == 0 { + return i.data.Unmap() + } + if cnt < 0 { + panic("Close() called and reduced ref count to < 0.") + } + return nil +} + +func (i mmapTableIndex) Clone() (tableIndex, error) { + cnt := atomic.AddInt32(i.refCnt, 1) + if cnt == 1 { + panic("Clone() called after last Close(). This index is no longer valid.") + } + return i, nil +} + +func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) { + // NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in + // an extremely tight loop and inlining the code was a significant perf improvement. 
+ idx, j := 0, i.chunkCount + for idx < j { + h := idx + (j-idx)/2 // avoid overflow when computing h + // i ≤ h < j + if i.prefixes[h] < prefix { + idx = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + return +} diff --git a/go/store/nbs/table_index_test.go b/go/store/nbs/table_index_test.go new file mode 100644 index 0000000000..a185f4c118 --- /dev/null +++ b/go/store/nbs/table_index_test.go @@ -0,0 +1,90 @@ +package nbs + +import ( + "io" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseTableIndex(t *testing.T) { + f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx") + require.NoError(t, err) + defer f.Close() + bs, err := io.ReadAll(f) + require.NoError(t, err) + idx, err := parseTableIndex(bs) + require.NoError(t, err) + defer idx.Close() + assert.Equal(t, uint32(596), idx.ChunkCount()) + seen := make(map[addr]bool) + for i := uint32(0); i < idx.ChunkCount(); i++ { + var onheapaddr addr + e, err := idx.IndexEntry(i, &onheapaddr) + require.NoError(t, err) + if _, ok := seen[onheapaddr]; !ok { + seen[onheapaddr] = true + lookupe, ok, err := idx.Lookup(&onheapaddr) + require.NoError(t, err) + assert.True(t, ok) + assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr) + assert.Equal(t, e.Length(), lookupe.Length()) + } + } +} + +func TestMMapIndex(t *testing.T) { + f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx") + require.NoError(t, err) + defer f.Close() + bs, err := io.ReadAll(f) + require.NoError(t, err) + idx, err := parseTableIndex(bs) + require.NoError(t, err) + defer idx.Close() + mmidx, err := newMmapTableIndex(idx, nil) + require.NoError(t, err) + defer mmidx.Close() + assert.Equal(t, idx.ChunkCount(), mmidx.ChunkCount()) + seen := make(map[addr]bool) + for i := uint32(0); i < idx.ChunkCount(); i++ { + var onheapaddr addr + onheapentry, err := 
idx.IndexEntry(i, &onheapaddr) + require.NoError(t, err) + var mmaddr addr + mmentry, err := mmidx.IndexEntry(i, &mmaddr) + require.NoError(t, err) + assert.Equal(t, onheapaddr, mmaddr) + assert.Equal(t, onheapentry.Offset(), mmentry.Offset()) + assert.Equal(t, onheapentry.Length(), mmentry.Length()) + if _, ok := seen[onheapaddr]; !ok { + seen[onheapaddr] = true + mmentry, found, err := mmidx.Lookup(&onheapaddr) + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr) + assert.Equal(t, onheapentry.Length(), mmentry.Length()) + } + wrongaddr := onheapaddr + if wrongaddr[19] != 0 { + wrongaddr[19] = 0 + _, found, err := mmidx.Lookup(&wrongaddr) + require.NoError(t, err) + assert.False(t, found) + } + } + o1, err := idx.Ordinals() + require.NoError(t, err) + o2, err := mmidx.Ordinals() + require.NoError(t, err) + assert.Equal(t, o1, o2) + p1, err := idx.Prefixes() + require.NoError(t, err) + p2, err := mmidx.Prefixes() + require.NoError(t, err) + assert.Equal(t, p1, p2) + assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize()) + assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData()) +} diff --git a/go/store/nbs/table_persister.go b/go/store/nbs/table_persister.go index 0f2d611ac4..a0fd054994 100644 --- a/go/store/nbs/table_persister.go +++ b/go/store/nbs/table_persister.go @@ -283,15 +283,16 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e if onHeap, ok := index.(onHeapTableIndex); ok { // TODO: copy the lengths and suffixes as a byte-copy from src BUG #3438 // Bring over the lengths block, in order - for _, length := range onHeap.lengths { - binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], length) + for ord := uint32(0); ord < onHeap.chunkCount; ord++ { + e := onHeap.getIndexEntry(ord) + binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], e.Length()) lengthsPos += 
lengthSize } // Bring over the suffixes block, in order - n := copy(plan.mergedIndex[suffixesPos:], onHeap.suffixes) + n := copy(plan.mergedIndex[suffixesPos:], onHeap.suffixB) - if n != len(onHeap.suffixes) { + if n != len(onHeap.suffixB) { return compactionPlan{}, errors.New("failed to copy all data") } diff --git a/go/store/nbs/table_reader.go b/go/store/nbs/table_reader.go index 32f0e07071..5060f9c999 100644 --- a/go/store/nbs/table_reader.go +++ b/go/store/nbs/table_reader.go @@ -22,20 +22,16 @@ package nbs import ( - "bytes" "context" "encoding/binary" "errors" "io" - "os" "sort" "sync/atomic" - "github.com/dolthub/mmap-go" "github.com/golang/snappy" "golang.org/x/sync/errgroup" - "github.com/dolthub/dolt/go/libraries/utils/iohelp" "github.com/dolthub/dolt/go/store/chunks" "github.com/dolthub/dolt/go/store/hash" ) @@ -107,14 +103,6 @@ func init() { // ErrInvalidTableFile is an error returned when a table file is corrupt or invalid. var ErrInvalidTableFile = errors.New("invalid or corrupt table file") -type onHeapTableIndex struct { - chunkCount uint32 - totalUncompressedData uint64 - prefixes, offsets []uint64 - lengths, ordinals []uint32 - suffixes []byte -} - type indexEntry interface { Offset() uint64 Length() uint32 @@ -133,185 +121,6 @@ func (ir indexResult) Length() uint32 { return ir.l } -// An mmapIndexEntry is an addrSuffix, a BigEndian uint64 for the offset and a -// BigEnding uint32 for the chunk size. 
-const mmapIndexEntrySize = addrSuffixSize + uint64Size + lengthSize - -type mmapOrdinalSlice []mmapOrdinal - -func (s mmapOrdinalSlice) Len() int { return len(s) } -func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset } -func (s mmapOrdinalSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -func (i mmapTableIndex) Ordinals() ([]uint32, error) { - s := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount)) - for idx := 0; uint32(idx) < i.chunkCount; idx++ { - mi := idx * mmapIndexEntrySize - e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) - s[idx] = mmapOrdinal{idx, e.Offset()} - } - sort.Sort(s) - res := make([]uint32, i.chunkCount) - for j, r := range s { - res[r.idx] = uint32(j) - } - return res, nil -} - -type mmapTableIndex struct { - chunkCount uint32 - totalUncompressedData uint64 - fileSz uint64 - prefixes []uint64 - data mmap.MMap - refCnt *int32 -} - -func (i mmapTableIndex) Prefixes() ([]uint64, error) { - return i.prefixes, nil -} - -type mmapOrdinal struct { - idx int - offset uint64 -} - -func (i mmapTableIndex) TableFileSize() uint64 { - return i.fileSz -} - -func (i mmapTableIndex) ChunkCount() uint32 { - return i.chunkCount -} - -func (i mmapTableIndex) TotalUncompressedData() uint64 { - return i.totalUncompressedData -} - -func (i mmapTableIndex) Close() error { - cnt := atomic.AddInt32(i.refCnt, -1) - if cnt == 0 { - return i.data.Unmap() - } - if cnt < 0 { - panic("Close() called and reduced ref count to < 0.") - } - return nil -} - -func (i mmapTableIndex) Clone() (tableIndex, error) { - cnt := atomic.AddInt32(i.refCnt, 1) - if cnt == 1 { - panic("Clone() called after last Close(). This index is no longer valid.") - } - return i, nil -} - -func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) { - // NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in - // an extremely tight loop and inlining the code was a significant perf improvement. 
- idx, j := 0, i.chunkCount - for idx < j { - h := idx + (j-idx)/2 // avoid overflow when computing h - // i ≤ h < j - if i.prefixes[h] < prefix { - idx = h + 1 // preserves f(i-1) == false - } else { - j = h // preserves f(j) == true - } - } - return -} - -func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { - prefix := binary.BigEndian.Uint64(h[:]) - for idx := i.prefixIdx(prefix); idx < i.chunkCount && i.prefixes[idx] == prefix; idx++ { - mi := idx * mmapIndexEntrySize - e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) - if bytes.Equal(e.suffix(), h[addrPrefixSize:]) { - return e, true, nil - } - } - return mmapIndexEntry{}, false, nil -} - -func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) { - mi := idx * mmapIndexEntrySize - e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) - return bytes.Equal(e.suffix(), h[addrPrefixSize:]), nil -} - -func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) { - mi := idx * mmapIndexEntrySize - e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize]) - if a != nil { - binary.BigEndian.PutUint64(a[:], i.prefixes[idx]) - copy(a[addrPrefixSize:], e.suffix()) - } - return e, nil -} - -type mmapIndexEntry []byte - -const mmapIndexEntryOffsetStart = addrSuffixSize -const mmapIndexEntryLengthStart = addrSuffixSize + uint64Size - -func (e mmapIndexEntry) suffix() []byte { - return e[:addrSuffixSize] -} - -func (e mmapIndexEntry) Offset() uint64 { - return binary.BigEndian.Uint64(e[mmapIndexEntryOffsetStart:]) -} - -func (e mmapIndexEntry) Length() uint32 { - return binary.BigEndian.Uint32(e[mmapIndexEntryLengthStart:]) -} - -func mmapOffheapSize(chunks int) int { - pageSize := 4096 - esz := addrSuffixSize + uint64Size + lengthSize - min := esz * chunks - if min%pageSize == 0 { - return min - } else { - return (min/pageSize + 1) * pageSize - } -} - -func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) { - flags := 0 - if f == nil { - 
flags = mmap.ANON - } - arr, err := mmap.MapRegion(f, mmapOffheapSize(len(ti.ordinals)), mmap.RDWR, flags, 0) - if err != nil { - return mmapTableIndex{}, err - } - for i := range ti.ordinals { - idx := i * mmapIndexEntrySize - si := addrSuffixSize * ti.ordinals[i] - copy(arr[idx:], ti.suffixes[si:si+addrSuffixSize]) - binary.BigEndian.PutUint64(arr[idx+mmapIndexEntryOffsetStart:], ti.offsets[ti.ordinals[i]]) - binary.BigEndian.PutUint32(arr[idx+mmapIndexEntryLengthStart:], ti.lengths[ti.ordinals[i]]) - } - - refCnt := new(int32) - *refCnt = 1 - p, err := ti.Prefixes() - if err != nil { - return mmapTableIndex{}, err - } - return mmapTableIndex{ - ti.chunkCount, - ti.totalUncompressedData, - ti.TableFileSize(), - p, - arr, - refCnt, - }, nil -} - type tableReaderAt interface { ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) } @@ -330,231 +139,6 @@ type tableReader struct { blockSize uint64 } -type tableIndex interface { - // ChunkCount returns the total number of chunks in the indexed file. - ChunkCount() uint32 - // EntrySuffixMatches returns true if the entry at index |idx| matches - // the suffix of the address |h|. Used by |Lookup| after finding - // matching indexes based on |Prefixes|. - EntrySuffixMatches(idx uint32, h *addr) (bool, error) - // IndexEntry returns the |indexEntry| at |idx|. Optionally puts the - // full address of that entry in |a| if |a| is not |nil|. - IndexEntry(idx uint32, a *addr) (indexEntry, error) - // Lookup returns an |indexEntry| for the chunk corresponding to the - // provided address |h|. Second returns is |true| if an entry exists - // and |false| otherwise. - Lookup(h *addr) (indexEntry, bool, error) - // Ordinals returns a slice of indexes which maps the |i|th chunk in - // the indexed file to its corresponding entry in index. The |i|th - // entry in the result is the |i|th chunk in the indexed file, and its - // corresponding value in the slice is the index entry that maps to it. 
- Ordinals() ([]uint32, error) - // Prefixes returns the sorted slice of |uint64| |addr| prefixes; each - // entry corresponds to an indexed chunk address. - Prefixes() ([]uint64, error) - // TableFileSize returns the total size of the indexed table file, in bytes. - TableFileSize() uint64 - // TotalUncompressedData returns the total uncompressed data size of - // the table file. Used for informational statistics only. - TotalUncompressedData() uint64 - - // Close releases any resources used by this tableIndex. - Close() error - - // Clone returns a |tableIndex| with the same contents which can be - // |Close|d independently. - Clone() (tableIndex, error) -} - -var _ tableIndex = mmapTableIndex{} - -// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index -// and footer, though it may contain an unspecified number of bytes before that data. -// |tableIndex| doesn't keep alive any references to |buff|. -func parseTableIndex(buff []byte) (onHeapTableIndex, error) { - return ReadTableIndex(bytes.NewReader(buff)) -} - -func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) { - footerSize := int64(magicNumberSize + uint64Size + uint32Size) - _, err = rd.Seek(-footerSize, io.SeekEnd) - - if err != nil { - return 0, 0, err - } - - footer, err := iohelp.ReadNBytes(rd, int(footerSize)) - - if err != nil { - return 0, 0, err - } - - if string(footer[uint32Size+uint64Size:]) != magicNumber { - return 0, 0, ErrInvalidTableFile - } - - chunkCount = binary.BigEndian.Uint32(footer) - totalUncompressedData = binary.BigEndian.Uint64(footer[uint32Size:]) - - return -} - -func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { - footerSize := int64(magicNumberSize + uint64Size + uint32Size) - chunkCount, totalUncompressedData, err := ReadTableFooter(rd) - if err != nil { - return onHeapTableIndex{}, err - } - - suffixesSize := int64(chunkCount) * addrSuffixSize - lengthsSize := int64(chunkCount) * 
lengthSize - tuplesSize := int64(chunkCount) * prefixTupleSize - indexSize := suffixesSize + lengthsSize + tuplesSize - - _, err = rd.Seek(-(indexSize + footerSize), io.SeekEnd) - if err != nil { - return onHeapTableIndex{}, ErrInvalidTableFile - } - - indexBytes, err := iohelp.ReadNBytes(rd, int(indexSize)) - if err != nil { - return onHeapTableIndex{}, ErrInvalidTableFile - } - - prefixes, ordinals := computePrefixes(chunkCount, indexBytes[:tuplesSize]) - lengths, offsets := computeOffsets(chunkCount, indexBytes[tuplesSize:tuplesSize+lengthsSize]) - suffixes := indexBytes[tuplesSize+lengthsSize:] - - return onHeapTableIndex{ - chunkCount, totalUncompressedData, - prefixes, offsets, - lengths, ordinals, - suffixes, - }, nil -} - -func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) { - lengths = make([]uint32, count) - offsets = make([]uint64, count) - - lengths[0] = binary.BigEndian.Uint32(buff) - - for i := uint64(1); i < uint64(count); i++ { - lengths[i] = binary.BigEndian.Uint32(buff[i*lengthSize:]) - offsets[i] = offsets[i-1] + uint64(lengths[i-1]) - } - return -} - -func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []uint32) { - prefixes = make([]uint64, count) - ordinals = make([]uint32, count) - - for i := uint64(0); i < uint64(count); i++ { - idx := i * prefixTupleSize - prefixes[i] = binary.BigEndian.Uint64(buff[idx:]) - ordinals[i] = binary.BigEndian.Uint32(buff[idx+addrPrefixSize:]) - } - return -} - -func (ti onHeapTableIndex) prefixIdxToOrdinal(idx uint32) uint32 { - return ti.ordinals[idx] -} - -// TableFileSize returns the size of the table file that this index references. -// This assumes that the index follows immediately after the last chunk in the -// file and that the last chunk in the file is in the index. 
-func (ti onHeapTableIndex) TableFileSize() uint64 { - if ti.chunkCount == 0 { - return footerSize - } - offset, len := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1]) - return offset + len + indexSize(ti.chunkCount) + footerSize -} - -// prefixIdx returns the first position in |tr.prefixes| whose value == -// |prefix|. Returns |tr.chunkCount| if absent -func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) { - // NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in - // an extremely tight loop and inlining the code was a significant perf improvement. - idx, j := 0, ti.chunkCount - for idx < j { - h := idx + (j-idx)/2 // avoid overflow when computing h - // i ≤ h < j - if ti.prefixes[h] < prefix { - idx = h + 1 // preserves f(i-1) == false - } else { - j = h // preserves f(j) == true - } - } - - return -} - -// EntrySuffixMatches returns true IFF the suffix for prefix entry |idx| -// matches the address |a|. -func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) { - li := uint64(ti.ordinals[idx]) * addrSuffixSize - return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]), nil -} - -// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount| -// if absent. 
-func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 { - prefix := h.Prefix() - - for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ { - if b, _ := ti.EntrySuffixMatches(idx, h); b { - return ti.ordinals[idx] - } - } - - return ti.chunkCount -} - -func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) { - ord := ti.ordinals[idx] - if a != nil { - binary.BigEndian.PutUint64(a[:], ti.prefixes[idx]) - li := uint64(ord) * addrSuffixSize - copy(a[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) - } - return indexResult{ti.offsets[ord], ti.lengths[ord]}, nil -} - -func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) { - ord := ti.lookupOrdinal(h) - if ord == ti.chunkCount { - return indexResult{}, false, nil - } - return indexResult{ti.offsets[ord], ti.lengths[ord]}, true, nil -} - -func (ti onHeapTableIndex) Prefixes() ([]uint64, error) { - return ti.prefixes, nil -} - -func (ti onHeapTableIndex) Ordinals() ([]uint32, error) { - return ti.ordinals, nil -} - -func (ti onHeapTableIndex) ChunkCount() uint32 { - return ti.chunkCount -} - -func (ti onHeapTableIndex) TotalUncompressedData() uint64 { - return ti.totalUncompressedData -} - -func (ti onHeapTableIndex) Close() error { - return nil -} - -func (ti onHeapTableIndex) Clone() (tableIndex, error) { - return ti, nil -} - // newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index // and footer, though it may contain an unspecified number of bytes before that data. r should allow // retrieving any desired range of bytes from the table. 
diff --git a/go/store/nbs/table_reader_test.go b/go/store/nbs/table_reader_test.go index 91ebfa8e5f..201e56d3b3 100644 --- a/go/store/nbs/table_reader_test.go +++ b/go/store/nbs/table_reader_test.go @@ -15,12 +15,9 @@ package nbs import ( - "io" - "os" "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestCompressedChunkIsEmpty(t *testing.T) { @@ -32,86 +29,6 @@ func TestCompressedChunkIsEmpty(t *testing.T) { } } -func TestParseTableIndex(t *testing.T) { - f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx") - require.NoError(t, err) - defer f.Close() - bs, err := io.ReadAll(f) - require.NoError(t, err) - idx, err := parseTableIndex(bs) - require.NoError(t, err) - defer idx.Close() - assert.Equal(t, uint32(596), idx.ChunkCount()) - seen := make(map[addr]bool) - for i := uint32(0); i < idx.ChunkCount(); i++ { - var onheapaddr addr - e, err := idx.IndexEntry(i, &onheapaddr) - require.NoError(t, err) - if _, ok := seen[onheapaddr]; !ok { - seen[onheapaddr] = true - lookupe, ok, err := idx.Lookup(&onheapaddr) - require.NoError(t, err) - assert.True(t, ok) - assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr) - assert.Equal(t, e.Length(), lookupe.Length()) - } - } -} - -func TestMMapIndex(t *testing.T) { - f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx") - require.NoError(t, err) - defer f.Close() - bs, err := io.ReadAll(f) - require.NoError(t, err) - idx, err := parseTableIndex(bs) - require.NoError(t, err) - defer idx.Close() - mmidx, err := newMmapTableIndex(idx, nil) - require.NoError(t, err) - defer mmidx.Close() - assert.Equal(t, idx.ChunkCount(), mmidx.ChunkCount()) - seen := make(map[addr]bool) - for i := uint32(0); i < idx.ChunkCount(); i++ { - var onheapaddr addr - onheapentry, err := idx.IndexEntry(i, &onheapaddr) - require.NoError(t, err) - var mmaddr addr - mmentry, err := mmidx.IndexEntry(i, &mmaddr) - require.NoError(t, err) 
- assert.Equal(t, onheapaddr, mmaddr) - assert.Equal(t, onheapentry.Offset(), mmentry.Offset()) - assert.Equal(t, onheapentry.Length(), mmentry.Length()) - if _, ok := seen[onheapaddr]; !ok { - seen[onheapaddr] = true - mmentry, found, err := mmidx.Lookup(&onheapaddr) - require.NoError(t, err) - assert.True(t, found) - assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr) - assert.Equal(t, onheapentry.Length(), mmentry.Length()) - } - wrongaddr := onheapaddr - if wrongaddr[19] != 0 { - wrongaddr[19] = 0 - _, found, err := mmidx.Lookup(&wrongaddr) - require.NoError(t, err) - assert.False(t, found) - } - } - o1, err := idx.Ordinals() - require.NoError(t, err) - o2, err := mmidx.Ordinals() - require.NoError(t, err) - assert.Equal(t, o1, o2) - p1, err := idx.Prefixes() - require.NoError(t, err) - p2, err := mmidx.Prefixes() - require.NoError(t, err) - assert.Equal(t, p1, p2) - assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize()) - assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData()) -} - func TestCanReadAhead(t *testing.T) { type expected struct { end uint64 diff --git a/go/store/nbs/util.go b/go/store/nbs/util.go index f721f9e713..6a7a8a980b 100644 --- a/go/store/nbs/util.go +++ b/go/store/nbs/util.go @@ -80,7 +80,7 @@ func GetTableIndexPrefixes(rd io.ReadSeeker) (prefixes []uint64, err error) { } }() - return idx.prefixes, nil + return idx.Prefixes() } func GuessPrefixOrdinal(prefix uint64, n uint32) int { From abf9119a677ff9678f780eb254e9091ef771b3cb Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 14 Feb 2022 12:46:27 -0800 Subject: [PATCH 092/105] memory profiling --- go/performance/memprof/membench_test.go | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 go/performance/memprof/membench_test.go diff --git a/go/performance/memprof/membench_test.go b/go/performance/memprof/membench_test.go new file mode 100644 index 
0000000000..d7af66587c --- /dev/null +++ b/go/performance/memprof/membench_test.go @@ -0,0 +1,41 @@ +package memprof + +import ( + "context" + "flag" + "log" + "os" + "testing" + + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/utils/filesys" + "github.com/dolthub/dolt/go/store/types" +) + +var loc = flag.String("doltDir", "", "Directory of dolt database") +var urlStr string +var ddb *doltdb.DoltDB + +func TestMain(m *testing.M) { + flag.Parse() + if *loc == "" { + log.Panicf("doltDir must be specified") + } + + urlStr = "file://" + *loc + dbfactory.DoltDataDir + + code := m.Run() + os.Exit(code) +} + +func BenchmarkLoadDoltDBMemory(b *testing.B) { + for i := 0; i < b.N; i++ { + ctx := context.Background() + var err error + ddb, err = doltdb.LoadDoltDB(ctx, types.Format_Default, urlStr, filesys.LocalFS) + if err != nil { + b.Fatalf("failed to load doltdb, err: %s", err.Error()) + } + } +} From f580a5d03bac5909b9972639c200401d7d842c12 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 14 Feb 2022 13:49:11 -0800 Subject: [PATCH 093/105] Update mmap_table_reader to copy index data into heap before unmapping --- go/store/nbs/mmap_table_reader.go | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/go/store/nbs/mmap_table_reader.go b/go/store/nbs/mmap_table_reader.go index 454ec9db27..cd23c3a48e 100644 --- a/go/store/nbs/mmap_table_reader.go +++ b/go/store/nbs/mmap_table_reader.go @@ -105,29 +105,32 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index // index. Mmap won't take an offset that's not page-aligned, so find the nearest page boundary preceding the index. indexOffset := fi.Size() - int64(footerSize) - int64(indexSize(chunkCount)) aligned := indexOffset / mmapAlignment * mmapAlignment // Thanks, integer arithmetic! 
+ length := int(fi.Size() - aligned) if fi.Size()-aligned > maxInt { err = fmt.Errorf("%s - size: %d alignment: %d> maxInt: %d", path, fi.Size(), aligned, maxInt) return } - var mm mmap.MMap - mm, err = mmap.MapRegion(f, int(fi.Size()-aligned), mmap.RDONLY, 0, aligned) - - if err != nil { - return - } - - defer func() { - unmapErr := mm.Unmap() - - if unmapErr != nil { - err = unmapErr + buff := make([]byte, indexSize(chunkCount)+footerSize) + func() { + var mm mmap.MMap + mm, err = mmap.MapRegion(f, length, mmap.RDONLY, 0, aligned) + if err != nil { + return } + + defer func() { + unmapErr := mm.Unmap() + + if unmapErr != nil { + err = unmapErr + } + }() + copy(buff, mm[indexOffset-aligned:]) }() - buff := []byte(mm) - ti, err = parseTableIndex(buff[indexOffset-aligned:]) + ti, err = parseTableIndex(buff) if err != nil { return From bf0cbd4ab3f60912dc35a32a11764626120d251c Mon Sep 17 00:00:00 2001 From: druvv Date: Mon, 14 Feb 2022 22:27:27 +0000 Subject: [PATCH 094/105] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/store/nbs/table_index.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/store/nbs/table_index.go b/go/store/nbs/table_index.go index e4f4821dc3..e9b7395deb 100644 --- a/go/store/nbs/table_index.go +++ b/go/store/nbs/table_index.go @@ -8,8 +8,9 @@ import ( "sort" "sync/atomic" - "github.com/dolthub/dolt/go/libraries/utils/iohelp" "github.com/dolthub/mmap-go" + + "github.com/dolthub/dolt/go/libraries/utils/iohelp" ) type tableIndex interface { From 5822653cd8f0132b26ae61718313e3fc36173e2d Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 14 Feb 2022 14:43:11 -0800 Subject: [PATCH 095/105] cleanup --- go/performance/memprof/membench_test.go | 5 +- go/store/nbs/table_index.go | 241 ------------------------ 2 files changed, 1 insertion(+), 245 deletions(-) diff --git a/go/performance/memprof/membench_test.go b/go/performance/memprof/membench_test.go index d7af66587c..853e7d6bfc 100644 --- 
a/go/performance/memprof/membench_test.go +++ b/go/performance/memprof/membench_test.go @@ -3,7 +3,6 @@ package memprof import ( "context" "flag" - "log" "os" "testing" @@ -19,9 +18,6 @@ var ddb *doltdb.DoltDB func TestMain(m *testing.M) { flag.Parse() - if *loc == "" { - log.Panicf("doltDir must be specified") - } urlStr = "file://" + *loc + dbfactory.DoltDataDir @@ -30,6 +26,7 @@ func TestMain(m *testing.M) { } func BenchmarkLoadDoltDBMemory(b *testing.B) { + b.SkipNow() for i := 0; i < b.N; i++ { ctx := context.Background() var err error diff --git a/go/store/nbs/table_index.go b/go/store/nbs/table_index.go index e4f4821dc3..95ad1d376d 100644 --- a/go/store/nbs/table_index.go +++ b/go/store/nbs/table_index.go @@ -48,247 +48,6 @@ type tableIndex interface { Clone() (tableIndex, error) } -//var _ tableIndex = mmapTableIndex{} -// -//// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index -//// and footer, though it may contain an unspecified number of bytes before that data. -//// |tableIndex| doesn't keep alive any references to |buff|. 
-//func parseTableIndex(buff []byte) (onHeapTableIndex, error) { -// return ReadTableIndex(bytes.NewReader(buff)) -//} -// -//func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { -// footerSize := int64(magicNumberSize + uint64Size + uint32Size) -// _, err := rd.Seek(-footerSize, io.SeekEnd) -// -// if err != nil { -// return onHeapTableIndex{}, err -// } -// -// footer, err := iohelp.ReadNBytes(rd, int(footerSize)) -// -// if err != nil { -// return onHeapTableIndex{}, err -// } -// -// if string(footer[uint32Size+uint64Size:]) != magicNumber { -// return onHeapTableIndex{}, ErrInvalidTableFile -// } -// -// chunkCount := binary.BigEndian.Uint32(footer) -// totalUncompressedData := binary.BigEndian.Uint64(footer[uint32Size:]) -// -// // index -// suffixesSize := int64(chunkCount) * addrSuffixSize -// lengthsSize := int64(chunkCount) * lengthSize -// tuplesSize := int64(chunkCount) * prefixTupleSize -// indexSize := suffixesSize + lengthsSize + tuplesSize -// -// _, err = rd.Seek(-(indexSize + footerSize), io.SeekEnd) -// if err != nil { -// return onHeapTableIndex{}, ErrInvalidTableFile -// } -// -// prefixes, ordinals, err := streamComputePrefixes(chunkCount, rd) -// if err != nil { -// return onHeapTableIndex{}, ErrInvalidTableFile -// } -// lengths, offsets, err := streamComputeOffsets(chunkCount, rd) -// if err != nil { -// return onHeapTableIndex{}, ErrInvalidTableFile -// } -// suffixes, err := iohelp.ReadNBytes(rd, int(suffixesSize)) -// if err != nil { -// return onHeapTableIndex{}, ErrInvalidTableFile -// } -// -// return onHeapTableIndex{ -// chunkCount, totalUncompressedData, -// prefixes, offsets, -// lengths, ordinals, -// suffixes, -// }, nil -//} -// -//type onHeapTableIndex struct { -// chunkCount uint32 -// totalUncompressedData uint64 -// prefixes, offsets []uint64 -// lengths, ordinals []uint32 -// suffixes []byte -//} -// -//func (ti onHeapTableIndex) ChunkCount() uint32 { -// return ti.chunkCount -//} -// -//// EntrySuffixMatches 
returns true IFF the suffix for prefix entry |idx| -//// matches the address |a|. -//func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool { -// li := uint64(ti.ordinals[idx]) * addrSuffixSize -// return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) -//} -// -//func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry { -// ord := ti.ordinals[idx] -// if a != nil { -// binary.BigEndian.PutUint64(a[:], ti.prefixes[idx]) -// li := uint64(ord) * addrSuffixSize -// copy(a[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) -// } -// return indexResult{ti.offsets[ord], ti.lengths[ord]} -//} -// -//func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool) { -// ord := ti.lookupOrdinal(h) -// if ord == ti.chunkCount { -// return indexResult{}, false -// } -// return indexResult{ti.offsets[ord], ti.lengths[ord]}, true -//} -// -//func (ti onHeapTableIndex) Ordinals() []uint32 { -// return ti.ordinals -//} -// -//func (ti onHeapTableIndex) Prefixes() []uint64 { -// return ti.prefixes -//} -// -//// TableFileSize returns the size of the table file that this index references. -//// This assumes that the index follows immediately after the last chunk in the -//// file and that the last chunk in the file is in the index. 
-//func (ti onHeapTableIndex) TableFileSize() uint64 { -// if ti.chunkCount == 0 { -// return footerSize -// } -// len, offset := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1]) -// return offset + len + indexSize(ti.chunkCount) + footerSize -//} -// -//func (ti onHeapTableIndex) TotalUncompressedData() uint64 { -// return ti.totalUncompressedData -//} -// -//func (ti onHeapTableIndex) Close() error { -// return nil -//} -// -//func (ti onHeapTableIndex) Clone() tableIndex { -// return ti -//} -// -//func (ti onHeapTableIndex) prefixIdxToOrdinal(idx uint32) uint32 { -// return ti.ordinals[idx] -//} -// -//// prefixIdx returns the first position in |tr.prefixes| whose value == -//// |prefix|. Returns |tr.chunkCount| if absent -//func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) { -// // NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in -// // an extremely tight loop and inlining the code was a significant perf improvement. -// idx, j := 0, ti.chunkCount -// for idx < j { -// h := idx + (j-idx)/2 // avoid overflow when computing h -// // i ≤ h < j -// if ti.prefixes[h] < prefix { -// idx = h + 1 // preserves f(i-1) == false -// } else { -// j = h // preserves f(j) == true -// } -// } -// -// return -//} -// -//// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount| -//// if absent. 
-//func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 { -// prefix := h.Prefix() -// -// for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ { -// if ti.EntrySuffixMatches(idx, h) { -// return ti.ordinals[idx] -// } -// } -// -// return ti.chunkCount -//} -// -//func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) { -// lengths = make([]uint32, count) -// offsets = make([]uint64, count) -// -// lengths[0] = binary.BigEndian.Uint32(buff) -// -// for i := uint64(1); i < uint64(count); i++ { -// lengths[i] = binary.BigEndian.Uint32(buff[i*lengthSize:]) -// offsets[i] = offsets[i-1] + uint64(lengths[i-1]) -// } -// return -//} -// -//func streamComputeOffsets(count uint32, rd io.Reader) (lengths []uint32, offsets []uint64, err error) { -// lengths = make([]uint32, count) -// offsets = make([]uint64, count) -// buff := make([]byte, lengthSize) -// -// n, err := rd.Read(buff) -// if err != nil { -// return nil, nil, err -// } -// if n != lengthSize { -// return nil, nil, ErrNotEnoughBytes -// } -// lengths[0] = binary.BigEndian.Uint32(buff) -// -// for i := uint64(1); i < uint64(count); i++ { -// n, err := rd.Read(buff) -// if err != nil { -// return nil, nil, err -// } -// if n != lengthSize { -// return nil, nil, ErrNotEnoughBytes -// } -// lengths[i] = binary.BigEndian.Uint32(buff) -// offsets[i] = offsets[i-1] + uint64(lengths[i-1]) -// } -// -// return -//} -// -//func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []uint32) { -// prefixes = make([]uint64, count) -// ordinals = make([]uint32, count) -// -// for i := uint64(0); i < uint64(count); i++ { -// idx := i * prefixTupleSize -// prefixes[i] = binary.BigEndian.Uint64(buff[idx:]) -// ordinals[i] = binary.BigEndian.Uint32(buff[idx+addrPrefixSize:]) -// } -// return -//} -// -//func streamComputePrefixes(count uint32, rd io.Reader) (prefixes []uint64, ordinals []uint32, err error) { -// prefixes = make([]uint64, 
count) -// ordinals = make([]uint32, count) -// buff := make([]byte, prefixTupleSize) -// -// for i := uint64(0); i < uint64(count); i++ { -// n, err := rd.Read(buff) -// if err != nil { -// return nil, nil, err -// } -// if n != prefixTupleSize { -// return nil, nil, ErrNotEnoughBytes -// } -// prefixes[i] = binary.BigEndian.Uint64(buff) -// ordinals[i] = binary.BigEndian.Uint32(buff[addrPrefixSize:]) -// } -// -// return -//} - func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) { footerSize := int64(magicNumberSize + uint64Size + uint32Size) _, err = rd.Seek(-footerSize, io.SeekEnd) From d31cf08ee2bd9d6c8c11422ac5d0b15fc61aa0ce Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 14 Feb 2022 15:01:43 -0800 Subject: [PATCH 096/105] copyright --- go/performance/memprof/membench_test.go | 14 ++++++++++++++ go/store/nbs/table_index.go | 14 ++++++++++++++ go/store/nbs/table_index_test.go | 14 ++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/go/performance/memprof/membench_test.go b/go/performance/memprof/membench_test.go index 853e7d6bfc..aeb6f0e68d 100644 --- a/go/performance/memprof/membench_test.go +++ b/go/performance/memprof/membench_test.go @@ -1,3 +1,17 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package memprof import ( diff --git a/go/store/nbs/table_index.go b/go/store/nbs/table_index.go index 5e73a51792..4633bc4c6c 100644 --- a/go/store/nbs/table_index.go +++ b/go/store/nbs/table_index.go @@ -1,3 +1,17 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package nbs import ( diff --git a/go/store/nbs/table_index_test.go b/go/store/nbs/table_index_test.go index a185f4c118..6d451872ad 100644 --- a/go/store/nbs/table_index_test.go +++ b/go/store/nbs/table_index_test.go @@ -1,3 +1,17 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package nbs import ( From eeb9d81cc033bac04b113ffb36d522f8c17472ed Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 14 Feb 2022 15:14:33 -0800 Subject: [PATCH 097/105] retrigger checks From e440ea2021cb50091dede30dc958c212974f7517 Mon Sep 17 00:00:00 2001 From: James Cor Date: Mon, 14 Feb 2022 16:32:56 -0800 Subject: [PATCH 098/105] chaging dolt transaction commit to both global and session variable --- go/libraries/doltcore/sqle/dsess/variables.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/libraries/doltcore/sqle/dsess/variables.go b/go/libraries/doltcore/sqle/dsess/variables.go index e54d062a53..d3fb18af4a 100644 --- a/go/libraries/doltcore/sqle/dsess/variables.go +++ b/go/libraries/doltcore/sqle/dsess/variables.go @@ -39,7 +39,7 @@ func init() { sql.SystemVariables.AddSystemVariables([]sql.SystemVariable{ { // If true, causes a Dolt commit to occur when you commit a transaction. Name: DoltCommitOnTransactionCommit, - Scope: sql.SystemVariableScope_Session, + Scope: sql.SystemVariableScope_Both, Dynamic: true, SetVarHintApplies: false, Type: sql.NewSystemBoolType(DoltCommitOnTransactionCommit), From 0b3a23483e40946deb549c3c93869078461dd2ed Mon Sep 17 00:00:00 2001 From: James Cor Date: Mon, 14 Feb 2022 16:49:28 -0800 Subject: [PATCH 099/105] adding test --- integration-tests/bats/sql-server-config.bats | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/integration-tests/bats/sql-server-config.bats b/integration-tests/bats/sql-server-config.bats index aa1d842b7f..f391d23d79 100644 --- a/integration-tests/bats/sql-server-config.bats +++ b/integration-tests/bats/sql-server-config.bats @@ -54,6 +54,18 @@ teardown() { [[ "$output" =~ "sqlserver.global.max_connections = 1000" ]] || false } +@test "sql-server-config: dolt_transaction_commit is global variable" { + cd repo1 + start_sql_server repo1 + + insert_query repo1 1 "SET @@PERSIST.dolt_transaction_commit = 1" + server_query repo1 1 "select 
@@GLOBAL.dolt_transaction_commit" "@@GLOBAL.dolt_transaction_commit\n1" + + run dolt config --local --list + [ "$status" -eq 0 ] + [[ "$output" =~ "sqlserver.global.dolt_transaction_commit = 1" ]] || false +} + @test "sql-server-config: persist only global variable during server session" { cd repo1 start_sql_server repo1 From 0e05e79aaf18bc7e19ecb85a39c4fb032d2a9e7a Mon Sep 17 00:00:00 2001 From: James Cor Date: Mon, 14 Feb 2022 17:28:04 -0800 Subject: [PATCH 100/105] updating test --- integration-tests/bats/sql-server-config.bats | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/integration-tests/bats/sql-server-config.bats b/integration-tests/bats/sql-server-config.bats index f391d23d79..141ea2f9cc 100644 --- a/integration-tests/bats/sql-server-config.bats +++ b/integration-tests/bats/sql-server-config.bats @@ -58,12 +58,21 @@ teardown() { cd repo1 start_sql_server repo1 - insert_query repo1 1 "SET @@PERSIST.dolt_transaction_commit = 1" + insert_query repo1 1 "SET @@GLOBAL.dolt_transaction_commit = 1" server_query repo1 1 "select @@GLOBAL.dolt_transaction_commit" "@@GLOBAL.dolt_transaction_commit\n1" + server_query repo1 1 "select @@SESSION.dolt_transaction_commit" "@@SESSION.dolt_transaction_commit\n1" + server_query repo1 1 "select @@dolt_transaction_commit" "@@SESSION.dolt_transaction_commit\n1" - run dolt config --local --list - [ "$status" -eq 0 ] - [[ "$output" =~ "sqlserver.global.dolt_transaction_commit = 1" ]] || false + # only 1 commit + commits=$(dolt log --oneline | wc -l) + [ $commits -eq 1 ] + + # create a table + server_query repo1 1 "create table tmp (i int)" + + # now there are two commits + commits=$(dolt log --oneline | wc -l) + [ $commits -eq 2 ] } @test "sql-server-config: persist only global variable during server session" { From e28969a7c7bc196892e82fe6d00ad9adce59b8f4 Mon Sep 17 00:00:00 2001 From: Hydrocharged Date: Tue, 15 Feb 2022 14:21:38 +0000 Subject: [PATCH 101/105] [ga-bump-dep] Bump dependency in 
Dolt by Hydrocharged --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 720de96fe8..5f9b80f5b3 100644 --- a/go/go.mod +++ b/go/go.mod @@ -68,7 +68,7 @@ require ( ) require ( - github.com/dolthub/go-mysql-server v0.11.1-0.20220214192607-a43d3762ebb4 + github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408 github.com/google/flatbuffers v2.0.5+incompatible github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/prometheus/client_golang v1.11.0 diff --git a/go/go.sum b/go/go.sum index 483957b3d8..57cefc25d4 100755 --- a/go/go.sum +++ b/go/go.sum @@ -170,8 +170,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.11.1-0.20220214192607-a43d3762ebb4 h1:u/hEZaHVHXIzCQr4g0zQvW6NiYUReByizSEz/AKwkZI= -github.com/dolthub/go-mysql-server v0.11.1-0.20220214192607-a43d3762ebb4/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= +github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408 h1:+pG8pYVBWPfDtAF1YMGfvyHvfjDrOjii+AkzNcZLBTA= +github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g= github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= From f54d64f87aa798146113084276924d528ce41767 Mon Sep 17 00:00:00 2001 From: Daylon Wilkins Date: Tue, 15 Feb 
2022 06:29:02 -0800 Subject: [PATCH 102/105] Added interface methods --- go/libraries/doltcore/mvdata/channel_row_source.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/go/libraries/doltcore/mvdata/channel_row_source.go b/go/libraries/doltcore/mvdata/channel_row_source.go index 970168a8c5..109a4d1d7a 100644 --- a/go/libraries/doltcore/mvdata/channel_row_source.go +++ b/go/libraries/doltcore/mvdata/channel_row_source.go @@ -70,6 +70,11 @@ func (c *ChannelRowSource) WithChildren(children ...sql.Node) (sql.Node, error) return c, nil } +// CheckPrivileges implements the sql.Node interface. +func (c *ChannelRowSource) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { + return true +} + // channelRowIter wraps the channel under the sql.RowIter interface type channelRowIter struct { rowChannel chan sql.Row From e276a517e7ce5e3747ebd249ce3d15efcaff5d55 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 15 Feb 2022 10:01:47 -0800 Subject: [PATCH 103/105] return err --- go/store/nbs/mmap_table_reader.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/store/nbs/mmap_table_reader.go b/go/store/nbs/mmap_table_reader.go index cd23c3a48e..2fb5ab7c93 100644 --- a/go/store/nbs/mmap_table_reader.go +++ b/go/store/nbs/mmap_table_reader.go @@ -129,6 +129,9 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index }() copy(buff, mm[indexOffset-aligned:]) }() + if err != nil { + return onHeapTableIndex{}, err + } ti, err = parseTableIndex(buff) From 82815181b697ba36ee18ee8fee140cb72b202f20 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 15 Feb 2022 10:19:52 -0800 Subject: [PATCH 104/105] add parseTableIndexByCopy --- go/store/nbs/aws_chunk_source.go | 32 +++++++++++++------------ go/store/nbs/chunk_source_adapter.go | 2 +- go/store/nbs/file_table_persister.go | 2 +- go/store/nbs/table_index.go | 36 ++++++++++++++++++++-------- go/store/nbs/util.go | 4 ++-- 5 files changed, 47 insertions(+), 29 
deletions(-) diff --git a/go/store/nbs/aws_chunk_source.go b/go/store/nbs/aws_chunk_source.go index 3df210052b..71fe7c0e3a 100644 --- a/go/store/nbs/aws_chunk_source.go +++ b/go/store/nbs/aws_chunk_source.go @@ -52,48 +52,50 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead } t1 := time.Now() - indexBytes, tra, err := func() ([]byte, tableReaderAt, error) { + index, tra, err := func() (tableIndex, tableReaderAt, error) { if al.tableMayBeInDynamo(chunkCount) { data, err := ddb.ReadTable(ctx, name, stats) if data == nil && err == nil { // There MUST be either data or an error - return nil, &dynamoTableReaderAt{}, errors.New("no data available") + return onHeapTableIndex{}, &dynamoTableReaderAt{}, errors.New("no data available") } if data != nil { - return data, &dynamoTableReaderAt{ddb: ddb, h: name}, nil + stats.IndexBytesPerRead.Sample(uint64(len(data))) + ind, err := parseTableIndexByCopy(data) + if err != nil { + return onHeapTableIndex{}, nil, err + } + return ind, &dynamoTableReaderAt{ddb: ddb, h: name}, nil } if _, ok := err.(tableNotInDynamoErr); !ok { - return nil, &dynamoTableReaderAt{}, err + return onHeapTableIndex{}, &dynamoTableReaderAt{}, err } } - size := indexSize(chunkCount) + footerSize buff := make([]byte, size) - n, _, err := s3.ReadFromEnd(ctx, name, buff, stats) - if err != nil { - return nil, &dynamoTableReaderAt{}, err + return onHeapTableIndex{}, &dynamoTableReaderAt{}, err } - if size != uint64(n) { - return nil, &dynamoTableReaderAt{}, errors.New("failed to read all data") + return onHeapTableIndex{}, &dynamoTableReaderAt{}, errors.New("failed to read all data") } - - return buff, &s3TableReaderAt{s3: s3, h: name}, nil + stats.IndexBytesPerRead.Sample(uint64(len(buff))) + ind, err := parseTableIndex(buff) + if err != nil { + return onHeapTableIndex{}, &dynamoTableReaderAt{}, err + } + return ind, &s3TableReaderAt{s3: s3, h: name}, nil }() if err != nil { return &chunkSourceAdapter{}, err } - 
stats.IndexBytesPerRead.Sample(uint64(len(indexBytes))) stats.IndexReadLatency.SampleTimeSince(t1) - index, err := parseIndex(indexBytes) - if err != nil { return emptyChunkSource{}, err } diff --git a/go/store/nbs/chunk_source_adapter.go b/go/store/nbs/chunk_source_adapter.go index 1e589aecaf..8a669d75c6 100644 --- a/go/store/nbs/chunk_source_adapter.go +++ b/go/store/nbs/chunk_source_adapter.go @@ -24,7 +24,7 @@ func (csa chunkSourceAdapter) hash() (addr, error) { } func newReaderFromIndexData(indexCache *indexCache, idxData []byte, name addr, tra tableReaderAt, blockSize uint64) (cs chunkSource, err error) { - index, err := parseTableIndex(idxData) + index, err := parseTableIndexByCopy(idxData) if err != nil { return nil, err diff --git a/go/store/nbs/file_table_persister.go b/go/store/nbs/file_table_persister.go index da5c4b808d..d77b471490 100644 --- a/go/store/nbs/file_table_persister.go +++ b/go/store/nbs/file_table_persister.go @@ -90,7 +90,7 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [ return "", ferr } - index, ferr := parseTableIndex(data) + index, ferr := parseTableIndexByCopy(data) if ferr != nil { return "", ferr diff --git a/go/store/nbs/table_index.go b/go/store/nbs/table_index.go index 4633bc4c6c..a2cac4b34c 100644 --- a/go/store/nbs/table_index.go +++ b/go/store/nbs/table_index.go @@ -17,6 +17,7 @@ package nbs import ( "bytes" "encoding/binary" + "errors" "io" "os" "sort" @@ -27,6 +28,11 @@ import ( "github.com/dolthub/dolt/go/libraries/utils/iohelp" ) +var ( + ErrWrongBufferSize = errors.New("buffer length and/or capacity incorrect for chunkCount specified in footer") + ErrWrongCopySize = errors.New("could not copy enough bytes") +) + type tableIndex interface { // ChunkCount returns the total number of chunks in the indexed file. ChunkCount() uint32 @@ -88,26 +94,31 @@ func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData } // parses a valid nbs tableIndex from a byte stream. 
|buff| must end with an NBS index -// and footer, though it may contain an unspecified number of bytes before that data. -// |tableIndex| doesn't keep alive any references to |buff|. -// Does not allocate new memory except for offsets, computes on buff in place. +// and footer and its length and capacity must match the expected indexSize for the chunkCount specified in the footer. +// Retains the buffer and does not allocate new memory except for offsets, computes on buff in place. func parseTableIndex(buff []byte) (onHeapTableIndex, error) { chunkCount, totalUncompressedData, err := ReadTableFooter(bytes.NewReader(buff)) if err != nil { return onHeapTableIndex{}, err } - - iS := indexSize(chunkCount) + iS := indexSize(chunkCount) + footerSize + if uint64(len(buff)) != iS || uint64(cap(buff)) != iS { + return onHeapTableIndex{}, ErrWrongBufferSize + } buff = buff[:len(buff)-footerSize] - // Trim away any extra bytes - buff = buff[uint64(len(buff))-iS:] - return NewOnHeapTableIndex(buff, chunkCount, totalUncompressedData) } -// ReadTableIndex loads an index into memory from an io.ReadSeeker +// parseTableIndexByCopy reads the footer, copies indexSize(chunkCount) bytes, and parses an on heap table index. +// Useful to create an onHeapTableIndex without retaining the entire underlying array of data. 
+func parseTableIndexByCopy(buff []byte) (onHeapTableIndex, error) { + r := bytes.NewReader(buff) + return ReadTableIndexByCopy(r) +} + +// ReadTableIndexByCopy loads an index into memory from an io.ReadSeeker // Caution: Allocates new memory for entire index -func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) { +func ReadTableIndexByCopy(rd io.ReadSeeker) (onHeapTableIndex, error) { chunkCount, totalUncompressedData, err := ReadTableFooter(rd) if err != nil { return onHeapTableIndex{}, err @@ -152,6 +163,11 @@ func NewOnHeapTableIndex(b []byte, chunkCount uint32, totalUncompressedData uint if err != nil { return onHeapTableIndex{}, err } + /** + TODO: Optimize memory usage further + There's wasted space here. The lengths segment in the buffer is retained unnecessarily. We can use that space to + store half the offsets and then allocate an additional len(lengths) to store the rest. + */ return onHeapTableIndex{ tupleB: tuples, diff --git a/go/store/nbs/util.go b/go/store/nbs/util.go index 6a7a8a980b..d41f4c26a0 100644 --- a/go/store/nbs/util.go +++ b/go/store/nbs/util.go @@ -25,7 +25,7 @@ import ( ) func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err error)) error { - idx, err := ReadTableIndex(rd) + idx, err := ReadTableIndexByCopy(rd) if err != nil { return err } @@ -69,7 +69,7 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er } func GetTableIndexPrefixes(rd io.ReadSeeker) (prefixes []uint64, err error) { - idx, err := ReadTableIndex(rd) + idx, err := ReadTableIndexByCopy(rd) if err != nil { return nil, err } From 6a1c332a199cca3ff7d4fdbce4c169ba06000f24 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Tue, 15 Feb 2022 10:02:48 -0800 Subject: [PATCH 105/105] fix tests --- go/store/nbs/aws_chunk_source_test.go | 4 ++-- go/store/nbs/aws_table_persister_test.go | 2 +- go/store/nbs/cmp_chunk_table_writer_test.go | 4 ++-- go/store/nbs/file_table_persister_test.go | 6 +++--- 
go/store/nbs/mem_table_test.go | 6 +++--- go/store/nbs/root_tracker_test.go | 4 ++-- go/store/nbs/s3_fake_test.go | 4 ++-- go/store/nbs/table_index_test.go | 4 ++-- go/store/nbs/table_persister_test.go | 2 +- go/store/nbs/table_test.go | 16 ++++++++-------- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/go/store/nbs/aws_chunk_source_test.go b/go/store/nbs/aws_chunk_source_test.go index 2102b9622f..0e015267c3 100644 --- a/go/store/nbs/aws_chunk_source_test.go +++ b/go/store/nbs/aws_chunk_source_test.go @@ -74,7 +74,7 @@ func TestAWSChunkSource(t *testing.T) { t.Run("WithIndexCache", func(t *testing.T) { assert := assert.New(t) - index, err := parseTableIndex(tableData) + index, err := parseTableIndexByCopy(tableData) require.NoError(t, err) cache := newIndexCache(1024) cache.put(h, index) @@ -98,7 +98,7 @@ func TestAWSChunkSource(t *testing.T) { t.Run("WithIndexCache", func(t *testing.T) { assert := assert.New(t) - index, err := parseTableIndex(tableData) + index, err := parseTableIndexByCopy(tableData) require.NoError(t, err) cache := newIndexCache(1024) cache.put(h, index) diff --git a/go/store/nbs/aws_table_persister_test.go b/go/store/nbs/aws_table_persister_test.go index 2185815d4e..658f515189 100644 --- a/go/store/nbs/aws_table_persister_test.go +++ b/go/store/nbs/aws_table_persister_test.go @@ -546,7 +546,7 @@ func bytesToChunkSource(t *testing.T, bs ...[]byte) chunkSource { tableSize, name, err := tw.finish() require.NoError(t, err) data := buff[:tableSize] - ti, err := parseTableIndex(data) + ti, err := parseTableIndexByCopy(data) require.NoError(t, err) rdr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) require.NoError(t, err) diff --git a/go/store/nbs/cmp_chunk_table_writer_test.go b/go/store/nbs/cmp_chunk_table_writer_test.go index 6c72a015d7..7aa0021a48 100644 --- a/go/store/nbs/cmp_chunk_table_writer_test.go +++ b/go/store/nbs/cmp_chunk_table_writer_test.go @@ -35,7 +35,7 @@ func TestCmpChunkTableWriter(t 
*testing.T) { require.NoError(t, err) // Setup a TableReader to read compressed chunks out of - ti, err := parseTableIndex(buff) + ti, err := parseTableIndexByCopy(buff) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) require.NoError(t, err) @@ -73,7 +73,7 @@ func TestCmpChunkTableWriter(t *testing.T) { require.NoError(t, err) outputBuff := output.Bytes() - outputTI, err := parseTableIndex(outputBuff) + outputTI, err := parseTableIndexByCopy(outputBuff) require.NoError(t, err) outputTR, err := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize) require.NoError(t, err) diff --git a/go/store/nbs/file_table_persister_test.go b/go/store/nbs/file_table_persister_test.go index 0cf4671219..f0904ca0d9 100644 --- a/go/store/nbs/file_table_persister_test.go +++ b/go/store/nbs/file_table_persister_test.go @@ -127,7 +127,7 @@ func TestFSTablePersisterPersist(t *testing.T) { if assert.True(mustUint32(src.count()) > 0) { buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String())) require.NoError(t, err) - ti, err := parseTableIndex(buff) + ti, err := parseTableIndexByCopy(buff) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) require.NoError(t, err) @@ -228,7 +228,7 @@ func TestFSTablePersisterConjoinAll(t *testing.T) { if assert.True(mustUint32(src.count()) > 0) { buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String())) require.NoError(t, err) - ti, err := parseTableIndex(buff) + ti, err := parseTableIndexByCopy(buff) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) require.NoError(t, err) @@ -267,7 +267,7 @@ func TestFSTablePersisterConjoinAllDups(t *testing.T) { if assert.True(mustUint32(src.count()) > 0) { buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String())) require.NoError(t, err) - ti, err := parseTableIndex(buff) + ti, err := 
parseTableIndexByCopy(buff) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) require.NoError(t, err) diff --git a/go/store/nbs/mem_table_test.go b/go/store/nbs/mem_table_test.go index 6842ccaa3e..8cd48a557c 100644 --- a/go/store/nbs/mem_table_test.go +++ b/go/store/nbs/mem_table_test.go @@ -150,7 +150,7 @@ func TestMemTableWrite(t *testing.T) { td1, _, err := buildTable(chunks[1:2]) require.NoError(t, err) - ti1, err := parseTableIndex(td1) + ti1, err := parseTableIndexByCopy(td1) require.NoError(t, err) tr1, err := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize) require.NoError(t, err) @@ -158,7 +158,7 @@ func TestMemTableWrite(t *testing.T) { td2, _, err := buildTable(chunks[2:]) require.NoError(t, err) - ti2, err := parseTableIndex(td2) + ti2, err := parseTableIndexByCopy(td2) require.NoError(t, err) tr2, err := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize) require.NoError(t, err) @@ -168,7 +168,7 @@ func TestMemTableWrite(t *testing.T) { require.NoError(t, err) assert.Equal(uint32(1), count) - ti, err := parseTableIndex(data) + ti, err := parseTableIndexByCopy(data) require.NoError(t, err) outReader, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) require.NoError(t, err) diff --git a/go/store/nbs/root_tracker_test.go b/go/store/nbs/root_tracker_test.go index 80387fd77e..81cd9dc316 100644 --- a/go/store/nbs/root_tracker_test.go +++ b/go/store/nbs/root_tracker_test.go @@ -461,7 +461,7 @@ func (ftp fakeTablePersister) Persist(ctx context.Context, mt *memTable, haver c if chunkCount > 0 { ftp.mu.Lock() defer ftp.mu.Unlock() - ti, err := parseTableIndex(data) + ti, err := parseTableIndexByCopy(data) if err != nil { return nil, err @@ -488,7 +488,7 @@ func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSourc if chunkCount > 0 { ftp.mu.Lock() defer ftp.mu.Unlock() - ti, err := parseTableIndex(data) + ti, err := parseTableIndexByCopy(data) 
if err != nil { return nil, err diff --git a/go/store/nbs/s3_fake_test.go b/go/store/nbs/s3_fake_test.go index dc52d0e116..2dda8b5958 100644 --- a/go/store/nbs/s3_fake_test.go +++ b/go/store/nbs/s3_fake_test.go @@ -76,7 +76,7 @@ func (m *fakeS3) readerForTable(name addr) (chunkReader, error) { m.mu.Lock() defer m.mu.Unlock() if buff, present := m.data[name.String()]; present { - ti, err := parseTableIndex(buff) + ti, err := parseTableIndexByCopy(buff) if err != nil { return nil, err @@ -98,7 +98,7 @@ func (m *fakeS3) readerForTableWithNamespace(ns string, name addr) (chunkReader, key = ns + "/" + key } if buff, present := m.data[key]; present { - ti, err := parseTableIndex(buff) + ti, err := parseTableIndexByCopy(buff) if err != nil { return nil, err diff --git a/go/store/nbs/table_index_test.go b/go/store/nbs/table_index_test.go index 6d451872ad..ffff73d488 100644 --- a/go/store/nbs/table_index_test.go +++ b/go/store/nbs/table_index_test.go @@ -29,7 +29,7 @@ func TestParseTableIndex(t *testing.T) { defer f.Close() bs, err := io.ReadAll(f) require.NoError(t, err) - idx, err := parseTableIndex(bs) + idx, err := parseTableIndexByCopy(bs) require.NoError(t, err) defer idx.Close() assert.Equal(t, uint32(596), idx.ChunkCount()) @@ -55,7 +55,7 @@ func TestMMapIndex(t *testing.T) { defer f.Close() bs, err := io.ReadAll(f) require.NoError(t, err) - idx, err := parseTableIndex(bs) + idx, err := parseTableIndexByCopy(bs) require.NoError(t, err) defer idx.Close() mmidx, err := newMmapTableIndex(idx, nil) diff --git a/go/store/nbs/table_persister_test.go b/go/store/nbs/table_persister_test.go index 8d60d2f0c5..19184a0d4c 100644 --- a/go/store/nbs/table_persister_test.go +++ b/go/store/nbs/table_persister_test.go @@ -45,7 +45,7 @@ func TestPlanCompaction(t *testing.T) { } data, name, err := buildTable(content) require.NoError(t, err) - ti, err := parseTableIndex(data) + ti, err := parseTableIndexByCopy(data) require.NoError(t, err) tr, err := newTableReader(ti, 
tableReaderAtFromBytes(data), fileBlockSize) require.NoError(t, err) diff --git a/go/store/nbs/table_test.go b/go/store/nbs/table_test.go index 94383af89d..32ca6c0d1f 100644 --- a/go/store/nbs/table_test.go +++ b/go/store/nbs/table_test.go @@ -77,7 +77,7 @@ func TestSimple(t *testing.T) { tableData, _, err := buildTable(chunks) require.NoError(t, err) - ti, err := parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) require.NoError(t, err) @@ -124,7 +124,7 @@ func TestHasMany(t *testing.T) { tableData, _, err := buildTable(chunks) require.NoError(t, err) - ti, err := parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) require.NoError(t, err) @@ -175,7 +175,7 @@ func TestHasManySequentialPrefix(t *testing.T) { require.NoError(t, err) buff = buff[:length] - ti, err := parseTableIndex(buff) + ti, err := parseTableIndexByCopy(buff) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) require.NoError(t, err) @@ -204,7 +204,7 @@ func TestGetMany(t *testing.T) { tableData, _, err := buildTable(data) require.NoError(t, err) - ti, err := parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) require.NoError(t, err) @@ -238,7 +238,7 @@ func TestCalcReads(t *testing.T) { tableData, _, err := buildTable(chunks) require.NoError(t, err) - ti, err := parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), 0) require.NoError(t, err) @@ -275,7 +275,7 @@ func TestExtract(t *testing.T) { tableData, _, err := buildTable(chunks) require.NoError(t, err) - ti, err := 
parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) require.NoError(t, err) @@ -314,7 +314,7 @@ func Test65k(t *testing.T) { tableData, _, err := buildTable(chunks) require.NoError(t, err) - ti, err := parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) require.NoError(t, err) @@ -367,7 +367,7 @@ func doTestNGetMany(t *testing.T, count int) { tableData, _, err := buildTable(data) require.NoError(t, err) - ti, err := parseTableIndex(tableData) + ti, err := parseTableIndexByCopy(tableData) require.NoError(t, err) tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) require.NoError(t, err)