From 91e1bc7992e202bb12cb2e739884e766b25c832a Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 8 Sep 2022 11:54:47 -0700 Subject: [PATCH 1/4] move orderedTree, orderedMap to pkg tree --- go/store/prolly/address_map.go | 56 +- go/store/prolly/artifact_map.go | 52 +- go/store/prolly/artifact_map_test.go | 2 +- go/store/prolly/commit_closure.go | 52 +- go/store/prolly/commit_closure_test.go | 14 +- go/store/prolly/doc.go | 4 +- go/store/prolly/map.go | 140 ++-- go/store/prolly/map_merge_test.go | 2 +- go/store/prolly/mutable_map.go | 48 +- go/store/prolly/mutable_map_write_test.go | 4 +- go/store/prolly/ordered_tree.go | 604 ------------------ go/store/prolly/range.go | 2 +- go/store/prolly/range_iter.go | 46 +- go/store/prolly/tree/diff.go | 2 + go/store/prolly/tree/map.go | 569 +++++++++++++++++ .../{ordered_map.go => tree/mutable_map.go} | 41 +- go/store/prolly/utils_test.go | 2 +- go/store/prolly/write_amplification_test.go | 24 +- 18 files changed, 829 insertions(+), 835 deletions(-) delete mode 100644 go/store/prolly/ordered_tree.go create mode 100644 go/store/prolly/tree/map.go rename go/store/prolly/{ordered_map.go => tree/mutable_map.go} (53%) diff --git a/go/store/prolly/address_map.go b/go/store/prolly/address_map.go index cc8415e9bf..fd08e84ce4 100644 --- a/go/store/prolly/address_map.go +++ b/go/store/prolly/address_map.go @@ -26,7 +26,7 @@ import ( ) type AddressMap struct { - addresses orderedTree[stringSlice, address, lexicographic] + addresses tree.StaticMap[stringSlice, address, lexicographic] } func NewEmptyAddressMap(ns tree.NodeStore) (AddressMap, error) { @@ -41,10 +41,10 @@ func NewEmptyAddressMap(ns tree.NodeStore) (AddressMap, error) { func NewAddressMap(node tree.Node, ns tree.NodeStore) (AddressMap, error) { return AddressMap{ - addresses: orderedTree[stringSlice, address, lexicographic]{ - root: node, - ns: ns, - order: lexicographic{}, + addresses: tree.StaticMap[stringSlice, address, lexicographic]{ + Root: node, + NodeStore: ns, + 
Order: lexicographic{}, }, }, nil } @@ -55,42 +55,42 @@ type address []byte type lexicographic struct{} -var _ ordering[stringSlice] = lexicographic{} +var _ tree.Ordering[stringSlice] = lexicographic{} func (l lexicographic) Compare(left, right stringSlice) int { return bytes.Compare(left, right) } func (c AddressMap) Count() (int, error) { - return c.addresses.count() + return c.addresses.Count() } func (c AddressMap) Height() (int, error) { - return c.addresses.height() + return c.addresses.Height() } func (c AddressMap) Node() tree.Node { - return c.addresses.root + return c.addresses.Root } func (c AddressMap) HashOf() hash.Hash { - return c.addresses.hashOf() + return c.addresses.HashOf() } func (c AddressMap) Format() *types.NomsBinFormat { - return c.addresses.ns.Format() + return c.addresses.NodeStore.Format() } func (c AddressMap) WalkAddresses(ctx context.Context, cb tree.AddressCb) error { - return c.addresses.walkAddresses(ctx, cb) + return c.addresses.WalkAddresses(ctx, cb) } func (c AddressMap) WalkNodes(ctx context.Context, cb tree.NodeCb) error { - return c.addresses.walkNodes(ctx, cb) + return c.addresses.WalkNodes(ctx, cb) } func (c AddressMap) Get(ctx context.Context, name string) (addr hash.Hash, err error) { - err = c.addresses.get(ctx, stringSlice(name), func(n stringSlice, a address) error { + err = c.addresses.Get(ctx, stringSlice(name), func(n stringSlice, a address) error { if n != nil { addr = hash.New(a) } @@ -100,11 +100,11 @@ func (c AddressMap) Get(ctx context.Context, name string) (addr hash.Hash, err e } func (c AddressMap) Has(ctx context.Context, name string) (ok bool, err error) { - return c.addresses.has(ctx, stringSlice(name)) + return c.addresses.Has(ctx, stringSlice(name)) } func (c AddressMap) IterAll(ctx context.Context, cb func(name string, address hash.Hash) error) error { - iter, err := c.addresses.iterAll(ctx) + iter, err := c.addresses.IterAll(ctx) if err != nil { return err } @@ -129,40 +129,40 @@ func (c AddressMap) 
IterAll(ctx context.Context, cb func(name string, address ha func (c AddressMap) Editor() AddressMapEditor { return AddressMapEditor{ - addresses: c.addresses.mutate(), + addresses: c.addresses.Mutate(), } } type AddressMapEditor struct { - addresses orderedMap[stringSlice, address, lexicographic] + addresses tree.MutableMap[stringSlice, address, lexicographic] } func (wr AddressMapEditor) Add(ctx context.Context, name string, addr hash.Hash) error { - return wr.addresses.put(ctx, stringSlice(name), addr[:]) + return wr.addresses.Put(ctx, stringSlice(name), addr[:]) } func (wr AddressMapEditor) Update(ctx context.Context, name string, addr hash.Hash) error { - return wr.addresses.put(ctx, stringSlice(name), addr[:]) + return wr.addresses.Put(ctx, stringSlice(name), addr[:]) } func (wr AddressMapEditor) Delete(ctx context.Context, name string) error { - return wr.addresses.delete(ctx, stringSlice(name)) + return wr.addresses.Delete(ctx, stringSlice(name)) } func (wr AddressMapEditor) Flush(ctx context.Context) (AddressMap, error) { - tr := wr.addresses.tree - serializer := message.NewAddressMapSerializer(tr.ns.Pool()) + tr := wr.addresses.StaticMap + serializer := message.NewAddressMapSerializer(tr.NodeStore.Pool()) - root, err := tree.ApplyMutations(ctx, tr.ns, tr.root, serializer, wr.addresses.mutations(), tr.compareItems) + root, err := tree.ApplyMutations(ctx, tr.NodeStore, tr.Root, serializer, wr.addresses.Mutations(), tr.CompareItems) if err != nil { return AddressMap{}, err } return AddressMap{ - addresses: orderedTree[stringSlice, address, lexicographic]{ - root: root, - ns: tr.ns, - order: tr.order, + addresses: tree.StaticMap[stringSlice, address, lexicographic]{ + Root: root, + NodeStore: tr.NodeStore, + Order: tr.Order, }, }, nil } diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index ffed343287..dae23385d7 100644 --- a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -44,7 +44,7 @@ const ( ) type 
ArtifactMap struct { - tuples orderedTree[val.Tuple, val.Tuple, val.TupleDesc] + tuples tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc] // the description of the source table where these artifacts come from srcKeyDesc val.TupleDesc keyDesc val.TupleDesc @@ -55,10 +55,10 @@ type ArtifactMap struct { // the corresponding row map. func NewArtifactMap(node tree.Node, ns tree.NodeStore, srcKeyDesc val.TupleDesc) ArtifactMap { keyDesc, valDesc := mergeArtifactsDescriptorsFromSource(srcKeyDesc) - tuples := orderedTree[val.Tuple, val.Tuple, val.TupleDesc]{ - root: node, - ns: ns, - order: keyDesc, + tuples := tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ + Root: node, + NodeStore: ns, + Order: keyDesc, } return ArtifactMap{ tuples: tuples, @@ -94,10 +94,10 @@ func NewArtifactMapFromTuples(ctx context.Context, ns tree.NodeStore, srcKeyDesc return ArtifactMap{}, err } - tuples := orderedTree[val.Tuple, val.Tuple, val.TupleDesc]{ - root: root, - ns: ns, - order: kd, + tuples := tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ + Root: root, + NodeStore: ns, + Order: kd, } return ArtifactMap{ tuples: tuples, @@ -108,27 +108,27 @@ func NewArtifactMapFromTuples(ctx context.Context, ns tree.NodeStore, srcKeyDesc } func (m ArtifactMap) Count() (int, error) { - return m.tuples.count() + return m.tuples.Count() } func (m ArtifactMap) Height() (int, error) { - return m.tuples.height() + return m.tuples.Height() } func (m ArtifactMap) HashOf() hash.Hash { - return m.tuples.hashOf() + return m.tuples.HashOf() } func (m ArtifactMap) Node() tree.Node { - return m.tuples.root + return m.tuples.Root } func (m ArtifactMap) NodeStore() tree.NodeStore { - return m.tuples.ns + return m.tuples.NodeStore } func (m ArtifactMap) Format() *types.NomsBinFormat { - return m.tuples.ns.Format() + return m.tuples.NodeStore.Format() } func (m ArtifactMap) Descriptors() (key, val val.TupleDesc) { @@ -136,23 +136,23 @@ func (m ArtifactMap) Descriptors() (key, val val.TupleDesc) { } func (m 
ArtifactMap) WalkAddresses(ctx context.Context, cb tree.AddressCb) error { - return m.tuples.walkAddresses(ctx, cb) + return m.tuples.WalkAddresses(ctx, cb) } func (m ArtifactMap) WalkNodes(ctx context.Context, cb tree.NodeCb) error { - return m.tuples.walkNodes(ctx, cb) + return m.tuples.WalkNodes(ctx, cb) } -func (m ArtifactMap) Get(ctx context.Context, key val.Tuple, cb KeyValueFn[val.Tuple, val.Tuple]) (err error) { - return m.tuples.get(ctx, key, cb) +func (m ArtifactMap) Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { + return m.tuples.Get(ctx, key, cb) } func (m ArtifactMap) Has(ctx context.Context, key val.Tuple) (ok bool, err error) { - return m.tuples.has(ctx, key) + return m.tuples.Has(ctx, key) } func (m ArtifactMap) Pool() pool.BuffPool { - return m.tuples.ns.Pool() + return m.tuples.NodeStore.Pool() } func (m ArtifactMap) Editor() ArtifactsEditor { @@ -160,7 +160,7 @@ func (m ArtifactMap) Editor() ArtifactsEditor { return ArtifactsEditor{ srcKeyDesc: m.srcKeyDesc, mut: MutableMap{ - tuples: m.tuples.mutate(), + tuples: m.tuples.Mutate(), keyDesc: m.keyDesc, valDesc: m.valDesc, }, @@ -174,7 +174,7 @@ func (m ArtifactMap) Editor() ArtifactsEditor { func (m ArtifactMap) IterAll(ctx context.Context) (ArtifactIter, error) { numPks := m.srcKeyDesc.Count() tb := val.NewTupleBuilder(m.srcKeyDesc) - itr, err := m.tuples.iterAll(ctx) + itr, err := m.tuples.IterAll(ctx) if err != nil { return nil, err } @@ -296,8 +296,8 @@ func (m ArtifactMap) iterAllOfTypes(ctx context.Context, artTypes ...ArtifactTyp } func MergeArtifactMaps(ctx context.Context, left, right, base ArtifactMap, cb tree.CollisionFn) (ArtifactMap, error) { - serializer := message.NewMergeArtifactSerializer(base.keyDesc, left.tuples.ns.Pool()) - tuples, err := mergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer, base.valDesc) + serializer := message.NewMergeArtifactSerializer(base.keyDesc, left.tuples.NodeStore.Pool()) + 
tuples, err := tree.MergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer, base.valDesc) if err != nil { return ArtifactMap{}, err } @@ -596,7 +596,7 @@ func mergeArtifactsDescriptorsFromSource(srcKd val.TupleDesc) (kd, vd val.TupleD func ArtifactDebugFormat(ctx context.Context, m ArtifactMap) (string, error) { kd, vd := m.Descriptors() - iter, err := m.tuples.iterAll(ctx) + iter, err := m.tuples.IterAll(ctx) if err != nil { return "", err } diff --git a/go/store/prolly/artifact_map_test.go b/go/store/prolly/artifact_map_test.go index 8394dbec6e..0427fa5d60 100644 --- a/go/store/prolly/artifact_map_test.go +++ b/go/store/prolly/artifact_map_test.go @@ -62,7 +62,7 @@ func TestArtifactMapEditing(t *testing.T) { return nil }) - // Verify that we found all the root-ish hashes + // Verify that we found all the root-ish hashes if nodeCount == 1 { assert.Equal(t, n, addressCount) } else { diff --git a/go/store/prolly/commit_closure.go b/go/store/prolly/commit_closure.go index 384cb9ca2c..d9606c74b1 100644 --- a/go/store/prolly/commit_closure.go +++ b/go/store/prolly/commit_closure.go @@ -29,12 +29,12 @@ import ( type CommitClosureValue []byte type CommitClosure struct { - closure orderedTree[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering] + closure tree.StaticMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering] } type commitClosureKeyOrdering struct{} -var _ ordering[CommitClosureKey] = commitClosureKeyOrdering{} +var _ tree.Ordering[CommitClosureKey] = commitClosureKeyOrdering{} func (o commitClosureKeyOrdering) Compare(left, right CommitClosureKey) int { lh, rh := left.Height(), right.Height() @@ -58,42 +58,42 @@ func NewEmptyCommitClosure(ns tree.NodeStore) (CommitClosure, error) { func NewCommitClosure(node tree.Node, ns tree.NodeStore) (CommitClosure, error) { return CommitClosure{ - closure: orderedTree[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering]{ - root: node, - ns: ns, - order: 
commitClosureKeyOrdering{}, + closure: tree.StaticMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering]{ + Root: node, + NodeStore: ns, + Order: commitClosureKeyOrdering{}, }, }, nil } func (c CommitClosure) Count() (int, error) { - return c.closure.count() + return c.closure.Count() } func (c CommitClosure) Height() (int, error) { - return c.closure.height() + return c.closure.Height() } func (c CommitClosure) Node() tree.Node { - return c.closure.root + return c.closure.Root } func (c CommitClosure) HashOf() hash.Hash { - return c.closure.hashOf() + return c.closure.HashOf() } func (c CommitClosure) Format() *types.NomsBinFormat { - return c.closure.ns.Format() + return c.closure.NodeStore.Format() } func (c CommitClosure) Editor() CommitClosureEditor { return CommitClosureEditor{ - closure: c.closure.mutate(), + closure: c.closure.Mutate(), } } func (c CommitClosure) IterAllReverse(ctx context.Context) (CommitClosureIter, error) { - return c.closure.iterAllReverse(ctx) + return c.closure.IterAllReverse(ctx) } func DecodeCommitClosureKey(key []byte) (height uint64, addr hash.Hash) { @@ -103,12 +103,12 @@ func DecodeCommitClosureKey(key []byte) (height uint64, addr hash.Hash) { } type CommitClosureEditor struct { - closure orderedMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering] + closure tree.MutableMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering] } type CommitClosureKey []byte -type CommitClosureIter kvIter[CommitClosureKey, CommitClosureValue] +type CommitClosureIter tree.KvIter[CommitClosureKey, CommitClosureValue] func NewCommitClosureKey(p pool.BuffPool, height uint64, addr hash.Hash) CommitClosureKey { r := p.Get(8 + 20) @@ -132,31 +132,31 @@ func (k CommitClosureKey) Less(other CommitClosureKey) bool { var emptyCommitClosureValue CommitClosureValue = CommitClosureValue(make([]byte, 1)) func (wr CommitClosureEditor) Add(ctx context.Context, key CommitClosureKey) error { - return wr.closure.put(ctx, key, 
emptyCommitClosureValue) + return wr.closure.Put(ctx, key, emptyCommitClosureValue) } func (wr CommitClosureEditor) Delete(ctx context.Context, key CommitClosureKey) error { - return wr.closure.delete(ctx, key) + return wr.closure.Delete(ctx, key) } func (wr CommitClosureEditor) Flush(ctx context.Context) (CommitClosure, error) { - tr := wr.closure.tree - serializer := message.NewCommitClosureSerializer(tr.ns.Pool()) + tr := wr.closure.StaticMap + serializer := message.NewCommitClosureSerializer(tr.NodeStore.Pool()) - root, err := tree.ApplyMutations(ctx, tr.ns, tr.root, serializer, wr.closure.mutations(), tr.compareItems) + root, err := tree.ApplyMutations(ctx, tr.NodeStore, tr.Root, serializer, wr.closure.Mutations(), tr.CompareItems) if err != nil { return CommitClosure{}, err } return CommitClosure{ - closure: orderedTree[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering]{ - root: root, - ns: tr.ns, - order: tr.order, + closure: tree.StaticMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering]{ + Root: root, + NodeStore: tr.NodeStore, + Order: tr.Order, }, }, nil } -func DiffCommitClosures(ctx context.Context, from, to CommitClosure, cb DiffFn) error { - return diffOrderedTrees(ctx, from.closure, to.closure, cb) +func DiffCommitClosures(ctx context.Context, from, to CommitClosure, cb tree.DiffFn) error { + return tree.DiffOrderedTrees(ctx, from.closure, to.closure, cb) } diff --git a/go/store/prolly/commit_closure_test.go b/go/store/prolly/commit_closure_test.go index 39db08cb03..1c943fd5f4 100644 --- a/go/store/prolly/commit_closure_test.go +++ b/go/store/prolly/commit_closure_test.go @@ -58,7 +58,7 @@ func TestCommitClosure(t *testing.T) { c, err := cc.Count() require.NoError(t, err) assert.Equal(t, 0, c) - assert.Equal(t, 0, cc.closure.root.Count()) + assert.Equal(t, 0, cc.closure.Root.Count()) c, err = cc.Height() require.NoError(t, err) assert.Equal(t, 1, c) @@ -167,8 +167,8 @@ func TestCommitClosure(t *testing.T) { 
require.NoError(t, err) assert.Equal(t, 4096, ccc) - // Walk the addresses in the root. - msg := serial.Message(tree.ValueFromNode(cc.closure.root).(types.SerialMessage)) + // Walk the addresses in the root. + msg := serial.Message(tree.ValueFromNode(cc.closure.Root).(types.SerialMessage)) numaddresses := 0 err = message.WalkAddresses(ctx, msg, func(ctx context.Context, addr hash.Hash) error { numaddresses++ @@ -177,9 +177,9 @@ func TestCommitClosure(t *testing.T) { require.NoError(t, err) assert.Less(t, numaddresses, 4096) - // Walk all addresses in the tree. + // Walk all addresses in the tree. numaddresses = 0 - err = tree.WalkAddresses(ctx, cc.closure.root, ns, func(ctx context.Context, addr hash.Hash) error { + err = tree.WalkAddresses(ctx, cc.closure.Root, ns, func(ctx context.Context, addr hash.Hash) error { numaddresses++ return nil }) @@ -203,13 +203,13 @@ func TestCommitClosure(t *testing.T) { numnodes := 0 totalentries := 0 - err = tree.WalkNodes(ctx, cc.closure.root, ns, func(ctx context.Context, node tree.Node) error { + err = tree.WalkNodes(ctx, cc.closure.Root, ns, func(ctx context.Context, node tree.Node) error { numnodes++ totalentries += node.Count() return nil }) require.NoError(t, err) - assert.Less(t, cc.closure.root.Count(), numnodes) + assert.Less(t, cc.closure.Root.Count(), numnodes) assert.Less(t, 4096, totalentries) }) } diff --git a/go/store/prolly/doc.go b/go/store/prolly/doc.go index 504bd95785..918b7e78a8 100644 --- a/go/store/prolly/doc.go +++ b/go/store/prolly/doc.go @@ -18,7 +18,7 @@ Package prolly includes: 2. Build trees of messages using a NodeStore abstraction 2. 
Traverse and search NodeStore and related data structures -NodeStore is the primary interface for building/reading tree data structures: +NodeStore is the primary interface for building/reading tree data structures: - AddressMap, ProllyTreeNode, CommitClosure are the current Node flatbuffer message types - A Node contains at least keys and values @@ -29,7 +29,7 @@ NodeStore is the primary interface for building/reading tree data structures: motivation - Leaf nodes' values can be addresses. - For example, blobs are stored in ProllyTreeNode leaves as value address. - The value address reference is the root hash of a tree stored separated. In + The value address reference is the root hash of a tree stored separately. In these cases, it is important to distinguish between 1) self-contained trees of a single type; and 2) the datastore as a whole, comprised of several types of trees. diff --git a/go/store/prolly/map.go b/go/store/prolly/map.go index 5a859b9464..a1bfed64f6 100644 --- a/go/store/prolly/map.go +++ b/go/store/prolly/map.go @@ -30,25 +30,23 @@ import ( ) type Map struct { - tuples orderedTree[val.Tuple, val.Tuple, val.TupleDesc] + tuples tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc] keyDesc val.TupleDesc valDesc val.TupleDesc } -type DiffFn func(context.Context, tree.Diff) error - type DiffSummary struct { Adds, Removes uint64 Changes, CellChanges uint64 NewSize, OldSize uint64 } -// NewMap creates an empty prolly tree Map +// NewMap creates a prolly tree Map rooted at |node| func NewMap(node tree.Node, ns tree.NodeStore, keyDesc, valDesc val.TupleDesc) Map { - tuples := orderedTree[val.Tuple, val.Tuple, val.TupleDesc]{ - root: node, - ns: ns, - order: keyDesc, + tuples := tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ + Root: node, + NodeStore: ns, + Order: keyDesc, } return Map{ tuples: tuples, @@ -57,7 +55,7 @@ func NewMap(node tree.Node, ns tree.NodeStore, keyDesc, valDesc val.TupleDesc) M } } -// NewMapFromTuples creates a prolly tree Map from slice of 
sorted Tuples. +// NewMapFromTuples creates a prolly tree Map from a slice of sorted Tuples. func NewMapFromTuples(ctx context.Context, ns tree.NodeStore, keyDesc, valDesc val.TupleDesc, tups ...val.Tuple) (Map, error) { if len(tups)%2 != 0 { return Map{}, fmt.Errorf("tuples must be key-value pairs") @@ -102,44 +100,82 @@ func NewMapFromTupleIter(ctx context.Context, ns tree.NodeStore, keyDesc, valDes func MutateMapWithTupleIter(ctx context.Context, m Map, iter TupleIter) (Map, error) { t := m.tuples i := mutationIter{iter: iter} - s := message.NewProllyMapSerializer(m.valDesc, t.ns.Pool()) + s := message.NewProllyMapSerializer(m.valDesc, t.NodeStore.Pool()) - root, err := tree.ApplyMutations(ctx, t.ns, t.root, s, i, t.compareItems) + root, err := tree.ApplyMutations(ctx, t.NodeStore, t.Root, s, i, t.CompareItems) if err != nil { return Map{}, err } return Map{ - tuples: orderedTree[val.Tuple, val.Tuple, val.TupleDesc]{ - root: root, - ns: t.ns, - order: t.order, + tuples: tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ + Root: root, + NodeStore: t.NodeStore, + Order: t.Order, }, keyDesc: m.keyDesc, valDesc: m.valDesc, }, nil } -func DiffMaps(ctx context.Context, from, to Map, cb DiffFn) error { - return diffOrderedTrees(ctx, from.tuples, to.tuples, cb) +func DiffMaps(ctx context.Context, from, to Map, cb tree.DiffFn) error { + return tree.DiffOrderedTrees(ctx, from.tuples, to.tuples, cb) } // RangeDiffMaps returns diffs within a Range. See Range for which diffs are // returned. 
-func RangeDiffMaps(ctx context.Context, from, to Map, rng Range, cb DiffFn) error { - return rangeDiffOrderedTrees(ctx, from.tuples, to.tuples, rng, cb) +func RangeDiffMaps(ctx context.Context, from, to Map, rng Range, cb tree.DiffFn) error { + cfn := func(left, right tree.Item) int { + return from.tuples.Order.Compare(val.Tuple(left), val.Tuple(right)) + } + fns, tns := from.tuples.NodeStore, to.tuples.NodeStore + + fromStart, err := tree.NewCursorFromSearchFn(ctx, fns, from.tuples.Root, rangeStartSearchFn(rng)) + if err != nil { + return err + } + toStart, err := tree.NewCursorFromSearchFn(ctx, tns, to.tuples.Root, rangeStartSearchFn(rng)) + if err != nil { + return err + } + + fromStop, err := tree.NewCursorFromSearchFn(ctx, fns, from.tuples.Root, rangeStopSearchFn(rng)) + if err != nil { + return err + } + toStop, err := tree.NewCursorFromSearchFn(ctx, tns, to.tuples.Root, rangeStopSearchFn(rng)) + if err != nil { + return err + } + + differ, err := tree.DifferFromCursors(fromStart, toStart, fromStop, toStop, cfn) + if err != nil { + return err + } + + for { + var diff tree.Diff + if diff, err = differ.Next(ctx); err != nil { + break + } + + if err = cb(ctx, diff); err != nil { + break + } + } + return err } // DiffMapsKeyRange returns diffs within a physical key range. The key range is // specified by |start| and |stop|. If |start| and/or |stop| is null, then the // range is unbounded towards that end. 
-func DiffMapsKeyRange(ctx context.Context, from, to Map, start, stop val.Tuple, cb DiffFn) error { - return diffKeyRangeOrderedTrees(ctx, from.tuples, to.tuples, start, stop, cb) +func DiffMapsKeyRange(ctx context.Context, from, to Map, start, stop val.Tuple, cb tree.DiffFn) error { + return tree.DiffKeyRangeOrderedTrees(ctx, from.tuples, to.tuples, start, stop, cb) } func MergeMaps(ctx context.Context, left, right, base Map, cb tree.CollisionFn) (Map, error) { serializer := message.NewProllyMapSerializer(left.valDesc, base.NodeStore().Pool()) - tuples, err := mergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer, base.valDesc) + tuples, err := tree.MergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer, base.valDesc) if err != nil { return Map{}, err } @@ -153,7 +189,7 @@ func MergeMaps(ctx context.Context, left, right, base Map, cb tree.CollisionFn) // NodeStore returns the map's NodeStore func (m Map) NodeStore() tree.NodeStore { - return m.tuples.ns + return m.tuples.NodeStore } // Mutate makes a MutableMap from a Map. @@ -163,21 +199,21 @@ func (m Map) Mutate() MutableMap { // Count returns the number of key-value pairs in the Map. func (m Map) Count() (int, error) { - return m.tuples.count() + return m.tuples.Count() } func (m Map) Height() (int, error) { - return m.tuples.height() + return m.tuples.Height() } // HashOf returns the Hash of this Map. func (m Map) HashOf() hash.Hash { - return m.tuples.hashOf() + return m.tuples.HashOf() } // Format returns the NomsBinFormat of this Map. func (m Map) Format() *types.NomsBinFormat { - return m.tuples.ns.Format() + return m.tuples.NodeStore.Format() } // Descriptors returns the TupleDesc's from this Map. 
@@ -186,47 +222,47 @@ func (m Map) Descriptors() (val.TupleDesc, val.TupleDesc) { } func (m Map) WalkAddresses(ctx context.Context, cb tree.AddressCb) error { - return m.tuples.walkAddresses(ctx, cb) + return m.tuples.WalkAddresses(ctx, cb) } func (m Map) WalkNodes(ctx context.Context, cb tree.NodeCb) error { - return m.tuples.walkNodes(ctx, cb) + return m.tuples.WalkNodes(ctx, cb) } // Get searches for the key-value pair keyed by |key| and passes the results to the callback. // If |key| is not present in the map, a nil key-value pair are passed. -func (m Map) Get(ctx context.Context, key val.Tuple, cb KeyValueFn[val.Tuple, val.Tuple]) (err error) { - return m.tuples.get(ctx, key, cb) +func (m Map) Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { + return m.tuples.Get(ctx, key, cb) } // Has returns true is |key| is present in the Map. func (m Map) Has(ctx context.Context, key val.Tuple) (ok bool, err error) { - return m.tuples.has(ctx, key) + return m.tuples.Has(ctx, key) } func (m Map) Last(ctx context.Context) (key, value val.Tuple, err error) { - return m.tuples.last(ctx) + return m.tuples.Last(ctx) } // IterAll returns a MapIter that iterates over the entire Map. func (m Map) IterAll(ctx context.Context) (MapIter, error) { - return m.tuples.iterAll(ctx) + return m.tuples.IterAll(ctx) } // IterAllReverse returns a MapIter that iterates over the entire Map from the end to the beginning. func (m Map) IterAllReverse(ctx context.Context) (MapIter, error) { - return m.tuples.iterAllReverse(ctx) + return m.tuples.IterAllReverse(ctx) } // IterOrdinalRange returns a MapIter for the ordinal range beginning at |start| and ending before |stop|. 
func (m Map) IterOrdinalRange(ctx context.Context, start, stop uint64) (MapIter, error) { - return m.tuples.iterOrdinalRange(ctx, start, stop) + return m.tuples.IterOrdinalRange(ctx, start, stop) } // FetchOrdinalRange fetches all leaf Nodes for the ordinal range beginning at |start| // and ending before |stop| and returns an iterator over their Items. func (m Map) FetchOrdinalRange(ctx context.Context, start, stop uint64) (MapIter, error) { - return m.tuples.fetchOrdinalRange(ctx, start, stop) + return m.tuples.FetchOrdinalRange(ctx, start, stop) } // IterRange returns a mutableMapIter that iterates over a Range. @@ -235,7 +271,7 @@ func (m Map) IterRange(ctx context.Context, rng Range) (MapIter, error) { return m.pointLookupFromRange(ctx, rng) } - iter, err := treeIterFromRange(ctx, m.tuples.root, m.tuples.ns, rng) + iter, err := treeIterFromRange(ctx, m.tuples.Root, m.tuples.NodeStore, rng) if err != nil { return nil, err } @@ -246,28 +282,28 @@ func (m Map) IterRange(ctx context.Context, rng Range) (MapIter, error) { // |stop|. If |startInclusive| and/or |stop| is nil, the range will be open // towards that end. func (m Map) IterKeyRange(ctx context.Context, start, stop val.Tuple) (MapIter, error) { - return m.tuples.iterKeyRange(ctx, start, stop) + return m.tuples.IterKeyRange(ctx, start, stop) } // GetOrdinalForKey returns the smallest ordinal position at which the key >= // |query|. func (m Map) GetOrdinalForKey(ctx context.Context, query val.Tuple) (uint64, error) { - return m.tuples.getOrdinalForKey(ctx, query) + return m.tuples.GetOrdinalForKey(ctx, query) } // GetKeyRangeCardinality returns the number of key-value tuples between |start| // and |stopExclusive|. If |start| and/or |stop| is null that end is unbounded. 
func (m Map) GetKeyRangeCardinality(ctx context.Context, startInclusive val.Tuple, endExclusive val.Tuple) (uint64, error) { - return m.tuples.getKeyRangeCardinality(ctx, startInclusive, endExclusive) + return m.tuples.GetKeyRangeCardinality(ctx, startInclusive, endExclusive) } func (m Map) Node() tree.Node { - return m.tuples.root + return m.tuples.Root } -// Pool returns the pool.BuffPool of the underlying tuples' tree.NodeStore +// Pool returns the pool.BuffPool of the underlying tuples' tree.NodeStore func (m Map) Pool() pool.BuffPool { - return m.tuples.ns.Pool() + return m.tuples.NodeStore.Pool() } func (m Map) CompareItems(left, right tree.Item) int { @@ -275,7 +311,7 @@ func (m Map) CompareItems(left, right tree.Item) int { } func (m Map) pointLookupFromRange(ctx context.Context, rng Range) (*pointLookup, error) { - cur, err := tree.NewCursorFromSearchFn(ctx, m.tuples.ns, m.tuples.root, rangeStartSearchFn(rng)) + cur, err := tree.NewCursorFromSearchFn(ctx, m.tuples.NodeStore, m.tuples.Root, rangeStartSearchFn(rng)) if err != nil { return nil, err } @@ -299,7 +335,7 @@ func treeIterFromRange( root tree.Node, ns tree.NodeStore, rng Range, -) (*orderedTreeIter[val.Tuple, val.Tuple], error) { +) (*tree.OrderedTreeIter[val.Tuple, val.Tuple], error) { var ( err error start *tree.Cursor @@ -316,15 +352,7 @@ func treeIterFromRange( return nil, err } - stopF := func(curr *tree.Cursor) bool { - return curr.Compare(stop) >= 0 - } - - if stopF(start) { - start = nil // empty range - } - - return &orderedTreeIter[val.Tuple, val.Tuple]{curr: start, stop: stopF, step: start.Advance}, nil + return tree.OrderedTreeIterFromCursors[val.Tuple, val.Tuple](start, stop), nil } func NewPointLookup(k, v val.Tuple) *pointLookup { @@ -390,7 +418,7 @@ func ConvertToSecondaryKeylessIndex(m Map) Map { newTypes[len(newTypes)-1] = val.Type{Enc: val.Hash128Enc} newKeyDesc := val.NewTupleDescriptorWithComparator(keyDesc.Comparator(), newTypes...) 
newTuples := m.tuples - newTuples.order = newKeyDesc + newTuples.Order = newKeyDesc return Map{ tuples: newTuples, keyDesc: newKeyDesc, diff --git a/go/store/prolly/map_merge_test.go b/go/store/prolly/map_merge_test.go index e8b23668fb..5f8ea5a439 100644 --- a/go/store/prolly/map_merge_test.go +++ b/go/store/prolly/map_merge_test.go @@ -150,7 +150,7 @@ func testTupleMergeFn(t *testing.T, kd, vd val.TupleDesc, sz int, ns tree.NodeSt tuples[i], tuples[j] = tuples[j], tuples[i] }) - // make overlapping edits + // make overlapping edits left := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...) right := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...) diff --git a/go/store/prolly/mutable_map.go b/go/store/prolly/mutable_map.go index 9bf5b98ebb..baa1b16a48 100644 --- a/go/store/prolly/mutable_map.go +++ b/go/store/prolly/mutable_map.go @@ -34,7 +34,7 @@ const ( // However, once ApplyPending() is called, those mutations are moved to the applied tier, and the pending tier is // cleared. type MutableMap struct { - tuples orderedMap[val.Tuple, val.Tuple, val.TupleDesc] + tuples tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc] keyDesc val.TupleDesc valDesc val.TupleDesc } @@ -42,7 +42,7 @@ type MutableMap struct { // newMutableMap returns a new MutableMap. 
func newMutableMap(m Map) MutableMap { return MutableMap{ - tuples: m.tuples.mutate(), + tuples: m.tuples.Mutate(), keyDesc: m.keyDesc, valDesc: m.valDesc, } @@ -59,17 +59,17 @@ func (mut MutableMap) flushWithSerializer(ctx context.Context, s message.Seriali return Map{}, err } - tr := mut.tuples.tree - root, err := tree.ApplyMutations(ctx, tr.ns, tr.root, s, mut.tuples.mutations(), tr.compareItems) + tr := mut.tuples.StaticMap + root, err := tree.ApplyMutations(ctx, tr.NodeStore, tr.Root, s, mut.tuples.Mutations(), tr.CompareItems) if err != nil { return Map{}, err } return Map{ - tuples: orderedTree[val.Tuple, val.Tuple, val.TupleDesc]{ - root: root, - ns: tr.ns, - order: tr.order, + tuples: tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ + Root: root, + NodeStore: tr.NodeStore, + Order: tr.Order, }, keyDesc: mut.keyDesc, valDesc: mut.valDesc, @@ -78,39 +78,39 @@ func (mut MutableMap) flushWithSerializer(ctx context.Context, s message.Seriali // NodeStore returns the map's NodeStore func (mut MutableMap) NodeStore() tree.NodeStore { - return mut.tuples.tree.ns + return mut.tuples.StaticMap.NodeStore } // Put adds the Tuple pair |key|, |value| to the MutableMap. func (mut MutableMap) Put(ctx context.Context, key, value val.Tuple) error { - return mut.tuples.put(ctx, key, value) + return mut.tuples.Put(ctx, key, value) } // Delete deletes the pair keyed by |key| from the MutableMap. func (mut MutableMap) Delete(ctx context.Context, key val.Tuple) error { - return mut.tuples.delete(ctx, key) + return mut.tuples.Delete(ctx, key) } // Get fetches the Tuple pair keyed by |key|, if it exists, and passes it to |cb|. // If the |key| is not present in the MutableMap, a nil Tuple pair is passed to |cb|. 
-func (mut MutableMap) Get(ctx context.Context, key val.Tuple, cb KeyValueFn[val.Tuple, val.Tuple]) (err error) { - return mut.tuples.get(ctx, key, cb) +func (mut MutableMap) Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { + return mut.tuples.Get(ctx, key, cb) } // Has returns true if |key| is present in the MutableMap. func (mut MutableMap) Has(ctx context.Context, key val.Tuple) (ok bool, err error) { - return mut.tuples.has(ctx, key) + return mut.tuples.Has(ctx, key) } // ApplyPending moves all pending mutations to the underlying map. func (mut *MutableMap) ApplyPending(ctx context.Context) error { - mut.tuples.edits.Checkpoint() + mut.tuples.Edits.Checkpoint() return nil } // DiscardPending removes all pending mutations. func (mut *MutableMap) DiscardPending(context.Context) { - mut.tuples.edits.Revert() + mut.tuples.Edits.Revert() } // IterAll returns a mutableMapIter that iterates over the entire MutableMap. @@ -121,11 +121,11 @@ func (mut MutableMap) IterAll(ctx context.Context) (MapIter, error) { // IterRange returns a MapIter that iterates over a Range. func (mut MutableMap) IterRange(ctx context.Context, rng Range) (MapIter, error) { - treeIter, err := treeIterFromRange(ctx, mut.tuples.tree.root, mut.tuples.tree.ns, rng) + treeIter, err := treeIterFromRange(ctx, mut.tuples.StaticMap.Root, mut.tuples.StaticMap.NodeStore, rng) if err != nil { return nil, err } - memIter := memIterFromRange(mut.tuples.edits, rng) + memIter := memIterFromRange(mut.tuples.Edits, rng) iter := &mutableMapIter[val.Tuple, val.Tuple, val.TupleDesc]{ memory: memIter, @@ -139,7 +139,7 @@ func (mut MutableMap) IterRange(ctx context.Context, rng Range) (MapIter, error) // HasEdits returns true when the MutableMap has performed at least one Put or Delete operation. This does not indicate // whether the materialized map contains different values to the contained unedited map. 
func (mut MutableMap) HasEdits() bool { - return mut.tuples.edits.Count() > 0 + return mut.tuples.Edits.Count() > 0 } // Descriptors returns the key and value val.TupleDesc. @@ -150,8 +150,8 @@ func (mut MutableMap) Descriptors() (val.TupleDesc, val.TupleDesc) { func debugFormat(ctx context.Context, m MutableMap) (string, error) { kd, vd := m.keyDesc, m.valDesc - editIter := m.tuples.edits.IterAtStart() - tupleIter, err := m.tuples.tree.iterAll(ctx) + editIter := m.tuples.Edits.IterAtStart() + tupleIter, err := m.tuples.StaticMap.IterAll(ctx) if err != nil { return "", err } @@ -159,7 +159,7 @@ func debugFormat(ctx context.Context, m MutableMap) (string, error) { var sb strings.Builder sb.WriteString("Mutable Map {\n") - c := strconv.Itoa(m.tuples.edits.Count()) + c := strconv.Itoa(m.tuples.Edits.Count()) sb.WriteString("\tedits (count: " + c + ") {\n") for { k, v := editIter.Current() @@ -175,13 +175,13 @@ func debugFormat(ctx context.Context, m MutableMap) (string, error) { } sb.WriteString("\t},\n") - ci, err := m.tuples.tree.count() + ci, err := m.tuples.StaticMap.Count() if err != nil { return "", err } c = strconv.Itoa(ci) - sb.WriteString("\ttree (count: " + c + ") {\n") + sb.WriteString("\tTree (count: " + c + ") {\n") for { k, v, err := tupleIter.Next(ctx) if err == io.EOF { diff --git a/go/store/prolly/mutable_map_write_test.go b/go/store/prolly/mutable_map_write_test.go index 3ed44842e2..ebbd3de9b4 100644 --- a/go/store/prolly/mutable_map_write_test.go +++ b/go/store/prolly/mutable_map_write_test.go @@ -545,10 +545,10 @@ func makeDelete(k int64) (key val.Tuple) { func materializeMap(t *testing.T, mut MutableMap) Map { ctx := context.Background() - // ensure edits are provided in order + // ensure Edits are provided in Order err := mut.ApplyPending(ctx) require.NoError(t, err) - iter := mut.tuples.mutations() + iter := mut.tuples.Mutations() prev, _ := iter.NextMutation(ctx) require.NotNil(t, prev) for { diff --git a/go/store/prolly/ordered_tree.go 
b/go/store/prolly/ordered_tree.go deleted file mode 100644 index b2bee074f1..0000000000 --- a/go/store/prolly/ordered_tree.go +++ /dev/null @@ -1,604 +0,0 @@ -// Copyright 2022 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package prolly - -import ( - "context" - "fmt" - "io" - - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly/message" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/skip" - "github.com/dolthub/dolt/go/store/val" -) - -type KeyValueFn[K, V ~[]byte] func(key K, value V) error - -type kvIter[K, V ~[]byte] interface { - Next(ctx context.Context) (K, V, error) -} - -type ordering[K ~[]byte] interface { - Compare(left, right K) int -} - -// orderedTree is a static prolly tree with ordered elements. 
-type orderedTree[K, V ~[]byte, O ordering[K]] struct { - root tree.Node - ns tree.NodeStore - order O -} - -func diffOrderedTrees[K, V ~[]byte, O ordering[K]]( - ctx context.Context, - from, to orderedTree[K, V, O], - cb DiffFn, -) error { - cfn := func(left, right tree.Item) int { - return from.order.Compare(K(left), K(right)) - } - differ, err := tree.DifferFromRoots(ctx, from.ns, to.ns, from.root, to.root, cfn) - if err != nil { - return err - } - - for { - var diff tree.Diff - if diff, err = differ.Next(ctx); err != nil { - break - } - - if err = cb(ctx, diff); err != nil { - break - } - } - return err -} - -func rangeDiffOrderedTrees[K, V ~[]byte, O ordering[K]]( - ctx context.Context, - from, to orderedTree[K, V, O], - rng Range, - cb DiffFn, -) error { - cfn := func(left, right tree.Item) int { - return from.order.Compare(K(left), K(right)) - } - - fromStart, err := tree.NewCursorFromSearchFn(ctx, from.ns, from.root, rangeStartSearchFn(rng)) - if err != nil { - return err - } - toStart, err := tree.NewCursorFromSearchFn(ctx, to.ns, to.root, rangeStartSearchFn(rng)) - if err != nil { - return err - } - - fromStop, err := tree.NewCursorFromSearchFn(ctx, from.ns, from.root, rangeStopSearchFn(rng)) - if err != nil { - return err - } - toStop, err := tree.NewCursorFromSearchFn(ctx, to.ns, to.root, rangeStopSearchFn(rng)) - if err != nil { - return err - } - - differ, err := tree.DifferFromCursors(fromStart, toStart, fromStop, toStop, cfn) - if err != nil { - return err - } - - for { - var diff tree.Diff - if diff, err = differ.Next(ctx); err != nil { - break - } - - if err = cb(ctx, diff); err != nil { - break - } - } - return err -} - -func diffKeyRangeOrderedTrees[K, V ~[]byte, O ordering[K]]( - ctx context.Context, - from, to orderedTree[K, V, O], - start, stop K, - cb DiffFn, -) error { - var fromStart, fromStop, toStart, toStop *tree.Cursor - var err error - - if len(start) == 0 { - fromStart, err = tree.NewCursorAtStart(ctx, from.ns, from.root) - if err != 
nil { - return err - } - - toStart, err = tree.NewCursorAtStart(ctx, to.ns, to.root) - if err != nil { - return err - } - } else { - fromStart, err = tree.NewCursorAtItem(ctx, from.ns, from.root, tree.Item(start), from.searchNode) - if err != nil { - return err - } - - toStart, err = tree.NewCursorAtItem(ctx, to.ns, to.root, tree.Item(start), to.searchNode) - if err != nil { - return err - } - } - - if len(stop) == 0 { - fromStop, err = tree.NewCursorPastEnd(ctx, from.ns, from.root) - if err != nil { - return err - } - - toStop, err = tree.NewCursorPastEnd(ctx, to.ns, to.root) - if err != nil { - return err - } - } else { - fromStop, err = tree.NewCursorAtItem(ctx, from.ns, from.root, tree.Item(stop), from.searchNode) - if err != nil { - return err - } - - toStop, err = tree.NewCursorAtItem(ctx, to.ns, to.root, tree.Item(stop), to.searchNode) - if err != nil { - return err - } - } - - cfn := func(left, right tree.Item) int { - return from.order.Compare(K(left), K(right)) - } - - differ, err := tree.DifferFromCursors(fromStart, toStart, fromStop, toStop, cfn) - if err != nil { - return err - } - - for { - var diff tree.Diff - if diff, err = differ.Next(ctx); err != nil { - break - } - - if err = cb(ctx, diff); err != nil { - break - } - } - return err -} - -func mergeOrderedTrees[K, V ~[]byte, O ordering[K], S message.Serializer]( - ctx context.Context, - l, r, base orderedTree[K, V, O], - cb tree.CollisionFn, - serializer S, - valDesc val.TupleDesc, -) (orderedTree[K, V, O], error) { - cfn := func(left, right tree.Item) int { - return base.order.Compare(K(left), K(right)) - } - root, err := tree.ThreeWayMerge(ctx, base.ns, l.root, r.root, base.root, cfn, cb, serializer, valDesc) - if err != nil { - return orderedTree[K, V, O]{}, err - } - - return orderedTree[K, V, O]{ - root: root, - ns: base.ns, - order: base.order, - }, nil -} - -func (t orderedTree[K, V, O]) count() (int, error) { - return t.root.TreeCount() -} - -func (t orderedTree[K, V, O]) height() (int, 
error) { - l, err := t.root.Level() - return l + 1, err -} - -func (t orderedTree[K, V, O]) hashOf() hash.Hash { - return t.root.HashOf() -} - -func (t orderedTree[K, V, O]) mutate() orderedMap[K, V, O] { - return orderedMap[K, V, O]{ - edits: skip.NewSkipList(func(left, right []byte) int { - return t.order.Compare(left, right) - }), - tree: t, - } -} - -func (t orderedTree[K, V, O]) walkAddresses(ctx context.Context, cb tree.AddressCb) error { - return tree.WalkAddresses(ctx, t.root, t.ns, cb) -} - -func (t orderedTree[K, V, O]) walkNodes(ctx context.Context, cb tree.NodeCb) error { - return tree.WalkNodes(ctx, t.root, t.ns, cb) -} - -func (t orderedTree[K, V, O]) get(ctx context.Context, query K, cb KeyValueFn[K, V]) (err error) { - cur, err := tree.NewLeafCursorAtItem(ctx, t.ns, t.root, tree.Item(query), t.searchNode) - if err != nil { - return err - } - - var key K - var value V - - if cur.Valid() { - key = K(cur.CurrentKey()) - if t.order.Compare(query, key) == 0 { - value = V(cur.CurrentValue()) - } else { - key = nil - } - } - return cb(key, value) -} - -func (t orderedTree[K, V, O]) has(ctx context.Context, query K) (ok bool, err error) { - cur, err := tree.NewLeafCursorAtItem(ctx, t.ns, t.root, tree.Item(query), t.searchNode) - if err != nil { - return false, err - } - - if cur.Valid() { - ok = t.order.Compare(query, K(cur.CurrentKey())) == 0 - } - - return -} - -func (t orderedTree[K, V, O]) last(ctx context.Context) (key K, value V, err error) { - cur, err := tree.NewCursorAtEnd(ctx, t.ns, t.root) - if err != nil { - return nil, nil, err - } - - if cur.Valid() { - key, value = K(cur.CurrentKey()), V(cur.CurrentValue()) - } - return -} - -func (t orderedTree[K, V, O]) iterAll(ctx context.Context) (*orderedTreeIter[K, V], error) { - c, err := tree.NewCursorAtStart(ctx, t.ns, t.root) - if err != nil { - return nil, err - } - - s, err := tree.NewCursorPastEnd(ctx, t.ns, t.root) - if err != nil { - return nil, err - } - - stop := func(curr *tree.Cursor) bool 
{ - return curr.Compare(s) >= 0 - } - - if stop(c) { - // empty range - return &orderedTreeIter[K, V]{curr: nil}, nil - } - - return &orderedTreeIter[K, V]{curr: c, stop: stop, step: c.Advance}, nil -} - -func (t orderedTree[K, V, O]) iterAllReverse(ctx context.Context) (*orderedTreeIter[K, V], error) { - beginning, err := tree.NewCursorAtStart(ctx, t.ns, t.root) - if err != nil { - return nil, err - } - err = beginning.Retreat(ctx) - if err != nil { - return nil, err - } - - end, err := tree.NewCursorAtEnd(ctx, t.ns, t.root) - if err != nil { - return nil, err - } - - stop := func(curr *tree.Cursor) bool { - return curr.Compare(beginning) <= 0 - } - - if stop(end) { - // empty range - return &orderedTreeIter[K, V]{curr: nil}, nil - } - - return &orderedTreeIter[K, V]{curr: end, stop: stop, step: end.Retreat}, nil -} - -func (t orderedTree[K, V, O]) iterOrdinalRange(ctx context.Context, start, stop uint64) (*orderedTreeIter[K, V], error) { - if stop == start { - return &orderedTreeIter[K, V]{curr: nil}, nil - } - if stop < start { - return nil, fmt.Errorf("invalid ordinal bounds (%d, %d)", start, stop) - } else { - c, err := t.count() - if err != nil { - return nil, err - } - if stop > uint64(c) { - return nil, fmt.Errorf("stop index (%d) out of bounds", stop) - } - } - - lo, err := tree.NewCursorAtOrdinal(ctx, t.ns, t.root, start) - if err != nil { - return nil, err - } - - hi, err := tree.NewCursorAtOrdinal(ctx, t.ns, t.root, stop) - if err != nil { - return nil, err - } - - stopF := func(curr *tree.Cursor) bool { - return curr.Compare(hi) >= 0 - } - - return &orderedTreeIter[K, V]{curr: lo, stop: stopF, step: lo.Advance}, nil -} - -func (t orderedTree[K, V, O]) fetchOrdinalRange(ctx context.Context, start, stop uint64) (*orderedLeafSpanIter[K, V], error) { - if stop == start { - return &orderedLeafSpanIter[K, V]{}, nil - } - if stop < start { - return nil, fmt.Errorf("invalid ordinal bounds (%d, %d)", start, stop) - } else { - c, err := t.count() - if err != nil 
{ - return nil, err - } else if stop > uint64(c) { - return nil, fmt.Errorf("stop index (%d) out of bounds", stop) - } - } - - span, err := tree.FetchLeafNodeSpan(ctx, t.ns, t.root, start, stop) - if err != nil { - return nil, err - } - - nd, leaves := span.Leaves[0], span.Leaves[1:] - c, s := span.LocalStart, nd.Count() - if len(leaves) == 0 { - s = span.LocalStop // one leaf span - } - - return &orderedLeafSpanIter[K, V]{ - nd: nd, - curr: c, - stop: s, - leaves: leaves, - final: span.LocalStop, - }, nil -} - -func (t orderedTree[K, V, O]) iterKeyRange(ctx context.Context, start, stop K) (*orderedTreeIter[K, V], error) { - lo, hi, err := t.getKeyRangeCursors(ctx, start, stop) - if err != nil { - return nil, err - } - - stopF := func(curr *tree.Cursor) bool { - return curr.Compare(hi) >= 0 - } - - if stopF(lo) { - return &orderedTreeIter[K, V]{curr: nil}, nil - } - - return &orderedTreeIter[K, V]{curr: lo, stop: stopF, step: lo.Advance}, nil -} - -func (t orderedTree[K, V, O]) getKeyRangeCardinality(ctx context.Context, start, stop K) (uint64, error) { - lo, hi, err := t.getKeyRangeCursors(ctx, start, stop) - if err != nil { - return 0, err - } - - startOrd, err := tree.GetOrdinalOfCursor(lo) - if err != nil { - return 0, err - } - - endOrd, err := tree.GetOrdinalOfCursor(hi) - if err != nil { - return 0, err - } - - if startOrd > endOrd { - return 0, nil - } - - return endOrd - startOrd, nil -} - -func (t orderedTree[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusive, stopExclusive K) (lo, hi *tree.Cursor, err error) { - if len(startInclusive) == 0 { - lo, err = tree.NewCursorAtStart(ctx, t.ns, t.root) - if err != nil { - return nil, nil, err - } - } else { - lo, err = tree.NewCursorAtItem(ctx, t.ns, t.root, tree.Item(startInclusive), t.searchNode) - if err != nil { - return nil, nil, err - } - } - - if len(stopExclusive) == 0 { - hi, err = tree.NewCursorPastEnd(ctx, t.ns, t.root) - if err != nil { - return nil, nil, err - } - } else { - hi, err = 
tree.NewCursorAtItem(ctx, t.ns, t.root, tree.Item(stopExclusive), t.searchNode) - if err != nil { - return nil, nil, err - } - } - - return -} - -// searchNode returns the smallest index where nd[i] >= query -// Adapted from search.Sort to inline comparison. -func (t orderedTree[K, V, O]) searchNode(query tree.Item, nd tree.Node) int { - n := int(nd.Count()) - // Define f(-1) == false and f(n) == true. - // Invariant: f(i-1) == false, f(j) == true. - i, j := 0, n - for i < j { - h := int(uint(i+j) >> 1) // avoid overflow when computing h - less := t.order.Compare(K(query), K(nd.GetKey(h))) <= 0 - // i ≤ h < j - if !less { - i = h + 1 // preserves f(i-1) == false - } else { - j = h // preserves f(j) == true - } - } - // i == j, f(i-1) == false, and - // f(j) (= f(i)) == true => answer is i. - return i -} - -func (t orderedTree[K, V, O]) compareItems(left, right tree.Item) int { - return t.order.Compare(K(left), K(right)) -} - -// getOrdinalForKey returns the smallest ordinal position at which the key >= |query|. -func (t orderedTree[K, V, O]) getOrdinalForKey(ctx context.Context, query K) (uint64, error) { - cur, err := tree.NewCursorAtItem(ctx, t.ns, t.root, tree.Item(query), t.searchNode) - if err != nil { - return 0, err - } - - return tree.GetOrdinalOfCursor(cur) -} - -var _ tree.ItemSearchFn = orderedTree[tree.Item, tree.Item, ordering[tree.Item]]{}.searchNode -var _ tree.CompareFn = orderedTree[tree.Item, tree.Item, ordering[tree.Item]]{}.compareItems - -type orderedTreeIter[K, V ~[]byte] struct { - // current tuple location - curr *tree.Cursor - - // the function called to moved |curr| forward in the direction of iteration. - step func(context.Context) error - // should return |true| if the passed in cursor is past the iteration's stopping point. 
- stop func(*tree.Cursor) bool -} - -func (it *orderedTreeIter[K, V]) Next(ctx context.Context) (key K, value V, err error) { - if it.curr == nil { - return nil, nil, io.EOF - } - - k, v := tree.CurrentCursorItems(it.curr) - key, value = K(k), V(v) - - err = it.step(ctx) - if err != nil { - return nil, nil, err - } - if it.stop(it.curr) { - // past the end of the range - it.curr = nil - } - - return -} - -func (it *orderedTreeIter[K, V]) current() (key K, value V) { - // |it.curr| is set to nil when its range is exhausted - if it.curr != nil && it.curr.Valid() { - k, v := tree.CurrentCursorItems(it.curr) - key, value = K(k), V(v) - } - return -} - -func (it *orderedTreeIter[K, V]) iterate(ctx context.Context) (err error) { - err = it.step(ctx) - if err != nil { - return err - } - - if it.stop(it.curr) { - // past the end of the range - it.curr = nil - } - - return -} - -type orderedLeafSpanIter[K, V ~[]byte] struct { - // in-progress node - nd tree.Node - // current index, - curr int - // last index for |nd| - stop int - // remaining leaves - leaves []tree.Node - // stop index in last leaf node - final int -} - -func (s *orderedLeafSpanIter[K, V]) Next(ctx context.Context) (key K, value V, err error) { - if s.curr >= s.stop { - // |s.nd| exhausted - if len(s.leaves) == 0 { - // span exhausted - return nil, nil, io.EOF - } - - s.nd = s.leaves[0] - s.curr = 0 - s.stop = s.nd.Count() - - s.leaves = s.leaves[1:] - if len(s.leaves) == 0 { - // |s.nd| is the last leaf - s.stop = s.final - } - } - - key = K(s.nd.GetKey(s.curr)) - value = V(s.nd.GetValue(s.curr)) - s.curr++ - return -} diff --git a/go/store/prolly/range.go b/go/store/prolly/range.go index 6361ac6108..3dda67349a 100644 --- a/go/store/prolly/range.go +++ b/go/store/prolly/range.go @@ -42,7 +42,7 @@ func PrefixRange(prefix val.Tuple, desc val.TupleDesc) Range { return closedRange(prefix, prefix, desc) } -// Range defines a subset of a prolly tree Tuple index. 
+// Range defines a subset of a prolly Tree Tuple index. // // Range can be used either to physically partition an index or // to logically filter an index. diff --git a/go/store/prolly/range_iter.go b/go/store/prolly/range_iter.go index 6aff9b3fb1..c4d3affd7f 100644 --- a/go/store/prolly/range_iter.go +++ b/go/store/prolly/range_iter.go @@ -18,37 +18,37 @@ import ( "context" "io" + "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/skip" - "github.com/dolthub/dolt/go/store/val" ) -type MapIter kvIter[val.Tuple, val.Tuple] +type MapIter tree.KvIter[val.Tuple, val.Tuple] var _ MapIter = &mutableMapIter[val.Tuple, val.Tuple, val.TupleDesc]{} -var _ MapIter = &orderedTreeIter[val.Tuple, val.Tuple]{} +var _ MapIter = &tree.OrderedTreeIter[val.Tuple, val.Tuple]{} type rangeIter[K, V ~[]byte] interface { - iterate(ctx context.Context) error - current() (key K, value V) + Iterate(ctx context.Context) error + Current() (key K, value V) } -var _ rangeIter[val.Tuple, val.Tuple] = &orderedTreeIter[val.Tuple, val.Tuple]{} +var _ rangeIter[val.Tuple, val.Tuple] = &tree.OrderedTreeIter[val.Tuple, val.Tuple]{} var _ rangeIter[val.Tuple, val.Tuple] = &memRangeIter{} var _ rangeIter[val.Tuple, val.Tuple] = emptyIter{} // mutableMapIter iterates over a Range of Tuples. -type mutableMapIter[K, V ~[]byte, O ordering[K]] struct { +type mutableMapIter[K, V ~[]byte, O tree.Ordering[K]] struct { memory rangeIter[K, V] - prolly *orderedTreeIter[K, V] + prolly *tree.OrderedTreeIter[K, V] order O } // Next returns the next pair of Tuples in the Range, or io.EOF if the iter is done. 
func (it mutableMapIter[K, V, O]) Next(ctx context.Context) (key K, value V, err error) { for { - mk, mv := it.memory.current() - pk, pv := it.prolly.current() + mk, mv := it.memory.Current() + pk, pv := it.prolly.Current() if mk == nil && pk == nil { // range is exhausted @@ -59,23 +59,23 @@ func (it mutableMapIter[K, V, O]) Next(ctx context.Context) (key K, value V, err switch { case cmp < 0: key, value = pk, pv - if err = it.prolly.iterate(ctx); err != nil { + if err = it.prolly.Iterate(ctx); err != nil { return nil, nil, err } case cmp > 0: key, value = mk, mv - if err = it.memory.iterate(ctx); err != nil { + if err = it.memory.Iterate(ctx); err != nil { return nil, nil, err } case cmp == 0: // |it.memory| wins ties key, value = mk, mv - if err = it.memory.iterate(ctx); err != nil { + if err = it.memory.Iterate(ctx); err != nil { return nil, nil, err } - if err = it.prolly.iterate(ctx); err != nil { + if err = it.prolly.Iterate(ctx); err != nil { return nil, nil, err } } @@ -90,10 +90,10 @@ func (it mutableMapIter[K, V, O]) Next(ctx context.Context) (key K, value V, err func (it mutableMapIter[K, V, O]) currentKeys() (memKey, proKey K) { if it.memory != nil { - memKey, _ = it.memory.current() + memKey, _ = it.memory.Current() } if it.prolly != nil { - proKey, _ = it.prolly.current() + proKey, _ = it.prolly.Current() } return } @@ -155,9 +155,9 @@ type memRangeIter struct { rng Range } -// current returns the iter's current Tuple pair, or nil Tuples +// Current returns the iter's current Tuple pair, or nil Tuples // if the iter has exhausted its range, it will -func (it *memRangeIter) current() (key, value val.Tuple) { +func (it *memRangeIter) Current() (key, value val.Tuple) { // |it.iter| is set to nil when its range is exhausted if it.iter != nil { key, value = it.iter.Current() @@ -165,12 +165,12 @@ func (it *memRangeIter) current() (key, value val.Tuple) { return } -// iterate progresses the iter inside its range. 
-func (it *memRangeIter) iterate(context.Context) (err error) { +// Iterate progresses the iter inside its range. +func (it *memRangeIter) Iterate(context.Context) (err error) { for { it.iter.Advance() - k, _ := it.current() + k, _ := it.Current() if k == nil || !it.rng.belowStop(k) { it.iter = nil // range exhausted } @@ -185,9 +185,9 @@ func (e emptyIter) Next(context.Context) (val.Tuple, val.Tuple, error) { return nil, nil, io.EOF } -func (e emptyIter) iterate(ctx context.Context) (err error) { return } +func (e emptyIter) Iterate(ctx context.Context) (err error) { return } -func (e emptyIter) current() (key, value val.Tuple) { return } +func (e emptyIter) Current() (key, value val.Tuple) { return } type filteredIter struct { iter MapIter diff --git a/go/store/prolly/tree/diff.go b/go/store/prolly/tree/diff.go index e5ade5965a..2be18622dc 100644 --- a/go/store/prolly/tree/diff.go +++ b/go/store/prolly/tree/diff.go @@ -34,6 +34,8 @@ type Diff struct { Type DiffType } +type DiffFn func(context.Context, Diff) error + type Differ struct { from, to *Cursor fromStop, toStop *Cursor diff --git a/go/store/prolly/tree/map.go b/go/store/prolly/tree/map.go new file mode 100644 index 0000000000..8c5dd074eb --- /dev/null +++ b/go/store/prolly/tree/map.go @@ -0,0 +1,569 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tree + +import ( + "context" + "fmt" + "io" + + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly/message" + "github.com/dolthub/dolt/go/store/skip" + "github.com/dolthub/dolt/go/store/val" +) + +type KeyValueFn[K, V ~[]byte] func(key K, value V) error + +type KvIter[K, V ~[]byte] interface { + Next(ctx context.Context) (K, V, error) +} + +type Ordering[K ~[]byte] interface { + Compare(left, right K) int +} + +// StaticMap is a static prolly Tree with ordered elements. +type StaticMap[K, V ~[]byte, O Ordering[K]] struct { + Root Node + NodeStore NodeStore + Order O +} + +func DiffOrderedTrees[K, V ~[]byte, O Ordering[K]]( + ctx context.Context, + from, to StaticMap[K, V, O], + cb DiffFn, +) error { + cfn := func(left, right Item) int { + return from.Order.Compare(K(left), K(right)) + } + differ, err := DifferFromRoots(ctx, from.NodeStore, to.NodeStore, from.Root, to.Root, cfn) + if err != nil { + return err + } + + for { + var diff Diff + if diff, err = differ.Next(ctx); err != nil { + break + } + + if err = cb(ctx, diff); err != nil { + break + } + } + return err +} + +func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]]( + ctx context.Context, + from, to StaticMap[K, V, O], + start, stop K, + cb DiffFn, +) error { + var fromStart, fromStop, toStart, toStop *Cursor + var err error + + if len(start) == 0 { + fromStart, err = NewCursorAtStart(ctx, from.NodeStore, from.Root) + if err != nil { + return err + } + + toStart, err = NewCursorAtStart(ctx, to.NodeStore, to.Root) + if err != nil { + return err + } + } else { + fromStart, err = NewCursorAtItem(ctx, from.NodeStore, from.Root, Item(start), from.searchNode) + if err != nil { + return err + } + + toStart, err = NewCursorAtItem(ctx, to.NodeStore, to.Root, Item(start), to.searchNode) + if err != nil { + return err + } + } + + if len(stop) == 0 { + fromStop, err = NewCursorPastEnd(ctx, from.NodeStore, from.Root) + if err != nil { + return err + } + + toStop, err = 
NewCursorPastEnd(ctx, to.NodeStore, to.Root) + if err != nil { + return err + } + } else { + fromStop, err = NewCursorAtItem(ctx, from.NodeStore, from.Root, Item(stop), from.searchNode) + if err != nil { + return err + } + + toStop, err = NewCursorAtItem(ctx, to.NodeStore, to.Root, Item(stop), to.searchNode) + if err != nil { + return err + } + } + + cfn := func(left, right Item) int { + return from.Order.Compare(K(left), K(right)) + } + + differ, err := DifferFromCursors(fromStart, toStart, fromStop, toStop, cfn) + if err != nil { + return err + } + + for { + var diff Diff + if diff, err = differ.Next(ctx); err != nil { + break + } + + if err = cb(ctx, diff); err != nil { + break + } + } + return err +} + +func MergeOrderedTrees[K, V ~[]byte, O Ordering[K], S message.Serializer]( + ctx context.Context, + l, r, base StaticMap[K, V, O], + cb CollisionFn, + serializer S, + valDesc val.TupleDesc, +) (StaticMap[K, V, O], error) { + cfn := func(left, right Item) int { + return base.Order.Compare(K(left), K(right)) + } + root, err := ThreeWayMerge(ctx, base.NodeStore, l.Root, r.Root, base.Root, cfn, cb, serializer, valDesc) + if err != nil { + return StaticMap[K, V, O]{}, err + } + + return StaticMap[K, V, O]{ + Root: root, + NodeStore: base.NodeStore, + Order: base.Order, + }, nil +} + +func (t StaticMap[K, V, O]) Count() (int, error) { + return t.Root.TreeCount() +} + +func (t StaticMap[K, V, O]) Height() (int, error) { + l, err := t.Root.Level() + return l + 1, err +} + +func (t StaticMap[K, V, O]) HashOf() hash.Hash { + return t.Root.HashOf() +} + +func (t StaticMap[K, V, O]) Mutate() MutableMap[K, V, O] { + return MutableMap[K, V, O]{ + Edits: skip.NewSkipList(func(left, right []byte) int { + return t.Order.Compare(left, right) + }), + StaticMap: t, + } +} + +func (t StaticMap[K, V, O]) WalkAddresses(ctx context.Context, cb AddressCb) error { + return WalkAddresses(ctx, t.Root, t.NodeStore, cb) +} + +func (t StaticMap[K, V, O]) WalkNodes(ctx context.Context, cb 
NodeCb) error { + return WalkNodes(ctx, t.Root, t.NodeStore, cb) +} + +func (t StaticMap[K, V, O]) Get(ctx context.Context, query K, cb KeyValueFn[K, V]) (err error) { + cur, err := NewLeafCursorAtItem(ctx, t.NodeStore, t.Root, Item(query), t.searchNode) + if err != nil { + return err + } + + var key K + var value V + + if cur.Valid() { + key = K(cur.CurrentKey()) + if t.Order.Compare(query, key) == 0 { + value = V(cur.CurrentValue()) + } else { + key = nil + } + } + return cb(key, value) +} + +func (t StaticMap[K, V, O]) Has(ctx context.Context, query K) (ok bool, err error) { + cur, err := NewLeafCursorAtItem(ctx, t.NodeStore, t.Root, Item(query), t.searchNode) + if err != nil { + return false, err + } + + if cur.Valid() { + ok = t.Order.Compare(query, K(cur.CurrentKey())) == 0 + } + + return +} + +func (t StaticMap[K, V, O]) Last(ctx context.Context) (key K, value V, err error) { + cur, err := NewCursorAtEnd(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, nil, err + } + + if cur.Valid() { + key, value = K(cur.CurrentKey()), V(cur.CurrentValue()) + } + return +} + +func (t StaticMap[K, V, O]) IterAll(ctx context.Context) (*OrderedTreeIter[K, V], error) { + c, err := NewCursorAtStart(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, err + } + + s, err := NewCursorPastEnd(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, err + } + + stop := func(curr *Cursor) bool { + return curr.Compare(s) >= 0 + } + + if stop(c) { + // empty range + return &OrderedTreeIter[K, V]{curr: nil}, nil + } + + return &OrderedTreeIter[K, V]{curr: c, stop: stop, step: c.Advance}, nil +} + +func (t StaticMap[K, V, O]) IterAllReverse(ctx context.Context) (*OrderedTreeIter[K, V], error) { + beginning, err := NewCursorAtStart(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, err + } + err = beginning.Retreat(ctx) + if err != nil { + return nil, err + } + + end, err := NewCursorAtEnd(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, err + } + + stop := 
func(curr *Cursor) bool { + return curr.Compare(beginning) <= 0 + } + + if stop(end) { + // empty range + return &OrderedTreeIter[K, V]{curr: nil}, nil + } + + return &OrderedTreeIter[K, V]{curr: end, stop: stop, step: end.Retreat}, nil +} + +func (t StaticMap[K, V, O]) IterOrdinalRange(ctx context.Context, start, stop uint64) (*OrderedTreeIter[K, V], error) { + if stop == start { + return &OrderedTreeIter[K, V]{curr: nil}, nil + } + if stop < start { + return nil, fmt.Errorf("invalid ordinal bounds (%d, %d)", start, stop) + } else { + c, err := t.Count() + if err != nil { + return nil, err + } + if stop > uint64(c) { + return nil, fmt.Errorf("stop index (%d) out of bounds", stop) + } + } + + lo, err := NewCursorAtOrdinal(ctx, t.NodeStore, t.Root, start) + if err != nil { + return nil, err + } + + hi, err := NewCursorAtOrdinal(ctx, t.NodeStore, t.Root, stop) + if err != nil { + return nil, err + } + + stopF := func(curr *Cursor) bool { + return curr.Compare(hi) >= 0 + } + + return &OrderedTreeIter[K, V]{curr: lo, stop: stopF, step: lo.Advance}, nil +} + +func (t StaticMap[K, V, O]) FetchOrdinalRange(ctx context.Context, start, stop uint64) (*orderedLeafSpanIter[K, V], error) { + if stop == start { + return &orderedLeafSpanIter[K, V]{}, nil + } + if stop < start { + return nil, fmt.Errorf("invalid ordinal bounds (%d, %d)", start, stop) + } else { + c, err := t.Count() + if err != nil { + return nil, err + } else if stop > uint64(c) { + return nil, fmt.Errorf("stop index (%d) out of bounds", stop) + } + } + + span, err := FetchLeafNodeSpan(ctx, t.NodeStore, t.Root, start, stop) + if err != nil { + return nil, err + } + + nd, leaves := span.Leaves[0], span.Leaves[1:] + c, s := span.LocalStart, nd.Count() + if len(leaves) == 0 { + s = span.LocalStop // one leaf span + } + + return &orderedLeafSpanIter[K, V]{ + nd: nd, + curr: c, + stop: s, + leaves: leaves, + final: span.LocalStop, + }, nil +} + +func (t StaticMap[K, V, O]) IterKeyRange(ctx context.Context, start, stop 
K) (*OrderedTreeIter[K, V], error) { + lo, hi, err := t.getKeyRangeCursors(ctx, start, stop) + if err != nil { + return nil, err + } + + stopF := func(curr *Cursor) bool { + return curr.Compare(hi) >= 0 + } + + if stopF(lo) { + return &OrderedTreeIter[K, V]{curr: nil}, nil + } + + return &OrderedTreeIter[K, V]{curr: lo, stop: stopF, step: lo.Advance}, nil +} + +func (t StaticMap[K, V, O]) GetKeyRangeCardinality(ctx context.Context, start, stop K) (uint64, error) { + lo, hi, err := t.getKeyRangeCursors(ctx, start, stop) + if err != nil { + return 0, err + } + + startOrd, err := GetOrdinalOfCursor(lo) + if err != nil { + return 0, err + } + + endOrd, err := GetOrdinalOfCursor(hi) + if err != nil { + return 0, err + } + + if startOrd > endOrd { + return 0, nil + } + + return endOrd - startOrd, nil +} + +func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusive, stopExclusive K) (lo, hi *Cursor, err error) { + if len(startInclusive) == 0 { + lo, err = NewCursorAtStart(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, nil, err + } + } else { + lo, err = NewCursorAtItem(ctx, t.NodeStore, t.Root, Item(startInclusive), t.searchNode) + if err != nil { + return nil, nil, err + } + } + + if len(stopExclusive) == 0 { + hi, err = NewCursorPastEnd(ctx, t.NodeStore, t.Root) + if err != nil { + return nil, nil, err + } + } else { + hi, err = NewCursorAtItem(ctx, t.NodeStore, t.Root, Item(stopExclusive), t.searchNode) + if err != nil { + return nil, nil, err + } + } + + return +} + +// searchNode returns the smallest index where nd[i] >= query +// Adapted from sort.Search to inline comparison. +func (t StaticMap[K, V, O]) searchNode(query Item, nd Node) int { + n := int(nd.Count()) + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. 
+ i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + less := t.Order.Compare(K(query), K(nd.GetKey(h))) <= 0 + // i ≤ h < j + if !less { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and + // f(j) (= f(i)) == true => answer is i. + return i +} + +func (t StaticMap[K, V, O]) CompareItems(left, right Item) int { + return t.Order.Compare(K(left), K(right)) +} + +// GetOrdinalForKey returns the smallest ordinal position at which the key >= |query|. +func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint64, error) { + cur, err := NewCursorAtItem(ctx, t.NodeStore, t.Root, Item(query), t.searchNode) + if err != nil { + return 0, err + } + + return GetOrdinalOfCursor(cur) +} + +var _ ItemSearchFn = StaticMap[Item, Item, Ordering[Item]]{}.searchNode +var _ CompareFn = StaticMap[Item, Item, Ordering[Item]]{}.CompareItems + +type OrderedTreeIter[K, V ~[]byte] struct { + // current tuple location + curr *Cursor + + // the function called to move |curr| forward in the direction of iteration. + step func(context.Context) error + // should return |true| if the passed in cursor is past the iteration's stopping point. 
+ stop func(*Cursor) bool +} + +func OrderedTreeIterFromCursors[K, V ~[]byte](start, stop *Cursor) *OrderedTreeIter[K, V] { + stopF := func(curr *Cursor) bool { + return curr.Compare(stop) >= 0 + } + + if stopF(start) { + start = nil // empty range + } + + return &OrderedTreeIter[K, V]{curr: start, stop: stopF, step: start.Advance} +} + +func (it *OrderedTreeIter[K, V]) Next(ctx context.Context) (key K, value V, err error) { + if it.curr == nil { + return nil, nil, io.EOF + } + + k, v := CurrentCursorItems(it.curr) + key, value = K(k), V(v) + + err = it.step(ctx) + if err != nil { + return nil, nil, err + } + if it.stop(it.curr) { + // past the end of the range + it.curr = nil + } + + return +} + +func (it *OrderedTreeIter[K, V]) Current() (key K, value V) { + // |it.curr| is set to nil when its range is exhausted + if it.curr != nil && it.curr.Valid() { + k, v := CurrentCursorItems(it.curr) + key, value = K(k), V(v) + } + return +} + +func (it *OrderedTreeIter[K, V]) Iterate(ctx context.Context) (err error) { + err = it.step(ctx) + if err != nil { + return err + } + + if it.stop(it.curr) { + // past the end of the range + it.curr = nil + } + + return +} + +type orderedLeafSpanIter[K, V ~[]byte] struct { + // in-progress node + nd Node + // current index, + curr int + // last index for |nd| + stop int + // remaining leaves + leaves []Node + // stop index in last leaf node + final int +} + +func (s *orderedLeafSpanIter[K, V]) Next(ctx context.Context) (key K, value V, err error) { + if s.curr >= s.stop { + // |s.nd| exhausted + if len(s.leaves) == 0 { + // span exhausted + return nil, nil, io.EOF + } + + s.nd = s.leaves[0] + s.curr = 0 + s.stop = s.nd.Count() + + s.leaves = s.leaves[1:] + if len(s.leaves) == 0 { + // |s.nd| is the last leaf + s.stop = s.final + } + } + + key = K(s.nd.GetKey(s.curr)) + value = V(s.nd.GetValue(s.curr)) + s.curr++ + return +} diff --git a/go/store/prolly/ordered_map.go b/go/store/prolly/tree/mutable_map.go similarity index 53% rename 
from go/store/prolly/ordered_map.go rename to go/store/prolly/tree/mutable_map.go index abe4e4f968..33d378916f 100644 --- a/go/store/prolly/ordered_map.go +++ b/go/store/prolly/tree/mutable_map.go @@ -12,64 +12,63 @@ // See the License for the specific language governing permissions and // limitations under the License. -package prolly +package tree import ( "context" - "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/skip" ) -// orderedMap is a mutable prolly tree with ordered elements. -type orderedMap[K, V ~[]byte, O ordering[K]] struct { - edits *skip.List - tree orderedTree[K, V, O] +// MutableMap is a mutable prolly tree with ordered elements. +type MutableMap[K, V ~[]byte, O Ordering[K]] struct { + Edits *skip.List + StaticMap[K, V, O] } -func (m orderedMap[K, V, O]) put(_ context.Context, key K, value V) error { - m.edits.Put(key, value) +func (m MutableMap[K, V, O]) Put(_ context.Context, key K, value V) error { + m.Edits.Put(key, value) return nil } -func (m orderedMap[K, V, O]) delete(_ context.Context, key K) error { - m.edits.Put(key, nil) +func (m MutableMap[K, V, O]) Delete(_ context.Context, key K) error { + m.Edits.Put(key, nil) return nil } -func (m orderedMap[K, V, O]) get(ctx context.Context, key K, cb KeyValueFn[K, V]) (err error) { - value, ok := m.edits.Get(key) +func (m MutableMap[K, V, O]) Get(ctx context.Context, key K, cb KeyValueFn[K, V]) (err error) { + value, ok := m.Edits.Get(key) if ok { if value == nil { - // there is a pending delete of |key| in |m.edits|. + // there is a pending delete of |key| in |m.Edits|. 
key = nil } return cb(key, value) } - return m.tree.get(ctx, key, cb) + return m.StaticMap.Get(ctx, key, cb) } -func (m orderedMap[K, V, O]) has(ctx context.Context, key K) (present bool, err error) { - value, ok := m.edits.Get(key) +func (m MutableMap[K, V, O]) Has(ctx context.Context, key K) (present bool, err error) { + value, ok := m.Edits.Get(key) if ok { present = value != nil return } - return m.tree.has(ctx, key) + return m.StaticMap.Has(ctx, key) } -func (m orderedMap[K, V, O]) mutations() tree.MutationIter { - return orderedListIter[K, V]{iter: m.edits.IterAtStart()} +func (m MutableMap[K, V, O]) Mutations() MutationIter { + return orderedListIter[K, V]{iter: m.Edits.IterAtStart()} } type orderedListIter[K, V ~[]byte] struct { iter *skip.ListIter } -var _ tree.MutationIter = &orderedListIter[tree.Item, tree.Item]{} +var _ MutationIter = &orderedListIter[Item, Item]{} -func (it orderedListIter[K, V]) NextMutation(context.Context) (tree.Item, tree.Item) { +func (it orderedListIter[K, V]) NextMutation(context.Context) (Item, Item) { k, v := it.iter.Current() if k == nil { return nil, nil diff --git a/go/store/prolly/utils_test.go b/go/store/prolly/utils_test.go index 6a561105c9..3c8a45a4a4 100644 --- a/go/store/prolly/utils_test.go +++ b/go/store/prolly/utils_test.go @@ -30,7 +30,7 @@ import ( // harness for Map, memoryMap, and MutableMap. 
type testMap interface { Has(ctx context.Context, key val.Tuple) (bool, error) - Get(ctx context.Context, key val.Tuple, cb KeyValueFn[val.Tuple, val.Tuple]) (err error) + Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) IterAll(ctx context.Context) (MapIter, error) IterRange(ctx context.Context, rng Range) (MapIter, error) Descriptors() (val.TupleDesc, val.TupleDesc) diff --git a/go/store/prolly/write_amplification_test.go b/go/store/prolly/write_amplification_test.go index 9f001a5dce..49cdba68ea 100644 --- a/go/store/prolly/write_amplification_test.go +++ b/go/store/prolly/write_amplification_test.go @@ -62,10 +62,10 @@ func (rk deleteSingleKey) makeMutations(ctx context.Context, leaf tree.Node) ([] // t.Skip("unskip for metrics") // // t.Run("Key Splitter", func(t *testing.T) { -// testWriteAmpWithSplitter(t, tree.newKeySplitter) +// testWriteAmpWithSplitter(t, Tree.newKeySplitter) // }) // t.Run("Smooth Rolling Hasher", func(t *testing.T) { -// testWriteAmpWithSplitter(t, tree.newRollingHashSplitter) +// testWriteAmpWithSplitter(t, Tree.newRollingHashSplitter) // }) //} // @@ -74,36 +74,36 @@ func (rk deleteSingleKey) makeMutations(ctx context.Context, leaf tree.Node) ([] // // const scale = 100_000 // t.Run("Key Splitter", func(t *testing.T) { -// tree.defaultSplitterFactory = tree.newKeySplitter +// Tree.defaultSplitterFactory = Tree.newKeySplitter // t.Run("Random Uints", func(t *testing.T) { // pm, _ := makeProllyMap(t, scale) // before := pm.(Map) // printMapSummary(t, before) // }) // t.Run("Ascending Uints", func(t *testing.T) { -// keys, values, desc := tree.AscendingCompositeIntTuples(scale) +// keys, values, desc := Tree.AscendingCompositeIntTuples(scale) // before := prollyMapFromKeysAndValues(t, desc, desc, keys, values) // printMapSummary(t, before) // }) // }) // t.Run("Smooth Rolling Hasher", func(t *testing.T) { -// tree.defaultSplitterFactory = tree.newRollingHashSplitter +// 
Tree.defaultSplitterFactory = Tree.newRollingHashSplitter // t.Run("Random Uints", func(t *testing.T) { // pm, _ := makeProllyMap(t, scale) // before := pm.(Map) // printMapSummary(t, before) // }) // t.Run("Ascending Uints", func(t *testing.T) { -// keys, values, desc := tree.AscendingCompositeIntTuples(scale) +// keys, values, desc := Tree.AscendingCompositeIntTuples(scale) // before := prollyMapFromKeysAndValues(t, desc, desc, keys, values) // printMapSummary(t, before) // }) // }) //} // -//func testWriteAmpWithSplitter(t *testing.T, factory tree.splitterFactory) { +//func testWriteAmpWithSplitter(t *testing.T, factory Tree.splitterFactory) { // const scale = 100_000 -// tree.defaultSplitterFactory = factory +// Tree.defaultSplitterFactory = factory // // t.Run("Random Uint Map", func(t *testing.T) { // pm, _ := makeProllyMap(t, scale) @@ -116,7 +116,7 @@ func (rk deleteSingleKey) makeMutations(ctx context.Context, leaf tree.Node) ([] // }) // }) // t.Run("Ascending Uint Map", func(t *testing.T) { -// keys, values, desc := tree.AscendingCompositeIntTuples(scale) +// keys, values, desc := Tree.AscendingCompositeIntTuples(scale) // before := prollyMapFromKeysAndValues(t, desc, desc, keys, values) // t.Run("delete random key", func(t *testing.T) { // testWriteAmplification(t, before, deleteSingleKey{}) @@ -184,17 +184,17 @@ func measureWriteAmplification(t *testing.T, before, after Map) (count, size int require.NoError(t, err) for addr := range novel { - n, err := after.tuples.ns.Read(ctx, addr) + n, err := after.tuples.NodeStore.Read(ctx, addr) require.NoError(t, err) size += n.Size() } - size += after.tuples.root.Size() + size += after.tuples.Root.Size() count = novel.Size() + 1 return } func printMapSummary(t *testing.T, m Map) { - tree.PrintTreeSummaryByLevel(t, m.tuples.root, m.tuples.ns) + tree.PrintTreeSummaryByLevel(t, m.tuples.Root, m.tuples.NodeStore) } func prollyMapFromKeysAndValues(t *testing.T, kd, vd val.TupleDesc, keys, values []val.Tuple) Map { 
From 949967f267e35270c3ce9b99487e6fe32f99f351 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 8 Sep 2022 13:27:19 -0700 Subject: [PATCH 2/4] refactored pkg prolly to remove tree.CompareFn --- .../doltcore/sqle/index/index_lookup.go | 4 +- go/store/prolly/address_map.go | 11 +- go/store/prolly/artifact_map.go | 2 +- go/store/prolly/commit_closure.go | 11 +- go/store/prolly/map.go | 22 ++-- go/store/prolly/mutable_map.go | 10 +- go/store/prolly/tree/diff.go | 45 +++++-- go/store/prolly/tree/map.go | 23 +--- go/store/prolly/tree/merge.go | 32 ++--- go/store/prolly/tree/mutator.go | 10 +- go/store/prolly/tree/node_cursor.go | 114 ++++++++++-------- 11 files changed, 147 insertions(+), 137 deletions(-) diff --git a/go/libraries/doltcore/sqle/index/index_lookup.go b/go/libraries/doltcore/sqle/index/index_lookup.go index cb14f79ee0..48d9847474 100644 --- a/go/libraries/doltcore/sqle/index/index_lookup.go +++ b/go/libraries/doltcore/sqle/index/index_lookup.go @@ -349,7 +349,7 @@ func (lb *baseLookupBuilder) NewRowIter(ctx *sql.Context, part sql.Partition) (s // point lookups on concrete values, and range lookups for null matches. 
func (lb *baseLookupBuilder) newPointLookup(ctx *sql.Context, rang prolly.Range) (prolly.MapIter, error) { if lb.cur == nil { - cur, err := tree.NewCursorFromCompareFn(ctx, lb.sec.NodeStore(), lb.sec.Node(), tree.Item(rang.Tup), lb.sec.CompareItems) + cur, err := tree.NewCursorAtKey(ctx, lb.sec.NodeStore(), lb.sec.Node(), rang.Tup, lb.secKd) if err != nil { return nil, err } @@ -361,7 +361,7 @@ func (lb *baseLookupBuilder) newPointLookup(ctx *sql.Context, rang prolly.Range) lb.cur = cur } - err := lb.cur.Seek(ctx, tree.Item(rang.Tup), lb.sec.CompareItems) + err := tree.Seek(ctx, lb.cur, rang.Tup, lb.secKd) if err != nil { return nil, err } diff --git a/go/store/prolly/address_map.go b/go/store/prolly/address_map.go index fd08e84ce4..0ecc255ade 100644 --- a/go/store/prolly/address_map.go +++ b/go/store/prolly/address_map.go @@ -150,10 +150,11 @@ func (wr AddressMapEditor) Delete(ctx context.Context, name string) error { } func (wr AddressMapEditor) Flush(ctx context.Context) (AddressMap, error) { - tr := wr.addresses.StaticMap - serializer := message.NewAddressMapSerializer(tr.NodeStore.Pool()) + sm := wr.addresses.StaticMap + serializer := message.NewAddressMapSerializer(sm.NodeStore.Pool()) + fn := tree.ApplyMutations[stringSlice, lexicographic, message.AddressMapSerializer] - root, err := tree.ApplyMutations(ctx, tr.NodeStore, tr.Root, serializer, wr.addresses.Mutations(), tr.CompareItems) + root, err := fn(ctx, sm.NodeStore, sm.Root, lexicographic{}, serializer, wr.addresses.Mutations()) if err != nil { return AddressMap{}, err } @@ -161,8 +162,8 @@ func (wr AddressMapEditor) Flush(ctx context.Context) (AddressMap, error) { return AddressMap{ addresses: tree.StaticMap[stringSlice, address, lexicographic]{ Root: root, - NodeStore: tr.NodeStore, - Order: tr.Order, + NodeStore: sm.NodeStore, + Order: sm.Order, }, }, nil } diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index dae23385d7..9fb8f17c99 100644 --- 
a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -297,7 +297,7 @@ func (m ArtifactMap) iterAllOfTypes(ctx context.Context, artTypes ...ArtifactTyp func MergeArtifactMaps(ctx context.Context, left, right, base ArtifactMap, cb tree.CollisionFn) (ArtifactMap, error) { serializer := message.NewMergeArtifactSerializer(base.keyDesc, left.tuples.NodeStore.Pool()) - tuples, err := tree.MergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer, base.valDesc) + tuples, err := tree.MergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer) if err != nil { return ArtifactMap{}, err } diff --git a/go/store/prolly/commit_closure.go b/go/store/prolly/commit_closure.go index d9606c74b1..13bfaf3289 100644 --- a/go/store/prolly/commit_closure.go +++ b/go/store/prolly/commit_closure.go @@ -140,10 +140,11 @@ func (wr CommitClosureEditor) Delete(ctx context.Context, key CommitClosureKey) } func (wr CommitClosureEditor) Flush(ctx context.Context) (CommitClosure, error) { - tr := wr.closure.StaticMap - serializer := message.NewCommitClosureSerializer(tr.NodeStore.Pool()) + sm := wr.closure.StaticMap + serializer := message.NewCommitClosureSerializer(sm.NodeStore.Pool()) + fn := tree.ApplyMutations[CommitClosureKey, commitClosureKeyOrdering, message.CommitClosureSerializer] - root, err := tree.ApplyMutations(ctx, tr.NodeStore, tr.Root, serializer, wr.closure.Mutations(), tr.CompareItems) + root, err := fn(ctx, sm.NodeStore, sm.Root, commitClosureKeyOrdering{}, serializer, wr.closure.Mutations()) if err != nil { return CommitClosure{}, err } @@ -151,8 +152,8 @@ func (wr CommitClosureEditor) Flush(ctx context.Context) (CommitClosure, error) return CommitClosure{ closure: tree.StaticMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering]{ Root: root, - NodeStore: tr.NodeStore, - Order: tr.Order, + NodeStore: sm.NodeStore, + Order: sm.Order, }, }, nil } diff --git a/go/store/prolly/map.go b/go/store/prolly/map.go 
index a1bfed64f6..eaa535b90d 100644 --- a/go/store/prolly/map.go +++ b/go/store/prolly/map.go @@ -98,11 +98,10 @@ func NewMapFromTupleIter(ctx context.Context, ns tree.NodeStore, keyDesc, valDes } func MutateMapWithTupleIter(ctx context.Context, m Map, iter TupleIter) (Map, error) { - t := m.tuples - i := mutationIter{iter: iter} - s := message.NewProllyMapSerializer(m.valDesc, t.NodeStore.Pool()) + fn := tree.ApplyMutations[val.Tuple, val.TupleDesc, message.ProllyMapSerializer] + s := message.NewProllyMapSerializer(m.valDesc, m.tuples.NodeStore.Pool()) - root, err := tree.ApplyMutations(ctx, t.NodeStore, t.Root, s, i, t.CompareItems) + root, err := fn(ctx, m.tuples.NodeStore, m.tuples.Root, m.keyDesc, s, mutationIter{iter: iter}) if err != nil { return Map{}, err } @@ -110,8 +109,8 @@ func MutateMapWithTupleIter(ctx context.Context, m Map, iter TupleIter) (Map, er return Map{ tuples: tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ Root: root, - NodeStore: t.NodeStore, - Order: t.Order, + NodeStore: m.tuples.NodeStore, + Order: m.tuples.Order, }, keyDesc: m.keyDesc, valDesc: m.valDesc, @@ -125,9 +124,6 @@ func DiffMaps(ctx context.Context, from, to Map, cb tree.DiffFn) error { // RangeDiffMaps returns diffs within a Range. See Range for which diffs are // returned. 
func RangeDiffMaps(ctx context.Context, from, to Map, rng Range, cb tree.DiffFn) error { - cfn := func(left, right tree.Item) int { - return from.tuples.Order.Compare(val.Tuple(left), val.Tuple(right)) - } fns, tns := from.tuples.NodeStore, to.tuples.NodeStore fromStart, err := tree.NewCursorFromSearchFn(ctx, fns, from.tuples.Root, rangeStartSearchFn(rng)) @@ -148,7 +144,11 @@ func RangeDiffMaps(ctx context.Context, from, to Map, rng Range, cb tree.DiffFn) return err } - differ, err := tree.DifferFromCursors(fromStart, toStart, fromStop, toStop, cfn) + differ, err := tree.DifferFromCursors[val.Tuple, val.TupleDesc]( + fromStart, toStart, + fromStop, toStop, + from.tuples.Order, + ) if err != nil { return err } @@ -175,7 +175,7 @@ func DiffMapsKeyRange(ctx context.Context, from, to Map, start, stop val.Tuple, func MergeMaps(ctx context.Context, left, right, base Map, cb tree.CollisionFn) (Map, error) { serializer := message.NewProllyMapSerializer(left.valDesc, base.NodeStore().Pool()) - tuples, err := tree.MergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer, base.valDesc) + tuples, err := tree.MergeOrderedTrees(ctx, left.tuples, right.tuples, base.tuples, cb, serializer) if err != nil { return Map{}, err } diff --git a/go/store/prolly/mutable_map.go b/go/store/prolly/mutable_map.go index baa1b16a48..454c4aeef3 100644 --- a/go/store/prolly/mutable_map.go +++ b/go/store/prolly/mutable_map.go @@ -59,8 +59,10 @@ func (mut MutableMap) flushWithSerializer(ctx context.Context, s message.Seriali return Map{}, err } - tr := mut.tuples.StaticMap - root, err := tree.ApplyMutations(ctx, tr.NodeStore, tr.Root, s, mut.tuples.Mutations(), tr.CompareItems) + sm := mut.tuples.StaticMap + fn := tree.ApplyMutations[val.Tuple, val.TupleDesc, message.Serializer] + + root, err := fn(ctx, sm.NodeStore, sm.Root, mut.keyDesc, s, mut.tuples.Mutations()) if err != nil { return Map{}, err } @@ -68,8 +70,8 @@ func (mut MutableMap) flushWithSerializer(ctx 
context.Context, s message.Seriali return Map{ tuples: tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ Root: root, - NodeStore: tr.NodeStore, - Order: tr.Order, + NodeStore: sm.NodeStore, + Order: sm.Order, }, keyDesc: mut.keyDesc, valDesc: mut.valDesc, diff --git a/go/store/prolly/tree/diff.go b/go/store/prolly/tree/diff.go index 2be18622dc..e15b6ea100 100644 --- a/go/store/prolly/tree/diff.go +++ b/go/store/prolly/tree/diff.go @@ -36,46 +36,67 @@ type Diff struct { type DiffFn func(context.Context, Diff) error -type Differ struct { +type Differ[K ~[]byte, O Ordering[K]] struct { from, to *Cursor fromStop, toStop *Cursor - cmp CompareFn + order O } -func DifferFromRoots(ctx context.Context, fromNs NodeStore, toNs NodeStore, from, to Node, cmp CompareFn) (Differ, error) { +func DifferFromRoots[K ~[]byte, O Ordering[K]]( + ctx context.Context, + fromNs NodeStore, toNs NodeStore, + from, to Node, + order O, +) (Differ[K, O], error) { fc, err := NewCursorAtStart(ctx, fromNs, from) if err != nil { - return Differ{}, err + return Differ[K, O]{}, err } tc, err := NewCursorAtStart(ctx, toNs, to) if err != nil { - return Differ{}, err + return Differ[K, O]{}, err } fs, err := NewCursorPastEnd(ctx, fromNs, from) if err != nil { - return Differ{}, err + return Differ[K, O]{}, err } ts, err := NewCursorPastEnd(ctx, toNs, to) if err != nil { - return Differ{}, err + return Differ[K, O]{}, err } - return Differ{from: fc, to: tc, fromStop: fs, toStop: ts, cmp: cmp}, nil + return Differ[K, O]{ + from: fc, + to: tc, + fromStop: fs, + toStop: ts, + order: order, + }, nil } -func DifferFromCursors(fromStart, toStart, fromStop, toStop *Cursor, cmp CompareFn) (Differ, error) { - return Differ{from: fromStart, to: toStart, fromStop: fromStop, toStop: toStop, cmp: cmp}, nil +func DifferFromCursors[K ~[]byte, O Ordering[K]]( + fromStart, toStart, + fromStop, toStop *Cursor, + order O, +) (Differ[K, O], error) { + return Differ[K, O]{ + from: fromStart, + to: toStart, + fromStop: 
fromStop, + toStop: toStop, + order: order, + }, nil } -func (td Differ) Next(ctx context.Context) (diff Diff, err error) { +func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { for td.from.Valid() && td.from.Compare(td.fromStop) < 0 && td.to.Valid() && td.to.Compare(td.toStop) < 0 { f := td.from.CurrentKey() t := td.to.CurrentKey() - cmp := td.cmp(f, t) + cmp := td.order.Compare(K(f), K(t)) switch { case cmp < 0: diff --git a/go/store/prolly/tree/map.go b/go/store/prolly/tree/map.go index 8c5dd074eb..ef9a0e85dd 100644 --- a/go/store/prolly/tree/map.go +++ b/go/store/prolly/tree/map.go @@ -22,7 +22,6 @@ import ( "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/prolly/message" "github.com/dolthub/dolt/go/store/skip" - "github.com/dolthub/dolt/go/store/val" ) type KeyValueFn[K, V ~[]byte] func(key K, value V) error @@ -31,10 +30,6 @@ type KvIter[K, V ~[]byte] interface { Next(ctx context.Context) (K, V, error) } -type Ordering[K ~[]byte] interface { - Compare(left, right K) int -} - // StaticMap is a static prolly Tree with ordered elements. 
type StaticMap[K, V ~[]byte, O Ordering[K]] struct { Root Node @@ -47,10 +42,7 @@ func DiffOrderedTrees[K, V ~[]byte, O Ordering[K]]( from, to StaticMap[K, V, O], cb DiffFn, ) error { - cfn := func(left, right Item) int { - return from.Order.Compare(K(left), K(right)) - } - differ, err := DifferFromRoots(ctx, from.NodeStore, to.NodeStore, from.Root, to.Root, cfn) + differ, err := DifferFromRoots[K](ctx, from.NodeStore, to.NodeStore, from.Root, to.Root, from.Order) if err != nil { return err } @@ -121,11 +113,7 @@ func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]]( } } - cfn := func(left, right Item) int { - return from.Order.Compare(K(left), K(right)) - } - - differ, err := DifferFromCursors(fromStart, toStart, fromStop, toStop, cfn) + differ, err := DifferFromCursors[K](fromStart, toStart, fromStop, toStop, from.Order) if err != nil { return err } @@ -148,12 +136,8 @@ func MergeOrderedTrees[K, V ~[]byte, O Ordering[K], S message.Serializer]( l, r, base StaticMap[K, V, O], cb CollisionFn, serializer S, - valDesc val.TupleDesc, ) (StaticMap[K, V, O], error) { - cfn := func(left, right Item) int { - return base.Order.Compare(K(left), K(right)) - } - root, err := ThreeWayMerge(ctx, base.NodeStore, l.Root, r.Root, base.Root, cfn, cb, serializer, valDesc) + root, err := ThreeWayMerge[K](ctx, base.NodeStore, l.Root, r.Root, base.Root, cb, base.Order, serializer) if err != nil { return StaticMap[K, V, O]{}, err } @@ -463,7 +447,6 @@ func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint } var _ ItemSearchFn = StaticMap[Item, Item, Ordering[Item]]{}.searchNode -var _ CompareFn = StaticMap[Item, Item, Ordering[Item]]{}.CompareItems type OrderedTreeIter[K, V ~[]byte] struct { // current tuple location diff --git a/go/store/prolly/tree/merge.go b/go/store/prolly/tree/merge.go index 6e8878cf69..66e48ef162 100644 --- a/go/store/prolly/tree/merge.go +++ b/go/store/prolly/tree/merge.go @@ -15,14 +15,12 @@ package tree import ( - "bytes" "context" 
"io" "golang.org/x/sync/errgroup" "github.com/dolthub/dolt/go/store/prolly/message" - "github.com/dolthub/dolt/go/store/val" ) const patchBufferSize = 1024 @@ -38,22 +36,21 @@ type CollisionFn func(left, right Diff) (Diff, bool) // |right| are applied directly to |left|. This reduces the amount of write work and improves performance. // In the case that a key-value pair was modified on both |left| and |right| with different resulting // values, the CollisionFn is called to perform a cell-wise merge, or to throw a conflict. -func ThreeWayMerge[S message.Serializer]( +func ThreeWayMerge[K ~[]byte, O Ordering[K], S message.Serializer]( ctx context.Context, ns NodeStore, left, right, base Node, - compare CompareFn, collide CollisionFn, + order O, serializer S, - valDesc val.TupleDesc, ) (final Node, err error) { - ld, err := DifferFromRoots(ctx, ns, ns, base, left, compare) + ld, err := DifferFromRoots[K](ctx, ns, ns, base, left, order) if err != nil { return Node{}, err } - rd, err := DifferFromRoots(ctx, ns, ns, base, right, compare) + rd, err := DifferFromRoots[K](ctx, ns, ns, base, right, order) if err != nil { return Node{}, err } @@ -74,7 +71,7 @@ func ThreeWayMerge[S message.Serializer]( // consume |patches| and apply them to |left| eg.Go(func() error { - final, err = ApplyMutations(ctx, ns, left, serializer, patches, compare) + final, err = ApplyMutations[K](ctx, ns, left, order, serializer, patches) return err }) @@ -126,7 +123,12 @@ func (ps patchBuffer) Close() error { return nil } -func sendPatches(ctx context.Context, l, r Differ, buf patchBuffer, cb CollisionFn) (err error) { +func sendPatches[K ~[]byte, O Ordering[K]]( + ctx context.Context, + l, r Differ[K, O], + buf patchBuffer, + cb CollisionFn, +) (err error) { var ( left, right Diff lok, rok = true, true @@ -149,7 +151,7 @@ func sendPatches(ctx context.Context, l, r Differ, buf patchBuffer, cb Collision } for lok && rok { - cmp := compareDiffKeys(left, right, l.cmp) + cmp := 
l.order.Compare(K(left.Key), K(right.Key)) switch { case cmp < 0: @@ -225,13 +227,3 @@ func sendPatches(ctx context.Context, l, r Differ, buf patchBuffer, cb Collision return nil } - -func compareDiffKeys(left, right Diff, cmp CompareFn) int { - return cmp(Item(left.Key), Item(right.Key)) -} - -func equalDiffVals(left, right Diff) bool { - // todo(andy): bytes must be comparable - ok := left.Type == right.Type - return ok && bytes.Equal(left.To, right.To) -} diff --git a/go/store/prolly/tree/mutator.go b/go/store/prolly/tree/mutator.go index f4e445b9a5..0023fcc736 100644 --- a/go/store/prolly/tree/mutator.go +++ b/go/store/prolly/tree/mutator.go @@ -56,20 +56,20 @@ type MutationIter interface { // - Repeat for every edit. // // - Finalize the chunker and resolve the tree's new root Node. -func ApplyMutations[S message.Serializer]( +func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer]( ctx context.Context, ns NodeStore, root Node, + order O, serializer S, edits MutationIter, - compare CompareFn, ) (Node, error) { newKey, newValue := edits.NextMutation(ctx) if newKey == nil { return root, nil // no mutations } - cur, err := NewCursorFromCompareFn(ctx, ns, root, newKey, compare) + cur, err := NewCursorAtKey(ctx, ns, root, K(newKey), order) if err != nil { return Node{}, err } @@ -82,7 +82,7 @@ func ApplyMutations[S message.Serializer]( for newKey != nil { // move |cur| to the NextMutation mutation point - err = cur.Seek(ctx, newKey, compare) + err = Seek(ctx, cur, K(newKey), order) if err != nil { return Node{}, err } @@ -91,7 +91,7 @@ func ApplyMutations[S message.Serializer]( if cur.Valid() { // Compare mutations |newKey| and |newValue| // to the existing pair from the cursor - if compare(newKey, cur.CurrentKey()) == 0 { + if order.Compare(K(newKey), K(cur.CurrentKey())) == 0 { oldValue = cur.CurrentValue() } } diff --git a/go/store/prolly/tree/node_cursor.go b/go/store/prolly/tree/node_cursor.go index d7de921570..b1c46d3617 100644 --- 
a/go/store/prolly/tree/node_cursor.go +++ b/go/store/prolly/tree/node_cursor.go @@ -25,7 +25,6 @@ import ( "context" "errors" "fmt" - "sort" "github.com/dolthub/dolt/go/store/hash" ) @@ -38,10 +37,35 @@ type Cursor struct { nrw NodeStore } -type CompareFn func(left, right Item) int - type SearchFn func(nd Node) (idx int) +type Ordering[K ~[]byte] interface { + Compare(left, right K) int +} + +// SearchForKey returns a SearchFn for |key|. +func SearchForKey[K ~[]byte, O Ordering[K]](key K, order O) SearchFn { + return func(nd Node) (idx int) { + n := int(nd.Count()) + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + less := order.Compare(key, K(nd.GetKey(h))) <= 0 + // i ≤ h < j + if !less { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and + // f(j) (= f(i)) == true => answer is i. 
+ return i + } +} + type ItemSearchFn func(item Item, nd Node) (idx int) func NewCursorAtStart(ctx context.Context, ns NodeStore, nd Node) (cur *Cursor, err error) { @@ -180,6 +204,10 @@ func GetOrdinalOfCursor(curr *Cursor) (ord uint64, err error) { return ord, nil } +func NewCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (cur *Cursor, err error) { + return NewCursorFromSearchFn(ctx, ns, nd, SearchForKey(key, order)) +} + func NewCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search SearchFn) (cur *Cursor, err error) { cur = &Cursor{nd: nd, nrw: ns} @@ -212,14 +240,6 @@ func NewCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search Se return } -func NewCursorFromCompareFn(ctx context.Context, ns NodeStore, n Node, i Item, compare CompareFn) (*Cursor, error) { - return NewCursorAtItem(ctx, ns, n, i, func(item Item, node Node) (idx int) { - return sort.Search(node.Count(), func(i int) bool { - return compare(item, node.GetKey(i)) <= 0 - }) - }) -} - func NewCursorAtItem(ctx context.Context, ns NodeStore, nd Node, item Item, search ItemSearchFn) (cur *Cursor, err error) { cur = &Cursor{nd: nd, nrw: ns} @@ -366,6 +386,37 @@ func CurrentCursorItems(cur *Cursor) (key, value Item) { return } +// Seek updates the cursor's node to one whose range spans the key's value, or the last +// node if the key is greater than all existing keys. +// If a node does not contain the key, we recurse upwards to the parent cursor. If the +// node contains a key, we recurse downwards into child nodes. 
+func Seek[K ~[]byte, O Ordering[K]](ctx context.Context, cur *Cursor, key K, order O) (err error) { + inBounds := true + if cur.parent != nil { + inBounds = inBounds && order.Compare(key, K(cur.firstKey())) >= 0 + inBounds = inBounds && order.Compare(key, K(cur.lastKey())) <= 0 + } + + if !inBounds { + // |key| is outside the bounds of |cur.nd|, search up the tree + err = Seek(ctx, cur.parent, key, order) + if err != nil { + return err + } + // stay in bounds for internal nodes + cur.parent.keepInBounds() + + cur.nd, err = fetchChild(ctx, cur.nrw, cur.parent.CurrentRef()) + if err != nil { + return err + } + } + + cur.idx = SearchForKey(key, order)(cur.nd) + + return +} + func (cur *Cursor) Valid() bool { return cur.nd.count != 0 && cur.nd.bytes() != nil && @@ -456,47 +507,6 @@ func (cur *Cursor) level() (uint64, error) { return uint64(lvl), nil } -// Seek updates the cursor's node to one whose range spans the key's value, or the last -// node if the key is greater than all existing keys. -// If a node does not contain the key, we recurse upwards to the parent cursor. If the -// node contains a key, we recurse downwards into child nodes. -func (cur *Cursor) Seek(ctx context.Context, key Item, cb CompareFn) (err error) { - inBounds := true - if cur.parent != nil { - inBounds = inBounds && cb(key, cur.firstKey()) >= 0 - inBounds = inBounds && cb(key, cur.lastKey()) <= 0 - } - - if !inBounds { - // |item| is outside the bounds of |cur.nd|, search up the tree - err = cur.parent.Seek(ctx, key, cb) - if err != nil { - return err - } - // stay in bounds for internal nodes - cur.parent.keepInBounds() - - cur.nd, err = fetchChild(ctx, cur.nrw, cur.parent.CurrentRef()) - if err != nil { - return err - } - } - - cur.idx = cur.search(key, cb) - - return -} - -// search returns the index of |item| if it's present in |cur.nd|, or the -// index of the next greatest element if it's not present. 
-func (cur *Cursor) search(item Item, cb CompareFn) (idx int) { - idx = sort.Search(int(cur.nd.count), func(i int) bool { - return cb(item, cur.nd.GetKey(i)) <= 0 - }) - - return idx -} - // invalidateAtEnd sets the cursor's index to the node count. func (cur *Cursor) invalidateAtEnd() { cur.idx = int(cur.nd.count) From 349e6208172ce305a0dc753b3f4ffa9435274abe Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 8 Sep 2022 14:17:31 -0700 Subject: [PATCH 3/4] removed ItemSearchFn, simplified cursor usage --- go/store/prolly/tree/map.go | 46 ++-------- go/store/prolly/tree/node_cursor.go | 111 +++++++++-------------- go/store/prolly/tree/node_cursor_test.go | 27 ++---- 3 files changed, 56 insertions(+), 128 deletions(-) diff --git a/go/store/prolly/tree/map.go b/go/store/prolly/tree/map.go index ef9a0e85dd..c281ed6a12 100644 --- a/go/store/prolly/tree/map.go +++ b/go/store/prolly/tree/map.go @@ -80,12 +80,12 @@ func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]]( return err } } else { - fromStart, err = NewCursorAtItem(ctx, from.NodeStore, from.Root, Item(start), from.searchNode) + fromStart, err = NewCursorAtKey(ctx, from.NodeStore, from.Root, start, from.Order) if err != nil { return err } - toStart, err = NewCursorAtItem(ctx, to.NodeStore, to.Root, Item(start), to.searchNode) + toStart, err = NewCursorAtKey(ctx, to.NodeStore, to.Root, start, to.Order) if err != nil { return err } @@ -102,12 +102,12 @@ func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]]( return err } } else { - fromStop, err = NewCursorAtItem(ctx, from.NodeStore, from.Root, Item(stop), from.searchNode) + fromStop, err = NewCursorAtKey(ctx, from.NodeStore, from.Root, stop, from.Order) if err != nil { return err } - toStop, err = NewCursorAtItem(ctx, to.NodeStore, to.Root, Item(stop), to.searchNode) + toStop, err = NewCursorAtKey(ctx, to.NodeStore, to.Root, stop, to.Order) if err != nil { return err } @@ -180,7 +180,7 @@ func (t StaticMap[K, V, O]) WalkNodes(ctx context.Context, cb 
NodeCb) error { } func (t StaticMap[K, V, O]) Get(ctx context.Context, query K, cb KeyValueFn[K, V]) (err error) { - cur, err := NewLeafCursorAtItem(ctx, t.NodeStore, t.Root, Item(query), t.searchNode) + cur, err := NewLeafCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order) if err != nil { return err } @@ -200,7 +200,7 @@ func (t StaticMap[K, V, O]) Get(ctx context.Context, query K, cb KeyValueFn[K, V } func (t StaticMap[K, V, O]) Has(ctx context.Context, query K) (ok bool, err error) { - cur, err := NewLeafCursorAtItem(ctx, t.NodeStore, t.Root, Item(query), t.searchNode) + cur, err := NewLeafCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order) if err != nil { return false, err } @@ -389,7 +389,7 @@ func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusi return nil, nil, err } } else { - lo, err = NewCursorAtItem(ctx, t.NodeStore, t.Root, Item(startInclusive), t.searchNode) + lo, err = NewCursorAtKey(ctx, t.NodeStore, t.Root, startInclusive, t.Order) if err != nil { return nil, nil, err } @@ -401,7 +401,7 @@ func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusi return nil, nil, err } } else { - hi, err = NewCursorAtItem(ctx, t.NodeStore, t.Root, Item(stopExclusive), t.searchNode) + hi, err = NewCursorAtKey(ctx, t.NodeStore, t.Root, stopExclusive, t.Order) if err != nil { return nil, nil, err } @@ -410,35 +410,9 @@ func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusi return } -// searchNode returns the smallest index where nd[i] >= query -// Adapted from search.Sort to inline comparison. -func (t StaticMap[K, V, O]) searchNode(query Item, nd Node) int { - n := int(nd.Count()) - // Define f(-1) == false and f(n) == true. - // Invariant: f(i-1) == false, f(j) == true. 
- i, j := 0, n - for i < j { - h := int(uint(i+j) >> 1) // avoid overflow when computing h - less := t.Order.Compare(K(query), K(nd.GetKey(h))) <= 0 - // i ≤ h < j - if !less { - i = h + 1 // preserves f(i-1) == false - } else { - j = h // preserves f(j) == true - } - } - // i == j, f(i-1) == false, and - // f(j) (= f(i)) == true => answer is i. - return i -} - -func (t StaticMap[K, V, O]) CompareItems(left, right Item) int { - return t.Order.Compare(K(left), K(right)) -} - // getOrdinalForKey returns the smallest ordinal position at which the key >= |query|. func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint64, error) { - cur, err := NewCursorAtItem(ctx, t.NodeStore, t.Root, Item(query), t.searchNode) + cur, err := NewCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order) if err != nil { return 0, err } @@ -446,8 +420,6 @@ func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint return GetOrdinalOfCursor(cur) } -var _ ItemSearchFn = StaticMap[Item, Item, Ordering[Item]]{}.searchNode - type OrderedTreeIter[K, V ~[]byte] struct { // current tuple location curr *Cursor diff --git a/go/store/prolly/tree/node_cursor.go b/go/store/prolly/tree/node_cursor.go index b1c46d3617..a139fd308c 100644 --- a/go/store/prolly/tree/node_cursor.go +++ b/go/store/prolly/tree/node_cursor.go @@ -43,31 +43,6 @@ type Ordering[K ~[]byte] interface { Compare(left, right K) int } -// SearchForKey returns a SearchFn for |key|. -func SearchForKey[K ~[]byte, O Ordering[K]](key K, order O) SearchFn { - return func(nd Node) (idx int) { - n := int(nd.Count()) - // Define f(-1) == false and f(n) == true. - // Invariant: f(i-1) == false, f(j) == true. 
- i, j := 0, n - for i < j { - h := int(uint(i+j) >> 1) // avoid overflow when computing h - less := order.Compare(key, K(nd.GetKey(h))) <= 0 - // i ≤ h < j - if !less { - i = h + 1 // preserves f(i-1) == false - } else { - j = h // preserves f(j) == true - } - } - // i == j, f(i-1) == false, and - // f(j) (= f(i)) == true => answer is i. - return i - } -} - -type ItemSearchFn func(item Item, nd Node) (idx int) - func NewCursorAtStart(ctx context.Context, ns NodeStore, nd Node) (cur *Cursor, err error) { cur = &Cursor{nd: nd, nrw: ns} var leaf bool @@ -240,49 +215,29 @@ func NewCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search Se return } -func NewCursorAtItem(ctx context.Context, ns NodeStore, nd Node, item Item, search ItemSearchFn) (cur *Cursor, err error) { - cur = &Cursor{nd: nd, nrw: ns} +func NewLeafCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (Cursor, error) { + cur := Cursor{nd: nd, nrw: ns} + for { + // binary search |cur.nd| for |key| + i, j := 0, cur.nd.Count() + for i < j { + h := int(uint(i+j) >> 1) + cmp := order.Compare(key, K(cur.nd.GetKey(h))) + if cmp > 0 { + i = h + 1 + } else { + j = h + } + } + cur.idx = i - cur.idx = search(item, cur.nd) - var leaf bool - leaf, err = cur.isLeaf() - if err != nil { - return nil, err - } - for !leaf { - - // stay in bounds for internal nodes - cur.keepInBounds() - - nd, err = fetchChild(ctx, ns, cur.CurrentRef()) + leaf, err := cur.isLeaf() if err != nil { return cur, err + } else if leaf { + break // done } - parent := cur - cur = &Cursor{nd: nd, parent: parent, nrw: ns} - - cur.idx = search(item, cur.nd) - leaf, err = cur.isLeaf() - if err != nil { - return nil, err - } - } - - return -} - -func NewLeafCursorAtItem(ctx context.Context, ns NodeStore, nd Node, item Item, search ItemSearchFn) (cur Cursor, err error) { - cur = Cursor{nd: nd, parent: nil, nrw: ns} - - cur.idx = search(item, cur.nd) - var leaf bool - leaf, err = cur.isLeaf() - 
if err != nil { - return cur, err - } - for !leaf { - // stay in bounds for internal nodes cur.keepInBounds() @@ -291,17 +246,33 @@ func NewLeafCursorAtItem(ctx context.Context, ns NodeStore, nd Node, item Item, if err != nil { return cur, err } - - cur.idx = search(item, cur.nd) - leaf, err = cur.isLeaf() - if err != nil { - return cur, err - } } - return cur, nil } +// SearchForKey returns a SearchFn for |key|. +func SearchForKey[K ~[]byte, O Ordering[K]](key K, order O) SearchFn { + return func(nd Node) (idx int) { + n := int(nd.Count()) + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + less := order.Compare(key, K(nd.GetKey(h))) <= 0 + // i ≤ h < j + if !less { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and + // f(j) (= f(i)) == true => answer is i. + return i + } +} + type LeafSpan struct { Leaves []Node LocalStart int diff --git a/go/store/prolly/tree/node_cursor_test.go b/go/store/prolly/tree/node_cursor_test.go index 726b888a39..ead4515ee6 100644 --- a/go/store/prolly/tree/node_cursor_test.go +++ b/go/store/prolly/tree/node_cursor_test.go @@ -17,7 +17,6 @@ package tree import ( "context" "fmt" - "sort" "testing" "github.com/stretchr/testify/assert" @@ -79,7 +78,7 @@ func testNewCursorAtItem(t *testing.T, count int) { ctx := context.Background() for i := range items { key, value := items[i][0], items[i][1] - cur, err := NewCursorAtItem(ctx, ns, root, key, searchTestTree) + cur, err := NewCursorAtKey(ctx, ns, root, val.Tuple(key), keyDesc) require.NoError(t, err) assert.Equal(t, key, cur.CurrentKey()) assert.Equal(t, value, cur.CurrentValue()) @@ -89,18 +88,11 @@ func testNewCursorAtItem(t *testing.T, count int) { } func testGetOrdinalOfCursor(t *testing.T, count int) { - tuples, d := AscendingUintTuples(count) - - search := func(item Item, nd 
Node) (idx int) { - return sort.Search(int(nd.count), func(i int) bool { - l, r := val.Tuple(item), val.Tuple(nd.GetKey(i)) - return d.Compare(l, r) <= 0 - }) - } + tuples, desc := AscendingUintTuples(count) ctx := context.Background() ns := NewTestNodeStore() - serializer := message.NewProllyMapSerializer(d, ns.Pool()) + serializer := message.NewProllyMapSerializer(desc, ns.Pool()) chkr, err := newEmptyChunker(ctx, ns, serializer) require.NoError(t, err) @@ -112,7 +104,7 @@ func testGetOrdinalOfCursor(t *testing.T, count int) { assert.NoError(t, err) for i := 0; i < len(tuples); i++ { - curr, err := NewCursorAtItem(ctx, ns, nd, Item(tuples[i][0]), search) + curr, err := NewCursorAtKey(ctx, ns, nd, tuples[i][0], desc) require.NoError(t, err) ord, err := GetOrdinalOfCursor(curr) @@ -121,11 +113,11 @@ func testGetOrdinalOfCursor(t *testing.T, count int) { assert.Equal(t, uint64(i), ord) } - b := val.NewTupleBuilder(d) + b := val.NewTupleBuilder(desc) b.PutUint32(0, uint32(len(tuples))) aboveItem := b.Build(sharedPool) - curr, err := NewCursorAtItem(ctx, ns, nd, Item(aboveItem), search) + curr, err := NewCursorAtKey(ctx, ns, nd, aboveItem, desc) require.NoError(t, err) ord, err := GetOrdinalOfCursor(curr) @@ -171,13 +163,6 @@ var valDesc = val.NewTupleDescriptor( val.Type{Enc: val.Int64Enc, Nullable: true}, ) -func searchTestTree(item Item, nd Node) int { - return sort.Search(int(nd.count), func(i int) bool { - l, r := val.Tuple(item), val.Tuple(nd.GetKey(i)) - return keyDesc.Compare(l, r) <= 0 - }) -} - func randomTupleItemPairs(count int, ns NodeStore) (items [][2]Item) { tups := RandomTuplePairs(count, keyDesc, valDesc, ns) items = make([][2]Item, count) From 859f450fee84bb44b080cfcf3d9953c084c8f4ff Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 8 Sep 2022 14:45:16 -0700 Subject: [PATCH 4/4] cleanup --- go/store/prolly/{map.go => tuple_map.go} | 0 go/store/prolly/{mutable_map.go => tuple_mutable_map.go} | 0 go/store/prolly/{range.go => tuple_range.go} | 0 
go/store/prolly/{range_iter.go => tuple_range_iter.go} | 2 +- .../prolly/{range_iter_test.go => tuple_range_iter_test.go} | 0 go/store/prolly/{range_test.go => tuple_range_test.go} | 0 go/store/skip/list.go | 1 + 7 files changed, 2 insertions(+), 1 deletion(-) rename go/store/prolly/{map.go => tuple_map.go} (100%) rename go/store/prolly/{mutable_map.go => tuple_mutable_map.go} (100%) rename go/store/prolly/{range.go => tuple_range.go} (100%) rename go/store/prolly/{range_iter.go => tuple_range_iter.go} (98%) rename go/store/prolly/{range_iter_test.go => tuple_range_iter_test.go} (100%) rename go/store/prolly/{range_test.go => tuple_range_test.go} (100%) diff --git a/go/store/prolly/map.go b/go/store/prolly/tuple_map.go similarity index 100% rename from go/store/prolly/map.go rename to go/store/prolly/tuple_map.go diff --git a/go/store/prolly/mutable_map.go b/go/store/prolly/tuple_mutable_map.go similarity index 100% rename from go/store/prolly/mutable_map.go rename to go/store/prolly/tuple_mutable_map.go diff --git a/go/store/prolly/range.go b/go/store/prolly/tuple_range.go similarity index 100% rename from go/store/prolly/range.go rename to go/store/prolly/tuple_range.go diff --git a/go/store/prolly/range_iter.go b/go/store/prolly/tuple_range_iter.go similarity index 98% rename from go/store/prolly/range_iter.go rename to go/store/prolly/tuple_range_iter.go index c4d3affd7f..a838a5acbb 100644 --- a/go/store/prolly/range_iter.go +++ b/go/store/prolly/tuple_range_iter.go @@ -134,7 +134,7 @@ func memIterFromRange(list *skip.List, rng Range) *memRangeIter { } // skipSearchFromRange is a skip.SeekFn used to initialize -// a skip.List iterator for a given Range. The skip.SeekFn +// a skip.List iterator for a given Range. The skip.SeekFn // returns true if the iter being initialized is not yet // within the bounds of Range |rng|. 
func skipSearchFromRange(rng Range) skip.SeekFn { diff --git a/go/store/prolly/range_iter_test.go b/go/store/prolly/tuple_range_iter_test.go similarity index 100% rename from go/store/prolly/range_iter_test.go rename to go/store/prolly/tuple_range_iter_test.go diff --git a/go/store/prolly/range_test.go b/go/store/prolly/tuple_range_test.go similarity index 100% rename from go/store/prolly/range_test.go rename to go/store/prolly/tuple_range_test.go diff --git a/go/store/skip/list.go b/go/store/skip/list.go index 2a371ff9c0..915ed85070 100644 --- a/go/store/skip/list.go +++ b/go/store/skip/list.go @@ -58,6 +58,7 @@ type List struct { type nodeId uint32 +// tower is a multi-level skipNode pointer. type tower [maxHeight + 1]nodeId type skipNode struct {