Files
dolt/go/store/prolly/tree/map.go

633 lines
15 KiB
Go

// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tree
import (
"context"
"fmt"
"io"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly/message"
"github.com/dolthub/dolt/go/store/skip"
)
// KeyValueFn is a callback that receives a single key-value pair. The lookup
// methods in this file (StaticMap.Get, StaticMap.GetPrefix) return whatever
// error the callback returns.
type KeyValueFn[K, V ~[]byte] func(key K, value V) error
// KvIter is a pull-style iterator over key-value pairs.
type KvIter[K, V ~[]byte] interface {
// Next returns the next key-value pair, or a non-nil error when no pair can
// be produced. NOTE(review): the iterators defined in this file report
// exhaustion with io.EOF — confirm that this is the contract for every
// implementation.
Next(ctx context.Context) (K, V, error)
}
// StaticMap is a static prolly Tree with ordered elements.
// It bundles the root node of the tree with the store used to load nodes and
// the ordering used to compare keys.
type StaticMap[K, V ~[]byte, O Ordering[K]] struct {
// Root is the root node of the tree.
Root Node
// NodeStore is handed to the cursor constructors and tree walkers that
// operate on Root.
NodeStore NodeStore
// Order compares keys; it drives point lookups and key-range cursors.
Order O
}
// DiffOrderedTrees streams every difference between `from` and `to` into `cb`.
// If `considerAllRowsModified` is true, a key that exists in both trees is
// reported as a modification even when its bytes are identical. This is used
// when `from` and `to` have different schemas.
//
// Once the differ has been built, the function only returns when the differ or
// `cb` reports an error, so the returned error is never nil in that case.
// NOTE(review): callers presumably treat io.EOF from the differ as normal
// completion — confirm.
func DiffOrderedTrees[K, V ~[]byte, O Ordering[K]](
	ctx context.Context,
	from, to StaticMap[K, V, O],
	considerAllRowsModified bool,
	cb DiffFn,
) error {
	differ, err := DifferFromRoots[K](ctx, from.NodeStore, to.NodeStore, from.Root, to.Root, from.Order, considerAllRowsModified)
	if err != nil {
		return err
	}

	for {
		d, nextErr := differ.Next(ctx)
		if nextErr != nil {
			return nextErr
		}
		if cbErr := cb(ctx, d); cbErr != nil {
			return cbErr
		}
	}
}
// DiffKeyRangeOrderedTrees invokes `cb` for each difference between `from` and
// `to` within a key range.
//
// The range cursors for each map are built by getKeyRangeCursors using that
// map's own NodeStore, Root and Order: an empty `start` positions the start
// cursor at the beginning of the map, an empty `stop` positions the stop
// cursor past the end of the map, and non-empty bounds are located by key.
// The differ itself compares keys with `from.Order`.
//
// Once the cursors have been built, the function only returns when the differ
// or `cb` reports an error, so the returned error is never nil in that case.
// NOTE(review): callers presumably treat io.EOF from the differ as normal
// completion — confirm.
func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]](
	ctx context.Context,
	from, to StaticMap[K, V, O],
	start, stop K,
	cb DiffFn,
) error {
	fromStart, fromStop, err := from.getKeyRangeCursors(ctx, start, stop)
	if err != nil {
		return err
	}
	toStart, toStop, err := to.getKeyRangeCursors(ctx, start, stop)
	if err != nil {
		return err
	}

	differ := Differ[K, O]{
		from:     fromStart,
		to:       toStart,
		fromStop: fromStop,
		toStop:   toStop,
		order:    from.Order,
	}

	for {
		diff, nextErr := differ.Next(ctx)
		if nextErr != nil {
			return nextErr
		}
		if cbErr := cb(ctx, diff); cbErr != nil {
			return cbErr
		}
	}
}
// MergeOrderedTrees merges `l` and `r` against `base` by delegating to
// ThreeWayMerge; `cb`, the two schema-change flags and `serializer` are
// forwarded unchanged. The merge runs against the NodeStore and Order of
// `base`, and the resulting map carries those same two values together with
// the merged root. On failure the zero StaticMap and zero MergeStats are
// returned alongside the error.
func MergeOrderedTrees[K, V ~[]byte, O Ordering[K], S message.Serializer](
	ctx context.Context,
	l, r, base StaticMap[K, V, O],
	cb CollisionFn,
	leftSchemaChanged, rightSchemaChanged bool,
	serializer S,
) (StaticMap[K, V, O], MergeStats, error) {
	mergedRoot, stats, err := ThreeWayMerge[K](ctx, base.NodeStore, l.Root, r.Root, base.Root, cb, leftSchemaChanged, rightSchemaChanged, base.Order, serializer)
	if err != nil {
		var none StaticMap[K, V, O]
		return none, MergeStats{}, err
	}

	merged := StaticMap[K, V, O]{
		Root:      mergedRoot,
		NodeStore: base.NodeStore,
		Order:     base.Order,
	}
	return merged, stats, nil
}
// VisitMapLevelOrder walks the tree one level at a time, starting with the
// level directly above the leaves and finishing at the root level, and calls
// `cb` with the ref found at every cursor position of each of those levels.
// The int64 returned by `cb` is discarded; a non-nil error from `cb` aborts
// the walk and is returned. This function is used primarily for building
// appendix table files for databases to help optimize reads.
func VisitMapLevelOrder[K, V ~[]byte, O Ordering[K]](
	ctx context.Context,
	m StaticMap[K, V, O],
	cb func(h hash.Hash) (int64, error),
) error {
	// a cursor at the first leaf entry gives access to the whole parent chain
	leaf, err := newCursorAtStart(ctx, m.NodeStore, m.Root)
	if err != nil {
		return err
	}
	startKey := K(leaf.CurrentKey())

	// climb from the level-1 cursor up to the root cursor
	for level := leaf.parent; level != nil; level = level.parent {
		// sweep the whole level from left to right
		for level.Valid() {
			if _, err = cb(level.currentRef()); err != nil {
				return err
			}
			if err = level.advance(ctx); err != nil {
				return err
			}
		}
		// rewind this level to the start of the map before moving up
		if err = Seek(ctx, level, startKey, m.Order); err != nil {
			return err
		}
	}
	return nil
}
// Count returns the result of TreeCount on the root node.
// NOTE(review): presumably the total number of key-value pairs in the map — confirm.
func (t StaticMap[K, V, O]) Count() (int, error) {
	total, err := t.Root.TreeCount()
	return total, err
}
// Height returns the number of levels in the tree: the level of the root node plus one.
func (t StaticMap[K, V, O]) Height() int {
	rootLevel := t.Root.Level()
	return rootLevel + 1
}
// HashOf returns the hash of the root node.
func (t StaticMap[K, V, O]) HashOf() hash.Hash {
	rootHash := t.Root.HashOf()
	return rootHash
}
// Mutate returns a MutableMap layered over this map. Its pending edits are
// kept in a fresh skip list ordered by the map's own Order, and |t| is kept
// as the static base.
func (t StaticMap[K, V, O]) Mutate() MutableMap[K, V, O] {
	compareKeys := func(left, right []byte) int {
		return t.Order.Compare(left, right)
	}
	return MutableMap[K, V, O]{
		Edits:  skip.NewSkipList(compareKeys),
		Static: t,
	}
}
// WalkAddresses delegates to the package-level WalkAddresses, starting at the
// map's root and resolving nodes through its NodeStore.
func (t StaticMap[K, V, O]) WalkAddresses(ctx context.Context, cb AddressCb) error {
	root, store := t.Root, t.NodeStore
	return WalkAddresses(ctx, root, store, cb)
}
// WalkNodes delegates to the package-level WalkNodes, starting at the map's
// root and resolving nodes through its NodeStore.
func (t StaticMap[K, V, O]) WalkNodes(ctx context.Context, cb NodeCb) error {
	root, store := t.Root, t.NodeStore
	return WalkNodes(ctx, root, store, cb)
}
// Get looks up |query| and reports the outcome through |cb|. If the leaf
// cursor lands on a key that compares equal to |query| under the map's Order,
// |cb| receives that key and its value; otherwise |cb| receives (nil, nil).
// |cb| is always invoked unless building the cursor fails, and its error is
// returned.
func (t StaticMap[K, V, O]) Get(ctx context.Context, query K, cb KeyValueFn[K, V]) (err error) {
	cur, err := newLeafCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order)
	if err != nil {
		return err
	}

	if cur.Valid() {
		if found := K(cur.CurrentKey()); t.Order.Compare(query, found) == 0 {
			return cb(found, V(cur.currentValue()))
		}
	}
	// not present
	return cb(nil, nil)
}
// GetPrefix behaves like Get, but both the cursor search and the final
// comparison use |prefixOrder| instead of the map's own Order. If the leaf
// cursor lands on a key that compares equal to |query| under |prefixOrder|,
// |cb| receives that key and its value; otherwise |cb| receives (nil, nil).
// The error returned by |cb| is returned to the caller.
func (t StaticMap[K, V, O]) GetPrefix(ctx context.Context, query K, prefixOrder O, cb KeyValueFn[K, V]) (err error) {
	cur, err := newLeafCursorAtKey(ctx, t.NodeStore, t.Root, query, prefixOrder)
	if err != nil {
		return err
	}

	if cur.Valid() {
		if found := K(cur.CurrentKey()); prefixOrder.Compare(query, found) == 0 {
			return cb(found, V(cur.currentValue()))
		}
	}
	// no key matches the prefix
	return cb(nil, nil)
}
// Has reports whether the map contains a key that compares equal to |query|
// under the map's Order.
func (t StaticMap[K, V, O]) Has(ctx context.Context, query K) (ok bool, err error) {
	cur, err := newLeafCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order)
	if err != nil {
		return false, err
	}
	if !cur.Valid() {
		return false, nil
	}
	return t.Order.Compare(query, K(cur.CurrentKey())) == 0, nil
}
// HasPrefix reports whether the key the leaf cursor lands on compares equal
// to |query| under |prefixOrder|, i.e. whether |query| is a prefix of that key.
func (t StaticMap[K, V, O]) HasPrefix(ctx context.Context, query K, prefixOrder O) (ok bool, err error) {
	cur, err := newLeafCursorAtKey(ctx, t.NodeStore, t.Root, query, prefixOrder)
	if err != nil {
		return false, err
	}
	if !cur.Valid() {
		return false, nil
	}
	// true if |query| is a prefix of |cur.CurrentKey()|
	return prefixOrder.Compare(query, K(cur.CurrentKey())) == 0, nil
}
// LastKey returns the last key of the root node, or nil when the root node
// holds no entries. |ctx| is not used.
func (t StaticMap[K, V, O]) LastKey(ctx context.Context) (key K) {
	if !(t.Root.count > 0) {
		return nil
	}
	// if |t.Root| is a leaf node, it represents the entire map
	// if |t.Root| is an internal node, its last key is the
	// delimiter for last subtree and is the last key in the map
	return K(getLastKey(t.Root))
}
// IterAll returns an iterator over the whole map in forward direction. It
// starts at the cursor for the beginning of the tree and stops once the
// cursor compares greater than or equal to the past-the-end cursor. When the
// start cursor already satisfies that condition, an iterator with a nil
// cursor (and no step or stop function) is returned.
func (t StaticMap[K, V, O]) IterAll(ctx context.Context) (*OrderedTreeIter[K, V], error) {
	first, err := newCursorAtStart(ctx, t.NodeStore, t.Root)
	if err != nil {
		return nil, err
	}
	pastEnd, err := newCursorPastEnd(ctx, t.NodeStore, t.Root)
	if err != nil {
		return nil, err
	}

	reachedEnd := func(c *cursor) bool {
		return c.compare(pastEnd) >= 0
	}

	iter := &OrderedTreeIter[K, V]{}
	if !reachedEnd(first) {
		// non-empty range
		iter.curr, iter.stop, iter.step = first, reachedEnd, first.advance
	}
	return iter, nil
}
// IterAllReverse returns an iterator over the whole map in reverse direction.
// The boundary cursor is created at the start of the tree and then retreated
// once; iteration begins at the end cursor, steps with retreat, and stops once
// the cursor compares less than or equal to that boundary. When the end cursor
// already satisfies that condition, an iterator with a nil cursor (and no step
// or stop function) is returned.
func (t StaticMap[K, V, O]) IterAllReverse(ctx context.Context) (*OrderedTreeIter[K, V], error) {
	boundary, err := newCursorAtStart(ctx, t.NodeStore, t.Root)
	if err != nil {
		return nil, err
	}
	if err = boundary.retreat(ctx); err != nil {
		return nil, err
	}

	last, err := newCursorAtEnd(ctx, t.NodeStore, t.Root)
	if err != nil {
		return nil, err
	}

	passedStart := func(c *cursor) bool {
		return c.compare(boundary) <= 0
	}

	iter := &OrderedTreeIter[K, V]{}
	if !passedStart(last) {
		// non-empty range
		iter.curr, iter.stop, iter.step = last, passedStart, last.retreat
	}
	return iter, nil
}
// IterOrdinalRange returns an iterator that starts at ordinal position |start|
// and stops once the cursor reaches the cursor at ordinal |stop|.
//
// When |start| equals |stop| an exhausted iterator is returned without any
// further validation. An error is returned when |stop| is less than |start|
// or when |stop| exceeds the value reported by Count.
func (t StaticMap[K, V, O]) IterOrdinalRange(ctx context.Context, start, stop uint64) (*OrderedTreeIter[K, V], error) {
	switch {
	case start == stop:
		return &OrderedTreeIter[K, V]{curr: nil}, nil
	case start > stop:
		return nil, fmt.Errorf("invalid ordinal bounds (%d, %d)", start, stop)
	}

	total, err := t.Count()
	if err != nil {
		return nil, err
	}
	if uint64(total) < stop {
		return nil, fmt.Errorf("stop index (%d) out of bounds", stop)
	}

	first, err := newCursorAtOrdinal(ctx, t.NodeStore, t.Root, start)
	if err != nil {
		return nil, err
	}
	limit, err := newCursorAtOrdinal(ctx, t.NodeStore, t.Root, stop)
	if err != nil {
		return nil, err
	}

	reachedLimit := func(c *cursor) bool {
		return c.compare(limit) >= 0
	}
	return &OrderedTreeIter[K, V]{curr: first, stop: reachedLimit, step: first.advance}, nil
}
// FetchOrdinalRange fetches the span of leaf nodes for the ordinal range
// (|start|, |stop|) via fetchLeafNodeSpan and returns an iterator over it.
//
// When |start| equals |stop| a zero-value iterator is returned without any
// further validation. An error is returned when |stop| is less than |start|
// or when |stop| exceeds the value reported by Count.
func (t StaticMap[K, V, O]) FetchOrdinalRange(ctx context.Context, start, stop uint64) (*orderedLeafSpanIter[K, V], error) {
	switch {
	case start == stop:
		return &orderedLeafSpanIter[K, V]{}, nil
	case start > stop:
		return nil, fmt.Errorf("invalid ordinal bounds (%d, %d)", start, stop)
	}

	total, err := t.Count()
	if err != nil {
		return nil, err
	}
	if uint64(total) < stop {
		return nil, fmt.Errorf("stop index (%d) out of bounds", stop)
	}

	span, err := fetchLeafNodeSpan(ctx, t.NodeStore, t.Root, start, stop)
	if err != nil {
		return nil, err
	}

	head, rest := span.Leaves[0], span.Leaves[1:]
	iter := &orderedLeafSpanIter[K, V]{
		nd:     head,
		curr:   span.LocalStart,
		stop:   head.Count(),
		leaves: rest,
		final:  span.LocalStop,
	}
	if len(rest) == 0 {
		// one leaf span: the first leaf is also the last one
		iter.stop = span.LocalStop
	}
	return iter, nil
}
// IterKeyRange returns a forward iterator over the key range delimited by the
// cursors that getKeyRangeCursors builds for |start| and |stop|. Iteration
// stops once the cursor compares greater than or equal to the stop cursor;
// when the start cursor already does, an iterator with a nil cursor (and no
// step or stop function) is returned.
func (t StaticMap[K, V, O]) IterKeyRange(ctx context.Context, start, stop K) (*OrderedTreeIter[K, V], error) {
	first, limit, err := t.getKeyRangeCursors(ctx, start, stop)
	if err != nil {
		return nil, err
	}

	reachedLimit := func(c *cursor) bool {
		return c.compare(limit) >= 0
	}

	iter := &OrderedTreeIter[K, V]{}
	if !reachedLimit(first) {
		// non-empty range
		iter.curr, iter.stop, iter.step = first, reachedLimit, first.advance
	}
	return iter, nil
}
// GetKeyRangeCardinality returns the difference between the ordinals of the
// stop and start cursors that getKeyRangeCursors builds for |start| and
// |stop|. It returns 0 when the start ordinal is greater than the stop ordinal.
func (t StaticMap[K, V, O]) GetKeyRangeCardinality(ctx context.Context, start, stop K) (uint64, error) {
	first, limit, err := t.getKeyRangeCursors(ctx, start, stop)
	if err != nil {
		return 0, err
	}

	firstOrd, err := getOrdinalOfCursor(first)
	if err != nil {
		return 0, err
	}
	limitOrd, err := getOrdinalOfCursor(limit)
	if err != nil {
		return 0, err
	}

	if limitOrd < firstOrd {
		// inverted bounds describe an empty range
		return 0, nil
	}
	return limitOrd - firstOrd, nil
}
// getKeyRangeCursors builds the pair of cursors that delimit a key range.
// An empty |startInclusive| places |lo| at the start of the map; otherwise
// |lo| is located by key using the map's Order. An empty |stopExclusive|
// places |hi| past the end of the map; otherwise |hi| is located by key.
// On any error both cursors are returned as nil.
func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusive, stopExclusive K) (lo, hi *cursor, err error) {
	if len(startInclusive) > 0 {
		lo, err = newCursorAtKey(ctx, t.NodeStore, t.Root, startInclusive, t.Order)
	} else {
		lo, err = newCursorAtStart(ctx, t.NodeStore, t.Root)
	}
	if err != nil {
		return nil, nil, err
	}

	if len(stopExclusive) > 0 {
		hi, err = newCursorAtKey(ctx, t.NodeStore, t.Root, stopExclusive, t.Order)
	} else {
		hi, err = newCursorPastEnd(ctx, t.NodeStore, t.Root)
	}
	if err != nil {
		return nil, nil, err
	}

	return lo, hi, nil
}
// GetOrdinalForKey returns the smallest ordinal position at which the key >= |query|.
// It positions a cursor at |query| using the map's Order and reports that
// cursor's ordinal.
func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint64, error) {
	cur, err := newCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order)
	if err != nil {
		return 0, err
	}
	ordinal, err := getOrdinalOfCursor(cur)
	return ordinal, err
}
// OrderedTreeIter iterates over a range of key-value pairs by moving a cursor
// with |step| until |stop| reports that the range has been left.
type OrderedTreeIter[K, V ~[]byte] struct {
// current tuple location; nil when the range is empty or exhausted
curr *cursor
// the function called to move |curr| forward in the direction of iteration.
step func(context.Context) error
// should return |true| if the passed in cursor is past the iteration's stopping point.
stop func(*cursor) bool
}
// ReverseOrderedTreeIterFromCursors builds a reverse iterator from two search
// functions. |findStart| and |findEnd| each position a cursor in the tree
// rooted at |root|; the end cursor is then retreated once and becomes the
// first position of the iteration. The iterator steps with retreat and stops
// once its cursor compares less than the start cursor.
//
// When the retreated end cursor is already before the start cursor the range
// is empty and the iterator is returned with a nil current cursor. Its step
// function stays bound to the real end cursor rather than to a nil cursor.
func ReverseOrderedTreeIterFromCursors[K, V ~[]byte](
	ctx context.Context,
	root Node, ns NodeStore,
	findStart, findEnd SearchFn,
) (*OrderedTreeIter[K, V], error) {
	start, err := newCursorFromSearchFn(ctx, ns, root, findStart)
	if err != nil {
		return nil, err
	}

	end, err := newCursorFromSearchFn(ctx, ns, root, findEnd)
	if err != nil {
		return nil, err
	}
	// NOTE(review): presumably |findEnd| lands one position past the last
	// item of the range, hence the single retreat — confirm.
	if err = end.retreat(ctx); err != nil {
		return nil, err
	}

	stopFn := func(curr *cursor) bool {
		return curr.compare(start) < 0
	}

	// bind |step| while |end| is known to be non-nil
	iter := &OrderedTreeIter[K, V]{curr: end, stop: stopFn, step: end.retreat}
	if stopFn(end) {
		iter.curr = nil // empty range
	}
	return iter, nil
}
// OrderedTreeIterFromCursors builds a forward iterator from two search
// functions. |findStart| and |findStop| each position a cursor in the tree
// rooted at |root|. The iterator begins at the start cursor, steps with
// advance, and stops once its cursor compares greater than or equal to the
// stop cursor.
//
// When the start cursor is already at or past the stop cursor the range is
// empty and the iterator is returned with a nil current cursor. Its step
// function stays bound to the real start cursor rather than to a nil cursor.
func OrderedTreeIterFromCursors[K, V ~[]byte](
	ctx context.Context,
	root Node, ns NodeStore,
	findStart, findStop SearchFn,
) (*OrderedTreeIter[K, V], error) {
	start, err := newCursorFromSearchFn(ctx, ns, root, findStart)
	if err != nil {
		return nil, err
	}
	stop, err := newCursorFromSearchFn(ctx, ns, root, findStop)
	if err != nil {
		return nil, err
	}

	stopFn := func(curr *cursor) bool {
		return curr.compare(stop) >= 0
	}

	// bind |step| while |start| is known to be non-nil
	iter := &OrderedTreeIter[K, V]{curr: start, stop: stopFn, step: start.advance}
	if stopFn(start) {
		iter.curr = nil // empty range
	}
	return iter, nil
}
// Next returns the key-value pair at the current cursor position and then
// moves the cursor one step in the direction of iteration. It returns io.EOF
// when the iterator is empty or exhausted. If stepping fails, the pair that
// was read is dropped and only the error is returned.
func (it *OrderedTreeIter[K, V]) Next(ctx context.Context) (key K, value V, err error) {
	if it.curr == nil {
		return nil, nil, io.EOF
	}

	// read the items before the cursor moves off them
	rawKey, rawValue := currentCursorItems(it.curr)

	if err = it.step(ctx); err != nil {
		return nil, nil, err
	}
	if it.stop(it.curr) {
		// past the end of the range
		it.curr = nil
	}
	return K(rawKey), V(rawValue), nil
}
// Current returns the key-value pair at the current cursor position without
// moving the cursor. It returns (nil, nil) when the iterator is exhausted or
// its cursor is not valid.
func (it *OrderedTreeIter[K, V]) Current() (key K, value V) {
	// |it.curr| is set to nil when its range is exhausted
	if it.curr == nil || !it.curr.Valid() {
		return nil, nil
	}
	rawKey, rawValue := currentCursorItems(it.curr)
	return K(rawKey), V(rawValue)
}
// Iterate moves the cursor one step in the direction of iteration without
// returning the current pair; use Current to read the pair at the new
// position. When the step leaves the range, the iterator becomes exhausted.
//
// Calling Iterate on an empty or exhausted iterator is a no-op that returns
// nil. Such iterators may have been built without a step or stop function,
// so neither is called once |it.curr| is nil.
func (it *OrderedTreeIter[K, V]) Iterate(ctx context.Context) (err error) {
	if it.curr == nil {
		// nothing left to advance over
		return nil
	}

	if err = it.step(ctx); err != nil {
		return err
	}
	if it.stop(it.curr) {
		// past the end of the range
		it.curr = nil
	}
	return nil
}
// orderedLeafSpanIter iterates over the key-value pairs held by a sequence of
// already-fetched leaf nodes, consuming one node at a time. Its zero value is
// an exhausted iterator.
type orderedLeafSpanIter[K, V ~[]byte] struct {
// in-progress node
nd Node
// current index within |nd|
curr int
// last index for |nd| (exclusive): |nd| is exhausted once |curr| >= |stop|
stop int
// remaining leaves
leaves []Node
// stop index in last leaf node
final int
}
// Next returns the key-value pair at the current index of the in-progress
// leaf and advances the index. When the in-progress leaf is exhausted it moves
// on to the next remaining leaf; the last leaf is only read up to |s.final|.
// It returns io.EOF once no leaves remain. |ctx| is not used.
func (s *orderedLeafSpanIter[K, V]) Next(ctx context.Context) (key K, value V, err error) {
	if s.curr >= s.stop {
		// |s.nd| exhausted
		if len(s.leaves) == 0 {
			// span exhausted
			return nil, nil, io.EOF
		}

		// pop the next leaf and restart at its first entry
		s.nd, s.leaves = s.leaves[0], s.leaves[1:]
		s.curr = 0
		s.stop = s.nd.Count()
		if len(s.leaves) == 0 {
			// |s.nd| is the last leaf
			s.stop = s.final
		}
	}

	idx := s.curr
	key, value = K(s.nd.GetKey(idx)), V(s.nd.GetValue(idx))
	s.curr = idx + 1
	return key, value, nil
}