Files
dolt/go/store/prolly/tree/node_cursor.go
2023-05-11 15:57:19 -07:00

625 lines
14 KiB
Go

// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package tree
import (
"context"
"errors"
"fmt"
"github.com/dolthub/dolt/go/store/hash"
)
// cursor explores a tree of Nodes. A cursor addresses one position (|idx|)
// inside one Node (|nd|). Cursors are chained through |parent|: the
// constructors in this file build one cursor per tree level, and the cursor
// for the Node they were given has a nil |parent|.
type cursor struct {
// nd is the Node this cursor is positioned in.
nd Node
// idx is the position within |nd|. invalidateAtStart sets it to -1 and
// invalidateAtEnd sets it to the node's count.
idx int
// parent is the cursor for the level above, or nil if there is none.
parent *cursor
// nrw is the NodeStore used to fetch child Nodes (see fetchNode).
nrw NodeStore
}
// SearchFn picks an index within |nd|. newCursorFromSearchFn calls it once
// for every Node it visits while descending to a leaf.
type SearchFn func(nd Node) (idx int)
// Ordering compares two keys of type K. Callers in this file only test the
// sign of the result (e.g. `<= 0`, `> 0`, `>= 0`) to decide how |left| sorts
// relative to |right|.
type Ordering[K ~[]byte] interface {
// Compare returns an int whose sign orders |left| relative to |right|.
Compare(left, right K) int
}
// newCursorAtStart builds a cursor chain from |nd| down to a leaf, always
// following the child ref at index 0. Every cursor in the returned chain has
// idx 0. On a fetch failure it returns a nil cursor and the error.
func newCursorAtStart(ctx context.Context, ns NodeStore, nd Node) (cur *cursor, err error) {
	cur = &cursor{nd: nd, nrw: ns}
	for !cur.isLeaf() {
		child, fetchErr := fetchChild(ctx, ns, cur.currentRef())
		if fetchErr != nil {
			return nil, fetchErr
		}
		// descend one level; the cursor we just left becomes the parent
		cur = &cursor{nd: child, parent: cur, nrw: ns}
	}
	return cur, nil
}
// newCursorAtEnd builds a cursor chain from |nd| down to a leaf, always
// following the last child ref. Every cursor in the returned chain is
// positioned on the last index of its node. On a fetch failure it returns a
// nil cursor and the error.
func newCursorAtEnd(ctx context.Context, ns NodeStore, nd Node) (cur *cursor, err error) {
	cur = &cursor{nd: nd, nrw: ns}
	// each visited cursor is moved to its node's end before we inspect it
	for cur.skipToNodeEnd(); !cur.isLeaf(); cur.skipToNodeEnd() {
		child, fetchErr := fetchChild(ctx, ns, cur.currentRef())
		if fetchErr != nil {
			return nil, fetchErr
		}
		cur = &cursor{nd: child, parent: cur, nrw: ns}
	}
	return cur, nil
}
// newCursorPastEnd returns a leaf cursor positioned one step past the last
// item of the tree rooted at |nd|: it builds a cursor at the last item with
// newCursorAtEnd and advances it once.
//
// It returns a nil cursor and the error if either step fails. It panics if
// the advanced cursor's index is not equal to its node's count, which would
// mean advance did not leave the cursor in its past-the-end state.
func newCursorPastEnd(ctx context.Context, ns NodeStore, nd Node) (cur *cursor, err error) {
	cur, err = newCursorAtEnd(ctx, ns, nd)
	if err != nil {
		return nil, err
	}

	// Advance |cur| past the end
	if err = cur.advance(ctx); err != nil {
		return nil, err
	}
	if cur.idx != int(cur.nd.count) {
		panic("expected cursor to be positioned past the end of its node")
	}
	return cur, nil
}
// newCursorAtOrdinal returns a leaf cursor positioned at the |ord|-th item
// (zero based) of the tree rooted at |nd|. If |ord| is greater than or equal
// to the tree's item count, a past-the-end cursor is returned instead (see
// newCursorPastEnd).
//
// The descent uses a SearchFn that, at each internal node, subtracts the
// cardinality of every subtree lying wholly before the target ordinal and
// selects the first subtree that contains it. At the leaf the remaining
// distance is the index.
//
// Errors raised while loading or reading subtree counts cannot be returned
// from a SearchFn, so the first one is captured and returned (with a nil
// cursor) once the descent has finished.
func newCursorAtOrdinal(ctx context.Context, ns NodeStore, nd Node, ord uint64) (cur *cursor, err error) {
	cnt, err := nd.TreeCount()
	if err != nil {
		return nil, err
	}
	if ord >= uint64(cnt) {
		return newCursorPastEnd(ctx, ns, nd)
	}

	distance := int64(ord)
	var searchErr error // first error seen inside the search callback
	cur, err = newCursorFromSearchFn(ctx, ns, nd, func(n Node) (idx int) {
		if searchErr != nil {
			// a previous level already failed; the result will be discarded
			return 0
		}
		if n.IsLeaf() {
			return int(distance)
		}
		if n, searchErr = n.loadSubtrees(); searchErr != nil {
			return 0
		}
		for idx = 0; idx < n.Count(); idx++ {
			sub, cntErr := n.getSubtreeCount(idx)
			if cntErr != nil {
				searchErr = cntErr
				return 0
			}
			card := int64(sub)
			if (distance - card) < 0 {
				// the target ordinal lives inside subtree |idx|
				break
			}
			distance -= card
		}
		return idx
	})
	if err != nil {
		return cur, err
	}
	if searchErr != nil {
		return nil, searchErr
	}
	return cur, nil
}
// getOrdinalOfCursor returns the ordinal position of the leaf cursor |curr|:
// its own index plus, for every ancestor cursor, the sum of the subtree
// counts that precede the ancestor's current index.
//
// It returns an error if |curr| is not at a leaf, if an ancestor's index is
// negative, or if subtree counts cannot be loaded.
//
// Note that ancestors are modified in place: an ancestor whose index is at or
// past its node's count is moved to its last index, and each ancestor's node
// is replaced by the result of loadSubtrees.
func getOrdinalOfCursor(curr *cursor) (ord uint64, err error) {
	if !curr.isLeaf() {
		return 0, fmt.Errorf("|cur| must be at a leaf")
	}
	ord = uint64(curr.idx)

	for anc := curr.parent; anc != nil; anc = anc.parent {
		switch {
		case anc.idx >= anc.nd.Count():
			// If a parent has been invalidated past end, act like we were
			// at the last subtree.
			anc.skipToNodeEnd()
		case anc.idx < 0:
			return 0, fmt.Errorf("found invalid parent cursor behind node start")
		}

		if anc.nd, err = anc.nd.loadSubtrees(); err != nil {
			return 0, err
		}

		// add up every subtree that sits before the ancestor's position
		for i := anc.idx - 1; i >= 0; i-- {
			sub, cntErr := anc.nd.getSubtreeCount(i)
			if cntErr != nil {
				return 0, cntErr
			}
			ord += sub
		}
	}
	return ord, nil
}
// newCursorAtKey descends from |nd| to a leaf, choosing the index at every
// level with the SearchFn produced by searchForKey for |key| and |order|.
// The results of newCursorFromSearchFn are returned unchanged.
func newCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (cur *cursor, err error) {
	search := searchForKey(key, order)
	cur, err = newCursorFromSearchFn(ctx, ns, nd, search)
	return cur, err
}
// newCursorFromSearchFn builds a cursor chain from |nd| down to a leaf. At
// every level |search| picks the index; on internal nodes that index is
// clamped into the node's bounds before the child it refers to is fetched.
// The leaf cursor's index is left exactly as |search| returned it.
//
// If fetching a child fails, the cursor for the deepest node reached so far
// is returned together with the error.
func newCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search SearchFn) (cur *cursor, err error) {
	cur = &cursor{nd: nd, idx: search(nd), nrw: ns}
	for !cur.isLeaf() {
		// stay in bounds for internal nodes
		cur.keepInBounds()

		child, fetchErr := fetchChild(ctx, ns, cur.currentRef())
		if fetchErr != nil {
			return cur, fetchErr
		}
		cur = &cursor{nd: child, idx: search(child), parent: cur, nrw: ns}
	}
	return cur, nil
}
// newLeafCursorAtKey descends from |nd| to a leaf looking for |key| and
// returns a single cursor value for that leaf; no parent cursors are built.
// At each node it binary-searches for the first index whose key does not
// compare below |key|. On internal nodes the index is clamped into bounds
// before following the child ref.
//
// If fetching a child fails, the cursor in its current state is returned
// together with the error.
func newLeafCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (cursor, error) {
	cur := cursor{nd: nd, nrw: ns}
	for {
		// binary search |cur.nd| for |key|
		lo, hi := 0, cur.nd.Count()
		for lo < hi {
			mid := int(uint(lo+hi) >> 1)
			if order.Compare(key, K(cur.nd.GetKey(mid))) > 0 {
				lo = mid + 1
				continue
			}
			hi = mid
		}
		cur.idx = lo

		if cur.isLeaf() {
			return cur, nil
		}

		// stay in bounds for internal nodes
		cur.keepInBounds()

		// reuse |cur| object to keep stack alloc'd
		var err error
		if cur.nd, err = fetchChild(ctx, ns, cur.currentRef()); err != nil {
			return cur, err
		}
	}
}
// searchForKey returns a SearchFn for |key|. The returned function
// binary-searches a Node for the smallest index i in [0, Count()] such that
// order.Compare(key, keyAt(i)) <= 0. It returns Count() when no key in the
// node satisfies that.
func searchForKey[K ~[]byte, O Ordering[K]](key K, order O) SearchFn {
	return func(nd Node) int {
		// Invariant: every index below |lo| holds a key that sorts before
		// |key|; every index at or above |hi| holds one that does not.
		lo, hi := 0, int(nd.Count())
		for lo < hi {
			mid := int(uint(lo+hi) >> 1) // avoid overflow when computing the midpoint
			if order.Compare(key, K(nd.GetKey(mid))) <= 0 {
				hi = mid
			} else {
				lo = mid + 1
			}
		}
		return lo
	}
}
// LeafSpan is the result of fetchLeafNodeSpan: a run of leaf Nodes plus the
// requested ordinal range rebased onto that run.
type LeafSpan struct {
// Leaves are the leaf Nodes fetched for the range.
Leaves []Node
// LocalStart is the requested start ordinal rebased by
// recursiveFetchLeafNodeSpan onto the fetched leaves.
LocalStart int
// LocalStop is LocalStart plus the length of the requested range, minus
// the item counts of every leaf except the last.
LocalStop int
}
// fetchLeafNodeSpan returns the leaf Node span for the ordinal range
// [start, stop). It fetches the span using an eager breadth-first search and
// makes batch read calls to the persistence layer via NodeStore.ReadMany.
//
// LocalStart is the rebased start returned by recursiveFetchLeafNodeSpan.
// LocalStop is LocalStart plus (stop - start), reduced by the item counts of
// every returned leaf except the last.
func fetchLeafNodeSpan(ctx context.Context, ns NodeStore, root Node, start, stop uint64) (LeafSpan, error) {
	leaves, localStart, err := recursiveFetchLeafNodeSpan(ctx, ns, []Node{root}, start, stop)
	if err != nil {
		return LeafSpan{}, err
	}

	// total number of items held by all leaves before the final one
	var preceding uint64
	for i, last := 0, len(leaves)-1; i < last; i++ {
		preceding += uint64(leaves[i].Count())
	}

	span := LeafSpan{Leaves: leaves, LocalStart: int(localStart)}
	span.LocalStop = int((stop - start) + localStart - preceding)
	return span, nil
}
// recursiveFetchLeafNodeSpan descends one tree level per call until |nodes|
// are leaves, selecting only the children needed for the ordinal range
// [start, stop) and reading each level with a single NodeStore.ReadMany call.
//
// |start| and |stop| are relative to the first node in |nodes|. Leading
// subtrees whose cardinality is smaller than |start| are skipped, and both
// bounds are rebased past them. Selection within a node stops once the
// accumulated cardinality reaches |stop|.
//
// It returns the selected leaves and the rebased |start|. It returns an error
// if |nodes| is empty, if |nodes| mixes leaf and non-leaf nodes, or if
// loading subtree counts or reading children fails.
func recursiveFetchLeafNodeSpan(ctx context.Context, ns NodeStore, nodes []Node, start, stop uint64) ([]Node, uint64, error) {
	if len(nodes) == 0 {
		// Nothing was selected at the level above, e.g. because every subtree
		// was skipped. Report it rather than panicking on nodes[0] below.
		return nil, 0, errors.New("empty node set")
	}

	if nodes[0].IsLeaf() {
		// verify leaf homogeneity
		for i := range nodes {
			if !nodes[i].IsLeaf() {
				return nil, 0, errors.New("mixed leaf/non-leaf set")
			}
		}
		return nodes, start, nil
	}

	gets := make(hash.HashSlice, 0, len(nodes)*nodes[0].Count())
	acc := uint64(0) // cardinality of the children selected so far
	var err error
	for _, nd := range nodes {
		if nd, err = nd.loadSubtrees(); err != nil {
			return nil, 0, err
		}
		for i := 0; i < nd.Count(); i++ {
			card, cntErr := nd.getSubtreeCount(i)
			if cntErr != nil {
				return nil, 0, cntErr
			}
			if acc == 0 && card < start {
				// subtree lies before the range: skip it and rebase the bounds
				start -= card
				stop -= card
				continue
			}
			gets = append(gets, hash.New(nd.GetValue(i)))
			acc += card
			if acc >= stop {
				break
			}
		}
	}

	children, err := ns.ReadMany(ctx, gets)
	if err != nil {
		return nil, 0, err
	}
	return recursiveFetchLeafNodeSpan(ctx, ns, children, start, stop)
}
// currentCursorItems returns the key and value Items stored at the cursor's
// current index, read directly from the node's key and value arrays.
func currentCursorItems(cur *cursor) (key, value Item) {
	i := cur.idx
	return cur.nd.keys.GetItem(i, cur.nd.msg), cur.nd.values.GetItem(i, cur.nd.msg)
}
// Seek repositions |cur| for |key|.
//
// A cursor with no parent is searched in place. A cursor with a parent is
// searched in place only when |key| lies between its node's first and last
// keys (inclusive). Otherwise Seek is first applied to the parent, the
// parent's index is clamped into bounds, and |cur|'s node is replaced by the
// child the parent now points at. In every case |cur.idx| is finally set by
// searchForKey on the (possibly new) node.
func Seek[K ~[]byte, O Ordering[K]](ctx context.Context, cur *cursor, key K, order O) (err error) {
	outside := cur.parent != nil &&
		(order.Compare(key, K(cur.firstKey())) < 0 ||
			order.Compare(key, K(cur.lastKey())) > 0)

	if outside {
		// |key| is outside the bounds of |cur.nd|, search up the tree
		if err = Seek(ctx, cur.parent, key, order); err != nil {
			return err
		}
		// stay in bounds for internal nodes
		cur.parent.keepInBounds()

		if cur.nd, err = fetchChild(ctx, cur.nrw, cur.parent.currentRef()); err != nil {
			return err
		}
	}

	cur.idx = searchForKey(key, order)(cur.nd)
	return nil
}
// Valid reports whether the cursor points at an item: its node must be
// non-empty with non-nil bytes, and its index must lie in [0, count).
func (cur *cursor) Valid() bool {
	if cur.nd.count == 0 || cur.nd.bytes() == nil {
		return false
	}
	return 0 <= cur.idx && cur.idx < int(cur.nd.count)
}
// CurrentKey returns the key at the cursor's current index in its node.
// The index is not bounds-checked here.
func (cur *cursor) CurrentKey() Item {
return cur.nd.GetKey(cur.idx)
}
// currentValue returns the value at the cursor's current index in its node.
// The index is not bounds-checked here.
func (cur *cursor) currentValue() Item {
return cur.nd.GetValue(cur.idx)
}
// currentRef returns the address stored at the cursor's current index.
// Callers in this file pass it to fetchChild to load the child node.
func (cur *cursor) currentRef() hash.Hash {
return cur.nd.getAddress(cur.idx)
}
// currentSubtreeSize returns the subtree count recorded for the cursor's
// current index. For a leaf cursor that is always 1. For an internal cursor
// the node is first replaced by the result of loadSubtrees, and any error
// from that call is returned with a count of 0.
func (cur *cursor) currentSubtreeSize() (uint64, error) {
	if !cur.isLeaf() {
		loaded, err := cur.nd.loadSubtrees()
		cur.nd = loaded
		if err != nil {
			return 0, err
		}
		return cur.nd.getSubtreeCount(cur.idx)
	}
	return 1, nil
}
// firstKey returns the key at index 0 of the cursor's node.
func (cur *cursor) firstKey() Item {
return cur.nd.GetKey(0)
}
// lastKey returns the key at the last index (count - 1) of the cursor's node.
func (cur *cursor) lastKey() Item {
	return cur.nd.GetKey(int(cur.nd.count) - 1)
}
// skipToNodeStart moves the cursor to index 0 of its node.
func (cur *cursor) skipToNodeStart() {
cur.idx = 0
}
// skipToNodeEnd moves the cursor to the last index (count - 1) of its node.
// For an empty node that is -1.
func (cur *cursor) skipToNodeEnd() {
	cur.idx = int(cur.nd.count) - 1
}
// keepInBounds clamps the cursor's index into its node: a negative index is
// raised to 0, then an index beyond the last item is lowered to count - 1.
// The two steps run in that order, so an empty node always ends at -1.
func (cur *cursor) keepInBounds() {
	last := int(cur.nd.count) - 1
	if cur.idx < 0 {
		cur.idx = 0
	}
	if cur.idx > last {
		cur.idx = last
	}
}
// atNodeStart returns true if the cursor's current |idx| is 0.
func (cur *cursor) atNodeStart() bool {
return cur.idx == 0
}
// atNodeEnd reports whether the cursor's current |idx| is the index of the
// last item in its node (count - 1).
func (cur *cursor) atNodeEnd() bool {
	return cur.idx == int(cur.nd.count)-1
}
// isLeaf returns true if the cursor's node is at level 0.
func (cur *cursor) isLeaf() bool {
return cur.nd.level == 0
}
// level returns the level of the cursor's node as a uint64. The error
// result is always nil.
func (cur *cursor) level() (uint64, error) {
return uint64(cur.nd.level), nil
}
// invalidateAtEnd sets the cursor's index to the node count, i.e. one past
// the last item, a position that outOfBounds reports as out of bounds.
func (cur *cursor) invalidateAtEnd() {
cur.idx = int(cur.nd.count)
}
// invalidateAtStart sets the cursor's index to -1, i.e. one before the first
// item, a position that outOfBounds reports as out of bounds.
func (cur *cursor) invalidateAtStart() {
cur.idx = -1
}
// hasNext reports whether the current node still has an item after the
// cursor's index, so that advancing does not need to consult the parent.
// hasNext can be false even if parent cursors are not exhausted.
func (cur *cursor) hasNext() bool {
	last := int(cur.nd.count) - 1
	return cur.idx < last
}
// hasPrev returns true if the current node has keys preceding the cursor's
// index. hasPrev can be false even if a parent node has preceding keys.
func (cur *cursor) hasPrev() bool {
return cur.idx > 0
}
// outOfBounds reports whether the cursor's index lies outside [0, count) of
// its own node. Only this cursor is inspected; parents are not consulted.
// advance and retreat call it on the parent after moving the parent.
func (cur *cursor) outOfBounds() bool {
	inside := 0 <= cur.idx && cur.idx < int(cur.nd.count)
	return !inside
}
// advance moves the cursor forward by one item, crossing into the next node
// when the current one is used up. It returns an error only if advancing a
// parent or fetching the next node fails.
//
// One of three things happens:
//
//  1. The current node still has items after |idx|: |idx| is incremented.
//
//  2. The current node is used up but some ancestor is not: the parent is
//     advanced (recursively), this cursor loads the node the parent now
//     points at, and |idx| is set to the start of that node.
//
//  3. The current node and every ancestor are used up: the cursor jumps to
//     its end state (idx = node count).
func (cur *cursor) advance(ctx context.Context) error {
	switch {
	case cur.hasNext():
		cur.idx++
		return nil
	case cur.parent == nil:
		cur.invalidateAtEnd()
		return nil
	}

	// recursively increment the parent
	if err := cur.parent.advance(ctx); err != nil {
		return err
	}
	if cur.parent.outOfBounds() {
		// exhausted every parent cursor
		cur.invalidateAtEnd()
		return nil
	}

	// the parent now points at the node this cursor must move into
	if err := cur.fetchNode(ctx); err != nil {
		return err
	}
	cur.skipToNodeStart()
	return nil
}
// retreat moves the cursor back by one item, crossing into the previous node
// when the cursor is at the start of the current one. It is the mirror image
// of advance: a cursor with no further items behind it and no usable
// ancestor ends in its start state (idx = -1). It returns an error only if
// retreating a parent or fetching the previous node fails.
func (cur *cursor) retreat(ctx context.Context) error {
	switch {
	case cur.hasPrev():
		cur.idx--
		return nil
	case cur.parent == nil:
		cur.invalidateAtStart()
		return nil
	}

	// recursively decrement the parent
	if err := cur.parent.retreat(ctx); err != nil {
		return err
	}
	if cur.parent.outOfBounds() {
		// exhausted every parent cursor
		cur.invalidateAtStart()
		return nil
	}

	// the parent now points at the node this cursor must move into
	if err := cur.fetchNode(ctx); err != nil {
		return err
	}
	cur.skipToNodeEnd()
	return nil
}
// fetchNode replaces the cursor's node with the child that the parent
// cursor's current index refers to. advance and retreat call it after moving
// the parent. It panics (via assertTrue) if the cursor has no parent.
//
// The cursor's index is reset to -1 whether or not the fetch succeeds; the
// caller is expected to position it afterwards.
func (cur *cursor) fetchNode(ctx context.Context) (err error) {
	assertTrue(cur.parent != nil, "cannot fetch node for cursor with nil parent")
	child, err := fetchChild(ctx, cur.nrw, cur.parent.currentRef())
	cur.nd, cur.idx = child, -1 // caller must set idx
	return err
}
// compare returns the index difference (cur minus other) taken from the
// highest tree level at which the two cursor chains have different
// indexes. A parent has a higher precedence than its child. It returns 0
// when the indexes match at every compared level. See compareCursors.
//
// Ex:
//
// cur: L3 -> 4, L2 -> 2, L1 -> 5, L0 -> 2
// other: L3 -> 4, L2 -> 2, L1 -> 5, L0 -> 4
//
// res => -2 (from level 0)
//
// cur: L3 -> 4, L2 -> 2, L1 -> 5, L0 -> 2
// other: L3 -> 4, L2 -> 3, L1 -> 5, L0 -> 4
//
// res => -1 (from level 2)
func (cur *cursor) compare(other *cursor) int {
return compareCursors(cur, other)
}
// clone returns a copy of the cursor together with a copy of every ancestor
// cursor, so that moving the clone does not alter the original chain. The
// Node values and the NodeStore are copied by plain assignment.
func (cur *cursor) clone() *cursor {
	head := &cursor{nd: cur.nd, idx: cur.idx, nrw: cur.nrw}

	// walk up the original chain, appending a copy of each ancestor
	tail := head
	for src := cur.parent; src != nil; src = src.parent {
		tail.parent = &cursor{nd: src.nd, idx: src.idx, nrw: src.nrw}
		tail = tail.parent
	}
	return head
}
// copy overwrites this cursor and each of its ancestors, level by level,
// with the node, index and NodeStore of the matching cursor in |other|'s
// chain. Both chains must have the same height; otherwise it panics (via
// assertTrue) at the first level where one chain ends before the other,
// after the levels below that point have already been overwritten.
func (cur *cursor) copy(other *cursor) {
	for dst, src := cur, other; ; dst, src = dst.parent, src.parent {
		dst.nd, dst.idx, dst.nrw = src.nd, src.idx, src.nrw

		if dst.parent == nil {
			assertTrue(src.parent == nil, "cursors must be of equal height to call copy()")
			return
		}
		assertTrue(src.parent != nil, "cursors must be of equal height to call copy()")
	}
}
// compareCursors walks both cursor chains upwards in lockstep and returns
// left.idx - right.idx from the highest level at which that difference is
// non-zero, or 0 if the indexes match at every compared level. The walk
// stops as soon as either chain has no further parent.
func compareCursors(left, right *cursor) (diff int) {
	for l, r := left, right; ; l, r = l.parent, r.parent {
		// a difference found higher up overrides one found lower down
		if d := l.idx - r.idx; d != 0 {
			diff = d
		}
		if l.parent == nil || r.parent == nil {
			return diff
		}
	}
}
// fetchChild reads the Node addressed by |ref| from |ns| and returns the
// result of NodeStore.Read unchanged.
func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error) {
// todo(andy) handle nil Node, dangling ref
return ns.Read(ctx, ref)
}
// assertTrue panics with "assertion failed: " followed by |msg| when |b| is
// false, and does nothing otherwise.
func assertTrue(b bool, msg string) {
	if b {
		return
	}
	panic("assertion failed: " + msg)
}