mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-23 21:59:01 -05:00
swapping out prolly node implementations
This commit is contained in:
+15
-17
@@ -25,7 +25,7 @@ import (
|
||||
)
|
||||
|
||||
type Map struct {
|
||||
root Node
|
||||
root mapNode
|
||||
keyDesc val.TupleDesc
|
||||
valDesc val.TupleDesc
|
||||
ns NodeStore
|
||||
@@ -34,7 +34,7 @@ type Map struct {
|
||||
type KeyValueFn func(key, value val.Tuple) error
|
||||
|
||||
// NewMap creates an empty prolly tree Map
|
||||
func NewMap(node Node, ns NodeStore, keyDesc, valDesc val.TupleDesc) Map {
|
||||
func NewMap(node mapNode, ns NodeStore, keyDesc, valDesc val.TupleDesc) Map {
|
||||
return Map{
|
||||
root: node,
|
||||
keyDesc: keyDesc,
|
||||
@@ -45,7 +45,7 @@ func NewMap(node Node, ns NodeStore, keyDesc, valDesc val.TupleDesc) Map {
|
||||
|
||||
// NewMapFromTuples creates a prolly tree Map from slice of sorted Tuples.
|
||||
func NewMapFromTuples(ctx context.Context, ns NodeStore, keyDesc, valDesc val.TupleDesc, tups ...val.Tuple) (Map, error) {
|
||||
m := NewMap(nil, ns, keyDesc, valDesc)
|
||||
m := NewMap(mapNode{}, ns, keyDesc, valDesc)
|
||||
|
||||
ch, err := newEmptyTreeChunker(ctx, ns, newDefaultNodeSplitter)
|
||||
if err != nil {
|
||||
@@ -83,7 +83,7 @@ func (m Map) Count() uint64 {
|
||||
|
||||
// HashOf returns the Hash of this Map.
|
||||
func (m Map) HashOf() hash.Hash {
|
||||
return hash.Of(m.root)
|
||||
return m.root.hashOf()
|
||||
}
|
||||
|
||||
// Format returns the NomsBinFormat of this Map.
|
||||
@@ -127,7 +127,7 @@ func (m Map) Has(ctx context.Context, key val.Tuple) (ok bool, err error) {
|
||||
}
|
||||
|
||||
if cur.valid() {
|
||||
k := val.Tuple(cur.currentPair().key())
|
||||
k := val.Tuple(cur.currentKey())
|
||||
ok = m.compareKeys(key, k) == 0
|
||||
}
|
||||
|
||||
@@ -193,10 +193,10 @@ func (m Map) iterFromRange(ctx context.Context, rng Range) (*prollyRangeIter, er
|
||||
|
||||
func (m Map) rangeStartSearchFn(rng Range) searchFn {
|
||||
// todo(andy): inline sort.Search()
|
||||
return func(query nodeItem, nd Node) int {
|
||||
i := sort.Search(nd.nodeCount()/stride, func(i int) bool {
|
||||
return func(query nodeItem, nd mapNode) int {
|
||||
return sort.Search(nd.nodeCount(), func(i int) bool {
|
||||
q := val.Tuple(query)
|
||||
t := val.Tuple(nd.getItem(i * stride))
|
||||
t := val.Tuple(nd.getKey(i))
|
||||
|
||||
// compare using the range's tuple descriptor.
|
||||
cmp := rng.KeyDesc.Compare(q, t)
|
||||
@@ -206,16 +206,15 @@ func (m Map) rangeStartSearchFn(rng Range) searchFn {
|
||||
return cmp < 0
|
||||
}
|
||||
})
|
||||
return i * stride
|
||||
}
|
||||
}
|
||||
|
||||
func (m Map) rangeStopSearchFn(rng Range) searchFn {
|
||||
// todo(andy): inline sort.Search()
|
||||
return func(query nodeItem, nd Node) int {
|
||||
i := sort.Search(nd.nodeCount()/stride, func(i int) bool {
|
||||
return func(query nodeItem, nd mapNode) int {
|
||||
return sort.Search(nd.nodeCount(), func(i int) bool {
|
||||
q := val.Tuple(query)
|
||||
t := val.Tuple(nd.getItem(i * stride))
|
||||
t := val.Tuple(nd.getKey(i))
|
||||
|
||||
// compare using the range's tuple descriptor.
|
||||
cmp := rng.KeyDesc.Compare(q, t)
|
||||
@@ -225,20 +224,19 @@ func (m Map) rangeStopSearchFn(rng Range) searchFn {
|
||||
return cmp <= 0
|
||||
}
|
||||
})
|
||||
return i * stride
|
||||
}
|
||||
}
|
||||
|
||||
// searchNode returns the smallest index where nd[i] >= query
|
||||
// Adapted from search.Sort to inline comparison.
|
||||
func (m Map) searchNode(query nodeItem, nd Node) int {
|
||||
n := nd.nodeCount() / stride
|
||||
func (m Map) searchNode(query nodeItem, nd mapNode) int {
|
||||
n := nd.nodeCount()
|
||||
// Define f(-1) == false and f(n) == true.
|
||||
// Invariant: f(i-1) == false, f(j) == true.
|
||||
i, j := 0, n
|
||||
for i < j {
|
||||
h := int(uint(i+j) >> 1) // avoid overflow when computing h
|
||||
less := m.compareItems(query, nd.getItem(h*stride)) <= 0
|
||||
less := m.compareItems(query, nd.getKey(h)) <= 0
|
||||
// i ≤ h < j
|
||||
if !less {
|
||||
i = h + 1 // preserves f(i-1) == false
|
||||
@@ -248,7 +246,7 @@ func (m Map) searchNode(query nodeItem, nd Node) int {
|
||||
}
|
||||
// i == j, f(i-1) == false, and
|
||||
// f(j) (= f(i)) == true => answer is i.
|
||||
return i * stride
|
||||
return i
|
||||
}
|
||||
|
||||
var _ searchFn = Map{}.searchNode
|
||||
|
||||
+71
-34
@@ -15,6 +15,8 @@
|
||||
package prolly
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
@@ -25,44 +27,67 @@ const (
|
||||
refSz = hash.ByteLen
|
||||
)
|
||||
|
||||
const (
|
||||
cumulativeCountSize = val.ByteSize(6)
|
||||
nodeCountSize = val.ByteSize(2)
|
||||
treeLevelSize = val.ByteSize(1)
|
||||
|
||||
maxNodeDataSize = uint64(math.MaxUint16)
|
||||
)
|
||||
|
||||
var emptyNode = mapNode{}
|
||||
|
||||
type mapNode struct {
|
||||
buf serial.TupleMap
|
||||
|
||||
// todo(andy): embed hash.Hash here?
|
||||
}
|
||||
|
||||
func makeMapNode(pool pool.BuffPool, level uint64, items ...nodeItem) (node Node) {
|
||||
var sz uint64
|
||||
for _, item := range items {
|
||||
sz += uint64(item.size())
|
||||
func makeProllyNode(pool pool.BuffPool, level uint64, items ...nodeItem) (node mapNode) {
|
||||
//var sz uint64
|
||||
//for _, item := range items {
|
||||
// sz += uint64(item.size())
|
||||
//
|
||||
//}
|
||||
//count := len(items)
|
||||
//
|
||||
//if sz > maxNodeDataSize {
|
||||
// panic("items exceeded max chunk data size")
|
||||
//}
|
||||
//
|
||||
//pos := val.ByteSize(sz)
|
||||
//pos += val.OffsetsSize(count)
|
||||
//pos += cumulativeCountSize
|
||||
//pos += nodeCountSize
|
||||
//pos += treeLevelSize
|
||||
//
|
||||
//node = pool.Get(uint64(pos))
|
||||
//
|
||||
//cc := countCumulativeItems(level, items)
|
||||
//writeCumulativeCount(node, cc)
|
||||
//writeItemCount(node, count)
|
||||
//writeTreeLevel(node, level)
|
||||
//
|
||||
//pos = 0
|
||||
//offs, _ := node.offsets()
|
||||
//for i, item := range items {
|
||||
// copy(node[pos:pos+item.size()], item)
|
||||
// offs.Put(i, pos)
|
||||
// pos += item.size()
|
||||
//}
|
||||
//
|
||||
//return node
|
||||
|
||||
}
|
||||
count := len(items)
|
||||
return mapNode{}
|
||||
}
|
||||
|
||||
if sz > maxNodeDataSize {
|
||||
panic("items exceeded max chunk data size")
|
||||
}
|
||||
func mapNodeFromBytes(bb []byte) mapNode {
|
||||
fb := serial.GetRootAsTupleMap(bb, 0)
|
||||
return mapNode{buf: *fb}
|
||||
}
|
||||
|
||||
pos := val.ByteSize(sz)
|
||||
pos += val.OffsetsSize(count)
|
||||
pos += cumulativeCountSize
|
||||
pos += nodeCountSize
|
||||
pos += treeLevelSize
|
||||
|
||||
node = pool.Get(uint64(pos))
|
||||
|
||||
cc := countCumulativeItems(level, items)
|
||||
writeCumulativeCount(node, cc)
|
||||
writeItemCount(node, count)
|
||||
writeTreeLevel(node, level)
|
||||
|
||||
pos = 0
|
||||
offs, _ := node.offsets()
|
||||
for i, item := range items {
|
||||
copy(node[pos:pos+item.size()], item)
|
||||
offs.Put(i, pos)
|
||||
pos += item.size()
|
||||
}
|
||||
|
||||
return node
|
||||
func (nd mapNode) hashOf() hash.Hash {
|
||||
return hash.Of(nd.bytes())
|
||||
}
|
||||
|
||||
func (nd mapNode) getKey(i int) nodeItem {
|
||||
@@ -70,7 +95,7 @@ func (nd mapNode) getKey(i int) nodeItem {
|
||||
|
||||
start, stop := uint16(0), uint16(len(keys))
|
||||
if i > 0 {
|
||||
start = nd.buf.KeyOffsets(i-1)
|
||||
start = nd.buf.KeyOffsets(i - 1)
|
||||
}
|
||||
if i < nd.buf.KeyOffsetsLength() {
|
||||
stop = nd.buf.KeyOffsets(i)
|
||||
@@ -80,11 +105,19 @@ func (nd mapNode) getKey(i int) nodeItem {
|
||||
}
|
||||
|
||||
func (nd mapNode) getValue(i int) nodeItem {
|
||||
if nd.leafNode() {
|
||||
return nd.getValueTuple(i)
|
||||
} else {
|
||||
return nd.getRef(i)
|
||||
}
|
||||
}
|
||||
|
||||
func (nd mapNode) getValueTuple(i int) nodeItem {
|
||||
values := nd.buf.ValueTuplesBytes()
|
||||
|
||||
start, stop := uint16(0), uint16(len(values))
|
||||
if i > 0 {
|
||||
start = nd.buf.ValueOffsets(i-1)
|
||||
start = nd.buf.ValueOffsets(i - 1)
|
||||
}
|
||||
if i < nd.buf.ValueOffsetsLength() {
|
||||
stop = nd.buf.ValueOffsets(i)
|
||||
@@ -117,5 +150,9 @@ func (nd mapNode) leafNode() bool {
|
||||
}
|
||||
|
||||
func (nd mapNode) empty() bool {
|
||||
return nd.nodeCount() == 0
|
||||
return nd.bytes() == nil || nd.nodeCount() == 0
|
||||
}
|
||||
|
||||
func (nd mapNode) bytes() []byte {
|
||||
return nd.buf.Table().Bytes
|
||||
}
|
||||
|
||||
@@ -22,17 +22,17 @@ import (
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
func fetchChild(ctx context.Context, ns NodeStore, mt metaValue) (Node, error) {
|
||||
// todo(andy) handle nil Node, dangling ref
|
||||
func fetchChild(ctx context.Context, ns NodeStore, mt metaValue) (mapNode, error) {
|
||||
// todo(andy) handle nil mapNode, dangling ref
|
||||
return ns.Read(ctx, mt.GetRef())
|
||||
}
|
||||
|
||||
func writeNewChild(ctx context.Context, ns NodeStore, level uint64, items ...nodeItem) (Node, nodePair, error) {
|
||||
func writeNewChild(ctx context.Context, ns NodeStore, level uint64, items ...nodeItem) (mapNode, nodePair, error) {
|
||||
child := makeProllyNode(ns.Pool(), level, items...)
|
||||
|
||||
ref, err := ns.Write(ctx, child)
|
||||
if err != nil {
|
||||
return nil, nodePair{}, err
|
||||
return mapNode{}, nodePair{}, err
|
||||
}
|
||||
|
||||
if len(items) == 0 {
|
||||
@@ -57,7 +57,7 @@ const (
|
||||
metaValueRefIdx = 1
|
||||
)
|
||||
|
||||
// metaValue is a value Tuple in an internal Node of a prolly tree.
|
||||
// metaValue is a value Tuple in an internal mapNode of a prolly tree.
|
||||
// metaValues have two fields: cumulative count and ref.
|
||||
type metaValue val.Tuple
|
||||
|
||||
@@ -74,7 +74,7 @@ func (mt metaValue) GetCumulativeCount() uint64 {
|
||||
return val.ReadUint48(cnt)
|
||||
}
|
||||
|
||||
// GetRef returns the hash.Hash of the child Node pointed
|
||||
// GetRef returns the hash.Hash of the child mapNode pointed
|
||||
// to by this metaValue.
|
||||
func (mt metaValue) GetRef() hash.Hash {
|
||||
tup := val.Tuple(mt)
|
||||
|
||||
@@ -1,220 +0,0 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package prolly
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
const (
|
||||
cumulativeCountSize = val.ByteSize(6)
|
||||
nodeCountSize = val.ByteSize(2)
|
||||
treeLevelSize = val.ByteSize(1)
|
||||
|
||||
maxNodeDataSize = uint64(math.MaxUint16)
|
||||
)
|
||||
|
||||
var emptyNode = Node([]byte{0, 0, 0, 0, 0, 0, 0, 0, 0})
|
||||
|
||||
// Node is a node in a prolly tree. Nodes are byte slices containing node items and
|
||||
// a footer. The footer contains offsets, an item count for the node, a cumulative
|
||||
// item count for the subtree rooted at this node, and this node's tree level.
|
||||
// Prolly trees are organized like a B+ Tree without linked leaf nodes. Internal
|
||||
// Nodes contain only keys and child pointers ("metaKeys" and "metaValues"). Leaf
|
||||
// Nodes contain keys and values. The offsets array enables random access to items
|
||||
// within a Node. The cumulative count field allows seeking into the tree by an
|
||||
// item's index number.
|
||||
//
|
||||
// Node:
|
||||
// Items in a node are packed contiguously from the front of the byte slice.
|
||||
// For internal Nodes, metaKeys and metaValues are stored in alternating order
|
||||
// as separate items. MetaValues contain a chunk ref that can be resolved to a
|
||||
// child node using a NodeStore. MetaKeys store the largest key Tuple within
|
||||
// the subtree rooted at that child Node.
|
||||
// +--------+--------+-----+--------+--------+
|
||||
// | Item 0 | Item 1 | ... | Item N | Footer |
|
||||
// +--------+--------+--------------+--------+
|
||||
//
|
||||
// Footer:
|
||||
// +---------------+------------------+------------+------------+
|
||||
// | Offsets Array | Cumulative Count | Node Count | Tree Level |
|
||||
// +---------------+------------------+------------+------------+
|
||||
//
|
||||
// Offsets Array:
|
||||
// The offset array contains a uint16 for each node item after item 0. Offset i
|
||||
// encodes the byte distance from the front of the node to the beginning of the
|
||||
// ith item in the node. The offsets array for N items is 2*(N-1) bytes.
|
||||
// +----------+----------+-----+----------+
|
||||
// | Offset 1 | Offset 2 | ... | Offset N |
|
||||
// +----------+----------+-----+----------+
|
||||
//
|
||||
// Cumulative Count:
|
||||
// The cumulative count is the total number of items in the subtree rooted at
|
||||
// this node. For leaf nodes, cumulative count is the same as node count.
|
||||
// +---------------------------+
|
||||
// | Cumulative Count (uint48) |
|
||||
// +---------------------------+
|
||||
//
|
||||
// Node Count:
|
||||
// Node count is the number of items in this node.
|
||||
// +---------------------+
|
||||
// | Node Count (uint16) |
|
||||
// +---------------------+
|
||||
//
|
||||
// Tree Level:
|
||||
// Tree Level is the height of this node within the tree. Leaf nodes are
|
||||
// level 0, the first level of internal nodes is level 1.
|
||||
// +--------------------+
|
||||
// | Tree Level (uint8) |
|
||||
// +--------------------+
|
||||
//
|
||||
// Note: the current Node implementation is oriented toward implementing Map
|
||||
// semantics. However, Node could easily be modified to support Set semantics,
|
||||
// or other collections.
|
||||
//
|
||||
type Node []byte
|
||||
|
||||
type nodeItem []byte
|
||||
|
||||
func (i nodeItem) size() val.ByteSize {
|
||||
return val.ByteSize(len(i))
|
||||
}
|
||||
|
||||
type nodePair [2]nodeItem
|
||||
|
||||
func (p nodePair) key() nodeItem {
|
||||
return p[0]
|
||||
}
|
||||
|
||||
func (p nodePair) value() nodeItem {
|
||||
return p[1]
|
||||
}
|
||||
|
||||
func makeProllyNode(pool pool.BuffPool, level uint64, items ...nodeItem) (node Node) {
|
||||
var sz uint64
|
||||
for _, item := range items {
|
||||
sz += uint64(item.size())
|
||||
|
||||
}
|
||||
count := len(items)
|
||||
|
||||
if sz > maxNodeDataSize {
|
||||
panic("items exceeded max chunk data size")
|
||||
}
|
||||
|
||||
pos := val.ByteSize(sz)
|
||||
pos += val.OffsetsSize(count)
|
||||
pos += cumulativeCountSize
|
||||
pos += nodeCountSize
|
||||
pos += treeLevelSize
|
||||
|
||||
node = pool.Get(uint64(pos))
|
||||
|
||||
cc := countCumulativeItems(level, items)
|
||||
writeCumulativeCount(node, cc)
|
||||
writeItemCount(node, count)
|
||||
writeTreeLevel(node, level)
|
||||
|
||||
pos = 0
|
||||
offs, _ := node.offsets()
|
||||
for i, item := range items {
|
||||
copy(node[pos:pos+item.size()], item)
|
||||
offs.Put(i, pos)
|
||||
pos += item.size()
|
||||
}
|
||||
|
||||
return node
|
||||
}
|
||||
|
||||
func countCumulativeItems(level uint64, items []nodeItem) (c uint64) {
|
||||
if level == 0 {
|
||||
return uint64(len(items))
|
||||
}
|
||||
|
||||
for i := 1; i < len(items); i += 2 {
|
||||
c += metaValue(items[i]).GetCumulativeCount()
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (nd Node) getItem(i int) nodeItem {
|
||||
offs, itemStop := nd.offsets()
|
||||
start, stop := offs.GetBounds(i, itemStop)
|
||||
return nodeItem(nd[start:stop])
|
||||
}
|
||||
|
||||
func (nd Node) getPair(i int) (p nodePair) {
|
||||
offs, itemStop := nd.offsets()
|
||||
start, stop := offs.GetBounds(i, itemStop)
|
||||
p[0] = nodeItem(nd[start:stop])
|
||||
start, stop = offs.GetBounds(i+1, itemStop)
|
||||
p[1] = nodeItem(nd[start:stop])
|
||||
return
|
||||
}
|
||||
|
||||
func (nd Node) size() val.ByteSize {
|
||||
return val.ByteSize(len(nd))
|
||||
}
|
||||
|
||||
func (nd Node) level() int {
|
||||
sl := nd[nd.size()-treeLevelSize:]
|
||||
return int(val.ReadUint8(sl))
|
||||
}
|
||||
|
||||
func (nd Node) nodeCount() int {
|
||||
stop := nd.size() - treeLevelSize
|
||||
start := stop - nodeCountSize
|
||||
return int(val.ReadUint16(nd[start:stop]))
|
||||
}
|
||||
|
||||
func (nd Node) cumulativeCount() uint64 {
|
||||
stop := nd.size() - treeLevelSize - nodeCountSize
|
||||
start := stop - cumulativeCountSize
|
||||
buf := nd[start:stop]
|
||||
return val.ReadUint48(buf)
|
||||
}
|
||||
|
||||
func (nd Node) offsets() (offs val.Offsets, itemStop val.ByteSize) {
|
||||
stop := nd.size() - treeLevelSize - nodeCountSize - cumulativeCountSize
|
||||
itemStop = stop - val.OffsetsSize(nd.nodeCount())
|
||||
return val.Offsets(nd[itemStop:stop]), itemStop
|
||||
}
|
||||
|
||||
func (nd Node) leafNode() bool {
|
||||
return nd.level() == 0
|
||||
}
|
||||
|
||||
func (nd Node) empty() bool {
|
||||
return len(nd) == 0 || nd.nodeCount() == 0
|
||||
}
|
||||
|
||||
func writeTreeLevel(nd Node, level uint64) {
|
||||
nd[nd.size()-treeLevelSize] = uint8(level)
|
||||
}
|
||||
|
||||
func writeItemCount(nd Node, count int) {
|
||||
stop := nd.size() - treeLevelSize
|
||||
start := stop - nodeCountSize
|
||||
val.WriteUint16(nd[start:stop], uint16(count))
|
||||
}
|
||||
|
||||
func writeCumulativeCount(nd Node, count uint64) {
|
||||
stop := nd.size() - treeLevelSize - nodeCountSize
|
||||
start := stop - cumulativeCountSize
|
||||
val.WriteUint48(nd[start:stop], count)
|
||||
}
|
||||
@@ -28,14 +28,25 @@ import (
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
const (
|
||||
// for leaf and internal nodes
|
||||
stride = 2
|
||||
)
|
||||
type nodeItem []byte
|
||||
|
||||
// nodeCursor explores a tree of Node items.
|
||||
func (i nodeItem) size() val.ByteSize {
|
||||
return val.ByteSize(len(i))
|
||||
}
|
||||
|
||||
type nodePair [2]nodeItem
|
||||
|
||||
func (p nodePair) key() nodeItem {
|
||||
return p[0]
|
||||
}
|
||||
|
||||
func (p nodePair) value() nodeItem {
|
||||
return p[1]
|
||||
}
|
||||
|
||||
// nodeCursor explores a tree of mapNode items.
|
||||
type nodeCursor struct {
|
||||
nd Node
|
||||
nd mapNode
|
||||
idx int
|
||||
parent *nodeCursor
|
||||
nrw NodeStore
|
||||
@@ -43,9 +54,9 @@ type nodeCursor struct {
|
||||
|
||||
type compareFn func(left, right nodeItem) int
|
||||
|
||||
type searchFn func(item nodeItem, nd Node) (idx int)
|
||||
type searchFn func(item nodeItem, nd mapNode) (idx int)
|
||||
|
||||
func newCursorAtStart(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCursor, err error) {
|
||||
func newCursorAtStart(ctx context.Context, nrw NodeStore, nd mapNode) (cur *nodeCursor, err error) {
|
||||
cur = &nodeCursor{nd: nd, nrw: nrw}
|
||||
for !cur.isLeaf() {
|
||||
mv := metaValue(cur.currentPair().value())
|
||||
@@ -60,7 +71,7 @@ func newCursorAtStart(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCur
|
||||
return
|
||||
}
|
||||
|
||||
func newCursorPastEnd(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCursor, err error) {
|
||||
func newCursorPastEnd(ctx context.Context, nrw NodeStore, nd mapNode) (cur *nodeCursor, err error) {
|
||||
cur = &nodeCursor{nd: nd, nrw: nrw}
|
||||
cur.skipToNodeEnd()
|
||||
|
||||
@@ -88,11 +99,11 @@ func newCursorPastEnd(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCur
|
||||
return
|
||||
}
|
||||
|
||||
func newCursorAtTuple(ctx context.Context, nrw NodeStore, nd Node, tup val.Tuple, search searchFn) (cur *nodeCursor, err error) {
|
||||
func newCursorAtTuple(ctx context.Context, nrw NodeStore, nd mapNode, tup val.Tuple, search searchFn) (cur *nodeCursor, err error) {
|
||||
return newCursorAtItem(ctx, nrw, nd, nodeItem(tup), search)
|
||||
}
|
||||
|
||||
func newCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeItem, search searchFn) (cur *nodeCursor, err error) {
|
||||
func newCursorAtItem(ctx context.Context, nrw NodeStore, nd mapNode, item nodeItem, search searchFn) (cur *nodeCursor, err error) {
|
||||
cur = &nodeCursor{nd: nd, nrw: nrw}
|
||||
|
||||
cur.idx = search(item, cur.nd)
|
||||
@@ -116,7 +127,7 @@ func newCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeItem,
|
||||
return
|
||||
}
|
||||
|
||||
func newLeafCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeItem, search searchFn) (cur nodeCursor, err error) {
|
||||
func newLeafCursorAtItem(ctx context.Context, nrw NodeStore, nd mapNode, item nodeItem, search searchFn) (cur nodeCursor, err error) {
|
||||
cur = nodeCursor{nd: nd, parent: nil, nrw: nrw}
|
||||
|
||||
cur.idx = search(item, cur.nd)
|
||||
@@ -139,24 +150,31 @@ func newLeafCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeI
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) valid() bool {
|
||||
if cur.nd == nil {
|
||||
if cur.nd.empty() {
|
||||
return false
|
||||
}
|
||||
cnt := cur.nd.nodeCount()
|
||||
return cur.idx >= 0 && cur.idx < cnt
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) currentKey() nodeItem {
|
||||
return cur.nd.getKey(cur.idx)
|
||||
}
|
||||
|
||||
// currentPair returns the key-value pair at the current cursor position
|
||||
func (cur *nodeCursor) currentPair() nodePair {
|
||||
return cur.nd.getPair(cur.idx)
|
||||
return nodePair{
|
||||
cur.nd.getKey(cur.idx),
|
||||
cur.nd.getValue(cur.idx),
|
||||
}
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) firstKey() nodeItem {
|
||||
return cur.nd.getItem(0)
|
||||
return cur.nd.getKey(0)
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) lastKey() nodeItem {
|
||||
return cur.nd.getItem(cur.lastKeyIdx())
|
||||
return cur.nd.getKey(cur.lastKeyIdx())
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) skipToNodeStart() {
|
||||
@@ -185,7 +203,7 @@ func (cur *nodeCursor) atNodeEnd() bool {
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) lastKeyIdx() int {
|
||||
return cur.nd.nodeCount() - stride
|
||||
return cur.nd.nodeCount() - 1
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) isLeaf() bool {
|
||||
@@ -228,11 +246,11 @@ func (cur *nodeCursor) seek(ctx context.Context, item nodeItem, cb compareFn) (e
|
||||
// index of the next greatest element if it is not present.
|
||||
func (cur *nodeCursor) search(item nodeItem, cb compareFn) (idx int) {
|
||||
count := cur.nd.nodeCount()
|
||||
idx = sort.Search(count/stride, func(i int) bool {
|
||||
return cb(item, cur.nd.getItem(i*stride)) <= 0
|
||||
idx = sort.Search(count, func(i int) bool {
|
||||
return cb(item, cur.nd.getKey(i)) <= 0
|
||||
})
|
||||
|
||||
return idx * stride
|
||||
return idx
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) advance(ctx context.Context) (bool, error) {
|
||||
@@ -249,7 +267,7 @@ func (cur *nodeCursor) advance(ctx context.Context) (bool, error) {
|
||||
|
||||
func (cur *nodeCursor) advanceInBounds(ctx context.Context) (bool, error) {
|
||||
if cur.idx < cur.lastKeyIdx() {
|
||||
cur.idx += stride
|
||||
cur.idx += 1
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -291,7 +309,7 @@ func (cur *nodeCursor) retreat(ctx context.Context) (bool, error) {
|
||||
return false, err
|
||||
}
|
||||
if !ok {
|
||||
cur.idx = -stride
|
||||
cur.idx = -1
|
||||
}
|
||||
|
||||
return ok, nil
|
||||
@@ -299,11 +317,11 @@ func (cur *nodeCursor) retreat(ctx context.Context) (bool, error) {
|
||||
|
||||
func (cur *nodeCursor) retreatInBounds(ctx context.Context) (bool, error) {
|
||||
if cur.idx > 0 {
|
||||
cur.idx -= stride
|
||||
cur.idx -= 1
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if cur.idx == -stride {
|
||||
if cur.idx == -1 {
|
||||
// |cur| is already out of bounds
|
||||
return false, nil
|
||||
}
|
||||
@@ -334,7 +352,7 @@ func (cur *nodeCursor) retreatInBounds(ctx context.Context) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// fetchNode loads the Node that the cursor index points to.
|
||||
// fetchNode loads the mapNode that the cursor index points to.
|
||||
// It's called whenever the cursor advances/retreats to a different chunk.
|
||||
func (cur *nodeCursor) fetchNode(ctx context.Context) (err error) {
|
||||
assertTrue(cur.parent != nil)
|
||||
|
||||
@@ -57,7 +57,7 @@ func newTestNodeStore() NodeStore {
|
||||
return NewNodeStore(ts.NewView())
|
||||
}
|
||||
|
||||
func randomTree(t *testing.T, count int) (Node, [][2]nodeItem, NodeStore) {
|
||||
func randomTree(t *testing.T, count int) (mapNode, [][2]nodeItem, NodeStore) {
|
||||
ctx := context.Background()
|
||||
ns := newTestNodeStore()
|
||||
chunker, err := newEmptyTreeChunker(ctx, ns, newDefaultNodeSplitter)
|
||||
@@ -83,7 +83,7 @@ var valDesc = val.NewTupleDescriptor(
|
||||
val.Type{Enc: val.Int64Enc, Nullable: true},
|
||||
)
|
||||
|
||||
func searchTestTree(item nodeItem, nd Node) int {
|
||||
func searchTestTree(item nodeItem, nd mapNode) int {
|
||||
idx := sort.Search(nd.nodeCount()/stride, func(i int) bool {
|
||||
l, r := val.Tuple(item), val.Tuple(nd.getItem(i*stride))
|
||||
return keyDesc.Compare(l, r) <= 0
|
||||
|
||||
@@ -32,12 +32,11 @@ const (
|
||||
|
||||
// NodeStore reads and writes prolly tree Nodes.
|
||||
type NodeStore interface {
|
||||
// Read reads a prolly tree mapNode from the store.
|
||||
Read(ctx context.Context, ref hash.Hash) (mapNode, error)
|
||||
|
||||
// Read reads a prolly tree Node from the store.
|
||||
Read(ctx context.Context, ref hash.Hash) (Node, error)
|
||||
|
||||
// Write writes a prolly tree Node to the store.
|
||||
Write(ctx context.Context, nd Node) (hash.Hash, error)
|
||||
// Write writes a prolly tree mapNode to the store.
|
||||
Write(ctx context.Context, nd mapNode) (hash.Hash, error)
|
||||
|
||||
// Pool returns a buffer pool.
|
||||
Pool() pool.BuffPool
|
||||
@@ -68,23 +67,24 @@ func NewNodeStore(cs chunks.ChunkStore) NodeStore {
|
||||
}
|
||||
|
||||
// Read implements NodeStore.
|
||||
func (ns nodeStore) Read(ctx context.Context, ref hash.Hash) (Node, error) {
|
||||
func (ns nodeStore) Read(ctx context.Context, ref hash.Hash) (mapNode, error) {
|
||||
c, ok := ns.cache.get(ref)
|
||||
if ok {
|
||||
return c.Data(), nil
|
||||
return mapNodeFromBytes(c.Data()), nil
|
||||
}
|
||||
|
||||
c, err := ns.store.Get(ctx, ref)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return mapNode{}, err
|
||||
}
|
||||
ns.cache.insert(c)
|
||||
return c.Data(), err
|
||||
|
||||
return mapNodeFromBytes(c.Data()), err
|
||||
}
|
||||
|
||||
// Write implements NodeStore.
|
||||
func (ns nodeStore) Write(ctx context.Context, nd Node) (hash.Hash, error) {
|
||||
c := chunks.NewChunk(nd)
|
||||
func (ns nodeStore) Write(ctx context.Context, nd mapNode) (hash.Hash, error) {
|
||||
c := chunks.NewChunk(nd.bytes())
|
||||
if err := ns.store.Put(ctx, c); err != nil {
|
||||
return hash.Hash{}, err
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ func TestRoundTripNodeItems(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func newLeafNode(items []nodeItem) Node {
|
||||
func newLeafNode(items []nodeItem) mapNode {
|
||||
return makeProllyNode(sharedPool, 0, items...)
|
||||
}
|
||||
|
||||
|
||||
@@ -37,24 +37,23 @@ func NewEmptyMap(sch schema.Schema) Map {
|
||||
|
||||
// PartitionKeysFromMap naively divides the map by its top-level keys.
|
||||
func PartitionKeysFromMap(m Map) (keys []val.Tuple) {
|
||||
keys = make([]val.Tuple, m.root.nodeCount()/2)
|
||||
keys = make([]val.Tuple, m.root.nodeCount())
|
||||
for i := range keys {
|
||||
pair := m.root.getPair(i * 2)
|
||||
keys[i] = val.Tuple(pair.key())
|
||||
keys[i] = val.Tuple(m.root.getKey(i))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func ValueFromNode(nd Node) types.Value {
|
||||
return types.InlineBlob(nd)
|
||||
func ValueFromNode(nd mapNode) types.Value {
|
||||
return types.InlineBlob(nd.bytes())
|
||||
}
|
||||
|
||||
func NodeFromValue(v types.Value) Node {
|
||||
return Node(v.(types.InlineBlob))
|
||||
func NodeFromValue(v types.Value) mapNode {
|
||||
return mapNodeFromBytes(v.(types.InlineBlob))
|
||||
}
|
||||
|
||||
func ValueFromMap(m Map) types.Value {
|
||||
return types.InlineBlob(m.root)
|
||||
return types.InlineBlob(m.root.bytes())
|
||||
}
|
||||
|
||||
func MapFromValue(v types.Value, sch schema.Schema, vrw types.ValueReadWriter) Map {
|
||||
|
||||
@@ -53,7 +53,7 @@ func newEmptyTreeChunker(ctx context.Context, ns NodeStore, newSplit newSplitter
|
||||
}
|
||||
|
||||
func newTreeChunker(ctx context.Context, cur *nodeCursor, level uint64, ns NodeStore, newSplit newSplitterFn) (*treeChunker, error) {
|
||||
// |cur| will be nil if this is a new Node, implying this is a new tree, or the tree has grown in height relative
|
||||
// |cur| will be nil if this is a new mapNode, implying this is a new tree, or the tree has grown in height relative
|
||||
// to its original chunked form.
|
||||
|
||||
sc := &treeChunker{
|
||||
@@ -169,7 +169,7 @@ func (tc *treeChunker) advanceTo(ctx context.Context, next *nodeCursor) error {
|
||||
}
|
||||
|
||||
// |tc.cur| is now inconsistent with its parent, invalidate it.
|
||||
tc.cur.nd = nil
|
||||
tc.cur.nd = mapNode{}
|
||||
}
|
||||
|
||||
break
|
||||
@@ -215,8 +215,8 @@ func (tc *treeChunker) Skip(ctx context.Context) error {
|
||||
// may be made before or after the pair, but not between them.
|
||||
func (tc *treeChunker) Append(ctx context.Context, key, value nodeItem) (bool, error) {
|
||||
// When adding new key-value pairs to an in-progress chunk, we must enforce 3 invariants
|
||||
// (1) Key-value pairs are stored in the same Node.
|
||||
// (2) The total size of a Node's data cannot exceed |maxNodeDataSize|.
|
||||
// (1) Key-value pairs are stored in the same mapNode.
|
||||
// (2) The total size of a mapNode's data cannot exceed |maxNodeDataSize|.
|
||||
// (3) Internal Nodes (level > 0) must contain at least 2 key-value pairs (4 node items).
|
||||
// Infinite recursion can occur if internal nodes contain a single metaPair with a key
|
||||
// large enough to trigger a chunk boundary. Forming a chunk boundary after a single
|
||||
@@ -313,13 +313,13 @@ func (tc *treeChunker) createParentChunker(ctx context.Context) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// createNode creates a Node from the current items in |sc.currentPair|,
|
||||
// clears the current items, then returns the new Node and a metaValue that
|
||||
// points to it. The Node is always eagerly written.
|
||||
func (tc *treeChunker) createNode(ctx context.Context) (Node, nodePair, error) {
|
||||
// createNode creates a mapNode from the current items in |tc.current|,
|
||||
// clears the current items, then returns the new mapNode and a metaValue that
|
||||
// points to it. The mapNode is always eagerly written.
|
||||
func (tc *treeChunker) createNode(ctx context.Context) (mapNode, nodePair, error) {
|
||||
nd, metaPair, err := writeNewChild(ctx, tc.ns, tc.level, tc.current...)
|
||||
if err != nil {
|
||||
return nil, nodePair{}, err
|
||||
return mapNode{}, nodePair{}, err
|
||||
}
|
||||
|
||||
// |tc.current| is copied so it's safe to re-use the memory.
|
||||
@@ -329,15 +329,15 @@ func (tc *treeChunker) createNode(ctx context.Context) (Node, nodePair, error) {
|
||||
return nd, metaPair, nil
|
||||
}
|
||||
|
||||
// Done returns the root Node of the resulting tree.
|
||||
// Done returns the root mapNode of the resulting tree.
|
||||
// The logic here is subtle, but hopefully correct and understandable. See comments inline.
|
||||
func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
|
||||
func (tc *treeChunker) Done(ctx context.Context) (mapNode, error) {
|
||||
assertTrue(!tc.done)
|
||||
tc.done = true
|
||||
|
||||
if tc.cur != nil {
|
||||
if err := tc.finalizeCursor(ctx); err != nil {
|
||||
return nil, err
|
||||
return mapNode{}, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,7 +348,7 @@ func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
|
||||
// |tc.current| are the last items at this level of the tree,
|
||||
// make a chunk out of them
|
||||
if err := tc.handleChunkBoundary(ctx); err != nil {
|
||||
return nil, err
|
||||
return mapNode{}, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -357,29 +357,29 @@ func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
|
||||
|
||||
// At this point, we know |tc.current| contains every item at this level of the tree.
|
||||
// To see this, consider that there are two ways items can enter |tc.current|.
|
||||
// (1) as the result of resume() with the cursor on anything other than the first item in the Node
|
||||
// (1) as the result of resume() with the cursor on anything other than the first item in the mapNode
|
||||
// (2) as a result of a child treeChunker hitting an explicit chunk boundary during either Append() or finalize().
|
||||
//
|
||||
// The only way there can be no items in some parent treeChunker's |tc.current| is if this treeChunker began with
|
||||
// a cursor within its first existing chunk (and thus all parents resume()'d with a cursor on their first item) and
|
||||
// continued through all subsequent items without creating any explicit chunk boundaries (and thus never sent any
|
||||
// items up to a parent as a result of chunking). Therefore, this treeChunker's |tc.current| must contain all items
|
||||
// within the current Node.
|
||||
// within the current mapNode.
|
||||
|
||||
// This level must represent *a* root of the tree, but it is possibly non-canonical. There are three possible cases:
|
||||
// (1) This is a "leaf" treeChunker and thus produced a tree of depth 1 which contains exactly one chunk
|
||||
// (never hit a boundary), or
|
||||
// (2) This is an internal Node of the tree which contains multiple references to child nodes. In either case,
|
||||
// (2) This is an internal mapNode of the tree which contains multiple references to child nodes. In either case,
|
||||
// this is the canonical root of the tree.
|
||||
if tc.isLeaf() || len(tc.current) > metaPairCount {
|
||||
nd, _, err := tc.createNode(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return mapNode{}, err
|
||||
}
|
||||
return nd, nil
|
||||
}
|
||||
|
||||
// (3) This is an internal Node of the tree with a single metaPair. This is a non-canonical root, and we must walk
|
||||
// (3) This is an internal mapNode of the tree with a single metaPair. This is a non-canonical root, and we must walk
|
||||
// down until we find cases (1) or (2), above.
|
||||
assertTrue(!tc.isLeaf())
|
||||
assertTrue(len(tc.current) == metaPairCount)
|
||||
@@ -388,19 +388,19 @@ func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
|
||||
for {
|
||||
child, err := fetchChild(ctx, tc.ns, mt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return mapNode{}, err
|
||||
}
|
||||
|
||||
if child.leafNode() || child.nodeCount() > 1 {
|
||||
return child, nil
|
||||
}
|
||||
|
||||
mt = metaValue(child.getItem(metaPairValIdx))
|
||||
mt = metaValue(child.getRef(0))
|
||||
}
|
||||
}
|
||||
|
||||
// If we are mutating an existing Node, append subsequent items in the Node until we reach a pre-existing chunk
|
||||
// boundary or the end of the Node.
|
||||
// If we are mutating an existing mapNode, append subsequent items in the mapNode until we reach a pre-existing chunk
|
||||
// boundary or the end of the mapNode.
|
||||
func (tc *treeChunker) finalizeCursor(ctx context.Context) (err error) {
|
||||
for tc.cur.valid() {
|
||||
pair := tc.cur.currentPair()
|
||||
@@ -411,7 +411,7 @@ func (tc *treeChunker) finalizeCursor(ctx context.Context) (err error) {
|
||||
return err
|
||||
}
|
||||
if ok && tc.cur.atNodeEnd() {
|
||||
break // boundary occurred at same place in old & new Node
|
||||
break // boundary occurred at same place in old & new mapNode
|
||||
}
|
||||
|
||||
_, err = tc.cur.advance(ctx)
|
||||
@@ -428,7 +428,7 @@ func (tc *treeChunker) finalizeCursor(ctx context.Context) (err error) {
|
||||
}
|
||||
|
||||
// invalidate this cursor to mark it finalized.
|
||||
tc.cur.nd = nil
|
||||
tc.cur.nd = mapNode{}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -54,7 +54,7 @@ func roundTripTreeItems(t *testing.T) {
|
||||
validateTreeItems(t, ns, root, items)
|
||||
}
|
||||
|
||||
func countTree(t *testing.T, ns NodeStore, nd Node) (count int) {
|
||||
func countTree(t *testing.T, ns NodeStore, nd mapNode) (count int) {
|
||||
ctx := context.Background()
|
||||
err := iterTree(ctx, ns, nd, func(_ nodeItem) (err error) {
|
||||
count++
|
||||
@@ -64,7 +64,7 @@ func countTree(t *testing.T, ns NodeStore, nd Node) (count int) {
|
||||
return
|
||||
}
|
||||
|
||||
func validateTreeItems(t *testing.T, ns NodeStore, nd Node, expected [][2]nodeItem) {
|
||||
func validateTreeItems(t *testing.T, ns NodeStore, nd mapNode, expected [][2]nodeItem) {
|
||||
i := 0
|
||||
ctx := context.Background()
|
||||
err := iterTree(ctx, ns, nd, func(actual nodeItem) (err error) {
|
||||
@@ -78,7 +78,7 @@ func validateTreeItems(t *testing.T, ns NodeStore, nd Node, expected [][2]nodeIt
|
||||
return
|
||||
}
|
||||
|
||||
func iterTree(ctx context.Context, ns NodeStore, nd Node, cb func(item nodeItem) error) error {
|
||||
func iterTree(ctx context.Context, ns NodeStore, nd mapNode, cb func(item nodeItem) error) error {
|
||||
if nd.empty() {
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user