swapping out prolly node implementations

This commit is contained in:
Andy Arthur
2022-02-01 16:22:35 -08:00
parent 1c6fa85166
commit 2140c48262
11 changed files with 183 additions and 351 deletions
+15 -17
View File
@@ -25,7 +25,7 @@ import (
)
type Map struct {
root Node
root mapNode
keyDesc val.TupleDesc
valDesc val.TupleDesc
ns NodeStore
@@ -34,7 +34,7 @@ type Map struct {
type KeyValueFn func(key, value val.Tuple) error
// NewMap creates an empty prolly tree Map
func NewMap(node Node, ns NodeStore, keyDesc, valDesc val.TupleDesc) Map {
func NewMap(node mapNode, ns NodeStore, keyDesc, valDesc val.TupleDesc) Map {
return Map{
root: node,
keyDesc: keyDesc,
@@ -45,7 +45,7 @@ func NewMap(node Node, ns NodeStore, keyDesc, valDesc val.TupleDesc) Map {
// NewMapFromTuples creates a prolly tree Map from slice of sorted Tuples.
func NewMapFromTuples(ctx context.Context, ns NodeStore, keyDesc, valDesc val.TupleDesc, tups ...val.Tuple) (Map, error) {
m := NewMap(nil, ns, keyDesc, valDesc)
m := NewMap(mapNode{}, ns, keyDesc, valDesc)
ch, err := newEmptyTreeChunker(ctx, ns, newDefaultNodeSplitter)
if err != nil {
@@ -83,7 +83,7 @@ func (m Map) Count() uint64 {
// HashOf returns the Hash of this Map.
func (m Map) HashOf() hash.Hash {
return hash.Of(m.root)
return m.root.hashOf()
}
// Format returns the NomsBinFormat of this Map.
@@ -127,7 +127,7 @@ func (m Map) Has(ctx context.Context, key val.Tuple) (ok bool, err error) {
}
if cur.valid() {
k := val.Tuple(cur.currentPair().key())
k := val.Tuple(cur.currentKey())
ok = m.compareKeys(key, k) == 0
}
@@ -193,10 +193,10 @@ func (m Map) iterFromRange(ctx context.Context, rng Range) (*prollyRangeIter, er
func (m Map) rangeStartSearchFn(rng Range) searchFn {
// todo(andy): inline sort.Search()
return func(query nodeItem, nd Node) int {
i := sort.Search(nd.nodeCount()/stride, func(i int) bool {
return func(query nodeItem, nd mapNode) int {
return sort.Search(nd.nodeCount(), func(i int) bool {
q := val.Tuple(query)
t := val.Tuple(nd.getItem(i * stride))
t := val.Tuple(nd.getKey(i))
// compare using the range's tuple descriptor.
cmp := rng.KeyDesc.Compare(q, t)
@@ -206,16 +206,15 @@ func (m Map) rangeStartSearchFn(rng Range) searchFn {
return cmp < 0
}
})
return i * stride
}
}
func (m Map) rangeStopSearchFn(rng Range) searchFn {
// todo(andy): inline sort.Search()
return func(query nodeItem, nd Node) int {
i := sort.Search(nd.nodeCount()/stride, func(i int) bool {
return func(query nodeItem, nd mapNode) int {
return sort.Search(nd.nodeCount(), func(i int) bool {
q := val.Tuple(query)
t := val.Tuple(nd.getItem(i * stride))
t := val.Tuple(nd.getKey(i))
// compare using the range's tuple descriptor.
cmp := rng.KeyDesc.Compare(q, t)
@@ -225,20 +224,19 @@ func (m Map) rangeStopSearchFn(rng Range) searchFn {
return cmp <= 0
}
})
return i * stride
}
}
// searchNode returns the smallest index where nd[i] >= query
// Adapted from sort.Search to inline comparison.
func (m Map) searchNode(query nodeItem, nd Node) int {
n := nd.nodeCount() / stride
func (m Map) searchNode(query nodeItem, nd mapNode) int {
n := nd.nodeCount()
// Define f(-1) == false and f(n) == true.
// Invariant: f(i-1) == false, f(j) == true.
i, j := 0, n
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
less := m.compareItems(query, nd.getItem(h*stride)) <= 0
less := m.compareItems(query, nd.getKey(h)) <= 0
// i ≤ h < j
if !less {
i = h + 1 // preserves f(i-1) == false
@@ -248,7 +246,7 @@ func (m Map) searchNode(query nodeItem, nd Node) int {
}
// i == j, f(i-1) == false, and
// f(j) (= f(i)) == true => answer is i.
return i * stride
return i
}
var _ searchFn = Map{}.searchNode
+71 -34
View File
@@ -15,6 +15,8 @@
package prolly
import (
"math"
"github.com/dolthub/dolt/go/gen/fb/serial"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/pool"
@@ -25,44 +27,67 @@ const (
refSz = hash.ByteLen
)
const (
cumulativeCountSize = val.ByteSize(6)
nodeCountSize = val.ByteSize(2)
treeLevelSize = val.ByteSize(1)
maxNodeDataSize = uint64(math.MaxUint16)
)
var emptyNode = mapNode{}
type mapNode struct {
buf serial.TupleMap
// todo(andy): embed hash.Hash here?
}
func makeMapNode(pool pool.BuffPool, level uint64, items ...nodeItem) (node Node) {
var sz uint64
for _, item := range items {
sz += uint64(item.size())
func makeProllyNode(pool pool.BuffPool, level uint64, items ...nodeItem) (node mapNode) {
//var sz uint64
//for _, item := range items {
// sz += uint64(item.size())
//
//}
//count := len(items)
//
//if sz > maxNodeDataSize {
// panic("items exceeded max chunk data size")
//}
//
//pos := val.ByteSize(sz)
//pos += val.OffsetsSize(count)
//pos += cumulativeCountSize
//pos += nodeCountSize
//pos += treeLevelSize
//
//node = pool.Get(uint64(pos))
//
//cc := countCumulativeItems(level, items)
//writeCumulativeCount(node, cc)
//writeItemCount(node, count)
//writeTreeLevel(node, level)
//
//pos = 0
//offs, _ := node.offsets()
//for i, item := range items {
// copy(node[pos:pos+item.size()], item)
// offs.Put(i, pos)
// pos += item.size()
//}
//
//return node
}
count := len(items)
return mapNode{}
}
if sz > maxNodeDataSize {
panic("items exceeded max chunk data size")
}
func mapNodeFromBytes(bb []byte) mapNode {
fb := serial.GetRootAsTupleMap(bb, 0)
return mapNode{buf: *fb}
}
pos := val.ByteSize(sz)
pos += val.OffsetsSize(count)
pos += cumulativeCountSize
pos += nodeCountSize
pos += treeLevelSize
node = pool.Get(uint64(pos))
cc := countCumulativeItems(level, items)
writeCumulativeCount(node, cc)
writeItemCount(node, count)
writeTreeLevel(node, level)
pos = 0
offs, _ := node.offsets()
for i, item := range items {
copy(node[pos:pos+item.size()], item)
offs.Put(i, pos)
pos += item.size()
}
return node
func (nd mapNode) hashOf() hash.Hash {
return hash.Of(nd.bytes())
}
func (nd mapNode) getKey(i int) nodeItem {
@@ -70,7 +95,7 @@ func (nd mapNode) getKey(i int) nodeItem {
start, stop := uint16(0), uint16(len(keys))
if i > 0 {
start = nd.buf.KeyOffsets(i-1)
start = nd.buf.KeyOffsets(i - 1)
}
if i < nd.buf.KeyOffsetsLength() {
stop = nd.buf.KeyOffsets(i)
@@ -80,11 +105,19 @@ func (nd mapNode) getKey(i int) nodeItem {
}
func (nd mapNode) getValue(i int) nodeItem {
if nd.leafNode() {
return nd.getValueTuple(i)
} else {
return nd.getRef(i)
}
}
func (nd mapNode) getValueTuple(i int) nodeItem {
values := nd.buf.ValueTuplesBytes()
start, stop := uint16(0), uint16(len(values))
if i > 0 {
start = nd.buf.ValueOffsets(i-1)
start = nd.buf.ValueOffsets(i - 1)
}
if i < nd.buf.ValueOffsetsLength() {
stop = nd.buf.ValueOffsets(i)
@@ -117,5 +150,9 @@ func (nd mapNode) leafNode() bool {
}
func (nd mapNode) empty() bool {
return nd.nodeCount() == 0
return nd.bytes() == nil || nd.nodeCount() == 0
}
func (nd mapNode) bytes() []byte {
return nd.buf.Table().Bytes
}
+6 -6
View File
@@ -22,17 +22,17 @@ import (
"github.com/dolthub/dolt/go/store/val"
)
func fetchChild(ctx context.Context, ns NodeStore, mt metaValue) (Node, error) {
// todo(andy) handle nil Node, dangling ref
func fetchChild(ctx context.Context, ns NodeStore, mt metaValue) (mapNode, error) {
// todo(andy) handle nil mapNode, dangling ref
return ns.Read(ctx, mt.GetRef())
}
func writeNewChild(ctx context.Context, ns NodeStore, level uint64, items ...nodeItem) (Node, nodePair, error) {
func writeNewChild(ctx context.Context, ns NodeStore, level uint64, items ...nodeItem) (mapNode, nodePair, error) {
child := makeProllyNode(ns.Pool(), level, items...)
ref, err := ns.Write(ctx, child)
if err != nil {
return nil, nodePair{}, err
return mapNode{}, nodePair{}, err
}
if len(items) == 0 {
@@ -57,7 +57,7 @@ const (
metaValueRefIdx = 1
)
// metaValue is a value Tuple in an internal Node of a prolly tree.
// metaValue is a value Tuple in an internal mapNode of a prolly tree.
// metaValues have two fields: cumulative count and ref.
type metaValue val.Tuple
@@ -74,7 +74,7 @@ func (mt metaValue) GetCumulativeCount() uint64 {
return val.ReadUint48(cnt)
}
// GetRef returns the hash.Hash of the child Node pointed
// GetRef returns the hash.Hash of the child mapNode pointed
// to by this metaValue.
func (mt metaValue) GetRef() hash.Hash {
tup := val.Tuple(mt)
-220
View File
@@ -1,220 +0,0 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prolly
import (
"math"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/val"
)
const (
cumulativeCountSize = val.ByteSize(6)
nodeCountSize = val.ByteSize(2)
treeLevelSize = val.ByteSize(1)
maxNodeDataSize = uint64(math.MaxUint16)
)
var emptyNode = Node([]byte{0, 0, 0, 0, 0, 0, 0, 0, 0})
// Node is a node in a prolly tree. Nodes are byte slices containing node items and
// a footer. The footer contains offsets, an item count for the node, a cumulative
// item count for the subtree rooted at this node, and this node's tree level.
// Prolly trees are organized like a B+ Tree without linked leaf nodes. Internal
// Nodes contain only keys and child pointers ("metaKeys" and "metaValues"). Leaf
// Nodes contain keys and values. The offsets array enables random access to items
// within a Node. The cumulative count field allows seeking into the tree by an
// item's index number.
//
// Node:
// Items in a node are packed contiguously from the front of the byte slice.
// For internal Nodes, metaKeys and metaValues are stored in alternating order
// as separate items. MetaValues contain a chunk ref that can be resolved to a
// child node using a NodeStore. MetaKeys store the largest key Tuple within
// the subtree rooted at that child Node.
// +--------+--------+-----+--------+--------+
// | Item 0 | Item 1 | ... | Item N | Footer |
// +--------+--------+--------------+--------+
//
// Footer:
// +---------------+------------------+------------+------------+
// | Offsets Array | Cumulative Count | Node Count | Tree Level |
// +---------------+------------------+------------+------------+
//
// Offsets Array:
// The offset array contains a uint16 for each node item after item 0. Offset i
// encodes the byte distance from the front of the node to the beginning of the
// ith item in the node. The offsets array for N items is 2*(N-1) bytes.
// +----------+----------+-----+----------+
// | Offset 1 | Offset 2 | ... | Offset N |
// +----------+----------+-----+----------+
//
// Cumulative Count:
// The cumulative count is the total number of items in the subtree rooted at
// this node. For leaf nodes, cumulative count is the same as node count.
// +---------------------------+
// | Cumulative Count (uint48) |
// +---------------------------+
//
// Node Count:
// Node count is the number of items in this node.
// +---------------------+
// | Node Count (uint16) |
// +---------------------+
//
// Tree Level:
// Tree Level is the height of this node within the tree. Leaf nodes are
// level 0, the first level of internal nodes is level 1.
// +--------------------+
// | Tree Level (uint8) |
// +--------------------+
//
// Note: the current Node implementation is oriented toward implementing Map
// semantics. However, Node could easily be modified to support Set semantics,
// or other collections.
//
type Node []byte
type nodeItem []byte
func (i nodeItem) size() val.ByteSize {
return val.ByteSize(len(i))
}
type nodePair [2]nodeItem
func (p nodePair) key() nodeItem {
return p[0]
}
func (p nodePair) value() nodeItem {
return p[1]
}
func makeProllyNode(pool pool.BuffPool, level uint64, items ...nodeItem) (node Node) {
var sz uint64
for _, item := range items {
sz += uint64(item.size())
}
count := len(items)
if sz > maxNodeDataSize {
panic("items exceeded max chunk data size")
}
pos := val.ByteSize(sz)
pos += val.OffsetsSize(count)
pos += cumulativeCountSize
pos += nodeCountSize
pos += treeLevelSize
node = pool.Get(uint64(pos))
cc := countCumulativeItems(level, items)
writeCumulativeCount(node, cc)
writeItemCount(node, count)
writeTreeLevel(node, level)
pos = 0
offs, _ := node.offsets()
for i, item := range items {
copy(node[pos:pos+item.size()], item)
offs.Put(i, pos)
pos += item.size()
}
return node
}
func countCumulativeItems(level uint64, items []nodeItem) (c uint64) {
if level == 0 {
return uint64(len(items))
}
for i := 1; i < len(items); i += 2 {
c += metaValue(items[i]).GetCumulativeCount()
}
return c
}
func (nd Node) getItem(i int) nodeItem {
offs, itemStop := nd.offsets()
start, stop := offs.GetBounds(i, itemStop)
return nodeItem(nd[start:stop])
}
func (nd Node) getPair(i int) (p nodePair) {
offs, itemStop := nd.offsets()
start, stop := offs.GetBounds(i, itemStop)
p[0] = nodeItem(nd[start:stop])
start, stop = offs.GetBounds(i+1, itemStop)
p[1] = nodeItem(nd[start:stop])
return
}
func (nd Node) size() val.ByteSize {
return val.ByteSize(len(nd))
}
func (nd Node) level() int {
sl := nd[nd.size()-treeLevelSize:]
return int(val.ReadUint8(sl))
}
func (nd Node) nodeCount() int {
stop := nd.size() - treeLevelSize
start := stop - nodeCountSize
return int(val.ReadUint16(nd[start:stop]))
}
func (nd Node) cumulativeCount() uint64 {
stop := nd.size() - treeLevelSize - nodeCountSize
start := stop - cumulativeCountSize
buf := nd[start:stop]
return val.ReadUint48(buf)
}
func (nd Node) offsets() (offs val.Offsets, itemStop val.ByteSize) {
stop := nd.size() - treeLevelSize - nodeCountSize - cumulativeCountSize
itemStop = stop - val.OffsetsSize(nd.nodeCount())
return val.Offsets(nd[itemStop:stop]), itemStop
}
func (nd Node) leafNode() bool {
return nd.level() == 0
}
func (nd Node) empty() bool {
return len(nd) == 0 || nd.nodeCount() == 0
}
func writeTreeLevel(nd Node, level uint64) {
nd[nd.size()-treeLevelSize] = uint8(level)
}
func writeItemCount(nd Node, count int) {
stop := nd.size() - treeLevelSize
start := stop - nodeCountSize
val.WriteUint16(nd[start:stop], uint16(count))
}
func writeCumulativeCount(nd Node, count uint64) {
stop := nd.size() - treeLevelSize - nodeCountSize
start := stop - cumulativeCountSize
val.WriteUint48(nd[start:stop], count)
}
+43 -25
View File
@@ -28,14 +28,25 @@ import (
"github.com/dolthub/dolt/go/store/val"
)
const (
// for leaf and internal nodes
stride = 2
)
type nodeItem []byte
// nodeCursor explores a tree of Node items.
func (i nodeItem) size() val.ByteSize {
return val.ByteSize(len(i))
}
type nodePair [2]nodeItem
func (p nodePair) key() nodeItem {
return p[0]
}
func (p nodePair) value() nodeItem {
return p[1]
}
// nodeCursor explores a tree of mapNode items.
type nodeCursor struct {
nd Node
nd mapNode
idx int
parent *nodeCursor
nrw NodeStore
@@ -43,9 +54,9 @@ type nodeCursor struct {
type compareFn func(left, right nodeItem) int
type searchFn func(item nodeItem, nd Node) (idx int)
type searchFn func(item nodeItem, nd mapNode) (idx int)
func newCursorAtStart(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCursor, err error) {
func newCursorAtStart(ctx context.Context, nrw NodeStore, nd mapNode) (cur *nodeCursor, err error) {
cur = &nodeCursor{nd: nd, nrw: nrw}
for !cur.isLeaf() {
mv := metaValue(cur.currentPair().value())
@@ -60,7 +71,7 @@ func newCursorAtStart(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCur
return
}
func newCursorPastEnd(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCursor, err error) {
func newCursorPastEnd(ctx context.Context, nrw NodeStore, nd mapNode) (cur *nodeCursor, err error) {
cur = &nodeCursor{nd: nd, nrw: nrw}
cur.skipToNodeEnd()
@@ -88,11 +99,11 @@ func newCursorPastEnd(ctx context.Context, nrw NodeStore, nd Node) (cur *nodeCur
return
}
func newCursorAtTuple(ctx context.Context, nrw NodeStore, nd Node, tup val.Tuple, search searchFn) (cur *nodeCursor, err error) {
func newCursorAtTuple(ctx context.Context, nrw NodeStore, nd mapNode, tup val.Tuple, search searchFn) (cur *nodeCursor, err error) {
return newCursorAtItem(ctx, nrw, nd, nodeItem(tup), search)
}
func newCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeItem, search searchFn) (cur *nodeCursor, err error) {
func newCursorAtItem(ctx context.Context, nrw NodeStore, nd mapNode, item nodeItem, search searchFn) (cur *nodeCursor, err error) {
cur = &nodeCursor{nd: nd, nrw: nrw}
cur.idx = search(item, cur.nd)
@@ -116,7 +127,7 @@ func newCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeItem,
return
}
func newLeafCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeItem, search searchFn) (cur nodeCursor, err error) {
func newLeafCursorAtItem(ctx context.Context, nrw NodeStore, nd mapNode, item nodeItem, search searchFn) (cur nodeCursor, err error) {
cur = nodeCursor{nd: nd, parent: nil, nrw: nrw}
cur.idx = search(item, cur.nd)
@@ -139,24 +150,31 @@ func newLeafCursorAtItem(ctx context.Context, nrw NodeStore, nd Node, item nodeI
}
func (cur *nodeCursor) valid() bool {
if cur.nd == nil {
if cur.nd.empty() {
return false
}
cnt := cur.nd.nodeCount()
return cur.idx >= 0 && cur.idx < cnt
}
func (cur *nodeCursor) currentKey() nodeItem {
return cur.nd.getKey(cur.idx)
}
// currentPair returns the item at the currentPair cursor position
func (cur *nodeCursor) currentPair() nodePair {
return cur.nd.getPair(cur.idx)
return nodePair{
cur.nd.getKey(cur.idx),
cur.nd.getValue(cur.idx),
}
}
func (cur *nodeCursor) firstKey() nodeItem {
return cur.nd.getItem(0)
return cur.nd.getKey(0)
}
func (cur *nodeCursor) lastKey() nodeItem {
return cur.nd.getItem(cur.lastKeyIdx())
return cur.nd.getKey(cur.lastKeyIdx())
}
func (cur *nodeCursor) skipToNodeStart() {
@@ -185,7 +203,7 @@ func (cur *nodeCursor) atNodeEnd() bool {
}
func (cur *nodeCursor) lastKeyIdx() int {
return cur.nd.nodeCount() - stride
return cur.nd.nodeCount() - 1
}
func (cur *nodeCursor) isLeaf() bool {
@@ -228,11 +246,11 @@ func (cur *nodeCursor) seek(ctx context.Context, item nodeItem, cb compareFn) (e
// index of the next greatest element if it is not present.
func (cur *nodeCursor) search(item nodeItem, cb compareFn) (idx int) {
count := cur.nd.nodeCount()
idx = sort.Search(count/stride, func(i int) bool {
return cb(item, cur.nd.getItem(i*stride)) <= 0
idx = sort.Search(count, func(i int) bool {
return cb(item, cur.nd.getKey(i)) <= 0
})
return idx * stride
return idx
}
func (cur *nodeCursor) advance(ctx context.Context) (bool, error) {
@@ -249,7 +267,7 @@ func (cur *nodeCursor) advance(ctx context.Context) (bool, error) {
func (cur *nodeCursor) advanceInBounds(ctx context.Context) (bool, error) {
if cur.idx < cur.lastKeyIdx() {
cur.idx += stride
cur.idx += 1
return true, nil
}
@@ -291,7 +309,7 @@ func (cur *nodeCursor) retreat(ctx context.Context) (bool, error) {
return false, err
}
if !ok {
cur.idx = -stride
cur.idx = -1
}
return ok, nil
@@ -299,11 +317,11 @@ func (cur *nodeCursor) retreat(ctx context.Context) (bool, error) {
func (cur *nodeCursor) retreatInBounds(ctx context.Context) (bool, error) {
if cur.idx > 0 {
cur.idx -= stride
cur.idx -= 1
return true, nil
}
if cur.idx == -stride {
if cur.idx == -1 {
// |cur| is already out of bounds
return false, nil
}
@@ -334,7 +352,7 @@ func (cur *nodeCursor) retreatInBounds(ctx context.Context) (bool, error) {
return false, nil
}
// fetchNode loads the Node that the cursor index points to.
// fetchNode loads the mapNode that the cursor index points to.
// It's called whenever the cursor advances/retreats to a different chunk.
func (cur *nodeCursor) fetchNode(ctx context.Context) (err error) {
assertTrue(cur.parent != nil)
+2 -2
View File
@@ -57,7 +57,7 @@ func newTestNodeStore() NodeStore {
return NewNodeStore(ts.NewView())
}
func randomTree(t *testing.T, count int) (Node, [][2]nodeItem, NodeStore) {
func randomTree(t *testing.T, count int) (mapNode, [][2]nodeItem, NodeStore) {
ctx := context.Background()
ns := newTestNodeStore()
chunker, err := newEmptyTreeChunker(ctx, ns, newDefaultNodeSplitter)
@@ -83,7 +83,7 @@ var valDesc = val.NewTupleDescriptor(
val.Type{Enc: val.Int64Enc, Nullable: true},
)
func searchTestTree(item nodeItem, nd Node) int {
func searchTestTree(item nodeItem, nd mapNode) int {
idx := sort.Search(nd.nodeCount()/stride, func(i int) bool {
l, r := val.Tuple(item), val.Tuple(nd.getItem(i*stride))
return keyDesc.Compare(l, r) <= 0
+11 -11
View File
@@ -32,12 +32,11 @@ const (
// NodeStore reads and writes prolly tree Nodes.
type NodeStore interface {
// Read reads a prolly tree mapNode from the store.
Read(ctx context.Context, ref hash.Hash) (mapNode, error)
// Read reads a prolly tree Node from the store.
Read(ctx context.Context, ref hash.Hash) (Node, error)
// Write writes a prolly tree Node to the store.
Write(ctx context.Context, nd Node) (hash.Hash, error)
// Write writes a prolly tree mapNode to the store.
Write(ctx context.Context, nd mapNode) (hash.Hash, error)
// Pool returns a buffer pool.
Pool() pool.BuffPool
@@ -68,23 +67,24 @@ func NewNodeStore(cs chunks.ChunkStore) NodeStore {
}
// Read implements NodeStore.
func (ns nodeStore) Read(ctx context.Context, ref hash.Hash) (Node, error) {
func (ns nodeStore) Read(ctx context.Context, ref hash.Hash) (mapNode, error) {
c, ok := ns.cache.get(ref)
if ok {
return c.Data(), nil
return mapNodeFromBytes(c.Data()), nil
}
c, err := ns.store.Get(ctx, ref)
if err != nil {
return nil, err
return mapNode{}, err
}
ns.cache.insert(c)
return c.Data(), err
return mapNodeFromBytes(c.Data()), err
}
// Write implements NodeStore.
func (ns nodeStore) Write(ctx context.Context, nd Node) (hash.Hash, error) {
c := chunks.NewChunk(nd)
func (ns nodeStore) Write(ctx context.Context, nd mapNode) (hash.Hash, error) {
c := chunks.NewChunk(nd.bytes())
if err := ns.store.Put(ctx, c); err != nil {
return hash.Hash{}, err
}
+1 -1
View File
@@ -37,7 +37,7 @@ func TestRoundTripNodeItems(t *testing.T) {
}
}
func newLeafNode(items []nodeItem) Node {
func newLeafNode(items []nodeItem) mapNode {
return makeProllyNode(sharedPool, 0, items...)
}
+7 -8
View File
@@ -37,24 +37,23 @@ func NewEmptyMap(sch schema.Schema) Map {
// PartitionKeysFromMap naively divides the map by its top-level keys.
func PartitionKeysFromMap(m Map) (keys []val.Tuple) {
keys = make([]val.Tuple, m.root.nodeCount()/2)
keys = make([]val.Tuple, m.root.nodeCount())
for i := range keys {
pair := m.root.getPair(i * 2)
keys[i] = val.Tuple(pair.key())
keys[i] = val.Tuple(m.root.getKey(i))
}
return
}
func ValueFromNode(nd Node) types.Value {
return types.InlineBlob(nd)
func ValueFromNode(nd mapNode) types.Value {
return types.InlineBlob(nd.bytes())
}
func NodeFromValue(v types.Value) Node {
return Node(v.(types.InlineBlob))
func NodeFromValue(v types.Value) mapNode {
return mapNodeFromBytes(v.(types.InlineBlob))
}
func ValueFromMap(m Map) types.Value {
return types.InlineBlob(m.root)
return types.InlineBlob(m.root.bytes())
}
func MapFromValue(v types.Value, sch schema.Schema, vrw types.ValueReadWriter) Map {
+24 -24
View File
@@ -53,7 +53,7 @@ func newEmptyTreeChunker(ctx context.Context, ns NodeStore, newSplit newSplitter
}
func newTreeChunker(ctx context.Context, cur *nodeCursor, level uint64, ns NodeStore, newSplit newSplitterFn) (*treeChunker, error) {
// |cur| will be nil if this is a new Node, implying this is a new tree, or the tree has grown in height relative
// |cur| will be nil if this is a new mapNode, implying this is a new tree, or the tree has grown in height relative
// to its original chunked form.
sc := &treeChunker{
@@ -169,7 +169,7 @@ func (tc *treeChunker) advanceTo(ctx context.Context, next *nodeCursor) error {
}
// |tc.cur| is now inconsistent with its parent, invalidate it.
tc.cur.nd = nil
tc.cur.nd = mapNode{}
}
break
@@ -215,8 +215,8 @@ func (tc *treeChunker) Skip(ctx context.Context) error {
// may be made before or after the pair, but not between them.
func (tc *treeChunker) Append(ctx context.Context, key, value nodeItem) (bool, error) {
// When adding new key-value pairs to an in-progress chunk, we must enforce 3 invariants
// (1) Key-value pairs are stored in the same Node.
// (2) The total size of a Node's data cannot exceed |maxNodeDataSize|.
// (1) Key-value pairs are stored in the same mapNode.
// (2) The total size of a mapNode's data cannot exceed |maxNodeDataSize|.
// (3) Internal Nodes (level > 0) must contain at least 2 key-value pairs (4 node items).
// Infinite recursion can occur if internal nodes contain a single metaPair with a key
// large enough to trigger a chunk boundary. Forming a chunk boundary after a single
@@ -313,13 +313,13 @@ func (tc *treeChunker) createParentChunker(ctx context.Context) (err error) {
return nil
}
// createNode creates a Node from the current items in |sc.currentPair|,
// clears the current items, then returns the new Node and a metaValue that
// points to it. The Node is always eagerly written.
func (tc *treeChunker) createNode(ctx context.Context) (Node, nodePair, error) {
// createNode creates a mapNode from the current items in |sc.currentPair|,
// clears the current items, then returns the new mapNode and a metaValue that
// points to it. The mapNode is always eagerly written.
func (tc *treeChunker) createNode(ctx context.Context) (mapNode, nodePair, error) {
nd, metaPair, err := writeNewChild(ctx, tc.ns, tc.level, tc.current...)
if err != nil {
return nil, nodePair{}, err
return mapNode{}, nodePair{}, err
}
// |tc.currentPair| is copied so it's safe to re-use the memory.
@@ -329,15 +329,15 @@ func (tc *treeChunker) createNode(ctx context.Context) (Node, nodePair, error) {
return nd, metaPair, nil
}
// Done returns the root Node of the resulting tree.
// Done returns the root mapNode of the resulting tree.
// The logic here is subtle, but hopefully correct and understandable. See comments inline.
func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
func (tc *treeChunker) Done(ctx context.Context) (mapNode, error) {
assertTrue(!tc.done)
tc.done = true
if tc.cur != nil {
if err := tc.finalizeCursor(ctx); err != nil {
return nil, err
return mapNode{}, err
}
}
@@ -348,7 +348,7 @@ func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
// |tc.current| are the last items at this level of the tree,
// make a chunk out of them
if err := tc.handleChunkBoundary(ctx); err != nil {
return nil, err
return mapNode{}, err
}
}
@@ -357,29 +357,29 @@ func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
// At this point, we know |tc.current| contains every item at this level of the tree.
// To see this, consider that there are two ways items can enter |tc.current|.
// (1) as the result of resume() with the cursor on anything other than the first item in the Node
// (1) as the result of resume() with the cursor on anything other than the first item in the mapNode
// (2) as a result of a child treeChunker hitting an explicit chunk boundary during either Append() or finalize().
//
// The only way there can be no items in some parent treeChunker's |tc.current| is if this treeChunker began with
// a cursor within its first existing chunk (and thus all parents resume()'d with a cursor on their first item) and
// continued through all subsequent items without creating any explicit chunk boundaries (and thus never sent any
// items up to a parent as a result of chunking). Therefore, this treeChunker's |tc.current| must contain all items
// within the current Node.
// within the current mapNode.
// This level must represent *a* root of the tree, but it is possibly non-canonical. There are three possible cases:
// (1) This is "leaf" treeChunker and thus produced tree of depth 1 which contains exactly one chunk
// (never hit a boundary), or
// (2) This is an internal Node of the tree which contains multiple references to child nodes. In either case,
// (2) This is an internal mapNode of the tree which contains multiple references to child nodes. In either case,
// this is the canonical root of the tree.
if tc.isLeaf() || len(tc.current) > metaPairCount {
nd, _, err := tc.createNode(ctx)
if err != nil {
return nil, err
return mapNode{}, err
}
return nd, nil
}
// (3) This is an internal Node of the tree with a single metaPair. This is a non-canonical root, and we must walk
// (3) This is an internal mapNode of the tree with a single metaPair. This is a non-canonical root, and we must walk
// down until we find cases (1) or (2), above.
assertTrue(!tc.isLeaf())
assertTrue(len(tc.current) == metaPairCount)
@@ -388,19 +388,19 @@ func (tc *treeChunker) Done(ctx context.Context) (Node, error) {
for {
child, err := fetchChild(ctx, tc.ns, mt)
if err != nil {
return nil, err
return mapNode{}, err
}
if child.leafNode() || child.nodeCount() > 1 {
return child, nil
}
mt = metaValue(child.getItem(metaPairValIdx))
mt = metaValue(child.getRef(0))
}
}
// If we are mutating an existing Node, appending subsequent items in the Node until we reach a pre-existing chunk
// boundary or the end of the Node.
// If we are mutating an existing mapNode, appending subsequent items in the mapNode until we reach a pre-existing chunk
// boundary or the end of the mapNode.
func (tc *treeChunker) finalizeCursor(ctx context.Context) (err error) {
for tc.cur.valid() {
pair := tc.cur.currentPair()
@@ -411,7 +411,7 @@ func (tc *treeChunker) finalizeCursor(ctx context.Context) (err error) {
return err
}
if ok && tc.cur.atNodeEnd() {
break // boundary occurred at same place in old & new Node
break // boundary occurred at same place in old & new mapNode
}
_, err = tc.cur.advance(ctx)
@@ -428,7 +428,7 @@ func (tc *treeChunker) finalizeCursor(ctx context.Context) (err error) {
}
// invalidate this cursor to mark it finalized.
tc.cur.nd = nil
tc.cur.nd = mapNode{}
}
return nil
+3 -3
View File
@@ -54,7 +54,7 @@ func roundTripTreeItems(t *testing.T) {
validateTreeItems(t, ns, root, items)
}
func countTree(t *testing.T, ns NodeStore, nd Node) (count int) {
func countTree(t *testing.T, ns NodeStore, nd mapNode) (count int) {
ctx := context.Background()
err := iterTree(ctx, ns, nd, func(_ nodeItem) (err error) {
count++
@@ -64,7 +64,7 @@ func countTree(t *testing.T, ns NodeStore, nd Node) (count int) {
return
}
func validateTreeItems(t *testing.T, ns NodeStore, nd Node, expected [][2]nodeItem) {
func validateTreeItems(t *testing.T, ns NodeStore, nd mapNode, expected [][2]nodeItem) {
i := 0
ctx := context.Background()
err := iterTree(ctx, ns, nd, func(actual nodeItem) (err error) {
@@ -78,7 +78,7 @@ func validateTreeItems(t *testing.T, ns NodeStore, nd Node, expected [][2]nodeIt
return
}
func iterTree(ctx context.Context, ns NodeStore, nd Node, cb func(item nodeItem) error) error {
func iterTree(ctx context.Context, ns NodeStore, nd mapNode, cb func(item nodeItem) error) error {
if nd.empty() {
return nil
}