Merge pull request #5939 from dolthub/andy/storage-docs

[no-release-notes] go/{store, serial}: improve storage docs
This commit is contained in:
AndyA
2023-05-12 09:36:01 -07:00
committed by GitHub
24 changed files with 150 additions and 225 deletions
+2 -1
View File
@@ -26,7 +26,8 @@ table AddressMap {
// - value addresses for AddressMap leaf nodes
address_array:[ubyte] (required);
// array of uvarint encoded subtree counts
// array of varint encoded subtree counts
// see: go/store/prolly/message/varint.go
subtree_counts:[ubyte];
// total count of prolly tree
tree_count:uint64;
+2 -1
View File
@@ -21,7 +21,8 @@ table Blob {
// array of subtree addresses for internal tree nodes
address_array:[ubyte];
// array of uvarint encoded subtree sizes
// array of varint encoded subtree sizes
// see: go/store/prolly/message/varint.go
subtree_sizes:[ubyte];
tree_size:uint64;
tree_level:uint8;
+2 -1
View File
@@ -22,7 +22,8 @@ table CommitClosure {
// array of subtree addresses for internal prolly tree nodes
address_array:[ubyte];
// array of uvarint encoded subtree counts
// array of varint encoded subtree counts
// see: go/store/prolly/message/varint.go
subtree_counts:[ubyte];
// total count of prolly tree
tree_count:uint64;
+2 -1
View File
@@ -35,7 +35,8 @@ table MergeArtifacts {
// array of subtree addresses for internal tree nodes
address_array:[ubyte];
// array of uvarint encoded subtree counts
// array of varint encoded subtree counts
// see: go/store/prolly/message/varint.go
subtree_counts:[ubyte];
// total count of prolly tree
tree_count:uint64;
+2 -3
View File
@@ -39,14 +39,13 @@ table ProllyTreeNode {
// (eg value tuples containing out-of-line BLOB addresses)
value_address_offsets:[uint16];
// array of chunk addresses
// - subtree addresses for internal prolly tree nodes
// - value addresses for AddressMap leaf nodes
address_array:[ubyte];
// array of uvarint encoded subtree counts
// array of varint encoded subtree counts
// see: go/store/prolly/message/varint.go
subtree_counts:[ubyte];
// total count of prolly tree
tree_count:uint64;
+1 -3
View File
@@ -42,9 +42,7 @@ table Column {
// sql display order
display_order:int16;
// todo(andy): ideally we'd resolve column identity
// without using tags, but the current implementation
// of schema.Schema is tightly coupled to tags.
// column tag
tag: uint64;
// storage encoding
+2
View File
@@ -34,12 +34,14 @@ table Table {
conflicts:Conflicts;
// address of a violations types.Map (for __DOLT_DEV__).
// todo: deprecate
violations:[ubyte];
// address of artifacts
artifacts:[ubyte];
}
// todo: deprecate
table Conflicts {
// address of a conflicts types.Map (for __DOLT_DEV__).
data:[ubyte] (required);
-1
View File
@@ -33,7 +33,6 @@ import (
"github.com/dolthub/dolt/go/store/val"
)
// todo(andy): randomize test seed
var testRand = rand.New(rand.NewSource(1))
var sharedPool = pool.NewBuffPool()
@@ -224,7 +224,6 @@ func estimateMergeArtifactSize(keys, values [][]byte, subtrees []uint64, keyAddr
panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
}
// todo(andy): better estimates
bufSz += keySz + valSz // tuples
bufSz += refCntSz // subtree counts
bufSz += len(keys)*2 + len(values)*2 // offStart
-1
View File
@@ -222,7 +222,6 @@ func estimateProllyMapSize(keys, values [][]byte, subtrees []uint64, valAddrsCnt
panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
}
// todo(andy): better estimates
bufSz += keySz + valSz // tuples
bufSz += subtreesSz // subtree counts
bufSz += len(keys)*2 + len(values)*2 // offStart
-1
View File
@@ -100,7 +100,6 @@ func writeAddressOffsets(b *fb.Builder, items [][]byte, sumSz int, td val.TupleD
}
func writeCountArray(b *fb.Builder, counts []uint64) fb.UOffsetT {
// todo(andy): write without alloc
buf := make([]byte, maxEncodedSize(len(counts)))
return b.CreateByteVector(encodeVarints(counts, buf))
}
+9 -9
View File
@@ -35,7 +35,7 @@ type Chunker interface {
}
type chunker[S message.Serializer] struct {
cur *Cursor
cur *cursor
parent *chunker[S]
level int
done bool
@@ -57,7 +57,7 @@ func newEmptyChunker[S message.Serializer](ctx context.Context, ns NodeStore, se
return newChunker(ctx, nil, 0, ns, serializer)
}
func newChunker[S message.Serializer](ctx context.Context, cur *Cursor, level int, ns NodeStore, serializer S) (*chunker[S], error) {
func newChunker[S message.Serializer](ctx context.Context, cur *cursor, level int, ns NodeStore, serializer S) (*chunker[S], error) {
// |cur| will be nil if this is a new Node, implying this is a new tree, or the tree has grown in height relative
// to its original chunked form.
@@ -144,21 +144,21 @@ func (tc *chunker[S]) DeletePair(ctx context.Context, _, _ Item) error {
return tc.skip(ctx)
}
// advanceTo progresses the chunker until its tracking Cursor catches up with
// |next|, a Cursor indicating next key where an edit will be applied.
// advanceTo progresses the chunker until its tracking cursor catches up with
// |next|, a cursor indicating the next key where an edit will be applied.
//
// The method proceeds from the deepest chunker recursively into its
// linked list parents:
//
// (1) If the current Cursor and all of its parents are aligned with |next|,
// (1) If the current cursor and all of its parents are aligned with |next|,
// we are done.
//
// (2) In lockstep, a) append to the chunker and b) increment the Cursor until
// (2) In lockstep, a) append to the chunker and b) increment the cursor until
// we either meet condition (1) and return, or we synchronize and progress to
// (3) or (4). Synchronizing means that the current tree being built has
// reached a chunk boundary that aligns with a chunk boundary in the old tree
// being mutated. Synchronization means chunks between this boundary and
// |next| at the current Cursor level will be unchanged and can be skipped.
// |next| at the current cursor level will be unchanged and can be skipped.
//
// (3) All parent cursors are (1) current or (2) synchronized, or there are no
// parents, and we are done.
@@ -168,7 +168,7 @@ func (tc *chunker[S]) DeletePair(ctx context.Context, _, _ Item) error {
// anticipation of impending edits that may edit the current chunk. Note that
// processPrefix is only necessary for the "fast forward" case where we
// synchronized the tree level before reaching |next|.
func (tc *chunker[S]) advanceTo(ctx context.Context, next *Cursor) error {
func (tc *chunker[S]) advanceTo(ctx context.Context, next *cursor) error {
cmp := tc.cur.compare(next)
if cmp == 0 { // step (1)
return nil
@@ -347,7 +347,7 @@ func (tc *chunker[S]) handleChunkBoundary(ctx context.Context) error {
func (tc *chunker[S]) createParentChunker(ctx context.Context) (err error) {
assertTrue(tc.parent == nil, "chunker parent must be nil")
var parent *Cursor
var parent *cursor
if tc.cur != nil && tc.cur.parent != nil {
// todo(andy): does this comment make sense? cloning a pointer?
// Clone the parent cursor because otherwise calling cur.forward() will affect our parent - and vice versa -
+1 -1
View File
@@ -33,7 +33,7 @@ var goldenHash = hash.Hash{
0xea, 0x7d, 0x47, 0x69, 0x6c,
}
// todo(andy): need and analogous test in pkg prolly
// todo(andy): need an analogous test in pkg prolly
func TestContentAddress(t *testing.T) {
tups, _ := AscendingUintTuples(12345)
m := makeTree(t, tups)
+13 -13
View File
@@ -37,8 +37,8 @@ type Diff struct {
type DiffFn func(context.Context, Diff) error
type Differ[K ~[]byte, O Ordering[K]] struct {
from, to *Cursor
fromStop, toStop *Cursor
from, to *cursor
fromStop, toStop *cursor
order O
}
@@ -48,7 +48,7 @@ func DifferFromRoots[K ~[]byte, O Ordering[K]](
from, to Node,
order O,
) (Differ[K, O], error) {
var fc, tc *Cursor
var fc, tc *cursor
var err error
if !from.empty() {
@@ -57,7 +57,7 @@ func DifferFromRoots[K ~[]byte, O Ordering[K]](
return Differ[K, O]{}, err
}
} else {
fc = &Cursor{}
fc = &cursor{}
}
if !to.empty() {
@@ -66,7 +66,7 @@ func DifferFromRoots[K ~[]byte, O Ordering[K]](
return Differ[K, O]{}, err
}
} else {
tc = &Cursor{}
tc = &cursor{}
}
fs, err := newCursorPastEnd(ctx, fromNs, from)
@@ -156,7 +156,7 @@ func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) {
return Diff{}, io.EOF
}
func sendRemoved(ctx context.Context, from *Cursor) (diff Diff, err error) {
func sendRemoved(ctx context.Context, from *cursor) (diff Diff, err error) {
diff = Diff{
Type: RemovedDiff,
Key: from.CurrentKey(),
@@ -169,7 +169,7 @@ func sendRemoved(ctx context.Context, from *Cursor) (diff Diff, err error) {
return
}
func sendAdded(ctx context.Context, to *Cursor) (diff Diff, err error) {
func sendAdded(ctx context.Context, to *cursor) (diff Diff, err error) {
diff = Diff{
Type: AddedDiff,
Key: to.CurrentKey(),
@@ -182,7 +182,7 @@ func sendAdded(ctx context.Context, to *Cursor) (diff Diff, err error) {
return
}
func sendModified(ctx context.Context, from, to *Cursor) (diff Diff, err error) {
func sendModified(ctx context.Context, from, to *cursor) (diff Diff, err error) {
diff = Diff{
Type: ModifiedDiff,
Key: from.CurrentKey(),
@@ -199,7 +199,7 @@ func sendModified(ctx context.Context, from, to *Cursor) (diff Diff, err error)
return
}
func skipCommon(ctx context.Context, from, to *Cursor) (err error) {
func skipCommon(ctx context.Context, from, to *cursor) (err error) {
// track when |from.parent| and |to.parent| change
// to avoid unnecessary comparisons.
parentsAreNew := true
@@ -238,7 +238,7 @@ func skipCommon(ctx context.Context, from, to *Cursor) (err error) {
return err
}
func skipCommonParents(ctx context.Context, from, to *Cursor) (err error) {
func skipCommonParents(ctx context.Context, from, to *cursor) (err error) {
err = skipCommon(ctx, from.parent, to.parent)
if err != nil {
return err
@@ -266,18 +266,18 @@ func skipCommonParents(ctx context.Context, from, to *Cursor) (err error) {
}
// todo(andy): assumes equal byte representations
func equalItems(from, to *Cursor) bool {
func equalItems(from, to *cursor) bool {
return bytes.Equal(from.CurrentKey(), to.CurrentKey()) &&
bytes.Equal(from.currentValue(), to.currentValue())
}
func equalParents(from, to *Cursor) (eq bool) {
func equalParents(from, to *cursor) (eq bool) {
if from.parent != nil && to.parent != nil {
eq = equalItems(from.parent, to.parent)
}
return
}
func equalcursorValues(from, to *Cursor) bool {
func equalcursorValues(from, to *cursor) bool {
return bytes.Equal(from.currentValue(), to.currentValue())
}
+18 -18
View File
@@ -66,7 +66,7 @@ func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]](
start, stop K,
cb DiffFn,
) error {
var fromStart, fromStop, toStart, toStop *Cursor
var fromStart, fromStop, toStart, toStop *cursor
var err error
if len(start) == 0 {
@@ -80,12 +80,12 @@ func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]](
return err
}
} else {
fromStart, err = NewCursorAtKey(ctx, from.NodeStore, from.Root, start, from.Order)
fromStart, err = newCursorAtKey(ctx, from.NodeStore, from.Root, start, from.Order)
if err != nil {
return err
}
toStart, err = NewCursorAtKey(ctx, to.NodeStore, to.Root, start, to.Order)
toStart, err = newCursorAtKey(ctx, to.NodeStore, to.Root, start, to.Order)
if err != nil {
return err
}
@@ -102,12 +102,12 @@ func DiffKeyRangeOrderedTrees[K, V ~[]byte, O Ordering[K]](
return err
}
} else {
fromStop, err = NewCursorAtKey(ctx, from.NodeStore, from.Root, stop, from.Order)
fromStop, err = newCursorAtKey(ctx, from.NodeStore, from.Root, stop, from.Order)
if err != nil {
return err
}
toStop, err = NewCursorAtKey(ctx, to.NodeStore, to.Root, stop, to.Order)
toStop, err = newCursorAtKey(ctx, to.NodeStore, to.Root, stop, to.Order)
if err != nil {
return err
}
@@ -299,7 +299,7 @@ func (t StaticMap[K, V, O]) IterAll(ctx context.Context) (*OrderedTreeIter[K, V]
return nil, err
}
stop := func(curr *Cursor) bool {
stop := func(curr *cursor) bool {
return curr.compare(s) >= 0
}
@@ -326,7 +326,7 @@ func (t StaticMap[K, V, O]) IterAllReverse(ctx context.Context) (*OrderedTreeIte
return nil, err
}
stop := func(curr *Cursor) bool {
stop := func(curr *cursor) bool {
return curr.compare(beginning) <= 0
}
@@ -364,7 +364,7 @@ func (t StaticMap[K, V, O]) IterOrdinalRange(ctx context.Context, start, stop ui
return nil, err
}
stopF := func(curr *Cursor) bool {
stopF := func(curr *cursor) bool {
return curr.compare(hi) >= 0
}
@@ -412,7 +412,7 @@ func (t StaticMap[K, V, O]) IterKeyRange(ctx context.Context, start, stop K) (*O
return nil, err
}
stopF := func(curr *Cursor) bool {
stopF := func(curr *cursor) bool {
return curr.compare(hi) >= 0
}
@@ -445,14 +445,14 @@ func (t StaticMap[K, V, O]) GetKeyRangeCardinality(ctx context.Context, start, s
return endOrd - startOrd, nil
}
func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusive, stopExclusive K) (lo, hi *Cursor, err error) {
func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusive, stopExclusive K) (lo, hi *cursor, err error) {
if len(startInclusive) == 0 {
lo, err = newCursorAtStart(ctx, t.NodeStore, t.Root)
if err != nil {
return nil, nil, err
}
} else {
lo, err = NewCursorAtKey(ctx, t.NodeStore, t.Root, startInclusive, t.Order)
lo, err = newCursorAtKey(ctx, t.NodeStore, t.Root, startInclusive, t.Order)
if err != nil {
return nil, nil, err
}
@@ -464,7 +464,7 @@ func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusi
return nil, nil, err
}
} else {
hi, err = NewCursorAtKey(ctx, t.NodeStore, t.Root, stopExclusive, t.Order)
hi, err = newCursorAtKey(ctx, t.NodeStore, t.Root, stopExclusive, t.Order)
if err != nil {
return nil, nil, err
}
@@ -474,7 +474,7 @@ func (t StaticMap[K, V, O]) getKeyRangeCursors(ctx context.Context, startInclusi
// GetOrdinalForKey returns the smallest ordinal position at which the key >= |query|.
func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint64, error) {
cur, err := NewCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order)
cur, err := newCursorAtKey(ctx, t.NodeStore, t.Root, query, t.Order)
if err != nil {
return 0, err
}
@@ -483,12 +483,12 @@ func (t StaticMap[K, V, O]) GetOrdinalForKey(ctx context.Context, query K) (uint
type OrderedTreeIter[K, V ~[]byte] struct {
// current tuple location
curr *Cursor
curr *cursor
// the function called to moved |curr| forward in the direction of iteration.
step func(context.Context) error
// should return |true| if the passed in Cursor is past the iteration's stopping point.
stop func(*Cursor) bool
// should return |true| if the passed in cursor is past the iteration's stopping point.
stop func(*cursor) bool
}
func ReverseOrderedTreeIterFromCursors[K, V ~[]byte](
@@ -509,7 +509,7 @@ func ReverseOrderedTreeIterFromCursors[K, V ~[]byte](
return nil, err
}
stopFn := func(curr *Cursor) bool {
stopFn := func(curr *cursor) bool {
return curr.compare(start) < 0
}
@@ -534,7 +534,7 @@ func OrderedTreeIterFromCursors[K, V ~[]byte](
return nil, err
}
stopFn := func(curr *Cursor) bool {
stopFn := func(curr *cursor) bool {
return curr.compare(stop) >= 0
}
+1 -1
View File
@@ -69,7 +69,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer](
return root, nil // no mutations
}
cur, err := NewCursorAtKey(ctx, ns, root, K(newKey), order)
cur, err := newCursorAtKey(ctx, ns, root, K(newKey), order)
if err != nil {
return Node{}, err
}
+71 -114
View File
@@ -29,11 +29,11 @@ import (
"github.com/dolthub/dolt/go/store/hash"
)
// Cursor explores a tree of Nodes.
type Cursor struct {
// cursor explores a tree of Nodes.
type cursor struct {
nd Node
idx int
parent *Cursor
parent *cursor
nrw NodeStore
}
@@ -43,56 +43,38 @@ type Ordering[K ~[]byte] interface {
Compare(left, right K) int
}
func newCursorAtStart(ctx context.Context, ns NodeStore, nd Node) (cur *Cursor, err error) {
cur = &Cursor{nd: nd, nrw: ns}
var leaf bool
leaf, err = cur.isLeaf()
if err != nil {
return nil, err
}
for !leaf {
func newCursorAtStart(ctx context.Context, ns NodeStore, nd Node) (cur *cursor, err error) {
cur = &cursor{nd: nd, nrw: ns}
for !cur.isLeaf() {
nd, err = fetchChild(ctx, ns, cur.currentRef())
if err != nil {
return nil, err
}
parent := cur
cur = &Cursor{nd: nd, parent: parent, nrw: ns}
leaf, err = cur.isLeaf()
if err != nil {
return nil, err
}
cur = &cursor{nd: nd, parent: parent, nrw: ns}
}
return
}
func newCursorAtEnd(ctx context.Context, ns NodeStore, nd Node) (cur *Cursor, err error) {
cur = &Cursor{nd: nd, nrw: ns}
func newCursorAtEnd(ctx context.Context, ns NodeStore, nd Node) (cur *cursor, err error) {
cur = &cursor{nd: nd, nrw: ns}
cur.skipToNodeEnd()
var leaf bool
leaf, err = cur.isLeaf()
if err != nil {
return nil, err
}
for !leaf {
for !cur.isLeaf() {
nd, err = fetchChild(ctx, ns, cur.currentRef())
if err != nil {
return nil, err
}
parent := cur
cur = &Cursor{nd: nd, parent: parent, nrw: ns}
cur = &cursor{nd: nd, parent: parent, nrw: ns}
cur.skipToNodeEnd()
leaf, err = cur.isLeaf()
if err != nil {
return nil, err
}
}
return
}
func newCursorPastEnd(ctx context.Context, ns NodeStore, nd Node) (cur *Cursor, err error) {
func newCursorPastEnd(ctx context.Context, ns NodeStore, nd Node) (cur *cursor, err error) {
cur, err = newCursorAtEnd(ctx, ns, nd)
if err != nil {
return nil, err
@@ -110,7 +92,7 @@ func newCursorPastEnd(ctx context.Context, ns NodeStore, nd Node) (cur *Cursor,
return
}
func newCursorAtOrdinal(ctx context.Context, ns NodeStore, nd Node, ord uint64) (cur *Cursor, err error) {
func newCursorAtOrdinal(ctx context.Context, ns NodeStore, nd Node, ord uint64) (cur *cursor, err error) {
cnt, err := nd.TreeCount()
if err != nil {
return nil, err
@@ -138,13 +120,9 @@ func newCursorAtOrdinal(ctx context.Context, ns NodeStore, nd Node, ord uint64)
})
}
// GetOrdinalOfCursor returns the ordinal position of a Cursor.
func getOrdinalOfCursor(curr *Cursor) (ord uint64, err error) {
leaf, err := curr.isLeaf()
if err != nil {
return 0, err
}
if !leaf {
// GetOrdinalOfCursor returns the ordinal position of a cursor.
func getOrdinalOfCursor(curr *cursor) (ord uint64, err error) {
if !curr.isLeaf() {
return 0, fmt.Errorf("|cur| must be at a leaf")
}
@@ -178,21 +156,15 @@ func getOrdinalOfCursor(curr *Cursor) (ord uint64, err error) {
return ord, nil
}
func NewCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (cur *Cursor, err error) {
func newCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (cur *cursor, err error) {
return newCursorFromSearchFn(ctx, ns, nd, searchForKey(key, order))
}
func newCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search SearchFn) (cur *Cursor, err error) {
cur = &Cursor{nd: nd, nrw: ns}
func newCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search SearchFn) (cur *cursor, err error) {
cur = &cursor{nd: nd, nrw: ns}
cur.idx = search(cur.nd)
var leaf bool
leaf, err = cur.isLeaf()
if err != nil {
return nil, err
}
for !leaf {
for !cur.isLeaf() {
// stay in bounds for internal nodes
cur.keepInBounds()
@@ -202,20 +174,16 @@ func newCursorFromSearchFn(ctx context.Context, ns NodeStore, nd Node, search Se
}
parent := cur
cur = &Cursor{nd: nd, parent: parent, nrw: ns}
cur = &cursor{nd: nd, parent: parent, nrw: ns}
cur.idx = search(cur.nd)
leaf, err = cur.isLeaf()
if err != nil {
return nil, err
}
}
return
}
func newLeafCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (Cursor, error) {
cur := Cursor{nd: nd, nrw: ns}
func newLeafCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeStore, nd Node, key K, order O) (cursor, error) {
var err error
cur := cursor{nd: nd, nrw: ns}
for {
// binary search |cur.nd| for |key|
i, j := 0, cur.nd.Count()
@@ -230,10 +198,7 @@ func newLeafCursorAtKey[K ~[]byte, O Ordering[K]](ctx context.Context, ns NodeSt
}
cur.idx = i
leaf, err := cur.isLeaf()
if err != nil {
return cur, err
} else if leaf {
if cur.isLeaf() {
break // done
}
@@ -345,17 +310,17 @@ func recursiveFetchLeafNodeSpan(ctx context.Context, ns NodeStore, nodes []Node,
return recursiveFetchLeafNodeSpan(ctx, ns, children, start, stop)
}
func currentCursorItems(cur *Cursor) (key, value Item) {
func currentCursorItems(cur *cursor) (key, value Item) {
key = cur.nd.keys.GetItem(cur.idx, cur.nd.msg)
value = cur.nd.values.GetItem(cur.idx, cur.nd.msg)
return
}
// Seek updates the Cursor's node to one whose range spans the key's value, or the last
// Seek updates the cursor's node to one whose range spans the key's value, or the last
// node if the key is greater than all existing keys.
// If a node does not contain the key, we recurse upwards to the parent Cursor. If the
// If a node does not contain the key, we recurse upwards to the parent cursor. If the
// node contains a key, we recurse downwards into child nodes.
func Seek[K ~[]byte, O Ordering[K]](ctx context.Context, cur *Cursor, key K, order O) (err error) {
func Seek[K ~[]byte, O Ordering[K]](ctx context.Context, cur *cursor, key K, order O) (err error) {
inBounds := true
if cur.parent != nil {
inBounds = inBounds && order.Compare(key, K(cur.firstKey())) >= 0
@@ -382,33 +347,30 @@ func Seek[K ~[]byte, O Ordering[K]](ctx context.Context, cur *Cursor, key K, ord
return
}
func (cur *Cursor) Valid() bool {
func (cur *cursor) Valid() bool {
return cur.nd.count != 0 &&
cur.nd.bytes() != nil &&
cur.idx >= 0 &&
cur.idx < int(cur.nd.count)
}
func (cur *Cursor) CurrentKey() Item {
func (cur *cursor) CurrentKey() Item {
return cur.nd.GetKey(cur.idx)
}
func (cur *Cursor) currentValue() Item {
func (cur *cursor) currentValue() Item {
return cur.nd.GetValue(cur.idx)
}
func (cur *Cursor) currentRef() hash.Hash {
func (cur *cursor) currentRef() hash.Hash {
return cur.nd.getAddress(cur.idx)
}
func (cur *Cursor) currentSubtreeSize() (uint64, error) {
leaf, err := cur.isLeaf()
if err != nil {
return 0, err
}
if leaf {
func (cur *cursor) currentSubtreeSize() (uint64, error) {
if cur.isLeaf() {
return 1, nil
}
var err error
cur.nd, err = cur.nd.loadSubtrees()
if err != nil {
return 0, err
@@ -416,25 +378,25 @@ func (cur *Cursor) currentSubtreeSize() (uint64, error) {
return cur.nd.getSubtreeCount(cur.idx)
}
func (cur *Cursor) firstKey() Item {
func (cur *cursor) firstKey() Item {
return cur.nd.GetKey(0)
}
func (cur *Cursor) lastKey() Item {
func (cur *cursor) lastKey() Item {
lastKeyIdx := int(cur.nd.count) - 1
return cur.nd.GetKey(lastKeyIdx)
}
func (cur *Cursor) skipToNodeStart() {
func (cur *cursor) skipToNodeStart() {
cur.idx = 0
}
func (cur *Cursor) skipToNodeEnd() {
func (cur *cursor) skipToNodeEnd() {
lastKeyIdx := int(cur.nd.count) - 1
cur.idx = lastKeyIdx
}
func (cur *Cursor) keepInBounds() {
func (cur *cursor) keepInBounds() {
if cur.idx < 0 {
cur.skipToNodeStart()
}
@@ -444,64 +406,59 @@ func (cur *Cursor) keepInBounds() {
}
}
func (cur *Cursor) atNodeStart() bool {
func (cur *cursor) atNodeStart() bool {
return cur.idx == 0
}
// atNodeEnd returns true if the Cursor's current |idx|
// atNodeEnd returns true if the cursor's current |idx|
// points to the last node item
func (cur *Cursor) atNodeEnd() bool {
func (cur *cursor) atNodeEnd() bool {
lastKeyIdx := int(cur.nd.count) - 1
return cur.idx == lastKeyIdx
}
func (cur *Cursor) isLeaf() (bool, error) {
// todo(andy): cache Level
lvl, err := cur.level()
if err != nil {
return false, err
}
return lvl == 0, nil
func (cur *cursor) isLeaf() bool {
return cur.nd.level == 0
}
func (cur *Cursor) level() (uint64, error) {
func (cur *cursor) level() (uint64, error) {
return uint64(cur.nd.level), nil
}
// invalidateAtEnd sets the Cursor's index to the node count.
func (cur *Cursor) invalidateAtEnd() {
// invalidateAtEnd sets the cursor's index to the node count.
func (cur *cursor) invalidateAtEnd() {
cur.idx = int(cur.nd.count)
}
// invalidateAtStart sets the Cursor's index to -1.
func (cur *Cursor) invalidateAtStart() {
// invalidateAtStart sets the cursor's index to -1.
func (cur *cursor) invalidateAtStart() {
cur.idx = -1
}
// hasNext returns true if we do not need to recursively
// check the parent to know that the current Cursor
// check the parent to know that the current cursor
// has more keys. hasNext can be false even if parent
// cursors are not exhausted.
func (cur *Cursor) hasNext() bool {
func (cur *cursor) hasNext() bool {
return cur.idx < int(cur.nd.count)-1
}
// hasPrev returns true if the current node has preceding
// keys. hasPrev can be false even if a parent node has
// preceding keys.
func (cur *Cursor) hasPrev() bool {
func (cur *cursor) hasPrev() bool {
return cur.idx > 0
}
// outOfBounds returns true if the current Cursor and
// outOfBounds returns true if the current cursor and
// all parents are exhausted.
func (cur *Cursor) outOfBounds() bool {
func (cur *cursor) outOfBounds() bool {
return cur.idx < 0 || cur.idx >= int(cur.nd.count)
}
// advance either increments the current key index by one,
// or has reached the end of the current node and skips to the next
// child of the parent Cursor, recursively if necessary, returning
// child of the parent cursor, recursively if necessary, returning
// either an error or nil.
//
// More specifically, one of three things happens:
@@ -509,14 +466,14 @@ func (cur *Cursor) outOfBounds() bool {
// 1) The current chunk still has keys, iterate to
// the next |idx|;
//
// 2) We've exhausted the current Cursor, but there is at least
// one |parent| Cursor with more keys. We find that |parent| recursively,
// 2) We've exhausted the current cursor, but there is at least
// one |parent| cursor with more keys. We find that |parent| recursively,
// perform step (1), and then have every child initialize itself
// using the new |parent|.
//
// 3) We've exhausted the current Cursor and every |parent|. Jump
// 3) We've exhausted the current cursor and every |parent|. Jump
// to an end state (idx = node.count).
func (cur *Cursor) advance(ctx context.Context) error {
func (cur *cursor) advance(ctx context.Context) error {
if cur.hasNext() {
cur.idx++
return nil
@@ -551,7 +508,7 @@ func (cur *Cursor) advance(ctx context.Context) error {
// retreat decrements to the previous key, if necessary by
// recursively decrementing parent nodes.
func (cur *Cursor) retreat(ctx context.Context) error {
func (cur *cursor) retreat(ctx context.Context) error {
if cur.hasPrev() {
cur.idx--
return nil
@@ -584,9 +541,9 @@ func (cur *Cursor) retreat(ctx context.Context) error {
return nil
}
// fetchNode loads the Node that the Cursor index points to.
// It's called whenever the Cursor advances/retreats to a different chunk.
func (cur *Cursor) fetchNode(ctx context.Context) (err error) {
// fetchNode loads the Node that the cursor index points to.
// It's called whenever the cursor advances/retreats to a different chunk.
func (cur *cursor) fetchNode(ctx context.Context) (err error) {
assertTrue(cur.parent != nil, "cannot fetch node for cursor with nil parent")
cur.nd, err = fetchChild(ctx, cur.nrw, cur.parent.currentRef())
cur.idx = -1 // caller must set
@@ -594,7 +551,7 @@ func (cur *Cursor) fetchNode(ctx context.Context) (err error) {
}
// Compare returns the highest relative index difference
// between two Cursor trees. A parent has a higher precedence
// between two cursor trees. A parent has a higher precedence
// than its child.
//
// Ex:
@@ -608,12 +565,12 @@ func (cur *Cursor) fetchNode(ctx context.Context) (err error) {
// other: L3 -> 4, L2 -> 3, L1 -> 5, L0 -> 4
//
// res => +1 (from level 2)
func (cur *Cursor) compare(other *Cursor) int {
func (cur *cursor) compare(other *cursor) int {
return compareCursors(cur, other)
}
func (cur *Cursor) clone() *Cursor {
cln := Cursor{
func (cur *cursor) clone() *cursor {
cln := cursor{
nd: cur.nd,
idx: cur.idx,
nrw: cur.nrw,
@@ -626,7 +583,7 @@ func (cur *Cursor) clone() *Cursor {
return &cln
}
func (cur *Cursor) copy(other *Cursor) {
func (cur *cursor) copy(other *cursor) {
cur.nd = other.nd
cur.idx = other.idx
cur.nrw = other.nrw
@@ -639,7 +596,7 @@ func (cur *Cursor) copy(other *Cursor) {
}
}
func compareCursors(left, right *Cursor) (diff int) {
func compareCursors(left, right *cursor) (diff int) {
diff = 0
for {
d := left.idx - right.idx
+3 -3
View File
@@ -78,7 +78,7 @@ func testNewCursorAtItem(t *testing.T, count int) {
ctx := context.Background()
for i := range items {
key, value := items[i][0], items[i][1]
cur, err := NewCursorAtKey(ctx, ns, root, val.Tuple(key), keyDesc)
cur, err := newCursorAtKey(ctx, ns, root, val.Tuple(key), keyDesc)
require.NoError(t, err)
assert.Equal(t, key, cur.CurrentKey())
assert.Equal(t, value, cur.currentValue())
@@ -104,7 +104,7 @@ func testGetOrdinalOfCursor(t *testing.T, count int) {
assert.NoError(t, err)
for i := 0; i < len(tuples); i++ {
curr, err := NewCursorAtKey(ctx, ns, nd, tuples[i][0], desc)
curr, err := newCursorAtKey(ctx, ns, nd, tuples[i][0], desc)
require.NoError(t, err)
ord, err := getOrdinalOfCursor(curr)
@@ -117,7 +117,7 @@ func testGetOrdinalOfCursor(t *testing.T, count int) {
b.PutUint32(0, uint32(len(tuples)))
aboveItem := b.Build(sharedPool)
curr, err := NewCursorAtKey(ctx, ns, nd, aboveItem, desc)
curr, err := newCursorAtKey(ctx, ns, nd, aboveItem, desc)
require.NoError(t, err)
ord, err := getOrdinalOfCursor(curr)
-1
View File
@@ -190,7 +190,6 @@ func newKeySplitter(level uint8) nodeSplitter {
var _ splitterFactory = newKeySplitter
func (ks *keySplitter) Append(key, value Item) error {
// todo(andy): account for key/value offsets, vtable, etc.
thisSize := uint32(len(key) + len(value))
ks.size += thisSize
@@ -77,7 +77,6 @@ func (ase *AsyncSortedEdits) EditsAdded() int {
func (ase *AsyncSortedEdits) AddEdit(k types.LesserValuable, v types.Valuable) {
ase.editsAdded++
if ase.accumulating == nil {
// TODO: buffer pool
ase.accumulating = make([]types.KVP, 0, ase.sliceSize)
}
-9
View File
@@ -105,15 +105,6 @@ const (
DecimalEnc = Encoding(serial.EncodingDecimal)
JSONEnc = Encoding(serial.EncodingJSON)
GeometryEnc = Encoding(serial.EncodingGeometry)
// TODO
// CharEnc
// BinaryEnc
// TextEnc
// BlobEnc
// EnumEnc
// SetEnc
// ExpressionEnc
)
func sizeFromType(t Type) (ByteSize, bool) {
+21 -39
View File
@@ -27,50 +27,32 @@ const (
countSize ByteSize = 2
)
// todo(andy): update comment
// Tuples are byte slices containing field values and a footer. Tuples only
// contain Values for non-NULL Fields. Value i contains the data for ith non-
// NULL Field. Values are packed contiguously from the front of the Tuple. The
// footer contains offsets, a member mask, and a field count. offsets enable
// random access to Values. The member mask enables NULL-compaction for Values.
// A Tuple is a vector of fields encoded as a byte slice. Key-Value Tuple pairs
// are used to store row data within clustered and secondary indexes in Dolt.
//
// Tuples read and write Values as byte slices. (De)serialization is delegated
// to Tuple Descriptors, which know a Tuple's schema and associated encodings.
// When reading and writing Values, NULLs are encoded as nil byte slices. Note
// that these are not the same as zero-length byte slices. An empty string may
// be encoded as a zero-length byte slice and will be distinct from a NULL
// string both logically and semantically.
// The encoding format for Tuples starts with field values packed contiguously from
// the front of the Tuple, followed by field offsets, and finally a field count:
//
// Tuple:
// +---------+---------+-----+---------+---------+-------------+-------------+
// | Value 0 | Value 1 | ... | Value K | offsets | Member Mask | Field Count |
// +---------+---------+-----+---------+---------+-------------+-------------+
// +---------+---------+-----+---------+----------+-----+----------+-------+
// | Value 0 | Value 1 | ... | Value K | Offset 1 | ... | Offset K | Count |
// +---------+---------+-----+---------+----------+-----+----------+-------+
//
// offsets:
// The offset array contains a uint16 for each non-NULL field after field 0.
// Offset i encodes the distance to the ith Value from the front of the Tuple.
// The size of the offset array is 2*(K-1) bytes, where K is the number of
// Values in the Tuple.
// +----------+----------+-----+----------+
// | Offset 1 | Offset 2 | ... | Offset K |
// +----------+----------+-----+----------+
// Field offsets encode the byte-offset from the front of the Tuple to the beginning
// of the corresponding field in the Tuple. The offset for the first field is always
// zero and is therefore omitted. Offsets and the field count are little-endian
// encoded uint16 values.
//
// Member Mask:
// The member mask is a bit-array encoding field membership in Tuples. Fields
// with non-NULL values are present, and encoded as 1, NULL fields are absent
// and encoded as 0. The size of the bit array is math.Ceil(N/8) bytes, where
// N is the number of Fields in the Tuple.
// +------------+-------------+-----+
// | Bits 0 - 7 | Bits 8 - 15 | ... |
// +------------+-------------+-----+
// Tuples read and write field values as byte slices. Interpreting these encoded
// values is left up to TupleDesc which knows about a Tuple's schema and associated
// field encodings. Zero-length fields are interpreted as NULL values, all non-NULL
// values must be encoded with non-zero length. For this reason, variable-length
// strings are encoded with a NUL terminator (see codec.go).
//
// Field Count:
// The field fieldCount is a uint16 containing the number of fields in the
// Tuple, it is stored in 2 bytes.
// +----------------------+
// | Field Count (uint16) |
// +----------------------+
// Accessing the ith field where i > count will return a NULL value. This allows us
// to implicitly add nullable columns to the end of a schema without needing to
// rewrite index storage. However, because Dolt storage is content-addressed, we
// must have a single canonical encoding for any given Tuple. For this reason, the
// NULL suffix of a Tuple is explicitly truncated and the field count reduced.
type Tuple []byte
var EmptyTuple = Tuple([]byte{0, 0})
-1
View File
@@ -523,7 +523,6 @@ func (td TupleDesc) FormatValue(i int, value []byte) string {
return formatValue(td.Types[i].Enc, value)
}
func formatValue(enc Encoding, value []byte) string {
// todo(andy): complete cases
switch enc {
case Int8Enc:
v := readInt8(value)
-1
View File
@@ -96,7 +96,6 @@ func tuplePrefix(pool pool.BuffPool, tup Tuple, k int) Tuple {
}
func tupleSuffix(pool pool.BuffPool, tup Tuple, k int) Tuple {
// todo(andy)
cnt := tup.Count()
if k == 0 {
return EmptyTuple