mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-23 10:27:34 -06:00
345 lines
8.2 KiB
Go
345 lines
8.2 KiB
Go
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package tree
|
|
|
|
import (
|
|
"context"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"io"
|
|
|
|
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
|
|
|
"github.com/dolthub/dolt/go/gen/fb/serial"
|
|
"github.com/dolthub/dolt/go/store/hash"
|
|
"github.com/dolthub/dolt/go/store/prolly/message"
|
|
"github.com/dolthub/dolt/go/store/types"
|
|
"github.com/dolthub/dolt/go/store/val"
|
|
)
|
|
|
|
// Item is a single key or value element of a Node, held as raw bytes.
// Interpreting the bytes is left to higher layers.
type Item []byte
|
|
|
|
// subtreeCounts holds the key cardinality of each child tree
// of a non-leaf Node, indexed by child position.
type subtreeCounts []uint64
|
|
|
|
// Node is a generic implementation of a prolly tree node.
|
|
// Elements in a Node are generic Items. Interpreting Item
|
|
// contents is deferred to higher layers (see prolly.Map).
|
|
type Node struct {
|
|
// keys and values cache offset metadata
|
|
// to accelerate Item lookups into msg.
|
|
keys, values message.ItemAccess
|
|
|
|
// count is the Item pair count.
|
|
count uint16
|
|
|
|
// level is 0-indexed tree height.
|
|
level uint16
|
|
|
|
// subtrees contains the key cardinality
|
|
// of each child tree of a non-leaf Node.
|
|
// this field is lazily decoded from msg
|
|
// because it requires a malloc.
|
|
subtrees *subtreeCounts
|
|
|
|
// msg is the underlying buffer for the Node
|
|
// encoded as a Flatbuffers message.
|
|
msg serial.Message
|
|
}
|
|
|
|
// AddressCb is a callback that receives an address found while walking a Node.
type AddressCb func(ctx context.Context, addr hash.Hash) error
|
|
|
|
func WalkAddresses(ctx context.Context, nd Node, ns NodeStore, cb AddressCb) error {
|
|
return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error {
|
|
if err := cb(ctx, addr); err != nil {
|
|
return err
|
|
}
|
|
|
|
if nd.IsLeaf() {
|
|
return nil
|
|
}
|
|
|
|
child, err := ns.Read(ctx, addr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return WalkAddresses(ctx, child, ns, cb)
|
|
})
|
|
}
|
|
|
|
// NodeCb is a callback that receives a Node visited during a tree walk.
type NodeCb func(ctx context.Context, nd Node) error
|
|
|
|
// WalkNodes runs a callback function on every node found in the DFS of |nd|
|
|
// that is of the same message type as |nd|.
|
|
func WalkNodes(ctx context.Context, nd Node, ns NodeStore, cb NodeCb) error {
|
|
if err := cb(ctx, nd); err != nil {
|
|
return err
|
|
}
|
|
if nd.IsLeaf() {
|
|
return nil
|
|
}
|
|
|
|
return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error {
|
|
child, err := ns.Read(ctx, addr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return WalkNodes(ctx, child, ns, cb)
|
|
})
|
|
}
|
|
|
|
// walkOpaqueNodes runs a callback function on every node found in the DFS of |nd|
|
|
// including nested trees.
|
|
func walkOpaqueNodes(ctx context.Context, nd Node, ns NodeStore, cb NodeCb) error {
|
|
if err := cb(ctx, nd); err != nil {
|
|
return err
|
|
}
|
|
|
|
return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error {
|
|
child, err := ns.Read(ctx, addr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return WalkNodes(ctx, child, ns, cb)
|
|
})
|
|
}
|
|
|
|
func NodeFromBytes(msg []byte) (Node, error) {
|
|
keys, values, level, count, err := message.UnpackFields(msg)
|
|
return Node{
|
|
keys: keys,
|
|
values: values,
|
|
count: count,
|
|
level: level,
|
|
msg: msg,
|
|
}, err
|
|
}
|
|
|
|
func (nd Node) HashOf() hash.Hash {
|
|
return hash.Of(nd.bytes())
|
|
}
|
|
|
|
func (nd Node) Count() int {
|
|
return int(nd.count)
|
|
}
|
|
|
|
func (nd Node) TreeCount() (int, error) {
|
|
return message.GetTreeCount(nd.msg)
|
|
}
|
|
|
|
func (nd Node) Size() int {
|
|
return len(nd.bytes())
|
|
}
|
|
|
|
// Level returns the tree Level for this node
|
|
func (nd Node) Level() int {
|
|
return int(nd.level)
|
|
}
|
|
|
|
// IsLeaf returns whether this node is a leaf
|
|
func (nd Node) IsLeaf() bool {
|
|
return nd.level == 0
|
|
}
|
|
|
|
// GetKey returns the |ith| key of this node
|
|
func (nd Node) GetKey(i int) Item {
|
|
return nd.keys.GetItem(i, nd.msg)
|
|
}
|
|
|
|
// GetValue returns the |ith| value of this node.
|
|
func (nd Node) GetValue(i int) Item {
|
|
return nd.values.GetItem(i, nd.msg)
|
|
}
|
|
|
|
func (nd Node) loadSubtrees() (Node, error) {
|
|
var err error
|
|
if nd.subtrees == nil {
|
|
// deserializing subtree counts requires a malloc,
|
|
// we don't load them unless explicitly requested
|
|
sc, err := message.GetSubtrees(nd.msg)
|
|
if err != nil {
|
|
return Node{}, err
|
|
}
|
|
nd.subtrees = (*subtreeCounts)(&sc)
|
|
}
|
|
return nd, err
|
|
}
|
|
|
|
func (nd Node) getSubtreeCount(i int) (uint64, error) {
|
|
if nd.IsLeaf() {
|
|
return 1, nil
|
|
}
|
|
// this will panic unless subtrees were loaded.
|
|
return (*nd.subtrees)[i], nil
|
|
}
|
|
|
|
// getAddress returns the |ith| address of this node.
|
|
// This method assumes values are 20-byte address hashes.
|
|
func (nd Node) getAddress(i int) hash.Hash {
|
|
return hash.New(nd.GetValue(i))
|
|
}
|
|
|
|
func (nd Node) empty() bool {
|
|
return nd.bytes() == nil || nd.count == 0
|
|
}
|
|
|
|
func (nd Node) bytes() []byte {
|
|
return nd.msg
|
|
}
|
|
|
|
func walkAddresses(ctx context.Context, nd Node, cb AddressCb) (err error) {
|
|
return message.WalkAddresses(ctx, nd.msg, cb)
|
|
}
|
|
|
|
func getLastKey(nd Node) Item {
|
|
return nd.GetKey(int(nd.count) - 1)
|
|
}
|
|
|
|
// OutputProllyNode writes the node given to the writer given in a human-readable format, with values converted
|
|
// to the type specified by the provided schema. All nodes have keys displayed in this manner. Interior nodes have
|
|
// their child hash references spelled out, leaf nodes have value tuples delineated like the keys
|
|
func OutputProllyNode(ctx context.Context, w io.Writer, node Node, ns NodeStore, schema schema.Schema) error {
|
|
kd := schema.GetKeyDescriptor()
|
|
vd := schema.GetValueDescriptor()
|
|
for i := 0; i < int(node.count); i++ {
|
|
k := node.GetKey(i)
|
|
kt := val.Tuple(k)
|
|
|
|
w.Write([]byte("\n { key: "))
|
|
for j := 0; j < kt.Count(); j++ {
|
|
if j > 0 {
|
|
w.Write([]byte(", "))
|
|
}
|
|
|
|
isAddr := val.IsAddrEncoding(kd.Types[j].Enc)
|
|
if isAddr {
|
|
w.Write([]byte("#"))
|
|
}
|
|
w.Write([]byte(hex.EncodeToString(kd.GetField(j, kt))))
|
|
if isAddr {
|
|
w.Write([]byte(" ("))
|
|
key, err := GetField(ctx, kd, j, kt, ns)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
w.Write([]byte(fmt.Sprint(key)))
|
|
w.Write([]byte(")"))
|
|
}
|
|
|
|
}
|
|
|
|
if node.IsLeaf() {
|
|
v := node.GetValue(i)
|
|
vt := val.Tuple(v)
|
|
|
|
w.Write([]byte(" value: "))
|
|
for j := 0; j < vt.Count(); j++ {
|
|
if j > 0 {
|
|
w.Write([]byte(", "))
|
|
}
|
|
isAddr := val.IsAddrEncoding(vd.Types[j].Enc)
|
|
if isAddr {
|
|
w.Write([]byte("#"))
|
|
}
|
|
w.Write([]byte(hex.EncodeToString(vd.GetField(j, vt))))
|
|
if isAddr {
|
|
w.Write([]byte(" ("))
|
|
value, err := GetField(ctx, vd, j, vt, ns)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
w.Write([]byte(fmt.Sprint(value)))
|
|
w.Write([]byte(")"))
|
|
}
|
|
}
|
|
|
|
w.Write([]byte(" }"))
|
|
} else {
|
|
ref := node.getAddress(i)
|
|
|
|
w.Write([]byte(" ref: #"))
|
|
w.Write([]byte(ref.String()))
|
|
w.Write([]byte(" }"))
|
|
}
|
|
}
|
|
|
|
w.Write([]byte("\n"))
|
|
return nil
|
|
}
|
|
|
|
// OutputProllyNodeBytes writes the node given to the writer given in a semi-human-readable format, where values are still
|
|
// displayed in hex-encoded byte strings, but are delineated into their fields. All nodes have keys displayed in this
|
|
// manner. Interior nodes have their child hash references spelled out, leaf nodes have value tuples delineated like
|
|
// the keys
|
|
func OutputProllyNodeBytes(w io.Writer, node Node) error {
|
|
for i := 0; i < int(node.count); i++ {
|
|
k := node.GetKey(i)
|
|
kt := val.Tuple(k)
|
|
|
|
w.Write([]byte("\n { key: "))
|
|
for j := 0; j < kt.Count(); j++ {
|
|
if j > 0 {
|
|
w.Write([]byte(", "))
|
|
}
|
|
|
|
w.Write([]byte(hex.EncodeToString(kt.GetField(j))))
|
|
}
|
|
|
|
if node.IsLeaf() {
|
|
v := node.GetValue(i)
|
|
vt := val.Tuple(v)
|
|
|
|
w.Write([]byte(" value: "))
|
|
for j := 0; j < vt.Count(); j++ {
|
|
if j > 0 {
|
|
w.Write([]byte(", "))
|
|
}
|
|
w.Write([]byte(hex.EncodeToString(vt.GetField(j))))
|
|
}
|
|
|
|
w.Write([]byte(" }"))
|
|
} else {
|
|
ref := node.getAddress(i)
|
|
|
|
w.Write([]byte(" ref: #"))
|
|
w.Write([]byte(ref.String()))
|
|
w.Write([]byte(" }"))
|
|
}
|
|
}
|
|
|
|
w.Write([]byte("\n"))
|
|
return nil
|
|
}
|
|
|
|
func OutputAddressMapNode(w io.Writer, node Node) error {
|
|
for i := 0; i < int(node.count); i++ {
|
|
k := node.GetKey(i)
|
|
w.Write([]byte("\n { key: "))
|
|
w.Write(k)
|
|
|
|
ref := node.getAddress(i)
|
|
|
|
w.Write([]byte(" ref: #"))
|
|
w.Write([]byte(ref.String()))
|
|
w.Write([]byte(" }"))
|
|
}
|
|
w.Write([]byte("\n"))
|
|
return nil
|
|
}
|
|
|
|
func ValueFromNode(root Node) types.Value {
|
|
return types.SerialMessage(root.bytes())
|
|
}
|