mirror of
https://github.com/dolthub/dolt.git
synced 2026-02-12 10:32:27 -06:00
added address map serialization
This commit is contained in:
237
go/gen/fb/serial/addressmap.go
Normal file
237
go/gen/fb/serial/addressmap.go
Normal file
@@ -0,0 +1,237 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by the FlatBuffers compiler. DO NOT EDIT.
|
||||
|
||||
package serial
|
||||
|
||||
import (
|
||||
flatbuffers "github.com/google/flatbuffers/go"
|
||||
)
|
||||
|
||||
type AddressMap struct {
|
||||
_tab flatbuffers.Table
|
||||
}
|
||||
|
||||
func GetRootAsAddressMap(buf []byte, offset flatbuffers.UOffsetT) *AddressMap {
|
||||
n := flatbuffers.GetUOffsetT(buf[offset:])
|
||||
x := &AddressMap{}
|
||||
x.Init(buf, n+offset)
|
||||
return x
|
||||
}
|
||||
|
||||
func GetSizePrefixedRootAsAddressMap(buf []byte, offset flatbuffers.UOffsetT) *AddressMap {
|
||||
n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:])
|
||||
x := &AddressMap{}
|
||||
x.Init(buf, n+offset+flatbuffers.SizeUint32)
|
||||
return x
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) Init(buf []byte, i flatbuffers.UOffsetT) {
|
||||
rcv._tab.Bytes = buf
|
||||
rcv._tab.Pos = i
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) Table() flatbuffers.Table {
|
||||
return rcv._tab
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) KeyItems(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) KeyItemsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) KeyItemsBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) MutateKeyItems(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) KeyOffsets(j int) uint16 {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetUint16(a + flatbuffers.UOffsetT(j*2))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) KeyOffsetsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) MutateKeyOffsets(j int, n uint16) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateUint16(a+flatbuffers.UOffsetT(j*2), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) AddressArray(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) AddressArrayLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) AddressArrayBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) MutateAddressArray(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) SubtreeCounts(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) SubtreeCountsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) SubtreeCountsBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) MutateSubtreeCounts(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) TreeCount() uint64 {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
|
||||
if o != 0 {
|
||||
return rcv._tab.GetUint64(o + rcv._tab.Pos)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) MutateTreeCount(n uint64) bool {
|
||||
return rcv._tab.MutateUint64Slot(12, n)
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) TreeLevel() byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
|
||||
if o != 0 {
|
||||
return rcv._tab.GetByte(o + rcv._tab.Pos)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *AddressMap) MutateTreeLevel(n byte) bool {
|
||||
return rcv._tab.MutateByteSlot(14, n)
|
||||
}
|
||||
|
||||
func AddressMapStart(builder *flatbuffers.Builder) {
|
||||
builder.StartObject(6)
|
||||
}
|
||||
func AddressMapAddKeyItems(builder *flatbuffers.Builder, keyItems flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(keyItems), 0)
|
||||
}
|
||||
func AddressMapStartKeyItemsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func AddressMapAddKeyOffsets(builder *flatbuffers.Builder, keyOffsets flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(1, flatbuffers.UOffsetT(keyOffsets), 0)
|
||||
}
|
||||
func AddressMapStartKeyOffsetsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(2, numElems, 2)
|
||||
}
|
||||
func AddressMapAddAddressArray(builder *flatbuffers.Builder, addressArray flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(2, flatbuffers.UOffsetT(addressArray), 0)
|
||||
}
|
||||
func AddressMapStartAddressArrayVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func AddressMapAddSubtreeCounts(builder *flatbuffers.Builder, subtreeCounts flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(3, flatbuffers.UOffsetT(subtreeCounts), 0)
|
||||
}
|
||||
func AddressMapStartSubtreeCountsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func AddressMapAddTreeCount(builder *flatbuffers.Builder, treeCount uint64) {
|
||||
builder.PrependUint64Slot(4, treeCount, 0)
|
||||
}
|
||||
func AddressMapAddTreeLevel(builder *flatbuffers.Builder, treeLevel byte) {
|
||||
builder.PrependByteSlot(5, treeLevel, 0)
|
||||
}
|
||||
func AddressMapEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT {
|
||||
return builder.EndObject()
|
||||
}
|
||||
@@ -23,6 +23,7 @@ const CommitFileID = "DCMT"
|
||||
const RootValueFileID = "RTVL"
|
||||
const TableFileID = "DTBL"
|
||||
const ProllyTreeNodeFileID = "PRLY"
|
||||
const AddressMapFileID = "ADRM"
|
||||
|
||||
func GetFileID(bs []byte) string {
|
||||
if len(bs) < 8 {
|
||||
|
||||
41
go/serial/addressmap.fbs
Normal file
41
go/serial/addressmap.fbs
Normal file
@@ -0,0 +1,41 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
namespace serial;
|
||||
|
||||
table AddressMap {
|
||||
// sorted array of key items
|
||||
key_items:[ubyte] (required);
|
||||
// item offsets for |key_items|, zeroth offset omitted
|
||||
key_offsets:[uint16] (required);
|
||||
|
||||
// array of chunk addresses
|
||||
// - subtree addresses for internal prolly tree nodes
|
||||
// - value addresses for AddressMap leaf nodes
|
||||
address_array:[ubyte] (required);
|
||||
|
||||
// array of uvarint encoded subtree counts
|
||||
subtree_counts:[ubyte];
|
||||
// total count of prolly tree
|
||||
tree_count:uint64;
|
||||
// prolly tree level, 0 for leaf nodes
|
||||
tree_level:uint8;
|
||||
}
|
||||
|
||||
|
||||
// KEEP THIS IN SYNC WITH fileidentifiers.go
|
||||
file_identifier "ADRM";
|
||||
|
||||
root_type AddressMap;
|
||||
|
||||
@@ -23,6 +23,7 @@ const CommitFileID = "DCMT"
|
||||
const RootValueFileID = "RTVL"
|
||||
const TableFileID = "DTBL"
|
||||
const ProllyTreeNodeFileID = "PRLY"
|
||||
const AddressMapFileID = "ADRM"
|
||||
|
||||
func GetFileID(bs []byte) string {
|
||||
if len(bs) < 8 {
|
||||
|
||||
@@ -14,6 +14,7 @@ fi
|
||||
flatc -o $GEN_DIR --gen-onefile --filename-suffix "" --gen-mutable --go-namespace "serial" --go \
|
||||
commit.fbs \
|
||||
prolly.fbs \
|
||||
addressmap.fbs \
|
||||
rootvalue.fbs \
|
||||
schema.fbs \
|
||||
storeroot.fbs \
|
||||
|
||||
@@ -19,9 +19,6 @@ import (
|
||||
"context"
|
||||
"io"
|
||||
|
||||
fb "github.com/google/flatbuffers/go"
|
||||
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/prolly/message"
|
||||
@@ -164,7 +161,7 @@ func newAddrMapBuilder(level int) *addrMapBuilder {
|
||||
var _ tree.NodeBuilderFactory[*addrMapBuilder] = newAddrMapBuilder
|
||||
|
||||
type addrMapBuilder struct {
|
||||
keys, values []tree.Item
|
||||
keys, values [][]byte
|
||||
size, level int
|
||||
|
||||
subtrees tree.SubtreeCounts
|
||||
@@ -201,44 +198,7 @@ func (nb *addrMapBuilder) reset() {
|
||||
}
|
||||
|
||||
func (nb *addrMapBuilder) Build(pool pool.BuffPool) (node tree.Node) {
|
||||
var (
|
||||
keyTups, keyOffs fb.UOffsetT
|
||||
refArr, cardArr fb.UOffsetT
|
||||
)
|
||||
|
||||
keySz, valSz, bufSz := estimateBufferSize(nb.keys, nb.values, nb.subtrees)
|
||||
b := getFlatbufferBuilder(pool, bufSz)
|
||||
|
||||
// serialize keys and offsets
|
||||
keyTups = writeItemBytes(b, nb.keys, keySz)
|
||||
serial.ProllyTreeNodeStartKeyOffsetsVector(b, len(nb.keys)-1)
|
||||
keyOffs = writeItemOffsets(b, nb.keys, keySz)
|
||||
|
||||
if nb.level > 0 {
|
||||
// serialize child refs and subtree counts for internal nodes
|
||||
refArr = writeItemBytes(b, nb.values, valSz)
|
||||
cardArr = writeCountArray(b, nb.subtrees)
|
||||
} else {
|
||||
// serialize value refs for leaf nodes
|
||||
refArr = writeItemBytes(b, nb.values, valSz)
|
||||
}
|
||||
|
||||
// populate the node's vtable
|
||||
serial.ProllyTreeNodeStart(b)
|
||||
serial.ProllyTreeNodeAddKeyItems(b, keyTups)
|
||||
serial.ProllyTreeNodeAddKeyOffsets(b, keyOffs)
|
||||
if nb.level > 0 {
|
||||
serial.ProllyTreeNodeAddAddressArray(b, refArr)
|
||||
serial.ProllyTreeNodeAddSubtreeCounts(b, cardArr)
|
||||
serial.ProllyTreeNodeAddTreeCount(b, nb.subtrees.Sum())
|
||||
} else {
|
||||
serial.ProllyTreeNodeAddAddressArray(b, refArr)
|
||||
serial.ProllyTreeNodeAddTreeCount(b, uint64(len(nb.keys)))
|
||||
}
|
||||
serial.ProllyTreeNodeAddTreeLevel(b, uint8(nb.level))
|
||||
b.FinishWithFileIdentifier(serial.ProllyTreeNodeEnd(b), []byte(serial.ProllyTreeNodeFileID))
|
||||
msg := message.SerializeAddressMap(pool, nb.keys, nb.values, nb.level, nb.subtrees)
|
||||
nb.reset()
|
||||
|
||||
buf := b.FinishedBytes()
|
||||
return tree.NodeFromBytes(buf)
|
||||
return tree.NodeFromBytes(msg)
|
||||
}
|
||||
|
||||
@@ -294,6 +294,7 @@ func (nb *mapBuilder) reset() {
|
||||
|
||||
func (nb *mapBuilder) Build(pool pool.BuffPool) (node tree.Node) {
|
||||
msg := message.SerializeProllyMap(pool, nb.keys, nb.values, nb.level, nb.subtrees)
|
||||
nb.reset()
|
||||
return tree.NodeFromBytes(msg)
|
||||
}
|
||||
|
||||
|
||||
147
go/store/prolly/message/address_map.go
Normal file
147
go/store/prolly/message/address_map.go
Normal file
@@ -0,0 +1,147 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package message
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
|
||||
fb "github.com/google/flatbuffers/go"
|
||||
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
const (
|
||||
// This constant is mirrored from serial.AddressMap.KeyOffsetsLength()
|
||||
// It is only as stable as the flatbuffers schema that defines it.
|
||||
addressMapKeyOffsetsVOffset = 6
|
||||
)
|
||||
|
||||
var addressMapFileID = []byte(serial.AddressMapFileID)
|
||||
|
||||
func SerializeAddressMap(pool pool.BuffPool, keys, addrs [][]byte, level int, subtrees []uint64) Message {
|
||||
var (
|
||||
keyArr, keyOffs fb.UOffsetT
|
||||
addrArr, cardArr fb.UOffsetT
|
||||
)
|
||||
|
||||
keySz, addrSz, totalSz := estimateAddressMapSize(keys, addrs, subtrees)
|
||||
b := getFlatbufferBuilder(pool, totalSz)
|
||||
|
||||
// keys
|
||||
keyArr = writeItemBytes(b, keys, keySz)
|
||||
serial.AddressMapStartKeyOffsetsVector(b, len(keys)-1)
|
||||
keyOffs = writeItemOffsets(b, keys, keySz)
|
||||
|
||||
// addresses
|
||||
addrArr = writeItemBytes(b, addrs, addrSz)
|
||||
|
||||
// subtree cardinalities
|
||||
if level > 0 {
|
||||
cardArr = writeCountArray(b, subtrees)
|
||||
}
|
||||
|
||||
serial.AddressMapStart(b)
|
||||
serial.AddressMapAddKeyItems(b, keyArr)
|
||||
serial.AddressMapAddKeyOffsets(b, keyOffs)
|
||||
serial.AddressMapAddAddressArray(b, addrArr)
|
||||
|
||||
if level > 0 {
|
||||
serial.AddressMapAddSubtreeCounts(b, cardArr)
|
||||
serial.AddressMapAddTreeCount(b, sumSubtrees(subtrees))
|
||||
} else {
|
||||
serial.AddressMapAddTreeCount(b, uint64(len(keys)))
|
||||
}
|
||||
serial.AddressMapAddTreeLevel(b, uint8(level))
|
||||
b.FinishWithFileIdentifier(serial.AddressMapEnd(b), addressMapFileID)
|
||||
return b.FinishedBytes()
|
||||
}
|
||||
|
||||
func getAddressMapKeys(msg Message) (keys val.SlicedBuffer) {
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
keys.Buf = am.KeyItemsBytes()
|
||||
keys.Offs = getAddressMapKeyOffsets(am)
|
||||
return
|
||||
}
|
||||
|
||||
func getAddressMapValues(msg Message) (values val.SlicedBuffer) {
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
values.Buf = am.AddressArrayBytes()
|
||||
values.Offs = offsetsForAddressArray(values.Buf)
|
||||
return
|
||||
}
|
||||
|
||||
func walkAddressMapAddresses(ctx context.Context, msg Message, cb func(ctx context.Context, addr hash.Hash) error) error {
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
arr := am.AddressArrayBytes()
|
||||
for i := 0; i < len(arr)/hash.ByteLen; i++ {
|
||||
addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
|
||||
if err := cb(ctx, addr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getAddressMapCount(msg Message) uint16 {
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
if am.KeyItemsLength() == 0 {
|
||||
return 0
|
||||
}
|
||||
// zeroth offset ommitted from array
|
||||
return uint16(am.KeyOffsetsLength() + 1)
|
||||
}
|
||||
|
||||
func getAddressMapTreeLevel(msg Message) int {
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
return int(am.TreeLevel())
|
||||
}
|
||||
|
||||
func getAddressMapTreeCount(msg Message) int {
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
return int(am.TreeCount())
|
||||
}
|
||||
|
||||
func getAddressMapSubtrees(msg Message) []uint64 {
|
||||
cnt := getAddressMapCount(msg)
|
||||
am := serial.GetRootAsAddressMap(msg, 0)
|
||||
return readSubtreeCounts(int(cnt), am.SubtreeCountsBytes())
|
||||
}
|
||||
|
||||
func getAddressMapKeyOffsets(pm *serial.AddressMap) []byte {
|
||||
sz := pm.KeyOffsetsLength() * 2
|
||||
tab := pm.Table()
|
||||
vec := tab.Offset(addressMapKeyOffsetsVOffset)
|
||||
start := int(tab.Vector(fb.UOffsetT(vec)))
|
||||
stop := start + sz
|
||||
return tab.Bytes[start:stop]
|
||||
}
|
||||
|
||||
func estimateAddressMapSize(keys, addresses [][]byte, subtrees []uint64) (keySz, addrSz, totalSz int) {
|
||||
assertTrue(len(keys) == len(addresses))
|
||||
for i := range keys {
|
||||
keySz += len(keys[i])
|
||||
addrSz += len(addresses[i])
|
||||
}
|
||||
totalSz += keySz + addrSz
|
||||
totalSz += len(keys) * uint16Size
|
||||
totalSz += len(subtrees) * binary.MaxVarintLen64
|
||||
totalSz += 8 + 1 + 1 + 1
|
||||
totalSz += 72
|
||||
return
|
||||
}
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
const (
|
||||
maxChunkSz = math.MaxUint16
|
||||
addrSz = hash.ByteLen
|
||||
offsetCount = maxChunkSz / addrSz
|
||||
addrSize = hash.ByteLen
|
||||
offsetCount = maxChunkSz / addrSize
|
||||
uint16Size = 2
|
||||
)
|
||||
|
||||
@@ -34,16 +34,16 @@ func init() {
|
||||
addressOffsets = make([]byte, offsetCount*uint16Size)
|
||||
|
||||
buf := addressOffsets
|
||||
off := uint16(addrSz)
|
||||
off := uint16(addrSize)
|
||||
for len(buf) > 0 {
|
||||
binary.LittleEndian.PutUint16(buf, off)
|
||||
buf = buf[uint16Size:]
|
||||
off += uint16(addrSz)
|
||||
off += uint16(addrSize)
|
||||
}
|
||||
}
|
||||
|
||||
func offsetsForAddressArray(arr []byte) (offs []byte) {
|
||||
cnt := len(arr) / addrSz
|
||||
cnt := len(arr) / addrSize
|
||||
offs = addressOffsets[:cnt*uint16Size]
|
||||
return
|
||||
}
|
||||
|
||||
@@ -30,6 +30,8 @@ func GetKeys(msg Message) val.SlicedBuffer {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapKeys(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapKeys(msg)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
@@ -40,6 +42,8 @@ func GetValues(msg Message) val.SlicedBuffer {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapValues(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapValues(msg)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
@@ -50,6 +54,8 @@ func WalkAddresses(ctx context.Context, msg Message, cb func(ctx context.Context
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return walkProllyMapAddresses(ctx, msg, cb)
|
||||
case serial.AddressMapFileID:
|
||||
return walkAddressMapAddresses(ctx, msg, cb)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
@@ -60,6 +66,8 @@ func GetCount(msg Message) uint16 {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapCount(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapCount(msg)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
@@ -70,6 +78,8 @@ func GetTreeLevel(msg Message) int {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapTreeLevel(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapTreeLevel(msg)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
@@ -80,6 +90,8 @@ func GetTreeCount(msg Message) int {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapTreeCount(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapTreeCount(msg)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
@@ -90,6 +102,8 @@ func GetSubtrees(msg Message) []uint64 {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapSubtrees(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapSubtrees(msg)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown message id %s", id))
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -16,6 +16,8 @@ package message
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
fb "github.com/google/flatbuffers/go"
|
||||
|
||||
@@ -26,13 +28,15 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
// These constants are mirrored from serial.ProllyTreeNode.KeyOffsetsLength()
|
||||
// and serial.ProllyTreeNode.ValueOffsetsLength() respectively.
|
||||
// They are only as stable as the flatbuffers schemas that define them.
|
||||
keyOffsetsVOffset = 6
|
||||
valueOffsetsVOffset = 12
|
||||
// These constants are mirrored from serial.ProllyTreeNode.KeyOffsets()
|
||||
// and serial.ProllyTreeNode.ValueOffsets() respectively.
|
||||
// They are only as stable as the flatbuffers schema that defines them.
|
||||
prollyMapKeyOffsetsVOffset = 6
|
||||
prollyMapValueOffsetsVOffset = 12
|
||||
)
|
||||
|
||||
var prollyMapFileID = []byte(serial.ProllyTreeNodeFileID)
|
||||
|
||||
func SerializeProllyMap(pool pool.BuffPool, keys, values [][]byte, level int, subtrees []uint64) Message {
|
||||
var (
|
||||
keyTups, keyOffs fb.UOffsetT
|
||||
@@ -40,7 +44,7 @@ func SerializeProllyMap(pool pool.BuffPool, keys, values [][]byte, level int, su
|
||||
refArr, cardArr fb.UOffsetT
|
||||
)
|
||||
|
||||
keySz, valSz, bufSz := estimateBufferSize(keys, values, subtrees)
|
||||
keySz, valSz, bufSz := estimateProllyMapSize(keys, values, subtrees)
|
||||
b := getFlatbufferBuilder(pool, bufSz)
|
||||
|
||||
// serialize keys and offsets
|
||||
@@ -75,22 +79,14 @@ func SerializeProllyMap(pool pool.BuffPool, keys, values [][]byte, level int, su
|
||||
serial.ProllyTreeNodeAddKeyType(b, serial.ItemTypeTupleFormatAlpha)
|
||||
serial.ProllyTreeNodeAddValueType(b, serial.ItemTypeTupleFormatAlpha)
|
||||
serial.ProllyTreeNodeAddTreeLevel(b, uint8(level))
|
||||
b.FinishWithFileIdentifier(serial.ProllyTreeNodeEnd(b), []byte(serial.ProllyTreeNodeFileID))
|
||||
|
||||
b.FinishWithFileIdentifier(serial.ProllyTreeNodeEnd(b), prollyMapFileID)
|
||||
return b.FinishedBytes()
|
||||
}
|
||||
|
||||
// sumSubtrees returns the sum of all counts in |subtrees|. A nil or empty
// slice sums to zero.
func sumSubtrees(subtrees []uint64) (sum uint64) {
	for _, count := range subtrees {
		sum += count
	}
	return sum
}
|
||||
|
||||
func getProllyMapKeys(msg Message) (keys val.SlicedBuffer) {
|
||||
pm := serial.GetRootAsProllyTreeNode(msg, 0)
|
||||
keys.Buf = pm.KeyItemsBytes()
|
||||
keys.Offs = getKeyOffsetsVector(pm)
|
||||
keys.Offs = getProllyMapKeyOffsets(pm)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -99,7 +95,7 @@ func getProllyMapValues(msg Message) (values val.SlicedBuffer) {
|
||||
items := pm.ValueItemsBytes()
|
||||
if items != nil {
|
||||
values.Buf = items
|
||||
values.Offs = getValueOffsetsVector(pm)
|
||||
values.Offs = getProllyMapValueOffsets(pm)
|
||||
} else {
|
||||
values.Buf = pm.AddressArrayBytes()
|
||||
values.Offs = offsetsForAddressArray(values.Buf)
|
||||
@@ -111,7 +107,7 @@ func walkProllyMapAddresses(ctx context.Context, msg Message, cb func(ctx contex
|
||||
pm := serial.GetRootAsProllyTreeNode(msg, 0)
|
||||
arr := pm.AddressArrayBytes()
|
||||
for i := 0; i < len(arr)/hash.ByteLen; i++ {
|
||||
addr := hash.New(arr[i*addrSz : (i+1)*addrSz])
|
||||
addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
|
||||
if err := cb(ctx, addr); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -121,7 +117,7 @@ func walkProllyMapAddresses(ctx context.Context, msg Message, cb func(ctx contex
|
||||
arr2 := pm.ValueItemsBytes()
|
||||
for i := 0; i < cnt; i++ {
|
||||
o := pm.ValueAddressOffsets(i)
|
||||
addr := hash.New(arr[o : o+addrSz])
|
||||
addr := hash.New(arr[o : o+addrSize])
|
||||
if err := cb(ctx, addr); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -155,22 +151,48 @@ func getProllyMapSubtrees(msg Message) []uint64 {
|
||||
return readSubtreeCounts(int(cnt), pm.SubtreeCountsBytes())
|
||||
}
|
||||
|
||||
func getKeyOffsetsVector(pm *serial.ProllyTreeNode) []byte {
|
||||
func getProllyMapKeyOffsets(pm *serial.ProllyTreeNode) []byte {
|
||||
sz := pm.KeyOffsetsLength() * 2
|
||||
tab := pm.Table()
|
||||
vec := tab.Offset(keyOffsetsVOffset)
|
||||
vec := tab.Offset(prollyMapKeyOffsetsVOffset)
|
||||
start := int(tab.Vector(fb.UOffsetT(vec)))
|
||||
stop := start + sz
|
||||
|
||||
return tab.Bytes[start:stop]
|
||||
}
|
||||
|
||||
func getValueOffsetsVector(pm *serial.ProllyTreeNode) []byte {
|
||||
func getProllyMapValueOffsets(pm *serial.ProllyTreeNode) []byte {
|
||||
sz := pm.ValueOffsetsLength() * 2
|
||||
tab := pm.Table()
|
||||
vec := tab.Offset(valueOffsetsVOffset)
|
||||
vec := tab.Offset(prollyMapValueOffsetsVOffset)
|
||||
start := int(tab.Vector(fb.UOffsetT(vec)))
|
||||
stop := start + sz
|
||||
|
||||
return tab.Bytes[start:stop]
|
||||
}
|
||||
|
||||
// estimateProllyMapSize returns the exact Size of the tuple vectors for keys and values,
|
||||
// and an estimate of the overall Size of the final flatbuffer.
|
||||
func estimateProllyMapSize(keys, values [][]byte, subtrees []uint64) (keySz, valSz, bufSz int) {
|
||||
for i := range keys {
|
||||
keySz += len(keys[i])
|
||||
valSz += len(values[i])
|
||||
}
|
||||
refCntSz := len(subtrees) * binary.MaxVarintLen64
|
||||
|
||||
// constraints enforced upstream
|
||||
if keySz > int(MaxVectorOffset) {
|
||||
panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
|
||||
}
|
||||
if valSz > int(MaxVectorOffset) {
|
||||
panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
|
||||
}
|
||||
|
||||
bufSz += keySz + valSz // tuples
|
||||
bufSz += refCntSz // subtree counts
|
||||
bufSz += len(keys)*2 + len(values)*2 // offsets
|
||||
bufSz += 8 + 1 + 1 + 1 // metadata
|
||||
bufSz += 72 // vtable (approx)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -31,10 +31,10 @@ func TestGetKeyValueOffsetsVectors(t *testing.T) {
|
||||
require.True(t, sumSize(keys)+sumSize(values) < MaxVectorOffset)
|
||||
msg := SerializeProllyMap(sharedPool, keys, values, 0, nil)
|
||||
|
||||
// uses getKeyOffsetsVector with hard-coded vtable slot
|
||||
// uses getProllyMapKeyOffsetsVector with hard-coded vtable slot
|
||||
keyBuf := getProllyMapKeys(msg)
|
||||
|
||||
// uses getValueOffsetsVector with hard-coded vtable slot
|
||||
// uses getProllyMapValueOffsetsVector with hard-coded vtable slot
|
||||
valBuf := getProllyMapValues(msg)
|
||||
|
||||
for i := range keys {
|
||||
|
||||
@@ -16,7 +16,6 @@ package message
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
fb "github.com/google/flatbuffers/go"
|
||||
@@ -87,28 +86,9 @@ func WriteSubtreeCounts(sc []uint64) []byte {
|
||||
return buf[:pos]
|
||||
}
|
||||
|
||||
// estimateBufferSize returns the exact Size of the tuple vectors for keys and values,
|
||||
// and an estimate of the overall Size of the final flatbuffer.
|
||||
func estimateBufferSize(keys, values [][]byte, subtrees []uint64) (keySz, valSz, bufSz int) {
|
||||
for i := range keys {
|
||||
keySz += len(keys[i])
|
||||
valSz += len(values[i])
|
||||
func sumSubtrees(subtrees []uint64) (sum uint64) {
|
||||
for i := range subtrees {
|
||||
sum += subtrees[i]
|
||||
}
|
||||
refCntSz := len(subtrees) * binary.MaxVarintLen64
|
||||
|
||||
// constraints enforced upstream
|
||||
if keySz > int(MaxVectorOffset) {
|
||||
panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
|
||||
}
|
||||
if valSz > int(MaxVectorOffset) {
|
||||
panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
|
||||
}
|
||||
|
||||
bufSz += keySz + valSz // tuples
|
||||
bufSz += refCntSz // subtree counts
|
||||
bufSz += len(keys)*2 + len(values)*2 // offsets
|
||||
bufSz += 8 + 1 + 1 + 1 // metadata
|
||||
bufSz += 72 // vtable (approx)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user