added address map serialization

This commit is contained in:
Andy Arthur
2022-05-03 09:39:29 -07:00
parent 68c796ff49
commit 0087c58d8c
13 changed files with 502 additions and 97 deletions

View File

@@ -0,0 +1,237 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by the FlatBuffers compiler. DO NOT EDIT.
package serial
import (
flatbuffers "github.com/google/flatbuffers/go"
)
// AddressMap is the generated read/mutate accessor for a serialized
// AddressMap table. It wraps the flatbuffers.Table that locates the table
// inside a byte buffer.
type AddressMap struct {
_tab flatbuffers.Table
}
// GetRootAsAddressMap reads the root table offset stored at buf[offset:] and
// returns an AddressMap initialized at that position (root offset + offset).
func GetRootAsAddressMap(buf []byte, offset flatbuffers.UOffsetT) *AddressMap {
n := flatbuffers.GetUOffsetT(buf[offset:])
x := &AddressMap{}
x.Init(buf, n+offset)
return x
}
// GetSizePrefixedRootAsAddressMap is the size-prefixed variant of
// GetRootAsAddressMap: it skips flatbuffers.SizeUint32 bytes past offset
// before reading the root table offset, and positions the accessor accordingly.
func GetSizePrefixedRootAsAddressMap(buf []byte, offset flatbuffers.UOffsetT) *AddressMap {
n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:])
x := &AddressMap{}
x.Init(buf, n+offset+flatbuffers.SizeUint32)
return x
}
// Init points the accessor at the table located at position i of buf.
// buf is stored as-is (no copy is made here).
func (rcv *AddressMap) Init(buf []byte, i flatbuffers.UOffsetT) {
rcv._tab.Bytes = buf
rcv._tab.Pos = i
}
// Table returns the underlying flatbuffers.Table by value.
func (rcv *AddressMap) Table() flatbuffers.Table {
return rcv._tab
}
// KeyItems returns byte j of the key_items vector (vtable offset 4), or 0
// when the vtable lookup yields 0. j is not range-checked in this method.
func (rcv *AddressMap) KeyItems(j int) byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
}
return 0
}
// KeyItemsLength returns the element count of the key_items vector
// (vtable offset 4), or 0 when the vtable lookup yields 0.
func (rcv *AddressMap) KeyItemsLength() int {
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
if o != 0 {
return rcv._tab.VectorLen(o)
}
return 0
}
// KeyItemsBytes returns the whole key_items vector (vtable offset 4) as a
// byte slice obtained from ByteVector, or nil when the vtable lookup yields 0.
func (rcv *AddressMap) KeyItemsBytes() []byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
if o != 0 {
return rcv._tab.ByteVector(o + rcv._tab.Pos)
}
return nil
}
// MutateKeyItems overwrites byte j of the key_items vector (vtable offset 4)
// with n and returns the result of MutateByte. It returns false without
// writing when the vtable lookup yields 0.
func (rcv *AddressMap) MutateKeyItems(j int, n byte) bool {
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
}
return false
}
// KeyOffsets returns element j of the key_offsets vector (vtable offset 6),
// or 0 when the vtable lookup yields 0. Elements are uint16, so the byte
// position is j*2 past the vector start. j is not range-checked here.
func (rcv *AddressMap) KeyOffsets(j int) uint16 {
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.GetUint16(a + flatbuffers.UOffsetT(j*2))
}
return 0
}
// KeyOffsetsLength returns the element count (not the byte count) of the
// key_offsets vector (vtable offset 6), or 0 when the vtable lookup yields 0.
func (rcv *AddressMap) KeyOffsetsLength() int {
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
if o != 0 {
return rcv._tab.VectorLen(o)
}
return 0
}
// MutateKeyOffsets overwrites element j of the key_offsets vector (vtable
// offset 6) with n and returns the result of MutateUint16. It returns false
// without writing when the vtable lookup yields 0.
func (rcv *AddressMap) MutateKeyOffsets(j int, n uint16) bool {
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.MutateUint16(a+flatbuffers.UOffsetT(j*2), n)
}
return false
}
// AddressArray returns byte j of the address_array vector (vtable offset 8),
// or 0 when the vtable lookup yields 0. j indexes bytes, not whole addresses,
// and is not range-checked in this method.
func (rcv *AddressMap) AddressArray(j int) byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
}
return 0
}
// AddressArrayLength returns the length in bytes of the address_array vector
// (vtable offset 8), or 0 when the vtable lookup yields 0.
func (rcv *AddressMap) AddressArrayLength() int {
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
if o != 0 {
return rcv._tab.VectorLen(o)
}
return 0
}
// AddressArrayBytes returns the whole address_array vector (vtable offset 8)
// as a byte slice obtained from ByteVector, or nil when the vtable lookup
// yields 0.
func (rcv *AddressMap) AddressArrayBytes() []byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
if o != 0 {
return rcv._tab.ByteVector(o + rcv._tab.Pos)
}
return nil
}
// MutateAddressArray overwrites byte j of the address_array vector (vtable
// offset 8) with n and returns the result of MutateByte. It returns false
// without writing when the vtable lookup yields 0.
func (rcv *AddressMap) MutateAddressArray(j int, n byte) bool {
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
}
return false
}
// SubtreeCounts returns byte j of the subtree_counts vector (vtable offset
// 10), or 0 when the vtable lookup yields 0. The schema describes this vector
// as uvarint-encoded counts, so j indexes raw encoded bytes rather than
// decoded counts. j is not range-checked in this method.
func (rcv *AddressMap) SubtreeCounts(j int) byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
}
return 0
}
// SubtreeCountsLength returns the length in bytes of the subtree_counts
// vector (vtable offset 10), or 0 when the vtable lookup yields 0.
func (rcv *AddressMap) SubtreeCountsLength() int {
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
if o != 0 {
return rcv._tab.VectorLen(o)
}
return 0
}
// SubtreeCountsBytes returns the whole subtree_counts vector (vtable offset
// 10) as a byte slice obtained from ByteVector, or nil when the vtable lookup
// yields 0.
func (rcv *AddressMap) SubtreeCountsBytes() []byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
if o != 0 {
return rcv._tab.ByteVector(o + rcv._tab.Pos)
}
return nil
}
// MutateSubtreeCounts overwrites byte j of the subtree_counts vector (vtable
// offset 10) with n and returns the result of MutateByte. It returns false
// without writing when the vtable lookup yields 0.
func (rcv *AddressMap) MutateSubtreeCounts(j int, n byte) bool {
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
if o != 0 {
a := rcv._tab.Vector(o)
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
}
return false
}
// TreeCount returns the tree_count scalar (vtable offset 12), or 0 when the
// vtable lookup yields 0.
func (rcv *AddressMap) TreeCount() uint64 {
o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
if o != 0 {
return rcv._tab.GetUint64(o + rcv._tab.Pos)
}
return 0
}
// MutateTreeCount writes n into the tree_count slot (vtable offset 12) and
// returns the result of MutateUint64Slot.
func (rcv *AddressMap) MutateTreeCount(n uint64) bool {
return rcv._tab.MutateUint64Slot(12, n)
}
// TreeLevel returns the tree_level scalar (vtable offset 14), or 0 when the
// vtable lookup yields 0. The schema documents level 0 as a leaf node.
func (rcv *AddressMap) TreeLevel() byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
if o != 0 {
return rcv._tab.GetByte(o + rcv._tab.Pos)
}
return 0
}
// MutateTreeLevel writes n into the tree_level slot (vtable offset 14) and
// returns the result of MutateByteSlot.
func (rcv *AddressMap) MutateTreeLevel(n byte) bool {
return rcv._tab.MutateByteSlot(14, n)
}
// AddressMapStart begins a new AddressMap table on builder with 6 field slots.
func AddressMapStart(builder *flatbuffers.Builder) {
builder.StartObject(6)
}
// AddressMapAddKeyItems records keyItems, the offset of the key_items vector,
// in field slot 0 of the table under construction (default 0).
func AddressMapAddKeyItems(builder *flatbuffers.Builder, keyItems flatbuffers.UOffsetT) {
builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(keyItems), 0)
}
// AddressMapStartKeyItemsVector starts a vector of numElems elements with
// element size 1 and alignment 1, returning StartVector's result.
func AddressMapStartKeyItemsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
return builder.StartVector(1, numElems, 1)
}
// AddressMapAddKeyOffsets records keyOffsets, the offset of the key_offsets
// vector, in field slot 1 of the table under construction (default 0).
func AddressMapAddKeyOffsets(builder *flatbuffers.Builder, keyOffsets flatbuffers.UOffsetT) {
builder.PrependUOffsetTSlot(1, flatbuffers.UOffsetT(keyOffsets), 0)
}
// AddressMapStartKeyOffsetsVector starts a vector of numElems elements with
// element size 2 and alignment 2 (uint16 offsets), returning StartVector's
// result.
func AddressMapStartKeyOffsetsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
return builder.StartVector(2, numElems, 2)
}
// AddressMapAddAddressArray records addressArray, the offset of the
// address_array vector, in field slot 2 of the table under construction
// (default 0).
func AddressMapAddAddressArray(builder *flatbuffers.Builder, addressArray flatbuffers.UOffsetT) {
builder.PrependUOffsetTSlot(2, flatbuffers.UOffsetT(addressArray), 0)
}
// AddressMapStartAddressArrayVector starts a vector of numElems elements with
// element size 1 and alignment 1, returning StartVector's result.
func AddressMapStartAddressArrayVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
return builder.StartVector(1, numElems, 1)
}
// AddressMapAddSubtreeCounts records subtreeCounts, the offset of the
// subtree_counts vector, in field slot 3 of the table under construction
// (default 0).
func AddressMapAddSubtreeCounts(builder *flatbuffers.Builder, subtreeCounts flatbuffers.UOffsetT) {
builder.PrependUOffsetTSlot(3, flatbuffers.UOffsetT(subtreeCounts), 0)
}
// AddressMapStartSubtreeCountsVector starts a vector of numElems elements
// with element size 1 and alignment 1, returning StartVector's result.
func AddressMapStartSubtreeCountsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
return builder.StartVector(1, numElems, 1)
}
// AddressMapAddTreeCount writes treeCount into field slot 4 of the table
// under construction, with 0 passed as the field's default.
func AddressMapAddTreeCount(builder *flatbuffers.Builder, treeCount uint64) {
builder.PrependUint64Slot(4, treeCount, 0)
}
// AddressMapAddTreeLevel writes treeLevel into field slot 5 of the table
// under construction, with 0 passed as the field's default.
func AddressMapAddTreeLevel(builder *flatbuffers.Builder, treeLevel byte) {
builder.PrependByteSlot(5, treeLevel, 0)
}
// AddressMapEnd finishes the table on builder and returns the offset
// produced by EndObject.
func AddressMapEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT {
return builder.EndObject()
}

View File

@@ -23,6 +23,7 @@ const CommitFileID = "DCMT"
const RootValueFileID = "RTVL"
const TableFileID = "DTBL"
const ProllyTreeNodeFileID = "PRLY"
const AddressMapFileID = "ADRM"
func GetFileID(bs []byte) string {
if len(bs) < 8 {

41
go/serial/addressmap.fbs Normal file
View File

@@ -0,0 +1,41 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace serial;
// AddressMap is the serialized form of one prolly tree node whose values
// are chunk addresses.
table AddressMap {
// sorted array of key items
key_items:[ubyte] (required);
// item offsets for |key_items|, zeroth offset omitted
key_offsets:[uint16] (required);
// array of chunk addresses
// - subtree addresses for internal prolly tree nodes
// - value addresses for AddressMap leaf nodes
address_array:[ubyte] (required);
// array of uvarint encoded subtree counts
subtree_counts:[ubyte];
// total count of prolly tree
tree_count:uint64;
// prolly tree level, 0 for leaf nodes
tree_level:uint8;
}
// KEEP THIS IN SYNC WITH fileidentifiers.go
file_identifier "ADRM";
root_type AddressMap;

View File

@@ -23,6 +23,7 @@ const CommitFileID = "DCMT"
const RootValueFileID = "RTVL"
const TableFileID = "DTBL"
const ProllyTreeNodeFileID = "PRLY"
const AddressMapFileID = "ADRM"
func GetFileID(bs []byte) string {
if len(bs) < 8 {

View File

@@ -14,6 +14,7 @@ fi
flatc -o $GEN_DIR --gen-onefile --filename-suffix "" --gen-mutable --go-namespace "serial" --go \
commit.fbs \
prolly.fbs \
addressmap.fbs \
rootvalue.fbs \
schema.fbs \
storeroot.fbs \

View File

@@ -19,9 +19,6 @@ import (
"context"
"io"
fb "github.com/google/flatbuffers/go"
"github.com/dolthub/dolt/go/gen/fb/serial"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/prolly/message"
@@ -164,7 +161,7 @@ func newAddrMapBuilder(level int) *addrMapBuilder {
var _ tree.NodeBuilderFactory[*addrMapBuilder] = newAddrMapBuilder
type addrMapBuilder struct {
keys, values []tree.Item
keys, values [][]byte
size, level int
subtrees tree.SubtreeCounts
@@ -201,44 +198,7 @@ func (nb *addrMapBuilder) reset() {
}
func (nb *addrMapBuilder) Build(pool pool.BuffPool) (node tree.Node) {
var (
keyTups, keyOffs fb.UOffsetT
refArr, cardArr fb.UOffsetT
)
keySz, valSz, bufSz := estimateBufferSize(nb.keys, nb.values, nb.subtrees)
b := getFlatbufferBuilder(pool, bufSz)
// serialize keys and offsets
keyTups = writeItemBytes(b, nb.keys, keySz)
serial.ProllyTreeNodeStartKeyOffsetsVector(b, len(nb.keys)-1)
keyOffs = writeItemOffsets(b, nb.keys, keySz)
if nb.level > 0 {
// serialize child refs and subtree counts for internal nodes
refArr = writeItemBytes(b, nb.values, valSz)
cardArr = writeCountArray(b, nb.subtrees)
} else {
// serialize value refs for leaf nodes
refArr = writeItemBytes(b, nb.values, valSz)
}
// populate the node's vtable
serial.ProllyTreeNodeStart(b)
serial.ProllyTreeNodeAddKeyItems(b, keyTups)
serial.ProllyTreeNodeAddKeyOffsets(b, keyOffs)
if nb.level > 0 {
serial.ProllyTreeNodeAddAddressArray(b, refArr)
serial.ProllyTreeNodeAddSubtreeCounts(b, cardArr)
serial.ProllyTreeNodeAddTreeCount(b, nb.subtrees.Sum())
} else {
serial.ProllyTreeNodeAddAddressArray(b, refArr)
serial.ProllyTreeNodeAddTreeCount(b, uint64(len(nb.keys)))
}
serial.ProllyTreeNodeAddTreeLevel(b, uint8(nb.level))
b.FinishWithFileIdentifier(serial.ProllyTreeNodeEnd(b), []byte(serial.ProllyTreeNodeFileID))
msg := message.SerializeAddressMap(pool, nb.keys, nb.values, nb.level, nb.subtrees)
nb.reset()
buf := b.FinishedBytes()
return tree.NodeFromBytes(buf)
return tree.NodeFromBytes(msg)
}

View File

@@ -294,6 +294,7 @@ func (nb *mapBuilder) reset() {
// Build serializes the builder's accumulated keys, values, level and subtree
// counts into a prolly map message, resets the builder, and returns the
// message wrapped as a tree.Node.
func (nb *mapBuilder) Build(pool pool.BuffPool) (node tree.Node) {
	serialized := message.SerializeProllyMap(pool, nb.keys, nb.values, nb.level, nb.subtrees)
	// The builder is reset only after its contents have been serialized.
	nb.reset()
	node = tree.NodeFromBytes(serialized)
	return node
}

View File

@@ -0,0 +1,147 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package message
import (
"context"
"encoding/binary"
fb "github.com/google/flatbuffers/go"
"github.com/dolthub/dolt/go/gen/fb/serial"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/val"
)
const (
// addressMapKeyOffsetsVOffset is the vtable offset of the key_offsets field.
// It mirrors the literal used by the generated serial.AddressMap.KeyOffsets()
// and serial.AddressMap.KeyOffsetsLength() accessors, so it is only as
// stable as the flatbuffers schema that defines it.
addressMapKeyOffsetsVOffset = 6
)
// addressMapFileID is the file identifier written by SerializeAddressMap,
// held as a byte slice so it is not re-converted on every call.
var addressMapFileID = []byte(serial.AddressMapFileID)
// SerializeAddressMap encodes one address-map tree node as a flatbuffer
// Message finished with the AddressMap file identifier.
//
// keys and addrs are parallel slices (their lengths are asserted equal while
// sizing the buffer). level is the node's tree level: when level > 0 the
// subtree counts are written and the tree count is their sum; otherwise no
// subtree counts are written and the tree count is len(keys).
func SerializeAddressMap(pool pool.BuffPool, keys, addrs [][]byte, level int, subtrees []uint64) Message {
	keyBytes, addrBytes, capacity := estimateAddressMapSize(keys, addrs, subtrees)
	b := getFlatbufferBuilder(pool, capacity)

	// All vectors are written before the table is started. The offsets
	// vector holds len(keys)-1 entries because the zeroth offset is omitted.
	keyVec := writeItemBytes(b, keys, keyBytes)
	serial.AddressMapStartKeyOffsetsVector(b, len(keys)-1)
	offsetVec := writeItemOffsets(b, keys, keyBytes)
	addrVec := writeItemBytes(b, addrs, addrBytes)

	internal := level > 0
	var countVec fb.UOffsetT
	if internal {
		countVec = writeCountArray(b, subtrees)
	}

	// Populate the table; field order matches the original write order.
	serial.AddressMapStart(b)
	serial.AddressMapAddKeyItems(b, keyVec)
	serial.AddressMapAddKeyOffsets(b, offsetVec)
	serial.AddressMapAddAddressArray(b, addrVec)
	treeCount := uint64(len(keys))
	if internal {
		serial.AddressMapAddSubtreeCounts(b, countVec)
		treeCount = sumSubtrees(subtrees)
	}
	serial.AddressMapAddTreeCount(b, treeCount)
	serial.AddressMapAddTreeLevel(b, uint8(level))

	root := serial.AddressMapEnd(b)
	b.FinishWithFileIdentifier(root, addressMapFileID)
	return b.FinishedBytes()
}
// getAddressMapKeys returns the key items of an AddressMap message together
// with the raw bytes of its key offsets vector.
func getAddressMapKeys(msg Message) (keys val.SlicedBuffer) {
	root := serial.GetRootAsAddressMap(msg, 0)
	return val.SlicedBuffer{
		Buf:  root.KeyItemsBytes(),
		Offs: getAddressMapKeyOffsets(root),
	}
}
// getAddressMapValues returns the address array of an AddressMap message.
// The message stores no offsets for its addresses, so the offsets come from
// offsetsForAddressArray, derived from the array itself.
func getAddressMapValues(msg Message) (values val.SlicedBuffer) {
	addrs := serial.GetRootAsAddressMap(msg, 0).AddressArrayBytes()
	values.Buf, values.Offs = addrs, offsetsForAddressArray(addrs)
	return values
}
// walkAddressMapAddresses invokes cb once per address in the message's
// address array, in stored order, and stops at the first error cb returns.
// Trailing bytes that do not make up a whole address are ignored.
func walkAddressMapAddresses(ctx context.Context, msg Message, cb func(ctx context.Context, addr hash.Hash) error) error {
	rest := serial.GetRootAsAddressMap(msg, 0).AddressArrayBytes()
	// Consume the array one fixed-size address at a time.
	for len(rest) >= addrSize {
		if err := cb(ctx, hash.New(rest[:addrSize])); err != nil {
			return err
		}
		rest = rest[addrSize:]
	}
	return nil
}
// getAddressMapCount returns the number of keys in an AddressMap message.
// It returns 0 when the key items vector is empty.
func getAddressMapCount(msg Message) uint16 {
	root := serial.GetRootAsAddressMap(msg, 0)
	if root.KeyItemsLength() != 0 {
		// The zeroth offset is omitted from the offsets vector, so there is
		// one more key than there are stored offsets.
		return uint16(root.KeyOffsetsLength() + 1)
	}
	return 0
}
// getAddressMapTreeLevel returns the tree level stored in an AddressMap
// message, widened to int.
func getAddressMapTreeLevel(msg Message) int {
	level := serial.GetRootAsAddressMap(msg, 0).TreeLevel()
	return int(level)
}
// getAddressMapTreeCount returns the tree count stored in an AddressMap
// message, converted from uint64 to int.
func getAddressMapTreeCount(msg Message) int {
	count := serial.GetRootAsAddressMap(msg, 0).TreeCount()
	return int(count)
}
// getAddressMapSubtrees decodes the subtree counts of an AddressMap message,
// passing the message's key count to readSubtreeCounts as the number of
// counts to read.
func getAddressMapSubtrees(msg Message) []uint64 {
	n := int(getAddressMapCount(msg))
	encoded := serial.GetRootAsAddressMap(msg, 0).SubtreeCountsBytes()
	return readSubtreeCounts(n, encoded)
}
// getAddressMapKeyOffsets returns the raw bytes of the key offsets vector,
// sliced directly out of the table's buffer. The field is located through the
// hard-coded vtable offset addressMapKeyOffsetsVOffset.
func getAddressMapKeyOffsets(pm *serial.AddressMap) []byte {
	tab := pm.Table()
	field := fb.UOffsetT(tab.Offset(addressMapKeyOffsetsVOffset))
	begin := int(tab.Vector(field))
	// Each offset is a uint16, i.e. two bytes per element.
	end := begin + pm.KeyOffsetsLength()*2
	return tab.Bytes[begin:end]
}
// estimateAddressMapSize returns the exact byte sizes of the key and address
// vectors (keySz, addrSz) and an estimate of the size of the finished
// flatbuffer (totalSz).
//
// It asserts that keys and addresses have equal length, and that neither
// vector exceeds MaxVectorOffset, the same limit estimateProllyMapSize
// enforces. Key offsets are stored as uint16, so a larger key vector could
// not be addressed by its offsets.
func estimateAddressMapSize(keys, addresses [][]byte, subtrees []uint64) (keySz, addrSz, totalSz int) {
	assertTrue(len(keys) == len(addresses))
	for i := range keys {
		keySz += len(keys[i])
		addrSz += len(addresses[i])
	}

	// Fail loudly here rather than serializing a node with wrapped offsets.
	assertTrue(keySz <= int(MaxVectorOffset))
	assertTrue(addrSz <= int(MaxVectorOffset))

	totalSz += keySz + addrSz                        // keys and addresses
	totalSz += len(keys) * uint16Size                // key offsets
	totalSz += len(subtrees) * binary.MaxVarintLen64 // subtree counts (upper bound)
	totalSz += 8 + 1 + 1 + 1                         // metadata
	totalSz += 72                                    // vtable (approx)
	return
}

View File

@@ -23,8 +23,8 @@ import (
const (
maxChunkSz = math.MaxUint16
addrSz = hash.ByteLen
offsetCount = maxChunkSz / addrSz
addrSize = hash.ByteLen
offsetCount = maxChunkSz / addrSize
uint16Size = 2
)
@@ -34,16 +34,16 @@ func init() {
addressOffsets = make([]byte, offsetCount*uint16Size)
buf := addressOffsets
off := uint16(addrSz)
off := uint16(addrSize)
for len(buf) > 0 {
binary.LittleEndian.PutUint16(buf, off)
buf = buf[uint16Size:]
off += uint16(addrSz)
off += uint16(addrSize)
}
}
func offsetsForAddressArray(arr []byte) (offs []byte) {
cnt := len(arr) / addrSz
cnt := len(arr) / addrSize
offs = addressOffsets[:cnt*uint16Size]
return
}

View File

@@ -30,6 +30,8 @@ func GetKeys(msg Message) val.SlicedBuffer {
switch id {
case serial.ProllyTreeNodeFileID:
return getProllyMapKeys(msg)
case serial.AddressMapFileID:
return getAddressMapKeys(msg)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}
@@ -40,6 +42,8 @@ func GetValues(msg Message) val.SlicedBuffer {
switch id {
case serial.ProllyTreeNodeFileID:
return getProllyMapValues(msg)
case serial.AddressMapFileID:
return getAddressMapValues(msg)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}
@@ -50,6 +54,8 @@ func WalkAddresses(ctx context.Context, msg Message, cb func(ctx context.Context
switch id {
case serial.ProllyTreeNodeFileID:
return walkProllyMapAddresses(ctx, msg, cb)
case serial.AddressMapFileID:
return walkAddressMapAddresses(ctx, msg, cb)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}
@@ -60,6 +66,8 @@ func GetCount(msg Message) uint16 {
switch id {
case serial.ProllyTreeNodeFileID:
return getProllyMapCount(msg)
case serial.AddressMapFileID:
return getAddressMapCount(msg)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}
@@ -70,6 +78,8 @@ func GetTreeLevel(msg Message) int {
switch id {
case serial.ProllyTreeNodeFileID:
return getProllyMapTreeLevel(msg)
case serial.AddressMapFileID:
return getAddressMapTreeLevel(msg)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}
@@ -80,6 +90,8 @@ func GetTreeCount(msg Message) int {
switch id {
case serial.ProllyTreeNodeFileID:
return getProllyMapTreeCount(msg)
case serial.AddressMapFileID:
return getAddressMapTreeCount(msg)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}
@@ -90,6 +102,8 @@ func GetSubtrees(msg Message) []uint64 {
switch id {
case serial.ProllyTreeNodeFileID:
return getProllyMapSubtrees(msg)
case serial.AddressMapFileID:
return getAddressMapSubtrees(msg)
default:
panic(fmt.Sprintf("unknown message id %s", id))
}

View File

@@ -1,4 +1,4 @@
// Copyright 2021 Dolthub, Inc.
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -16,6 +16,8 @@ package message
import (
"context"
"encoding/binary"
"fmt"
fb "github.com/google/flatbuffers/go"
@@ -26,13 +28,15 @@ import (
)
const (
// These constants are mirrored from serial.ProllyTreeNode.KeyOffsetsLength()
// and serial.ProllyTreeNode.ValueOffsetsLength() respectively.
// They are only as stable as the flatbuffers schemas that define them.
keyOffsetsVOffset = 6
valueOffsetsVOffset = 12
// These constants are mirrored from serial.ProllyTreeNode.KeyOffsets()
// and serial.ProllyTreeNode.ValueOffsets() respectively.
// They are only as stable as the flatbuffers schema that define them.
prollyMapKeyOffsetsVOffset = 6
prollyMapValueOffsetsVOffset = 12
)
var prollyMapFileID = []byte(serial.ProllyTreeNodeFileID)
func SerializeProllyMap(pool pool.BuffPool, keys, values [][]byte, level int, subtrees []uint64) Message {
var (
keyTups, keyOffs fb.UOffsetT
@@ -40,7 +44,7 @@ func SerializeProllyMap(pool pool.BuffPool, keys, values [][]byte, level int, su
refArr, cardArr fb.UOffsetT
)
keySz, valSz, bufSz := estimateBufferSize(keys, values, subtrees)
keySz, valSz, bufSz := estimateProllyMapSize(keys, values, subtrees)
b := getFlatbufferBuilder(pool, bufSz)
// serialize keys and offsets
@@ -75,22 +79,14 @@ func SerializeProllyMap(pool pool.BuffPool, keys, values [][]byte, level int, su
serial.ProllyTreeNodeAddKeyType(b, serial.ItemTypeTupleFormatAlpha)
serial.ProllyTreeNodeAddValueType(b, serial.ItemTypeTupleFormatAlpha)
serial.ProllyTreeNodeAddTreeLevel(b, uint8(level))
b.FinishWithFileIdentifier(serial.ProllyTreeNodeEnd(b), []byte(serial.ProllyTreeNodeFileID))
b.FinishWithFileIdentifier(serial.ProllyTreeNodeEnd(b), prollyMapFileID)
return b.FinishedBytes()
}
// sumSubtrees returns the total of all subtree counts; a nil or empty slice
// sums to 0.
func sumSubtrees(subtrees []uint64) (sum uint64) {
	for _, count := range subtrees {
		sum += count
	}
	return sum
}
func getProllyMapKeys(msg Message) (keys val.SlicedBuffer) {
pm := serial.GetRootAsProllyTreeNode(msg, 0)
keys.Buf = pm.KeyItemsBytes()
keys.Offs = getKeyOffsetsVector(pm)
keys.Offs = getProllyMapKeyOffsets(pm)
return
}
@@ -99,7 +95,7 @@ func getProllyMapValues(msg Message) (values val.SlicedBuffer) {
items := pm.ValueItemsBytes()
if items != nil {
values.Buf = items
values.Offs = getValueOffsetsVector(pm)
values.Offs = getProllyMapValueOffsets(pm)
} else {
values.Buf = pm.AddressArrayBytes()
values.Offs = offsetsForAddressArray(values.Buf)
@@ -111,7 +107,7 @@ func walkProllyMapAddresses(ctx context.Context, msg Message, cb func(ctx contex
pm := serial.GetRootAsProllyTreeNode(msg, 0)
arr := pm.AddressArrayBytes()
for i := 0; i < len(arr)/hash.ByteLen; i++ {
addr := hash.New(arr[i*addrSz : (i+1)*addrSz])
addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
if err := cb(ctx, addr); err != nil {
return err
}
@@ -121,7 +117,7 @@ func walkProllyMapAddresses(ctx context.Context, msg Message, cb func(ctx contex
arr2 := pm.ValueItemsBytes()
for i := 0; i < cnt; i++ {
o := pm.ValueAddressOffsets(i)
addr := hash.New(arr[o : o+addrSz])
addr := hash.New(arr[o : o+addrSize])
if err := cb(ctx, addr); err != nil {
return err
}
@@ -155,22 +151,48 @@ func getProllyMapSubtrees(msg Message) []uint64 {
return readSubtreeCounts(int(cnt), pm.SubtreeCountsBytes())
}
func getKeyOffsetsVector(pm *serial.ProllyTreeNode) []byte {
func getProllyMapKeyOffsets(pm *serial.ProllyTreeNode) []byte {
sz := pm.KeyOffsetsLength() * 2
tab := pm.Table()
vec := tab.Offset(keyOffsetsVOffset)
vec := tab.Offset(prollyMapKeyOffsetsVOffset)
start := int(tab.Vector(fb.UOffsetT(vec)))
stop := start + sz
return tab.Bytes[start:stop]
}
func getValueOffsetsVector(pm *serial.ProllyTreeNode) []byte {
func getProllyMapValueOffsets(pm *serial.ProllyTreeNode) []byte {
sz := pm.ValueOffsetsLength() * 2
tab := pm.Table()
vec := tab.Offset(valueOffsetsVOffset)
vec := tab.Offset(prollyMapValueOffsetsVOffset)
start := int(tab.Vector(fb.UOffsetT(vec)))
stop := start + sz
return tab.Bytes[start:stop]
}
// estimateProllyMapSize computes the exact byte sizes of the key and value
// tuple vectors (keySz, valSz) and an estimate of the size of the finished
// flatbuffer (bufSz). It panics when either vector exceeds MaxVectorOffset.
func estimateProllyMapSize(keys, values [][]byte, subtrees []uint64) (keySz, valSz, bufSz int) {
	for i, key := range keys {
		keySz += len(key)
		valSz += len(values[i])
	}

	// constraints enforced upstream
	limit := int(MaxVectorOffset)
	if keySz > limit {
		panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
	}
	if valSz > limit {
		panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
	}

	const (
		metadataSz = 8 + 1 + 1 + 1
		vtableSz   = 72 // approximate
	)
	tupleSz := keySz + valSz
	countsSz := len(subtrees) * binary.MaxVarintLen64
	offsetsSz := (len(keys) + len(values)) * 2

	bufSz = tupleSz + countsSz + offsetsSz + metadataSz + vtableSz
	return keySz, valSz, bufSz
}

View File

@@ -31,10 +31,10 @@ func TestGetKeyValueOffsetsVectors(t *testing.T) {
require.True(t, sumSize(keys)+sumSize(values) < MaxVectorOffset)
msg := SerializeProllyMap(sharedPool, keys, values, 0, nil)
// uses getKeyOffsetsVector with hard-coded vtable slot
// uses getProllyMapKeyOffsets with hard-coded vtable slot
keyBuf := getProllyMapKeys(msg)
// uses getValueOffsetsVector with hard-coded vtable slot
// uses getProllyMapValueOffsets with hard-coded vtable slot
valBuf := getProllyMapValues(msg)
for i := range keys {

View File

@@ -16,7 +16,6 @@ package message
import (
"encoding/binary"
"fmt"
"math"
fb "github.com/google/flatbuffers/go"
@@ -87,28 +86,9 @@ func WriteSubtreeCounts(sc []uint64) []byte {
return buf[:pos]
}
// estimateBufferSize returns the exact Size of the tuple vectors for keys and values,
// and an estimate of the overall Size of the final flatbuffer.
func estimateBufferSize(keys, values [][]byte, subtrees []uint64) (keySz, valSz, bufSz int) {
for i := range keys {
keySz += len(keys[i])
valSz += len(values[i])
func sumSubtrees(subtrees []uint64) (sum uint64) {
for i := range subtrees {
sum += subtrees[i]
}
refCntSz := len(subtrees) * binary.MaxVarintLen64
// constraints enforced upstream
if keySz > int(MaxVectorOffset) {
panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
}
if valSz > int(MaxVectorOffset) {
panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
}
bufSz += keySz + valSz // tuples
bufSz += refCntSz // subtree counts
bufSz += len(keys)*2 + len(values)*2 // offsets
bufSz += 8 + 1 + 1 + 1 // metadata
bufSz += 72 // vtable (approx)
return
}