mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-05 02:45:34 -05:00
Add Vector Index message type.
This commit is contained in:
@@ -42,6 +42,7 @@ const StashFileID = "STSH"
|
||||
const StatisticFileID = "STAT"
|
||||
const DoltgresRootValueFileID = "DGRV"
|
||||
const TupleFileID = "TUPL"
|
||||
// VectorIndexNodeFileID is the flatbuffers file identifier for VectorIndexNode messages.
const VectorIndexNodeFileID = "IVFF"
|
||||
|
||||
const MessageTypesKind int = 27
|
||||
|
||||
|
||||
@@ -0,0 +1,331 @@
|
||||
// Copyright 2022-2023 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by the FlatBuffers compiler. DO NOT EDIT.
|
||||
|
||||
package serial
|
||||
|
||||
import (
|
||||
flatbuffers "github.com/dolthub/flatbuffers/v23/go"
|
||||
)
|
||||
|
||||
type VectorIndexNode struct {
|
||||
_tab flatbuffers.Table
|
||||
}
|
||||
|
||||
func InitVectorIndexNodeRoot(o *VectorIndexNode, buf []byte, offset flatbuffers.UOffsetT) error {
|
||||
n := flatbuffers.GetUOffsetT(buf[offset:])
|
||||
return o.Init(buf, n+offset)
|
||||
}
|
||||
|
||||
func TryGetRootAsVectorIndexNode(buf []byte, offset flatbuffers.UOffsetT) (*VectorIndexNode, error) {
|
||||
x := &VectorIndexNode{}
|
||||
return x, InitVectorIndexNodeRoot(x, buf, offset)
|
||||
}
|
||||
|
||||
func TryGetSizePrefixedRootAsVectorIndexNode(buf []byte, offset flatbuffers.UOffsetT) (*VectorIndexNode, error) {
|
||||
x := &VectorIndexNode{}
|
||||
return x, InitVectorIndexNodeRoot(x, buf, offset+flatbuffers.SizeUint32)
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) Init(buf []byte, i flatbuffers.UOffsetT) error {
|
||||
rcv._tab.Bytes = buf
|
||||
rcv._tab.Pos = i
|
||||
if VectorIndexNodeNumFields < rcv.Table().NumFields() {
|
||||
return flatbuffers.ErrTableHasUnknownFields
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) Table() flatbuffers.Table {
|
||||
return rcv._tab
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) KeyItems(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) KeyItemsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) KeyItemsBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateKeyItems(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) KeyOffsets(j int) uint32 {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetUint32(a + flatbuffers.UOffsetT(j*4))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) KeyOffsetsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateKeyOffsets(j int, n uint32) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateUint32(a+flatbuffers.UOffsetT(j*4), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) ValueItems(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) ValueItemsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) ValueItemsBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateValueItems(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) ValueOffsets(j int) uint32 {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetUint32(a + flatbuffers.UOffsetT(j*4))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) ValueOffsetsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateValueOffsets(j int, n uint32) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateUint32(a+flatbuffers.UOffsetT(j*4), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) AddressArray(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) AddressArrayLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) AddressArrayBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateAddressArray(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) SubtreeCounts(j int) byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.GetByte(a + flatbuffers.UOffsetT(j*1))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) SubtreeCountsLength() int {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
|
||||
if o != 0 {
|
||||
return rcv._tab.VectorLen(o)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) SubtreeCountsBytes() []byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
|
||||
if o != 0 {
|
||||
return rcv._tab.ByteVector(o + rcv._tab.Pos)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateSubtreeCounts(j int, n byte) bool {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
|
||||
if o != 0 {
|
||||
a := rcv._tab.Vector(o)
|
||||
return rcv._tab.MutateByte(a+flatbuffers.UOffsetT(j*1), n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) TreeCount() uint64 {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(16))
|
||||
if o != 0 {
|
||||
return rcv._tab.GetUint64(o + rcv._tab.Pos)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateTreeCount(n uint64) bool {
|
||||
return rcv._tab.MutateUint64Slot(16, n)
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) TreeLevel() byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(18))
|
||||
if o != 0 {
|
||||
return rcv._tab.GetByte(o + rcv._tab.Pos)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateTreeLevel(n byte) bool {
|
||||
return rcv._tab.MutateByteSlot(18, n)
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) LogChunkSize() byte {
|
||||
o := flatbuffers.UOffsetT(rcv._tab.Offset(20))
|
||||
if o != 0 {
|
||||
return rcv._tab.GetByte(o + rcv._tab.Pos)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (rcv *VectorIndexNode) MutateLogChunkSize(n byte) bool {
|
||||
return rcv._tab.MutateByteSlot(20, n)
|
||||
}
|
||||
|
||||
// VectorIndexNodeNumFields is the number of fields in the VectorIndexNode
// table. Init rejects tables that report more fields than this, and
// VectorIndexNodeStart sizes the new object with it.
const VectorIndexNodeNumFields = 9
|
||||
|
||||
func VectorIndexNodeStart(builder *flatbuffers.Builder) {
|
||||
builder.StartObject(VectorIndexNodeNumFields)
|
||||
}
|
||||
func VectorIndexNodeAddKeyItems(builder *flatbuffers.Builder, keyItems flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(keyItems), 0)
|
||||
}
|
||||
func VectorIndexNodeStartKeyItemsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func VectorIndexNodeAddKeyOffsets(builder *flatbuffers.Builder, keyOffsets flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(1, flatbuffers.UOffsetT(keyOffsets), 0)
|
||||
}
|
||||
func VectorIndexNodeStartKeyOffsetsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(4, numElems, 4)
|
||||
}
|
||||
func VectorIndexNodeAddValueItems(builder *flatbuffers.Builder, valueItems flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(2, flatbuffers.UOffsetT(valueItems), 0)
|
||||
}
|
||||
func VectorIndexNodeStartValueItemsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func VectorIndexNodeAddValueOffsets(builder *flatbuffers.Builder, valueOffsets flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(3, flatbuffers.UOffsetT(valueOffsets), 0)
|
||||
}
|
||||
func VectorIndexNodeStartValueOffsetsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(4, numElems, 4)
|
||||
}
|
||||
func VectorIndexNodeAddAddressArray(builder *flatbuffers.Builder, addressArray flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(4, flatbuffers.UOffsetT(addressArray), 0)
|
||||
}
|
||||
func VectorIndexNodeStartAddressArrayVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func VectorIndexNodeAddSubtreeCounts(builder *flatbuffers.Builder, subtreeCounts flatbuffers.UOffsetT) {
|
||||
builder.PrependUOffsetTSlot(5, flatbuffers.UOffsetT(subtreeCounts), 0)
|
||||
}
|
||||
func VectorIndexNodeStartSubtreeCountsVector(builder *flatbuffers.Builder, numElems int) flatbuffers.UOffsetT {
|
||||
return builder.StartVector(1, numElems, 1)
|
||||
}
|
||||
func VectorIndexNodeAddTreeCount(builder *flatbuffers.Builder, treeCount uint64) {
|
||||
builder.PrependUint64Slot(6, treeCount, 0)
|
||||
}
|
||||
func VectorIndexNodeAddTreeLevel(builder *flatbuffers.Builder, treeLevel byte) {
|
||||
builder.PrependByteSlot(7, treeLevel, 0)
|
||||
}
|
||||
func VectorIndexNodeAddLogChunkSize(builder *flatbuffers.Builder, logChunkSize byte) {
|
||||
builder.PrependByteSlot(8, logChunkSize, 0)
|
||||
}
|
||||
func VectorIndexNodeEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT {
|
||||
return builder.EndObject()
|
||||
}
|
||||
@@ -42,6 +42,7 @@ const StashFileID = "STSH"
|
||||
const StatisticFileID = "STAT"
|
||||
const DoltgresRootValueFileID = "DGRV"
|
||||
const TupleFileID = "TUPL"
|
||||
// VectorIndexNodeFileID is the flatbuffers file identifier for VectorIndexNode messages.
const VectorIndexNodeFileID = "IVFF"
|
||||
|
||||
const MessageTypesKind int = 27
|
||||
|
||||
|
||||
@@ -38,7 +38,8 @@ fi
|
||||
table.fbs \
|
||||
tag.fbs \
|
||||
tuple.fbs \
|
||||
workingset.fbs
|
||||
workingset.fbs \
|
||||
vectorindexnode.fbs
|
||||
|
||||
# prefix files with copyright header
|
||||
for FILE in $GEN_DIR/*.go;
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
// Copyright 2024 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
namespace serial;
|
||||
|
||||
// VectorIndexNode is a node that makes up a vector index. Every key contains a vector value,
|
||||
// and keys are organized according to their proximity to their parent node.
|
||||
table VectorIndexNode {
|
||||
// sorted array of key items
|
||||
key_items:[ubyte] (required);
|
||||
// item offsets for |key_items|
|
||||
// first offset is 0, last offset is len(key_items)
|
||||
key_offsets:[uint32] (required);
|
||||
// item type for |key_items|
|
||||
// key_type:ItemType;
|
||||
|
||||
// array of values items, ordered by paired key
|
||||
value_items:[ubyte];
|
||||
// item offsets for |value_items|
|
||||
// first offset is 0, last offset is len(value_items)
|
||||
value_offsets:[uint32];
|
||||
// item type for |value_items|
|
||||
// value_type:ItemType;
|
||||
|
||||
// array of chunk addresses
|
||||
// - subtree addresses for internal prolly tree nodes
|
||||
// - value addresses for AddressMap leaf nodes
|
||||
// note that while the keys in this index are addresses to JSON chunks, we don't store those in the address_array
|
||||
// because we are guaranteed to have other references to those chunks in the primary index.
|
||||
address_array:[ubyte] (required);
|
||||
|
||||
// array of varint encoded subtree counts
|
||||
// see: go/store/prolly/message/varint.go
|
||||
subtree_counts:[ubyte];
|
||||
// total count of prolly tree
|
||||
tree_count:uint64;
|
||||
// prolly tree level, 0 for leaf nodes
|
||||
tree_level:uint8;
|
||||
|
||||
// the base-2 log of the average (geometric mean) number of vectors stored in each node.
|
||||
// currently this is always set to 8, but other numbers are used in testing, and future versions of dolt
|
||||
// may choose to use a different size, or even select the best size for each index.
|
||||
// all nodes in an index must use the same size, and when modifying an existing index, we must use this value.
|
||||
log_chunk_size:uint8;
|
||||
}
|
||||
|
||||
|
||||
// KEEP THIS IN SYNC WITH fileidentifiers.go
|
||||
file_identifier "IVFF";
|
||||
|
||||
root_type VectorIndexNode;
|
||||
|
||||
@@ -41,6 +41,9 @@ func UnpackFields(msg serial.Message) (fileId string, keys, values ItemAccess, l
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
keys, values, level, count, err = getProllyMapKeysAndValues(msg)
|
||||
return
|
||||
case serial.VectorIndexNodeFileID:
|
||||
keys, values, level, count, err = getVectorIndexKeysAndValues(msg)
|
||||
return
|
||||
case serial.AddressMapFileID:
|
||||
keys, values, level, count, err = getAddressMapKeysAndValues(msg)
|
||||
return
|
||||
@@ -75,6 +78,8 @@ func WalkAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return walkProllyMapAddresses(ctx, msg, cb)
|
||||
case serial.VectorIndexNodeFileID:
|
||||
return walkVectorIndexAddresses(ctx, msg, cb)
|
||||
case serial.AddressMapFileID:
|
||||
return walkAddressMapAddresses(ctx, msg, cb)
|
||||
case serial.MergeArtifactsFileID:
|
||||
@@ -93,6 +98,8 @@ func GetTreeCount(msg serial.Message) (int, error) {
|
||||
switch id {
|
||||
case serial.ProllyTreeNodeFileID:
|
||||
return getProllyMapTreeCount(msg)
|
||||
case serial.VectorIndexNodeFileID:
|
||||
return getVectorIndexTreeCount(msg)
|
||||
case serial.AddressMapFileID:
|
||||
return getAddressMapTreeCount(msg)
|
||||
case serial.MergeArtifactsFileID:
|
||||
|
||||
@@ -0,0 +1,212 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package message
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
fb "github.com/dolthub/flatbuffers/v23/go"
|
||||
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
)
|
||||
|
||||
const (
|
||||
// These constants are mirrored from serial.VectorIndexNode
|
||||
// They are only as stable as the flatbuffers schema that define them.
|
||||
vectorIvfKeyItemBytesVOffset fb.VOffsetT = 4
|
||||
vectorIvfKeyOffsetsVOffset fb.VOffsetT = 6
|
||||
vectorIvfValueItemBytesVOffset fb.VOffsetT = 8
|
||||
vectorIvfValueOffsetsVOffset fb.VOffsetT = 10
|
||||
vectorIvfAddressArrayBytesVOffset fb.VOffsetT = 12
|
||||
)
|
||||
|
||||
// vectorIvfFileID is serial.VectorIndexNodeFileID as a byte slice, in the form
// Serialize passes to serial.FinishMessage.
var vectorIvfFileID = []byte(serial.VectorIndexNodeFileID)
|
||||
|
||||
func NewVectorIndexSerializer(pool pool.BuffPool) VectorIndexSerializer {
|
||||
return VectorIndexSerializer{pool: pool}
|
||||
}
|
||||
|
||||
type VectorIndexSerializer struct {
|
||||
pool pool.BuffPool
|
||||
}
|
||||
|
||||
// Compile-time check that VectorIndexSerializer satisfies Serializer.
var _ Serializer = VectorIndexSerializer{}
|
||||
|
||||
func (s VectorIndexSerializer) Serialize(keys, values [][]byte, subtrees []uint64, level int) serial.Message {
|
||||
var (
|
||||
keyTups, keyOffs fb.UOffsetT
|
||||
valTups, valOffs fb.UOffsetT
|
||||
refArr, cardArr fb.UOffsetT
|
||||
)
|
||||
|
||||
keySz, valSz, bufSz := estimateVectorIndexSize(keys, values, subtrees)
|
||||
b := getFlatbufferBuilder(s.pool, bufSz)
|
||||
|
||||
// serialize keys and offStart
|
||||
keyTups = writeItemBytes(b, keys, keySz)
|
||||
serial.VectorIndexNodeStartKeyOffsetsVector(b, len(keys)+1)
|
||||
keyOffs = writeItemOffsets(b, keys, keySz)
|
||||
|
||||
if level == 0 {
|
||||
// serialize value tuples for leaf nodes
|
||||
valTups = writeItemBytes(b, values, valSz)
|
||||
serial.VectorIndexNodeStartValueOffsetsVector(b, len(values)+1)
|
||||
valOffs = writeItemOffsets(b, values, valSz)
|
||||
} else {
|
||||
// serialize child refs and subtree counts for internal nodes
|
||||
refArr = writeItemBytes(b, values, valSz)
|
||||
cardArr = writeCountArray(b, subtrees)
|
||||
}
|
||||
|
||||
// populate the node's vtable
|
||||
serial.VectorIndexNodeStart(b)
|
||||
serial.VectorIndexNodeAddKeyItems(b, keyTups)
|
||||
serial.VectorIndexNodeAddKeyOffsets(b, keyOffs)
|
||||
if level == 0 {
|
||||
serial.VectorIndexNodeAddValueItems(b, valTups)
|
||||
serial.VectorIndexNodeAddValueOffsets(b, valOffs)
|
||||
serial.VectorIndexNodeAddTreeCount(b, uint64(len(keys)))
|
||||
} else {
|
||||
serial.VectorIndexNodeAddAddressArray(b, refArr)
|
||||
serial.VectorIndexNodeAddSubtreeCounts(b, cardArr)
|
||||
serial.VectorIndexNodeAddTreeCount(b, sumSubtrees(subtrees))
|
||||
}
|
||||
serial.VectorIndexNodeAddTreeLevel(b, uint8(level))
|
||||
|
||||
return serial.FinishMessage(b, serial.VectorIndexNodeEnd(b), vectorIvfFileID)
|
||||
}
|
||||
|
||||
func getVectorIndexKeysAndValues(msg serial.Message) (keys, values ItemAccess, level, count uint16, err error) {
|
||||
var pm serial.VectorIndexNode
|
||||
err = serial.InitVectorIndexNodeRoot(&pm, msg, serial.MessagePrefixSz)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
keys.bufStart = lookupVectorOffset(vectorIvfKeyItemBytesVOffset, pm.Table())
|
||||
keys.bufLen = uint16(pm.KeyItemsLength())
|
||||
keys.offStart = lookupVectorOffset(vectorIvfKeyOffsetsVOffset, pm.Table())
|
||||
keys.offLen = uint16(pm.KeyOffsetsLength() * uint16Size)
|
||||
|
||||
count = (keys.offLen / 2) - 1
|
||||
level = uint16(pm.TreeLevel())
|
||||
|
||||
vv := pm.ValueItemsBytes()
|
||||
if vv != nil {
|
||||
values.bufStart = lookupVectorOffset(vectorIvfValueItemBytesVOffset, pm.Table())
|
||||
values.bufLen = uint16(pm.ValueItemsLength())
|
||||
values.offStart = lookupVectorOffset(vectorIvfValueOffsetsVOffset, pm.Table())
|
||||
values.offLen = uint16(pm.ValueOffsetsLength() * uint16Size)
|
||||
} else {
|
||||
values.bufStart = lookupVectorOffset(vectorIvfAddressArrayBytesVOffset, pm.Table())
|
||||
values.bufLen = uint16(pm.AddressArrayLength())
|
||||
values.itemWidth = hash.ByteLen
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func walkVectorIndexAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.Context, addr hash.Hash) error) error {
|
||||
var pm serial.VectorIndexNode
|
||||
err := serial.InitVectorIndexNodeRoot(&pm, msg, serial.MessagePrefixSz)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
arr := pm.AddressArrayBytes()
|
||||
for i := 0; i < len(arr)/hash.ByteLen; i++ {
|
||||
addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
|
||||
if err := cb(ctx, addr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getVectorIndexCount(msg serial.Message) (uint16, error) {
|
||||
var pm serial.VectorIndexNode
|
||||
err := serial.InitVectorIndexNodeRoot(&pm, msg, serial.MessagePrefixSz)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return uint16(pm.KeyOffsetsLength() - 1), nil
|
||||
}
|
||||
|
||||
func getVectorIndexTreeLevel(msg serial.Message) (int, error) {
|
||||
var pm serial.VectorIndexNode
|
||||
err := serial.InitVectorIndexNodeRoot(&pm, msg, serial.MessagePrefixSz)
|
||||
if err != nil {
|
||||
return 0, fb.ErrTableHasUnknownFields
|
||||
}
|
||||
return int(pm.TreeLevel()), nil
|
||||
}
|
||||
|
||||
func getVectorIndexTreeCount(msg serial.Message) (int, error) {
|
||||
var pm serial.VectorIndexNode
|
||||
err := serial.InitVectorIndexNodeRoot(&pm, msg, serial.MessagePrefixSz)
|
||||
if err != nil {
|
||||
return 0, fb.ErrTableHasUnknownFields
|
||||
}
|
||||
return int(pm.TreeCount()), nil
|
||||
}
|
||||
|
||||
func getVectorIndexSubtrees(msg serial.Message) ([]uint64, error) {
|
||||
sz, err := getVectorIndexCount(msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pm serial.VectorIndexNode
|
||||
n := fb.GetUOffsetT(msg[serial.MessagePrefixSz:])
|
||||
err = pm.Init(msg, serial.MessagePrefixSz+n)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
counts := make([]uint64, sz)
|
||||
|
||||
return decodeVarints(pm.SubtreeCountsBytes(), counts), nil
|
||||
}
|
||||
|
||||
// estimateVectorIndexSize returns the exact Size of the tuple vectors for keys and values,
|
||||
// and an estimate of the overall Size of the final flatbuffer.
|
||||
func estimateVectorIndexSize(keys, values [][]byte, subtrees []uint64) (int, int, int) {
|
||||
var keySz, valSz, bufSz int
|
||||
for i := range keys {
|
||||
keySz += len(keys[i])
|
||||
valSz += len(values[i])
|
||||
}
|
||||
subtreesSz := len(subtrees) * binary.MaxVarintLen64
|
||||
|
||||
// constraints enforced upstream
|
||||
if keySz > int(MaxVectorOffset) {
|
||||
panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
|
||||
}
|
||||
if valSz > int(MaxVectorOffset) {
|
||||
panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
|
||||
}
|
||||
|
||||
bufSz += keySz + valSz // tuples
|
||||
bufSz += subtreesSz // subtree counts
|
||||
bufSz += len(keys)*2 + len(values)*2 // offStart
|
||||
bufSz += 8 + 1 + 1 + 1 // metadata
|
||||
bufSz += 72 // vtable (approx)
|
||||
bufSz += 100 // padding?
|
||||
bufSz += serial.MessagePrefixSz
|
||||
|
||||
return keySz, valSz, bufSz
|
||||
}
|
||||
Reference in New Issue
Block a user