Files
dolt/go/serial/vectorindexnode.fbs

66 lines
2.5 KiB
Plaintext

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// schema.fbs is included for shared declarations; DistanceType (used by VectorIndexNode below) is not
// declared in this file, so it is presumably defined there -- TODO confirm.
include "schema.fbs";
// All declarations in this file live in the |serial| namespace.
namespace serial;
// VectorIndexNode is a node that makes up a vector index. Every key contains a vector value,
// and keys are organized according to their proximity to their parent node.
//
// NOTE: no explicit field ids are used here, so FlatBuffers assigns field slots by declaration order.
// New fields must only be appended at the end of the table; never reorder or remove existing fields.
table VectorIndexNode {
// sorted array of key items, stored back to back as raw bytes
key_items:[ubyte] (required);
// item offsets for |key_items|
// first offset is 0, last offset is len(key_items)
// (so N items are described by N+1 offsets; presumably item i spans
// key_offsets[i] up to key_offsets[i+1] -- confirm against the reader code)
key_offsets:[uint32] (required);
// array of value items, ordered by paired key
// not marked (required), so this field may be absent from a node
value_items:[ubyte];
// item offsets for |value_items|
// first offset is 0, last offset is len(value_items)
// not marked (required), so this field may be absent from a node
value_offsets:[uint32];
// array of chunk addresses
// - subtree addresses for internal prolly tree nodes
// - value addresses for AddressMap leaf nodes
//   NOTE(review): this bullet mentions AddressMap and may have been carried over from another
//   node schema -- confirm it applies to vector index nodes.
// note that while the keys in this index are addresses to JSON chunks, we don't store those in the address_array
// because we are guaranteed to have other references to those chunks in the primary index.
address_array:[ubyte] (required);
// array of varint encoded subtree counts
// see: go/store/prolly/message/varint.go
subtree_counts:[ubyte];
// total count of prolly tree
tree_count:uint64;
// prolly tree level, 0 for leaf nodes
tree_level:uint8;
// the base-2 log of the average (geometric mean) number of vectors stored in each node
// (e.g. a value of 8 means an average of 2^8 = 256 vectors per node).
// currently this is always set to 8, but other numbers are used in testing, and future versions of dolt
// may choose to use a different size, or even select the best size for each index.
// all nodes in an index must use the same size, and when modifying an existing index, we must use this value.
log_chunk_size:uint8;
// each node encodes the distance function used for the index. This allows lookups without needing to retrieve the
// distance function from the schema.
// DistanceType is not declared in this file; presumably it comes from the included schema.fbs -- TODO confirm.
distance_type:DistanceType;
}
// KEEP THIS IN SYNC WITH fileidentifiers.go
// The file identifier is the 4-character tag FlatBuffers embeds in a serialized buffer so that readers
// can check which kind of root table the buffer holds.
file_identifier "IVFF";
// VectorIndexNode is the root table of buffers serialized with this schema.
root_type VectorIndexNode;