// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nbs

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"
	"os"
	"sync"
	"sync/atomic"

	"github.com/dolthub/mmap-go"

	"github.com/dolthub/dolt/go/libraries/utils/iohelp"
	"github.com/dolthub/dolt/go/store/hash"
)

var (
	ErrWrongBufferSize = errors.New("buffer length and/or capacity incorrect for chunkCount specified in footer")
	ErrWrongCopySize   = errors.New("could not copy enough bytes")
)

type tableIndex interface {
	// ChunkCount returns the total number of chunks in the indexed file.
	ChunkCount() uint32

	// EntrySuffixMatches returns true if the entry at index |idx| matches
	// the suffix of the address |h|. Used by |Lookup| after finding
	// matching indexes based on |Prefixes|.
	EntrySuffixMatches(idx uint32, h *addr) (bool, error)

	// IndexEntry returns the |indexEntry| at |idx|. Optionally puts the
	// full address of that entry in |a| if |a| is not |nil|.
	IndexEntry(idx uint32, a *addr) (indexEntry, error)

	// Lookup returns an |indexEntry| for the chunk corresponding to the
	// provided address |h|. The second return value is |true| if an entry
	// exists and |false| otherwise.
	Lookup(h *addr) (indexEntry, bool, error)

	// Ordinals returns a slice mapping each entry in the (prefix-sorted)
	// index to the ordinal of its chunk in the indexed file: the |i|th
	// element of the result is the file ordinal of the |i|th index entry.
	Ordinals() ([]uint32, error)

	// Prefixes returns the sorted slice of |uint64| |addr| prefixes; each
	// entry corresponds to an indexed chunk address.
	Prefixes() ([]uint64, error)

	// PrefixAt returns the prefix at the specified index.
	PrefixAt(idx uint32) uint64

	// TableFileSize returns the total size of the indexed table file, in bytes.
	TableFileSize() uint64

	// TotalUncompressedData returns the total uncompressed data size of
	// the table file. Used for informational statistics only.
	TotalUncompressedData() uint64

	// Close releases any resources used by this tableIndex.
	Close() error

	// Clone returns a |tableIndex| with the same contents which can be
	// |Close|d independently.
	Clone() (tableIndex, error)
}

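// A minimal usage sketch (illustrative only; |ti| is an already-parsed
// tableIndex and |h| a populated addr):
//
//	entry, ok, err := ti.Lookup(&h)
//	if err != nil {
//		// handle error
//	}
//	if ok {
//		// entry.Offset() and entry.Length() locate the chunk's bytes
//		// within the table file.
//	}
//	defer ti.Close()
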
func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) {
	footerSize := int64(magicNumberSize + uint64Size + uint32Size)
	_, err = rd.Seek(-footerSize, io.SeekEnd)
	if err != nil {
		return 0, 0, err
	}

	footer, err := iohelp.ReadNBytes(rd, int(footerSize))
	if err != nil {
		return 0, 0, err
	}

	if string(footer[uint32Size+uint64Size:]) != magicNumber {
		return 0, 0, ErrInvalidTableFile
	}

	chunkCount = binary.BigEndian.Uint32(footer)
	totalUncompressedData = binary.BigEndian.Uint64(footer[uint32Size:])

	return
}

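// The footer layout implied by ReadTableFooter, front to back (integers are
// big-endian):
//
//	[ chunkCount uint32 ][ totalUncompressedData uint64 ][ magicNumber ]
//
// footerSize = uint32Size + uint64Size + magicNumberSize, and the magic
// number occupies the final magicNumberSize bytes of the table file.
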
func indexMemSize(chunkCount uint32) uint64 {
	is := indexSize(chunkCount) + footerSize
	// Extra required space for offsets that don't fit into the region where
	// lengths were previously stored, see newOnHeapTableIndex.
	is += uint64(offsetSize * (chunkCount - chunkCount/2))
	return is
}

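// Worked example: for chunkCount = 5, chunkCount - chunkCount/2 = 3, so
// indexMemSize reserves indexSize(5) + footerSize plus 3*offsetSize extra
// bytes. The extra bytes hold the first three 64-bit offsets, which are
// twice as wide as the 32-bit lengths they replace and so cannot all be
// rewritten in place (see newOnHeapTableIndex).
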
// parseTableIndex parses a valid nbs tableIndex from a byte stream. |buff|
// must end with an NBS index and footer, and its length must match the
// expected indexSize for the chunkCount specified in the footer. It retains
// |buff| and computes on it in place, allocating new memory only for the
// extra offsets.
func parseTableIndex(buff []byte, q MemoryQuotaProvider) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(bytes.NewReader(buff))
	if err != nil {
		return onHeapTableIndex{}, err
	}

	buff, err = removeFooter(buff, chunkCount)
	if err != nil {
		return onHeapTableIndex{}, err
	}

	chunks2 := chunkCount / 2
	chunks1 := chunkCount - chunks2
	offsetsBuff1 := make([]byte, chunks1*offsetSize)

	return newOnHeapTableIndex(buff, offsetsBuff1, chunkCount, totalUncompressedData, q)
}

// parseTableIndexWithOffsetBuff is similar to parseTableIndex except that it
// uses the given |offsetsBuff1| instead of allocating the additional space.
func parseTableIndexWithOffsetBuff(buff []byte, offsetsBuff1 []byte, q MemoryQuotaProvider) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(bytes.NewReader(buff))
	if err != nil {
		return onHeapTableIndex{}, err
	}

	buff, err = removeFooter(buff, chunkCount)
	if err != nil {
		return onHeapTableIndex{}, err
	}

	return newOnHeapTableIndex(buff, offsetsBuff1, chunkCount, totalUncompressedData, q)
}

func removeFooter(p []byte, chunkCount uint32) (out []byte, err error) {
	iS := indexSize(chunkCount) + footerSize
	if uint64(len(p)) != iS {
		return nil, ErrWrongBufferSize
	}
	out = p[:len(p)-footerSize]
	return
}

// parseTableIndexByCopy reads the footer, copies indexSize(chunkCount) bytes,
// and parses an on-heap table index. Useful to create an onHeapTableIndex
// without retaining the entire underlying array of data.
func parseTableIndexByCopy(buff []byte, q MemoryQuotaProvider) (onHeapTableIndex, error) {
	r := bytes.NewReader(buff)
	return ReadTableIndexByCopy(r, q)
}

// ReadTableIndexByCopy loads an index into memory from an io.ReadSeeker.
// Caution: it allocates new memory for the entire index.
func ReadTableIndexByCopy(rd io.ReadSeeker, q MemoryQuotaProvider) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(rd)
	if err != nil {
		return onHeapTableIndex{}, err
	}
	iS := int64(indexSize(chunkCount))
	_, err = rd.Seek(-(iS + footerSize), io.SeekEnd)
	if err != nil {
		return onHeapTableIndex{}, err
	}
	buff := make([]byte, iS)
	_, err = io.ReadFull(rd, buff)
	if err != nil {
		return onHeapTableIndex{}, err
	}

	chunks2 := chunkCount / 2
	chunks1 := chunkCount - chunks2
	offsets1Buff := make([]byte, chunks1*offsetSize)

	return newOnHeapTableIndex(buff, offsets1Buff, chunkCount, totalUncompressedData, q)
}

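// Illustrative use (a sketch; the file name and |quota| provider are
// hypothetical):
//
//	f, err := os.Open("chunks.table")
//	if err != nil {
//		// handle error
//	}
//	defer f.Close()
//	idx, err := ReadTableIndexByCopy(f, quota)
//	if err != nil {
//		// handle error
//	}
//	defer idx.Close()
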
// onHeapTableIndex is an in-memory tableIndex. It holds the prefix tuples,
// chunk offsets, and address suffixes of an NBS table file in the byte
// buffers below and computes lookups directly on those buffers.
type onHeapTableIndex struct {
	q             MemoryQuotaProvider
	refCnt        *int32
	tableFileSize uint64
	// Tuple bytes
	tupleB []byte
	// Offset bytes
	offsetB1 []byte
	offsetB2 []byte
	// Suffix bytes
	suffixB               []byte
	chunkCount            uint32
	totalUncompressedData uint64
}

var _ tableIndex = &onHeapTableIndex{}

// newOnHeapTableIndex converts a table file index with stored lengths on
// |indexBuff| into an index with stored offsets. Since offsets are twice the
// size of a length, we need to allocate additional space to store all the
// offsets. It stores the first n - n/2 offsets in |offsetsBuff1| (the
// additional space) and the rest into the region of |indexBuff| previously
// occupied by lengths. |onHeapTableIndex| computes directly on the given
// |indexBuff| and |offsetsBuff1| buffers.
func newOnHeapTableIndex(indexBuff []byte, offsetsBuff1 []byte, chunkCount uint32, totalUncompressedData uint64, q MemoryQuotaProvider) (onHeapTableIndex, error) {
	tuples := indexBuff[:prefixTupleSize*chunkCount]
	lengths := indexBuff[prefixTupleSize*chunkCount : prefixTupleSize*chunkCount+lengthSize*chunkCount]
	suffixes := indexBuff[prefixTupleSize*chunkCount+lengthSize*chunkCount:]

	chunks2 := chunkCount / 2

	lR := bytes.NewReader(lengths)
	r := NewOffsetsReader(lR)
	_, err := io.ReadFull(r, offsetsBuff1)
	if err != nil {
		return onHeapTableIndex{}, err
	}

	var offsetsBuff2 []byte
	if chunks2 > 0 {
		offsetsBuff2 = lengths[:chunks2*offsetSize]
		_, err = io.ReadFull(r, offsetsBuff2)
		if err != nil {
			return onHeapTableIndex{}, err
		}
	}

	refCnt := new(int32)
	*refCnt = 1

	return onHeapTableIndex{
		refCnt:                refCnt,
		q:                     q,
		tupleB:                tuples,
		offsetB1:              offsetsBuff1,
		offsetB2:              offsetsBuff2,
		suffixB:               suffixes,
		chunkCount:            chunkCount,
		totalUncompressedData: totalUncompressedData,
	}, nil
}

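// Worked example: with chunkCount = 5, chunks2 = 2 and chunks1 = 3. The
// offsets for ordinals 0-2 are written into |offsetsBuff1| (3*offsetSize =
// 24 bytes of new space); the offsets for ordinals 3-4 are written over the
// first 16 of the 20 bytes that previously held the 4-byte lengths. Since
// offsetSize*(n/2) <= lengthSize*n for all n, the second batch always fits.
// offsetAt reverses this split when reading.
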
func (ti onHeapTableIndex) ChunkCount() uint32 {
	return ti.chunkCount
}

func (ti onHeapTableIndex) PrefixAt(idx uint32) uint64 {
	return ti.prefixAt(idx)
}

func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) {
	ord := ti.ordinalAt(idx)
	o := ord * addrSuffixSize
	b := ti.suffixB[o : o+addrSuffixSize]
	return bytes.Equal(h[addrPrefixSize:], b), nil
}

func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) (entry indexEntry, err error) {
	prefix, ord := ti.tupleAt(idx)

	if a != nil {
		binary.BigEndian.PutUint64(a[:], prefix)

		o := int64(addrSuffixSize * ord)
		b := ti.suffixB[o : o+addrSuffixSize]
		copy(a[addrPrefixSize:], b)
	}

	return ti.getIndexEntry(ord), nil
}

func (ti onHeapTableIndex) getIndexEntry(ord uint32) indexEntry {
	var prevOff uint64
	if ord == 0 {
		prevOff = 0
	} else {
		prevOff = ti.offsetAt(ord - 1)
	}
	ordOff := ti.offsetAt(ord)
	length := uint32(ordOff - prevOff)
	return indexResult{
		o: prevOff,
		l: length,
	}
}

func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) {
	ord, err := ti.lookupOrdinal(h)
	if err != nil {
		return indexResult{}, false, err
	}
	if ord == ti.chunkCount {
		return indexResult{}, false, nil
	}
	return ti.getIndexEntry(ord), true, nil
}

// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount|
// if absent.
func (ti onHeapTableIndex) lookupOrdinal(h *addr) (uint32, error) {
	prefix := h.Prefix()

	for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixAt(idx) == prefix; idx++ {
		m, err := ti.EntrySuffixMatches(idx, h)
		if err != nil {
			return ti.chunkCount, err
		}
		if m {
			return ti.ordinalAt(idx), nil
		}
	}

	return ti.chunkCount, nil
}

// prefixIdx returns the first position in the index whose prefix is >=
// |prefix|: the position of |prefix| if present, or the position at which it
// would be inserted (possibly |ti.chunkCount|) if absent.
func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
	// NOTE: The golang impl of sort.Search is basically inlined here. This
	// method can be called in an extremely tight loop and inlining the code
	// was a significant perf improvement.
	idx, j := 0, ti.chunkCount
	for idx < j {
		h := idx + (j-idx)/2 // avoid overflow when computing h
		// i ≤ h < j
		if ti.prefixAt(h) < prefix {
			idx = h + 1 // preserves f(i-1) == false
		} else {
			j = h // preserves f(j) == true
		}
	}

	return
}

func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) {
	off := int64(prefixTupleSize * idx)
	b := ti.tupleB[off : off+prefixTupleSize]

	prefix = binary.BigEndian.Uint64(b[:])
	ord = binary.BigEndian.Uint32(b[addrPrefixSize:])
	return prefix, ord
}

func (ti onHeapTableIndex) prefixAt(idx uint32) uint64 {
	off := int64(prefixTupleSize * idx)
	b := ti.tupleB[off : off+addrPrefixSize]
	return binary.BigEndian.Uint64(b)
}

func (ti onHeapTableIndex) ordinalAt(idx uint32) uint32 {
	off := int64(prefixTupleSize*idx) + addrPrefixSize
	b := ti.tupleB[off : off+ordinalSize]
	return binary.BigEndian.Uint32(b)
}

// offsetAt reads the offset for |ord|: the first n - n/2 offsets are stored
// in |offsetB1| and the rest in |offsetB2|.
func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 {
	chunks1 := ti.chunkCount - ti.chunkCount/2
	var b []byte
	if ord < chunks1 {
		off := int64(offsetSize * ord)
		b = ti.offsetB1[off : off+offsetSize]
	} else {
		off := int64(offsetSize * (ord - chunks1))
		b = ti.offsetB2[off : off+offsetSize]
	}

	return binary.BigEndian.Uint64(b)
}

func (ti onHeapTableIndex) Ordinals() ([]uint32, error) {
	o := make([]uint32, ti.chunkCount)
	for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize {
		b := ti.tupleB[off+addrPrefixSize : off+prefixTupleSize]
		o[i] = binary.BigEndian.Uint32(b)
	}
	return o, nil
}

func (ti onHeapTableIndex) Prefixes() ([]uint64, error) {
	p := make([]uint64, ti.chunkCount)
	for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize {
		b := ti.tupleB[off : off+addrPrefixSize]
		p[i] = binary.BigEndian.Uint64(b)
	}
	return p, nil
}

func (ti onHeapTableIndex) hashAt(idx uint32) hash.Hash {
	// Get tuple
	off := int64(prefixTupleSize * idx)
	tuple := ti.tupleB[off : off+prefixTupleSize]

	// Get prefix, ordinal, and suffix
	prefix := tuple[:addrPrefixSize]
	ord := binary.BigEndian.Uint32(tuple[addrPrefixSize:]) * addrSuffixSize
	suffix := ti.suffixB[ord : ord+addrSuffixSize] // suffix is 12 bytes

	// Combine prefix and suffix to get hash
	buf := [hash.ByteLen]byte{}
	copy(buf[:addrPrefixSize], prefix)
	copy(buf[addrPrefixSize:], suffix)

	return buf
}

// prefixIdxLBound returns the first position in the index whose prefix is
// >= |prefix|: the lower bound of the range of positions that could hold
// |prefix|.
func (ti onHeapTableIndex) prefixIdxLBound(prefix uint64) uint32 {
	l, r := uint32(0), ti.chunkCount
	for l < r {
		m := l + (r-l)/2 // find middle, rounding down
		if ti.prefixAt(m) < prefix {
			l = m + 1
		} else {
			r = m
		}
	}

	return l
}

// prefixIdxUBound returns the last position in the index whose prefix is
// <= |prefix|: the upper bound of the range of positions that could hold
// |prefix|.
func (ti onHeapTableIndex) prefixIdxUBound(prefix uint64) (idx uint32) {
	l, r := uint32(0), ti.chunkCount
	for l < r {
		m := l + (r-l+1)/2 // find middle, rounding up
		if m >= ti.chunkCount { // prevent index out of bounds
			return r
		}
		pre := ti.prefixAt(m)
		if pre <= prefix {
			l = m
		} else {
			r = m - 1
		}
	}

	return l
}

// padStringAndDecode pads the base32 short-hash string |s| to 16 characters
// with the pad character |p| ("0" is appended; any other pad character is
// prepended), decodes it with the package-level base32 |encoding| used for
// NBS addresses, and returns the leading eight bytes as a big-endian uint64
// prefix.
func (ti onHeapTableIndex) padStringAndDecode(s string, p string) uint64 {
	// Pad string
	if p == "0" {
		for i := len(s); i < 16; i++ {
			s = s + p
		}
	} else {
		for i := len(s); i < 16; i++ {
			s = p + s
		}
	}

	// Decode
	h, _ := encoding.DecodeString(s)
	return binary.BigEndian.Uint64(h)
}

func (ti onHeapTableIndex) ResolveShortHash(short []byte) ([]string, error) {
	// Convert to string
	shortHash := string(short)

	// Calculate length
	sLen := len(shortHash)

	// Find lower and upper bounds of prefix indexes to check
	var pIdxL, pIdxU uint32
	if sLen >= 13 {
		// A 13-character base32 string encodes at least 64 bits, so the
		// short hash fully determines the 8-byte prefix.
		sPrefix := ti.padStringAndDecode(shortHash, "0")

		// Binary search for the prefix
		pIdxL = ti.prefixIdx(sPrefix)

		// Prefix doesn't exist
		if pIdxL == ti.chunkCount {
			return []string{}, errors.New("can't find prefix")
		}

		// Find last equal
		pIdxU = pIdxL + 1
		for pIdxU < ti.chunkCount && sPrefix == ti.prefixAt(pIdxU) {
			pIdxU++
		}
	} else {
		// Convert short string to lower and upper bounds
		sPrefixL := ti.padStringAndDecode(shortHash, "0")
		sPrefixU := ti.padStringAndDecode(shortHash, "v")

		// Binary search for lower and upper bounds
		pIdxL = ti.prefixIdxLBound(sPrefixL)
		pIdxU = ti.prefixIdxUBound(sPrefixU)
	}

	// Go through all equal prefixes
	var res []string
	for i := pIdxL; i < pIdxU; i++ {
		// Get full hash at index
		h := ti.hashAt(i)

		// Convert to string representation
		hashStr := h.String()

		// If it matches, append to result
		if hashStr[:sLen] == shortHash {
			res = append(res, hashStr)
		}
	}

	return res, nil
}

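// Illustrative use (a sketch; the short hash value is hypothetical):
//
//	matches, err := ti.ResolveShortHash([]byte("t9q2qh"))
//	if err == nil {
//		for _, full := range matches {
//			// |full| is a complete 32-character hash string whose
//			// prefix is the short hash.
//		}
//	}
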
// TableFileSize returns the size of the table file that this index references.
// This assumes that the index follows immediately after the last chunk in the
// file and that the last chunk in the file is in the index.
func (ti onHeapTableIndex) TableFileSize() uint64 {
	if ti.chunkCount == 0 {
		return footerSize
	}
	entry := ti.getIndexEntry(ti.chunkCount - 1)
	offset, len := entry.Offset(), uint64(entry.Length())
	return offset + len + indexSize(ti.chunkCount) + footerSize
}

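// For example, if the last index entry reports Offset() = 900 and
// Length() = 100, the chunk data spans bytes [0, 1000), and the table file
// size is 1000 + indexSize(chunkCount) + footerSize.
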
func (ti onHeapTableIndex) TotalUncompressedData() uint64 {
	return ti.totalUncompressedData
}

func (ti onHeapTableIndex) Close() error {
	cnt := atomic.AddInt32(ti.refCnt, -1)
	if cnt == 0 {
		ti.tupleB = nil
		ti.offsetB1 = nil
		ti.offsetB2 = nil
		ti.suffixB = nil

		return ti.q.ReleaseQuota(indexMemSize(ti.chunkCount))
	}
	if cnt < 0 {
		panic("Close() called and reduced ref count to < 0.")
	}

	return nil
}

func (ti onHeapTableIndex) Clone() (tableIndex, error) {
	cnt := atomic.AddInt32(ti.refCnt, 1)
	if cnt == 1 {
		panic("Clone() called after last Close(). This index is no longer valid.")
	}
	return ti, nil
}

// mmapTableIndex is an onHeapTableIndex but creates all of its slice buffers
// from mmap. It overrides Clone and Close of onHeapTableIndex so that it can
// count references and release mmapped regions appropriately.
type mmapTableIndex struct {
	onHeapTableIndex
	refCnt          *int32
	q               MemoryQuotaProvider
	mmapped         mmapWStat
	indexDataBuff   []byte
	offset1DataBuff []byte
}

// newMmapTableIndex mmaps a region of memory large enough to store a fully
// parsed onHeapTableIndex. After creating the mmapTableIndex, index data
// should be loaded into |indexDataBuff| and then parsed with parseIndexBuffer.
func newMmapTableIndex(chunkCount uint32) (*mmapTableIndex, error) {
	indexSize := int(indexSize(chunkCount) + footerSize)

	chunks2 := chunkCount / 2
	chunks1 := chunkCount - chunks2
	offsets1Size := int(chunks1 * offsetSize)

	mmapped, err := mmapWithStats(nil, indexSize+offsets1Size, mmap.RDWR, mmap.ANON, 0)
	if err != nil {
		return nil, err
	}
	indexBytesBuff := mmapped.m[:indexSize]
	offsets1Buff := mmapped.m[indexSize : indexSize+offsets1Size]

	refCnt := new(int32)
	*refCnt = 1

	return &mmapTableIndex{
		refCnt:          refCnt,
		mmapped:         mmapped,
		indexDataBuff:   indexBytesBuff,
		offset1DataBuff: offsets1Buff,
	}, nil
}

func (ti *mmapTableIndex) Clone() (tableIndex, error) {
	cnt := atomic.AddInt32(ti.refCnt, 1)
	if cnt == 1 {
		panic("Clone() called after last Close(). This index is no longer valid.")
	}
	return ti, nil
}

// Close closes the underlying onHeapTableIndex and then unmaps the memory
// region.
func (ti *mmapTableIndex) Close() error {
	cnt := atomic.AddInt32(ti.refCnt, -1)
	if cnt == 0 {
		chunkCount := ti.chunkCount
		// mmapTableIndex sets the quota provider for onHeapTableIndex to a
		// noopQuotaProvider, so that we can release quota after the memory
		// region is unmapped.
		err := ti.onHeapTableIndex.Close()
		if err != nil {
			return err
		}

		ti.indexDataBuff = nil
		ti.offset1DataBuff = nil
		err = ti.mmapped.Unmap()
		if err != nil {
			return err
		}

		err = ti.q.ReleaseQuota(indexMemSize(chunkCount))
		if err != nil {
			return err
		}
	}
	if cnt < 0 {
		panic("Close() called and reduced ref count to < 0.")
	}

	return nil
}

func (ti *mmapTableIndex) parseIndexBuffer(q MemoryQuotaProvider) (err error) {
	ti.onHeapTableIndex, err = parseTableIndexWithOffsetBuff(ti.indexDataBuff, ti.offset1DataBuff, &noopQuotaProvider{})
	ti.q = q
	return err
}

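// Typical lifecycle (a sketch; |chunkCount|, |r|, and |quota| stand in for
// caller-supplied values):
//
//	ti, err := newMmapTableIndex(chunkCount)
//	if err != nil {
//		// handle error
//	}
//	if _, err = io.ReadFull(r, ti.indexDataBuff); err != nil {
//		// handle error
//	}
//	if err = ti.parseIndexBuffer(quota); err != nil {
//		// handle error
//	}
//	defer ti.Close()
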
type notifyFunc func(n uint64, total uint64)

var noOpNotify = func(uint64, uint64) {}

type mmapStats struct {
	mu        sync.Mutex
	totalUsed uint64
	WillMmap  notifyFunc
	Mmapped   notifyFunc
	UnMapped  notifyFunc
}

var GlobalMmapStats = &mmapStats{
	sync.Mutex{},
	0,
	noOpNotify,
	noOpNotify,
	noOpNotify,
}

type mmapWStat struct {
	m    mmap.MMap
	used uint64
}

func mmapWithStats(f *os.File, length int, prot, flags int, offset int64) (mmapWStat, error) {
	GlobalMmapStats.mu.Lock()
	defer GlobalMmapStats.mu.Unlock()
	GlobalMmapStats.WillMmap(uint64(length), GlobalMmapStats.totalUsed)
	m, err := mmap.MapRegion(f, length, prot, flags, offset)
	if err != nil {
		return mmapWStat{}, err
	}
	GlobalMmapStats.totalUsed += uint64(length)
	GlobalMmapStats.Mmapped(uint64(length), GlobalMmapStats.totalUsed)
	return mmapWStat{m, uint64(length)}, nil
}

func (m mmapWStat) Unmap() error {
	GlobalMmapStats.mu.Lock()
	defer GlobalMmapStats.mu.Unlock()
	err := m.m.Unmap()
	if err != nil {
		return err
	}
	GlobalMmapStats.totalUsed -= m.used
	GlobalMmapStats.UnMapped(m.used, GlobalMmapStats.totalUsed)
	return nil
}
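
// The GlobalMmapStats hooks can be used for instrumentation; for example
// (illustrative only; assumes the standard library log package):
//
//	GlobalMmapStats.Mmapped = func(n, total uint64) {
//		log.Printf("mmapped %d bytes, %d bytes in use", n, total)
//	}
//
// Hooks should be installed before any mapping occurs; they are invoked with
// GlobalMmapStats.mu held, so they must not map or unmap regions themselves.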