Mirror of https://github.com/dolthub/dolt.git
Factor tableIndex out of tableReader (#2950)
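In short: the index that newTableReader previously parsed out of the raw table bytes during construction is now a standalone tableIndex value, produced by the new parseTableIndex function and passed to newTableReader. The two call shapes, as they appear in the test updates below (tableData and fileReadAmpThresh follow the test code; this is a fragment, not a complete program):

// before: newTableReader parses the index out of the raw table bytes itself
tr := newTableReader(tableData, bytes.NewReader(tableData), fileReadAmpThresh)

// after: the index is parsed up front and handed to the reader
tr := newTableReader(parseTableIndex(tableData), bytes.NewReader(tableData), fileReadAmpThresh)

The hunks below update every construction site (the mmap and S3 readers, the fakes, and the tests) to this pattern and move the prefix/ordinal lookup helpers from tableReader onto tableIndex.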
@@ -81,14 +81,14 @@ func TestMemTableWrite(t *testing.T) {
 	td1, _ := buildTable(chunks[1:2])
 	td2, _ := buildTable(chunks[2:])
-	tr1, tr2 := newTableReader(td1, bytes.NewReader(td1), fileReadAmpThresh), newTableReader(td2, bytes.NewReader(td2), fileReadAmpThresh)
+	tr1, tr2 := newTableReader(parseTableIndex(td1), bytes.NewReader(td1), fileReadAmpThresh), newTableReader(parseTableIndex(td2), bytes.NewReader(td2), fileReadAmpThresh)
 	assert.True(tr1.has(computeAddr(chunks[1])))
 	assert.True(tr2.has(computeAddr(chunks[2])))

 	_, data, count := mt.write(chunkReaderGroup{tr1, tr2})
 	assert.Equal(uint32(1), count)

-	outReader := newTableReader(data, bytes.NewReader(data), fileReadAmpThresh)
+	outReader := newTableReader(parseTableIndex(data), bytes.NewReader(data), fileReadAmpThresh)
 	assert.True(outReader.has(computeAddr(chunks[0])))
 	assert.False(outReader.has(computeAddr(chunks[1])))
 	assert.False(outReader.has(computeAddr(chunks[2])))

@@ -57,7 +57,8 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32) chunkSource {
 	d.PanicIfError(err)
 	success = true

-	source := &mmapTableReader{newTableReader(buff[indexOffset-aligned:], f, fileReadAmpThresh), f, buff, h}
+	index := parseTableIndex(buff[indexOffset-aligned:])
+	source := &mmapTableReader{newTableReader(index, f, fileReadAmpThresh), f, buff, h}

 	d.PanicIfFalse(chunkCount == source.count())
 	return source

@@ -199,7 +199,7 @@ func (ftp fakeTablePersister) Compact(mt *memTable, haver chunkReader) chunkSour
 	if mt.count() > 0 {
 		var data []byte
 		name, data, _ := mt.write(haver)
-		ftp.sources[name] = newTableReader(data, bytes.NewReader(data), fileReadAmpThresh)
+		ftp.sources[name] = newTableReader(parseTableIndex(data), bytes.NewReader(data), fileReadAmpThresh)
 		return chunkSourceAdapter{ftp.sources[name], name}
 	}
 	return emptyChunkSource{}

@@ -55,7 +55,7 @@ func (m *fakeS3) readerForTable(name addr) chunkReader {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	if buff, present := m.data[name.String()]; present {
-		return newTableReader(buff, bytes.NewReader(buff), s3ReadAmpThresh)
+		return newTableReader(parseTableIndex(buff), bytes.NewReader(buff), s3ReadAmpThresh)
 	}
 	return nil
 }

@@ -43,8 +43,9 @@ func newS3TableReader(s3 s3svc, bucket string, h addr, chunkCount uint32) chunkS
 	n, err := source.readRange(buff, fmt.Sprintf("%s=-%d", s3RangePrefix, size))
 	d.PanicIfError(err)
 	d.PanicIfFalse(size == uint64(n))
+	index := parseTableIndex(buff)

-	source.tableReader = newTableReader(buff, source, s3ReadAmpThresh)
+	source.tableReader = newTableReader(index, source, s3ReadAmpThresh)
 	d.PanicIfFalse(chunkCount == source.count())
 	return source
 }

@@ -70,7 +70,7 @@ func (s3p s3TablePersister) Compact(mt *memTable, haver chunkReader) chunkSource
 		})
 		d.Chk.NoError(err)
 		s3tr := &s3TableReader{s3: s3p.s3, bucket: s3p.bucket, h: name}
-		s3tr.tableReader = newTableReader(data, s3tr, s3ReadAmpThresh)
+		s3tr.tableReader = newTableReader(parseTableIndex(data), s3tr, s3ReadAmpThresh)
 		return s3tr
 	}
 	return emptyChunkSource{}

@@ -14,20 +14,22 @@ import (
 	"github.com/golang/snappy"
 )

-// tableReader implements get & has queries against a single nbs table. goroutine safe.
-type tableReader struct {
-	r                 io.ReaderAt
-	suffixes          []byte
+type tableIndex struct {
+	chunkCount        uint32
 	prefixes, offsets []uint64
 	lengths, ordinals []uint32
-	chunkCount        uint32
-	readAmpThresh     uint64
+	suffixes          []byte
 }

-// newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index and footer, though it may contain an unspecified number of bytes before that data. r should allow retrieving any desired range of bytes from the table.
-func newTableReader(buff []byte, r io.ReaderAt, readAmpThresh uint64) tableReader {
-	tr := tableReader{r: r, readAmpThresh: readAmpThresh}
+// tableReader implements get & has queries against a single nbs table. goroutine safe.
+type tableReader struct {
+	tableIndex
+	r             io.ReaderAt
+	readAmpThresh uint64
+}
+
+// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index and footer, though it may contain an unspecified number of bytes before that data. |tableIndex| doesn't keep alive any references to |buff|.
+func parseTableIndex(buff []byte) tableIndex {
 	pos := uint64(len(buff))

 	// footer

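The new tableReader embeds tableIndex rather than holding it in a named field. Go's struct embedding promotes the embedded type's fields and methods onto the outer struct, which is why the reader methods later in this file can keep referring to tr.chunkCount, tr.prefixes, and tr.lookupOrdinal without renaming every call site. A minimal, self-contained illustration of that mechanism (index and reader are stand-in names, not the nbs types):

package main

import "fmt"

type index struct{ chunkCount uint32 }

func (i index) count() uint32 { return i.chunkCount }

type reader struct {
	index // embedded: index's fields and methods are promoted onto reader
	name  string
}

func main() {
	r := reader{index{7}, "t"}
	fmt.Println(r.count(), r.chunkCount) // 7 7 — both resolve through the embedded index
}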
@@ -38,21 +40,28 @@ func newTableReader(buff []byte, r io.ReaderAt, readAmpThresh uint64) tableReade
 	pos -= uint64Size

 	pos -= uint32Size
-	tr.chunkCount = binary.BigEndian.Uint32(buff[pos:])
+	chunkCount := binary.BigEndian.Uint32(buff[pos:])

 	// index
-	suffixesSize := uint64(tr.chunkCount) * addrSuffixSize
+	suffixesSize := uint64(chunkCount) * addrSuffixSize
 	pos -= suffixesSize
-	tr.suffixes = buff[pos : pos+suffixesSize]
+	suffixes := make([]byte, suffixesSize)
+	copy(suffixes, buff[pos:])

-	lengthsSize := uint64(tr.chunkCount) * lengthSize
+	lengthsSize := uint64(chunkCount) * lengthSize
 	pos -= lengthsSize
-	tr.lengths, tr.offsets = computeOffsets(tr.chunkCount, buff[pos:pos+lengthsSize])
+	lengths, offsets := computeOffsets(chunkCount, buff[pos:pos+lengthsSize])

-	tuplesSize := uint64(tr.chunkCount) * prefixTupleSize
+	tuplesSize := uint64(chunkCount) * prefixTupleSize
 	pos -= tuplesSize
-	tr.prefixes, tr.ordinals = computePrefixes(tr.chunkCount, buff[pos:pos+tuplesSize])
-	return tr
+	prefixes, ordinals := computePrefixes(chunkCount, buff[pos:pos+tuplesSize])
+
+	return tableIndex{
+		chunkCount,
+		prefixes, offsets,
+		lengths, ordinals,
+		suffixes,
+	}
 }

 func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) {

@@ -80,6 +89,54 @@ func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []u
 	return
 }

+func (ti tableIndex) prefixIdxToOrdinal(idx uint32) uint32 {
+	return ti.ordinals[idx]
+}
+
+// returns the first position in |tr.prefixes| whose value == |prefix|. Returns |tr.chunkCount|
+// if absent
+func (ti tableIndex) prefixIdx(prefix uint64) (idx uint32) {
+	// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
+	// an extremely tight loop and inlining the code was a significant perf improvement.
+	idx, j := 0, ti.chunkCount
+	for idx < j {
+		h := idx + (j-idx)/2 // avoid overflow when computing h
+		// i ≤ h < j
+		if ti.prefixes[h] < prefix {
+			idx = h + 1 // preserves f(i-1) == false
+		} else {
+			j = h // preserves f(j) == true
+		}
+	}
+
+	return
+}
+
+// Return true IFF the suffix at insertion order |ordinal| matches the address |a|.
+func (ti tableIndex) ordinalSuffixMatches(ordinal uint32, h addr) bool {
+	li := uint64(ordinal) * addrSuffixSize
+	return bytes.Compare(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) == 0
+}
+
+// returns the ordinal of |h| if present. returns |ti.chunkCount| if absent
+func (ti tableIndex) lookupOrdinal(h addr) uint32 {
+	prefix := h.Prefix()
+
+	for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ {
+		ordinal := ti.prefixIdxToOrdinal(idx)
+		if ti.ordinalSuffixMatches(ordinal, h) {
+			return ordinal
+		}
+	}
+
+	return ti.chunkCount
+}
+
+// newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index and footer, though it may contain an unspecified number of bytes before that data. r should allow retrieving any desired range of bytes from the table.
+func newTableReader(index tableIndex, r io.ReaderAt, readAmpThresh uint64) tableReader {
+	return tableReader{index, r, readAmpThresh}
+}
+
 // Scan across (logically) two ordered slices of address prefixes.
 func (tr tableReader) hasMany(addrs []hasRecord) (remaining bool) {
 	// TODO: Use findInIndex if (tr.chunkCount - len(addrs)*Log2(tr.chunkCount)) > (tr.chunkCount - len(addrs))

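The lookup scheme these new tableIndex methods implement: binary-search the sorted prefix array for the first entry whose uint64 address prefix matches, then walk forward over equal prefixes, mapping each entry to its ordinal and comparing the stored address suffix. A self-contained toy version of that two-step lookup (the data and the sort.Search call are illustrative only; the real code inlines the search and works over flat byte slices sized by the file's addr constants):

package main

import (
	"bytes"
	"fmt"
	"sort"
)

func main() {
	// One entry per chunk, sorted by prefix; ordinals record insertion order.
	prefixes := []uint64{7, 9, 9, 42}
	ordinals := []uint32{2, 0, 3, 1}
	// Suffixes are stored in insertion (ordinal) order.
	suffixes := [][]byte{{0xaa}, {0xbb}, {0xcc}, {0xdd}}

	lookupOrdinal := func(prefix uint64, suffix []byte) (uint32, bool) {
		// prefixIdx: first position whose prefix is >= the query prefix.
		idx := sort.Search(len(prefixes), func(i int) bool { return prefixes[i] >= prefix })
		// Scan all entries sharing the prefix; confirm via suffix comparison.
		for ; idx < len(prefixes) && prefixes[idx] == prefix; idx++ {
			ord := ordinals[idx]
			if bytes.Equal(suffixes[ord], suffix) {
				return ord, true
			}
		}
		return uint32(len(prefixes)), false // chunkCount sentinel means absent
	}

	fmt.Println(lookupOrdinal(9, []byte{0xdd})) // 3 true
	fmt.Println(lookupOrdinal(9, []byte{0xee})) // 4 false
}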
@@ -122,77 +179,33 @@ func (tr tableReader) hasMany(addrs []hasRecord) (remaining bool) {
 	return
 }

-func (tr tableReader) prefixIdxToOrdinal(idx uint32) uint32 {
-	return tr.ordinals[idx]
-}
-
-// returns the first position in |tr.prefixes| whose value == |prefix|. Returns |tr.chunkCount|
-// if absent
-func (tr tableReader) prefixIdx(prefix uint64) (idx uint32) {
-	// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
-	// an extremely tight loop and inlining the code was a significant perf improvement.
-	idx, j := 0, tr.chunkCount
-	for idx < j {
-		h := idx + (j-idx)/2 // avoid overflow when computing h
-		// i ≤ h < j
-		if tr.prefixes[h] < prefix {
-			idx = h + 1 // preserves f(i-1) == false
-		} else {
-			j = h // preserves f(j) == true
-		}
-	}
-
-	return
-}
-
 func (tr tableReader) count() uint32 {
 	return tr.chunkCount
 }

 // returns true iff |h| can be found in this table.
 func (tr tableReader) has(h addr) bool {
-	prefix := h.Prefix()
-	idx := tr.prefixIdx(prefix)
-
-	for ; idx < tr.chunkCount && tr.prefixes[idx] == prefix; idx++ {
-		if tr.ordinalSuffixMatches(tr.prefixIdxToOrdinal(idx), h) {
-			return true
-		}
-	}
-
-	return false
-}
-
-// Return true IFF the suffix at insertion order |ordinal| matches the address |a|.
-func (tr tableReader) ordinalSuffixMatches(ordinal uint32, a addr) bool {
-	li := uint64(ordinal) * addrSuffixSize
-	return bytes.Compare(a[addrPrefixSize:], tr.suffixes[li:li+addrSuffixSize]) == 0
+	ordinal := tr.lookupOrdinal(h)
+	return ordinal < tr.count()
 }

 // returns the storage associated with |h|, iff present. Returns nil if absent. On success,
 // the returned byte slice directly references the underlying storage.
 func (tr tableReader) get(h addr) (data []byte) {
-	prefix := h.Prefix()
-	idx := tr.prefixIdx(prefix)
-
-	for ; idx < tr.chunkCount && tr.prefixes[idx] == prefix; idx++ {
-		ordinal := tr.prefixIdxToOrdinal(idx)
-		if !tr.ordinalSuffixMatches(ordinal, h) {
-			continue
-		}
-
-		offset := tr.offsets[ordinal]
-		length := uint64(tr.lengths[ordinal])
-		buff := make([]byte, length) // TODO: Avoid this allocation for every get
-		n, err := tr.r.ReadAt(buff, int64(offset))
-		d.Chk.NoError(err)
-		d.Chk.True(n == int(length))
-		data = tr.parseChunk(h, buff)
-		if data != nil {
-			break
-		}
+	ordinal := tr.lookupOrdinal(h)
+	if ordinal == tr.count() {
+		return
 	}
+
+	offset := tr.offsets[ordinal]
+	length := uint64(tr.lengths[ordinal])
+	buff := make([]byte, length) // TODO: Avoid this allocation for every get
+	n, err := tr.r.ReadAt(buff, int64(offset))
+	d.Chk.NoError(err)
+	d.Chk.True(n == int(length))
+	data = tr.parseChunk(h, buff)
+	d.Chk.True(data != nil)

 	return
 }

@@ -185,7 +185,7 @@ func TestFSTablePersisterCompact(t *testing.T) {
 	if assert.True(src.count() > 0) {
 		buff, err := ioutil.ReadFile(filepath.Join(dir, src.hash().String()))
 		assert.NoError(err)
-		tr := newTableReader(buff, bytes.NewReader(buff), fileReadAmpThresh)
+		tr := newTableReader(parseTableIndex(buff), bytes.NewReader(buff), fileReadAmpThresh)
 		for _, c := range testChunks {
 			assert.True(tr.has(computeAddr(c)))
 		}

@@ -46,7 +46,7 @@ func TestSimple(t *testing.T) {
 	}

 	tableData, _ := buildTable(chunks)
-	tr := newTableReader(tableData, bytes.NewReader(tableData), fileReadAmpThresh)
+	tr := newTableReader(parseTableIndex(tableData), bytes.NewReader(tableData), fileReadAmpThresh)

 	assertChunksInReader(chunks, tr, assert)

@@ -89,7 +89,7 @@ func TestHasMany(t *testing.T) {
 	}

 	tableData, _ := buildTable(chunks)
-	tr := newTableReader(tableData, bytes.NewReader(tableData), fileReadAmpThresh)
+	tr := newTableReader(parseTableIndex(tableData), bytes.NewReader(tableData), fileReadAmpThresh)

 	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
 	hasAddrs := []hasRecord{

@@ -135,7 +135,7 @@ func TestHasManySequentialPrefix(t *testing.T) {
 	length, _ := tw.finish()
 	buff = buff[:length]

-	tr := newTableReader(buff, bytes.NewReader(buff), fileReadAmpThresh)
+	tr := newTableReader(parseTableIndex(buff), bytes.NewReader(buff), fileReadAmpThresh)

 	hasAddrs := make([]hasRecord, 2)
 	// Leave out the first address

@@ -159,7 +159,7 @@ func TestGetMany(t *testing.T) {
 	}

 	tableData, _ := buildTable(chunks)
-	tr := newTableReader(tableData, bytes.NewReader(tableData), fileReadAmpThresh)
+	tr := newTableReader(parseTableIndex(tableData), bytes.NewReader(tableData), fileReadAmpThresh)

 	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
 	getBatch := []getRecord{

@@ -190,7 +190,7 @@ func Test65k(t *testing.T) {
 	}

 	tableData, _ := buildTable(chunks)
-	tr := newTableReader(tableData, bytes.NewReader(tableData), fileReadAmpThresh)
+	tr := newTableReader(parseTableIndex(tableData), bytes.NewReader(tableData), fileReadAmpThresh)

 	for i := 0; i < count; i++ {
 		data := dataFn(i)

@@ -235,7 +235,7 @@ func doTestNGetMany(t *testing.T, count int) {
 	}

 	tableData, _ := buildTable(chunks)
-	tr := newTableReader(tableData, bytes.NewReader(tableData), fileReadAmpThresh)
+	tr := newTableReader(parseTableIndex(tableData), bytes.NewReader(tableData), fileReadAmpThresh)

 	getBatch := make([]getRecord, len(chunks))
 	for i := 0; i < count; i++ {