mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-20 02:24:58 -05:00
Merge pull request #4617 from dolthub/andy/faster-table-index-lookup
go/store/nbs: Faster prefix lookup for table file indices
This commit is contained in:
@@ -304,7 +304,7 @@ func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) {
|
||||
func (ti onHeapTableIndex) lookupOrdinal(h *addr) (uint32, error) {
|
||||
prefix := h.Prefix()
|
||||
|
||||
for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixAt(idx) == prefix; idx++ {
|
||||
for idx := ti.findPrefix(prefix); idx < ti.chunkCount && ti.prefixAt(idx) == prefix; idx++ {
|
||||
m, err := ti.EntrySuffixMatches(idx, h)
|
||||
if err != nil {
|
||||
return ti.chunkCount, err
|
||||
@@ -317,22 +317,24 @@ func (ti onHeapTableIndex) lookupOrdinal(h *addr) (uint32, error) {
|
||||
return ti.chunkCount, nil
|
||||
}
|
||||
|
||||
// prefixIdx returns the first position in |tr.prefixes| whose value ==
|
||||
// |prefix|. Returns |tr.chunkCount| if absent
|
||||
func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
|
||||
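// findPrefix binary-searches the index's prefixes and returns the first position whose value is >= |prefix|.
|
||||
// Returns |ti.chunkCount| if every prefix is smaller; callers must verify an exact match at the returned position.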
|
||||
func (ti onHeapTableIndex) findPrefix(prefix uint64) (idx uint32) {
|
||||
query := make([]byte, addrPrefixSize)
|
||||
binary.BigEndian.PutUint64(query, prefix)
|
||||
// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
|
||||
// an extremely tight loop and inlining the code was a significant perf improvement.
|
||||
idx, j := 0, ti.chunkCount
|
||||
for idx < j {
|
||||
h := idx + (j-idx)/2 // avoid overflow when computing h
|
||||
// i ≤ h < j
|
||||
if ti.prefixAt(h) < prefix {
|
||||
o := int64(prefixTupleSize * h)
|
||||
if bytes.Compare(ti.tupleB[o:o+addrPrefixSize], query) < 0 {
|
||||
idx = h + 1 // preserves f(i-1) == false
|
||||
} else {
|
||||
j = h // preserves f(j) == true
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -475,7 +477,7 @@ func (ti onHeapTableIndex) ResolveShortHash(short []byte) ([]string, error) {
|
||||
sPrefix := ti.padStringAndDecode(shortHash, "0")
|
||||
|
||||
// Binary Search for prefix
|
||||
pIdxL = ti.prefixIdx(sPrefix)
|
||||
pIdxL = ti.findPrefix(sPrefix)
|
||||
|
||||
// Prefix doesn't exist
|
||||
if pIdxL == ti.chunkCount {
|
||||
|
||||
@@ -50,6 +50,53 @@ func TestParseTableIndex(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFindPrefix(b *testing.B) {
|
||||
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
|
||||
require.NoError(b, err)
|
||||
defer f.Close()
|
||||
bs, err := io.ReadAll(f)
|
||||
require.NoError(b, err)
|
||||
idx, err := parseTableIndexByCopy(bs, &noopQuotaProvider{})
|
||||
require.NoError(b, err)
|
||||
defer idx.Close()
|
||||
assert.Equal(b, uint32(596), idx.ChunkCount())
|
||||
|
||||
prefixes, err := idx.Prefixes()
|
||||
require.NoError(b, err)
|
||||
|
||||
b.Run("benchmark prefixIdx()", func(b *testing.B) {
|
||||
var ord uint32
|
||||
for i := 0; i < b.N; i++ {
|
||||
ord = prefixIdx(idx, prefixes[uint(i)&uint(512)])
|
||||
}
|
||||
assert.True(b, ord < 596)
|
||||
})
|
||||
b.Run("benchmark findPrefix", func(b *testing.B) {
|
||||
var ord uint32
|
||||
for i := 0; i < b.N; i++ {
|
||||
ord = idx.findPrefix(prefixes[uint(i)&uint(512)])
|
||||
}
|
||||
assert.True(b, ord < 596)
|
||||
})
|
||||
}
|
||||
|
||||
// previous implementation for findIndex().
|
||||
func prefixIdx(ti onHeapTableIndex, prefix uint64) (idx uint32) {
|
||||
// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
|
||||
// an extremely tight loop and inlining the code was a significant perf improvement.
|
||||
idx, j := 0, ti.chunkCount
|
||||
for idx < j {
|
||||
h := idx + (j-idx)/2 // avoid overflow when computing h
|
||||
// i ≤ h < j
|
||||
if ti.prefixAt(h) < prefix {
|
||||
idx = h + 1 // preserves f(i-1) == false
|
||||
} else {
|
||||
j = h // preserves f(j) == true
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func TestMMapIndex(t *testing.T) {
|
||||
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
|
||||
require.NoError(t, err)
|
||||
|
||||
Reference in New Issue
Block a user