dolt/go/store/nbs/table_test.go
Aaron Son 7406c4658a go/store/nbs: Fix some quota leaks in conjoin, GC.
Adds a paranoid mode where we noisily detect unclosed table files. The mode
can be enabled by setting an environment variable.

Fixes some unit tests so that all of go/store/... runs cleanly under the
paranoid mode.

Changes the quota interface to:
* Release |sz int| bytes instead of requiring a []byte with the correct length
  to show up.
* Work with |int| instead of |uint64|, since MaxUint64 is never allocatable and
  MaxInt32+z is only allocatable on 64-bit platforms.
* Not return an error on Release(). Implementations should not fail to release
  quota.
2023-02-16 16:01:20 -08:00
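
For context on how this shows up in the test file below: most tests hand parseTableIndexByCopy an &UnlimitedQuotaProvider{} and defer tr.close() so no table file is left open. A minimal sketch of the provider shape the bullets above describe might look like the following; the package, names, and signatures here are assumptions for illustration, not the exact interface in go/store/nbs:

package quotasketch // hypothetical example package, not part of dolt

import "context"

// quotaProvider sketches a quota interface whose Release takes a byte count
// (an int) and cannot fail, per the commit message above.
type quotaProvider interface {
	// Acquire reserves sz bytes of index memory and may fail when the
	// budget is exhausted. (Signature is an assumption for illustration.)
	Acquire(ctx context.Context, sz int) error
	// Release returns sz bytes to the budget. It takes a size rather than a
	// []byte and returns no error, since releasing quota should not fail.
	Release(sz int)
}

// unlimitedQuota mirrors the role UnlimitedQuotaProvider plays in the tests
// below: every acquire succeeds and release is a no-op.
type unlimitedQuota struct{}

func (unlimitedQuota) Acquire(ctx context.Context, sz int) error { return nil }
func (unlimitedQuota) Release(sz int) {}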

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"context"
	"encoding/binary"
	"fmt"
	"math/rand"
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/hash"
)

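// buildTable serializes the given chunks into an in-memory table file and
// returns the table bytes along with the table's address hash.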
func buildTable(chunks [][]byte) ([]byte, addr, error) {
	totalData := uint64(0)
	for _, chunk := range chunks {
		totalData += uint64(len(chunk))
	}
	capacity := maxTableSize(uint64(len(chunks)), totalData)

	buff := make([]byte, capacity)
	tw := newTableWriter(buff, nil)

	for _, chunk := range chunks {
		tw.addChunk(computeAddr(chunk), chunk)
	}

	length, blockHash, err := tw.finish()
	if err != nil {
		return nil, addr{}, err
	}

	return buff[:length], blockHash, nil
}

func mustGetString(assert *assert.Assertions, ctx context.Context, tr tableReader, data []byte) string {
	bytes, err := tr.get(ctx, computeAddr(data), &Stats{})
	assert.NoError(err)
	return string(bytes)
}

func TestSimple(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _, err := buildTable(chunks)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	assertChunksInReader(chunks, tr, assert)

	assert.Equal(string(chunks[0]), mustGetString(assert, context.Background(), tr, chunks[0]))
	assert.Equal(string(chunks[1]), mustGetString(assert, context.Background(), tr, chunks[1]))
	assert.Equal(string(chunks[2]), mustGetString(assert, context.Background(), tr, chunks[2]))

	notPresent := [][]byte{
		[]byte("yo"),
		[]byte("do"),
		[]byte("so much to do"),
	}

	assertChunksNotInReader(notPresent, tr, assert)

	assert.NotEqual(string(notPresent[0]), mustGetString(assert, context.Background(), tr, notPresent[0]))
	assert.NotEqual(string(notPresent[1]), mustGetString(assert, context.Background(), tr, notPresent[1]))
	assert.NotEqual(string(notPresent[2]), mustGetString(assert, context.Background(), tr, notPresent[2]))
}

func assertChunksInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) {
	for _, c := range chunks {
		assert.True(r.has(computeAddr(c)))
	}
}

func assertChunksNotInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) {
	for _, c := range chunks {
		assert.False(r.has(computeAddr(c)))
	}
}

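// TestHasMany builds a three-chunk table and checks that a prefix-sorted
// batch of hasRecords for those chunks all come back marked as present.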
func TestHasMany(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _, err := buildTable(chunks)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
	hasAddrs := []hasRecord{
		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), 0, false},
		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), 1, false},
		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), 2, false},
	}
	sort.Sort(hasRecordByPrefix(hasAddrs))

	_, err = tr.hasMany(hasAddrs)
	require.NoError(t, err)

	for _, ha := range hasAddrs {
		assert.True(ha.has, "Nothing for prefix %d", ha.prefix)
	}
}

func TestHasManySequentialPrefix(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	// Use bogus addrs so we can generate the case of sequentially non-unique prefixes in the index.
	// Note that these are already sorted.
	addrStrings := []string{
		"0rfgadopg6h3fk7d253ivbjsij4qo3nv",
		"0rfgadopg6h3fk7d253ivbjsij4qo4nv",
		"0rfgadopg6h3fk7d253ivbjsij4qo9nv",
	}

	addrs := make([]addr, len(addrStrings))
	for i, s := range addrStrings {
		addrs[i] = addr(hash.Parse(s))
	}

	bogusData := []byte("bogus") // doesn't matter what this is. hasMany() won't check chunkRecords

	totalData := uint64(len(bogusData) * len(addrs))
	capacity := maxTableSize(uint64(len(addrs)), totalData)

	buff := make([]byte, capacity)
	tw := newTableWriter(buff, nil)

	for _, a := range addrs {
		tw.addChunk(a, bogusData)
	}

	length, _, err := tw.finish()
	require.NoError(t, err)
	buff = buff[:length]

	ti, err := parseTableIndexByCopy(ctx, buff, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	hasAddrs := make([]hasRecord, 2)
	// Leave out the first address.
	hasAddrs[0] = hasRecord{&addrs[1], addrs[1].Prefix(), 1, false}
	hasAddrs[1] = hasRecord{&addrs[2], addrs[2].Prefix(), 2, false}

	_, err = tr.hasMany(hasAddrs)
	require.NoError(t, err)

	for _, ha := range hasAddrs {
		assert.True(ha.has, fmt.Sprintf("Nothing for prefix %x\n", ha.prefix))
	}
}

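// BenchmarkHasMany measures hasMany over a dense batch covering every chunk
// in a 64k-chunk table and over a sparse batch covering every 64th chunk.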
func BenchmarkHasMany(b *testing.B) {
	const cnt = 64 * 1024
	chnks := make([][]byte, cnt)
	addrs := make(addrSlice, cnt)
	hrecs := make([]hasRecord, cnt)
	sparse := make([]hasRecord, cnt/1024)

	data := make([]byte, cnt*16)
	rand.Read(data)
	for i := range chnks {
		chnks[i] = data[i*16 : (i+1)*16]
	}
	for i := range addrs {
		addrs[i] = computeAddr(chnks[i])
	}
	for i := range hrecs {
		hrecs[i] = hasRecord{
			a:      &addrs[i],
			prefix: prefixOf(addrs[i]),
			order:  i,
		}
	}

	// Populate the sparse batch with every 64th address from the table.
	for i := range sparse {
		j := i * 64
		sparse[i] = hasRecord{
			a:      &addrs[j],
			prefix: prefixOf(addrs[j]),
			order:  j,
		}
	}

	sort.Sort(hasRecordByPrefix(hrecs))
	sort.Sort(hasRecordByPrefix(sparse))

	ctx := context.Background()
	tableData, _, err := buildTable(chnks)
	require.NoError(b, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(b, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(b, err)
	defer tr.close()
	b.ResetTimer()

	b.Run("dense has many", func(b *testing.B) {
		var ok bool
		for i := 0; i < b.N; i++ {
			ok, err = tr.hasMany(hrecs)
		}
		assert.False(b, ok)
		assert.NoError(b, err)
	})

b.Run("sparse has many", func(b *testing.B) {
var ok bool
for i := 0; i < b.N; i++ {
ok, err = tr.hasMany(sparse)
}
assert.True(b, ok)
assert.NoError(b, err)
})
}
func prefixOf(a addr) uint64 {
	return binary.BigEndian.Uint64(a[:addrPrefixSize])
}

func TestGetMany(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	data := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _, err := buildTable(data)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])}
	getBatch := []getRecord{
		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false},
		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false},
	}
	sort.Sort(getRecordByPrefix(getBatch))

	eg, ctx := errgroup.WithContext(context.Background())
	got := make([]*chunks.Chunk, 0)
	_, err = tr.getMany(ctx, eg, getBatch, func(ctx context.Context, c *chunks.Chunk) { got = append(got, c) }, &Stats{})
	require.NoError(t, err)
	require.NoError(t, eg.Wait())

	assert.True(len(got) == len(getBatch))
}

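// TestCalcReads verifies read coalescing: a batch of three adjacent chunks
// can be satisfied by a single read, while skipping the middle chunk forces
// two reads.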
func TestCalcReads(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _, err := buildTable(chunks)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), 0)
	require.NoError(t, err)
	defer tr.close()

	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
	getBatch := []getRecord{
		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false},
		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false},
	}

	gb2 := []getRecord{getBatch[0], getBatch[2]}
	sort.Sort(getRecordByPrefix(getBatch))

	reads, remaining, err := tr.calcReads(getBatch, 0)
	require.NoError(t, err)
	assert.False(remaining)
	assert.Equal(1, reads)

	sort.Sort(getRecordByPrefix(gb2))
	reads, remaining, err = tr.calcReads(gb2, 0)
	require.NoError(t, err)
	assert.False(remaining)
	assert.Equal(2, reads)
}

func TestExtract(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _, err := buildTable(chunks)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}

	chunkChan := make(chan extractRecord)
	go func() {
		err := tr.extract(context.Background(), chunkChan)
		require.NoError(t, err)
		close(chunkChan)
	}()

	i := 0
	for rec := range chunkChan {
		assert.NotNil(rec.data, "Nothing for %v", addrs[i])
		assert.Equal(addrs[i], rec.a)
		assert.Equal(chunks[i], rec.data)
		i++
	}
}

func Test65k(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)

	count := 1 << 16
	chunks := make([][]byte, count)

	dataFn := func(i int) []byte {
		return []byte(fmt.Sprintf("data%d", i*2))
	}

	for i := 0; i < count; i++ {
		chunks[i] = dataFn(i)
	}

	tableData, _, err := buildTable(chunks)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	for i := 0; i < count; i++ {
		data := dataFn(i)
		h := computeAddr(data)
		assert.True(tr.has(computeAddr(data)))
		bytes, err := tr.get(context.Background(), h, &Stats{})
		require.NoError(t, err)
		assert.Equal(string(data), string(bytes))
	}

	for i := count; i < count*2; i++ {
		data := dataFn(i)
		h := computeAddr(data)
		assert.False(tr.has(computeAddr(data)))
		bytes, err := tr.get(context.Background(), h, &Stats{})
		require.NoError(t, err)
		assert.NotEqual(string(data), string(bytes))
	}
}

// Ensure all addresses share the first 7 bytes. Useful for easily generating tests which have
// "prefix" collisions.
func computeAddrCommonPrefix(data []byte) addr {
	a := computeAddrDefault(data)
	a[0] = 0x01
	a[1] = 0x23
	a[2] = 0x45
	a[3] = 0x67
	a[4] = 0x89
	a[5] = 0xab
	a[6] = 0xcd
	return a
}

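// doTestNGetMany builds a table with count generated chunks and asserts that
// getMany yields a chunk for every record in the batch.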
func doTestNGetMany(t *testing.T, count int) {
	ctx := context.Background()
	assert := assert.New(t)

	data := make([][]byte, count)

	dataFn := func(i int) []byte {
		return []byte(fmt.Sprintf("data%d", i*2))
	}

	for i := 0; i < count; i++ {
		data[i] = dataFn(i)
	}

	tableData, _, err := buildTable(data)
	require.NoError(t, err)
	ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	getBatch := make([]getRecord, len(data))
	for i := 0; i < count; i++ {
		a := computeAddr(dataFn(i))
		getBatch[i] = getRecord{&a, a.Prefix(), false}
	}

	sort.Sort(getRecordByPrefix(getBatch))

	eg, ctx := errgroup.WithContext(context.Background())
	got := make([]*chunks.Chunk, 0)
	_, err = tr.getMany(ctx, eg, getBatch, func(ctx context.Context, c *chunks.Chunk) { got = append(got, c) }, &Stats{})
	require.NoError(t, err)
	require.NoError(t, eg.Wait())

	assert.True(len(got) == len(getBatch))
}

func Test65kGetMany(t *testing.T) {
	doTestNGetMany(t, 1<<16)
}

func Test2kGetManyCommonPrefix(t *testing.T) {
	computeAddr = computeAddrCommonPrefix
	defer func() {
		computeAddr = computeAddrDefault
	}()

	doTestNGetMany(t, 1<<11)
}

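// TestEmpty writes a table with no chunks and checks that it serializes to
// exactly the footer.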
func TestEmpty(t *testing.T) {
	assert := assert.New(t)

	buff := make([]byte, footerSize)
	tw := newTableWriter(buff, nil)
	length, _, err := tw.finish()
	require.NoError(t, err)
	assert.True(length == footerSize)
}