benchmarking Dolt against BBolt for batch writes

This commit is contained in:
Andy Arthur
2022-09-08 16:27:45 -07:00
parent 6330677422
commit cb6c67fa37
3 changed files with 206 additions and 0 deletions

View File

@@ -118,6 +118,7 @@ require (
github.com/stretchr/objx v0.2.0 // indirect
github.com/tklauser/numcpus v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.etcd.io/bbolt v1.3.6 // indirect
go.opencensus.io v0.22.4 // indirect
go.uber.org/atomic v1.6.0 // indirect
go.uber.org/multierr v1.5.0 // indirect

View File

@@ -757,6 +757,8 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg=
go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
@@ -964,6 +966,7 @@ golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View File

@@ -0,0 +1,202 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package benchmark
import (
"bytes"
"context"
"math/rand"
"os"
"path/filepath"
"sort"
"testing"
"github.com/dolthub/dolt/go/store/nbs"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/skip"
"github.com/dolthub/dolt/go/store/types"
"github.com/dolthub/dolt/go/store/val"
"github.com/stretchr/testify/require"
"go.etcd.io/bbolt"
)
// Sizing parameters shared by every benchmark in this file.
const (
// batch is the number of key/value pairs generated per data provider, and
// therefore the number of Puts issued between Flushes (1<<16 = 65536).
batch = 1 << 16
// sz is the width in bytes of each generated record; it also matches the
// 8-byte width of the Uint64 tuple encoding used for the Dolt map.
sz = 8
)
var (
// bucket is the name of the single BBolt bucket that the BBolt benchmark
// creates during setup and writes into on every Flush.
bucket = []byte("bolt")
)
// BenchmarkImportBBolt measures batched writes of random key/value pairs into
// a BBolt database that has syncing disabled.
func BenchmarkImportBBolt(b *testing.B) {
	// makeWriter opens a fresh database in a new temporary directory and
	// returns a writer over it. It takes the benchmark that will actually use
	// the writer so that failures and cleanup are attributed to that benchmark
	// rather than to its parent.
	makeWriter := func(b *testing.B) writer {
		dir, err := os.MkdirTemp("", "*")
		require.NoError(b, err)
		// Cleanups run last-in first-out: registering removal first means the
		// directory is deleted only after the database below has been closed.
		b.Cleanup(func() {
			if err := os.RemoveAll(dir); err != nil {
				b.Errorf("removing %q: %v", dir, err)
			}
		})

		db, err := bbolt.Open(filepath.Join(dir, "bolt.db"), 0666, &bbolt.Options{
			// turn off fsync
			NoGrowSync:     true,
			NoFreelistSync: true,
			NoSync:         true,
		})
		require.NoError(b, err)
		b.Cleanup(func() {
			if err := db.Close(); err != nil {
				b.Errorf("closing bbolt database: %v", err)
			}
		})

		// The bucket must exist before Flush can write into it.
		err = db.Update(func(tx *bbolt.Tx) error {
			_, err := tx.CreateBucket(bucket)
			return err
		})
		require.NoError(b, err)

		return &bboltWriter{
			edits: skip.NewSkipList(bytes.Compare),
			db:    db,
		}
	}

	b.Run("BBolt", func(b *testing.B) {
		benchmarkBatchWrite(b, makeWriter(b))
	})
}
// BenchmarkImportDolt measures batched writes of random key/value pairs into
// a Dolt prolly map backed by a local chunk store.
func BenchmarkImportDolt(b *testing.B) {
	// makeWriter creates a local chunk store in a new temporary directory and
	// returns a writer over an initially empty map. It takes the benchmark
	// that will actually use the writer so that failures and cleanup are
	// attributed to that benchmark rather than to its parent.
	makeWriter := func(b *testing.B) writer {
		ctx := context.Background()
		nbf := types.Format_DOLT

		memtable := uint64(256 * 1024 * 1024)
		quota := nbs.NewUnlimitedMemQuotaProvider()

		dir, err := os.MkdirTemp("", "*")
		require.NoError(b, err)
		// Cleanups run last-in first-out: registering removal first means the
		// directory is deleted only after the store below has been closed.
		b.Cleanup(func() {
			if err := os.RemoveAll(dir); err != nil {
				b.Errorf("removing %q: %v", dir, err)
			}
		})

		cs, err := nbs.NewLocalStore(ctx, nbf.VersionString(), dir, memtable, quota)
		require.NoError(b, err)
		b.Cleanup(func() {
			if err := cs.Close(); err != nil {
				b.Errorf("closing chunk store: %v", err)
			}
		})

		// Keys and values share one descriptor: a single Uint64-encoded field.
		desc := val.NewTupleDescriptor(val.Type{Enc: val.Uint64Enc})
		m, err := prolly.NewMapFromTuples(ctx, tree.NewNodeStore(cs), desc, desc)
		require.NoError(b, err)

		return &doltWriter{mut: m.Mutate()}
	}

	b.Run("Dolt", func(b *testing.B) {
		benchmarkBatchWrite(b, makeWriter(b))
	})
}
// bboltWriter is a writer that buffers edits in an in-memory skip list and
// writes them to a BBolt database when Flush is called.
type bboltWriter struct {
// edits holds the key/value pairs accepted by Put, ordered by the
// comparison function the list was constructed with.
edits *skip.List
// db is the BBolt database that Flush writes into.
db *bbolt.DB
}
// Put records the key/value pair in the in-memory edit list. Nothing is
// written to the database until Flush is called. It always returns nil.
func (wr *bboltWriter) Put(key, value []byte) error {
wr.edits.Put(key, value)
return nil
}
// Flush writes every buffered edit into the benchmark bucket inside a single
// read-write transaction and, once that transaction has been committed,
// discards the buffer so the next batch starts empty.
//
// If the transaction fails, the error is returned and the buffered edits are
// kept, so nothing that was Put is silently lost.
func (wr *bboltWriter) Flush() error {
	err := wr.db.Update(func(tx *bbolt.Tx) error {
		bkt := tx.Bucket(bucket)
		for iter := wr.edits.IterAtStart(); ; iter.Advance() {
			k, v := iter.Current()
			if k == nil {
				// A nil key is treated as the end of the edit list.
				return nil
			}
			if err := bkt.Put(k, v); err != nil {
				return err
			}
		}
	})
	if err != nil {
		return err
	}

	// Without this reset every later Flush would rewrite all earlier batches
	// as well, growing the work per flush with each batch.
	wr.edits = skip.NewSkipList(bytes.Compare)
	return nil
}
// doltWriter is a writer backed by a prolly MutableMap.
type doltWriter struct {
// mut receives every Put; Flush replaces it with a new mutable view of the
// map produced from the pending edits.
mut prolly.MutableMap
}
// Put forwards the key/value pair to the underlying mutable map using a
// background context, and returns whatever error that call reports.
func (wr *doltWriter) Put(key, value []byte) error {
	ctx := context.Background()
	return wr.mut.Put(ctx, key, value)
}
// Flush materializes the pending edits into a map and, on success, swaps in
// a fresh mutable view of that map for subsequent Puts. On failure the
// current mutable map is left in place and the error is returned.
func (wr *doltWriter) Flush() error {
	ctx := context.Background()
	flushed, err := wr.mut.Map(ctx)
	if err == nil {
		wr.mut = flushed.Mutate()
	}
	return err
}
// benchmarkBatchWrite issues b.N Puts against wr, calling Flush after every
// batch of `batch` Puts and once more at the end for any partial batch.
//
// Only Put and Flush are timed: the timer is reset after the first batch of
// input has been generated (which also discards any writer setup done by the
// caller), and it is paused while each subsequent batch is generated.
func benchmarkBatchWrite(b *testing.B, wr writer) {
	dp := newDataProvider(batch)
	pending := 0 // Puts issued since the last Flush

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		k, v := dp.next()
		require.NoError(b, wr.Put(k, v))
		pending++
		if !dp.empty() {
			continue
		}

		require.NoError(b, wr.Flush())
		pending = 0

		// Generating the next batch of input is not part of the write path.
		b.StopTimer()
		dp = newDataProvider(batch)
		b.StartTimer()
	}

	// Without a final flush the tail of the run is never written, and a run
	// with b.N < batch would not measure Flush at all.
	if pending > 0 {
		require.NoError(b, wr.Flush())
	}
}
// writer is the minimal batch-write interface that each benchmarked storage
// engine is adapted to.
type writer interface {
// Put submits one key/value pair to be written.
Put(key, value []byte) error
// Flush is called by the benchmark loop after each full batch of Puts.
Flush() error
}
type dataProvider struct {
buf []byte
}
var _ sort.Interface = &dataProvider{}
// newDataProvider returns a provider holding count records of sz bytes each,
// filled with bytes from math/rand.
func newDataProvider(count int) (dp *dataProvider) {
	raw := make([]byte, count*sz)
	rand.Read(raw)
	return &dataProvider{buf: raw}
}
// next removes the first record from the buffer and returns it as both the
// key and the value; the two returned slices refer to the same bytes.
// It panics if fewer than sz bytes can be sliced from the buffer.
func (dp *dataProvider) next() (k, v []byte) {
	head, rest := dp.buf[:sz], dp.buf[sz:]
	dp.buf = rest
	return head, head
}
// empty reports whether the buffer has no bytes left.
func (dp *dataProvider) empty() bool {
	remaining := len(dp.buf)
	return remaining == 0
}
// Len returns the number of whole sz-byte records left in the buffer.
// It is part of sort.Interface.
func (dp *dataProvider) Len() int {
	size := len(dp.buf)
	return size / sz
}
// Less reports whether record i sorts before record j under bytewise
// lexicographic comparison. It is part of sort.Interface.
func (dp *dataProvider) Less(i, j int) bool {
	lo, ro := i*sz, j*sz
	return bytes.Compare(dp.buf[lo:lo+sz], dp.buf[ro:ro+sz]) < 0
}
// swap is the scratch space used by Swap to hold one record while two
// records exchange places.
var swap [sz]byte

// Swap exchanges records i and j in place, going through the package-level
// scratch array. It is part of sort.Interface.
func (dp *dataProvider) Swap(i, j int) {
	io, jo := i*sz, j*sz
	first := dp.buf[io : io+sz]
	second := dp.buf[jo : jo+sz]

	copy(swap[:], first)
	copy(first, second)
	copy(second, swap[:])
}