From cb6c67fa3752b0d0fae06d60e046f9675574d6e8 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Thu, 8 Sep 2022 16:27:45 -0700 Subject: [PATCH] benchmarking Dolt against BBolt for batch writes --- go/go.mod | 1 + go/go.sum | 3 + .../benchmark/benchmark_batch_writes_test.go | 202 ++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 go/store/prolly/benchmark/benchmark_batch_writes_test.go diff --git a/go/go.mod b/go/go.mod index 0ed27ec8a4..7eab491a19 100644 --- a/go/go.mod +++ b/go/go.mod @@ -118,6 +118,7 @@ require ( github.com/stretchr/objx v0.2.0 // indirect github.com/tklauser/numcpus v0.3.0 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect + go.etcd.io/bbolt v1.3.6 // indirect go.opencensus.io v0.22.4 // indirect go.uber.org/atomic v1.6.0 // indirect go.uber.org/multierr v1.5.0 // indirect diff --git a/go/go.sum b/go/go.sum index 3e3b62a9e1..4ade46ba91 100644 --- a/go/go.sum +++ b/go/go.sum @@ -757,6 +757,8 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= @@ -964,6 +966,7 @@ golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/go/store/prolly/benchmark/benchmark_batch_writes_test.go b/go/store/prolly/benchmark/benchmark_batch_writes_test.go new file mode 100644 index 0000000000..8ee8f93c6d --- /dev/null +++ b/go/store/prolly/benchmark/benchmark_batch_writes_test.go @@ -0,0 +1,202 @@ +// Copyright 2021 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package benchmark
+
+import (
+	"bytes"
+	"context"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"sort"
+	"testing"
+
+	"github.com/dolthub/dolt/go/store/nbs"
+	"github.com/dolthub/dolt/go/store/prolly"
+	"github.com/dolthub/dolt/go/store/prolly/tree"
+	"github.com/dolthub/dolt/go/store/skip"
+	"github.com/dolthub/dolt/go/store/types"
+	"github.com/dolthub/dolt/go/store/val"
+
+	"github.com/stretchr/testify/require"
+	"go.etcd.io/bbolt"
+)
+
+const (
+	// batch is the number of key/value pairs written between two flushes.
+	batch = 1 << 16
+	// sz is the size in bytes of every generated key (and value).
+	sz = 8
+)
+
+var (
+	// bucket names the single bbolt bucket that receives every write.
+	bucket = []byte("bolt")
+)
+
+// BenchmarkImportBBolt measures batched writes of random keys into a bbolt
+// database that is opened with its sync options disabled.
+func BenchmarkImportBBolt(b *testing.B) {
+	// makeWriter receives the *testing.B of the benchmark that is actually
+	// running, so failures and cleanups attach to that benchmark rather
+	// than to its parent.
+	makeWriter := func(b *testing.B) writer {
+		path := filepath.Join(b.TempDir(), "bolt.db")
+
+		db, err := bbolt.Open(path, 0666, &bbolt.Options{
+			// turn off fsync
+			NoGrowSync:     true,
+			NoFreelistSync: true,
+			NoSync:         true,
+		})
+		require.NoError(b, err)
+		// Cleanups run last-in first-out, so the database is closed
+		// before the temporary directory created above is removed.
+		b.Cleanup(func() {
+			if err := db.Close(); err != nil {
+				b.Errorf("closing bbolt db: %v", err)
+			}
+		})
+
+		err = db.Update(func(tx *bbolt.Tx) error {
+			_, err := tx.CreateBucket(bucket)
+			return err
+		})
+		require.NoError(b, err)
+
+		return &bboltWriter{
+			edits: skip.NewSkipList(bytes.Compare),
+			db:    db,
+		}
+	}
+
+	b.Run("BBolt", func(b *testing.B) {
+		benchmarkBatchWrite(b, makeWriter(b))
+	})
+}
+
+// BenchmarkImportDolt measures batched writes of random keys into a prolly
+// map backed by a local NBS store created in a temporary directory.
+func BenchmarkImportDolt(b *testing.B) {
+	// makeWriter receives the *testing.B of the benchmark that is actually
+	// running, so failures and cleanups attach to that benchmark rather
+	// than to its parent.
+	makeWriter := func(b *testing.B) writer {
+		ctx := context.Background()
+		nbf := types.Format_DOLT
+		memtable := uint64(256 * 1024 * 1024)
+		quota := nbs.NewUnlimitedMemQuotaProvider()
+
+		path, err := os.MkdirTemp("", "*")
+		require.NoError(b, err)
+		// Best-effort removal: the store is never closed here, so the
+		// delete may fail on platforms that refuse to remove open files.
+		b.Cleanup(func() { _ = os.RemoveAll(path) })
+
+		cs, err := nbs.NewLocalStore(ctx, nbf.VersionString(), path, memtable, quota)
+		require.NoError(b, err)
+
+		desc := val.NewTupleDescriptor(val.Type{Enc: val.Uint64Enc})
+		m, err := prolly.NewMapFromTuples(ctx, tree.NewNodeStore(cs), desc, desc)
+		require.NoError(b, err)
+
+		return &doltWriter{mut: m.Mutate()}
+	}
+
+	b.Run("Dolt", func(b *testing.B) {
+		benchmarkBatchWrite(b, makeWriter(b))
+	})
+}
+
+// bboltWriter buffers edits in a skip list ordered by bytes.Compare and
+// writes them to bbolt in a single transaction on Flush.
+type bboltWriter struct {
+	edits *skip.List
+	db    *bbolt.DB
+}
+
+// Put records the pair in the pending edit list; nothing reaches bbolt until
+// Flush is called. It always returns nil.
+func (wr *bboltWriter) Put(key, value []byte) error {
+	wr.edits.Put(key, value)
+	return nil
+}
+
+// Flush writes every pending edit to the bucket inside one bbolt update
+// transaction and, on success, starts a fresh pending list. If the
+// transaction fails the pending edits are kept and the error is returned.
+func (wr *bboltWriter) Flush() error {
+	err := wr.db.Update(func(tx *bbolt.Tx) error {
+		bkt := tx.Bucket(bucket)
+		iter := wr.edits.IterAtStart()
+		for {
+			k, v := iter.Current()
+			if k == nil {
+				// a nil key marks the end of the list
+				return nil
+			}
+			if err := bkt.Put(k, v); err != nil {
+				return err
+			}
+			iter.Advance()
+		}
+	})
+	if err != nil {
+		return err
+	}
+	// Without this reset every later Flush would rewrite all earlier
+	// batches, making each flush more expensive than the one before it.
+	wr.edits = skip.NewSkipList(bytes.Compare)
+	return nil
+}
+
+// doltWriter applies edits to a prolly.MutableMap and materializes them on
+// Flush.
+type doltWriter struct {
+	mut prolly.MutableMap
+}
+
+// Put forwards the pair to the mutable map.
+func (wr *doltWriter) Put(key, value []byte) error {
+	return wr.mut.Put(context.Background(), key, value)
+}
+
+// Flush builds a Map from the mutable map and continues with a new mutable
+// map derived from that result.
+func (wr *doltWriter) Flush() error {
+	m, err := wr.mut.Map(context.Background())
+	if err != nil {
+		return err
+	}
+	wr.mut = m.Mutate()
+	return nil
+}
+
+// benchmarkBatchWrite puts b.N random pairs into wr, flushing after every
+// batch pairs and once more at the end when a partial batch is outstanding.
+func benchmarkBatchWrite(b *testing.B, wr writer) {
+	dp := newDataProvider(batch)
+	// The writer was constructed by the caller inside the timed region;
+	// discard that setup time so only writes are measured.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		k, v := dp.next()
+		require.NoError(b, wr.Put(k, v))
+		if dp.empty() {
+			require.NoError(b, wr.Flush())
+			dp = newDataProvider(batch)
+		}
+	}
+	// A provider holding fewer than batch items has handed out pairs that
+	// have not been flushed yet.
+	if dp.Len() < batch {
+		require.NoError(b, wr.Flush())
+	}
+}
+
+// writer is the minimal batch-write API shared by the benchmarked stores.
+type writer interface {
+	Put(key, value []byte) error
+	Flush() error
+}
+
+// dataProvider hands out fixed-size random items from a single buffer. It
+// also implements sort.Interface over the items that remain.
+type dataProvider struct {
+	buf []byte
+}
+
+var _ sort.Interface = (*dataProvider)(nil)
+
+// newDataProvider returns a provider holding count random items of sz bytes.
+func newDataProvider(count int) (dp *dataProvider) {
+	dp = &dataProvider{buf: make([]byte, count*sz)}
+	// math/rand's Read is documented to always return a nil error.
+	rand.Read(dp.buf)
+	return dp
+}
+
+// next removes the first sz bytes from the buffer and returns them as both
+// the key and the value; the two slices alias the same bytes. It must not be
+// called once empty reports true.
+func (dp *dataProvider) next() (k, v []byte) {
+	// Cap the slice so an append by a consumer cannot overwrite the items
+	// that follow it in the shared buffer.
+	item := dp.buf[:sz:sz]
+	dp.buf = dp.buf[sz:]
+	return item, item
+}
+
+// empty reports whether every item has been handed out.
+func (dp *dataProvider) empty() bool {
+	return len(dp.buf) == 0
+}
+
+// Len returns the number of items remaining in the buffer.
+func (dp *dataProvider) Len() int {
+	return len(dp.buf) / sz
+}
+
+// Less orders items i and j by bytes.Compare.
+func (dp *dataProvider) Less(i, j int) bool {
+	l := dp.buf[i*sz : (i*sz)+sz]
+	r := dp.buf[j*sz : (j*sz)+sz]
+	return bytes.Compare(l, r) < 0
+}
+
+// swap is scratch space for Swap. It is shared by all providers, so Swap is
+// not safe for concurrent use.
+var swap [sz]byte
+
+// Swap exchanges items i and j in place.
+func (dp *dataProvider) Swap(i, j int) {
+	l := dp.buf[i*sz : (i*sz)+sz]
+	r := dp.buf[j*sz : (j*sz)+sz]
+	copy(swap[:], l)
+	copy(l, r)
+	copy(r, swap[:])
+}