WIP add dolt performance benchmarks

This commit is contained in:
Dustin Brown
2019-11-12 18:10:01 -08:00
parent a2ee8f8df9
commit ba7e8f34ce
6 changed files with 1162 additions and 0 deletions

View File

@@ -0,0 +1,254 @@
// Copyright 2019 Liquidata, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
"context"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"testing"

"github.com/liquidata-inc/dolt/go/cmd/dolt/commands"
"github.com/liquidata-inc/dolt/go/cmd/dolt/commands/tblcmds"
"github.com/liquidata-inc/dolt/go/libraries/doltcore/dbfactory"
"github.com/liquidata-inc/dolt/go/libraries/doltcore/doltdb"
"github.com/liquidata-inc/dolt/go/libraries/doltcore/env"
"github.com/liquidata-inc/dolt/go/libraries/utils/filesys"
"github.com/liquidata-inc/dolt/go/libraries/utils/test"
"github.com/liquidata-inc/dolt/go/store/types"
)
const (
testHomeDir = "/user/tester"
)
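// doltCommandFunc matches the signature of the dolt cli command handlers invoked by these benchmarks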
type doltCommandFunc func(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv) int
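// removeTempDoltDataDir removes the .dolt data directory from the current working directory, if present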
func removeTempDoltDataDir(fs filesys.Filesys) {
cwd, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
doltDir := filepath.Join(cwd, dbfactory.DoltDir)
exists, _ := fs.Exists(doltDir)
if exists {
err := fs.Delete(doltDir, true)
if err != nil {
log.Fatal(err)
}
}
}
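// getWorkingDir creates and returns a temporary working directory for the benchmarks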
func getWorkingDir(fs filesys.Filesys) string {
workingDir := test.TestDir(testHomeDir)
err := fs.MkDirs(workingDir)
if err != nil {
log.Fatal(err)
}
return workingDir
}
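// createTestEnvWithFS initializes a fresh dolt repository in workingDir and returns its environment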
func createTestEnvWithFS(fs filesys.Filesys, workingDir string) *env.DoltEnv {
removeTempDoltDataDir(fs)
testHomeDirFunc := func() (string, error) { return workingDir, nil }
const name = "test mcgibbins"
const email = "bigfakeytester@fake.horse"
dEnv := env.Load(context.Background(), testHomeDirFunc, fs, doltdb.LocalDirDoltDB)
err := dEnv.InitRepo(context.Background(), types.Format_7_18, name, email)
if err != nil {
panic("failed to initialize test repository: " + err.Error())
}
return dEnv
}
// BenchmarkDoltImport returns a function that runs benchmarks for importing
// a test dataset into Dolt
func BenchmarkDoltImport(rows int, cols []*SeedColumn, format string) func(b *testing.B) {
fs := filesys.LocalFS
wd := getWorkingDir(fs)
return func(b *testing.B) {
doltImport(b, fs, rows, cols, wd, format)
}
}
// BenchmarkDoltExport returns a function that runs benchmarks for exporting
// a test dataset out of Dolt
func BenchmarkDoltExport(rows int, cols []*SeedColumn, format string) func(b *testing.B) {
fs := filesys.LocalFS
wd := getWorkingDir(fs)
return func(b *testing.B) {
doltExport(b, fs, rows, cols, wd, format)
}
}
// BenchmarkDoltSQLSelect returns a function that runs benchmarks for executing a sql query
// against a Dolt table
func BenchmarkDoltSQLSelect(rows int, cols []*SeedColumn, format string) func(b *testing.B) {
fs := filesys.LocalFS
wd := getWorkingDir(fs)
return func(b *testing.B) {
doltSQLSelect(b, fs, rows, cols, wd, format)
}
}
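// doltImport benchmarks importing a generated dataset into dolt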
func doltImport(b *testing.B, fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, format string) {
pathToImportFile := filepath.Join(workingDir, fmt.Sprintf("testData%s", format))
oldStdin := os.Stdin
defer func() { os.Stdin = oldStdin }()
commandFunc, commandStr, args, dEnv := getBenchmarkingTools(fs, rows, cols, workingDir, pathToImportFile, format)
runBenchmark(b, commandFunc, commandStr, args, dEnv)
}
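// doltExport imports a generated dataset once, then benchmarks "dolt table export"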
func doltExport(b *testing.B, fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, format string) {
pathToImportFile := filepath.Join(workingDir, fmt.Sprintf("testData%s", format))
oldStdin := os.Stdin
commandFunc, commandStr, args, dEnv := getBenchmarkingTools(fs, rows, cols, workingDir, pathToImportFile, format)
// import
status := commandFunc(context.Background(), commandStr, args, dEnv)
if status != 0 {
log.Fatalf("import failed with exit code %d\n", status)
}
// revert stdin
os.Stdin = oldStdin
args = []string{"-f", "testTable", pathToImportFile}
runBenchmark(b, tblcmds.Export, "dolt table export", args, dEnv)
}
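// doltSQLSelect imports a generated dataset once, then benchmarks a select count(*) query via "dolt sql"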
func doltSQLSelect(b *testing.B, fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, format string) {
testTable := "testTable"
pathToImportFile := filepath.Join(workingDir, fmt.Sprintf("testData%s", format))
oldStdin := os.Stdin
commandFunc, commandStr, args, dEnv := getBenchmarkingTools(fs, rows, cols, workingDir, pathToImportFile, format)
// import
status := commandFunc(context.Background(), commandStr, args, dEnv)
if status != 0 {
log.Fatalf("import failed with exit code %d\n", status)
}
// revert stdin
os.Stdin = oldStdin
args = []string{"-q", fmt.Sprintf("select count(*) from %s", testTable)}
runBenchmark(b, commands.Sql, "dolt sql", args, dEnv)
}
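// runBenchmark executes the given dolt command b.N times, failing fast on a non-zero exit code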
func runBenchmark(b *testing.B, commandFunc doltCommandFunc, commandStr string, args []string, dEnv *env.DoltEnv) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
status := commandFunc(context.Background(), commandStr, args, dEnv)
if status != 0 {
log.Fatalf("running benchmark failed with exit code... %d \n", status)
}
}
}
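// getBenchmarkingTools writes the seed data for the given format and returns the dolt command, arguments, and environment needed to benchmark it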
func getBenchmarkingTools(fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, pathToImportFile, format string) (commandFunc doltCommandFunc, commandStr string, args []string, dEnv *env.DoltEnv) {
testTable := "testTable"
sch := NewSeedSchema(rows, cols, format)
switch format {
case csvExt:
dEnv = setupDEnvImport(fs, sch, workingDir, testTable, "", pathToImportFile)
args = []string{"-c", "-f", testTable, pathToImportFile}
commandStr = "dolt table import"
commandFunc = tblcmds.Import
case sqlExt:
dEnv = setupDEnvImport(fs, sch, workingDir, testTable, "", pathToImportFile)
args = []string{}
commandStr = "dolt sql"
commandFunc = commands.Sql
stdin := getStdinForSQLBenchmark(fs, pathToImportFile)
os.Stdin = stdin
case jsonExt:
pathToSchemaFile := filepath.Join(workingDir, fmt.Sprintf("testSchema%s", format))
dEnv = setupDEnvImport(fs, sch, workingDir, testTable, pathToSchemaFile, pathToImportFile)
args = []string{"-c", "-f", "-s", pathToSchemaFile, testTable, pathToImportFile}
commandStr = "dolt table import"
commandFunc = tblcmds.Import
default:
log.Fatalf("cannot import file, unsupported file format %s \n", format)
}
return commandFunc, commandStr, args, dEnv
}
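// setupDEnvImport writes the seed data file (and, for json, a schema file) and returns a fresh test environment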
func setupDEnvImport(fs filesys.Filesys, sch *SeedSchema, workingDir, tableName, pathToSchemaFile, pathToImportFile string) *env.DoltEnv {
wc, err := fs.OpenForWrite(pathToImportFile)
if err != nil {
log.Fatal(err)
}
defer wc.Close()
ds := NewDSImpl(wc, sch, tableName)
if pathToSchemaFile != "" {
// write schema file
err := fs.WriteFile(pathToSchemaFile, sch.Bytes())
if err != nil {
panic("unable to write data file to filesystem")
}
}
ds.GenerateData()
return createTestEnvWithFS(fs, workingDir)
}
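// getStdinForSQLBenchmark copies the generated sql file into a temp file and reopens it for reading
// so it can stand in for stdin; the temp file is unlinked on return, but the open descriptor keeps
// it readable on unix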
func getStdinForSQLBenchmark(fs filesys.Filesys, pathToImportFile string) *os.File {
content, err := fs.ReadFile(pathToImportFile)
if err != nil {
log.Fatal(err)
}
tmpfile, err := ioutil.TempFile("", "temp")
if err != nil {
log.Fatal(err)
}
defer os.Remove(tmpfile.Name()) // clean up
if _, err := tmpfile.Write(content); err != nil {
log.Fatal(err)
}
if err := tmpfile.Close(); err != nil {
log.Fatal(err)
}
f, err := os.Open(tmpfile.Name())
if err != nil {
log.Fatal(err)
}
return f
}
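
For orientation, a minimal sketch of how the builders above are consumed (it mirrors the main function later in this commit; the helper name and row count are hypothetical):

package main

import (
    "fmt"
    "testing"
)

// runOneImportBenchmark is a hypothetical example: BenchmarkDoltImport returns a
// closure, and testing.Benchmark drives that closure b.N times and reports timing.
func runOneImportBenchmark() {
    bm := BenchmarkDoltImport(1000, genSampleCols(), csvExt)
    br := testing.Benchmark(bm)
    fmt.Println(br.String())
}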

View File

@@ -0,0 +1,194 @@
// Copyright 2019 Liquidata, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
"io"
"log"
"strings"
)
// Dataset is a set of test data used for benchmark testing
type Dataset interface {
// GenerateData writes the test dataset to the Dataset's writer
GenerateData()
// Change returns a Dataset mutated by the given percentage of change
Change(pct float32) Dataset
}
// DSImpl implements the Dataset interface
type DSImpl struct {
// Schema defines the structure of the Dataset
Schema *SeedSchema
// TableName is the name of the test dataset
TableName string
wc io.Writer
}
// NewDSImpl creates a new DSImpl
func NewDSImpl(wc io.Writer, sch *SeedSchema, tableName string) *DSImpl {
return &DSImpl{Schema: sch, TableName: tableName, wc: wc}
}
// GenerateData writes the test dataset to the DSImpl's writer, formatted according to its Schema
func (ds *DSImpl) GenerateData() {
generateTestData(ds.wc, ds.Schema.Rows, ds.Schema.Columns, ds.TableName, ds.Schema.FileFormatExt)
}
// Change returns a DataSet that is a mutation of this Dataset by the given percentage
func (ds *DSImpl) Change(pct float32) Dataset {
// TODO
return &DSImpl{}
}
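// generateTestData writes a generated dataset with the given dimensions to wc in the given format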
func generateTestData(wc io.Writer, rows int, cols []*SeedColumn, tableName, format string) {
var offset int
switch format {
case csvExt:
offset = 1
default:
offset = 0
}
writeDataToWriter(wc, rows, offset, cols, tableName, format)
}
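// writeDataToWriter streams a header, the generated rows, and a footer to wc, as appropriate for the format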
func writeDataToWriter(wc io.Writer, rows, offset int, cols []*SeedColumn, tableName, format string) {
// handle the "header" for all format types
switch format {
case csvExt:
header := makeHeaderStr(cols, tableName, format)
_, err := wc.Write([]byte(header + "\n"))
if err != nil {
log.Fatal(err)
}
case sqlExt:
header := getSQLHeader(cols, tableName, format)
_, err := wc.Write([]byte(header + "\n"))
if err != nil {
log.Fatal(err)
}
case jsonExt:
prefix := "{\"Rows\":["
_, err := wc.Write([]byte(prefix))
if err != nil {
log.Fatal(err)
}
default:
log.Fatalf("unable to write the header, unsupported format %v \n", format)
}
var prevRow []string
for i := 0; i < rows; i++ {
row := make([]string, len(cols))
for colIndex, col := range cols {
val := getColValue(prevRow, colIndex, col, format)
row[colIndex] = val
if i > 0 && prevRow != nil {
prevRow[colIndex] = val
}
}
_, err := wc.Write([]byte(formatRow(row, cols, i, rows-1, tableName, format)))
if err != nil {
log.Fatal(err)
}
prevRow = row[:]
}
// json output needs a closing footer; other formats do not
switch format {
case jsonExt:
suffix := "]}\n"
_, err := wc.Write([]byte(suffix))
if err != nil {
log.Fatal(err)
}
}
}
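// formatRow renders a single row of values in the given format, including its row terminator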
func formatRow(strs []string, cols []*SeedColumn, currentRowIdx, lastRowIdx int, tableName, format string) string {
switch format {
case csvExt:
return strings.Join(strs, ",") + "\n"
case sqlExt:
return getSQLRow(strs, cols, tableName) + "\n"
case jsonExt:
var suffix string
if currentRowIdx == lastRowIdx {
suffix = "\n"
} else {
suffix = ",\n"
}
return getJSONRow(strs, cols) + suffix
default:
log.Fatalf("cannot format row, unsupported file format %s \n", format)
}
return ""
}
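// makeHeaderStr builds a header row from the column names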
func makeHeaderStr(cols []*SeedColumn, tableName, format string) string {
str := make([]string, 0, len(cols))
for _, col := range cols {
str = append(str, col.Name)
}
return formatRow(str, cols, 0, 1, tableName, format)
}
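
As a smoke test of the generator above, a hypothetical snippet that streams a two-row sample dataset to stdout as csv:

package main

import "os"

// writeSampleCSV is a hypothetical example: build a two-row seed schema from the
// sample columns and stream it to stdout through the Dataset interface.
func writeSampleCSV() {
    sch := NewSeedSchema(2, genSampleCols(), csvExt)
    var ds Dataset = NewDSImpl(os.Stdout, sch, "testTable")
    ds.GenerateData()
}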

View File

@@ -0,0 +1,194 @@
// Copyright 2019 Liquidata, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"log"
"math/rand"
"strconv"
"strings"

"github.com/liquidata-inc/dolt/go/libraries/doltcore/sql"
"github.com/liquidata-inc/dolt/go/store/types"
)
// Container wraps a payload in the delimiters appropriate for sql or json output
type Container struct {
c []string
}
// NewContainer creates a new Container for the given format
func NewContainer(format string) *Container {
c := make([]string, 3)
switch format {
case sqlExt:
c[0] = "("
c[2] = ")"
case jsonExt:
c[0] = "{"
c[2] = "}"
default:
log.Fatalf("cannot create new container, unsupported format %s \n", format)
}
return &Container{c: c}
}
// InsertPayload joins the payload with the separator and returns it wrapped in the Container's delimiters
func (sc *Container) InsertPayload(payload []string, separator string) string {
sc.c[1] = strings.Join(payload, separator)
return strings.Join(sc.c, "")
}
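// getColValue generates the next value for a column based on its generation type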
func getColValue(row []string, colIndex int, col *SeedColumn, format string) string {
switch col.GenType {
case increment:
return genNomsTypeValueIncrement(row, colIndex, col, format)
case random:
return getNomsTypeValueRandom(col, format)
default:
log.Fatalf("cannot get column value, unsupported gen type %s \n", col.GenType)
}
return ""
}
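// genNomsTypeValueIncrement increments the previous row's value for this column, starting from 1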
func genNomsTypeValueIncrement(row []string, colIndex int, col *SeedColumn, format string) string {
switch col.Type {
case types.IntKind:
if len(row) > 0 {
old, err := strconv.Atoi(row[colIndex])
if err != nil {
log.Fatal(err)
}
return fmt.Sprintf("%d", old+1)
}
return "1"
default:
log.Fatalf("cannot generate incremental value, unsupported noms type %s \n", col.Type.String())
}
return ""
}
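// getNomsTypeValueRandom generates a random value for the column's noms type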
func getNomsTypeValueRandom(col *SeedColumn, format string) string {
switch col.Type {
case types.IntKind:
return fmt.Sprintf("%d", rand.Intn(1000))
case types.StringKind:
return getRandomString(format)
default:
log.Fatalf("cannot generate random value, unsupported noms type %s \n", col.Type.String())
}
return ""
}
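// getRandomString returns a random string of up to 255 letters, double-quoted for sql output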
func getRandomString(format string) string {
letters := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
b := make([]byte, rand.Intn(255))
for i := range b {
b[i] = letters[rand.Int63()%int64(len(letters))]
}
switch format {
case sqlExt:
return fmt.Sprintf(`"%s"`, b)
default:
return string(b)
}
}
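// getJSONRow renders a row as a JSON object keyed by column name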
func getJSONRow(strs []string, cols []*SeedColumn) string {
if len(strs) != len(cols) {
log.Fatalf("values should be the length of columns. values: %+v, columns: %+v \n", strs, cols)
}
payload := make([]string, 0)
for i, col := range cols {
load := fmt.Sprintf("\"%s\":\"%s\"", col.Name, strs[i])
payload = append(payload, load)
}
container := NewContainer(jsonExt)
return container.InsertPayload(payload, ",")
}
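// getSQLRow renders a row as an INSERT statement for the given table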
func getSQLRow(strs []string, cols []*SeedColumn, tableName string) string {
container := NewContainer(sqlExt)
sqlCols := make([]string, 0)
for _, col := range cols {
sqlCols = append(sqlCols, fmt.Sprintf("`%s`", col.Name))
}
fieldNames := container.InsertPayload(sqlCols, ",")
values := container.InsertPayload(strs, ",")
return fmt.Sprintf("INSERT INTO `%s` %s VALUES %s;", tableName, fieldNames, values)
}
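// getSQLHeader returns DROP TABLE and CREATE TABLE statements matching the seed columns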
func getSQLHeader(cols []*SeedColumn, tableName, format string) string {
statement := make([]string, 0)
statement = append(statement, fmt.Sprintf("DROP TABLE IF EXISTS `%s`;\n", tableName))
statement = append(statement, fmt.Sprintf("CREATE TABLE `%s` ", tableName))
container := NewContainer(format)
schema := make([]string, 0)
pkDefs := make([]string, 0)
for i, col := range cols {
colStr := "`%s` %s"
// handle pk
if col.PrimaryKey {
pkDefs = append(pkDefs, fmt.Sprintf("PRIMARY KEY (`%s`)", col.Name))
colStr = "`%s` %s NOT NULL"
}
// handle increments
if col.GenType == increment {
colStr = fmt.Sprintf("%s AUTO_INCREMENT", colStr)
}
// append tag
colStr = fmt.Sprintf("%s COMMENT 'tag:%d'", colStr, i)
// translate noms type
sqlType, ok := sql.DoltToSQLType[col.Type]
if !ok {
log.Fatalf("unable to format sql string, unknown noms to sql conversion for type %v \n", col.Type)
}
schema = append(schema, fmt.Sprintf(colStr, col.Name, strings.ToUpper(sqlType)))
}
// add pk definitions to create table statement
for _, pkDef := range pkDefs {
schema = append(schema, pkDef)
}
// create and close create table statement
schemaStatement := container.InsertPayload(schema, ",\n")
statement = append(statement, schemaStatement+"; \n")
return strings.Join(statement, "")
}
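
A hypothetical snippet to eyeball the sql the helpers above emit for a two-column schema (the row values are quoted the way getColValue would produce them):

package main

import (
    "fmt"

    "github.com/liquidata-inc/dolt/go/store/types"
)

// printSampleSQL is a hypothetical example: print the DROP/CREATE header and a
// single INSERT for an id/name schema.
func printSampleSQL() {
    cols := []*SeedColumn{
        NewSeedColumn("id", true, types.IntKind, increment),
        NewSeedColumn("name", false, types.StringKind, random),
    }
    fmt.Println(getSQLHeader(cols, "testTable", sqlExt))
    fmt.Println(getSQLRow([]string{"1", `"abc"`}, cols, "testTable"))
}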

View File

@@ -0,0 +1,162 @@
// Copyright 2019 Liquidata, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
"flag"
"log"
"os"
"testing"

"github.com/liquidata-inc/dolt/go/libraries/utils/filesys"
)
const (
smallSet = 1000
mediumSet = 100000
largeSet = 10000000
)
var outputPath = flag.String("outputPath", "./", "the path where the serialized results file will be stored.")
var outputFormat = flag.String("outputFormat", ".csv", "the format used to serialize the benchmarking results.")
var resultsTableName = flag.String("resultsTableName", "results", "the name of the results table.")
func main() {
flag.Parse()
results := make([]result, 0)
// supported dolt formats we want to benchmark
testFmts := []string{csvExt, sqlExt, jsonExt}
// benchmark dolt import with all formats
for _, frmt := range testFmts {
benchmarks := []struct {
Name string
Format string
Rows int
Columns int
BM func(b *testing.B)
}{
{
Name: "dolt_import_small",
Format: frmt,
Rows: smallSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltImport(smallSet, genSampleCols(), frmt),
},
{
Name: "dolt_import_medium",
Format: frmt,
Rows: mediumSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltImport(mediumSet, genSampleCols(), frmt),
},
{
Name: "dolt_import_large",
Format: frmt,
Rows: largeSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltImport(largeSet, genSampleCols(), frmt),
},
}
for _, b := range benchmarks {
br := testing.Benchmark(b.BM)
res := result{
name: b.Name,
format: b.Format,
rows: b.Rows,
columns: b.Columns,
br: br,
}
results = append(results, res)
}
}
// benchmark the other dolt commands using a single import format
for _, frmt := range []string{csvExt} {
benchmarks := []struct {
Name string
Format string
Rows int
Columns int
BM func(b *testing.B)
}{
{
Name: "dolt_export_small",
Format: frmt,
Rows: smallSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltExport(smallSet, genSampleCols(), frmt),
},
{
Name: "dolt_export_medium",
Format: frmt,
Rows: mediumSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltExport(mediumSet, genSampleCols(), frmt),
},
{
Name: "dolt_export_large",
Format: frmt,
Rows: largeSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltExport(largeSet, genSampleCols(), frmt),
},
{
Name: "dolt_sql_select_small",
Format: frmt,
Rows: smallSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltSQLSelect(smallSet, genSampleCols(), frmt),
},
{
Name: "dolt_sql_select_medium",
Format: frmt,
Rows: mediumSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltSQLSelect(mediumSet, genSampleCols(), frmt),
},
{
Name: "dolt_sql_select_large",
Format: frmt,
Rows: largeSet,
Columns: len(genSampleCols()),
BM: BenchmarkDoltSQLSelect(largeSet, genSampleCols(), frmt),
},
}
for _, b := range benchmarks {
br := testing.Benchmark(b.BM)
res := result{
name: b.Name,
format: b.Format,
rows: b.Rows,
columns: b.Columns,
br: br,
}
results = append(results, res)
}
}
if err := serializeResults(results, *outputPath, *resultsTableName, *outputFormat); err != nil {
log.Fatal(err)
}
// cleanup temp dolt data dir
removeTempDoltDataDir(filesys.LocalFS)
os.Exit(0)
}
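
Assuming this package builds as a standalone binary, a run with the flags defined above might look like:

go run . -outputPath ./ -outputFormat .csv -resultsTableName results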

View File

@@ -0,0 +1,193 @@
// Copyright 2019 Liquidata, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
"fmt"
"io"
"log"
"path/filepath"
"testing"
"time"

"github.com/liquidata-inc/dolt/go/libraries/utils/filesys"
"github.com/liquidata-inc/dolt/go/store/types"
)
type result struct {
name string
format string
rows int
columns int
br testing.BenchmarkResult
}
// RSImpl is a Dataset containing the results of benchmarking
type RSImpl struct {
// Schema defines the structure of the Dataset
Schema *SeedSchema
// Results are results of benchmarking
Results []result
// TableName is the name of the results table
TableName string
wc io.Writer
}
// NewRSImpl creates a new RSImpl
func NewRSImpl(wc io.Writer, sch *SeedSchema, results []result, tableName string) *RSImpl {
return &RSImpl{
Schema: sch,
Results: results,
TableName: tableName,
wc: wc,
}
}
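// GenerateData writes the benchmark results to the RSImpl's writer, formatted according to its Schema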
func (rds *RSImpl) GenerateData() {
generateResultsData(rds.wc, rds.Results, rds.Schema.Columns, rds.TableName, rds.Schema.FileFormatExt)
}
// Change returns a DataSet that is a mutation of this Dataset by the given percentage
func (rds *RSImpl) Change(pct float32) Dataset {
// TODO
return &RSImpl{}
}
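// generateResultsData serializes benchmark results to wc in the given format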
func generateResultsData(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
switch format {
case csvExt:
generateCSVResults(wc, results, cols, tableName, format)
default:
log.Fatalf("cannot generate results data, file format %s unsupported \n", format)
}
}
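// generateCSVResults writes a csv header followed by one row per benchmark result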
func generateCSVResults(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
header := makeHeaderStr(cols, tableName, format)
_, err := wc.Write([]byte(header + "\n"))
if err != nil {
log.Fatal(err)
}
var prevRow []string
for i, result := range results {
row := getResultsRow(prevRow, result, cols, format)
_, err := wc.Write([]byte(formatRow(row, cols, i, len(results)-1, tableName, format)))
if err != nil {
log.Fatal(err)
}
prevRow = row[:]
}
}
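// getResultsRow maps a benchmark result onto the row layout defined by genResultsCols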
func getResultsRow(prevRow []string, res result, cols []*SeedColumn, format string) []string {
row := make([]string, len(cols))
// set id
if len(cols) > 0 && prevRow != nil {
row[0] = genNomsTypeValueIncrement(prevRow, 0, cols[0], format)
} else {
row[0] = "1"
}
// set name
row[1] = res.name
// set format
row[2] = res.format
// set rows
row[3] = fmt.Sprintf("%d", res.rows)
// set cols
row[4] = fmt.Sprintf("%d", res.columns)
// set iterations
row[5] = fmt.Sprintf("%d", res.br.N)
// set time
row[6] = res.br.T.String()
// set bytes
row[7] = fmt.Sprintf("%v", res.br.Bytes)
// set mem_allocs
row[8] = fmt.Sprintf("%v", res.br.MemAllocs)
// set mem_bytes
row[9] = fmt.Sprintf("%v", res.br.MemBytes)
// set alloced_bytes_per_op
row[10] = fmt.Sprintf("%v", res.br.AllocedBytesPerOp())
//set allocs_per_op
row[11] = fmt.Sprintf("%v", res.br.AllocsPerOp())
return row
}
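// genResultsCols defines the column schema for the serialized results table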
func genResultsCols() []*SeedColumn {
return []*SeedColumn{
NewSeedColumn("id", true, types.IntKind, increment),
NewSeedColumn("name", false, types.StringKind, supplied),
NewSeedColumn("format", false, types.StringKind, supplied),
NewSeedColumn("rows", false, types.StringKind, supplied),
NewSeedColumn("columns", false, types.StringKind, supplied),
NewSeedColumn("iterations", false, types.StringKind, supplied),
NewSeedColumn("time", false, types.TimestampKind, supplied),
NewSeedColumn("bytes", false, types.IntKind, supplied),
NewSeedColumn("mem_allocs", false, types.IntKind, supplied),
NewSeedColumn("mem_bytes", false, types.IntKind, supplied),
NewSeedColumn("alloced_bytes_per_op", false, types.StringKind, supplied),
NewSeedColumn("allocs_per_op", false, types.StringKind, supplied),
}
}
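// serializeResults writes the benchmark results to a dated file under the given path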
func serializeResults(results []result, path, tableName, format string) error {
var sch *SeedSchema
switch format {
case csvExt:
sch = NewSeedSchema(len(results), genResultsCols(), csvExt)
default:
log.Fatalf("cannot serialize results, unsupported file format %s \n", format)
}
now := time.Now()
fs := filesys.LocalFS
resultsFile := filepath.Join(path, fmt.Sprintf("benchmark_results-%04d-%02d-%02d%s", now.Year(), now.Month(), now.Day(), format))
wc, err := fs.OpenForWrite(resultsFile)
if err != nil {
log.Fatal(err)
}
defer wc.Close()
ds := NewRSImpl(wc, sch, results, tableName)
ds.GenerateData()
return nil
}

View File

@@ -0,0 +1,165 @@
// Copyright 2019 Liquidata, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
"fmt"
"log"
"strings"

"github.com/liquidata-inc/dolt/go/store/types"
)
const (
csvExt = ".csv"
jsonExt = ".json"
sqlExt = ".sql"
increment = GenType("increment")
random = GenType("random")
supplied = GenType("supplied")
)
var supportedFormats = []string{csvExt, jsonExt, sqlExt}
// GenType specifies how to generate subsequent row values for a given SeedColumn, for a test dataset
type GenType string
// SeedSchema contains the schema to be used to generate a test Dataset
type SeedSchema struct {
// Rows is size of the Dataset
Rows int
// Columns are the schema for the columns to be used for the Dataset
Columns []*SeedColumn
// FileFormatExt is the file format extension that directs how to construct the Dataset
// as a string or as bytes
FileFormatExt string
}
// NewSeedSchema creates a new SeedSchema
func NewSeedSchema(rows int, cols []*SeedColumn, format string) *SeedSchema {
for _, frmt := range supportedFormats {
if format == frmt {
return &SeedSchema{
Rows: rows,
Columns: cols,
FileFormatExt: format,
}
}
}
log.Fatalf("cannot build seed schema with unsupported file format %s \n", format)
return &SeedSchema{}
}
// Bytes returns a byte slice formatted according to the SeedSchema's FileFormatExt
func (sch *SeedSchema) Bytes() []byte {
switch sch.FileFormatExt {
case jsonExt:
return getColSchemaJSON(sch.Columns)
default:
log.Fatalf("cannot create bytes from schema, unsupported format %s \n", sch.FileFormatExt)
}
return []byte{}
}
// SeedColumn is used to create a column in a test dataset for benchmark testing
type SeedColumn struct {
Name string
PrimaryKey bool
Type types.NomsKind
GenType GenType
}
// NewSeedColumn creates a new SeedColumn
func NewSeedColumn(name string, pk bool, t types.NomsKind, g GenType) *SeedColumn {
if isValidGenType(t, g) {
return &SeedColumn{
Name: name,
PrimaryKey: pk,
Type: t,
GenType: g,
}
}
log.Fatalf("cannot use gen type %s with noms type %s \n", g, t.String())
return &SeedColumn{}
}
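// isValidGenType reports whether values of noms type t can be produced by gen type g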
func isValidGenType(t types.NomsKind, g GenType) bool {
var validTypes []types.NomsKind
switch g {
case increment:
validTypes = []types.NomsKind{types.IntKind}
case random:
validTypes = []types.NomsKind{types.IntKind, types.StringKind}
case supplied:
validTypes = []types.NomsKind{
types.IntKind,
types.StringKind,
types.TimestampKind,
}
default:
log.Fatalf("unsupported gen type %s \n", g)
}
for _, v := range validTypes {
if t == v {
return true
}
}
return false
}
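// getColSchemaJSON serializes the seed columns as a dolt JSON schema document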
func getColSchemaJSON(seedCols []*SeedColumn) []byte {
prefix := "{\"Columns\":["
suffix := "]}"
statement := make([]string, 0)
statement = append(statement, prefix)
schemaStr := "{\"tag\": %d,\"name\":\"%s\",\"kind\":\"%s\",\"is_part_of_pk\":%v,\"col_constraints\":%s}"
jsonCols := make([]string, 0)
for i, sc := range seedCols {
var pks []string
if sc.PrimaryKey {
pks = []string{"{\"constraint_type\": \"not_null\",\"params\": null}"}
} else {
pks = []string{}
}
jc := fmt.Sprintf(schemaStr, uint64(i), sc.Name, strings.ToLower(sc.Type.String()), sc.PrimaryKey, pks)
jsonCols = append(jsonCols, jc)
}
statement = append(statement, strings.Join(jsonCols, ","))
statement = append(statement, suffix)
return []byte(strings.Join(statement, ""))
}
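// genSampleCols defines the sample column schema shared by the import, export, and sql benchmarks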
func genSampleCols() []*SeedColumn {
return []*SeedColumn{
NewSeedColumn("id", true, types.IntKind, increment),
NewSeedColumn("int1", false, types.IntKind, random),
NewSeedColumn("int2", false, types.IntKind, increment),
NewSeedColumn("int3", false, types.IntKind, random),
NewSeedColumn("int4", false, types.IntKind, increment),
NewSeedColumn("int5", false, types.IntKind, increment),
NewSeedColumn("str1", false, types.StringKind, random),
NewSeedColumn("str2", false, types.StringKind, random),
NewSeedColumn("str3", false, types.StringKind, random),
NewSeedColumn("str4", false, types.StringKind, random),
NewSeedColumn("str5", false, types.StringKind, random),
}
}
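
A hypothetical snippet to inspect the JSON schema document that the json import path writes via sch.Bytes():

package main

import "fmt"

// printSampleSchemaJSON is a hypothetical example: print the column schema JSON
// that setupDEnvImport writes for json imports.
func printSampleSchemaJSON() {
    sch := NewSeedSchema(10, genSampleCols(), jsonExt)
    fmt.Println(string(sch.Bytes()))
}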