diff --git a/go/performance/benchmarks/benchmarks.go b/go/performance/benchmarks/benchmarks.go
new file mode 100644
index 0000000000..6cb23aafc1
--- /dev/null
+++ b/go/performance/benchmarks/benchmarks.go
@@ -0,0 +1,254 @@
+// Copyright 2019 Liquidata, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/liquidata-inc/dolt/go/cmd/dolt/commands"
+	"github.com/liquidata-inc/dolt/go/cmd/dolt/commands/tblcmds"
+	"github.com/liquidata-inc/dolt/go/libraries/doltcore/dbfactory"
+	"github.com/liquidata-inc/dolt/go/libraries/doltcore/doltdb"
+	"github.com/liquidata-inc/dolt/go/libraries/doltcore/env"
+	"github.com/liquidata-inc/dolt/go/libraries/utils/filesys"
+	"github.com/liquidata-inc/dolt/go/libraries/utils/test"
+	"github.com/liquidata-inc/dolt/go/store/types"
+)
+
+const (
+	testHomeDir = "/user/tester"
+)
+
+// doltCommandFunc is the signature shared by the dolt CLI command entry points.
+type doltCommandFunc func(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv) int
+
+// removeTempDoltDataDir deletes the .dolt data directory left in the current
+// working directory by a previous benchmark run, if one exists.
+func removeTempDoltDataDir(fs filesys.Filesys) {
+	cwd, err := os.Getwd()
+	if err != nil {
+		log.Fatal(err)
+	}
+	doltDir := filepath.Join(cwd, dbfactory.DoltDir)
+	exists, _ := fs.Exists(doltDir)
+	if exists {
+		err := fs.Delete(doltDir, true)
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+}
+
+func getWorkingDir(fs filesys.Filesys) string {
+	workingDir := test.TestDir(testHomeDir)
+	err := fs.MkDirs(workingDir)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return workingDir
+}
+
+func createTestEnvWithFS(fs filesys.Filesys, workingDir string) *env.DoltEnv {
+	removeTempDoltDataDir(fs)
+	testHomeDirFunc := func() (string, error) { return workingDir, nil }
+	const name = "test mcgibbins"
+	const email = "bigfakeytester@fake.horse"
+	dEnv := env.Load(context.Background(), testHomeDirFunc, fs, doltdb.LocalDirDoltDB)
+	err := dEnv.InitRepo(context.Background(), types.Format_7_18, name, email)
+	if err != nil {
+		panic("failed to initialize environment")
+	}
+	return dEnv
+}
+
+// BenchmarkDoltImport returns a function that runs benchmarks for importing
+// a test dataset into Dolt.
+func BenchmarkDoltImport(rows int, cols []*SeedColumn, format string) func(b *testing.B) {
+	fs := filesys.LocalFS
+	wd := getWorkingDir(fs)
+	return func(b *testing.B) {
+		doltImport(b, fs, rows, cols, wd, format)
+	}
+}
+
+// BenchmarkDoltExport returns a function that runs benchmarks for exporting
+// a test dataset out of Dolt.
+func BenchmarkDoltExport(rows int, cols []*SeedColumn, format string) func(b *testing.B) {
+	fs := filesys.LocalFS
+	wd := getWorkingDir(fs)
+	return func(b *testing.B) {
+		doltExport(b, fs, rows, cols, wd, format)
+	}
+}
+
+// BenchmarkDoltSQLSelect returns a function that runs benchmarks for executing
+// a SQL query against a Dolt table.
+func BenchmarkDoltSQLSelect(rows int, cols []*SeedColumn, format string) func(b *testing.B) {
+	fs := filesys.LocalFS
+	wd := getWorkingDir(fs)
+	return func(b *testing.B) {
+		doltSQLSelect(b, fs, rows, cols, wd, format)
+	}
+}
+
+func doltImport(b *testing.B, fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, format string) {
+	pathToImportFile := filepath.Join(workingDir, fmt.Sprintf("testData%s", format))
+
+	oldStdin := os.Stdin
+	defer func() { os.Stdin = oldStdin }()
+
+	commandFunc, commandStr, args, dEnv := getBenchmarkingTools(fs, rows, cols, workingDir, pathToImportFile, format)
+
+	runBenchmark(b, commandFunc, commandStr, args, dEnv)
+}
+
+func doltExport(b *testing.B, fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, format string) {
+	pathToImportFile := filepath.Join(workingDir, fmt.Sprintf("testData%s", format))
+	oldStdin := os.Stdin
+
+	commandFunc, commandStr, args, dEnv := getBenchmarkingTools(fs, rows, cols, workingDir, pathToImportFile, format)
+
+	// import the data so there is a table to export
+	status := commandFunc(context.Background(), commandStr, args, dEnv)
+	if status != 0 {
+		log.Fatalf("failed to import table, exit code %d \n", status)
+	}
+
+	// revert stdin
+	os.Stdin = oldStdin
+
+	args = []string{"-f", "testTable", pathToImportFile}
+	runBenchmark(b, tblcmds.Export, "dolt table export", args, dEnv)
+}
+
+func doltSQLSelect(b *testing.B, fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, format string) {
+	testTable := "testTable"
+	pathToImportFile := filepath.Join(workingDir, fmt.Sprintf("testData%s", format))
+
+	oldStdin := os.Stdin
+
+	commandFunc, commandStr, args, dEnv := getBenchmarkingTools(fs, rows, cols, workingDir, pathToImportFile, format)
+
+	// import the data so there is a table to query
+	status := commandFunc(context.Background(), commandStr, args, dEnv)
+	if status != 0 {
+		log.Fatalf("failed to import table, exit code %d \n", status)
+	}
+
+	// revert stdin
+	os.Stdin = oldStdin
+
+	args = []string{"-q", fmt.Sprintf("select count(*) from %s", testTable)}
+	runBenchmark(b, commands.Sql, "dolt sql", args, dEnv)
+}
+
+func runBenchmark(b *testing.B, commandFunc doltCommandFunc, commandStr string, args []string, dEnv *env.DoltEnv) {
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		status := commandFunc(context.Background(), commandStr, args, dEnv)
+		if status != 0 {
+			log.Fatalf("running benchmark failed with exit code %d \n", status)
+		}
+	}
+}
+
+func getBenchmarkingTools(fs filesys.Filesys, rows int, cols []*SeedColumn, workingDir, pathToImportFile, format string) (commandFunc doltCommandFunc, commandStr string, args []string, dEnv *env.DoltEnv) {
+	testTable := "testTable"
+	sch := NewSeedSchema(rows, cols, format)
+
+	switch format {
+	case csvExt:
+		dEnv = setupDEnvImport(fs, sch, workingDir, testTable, "", pathToImportFile)
+		args = []string{"-c", "-f", testTable, pathToImportFile}
+		commandStr = "dolt table import"
+		commandFunc = tblcmds.Import
+	case sqlExt:
+		dEnv = setupDEnvImport(fs, sch, workingDir, testTable, "", pathToImportFile)
+		args = []string{}
+		commandStr = "dolt sql"
+		commandFunc = commands.Sql
+
+		stdin := getStdinForSQLBenchmark(fs, pathToImportFile)
+		os.Stdin = stdin
+	case jsonExt:
+		pathToSchemaFile := filepath.Join(workingDir, fmt.Sprintf("testSchema%s", format))
+		dEnv = setupDEnvImport(fs, sch, workingDir, testTable, pathToSchemaFile, pathToImportFile)
+		args = []string{"-c", "-f", "-s", pathToSchemaFile, testTable, pathToImportFile}
+		commandStr = "dolt table import"
+		commandFunc = tblcmds.Import
+	default:
+		log.Fatalf("cannot import file, unsupported file format %s \n", format)
+	}
+
+	return commandFunc, commandStr, args, dEnv
+}
+
+func setupDEnvImport(fs filesys.Filesys, sch *SeedSchema, workingDir, tableName, pathToSchemaFile, pathToImportFile string) *env.DoltEnv {
+	wc, err := fs.OpenForWrite(pathToImportFile)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer wc.Close()
+
+	ds := NewDSImpl(wc, sch, tableName)
+
+	if pathToSchemaFile != "" {
+		// write the schema file used by json imports
+		err := fs.WriteFile(pathToSchemaFile, sch.Bytes())
+		if err != nil {
+			panic("unable to write schema file to filesystem")
+		}
+	}
+
+	// write the data file
+	ds.GenerateData()
+	return createTestEnvWithFS(fs, workingDir)
+}
+
+func getStdinForSQLBenchmark(fs filesys.Filesys, pathToImportFile string) *os.File {
+	content, err := fs.ReadFile(pathToImportFile)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	tmpfile, err := ioutil.TempFile("", "temp")
+	if err != nil {
+		log.Fatal(err)
+	}
+	// the *os.File returned below is opened before this deferred remove runs,
+	// so the unlinked file remains readable on POSIX systems
+	defer os.Remove(tmpfile.Name()) // clean up
+
+	if _, err := tmpfile.Write(content); err != nil {
+		log.Fatal(err)
+	}
+	if err := tmpfile.Close(); err != nil {
+		log.Fatal(err)
+	}
+
+	f, err := os.Open(tmpfile.Name())
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	return f
+}
diff --git a/go/performance/benchmarks/dataset.go b/go/performance/benchmarks/dataset.go
new file mode 100644
index 0000000000..5a3c609c09
--- /dev/null
+++ b/go/performance/benchmarks/dataset.go
@@ -0,0 +1,194 @@
+// Copyright 2019 Liquidata, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"io"
+	"log"
+	"strings"
+)
+
+// Dataset is a set of test data used for benchmark testing
+type Dataset interface {
+	// GenerateData writes the test dataset to the Dataset's underlying writer
+	GenerateData()
+
+	// Change returns a Dataset mutated by the given percentage of change
+	Change(pct float32) Dataset
+}
+
+// DSImpl implements the Dataset interface
+type DSImpl struct {
+	// Schema defines the structure of the Dataset
+	Schema *SeedSchema
+
+	// TableName is the name of the test dataset
+	TableName string
+
+	wc io.Writer
+}
+
+// NewDSImpl creates a new DSImpl
+func NewDSImpl(wc io.Writer, sch *SeedSchema, tableName string) *DSImpl {
+	return &DSImpl{Schema: sch, TableName: tableName, wc: wc}
+}
+
+// GenerateData writes the dataset, formatted according to the DSImpl's Schema,
+// to the underlying writer
+func (ds *DSImpl) GenerateData() {
+	writeDataToWriter(ds.wc, ds.Schema.Rows, ds.Schema.Columns, ds.TableName, ds.Schema.FileFormatExt)
+}
+
+// Change returns a DataSet that is a mutation of this Dataset by the given percentage
+func (ds *DSImpl) Change(pct float32) Dataset {
+	// TODO
+	return &DSImpl{}
+}
+
+func writeDataToWriter(wc io.Writer, rows int, cols []*SeedColumn, tableName, format string) {
+	// write the "header" for each format type
+	switch format {
+	case csvExt:
+		header := makeHeaderStr(cols, tableName, format)
+		_, err := wc.Write([]byte(header))
+		if err != nil {
+			log.Fatal(err)
+		}
+	case sqlExt:
+		header := getSQLHeader(cols, tableName, format)
+		_, err := wc.Write([]byte(header + "\n"))
+		if err != nil {
+			log.Fatal(err)
+		}
+	case jsonExt:
+		prefix := "{\"Rows\":["
+		_, err := wc.Write([]byte(prefix))
+		if err != nil {
+			log.Fatal(err)
+		}
+	default:
+		log.Fatalf("unable to write the header, unsupported format %v \n", format)
+	}
+
+	// generate each row from the previous one, so increment columns can advance
+	var prevRow []string
+	for i := 0; i < rows; i++ {
+		row := make([]string, len(cols))
+
+		for colIndex, col := range cols {
+			row[colIndex] = getColValue(prevRow, colIndex, col, format)
+		}
+
+		_, err := wc.Write([]byte(formatRow(row, cols, i, rows-1, tableName, format)))
+		if err != nil {
+			log.Fatal(err)
+		}
+		prevRow = row
+	}
+
+	// write the "footer" for each format type
+	switch format {
+	case jsonExt:
+		suffix := "]}\n"
+		_, err := wc.Write([]byte(suffix))
+		if err != nil {
+			log.Fatal(err)
+		}
+	default:
+	}
+}
+
+func formatRow(strs []string, cols []*SeedColumn, currentRowIdx, lastRowIdx int, tableName, format string) string {
+	switch format {
+	case csvExt:
+		return strings.Join(strs, ",") + "\n"
+	case sqlExt:
+		return getSQLRow(strs, cols, tableName) + "\n"
+	case jsonExt:
+		var suffix string
+		if currentRowIdx == lastRowIdx {
+			suffix = "\n"
+		} else {
+			suffix = ",\n"
+		}
+		return getJSONRow(strs, cols) + suffix
+	default:
+		log.Fatalf("cannot format row, unsupported file format %s \n", format)
+	}
+	return ""
+}
+
+// makeHeaderStr formats the column names as a header row
+func makeHeaderStr(cols []*SeedColumn, tableName, format string) string {
+	str := make([]string, 0, len(cols))
+	for _, col := range cols {
+		str = append(str, col.Name)
+	}
+	return formatRow(str, cols, 0, 1, tableName, format)
+}
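Reviewer note: because `DSImpl` writes through a plain `io.Writer`, the generator can be pointed at an in-memory buffer to inspect its output without touching disk. A sketch under that assumption (`previewDataset` is hypothetical):

```go
package main

import (
	"bytes"
	"fmt"
)

// previewDataset streams a ten-row sample of the seed data into an
// in-memory buffer, which is handy for eyeballing the generated CSV.
func previewDataset() {
	var buf bytes.Buffer
	sch := NewSeedSchema(10, genSampleCols(), csvExt)
	ds := NewDSImpl(&buf, sch, "testTable")
	ds.GenerateData()
	fmt.Print(buf.String())
}
```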
diff --git a/go/performance/benchmarks/helpers.go b/go/performance/benchmarks/helpers.go
new file mode 100644
index 0000000000..dac0e80083
--- /dev/null
+++ b/go/performance/benchmarks/helpers.go
@@ -0,0 +1,194 @@
+// Copyright 2019 Liquidata, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"fmt"
+	"log"
+	"math/rand"
+	"strconv"
+	"strings"
+
+	"github.com/liquidata-inc/dolt/go/libraries/doltcore/sql"
+	"github.com/liquidata-inc/dolt/go/store/types"
+)
+
+// Container wraps a payload in the opening and closing delimiters for a given format
+type Container struct {
+	c []string
+}
+
+// NewContainer creates a new Container
+func NewContainer(format string) *Container {
+	c := make([]string, 3)
+	switch format {
+	case sqlExt:
+		c[0] = "("
+		c[2] = ")"
+	case jsonExt:
+		c[0] = "{"
+		c[2] = "}"
+	default:
+		log.Fatalf("cannot create new container, unsupported format %s \n", format)
+	}
+	return &Container{c: c}
+}
+
+// InsertPayload returns the payload joined by the separator, wrapped in the Container's delimiters
+func (sc *Container) InsertPayload(payload []string, separator string) string {
+	sc.c[1] = strings.Join(payload, separator)
+	return strings.Join(sc.c, "")
+}
+
+func getColValue(row []string, colIndex int, col *SeedColumn, format string) string {
+	switch col.GenType {
+	case increment:
+		return genNomsTypeValueIncrement(row, colIndex, col, format)
+	case random:
+		return getNomsTypeValueRandom(col, format)
+	default:
+		log.Fatalf("cannot get column value, unsupported gen type %s \n", col.GenType)
+	}
+	return ""
+}
+
+func genNomsTypeValueIncrement(row []string, colIndex int, col *SeedColumn, format string) string {
+	switch col.Type {
+	case types.IntKind:
+		if len(row) > 0 {
+			old, err := strconv.Atoi(row[colIndex])
+			if err != nil {
+				log.Fatal(err)
+			}
+			return fmt.Sprintf("%d", old+1)
+		}
+		return "1"
+	default:
+		log.Fatalf("cannot generate incremental value, unsupported noms type %s \n", col.Type.String())
+	}
+	return ""
+}
+
+func getNomsTypeValueRandom(col *SeedColumn, format string) string {
+	switch col.Type {
+	case types.IntKind:
+		return fmt.Sprintf("%d", rand.Intn(1000))
+	case types.StringKind:
+		return getRandomString(format)
+	default:
+		log.Fatalf("cannot generate random value, unsupported noms type %s \n", col.Type.String())
+	}
+	return ""
+}
+
+// getRandomString returns a random string of up to 254 letters, quoted for sql output
+func getRandomString(format string) string {
+	letters := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	b := make([]byte, rand.Intn(255))
+	for i := range b {
+		b[i] = letters[rand.Int63()%int64(len(letters))]
+	}
+
+	switch format {
+	case sqlExt:
+		return fmt.Sprintf(`"%s"`, b)
+	default:
+		return string(b)
+	}
+}
+
+func getJSONRow(strs []string, cols []*SeedColumn) string {
+	if len(strs) != len(cols) {
+		log.Fatalf("values should be the length of columns. values: %+v, columns: %+v \n", strs, cols)
+	}
+
+	payload := make([]string, 0)
+	for i, col := range cols {
+		load := fmt.Sprintf("\"%s\":\"%s\"", col.Name, strs[i])
+		payload = append(payload, load)
+	}
+
+	container := NewContainer(jsonExt)
+	return container.InsertPayload(payload, ",")
+}
+
+func getSQLRow(strs []string, cols []*SeedColumn, tableName string) string {
+	container := NewContainer(sqlExt)
+	sqlCols := make([]string, 0)
+
+	for _, col := range cols {
+		sqlCols = append(sqlCols, fmt.Sprintf("`%s`", col.Name))
+	}
+
+	fieldNames := container.InsertPayload(sqlCols, ",")
+	values := container.InsertPayload(strs, ",")
+
+	return fmt.Sprintf("INSERT INTO `%s` %s VALUES %s;", tableName, fieldNames, values)
+}
+
+func getSQLHeader(cols []*SeedColumn, tableName, format string) string {
+	statement := make([]string, 0)
+	statement = append(statement, fmt.Sprintf("DROP TABLE IF EXISTS `%s`;\n", tableName))
+	statement = append(statement, fmt.Sprintf("CREATE TABLE `%s` ", tableName))
+
+	container := NewContainer(format)
+	schema := make([]string, 0)
+	pkDefs := make([]string, 0)
+	for i, col := range cols {
+		colStr := "`%s` %s"
+
+		// handle primary keys
+		if col.PrimaryKey {
+			pkDefs = append(pkDefs, fmt.Sprintf("PRIMARY KEY (`%s`)", col.Name))
+			colStr = "`%s` %s NOT NULL"
+		}
+
+		// handle increments
+		if col.GenType == increment {
+			colStr = fmt.Sprintf("%s AUTO_INCREMENT", colStr)
+		}
+
+		// append the column's tag
+		colStr = fmt.Sprintf("%s COMMENT 'tag:%d'", colStr, i)
+
+		// translate the noms type to its sql type
+		sqlType, ok := sql.DoltToSQLType[col.Type]
+		if !ok {
+			log.Fatalf("unable to format sql string, unknown noms to sql conversion for type %v \n", col.Type)
+		}
+
+		schema = append(schema, fmt.Sprintf(colStr, col.Name, strings.ToUpper(sqlType)))
+	}
+
+	// add pk definitions to the create table statement
+	schema = append(schema, pkDefs...)
+
+	// assemble and close the create table statement
+	schemaStatement := container.InsertPayload(schema, ",\n")
+	statement = append(statement, schemaStatement+"; \n")
+
+	return strings.Join(statement, "")
+}
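Reviewer note: a quick sketch of the delimiter wrapping that `getSQLRow` and `getJSONRow` build on (`containerDemo` is hypothetical; expected output shown in comments):

```go
package main

import "fmt"

// containerDemo shows how Container wraps a joined payload per format.
func containerDemo() {
	sqlC := NewContainer(sqlExt)
	fmt.Println(sqlC.InsertPayload([]string{"`id`", "`name`"}, ","))
	// (`id`,`name`)

	jsonC := NewContainer(jsonExt)
	fmt.Println(jsonC.InsertPayload([]string{`"id":"1"`, `"name":"a"`}, ","))
	// {"id":"1","name":"a"}
}
```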
diff --git a/go/performance/benchmarks/main.go b/go/performance/benchmarks/main.go
new file mode 100644
index 0000000000..ca7b1c7757
--- /dev/null
+++ b/go/performance/benchmarks/main.go
@@ -0,0 +1,162 @@
+// Copyright 2019 Liquidata, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"flag"
+	"log"
+	"os"
+	"testing"
+
+	"github.com/liquidata-inc/dolt/go/libraries/utils/filesys"
+)
+
+const (
+	smallSet  = 1000
+	mediumSet = 100000
+	largeSet  = 10000000
+)
+
+var outputPath = flag.String("outputPath", "./", "the path where the serialized results file will be stored.")
+var outputFormat = flag.String("outputFormat", ".csv", "the format used to serialize the benchmarking results.")
+var resultsTableName = flag.String("resultsTableName", "results", "the name of the results table.")
+
+func main() {
+	flag.Parse()
+
+	results := make([]result, 0)
+
+	// supported dolt formats we want to benchmark
+	testFmts := []string{csvExt, sqlExt, jsonExt}
+
+	// benchmark dolt import with all formats
+	for _, frmt := range testFmts {
+		benchmarks := []struct {
+			Name    string
+			Format  string
+			Rows    int
+			Columns int
+			BM      func(b *testing.B)
+		}{
+			{
+				Name:    "dolt_import_small",
+				Format:  frmt,
+				Rows:    smallSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltImport(smallSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_import_medium",
+				Format:  frmt,
+				Rows:    mediumSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltImport(mediumSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_import_large",
+				Format:  frmt,
+				Rows:    largeSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltImport(largeSet, genSampleCols(), frmt),
+			},
+		}
+
+		for _, b := range benchmarks {
+			br := testing.Benchmark(b.BM)
+			res := result{
+				name:    b.Name,
+				format:  b.Format,
+				rows:    b.Rows,
+				columns: b.Columns,
+				br:      br,
+			}
+			results = append(results, res)
+		}
+	}
+
+	// benchmark the other dolt commands, using a single import format
+	for _, frmt := range []string{csvExt} {
+		benchmarks := []struct {
+			Name    string
+			Format  string
+			Rows    int
+			Columns int
+			BM      func(b *testing.B)
+		}{
+			{
+				Name:    "dolt_export_small",
+				Format:  frmt,
+				Rows:    smallSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltExport(smallSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_export_medium",
+				Format:  frmt,
+				Rows:    mediumSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltExport(mediumSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_export_large",
+				Format:  frmt,
+				Rows:    largeSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltExport(largeSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_sql_select_small",
+				Format:  frmt,
+				Rows:    smallSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltSQLSelect(smallSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_sql_select_medium",
+				Format:  frmt,
+				Rows:    mediumSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltSQLSelect(mediumSet, genSampleCols(), frmt),
+			},
+			{
+				Name:    "dolt_sql_select_large",
+				Format:  frmt,
+				Rows:    largeSet,
+				Columns: len(genSampleCols()),
+				BM:      BenchmarkDoltSQLSelect(largeSet, genSampleCols(), frmt),
+			},
+		}
+
+		for _, b := range benchmarks {
+			br := testing.Benchmark(b.BM)
+			res := result{
+				name:    b.Name,
+				format:  b.Format,
+				rows:    b.Rows,
+				columns: b.Columns,
+				br:      br,
+			}
+			results = append(results, res)
+		}
+	}
+
+	if err := serializeResults(results, *outputPath, *resultsTableName, *outputFormat); err != nil {
+		log.Fatal(err)
+	}
+
+	// clean up the temp dolt data dir
+	removeTempDoltDataDir(filesys.LocalFS)
+
+	os.Exit(0)
+}
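Reviewer note: the table-driven layout makes adding a size tier a one-entry change. A hypothetical sketch (`tinySet` and `tinyImport` do not exist in this PR; only `smallSet`, `mediumSet`, and `largeSet` are defined):

```go
package main

import "testing"

// tinySet is illustrative only: a fourth size tier for the import table.
const tinySet = 100

var tinyImport = struct {
	Name    string
	Format  string
	Rows    int
	Columns int
	BM      func(b *testing.B)
}{
	Name:    "dolt_import_tiny",
	Format:  csvExt,
	Rows:    tinySet,
	Columns: len(genSampleCols()),
	BM:      BenchmarkDoltImport(tinySet, genSampleCols(), csvExt),
}
```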
diff --git a/go/performance/benchmarks/results.go b/go/performance/benchmarks/results.go
new file mode 100644
index 0000000000..d0c81beb4a
--- /dev/null
+++ b/go/performance/benchmarks/results.go
@@ -0,0 +1,193 @@
+// Copyright 2019 Liquidata, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"fmt"
+	"io"
+	"log"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/liquidata-inc/dolt/go/libraries/utils/filesys"
+	"github.com/liquidata-inc/dolt/go/store/types"
+)
+
+type result struct {
+	name    string
+	format  string
+	rows    int
+	columns int
+	br      testing.BenchmarkResult
+}
+
+// RSImpl is a Dataset containing the results of benchmarking
+type RSImpl struct {
+	// Schema defines the structure of the Dataset
+	Schema *SeedSchema
+
+	// Results are the results of benchmarking
+	Results []result
+
+	// TableName is the name of the results table
+	TableName string
+
+	wc io.Writer
+}
+
+// NewRSImpl creates a new RSImpl
+func NewRSImpl(wc io.Writer, sch *SeedSchema, results []result, tableName string) *RSImpl {
+	return &RSImpl{
+		Schema:    sch,
+		Results:   results,
+		TableName: tableName,
+		wc:        wc,
+	}
+}
+
+// GenerateData writes the benchmark results, formatted according to the
+// RSImpl's Schema, to the underlying writer
+func (rds *RSImpl) GenerateData() {
+	generateResultsData(rds.wc, rds.Results, rds.Schema.Columns, rds.TableName, rds.Schema.FileFormatExt)
+}
+
+// Change returns a DataSet that is a mutation of this Dataset by the given percentage
+func (rds *RSImpl) Change(pct float32) Dataset {
+	// TODO
+	return &RSImpl{}
+}
+
+func generateResultsData(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
+	switch format {
+	case csvExt:
+		generateCSVResults(wc, results, cols, tableName, format)
+	default:
+		log.Fatalf("cannot generate results data, file format %s unsupported \n", format)
+	}
+}
+
+func generateCSVResults(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
+	header := makeHeaderStr(cols, tableName, format)
+	_, err := wc.Write([]byte(header))
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	var prevRow []string
+	for i, result := range results {
+		row := getResultsRow(prevRow, result, cols, format)
+
+		_, err := wc.Write([]byte(formatRow(row, cols, i, len(results)-1, tableName, format)))
+		if err != nil {
+			log.Fatal(err)
+		}
+		prevRow = row
+	}
+}
+
+func getResultsRow(prevRow []string, res result, cols []*SeedColumn, format string) []string {
+	row := make([]string, len(cols))
+	// set id
+	if prevRow != nil {
+		row[0] = genNomsTypeValueIncrement(prevRow, 0, cols[0], format)
+	} else {
+		row[0] = "1"
+	}
+	// set name
+	row[1] = res.name
+	// set format
+	row[2] = res.format
+	// set rows
+	row[3] = fmt.Sprintf("%d", res.rows)
+	// set columns
+	row[4] = fmt.Sprintf("%d", res.columns)
+	// set iterations
+	row[5] = fmt.Sprintf("%d", res.br.N)
+	// set time
+	row[6] = res.br.T.String()
+	// set bytes
+	row[7] = fmt.Sprintf("%v", res.br.Bytes)
+	// set mem_allocs
+	row[8] = fmt.Sprintf("%v", res.br.MemAllocs)
+	// set mem_bytes
+	row[9] = fmt.Sprintf("%v", res.br.MemBytes)
+	// set alloced_bytes_per_op
+	row[10] = fmt.Sprintf("%v", res.br.AllocedBytesPerOp())
+	// set allocs_per_op
+	row[11] = fmt.Sprintf("%v", res.br.AllocsPerOp())
+	return row
+}
+
+func genResultsCols() []*SeedColumn {
+	return []*SeedColumn{
+		NewSeedColumn("id", true, types.IntKind, increment),
+		NewSeedColumn("name", false, types.StringKind, supplied),
+		NewSeedColumn("format", false, types.StringKind, supplied),
+		NewSeedColumn("rows", false, types.StringKind, supplied),
+		NewSeedColumn("columns", false, types.StringKind, supplied),
+		NewSeedColumn("iterations", false, types.StringKind, supplied),
+		NewSeedColumn("time", false, types.TimestampKind, supplied),
+		NewSeedColumn("bytes", false, types.IntKind, supplied),
+		NewSeedColumn("mem_allocs", false, types.IntKind, supplied),
+		NewSeedColumn("mem_bytes", false, types.IntKind, supplied),
+		NewSeedColumn("alloced_bytes_per_op", false, types.StringKind, supplied),
+		NewSeedColumn("allocs_per_op", false, types.StringKind, supplied),
+	}
+}
+
+func serializeResults(results []result, path, tableName, format string) error {
+	var sch *SeedSchema
+	switch format {
+	case csvExt:
+		sch = NewSeedSchema(len(results), genResultsCols(), csvExt)
+	default:
+		log.Fatalf("cannot serialize results, unsupported file format %s \n", format)
+	}
+
+	now := time.Now()
+	fs := filesys.LocalFS
+	resultsFile := filepath.Join(path, fmt.Sprintf("benchmark_results-%04d-%02d-%02d%s", now.Year(), now.Month(), now.Day(), format))
+	wc, err := fs.OpenForWrite(resultsFile)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer wc.Close()
+
+	ds := NewRSImpl(wc, sch, results, tableName)
+	ds.GenerateData()
+
+	return nil
+}
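Reviewer note: the row layout in `getResultsRow` mirrors the fields and derived methods of `testing.BenchmarkResult`. A sketch documenting where each serialized column comes from (`explainRow` is hypothetical):

```go
package main

import (
	"fmt"
	"testing"
)

// explainRow maps a testing.BenchmarkResult onto the column names
// declared in genResultsCols, to document the units involved.
func explainRow(br testing.BenchmarkResult) {
	fmt.Println("iterations:", br.N)                             // b.N of the final run
	fmt.Println("time:", br.T)                                   // total wall time across those iterations
	fmt.Println("mem_allocs:", br.MemAllocs)                     // total allocation count
	fmt.Println("mem_bytes:", br.MemBytes)                       // total bytes allocated
	fmt.Println("alloced_bytes_per_op:", br.AllocedBytesPerOp()) // MemBytes divided by N
	fmt.Println("allocs_per_op:", br.AllocsPerOp())              // MemAllocs divided by N
}
```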
diff --git a/go/performance/benchmarks/seed_schema.go b/go/performance/benchmarks/seed_schema.go
new file mode 100644
index 0000000000..2d6c56a38d
--- /dev/null
+++ b/go/performance/benchmarks/seed_schema.go
@@ -0,0 +1,165 @@
+// Copyright 2019 Liquidata, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"github.com/liquidata-inc/dolt/go/store/types"
+)
+
+const (
+	csvExt  = ".csv"
+	jsonExt = ".json"
+	sqlExt  = ".sql"
+
+	increment = GenType("increment")
+	random    = GenType("random")
+	supplied  = GenType("supplied")
+)
+
+var supportedFormats = []string{csvExt, jsonExt, sqlExt}
+
+// GenType specifies how successive row values are generated for a given SeedColumn in a test dataset
+type GenType string
+
+// SeedSchema contains the schema used to generate a test Dataset
+type SeedSchema struct {
+	// Rows is the size of the Dataset
+	Rows int
+
+	// Columns are the schema for the columns used in the Dataset
+	Columns []*SeedColumn
+
+	// FileFormatExt is the file format extension that determines how the Dataset is serialized
+	FileFormatExt string
+}
+
+// NewSeedSchema creates a new SeedSchema
+func NewSeedSchema(rows int, cols []*SeedColumn, format string) *SeedSchema {
+	for _, frmt := range supportedFormats {
+		if format == frmt {
+			return &SeedSchema{
+				Rows:          rows,
+				Columns:       cols,
+				FileFormatExt: format,
+			}
+		}
+	}
+	log.Fatalf("cannot build seed schema with unsupported file format %s \n", format)
+	return &SeedSchema{}
+}
+
+// Bytes returns a byte slice formatted according to the SeedSchema's FileFormatExt
+func (sch *SeedSchema) Bytes() []byte {
+	switch sch.FileFormatExt {
+	case jsonExt:
+		return getColSchemaJSON(sch.Columns)
+	default:
+		log.Fatalf("cannot create bytes from schema, unsupported format %s \n", sch.FileFormatExt)
+	}
+	return []byte{}
+}
+
+// SeedColumn is used to create a column in a test dataset for benchmark testing
+type SeedColumn struct {
+	Name       string
+	PrimaryKey bool
+	Type       types.NomsKind
+	GenType    GenType
+}
+
+// NewSeedColumn creates a new SeedColumn
+func NewSeedColumn(name string, pk bool, t types.NomsKind, g GenType) *SeedColumn {
+	if isValidGenType(t, g) {
+		return &SeedColumn{
+			Name:       name,
+			PrimaryKey: pk,
+			Type:       t,
+			GenType:    g,
+		}
+	}
+	log.Fatalf("cannot use gen type %s with noms type %s \n", g, t.String())
+	return &SeedColumn{}
+}
+
+func isValidGenType(t types.NomsKind, g GenType) bool {
+	var validTypes []types.NomsKind
+	switch g {
+	case increment:
+		validTypes = []types.NomsKind{types.IntKind}
+	case random:
+		validTypes = []types.NomsKind{types.IntKind, types.StringKind}
+	case supplied:
+		validTypes = []types.NomsKind{
+			types.IntKind,
+			types.StringKind,
+			types.TimestampKind,
+		}
+	default:
+		log.Fatalf("unsupported gen type %s \n", g)
+	}
+	for _, v := range validTypes {
+		if t == v {
+			return true
+		}
+	}
+	return false
+}
+
+func getColSchemaJSON(seedCols []*SeedColumn) []byte {
+	prefix := "{\"Columns\":["
+	suffix := "]}"
+
+	statement := make([]string, 0)
+	statement = append(statement, prefix)
+
+	schemaStr := "{\"tag\": %d,\"name\":\"%s\",\"kind\":\"%s\",\"is_part_of_pk\":%v,\"col_constraints\":%s}"
+	jsonCols := make([]string, 0)
+
+	for i, sc := range seedCols {
+		var pks []string
+		if sc.PrimaryKey {
+			pks = []string{"{\"constraint_type\": \"not_null\",\"params\": null}"}
+		} else {
+			pks = []string{}
+		}
+		jc := fmt.Sprintf(schemaStr, uint64(i), sc.Name, strings.ToLower(sc.Type.String()), sc.PrimaryKey, pks)
+		jsonCols = append(jsonCols, jc)
+	}
+
+	statement = append(statement, strings.Join(jsonCols, ","))
+	statement = append(statement, suffix)
+	return []byte(strings.Join(statement, ""))
+}
+
+func genSampleCols() []*SeedColumn {
+	return []*SeedColumn{
+		NewSeedColumn("id", true, types.IntKind, increment),
+		NewSeedColumn("int1", false, types.IntKind, random),
+		NewSeedColumn("int2", false, types.IntKind, increment),
+		NewSeedColumn("int3", false, types.IntKind, random),
+		NewSeedColumn("int4", false, types.IntKind, increment),
+		NewSeedColumn("int5", false, types.IntKind, increment),
+		NewSeedColumn("str1", false, types.StringKind, random),
+		NewSeedColumn("str2", false, types.StringKind, random),
+		NewSeedColumn("str3", false, types.StringKind, random),
+		NewSeedColumn("str4", false, types.StringKind, random),
+		NewSeedColumn("str5", false, types.StringKind, random),
+	}
+}