diff --git a/.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh b/.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh index 76ae8f94ee..d600b17e89 100755 --- a/.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh +++ b/.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh @@ -2,8 +2,8 @@ set -e -if [ "$#" -lt 8 ]; then - echo "Usage: ./get-dolt-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <issueNumber>" +if [ "$#" -lt 9 ]; then + echo "Usage: ./get-dolt-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <nomsBinFormat> <issueNumber>" exit 1 fi @@ -14,7 +14,8 @@ toServer="$4" toVersion="$5" timePrefix="$6" actorPrefix="$7" -issueNumber="$8" # TODO: Use this to paste the results onto the github issue +nomsBinFormat="$8" +issueNumber="$9" echo ' { @@ -54,6 +55,10 @@ echo ' "--region=us-west-2", "--results-dir='$timePrefix'", "--results-prefix='$actorPrefix'", + "--mysql-schema-file=schema.sql", + "--nbf='$nomsBinFormat'", + "--results-schema=results-schema.sql", + "--issue-number='$issueNumber'", "--fileNames=100k-sorted.csv", "--fileNames=100k-random.csv", "--fileNames=1m-sorted.csv", diff --git a/.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh b/.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh index b58ca69b29..da43256146 100755 --- a/.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh +++ b/.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh @@ -2,19 +2,20 @@ set -e -if [ "$#" -lt 8 ]; then - echo "Usage: ./get-mysql-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <issueNumber>" +if [ "$#" -lt 9 ]; then + echo "Usage: ./get-mysql-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <nomsBinFormat> <issueNumber>" exit 1 fi jobName="$1" fromServer="$2" fromVersion="$3" -toServer="$4" +toServer="$4" # make this mysql toVersion="$5" timePrefix="$6" actorPrefix="$7" -issueNumber="$8" # TODO: Use this to paste the results onto the github issue +nomsBinFormat="$8" +issueNumber="$9" echo ' { @@ -56,6 +57,9 @@ echo ' "--results-prefix='$actorPrefix'", "--mysql-exec=/usr/sbin/mysqld", "--mysql-schema-file=schema.sql", + "--nbf='$nomsBinFormat'", + "--results-schema=results-schema.sql", + 
"--issue-number='$issueNumber'", "--fileNames=100k-sorted.csv", "--fileNames=100k-random.csv", "--fileNames=1m-sorted.csv", diff --git a/.github/scripts/import-benchmarking/run-benchmarks.sh b/.github/scripts/import-benchmarking/run-benchmarks.sh index c43fef2546..f36a66a0b6 100755 --- a/.github/scripts/import-benchmarking/run-benchmarks.sh +++ b/.github/scripts/import-benchmarking/run-benchmarks.sh @@ -27,8 +27,6 @@ if [ -z "$MODE" ]; then exit 1 fi -echo "Setting from $FROM_SERVER: $FROM_VERSION" - # use first 8 characters of TO_VERSION to differentiate # jobs short=${TO_VERSION:0:8} @@ -38,7 +36,7 @@ actorShort="$lowered-$short" # random sleep sleep 0.$[ ( $RANDOM % 10 ) + 1 ]s -timesuffix=`date +%s%N` +timesuffix=`date +%s` jobname="$actorShort-$timesuffix" timeprefix=$(date +%Y/%m/%d) @@ -52,11 +50,14 @@ issuenumber=${ISSUE_NUMBER:-"-1"} source \ "$TEMPLATE_SCRIPT" \ "$jobname" \ + "$FROM_SERVER" \ "$FROM_VERSION" \ + "$TO_SERVER" \ "$TO_VERSION" \ "$timeprefix" \ "$actorprefix" \ - "$issuenumber" > job.json + "$NOMS_BIN_FORMAT" \ + "$issuenumber" > job.json out=$(KUBECONFIG="$KUBECONFIG" kubectl apply -f job.json || true) diff --git a/go/performance/import_benchmarker/cmd/sample-config.json b/go/performance/import_benchmarker/cmd/sample-config.json index 1bc9162d48..c15d6e96b1 100644 --- a/go/performance/import_benchmarker/cmd/sample-config.json +++ b/go/performance/import_benchmarker/cmd/sample-config.json @@ -1,7 +1,7 @@ { "Jobs": [ { - "Name": "Medium Dolt import with Sorted Rows", + "Name": "Medium MySQL import with Sorted Rows", "NumRows": 1000000, "Sorted": true, "Format": "csv", @@ -12,7 +12,7 @@ "SchemaPath": "/Users/vinairachakonda/go/src/dolthub/dolt/go/performance/import_benchmarker/cmd/schema.sql" }, { - "Name": "Medium MySQL import with Sorted Rows", + "Name": "Medium Dolt import with Sorted Rows", "NumRows": 1000000, "Sorted": true, "Format": "csv", diff --git a/go/performance/import_benchmarker/config.go 
b/go/performance/import_benchmarker/config.go index 6b699c6305..21f3494c0f 100644 --- a/go/performance/import_benchmarker/config.go +++ b/go/performance/import_benchmarker/config.go @@ -84,6 +84,9 @@ type ImportBenchmarkConfig struct { // MysqlHost is used to connect with a MySQL host MysqlHost string + + // NbfVersion is used to determine what format to run Dolt against + NbfVersion string } // NewDefaultImportBenchmarkConfig returns a default import configuration where data is generated with accordance to @@ -261,7 +264,7 @@ func RunBenchmarkTests(config *ImportBenchmarkConfig, workingDir string) []resul results := make([]result, 0) for _, doltJob := range doltJobs { - results = append(results, BenchmarkDoltImportJob(doltJob, workingDir)) + results = append(results, BenchmarkDoltImportJob(doltJob, workingDir, config.NbfVersion)) } results = append(results, BenchmarkMySQLImportJobs(mySQLJobs, getMysqlConfigFromConfig(config))...) diff --git a/go/performance/import_benchmarker/config_test.go b/go/performance/import_benchmarker/config_test.go index 4ee29cbf97..fbe9ea8149 100644 --- a/go/performance/import_benchmarker/config_test.go +++ b/go/performance/import_benchmarker/config_test.go @@ -42,6 +42,27 @@ func TestGeneratedConfigCanBeImported(t *testing.T) { os.RemoveAll(filepath.Join(wd, "testData.csv")) } +func TestNewStorageFormat(t *testing.T) { + t.Skip() // Skipping since dolt isn't installed on the github actions vm + + job := createSampleDoltJob() + config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{job}, NbfVersion: "__DOLT_1__"} + err := config.ValidateAndUpdateDefaults() + + assert.NoError(t, err) + + wd := GetWorkingDir() + results := RunBenchmarkTests(config, wd) + + assert.Equal(t, 1, len(results)) + assert.Equal(t, "dolt_import_small", results[0].name) + + // Sanity check: An import of 100,000 should never take more than 15 seconds + assert.LessOrEqual(t, results[0].br.T, time.Second*15) + + os.RemoveAll(filepath.Join(wd, "testData.csv")) +} + 
func TestCanGenerateFilesForAllFormats(t *testing.T) { config := &ImportBenchmarkConfig{Jobs: make([]*ImportBenchmarkJob, 0)} diff --git a/go/performance/import_benchmarker/dolt.go b/go/performance/import_benchmarker/dolt.go index 979c7e126a..da46a85d83 100644 --- a/go/performance/import_benchmarker/dolt.go +++ b/go/performance/import_benchmarker/dolt.go @@ -33,11 +33,11 @@ import ( // BenchmarkDoltImportJob returns a function that runs benchmarks for importing // a test dataset into Dolt -func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir string) result { +func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir, nbf string) result { oldStdin := os.Stdin defer func() { os.Stdin = oldStdin }() - setupAndInitializeDoltRepo(filesys.LocalFS, workingDir, job.ExecPath) + setupAndInitializeDoltRepo(filesys.LocalFS, workingDir, job.ExecPath, nbf) defer RemoveDoltDataDir(filesys.LocalFS, workingDir) // remove the repo each time commandStr, args := getBenchmarkingTools(job, workingDir) @@ -59,7 +59,7 @@ func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir string) result { } // setupAndInitializeDoltRepo calls the `dolt init` command on the workingDir to create a new Dolt repository. -func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath string) { +func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath, nbf string) { RemoveDoltDataDir(fs, workingDir) err := sysbench_runner.DoltVersion(context.Background(), doltExecPath) @@ -72,6 +72,13 @@ func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath str log.Fatal(err.Error()) } + if nbf != "" { + err = os.Setenv("DOLT_DEFAULT_BIN_FORMAT", nbf) + if err != nil { + log.Fatal(err) + } + } + init := execCommand(context.Background(), doltExecPath, "init") init.Dir = workingDir