[no-release-notes] Update configuration files for generating import benchmark (#3828)

This commit is contained in:
Vinai Rachakonda
2022-07-14 18:21:40 -07:00
committed by GitHub
parent 755f502d7a
commit e357c4198c
7 changed files with 58 additions and 17 deletions

View File

@@ -2,8 +2,8 @@
set -e
if [ "$#" -lt 8 ]; then
echo "Usage: ./get-dolt-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <issueNumber>"
if [ "$#" -lt 9 ]; then
echo "Usage: ./get-dolt-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <nomsBinFormat> <issueNumber>"
exit 1
fi
@@ -14,7 +14,8 @@ toServer="$4"
toVersion="$5"
timePrefix="$6"
actorPrefix="$7"
issueNumber="$8" # TODO: Use this to paste the results onto the github issue
nomsBinFormat="$8"
issueNumber="$9"
echo '
{
@@ -54,6 +55,10 @@ echo '
"--region=us-west-2",
"--results-dir='$timePrefix'",
"--results-prefix='$actorPrefix'",
"--mysql-schema-file=schema.sql",
"--nbf='$nomsBinFormat'",
"--results-schema=results-schema.sql",
"--issue-number='$issueNumber'",
"--fileNames=100k-sorted.csv",
"--fileNames=100k-random.csv",
"--fileNames=1m-sorted.csv",

View File

@@ -2,19 +2,20 @@
set -e
if [ "$#" -lt 8 ]; then
echo "Usage: ./get-mysql-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <issueNumber>"
if [ "$#" -lt 9 ]; then
echo "Usage: ./get-mysql-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <nomsBinFormat> <issueNumber>"
exit 1
fi
jobName="$1"
fromServer="$2"
fromVersion="$3"
toServer="$4"
toServer="$4" # make this mysql
toVersion="$5"
timePrefix="$6"
actorPrefix="$7"
issueNumber="$8" # TODO: Use this to paste the results onto the github issue
nomsBinFormat="$8"
issueNumber="$9"
echo '
{
@@ -56,6 +57,9 @@ echo '
"--results-prefix='$actorPrefix'",
"--mysql-exec=/usr/sbin/mysqld",
"--mysql-schema-file=schema.sql",
"--nbf='$nomsBinFormat'",
"--results-schema=results-schema.sql",
"--issue-number='$issueNumber'",
"--fileNames=100k-sorted.csv",
"--fileNames=100k-random.csv",
"--fileNames=1m-sorted.csv",

View File

@@ -27,8 +27,6 @@ if [ -z "$MODE" ]; then
exit 1
fi
echo "Setting from $FROM_SERVER: $FROM_VERSION"
# use first 8 characters of TO_VERSION to differentiate
# jobs
short=${TO_VERSION:0:8}
@@ -38,7 +36,7 @@ actorShort="$lowered-$short"
# random sleep
sleep 0.$[ ( $RANDOM % 10 ) + 1 ]s
timesuffix=`date +%s%N`
timesuffix=`date +%s`
jobname="$actorShort-$timesuffix"
timeprefix=$(date +%Y/%m/%d)
@@ -52,11 +50,14 @@ issuenumber=${ISSUE_NUMBER:-"-1"}
source \
"$TEMPLATE_SCRIPT" \
"$jobname" \
"$FROM_SERVER" \
"$FROM_VERSION" \
"$TO_SERVER" \
"$TO_VERSION" \
"$timeprefix" \
"$actorprefix" \
"$issuenumber" > job.json
"$NOMS_BIN_FORMAT" \
"$issuenumber" > job.json
out=$(KUBECONFIG="$KUBECONFIG" kubectl apply -f job.json || true)

View File

@@ -1,7 +1,7 @@
{
"Jobs": [
{
"Name": "Medium Dolt import with Sorted Rows",
"Name": "Medium MySQL import with Sorted Rows",
"NumRows": 1000000,
"Sorted": true,
"Format": "csv",
@@ -12,7 +12,7 @@
"SchemaPath": "/Users/vinairachakonda/go/src/dolthub/dolt/go/performance/import_benchmarker/cmd/schema.sql"
},
{
"Name": "Medium MySQL import with Sorted Rows",
"Name": "Medium Dolt import with Sorted Rows",
"NumRows": 1000000,
"Sorted": true,
"Format": "csv",

View File

@@ -84,6 +84,9 @@ type ImportBenchmarkConfig struct {
// MysqlHost is used to connect with a MySQL host
MysqlHost string
// NbfVersion selects which storage format (nbf) Dolt is run against
NbfVersion string
}
// NewDefaultImportBenchmarkConfig returns a default import configuration where data is generated with accordance to
@@ -261,7 +264,7 @@ func RunBenchmarkTests(config *ImportBenchmarkConfig, workingDir string) []resul
results := make([]result, 0)
for _, doltJob := range doltJobs {
results = append(results, BenchmarkDoltImportJob(doltJob, workingDir))
results = append(results, BenchmarkDoltImportJob(doltJob, workingDir, config.NbfVersion))
}
results = append(results, BenchmarkMySQLImportJobs(mySQLJobs, getMysqlConfigFromConfig(config))...)

View File

@@ -42,6 +42,27 @@ func TestGeneratedConfigCanBeImported(t *testing.T) {
os.RemoveAll(filepath.Join(wd, "testData.csv"))
}
func TestNewStorageFormat(t *testing.T) {
t.Skip() // Skipping since dolt isn't installed on the github actions vm
job := createSampleDoltJob()
config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{job}, NbfVersion: "__DOLT_1__"}
err := config.ValidateAndUpdateDefaults()
assert.NoError(t, err)
wd := GetWorkingDir()
results := RunBenchmarkTests(config, wd)
assert.Equal(t, 1, len(results))
assert.Equal(t, "dolt_import_small", results[0].name)
// Sanity check: An import of 100,000 should never take more than 15 seconds
assert.LessOrEqual(t, results[0].br.T, time.Second*15)
os.RemoveAll(filepath.Join(wd, "testData.csv"))
}
func TestCanGenerateFilesForAllFormats(t *testing.T) {
config := &ImportBenchmarkConfig{Jobs: make([]*ImportBenchmarkJob, 0)}

View File

@@ -33,11 +33,11 @@ import (
// BenchmarkDoltImportJob benchmarks importing a test dataset into Dolt and
// returns the result
func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir string) result {
func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir, nbf string) result {
oldStdin := os.Stdin
defer func() { os.Stdin = oldStdin }()
setupAndInitializeDoltRepo(filesys.LocalFS, workingDir, job.ExecPath)
setupAndInitializeDoltRepo(filesys.LocalFS, workingDir, job.ExecPath, nbf)
defer RemoveDoltDataDir(filesys.LocalFS, workingDir) // remove the repo each time
commandStr, args := getBenchmarkingTools(job, workingDir)
@@ -59,7 +59,7 @@ func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir string) result {
}
// setupAndInitializeDoltRepo calls the `dolt init` command on the workingDir to create a new Dolt repository.
func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath string) {
func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath, nbf string) {
RemoveDoltDataDir(fs, workingDir)
err := sysbench_runner.DoltVersion(context.Background(), doltExecPath)
@@ -72,6 +72,13 @@ func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath str
log.Fatal(err.Error())
}
if nbf != "" {
err = os.Setenv("DOLT_DEFAULT_BIN_FORMAT", nbf)
if err != nil {
log.Fatal(err)
}
}
init := execCommand(context.Background(), doltExecPath, "init")
init.Dir = workingDir