Redo import perf (#4689)

* import perf harness prog

* more test isolation

* rework common dep

* fix server driver

* delete old files, fix readme

* more readme fixes

* batching edits

* edits

* skip import tests by def

* GA starter

* rename

* fix arg name

* change input name again

* remove espaced character from input desc

* fix env definition

* fix env definition

* fix workflow_dispatch name

* CI test files

* remove os spec

* runs on

* different set variable

* defaults

* defaults

* remove local GMS

* edits

* run bench from go/ directory

* dropped def equals

* go run, not go build

* fix output command name

* configure dolt

* fix query file path

* fix query file path

* switch query and inputs

* fix format <<EOF

* reformat yaml

* debug output

* more debug output

* fix echo -> cat

* fix to json flag

* fix yaml spacing

* yaml spacing

* yaml spacing

* revert html spacing

* json format

* env var expansion

* echo not cat

* another json string printing error

* no echo

* log template

* no template string

* wrong parameter, template should be templateName

* remove console.log

* pass file, not string, to ses

* rely on preexisting template? email action interface

* fix yaml

* fix html print

* fix html header

* change ci script

* fix sql-server connection pass

* mysql no password, setup default db

* put password back

* missed port for default database

* expanded CI tests

* shorter test file

* extend ci tests

* change default query

* try to push to dolthub

* modify push to dolthub

* duplicate test names

* typo

* dolt-action version

* invalid param for dolt_action

* specify feature branch

* specify main branch dolt-action

* -b flag first

* dont need -q flag for batch EOF

* combine results and dolthub push

* missing curly brace

* no auth

* new creds

* new cred fun

* use the cred key

* try again

* log table

* move push out of batch

* more logging

* new vs old branch

* fix branch rule

* more test

* all tests

* smaller tests

* smaller tests

* double env: key

* fix yaml error

* more yaml errors

* more yaml errors

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* linting fixes, remove other import nightly

* linting

* go vet

* licenses

* fix compile errorrs

* warn don't panic on init()

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* no external package

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* unused dep

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* Revert perf scripts

* refactor to repo dispatch

Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>
This commit is contained in:
Maximilian Hoffman
2022-11-10 12:56:43 -08:00
committed by GitHub
parent 3bbc0e6512
commit 08167dfd5d
36 changed files with 2316 additions and 1875 deletions
+26
View File
@@ -0,0 +1,26 @@
#!/bin/bash
# csv_to_html.sh converts a CSV file into an HTML <table> and exposes the
# output path as a GitHub Actions step output named "html".
#
# Usage: csv_to_html.sh <csv_file> <html_file>

if [ "$#" -ne 2 ]; then
    echo "Usage: csv_to_html.sh <csv_file> <html_file>" >&2
    # Bug fix: previously only printed usage and fell through with empty args.
    exit 1
fi

in="$1"
out="$2"

echo "<table>" > "$out"
print_header=true
# IFS= and -r keep leading whitespace and literal backslashes intact.
while IFS= read -r line; do
    if "$print_header"; then
        # First CSV row becomes the header: turn commas into </th><th>.
        echo "  <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
        print_header=false
        continue
    fi
    echo "  <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
done < "$in"
echo "</table>" >> "$out"

# Echo the table for the workflow log, then publish the output path.
cat "$out"
echo "::set-output name=html::$out"
@@ -61,4 +61,12 @@ jobs:
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
event-type: benchmark-import
client-payload: '{"from_server": "dolt", "from_version": "${{ github.sha }}", "to_server": "dolt", "to_version": "${{ steps.comment-branch.outputs.head_sha }}", "mode": "pullRequest", "issue_number": "${{ steps.get_pull_number.outputs.pull_number }}", "actor": "${{ github.actor }}", "template_script": "./.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh"}'
client-payload: |
{
"version": "${{ steps.comment-branch-outputs.head_sha }}",
"run_file": "ci.yaml",
"report": "three_way_compare.sql",
"commit_to_branch": "${{ steps.comment-branch-outputs.head_sha }}",
"actor": "${{ github.actor }}",
"issue_id": "${{ steps.get_pull_number.outputs.pull_number }}"
}
+163 -5
View File
@@ -1,10 +1,168 @@
name: Import Benchmarks
on:
workflow_dispatch:
repository_dispatch:
types: [ benchmark-import ]
env:
BENCH_DIR: 'go/performance/import_benchmarker'
MYSQL_PORT: 3309
MYSQL_PASSWORD: password
jobs:
test:
name: Import benchmarks
bench:
name: Benchmark
defaults:
run:
shell: bash
strategy:
fail-fast: true
runs-on: ubuntu-latest
steps:
- name: Stub
run: echo hello
- name: Set up Go 1.x
id: go
uses: actions/setup-go@v3
with:
go-version: ^1.19
- name: Dolt version
id: version
run: |
version=${{ github.event.client_payload.version }}
- uses: actions/checkout@v3
with:
ref: ${{ github.event.client_payload.version }}
- name: Install dolt
working-directory: ./go
run: go install ./cmd/dolt
- uses: shogo82148/actions-setup-mysql@v1
with:
mysql-version: '8.0'
auto-start: true
root-password: ${{ env.MYSQL_PASSWORD }}
my-cnf: |
local_infile=1
socket=/tmp/mysqld2.sock
port=${{ env.MYSQL_PORT }}
- name: Setup MySQL
run: mysql -uroot -p${{ env.MYSQL_PASSWORD }} -h127.0.0.1 -P${{ env.MYSQL_PORT }} -e 'create database test;'
- name: Run bench
id: bench
working-directory: go/
run: |
out="$GITHUB_WORKSPACE/results.sql"
testspec="../${{ env.BENCH_DIR }}/testdata/${{ github.event.client_payload.run_file }}"
go run \
"github.com/dolthub/dolt/${{ env.BENCH_DIR }}/cmd" \
-test "$testspec" \
-out "$out"
echo "::set-output name=result_path::$out"
- name: Report
id: report
run: |
gw=$GITHUB_WORKSPACE
in="${{ steps.bench.outputs.result_path }}"
query="$(pwd)/${{ env.BENCH_DIR }}/reporting/${{ github.event.client_payload.report }}"
out="$gw/results.csv"
dolt_dir="$gw/import-perf"
dolt config --global --add user.email "import-perf@dolthub.com"
dolt config --global --add user.name "import-perf"
echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import
dolt clone import-perf/import-perf "$dolt_dir"
cd "$dolt_dir"
branch="${{ github.event.client_payload.commit_to_branch }}"
# checkout branch
if [ -z "$(dolt sql -q "select 1 from dolt_branches where name = '$branch';")" ]; then
dolt checkout -b $branch
else
dolt checkout $branch
fi
dolt sql -q "drop table if exists import_perf_results"
# load results
dolt sql < "$in"
# push results to dolthub
dolt add import_perf_results
dolt commit -m "CI commit"
dolt push origin $branch
# generate report
dolt sql -r csv < "$query" > "$out"
cat "$out"
echo "::set-output name=report_path::$out"
- name: Format HTML
id: html
if: ${{ github.event.client_payload.email_recipient != '' }}
run: |
gw="$GITHUB_WORKSPACE"
in="${{ steps.report.outputs.report_path }}"
out="$gw/results.html"
echo "<table>" > "$out"
print_header=true
while read line; do
if "$print_header"; then
echo " <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
print_header=false
continue
fi
echo " <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
done < "$in"
echo "</table>" >> "$out"
cat "$out"
echo "::set-output name=html::$(echo $out)"
- name: Configure AWS Credentials
if: ${{ github.event.client_payload.email_recipient != '' }}
uses: aws-actions/configure-aws-credentials@v1-node16
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-2
- name: Send Email
uses: ./.github/actions/ses-email-action
if: ${{ github.event.client_payload.email_recipient != '' }}
with:
region: us-west-2
toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
version: ${{ steps.version.outputs.ref }}
format: '__DOLT__'
dataFile: ${{ steps.html.outputs.html }}
- name: Read CSV
if: ${{ github.event.client_payload.issue_id != '' }}
id: csv
uses: juliangruber/read-file-action@v1
with:
path: "${{ steps.report.outputs.report_path }}"
- name: Create MD
if: ${{ github.event.client_payload.issue_id != '' }}
uses: petems/csv-to-md-table-action@master
id: md
with:
csvinput: ${{ steps.csv.outputs.content }}
- uses: mshick/add-pr-comment@v1
if: ${{ github.event.client_payload.issue_id != '' }}
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: 'github-actions[bot]'
issue: ${{ github.event.client_payload.issue_id }}
message: |
`@${ACTOR} ${FORMAT}
${{ steps.md.outputs.markdown-table }}
@@ -19,9 +19,17 @@ jobs:
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
event-type: sql-correctness
client-payload: '{"to_version": "${{ github.sha }}", "mode": "nightly", "actor": "${{ github.actor }}", "actor_email": "max@dolthub.com", "template_script": "./.github/scripts/sql-correctness/get-dolt-correctness-job-json.sh"}'
client-payload: '{"to_version": "${{ github.sha }}", "mode": "nightly", "actor": "${{ github.actor }}", "template_script": "./.github/scripts/sql-correctness/get-dolt-correctness-job-json.sh"}'
- uses: peter-evans/repository-dispatch@v2.0.0
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
event-type: benchmark-import
client-payload: '{"to_server": "dolt", "to_version": "${{ github.sha }}", "from_server": "mysql", "from_version": "8.0.28", "mode": "nightly", "actor": "${{ github.actor }}", "template_script": "./.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh"}'
client-payload: |
{
"email_recipient": "${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}",
"version": "${{ github.sha }}",
"run_file": "ci.yaml",
"report": "three_way_compare.sql",
"commit_to_branch": "nightly",
"actor": "${{ github.actor }}"
}
@@ -48,4 +48,12 @@ jobs:
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
event-type: benchmark-import
client-payload: '{"to_server": "dolt", "to_version": "${{ needs.set-version-actor.outputs.version }}", "from_server": "mysql", "from_version": "8.0.28", "mode": "release", "actor": "${{ needs.set-version-actor.outputs.actor }}", "actor_email": "${{ needs.set-version-actor.outputs.actor_email }}", "template_script": "./.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh"}'
client-payload: |
{
"email_recipient": "${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}",
"version": "${{ github.sha }}",
"run_file": "ci.yaml",
"report": "three_way_compare.sql",
"commit_to_branch": "main",
"actor": "${{ github.actor }}"
}
+5 -5
View File
@@ -37,14 +37,14 @@ require (
github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6
github.com/sirupsen/logrus v1.8.1
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
github.com/stretchr/testify v1.7.1
github.com/stretchr/testify v1.8.0
github.com/tealeg/xlsx v1.0.5
github.com/tklauser/go-sysconf v0.3.9 // indirect
go.uber.org/zap v1.15.0
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261
golang.org/x/sys v0.1.0
google.golang.org/api v0.32.0
google.golang.org/grpc v1.49.0
google.golang.org/protobuf v1.27.1
@@ -56,6 +56,8 @@ require (
require (
github.com/aliyun/aliyun-oss-go-sdk v2.2.5+incompatible
github.com/cenkalti/backoff/v4 v4.1.3
github.com/cespare/xxhash v1.1.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/go-mysql-server v0.14.1-0.20221110165837-03b011aa073d
github.com/google/flatbuffers v2.0.6+incompatible
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
@@ -75,6 +77,7 @@ require (
go.opentelemetry.io/otel/trace v1.7.0
golang.org/x/text v0.3.7
gonum.org/v1/plot v0.11.0
gopkg.in/yaml.v3 v3.0.1
)
require (
@@ -85,7 +88,6 @@ require (
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/apache/thrift v0.13.1-0.20201008052519-daf620915714 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash v1.1.0 // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-fonts/liberation v0.2.0 // indirect
@@ -115,7 +117,6 @@ require (
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.26.0 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
github.com/stretchr/objx v0.2.0 // indirect
github.com/tklauser/numcpus v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.opencensus.io v0.22.4 // indirect
@@ -131,7 +132,6 @@ require (
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20210506142907-4a47615972c2 // indirect
gopkg.in/yaml.v3 v3.0.0 // indirect
)
replace (
+10 -7
View File
@@ -163,6 +163,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creasty/defaults v1.6.0 h1:ltuE9cfphUtlrBeomuu8PEyISTXnxqkBIoQfXgv7BSc=
github.com/creasty/defaults v1.6.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM=
github.com/daixiang0/gci v0.2.4/go.mod h1:+AV8KmHTGxxwp/pY84TLQfFKp2vuKXXJVzF3kD/hfR4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -692,8 +694,8 @@ github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/objx v0.4.0 h1:M2gUjqZET1qApGOWNSnZ49BAIMX4F/1plDv3+l31EJ4=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.1.4/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
@@ -701,8 +703,9 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tdakkota/asciicheck v0.0.0-20200416190851-d7f85be797a2/go.mod h1:yHp0ai0Z9gUljN3o0xMhYJnH/IcvkdTBOX2fmJ93JEM=
github.com/tealeg/xlsx v1.0.5 h1:+f8oFmvY8Gw1iUXzPk+kz+4GpbDZPK1FhPiQRd+ypgE=
@@ -976,8 +979,8 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 h1:v6hYoSR9T5oet+pMXwUWkbiVqx/63mlHjefrHmxwfeY=
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -1217,8 +1220,8 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0 h1:hjy8E9ON/egN1tAYqKb61G10WtihqetD4sz2H+8nIeA=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
@@ -12,13 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package main
package sql_server_driver
import (
"bytes"
"database/sql"
"fmt"
"io"
"log"
"net/url"
"os"
"os/exec"
@@ -46,7 +47,7 @@ func init() {
var err error
DoltPath, err = exec.LookPath(path)
if err != nil {
panic(fmt.Sprintf("did not find dolt binary: %v", err.Error()))
log.Printf("did not find dolt binary: %v\n", err.Error())
}
}
@@ -108,11 +109,11 @@ func (u DoltUser) MakeRepoStore() (RepoStore, error) {
type RepoStore struct {
user DoltUser
dir string
Dir string
}
func (rs RepoStore) MakeRepo(name string) (Repo, error) {
path := filepath.Join(rs.dir, name)
path := filepath.Join(rs.Dir, name)
err := os.Mkdir(path, 0750)
if err != nil {
return Repo{}, err
@@ -127,18 +128,18 @@ func (rs RepoStore) MakeRepo(name string) (Repo, error) {
func (rs RepoStore) DoltCmd(args ...string) *exec.Cmd {
cmd := rs.user.DoltCmd(args...)
cmd.Dir = rs.dir
cmd.Dir = rs.Dir
return cmd
}
type Repo struct {
user DoltUser
dir string
Dir string
}
func (r Repo) DoltCmd(args ...string) *exec.Cmd {
cmd := r.user.DoltCmd(args...)
cmd.Dir = r.dir
cmd.Dir = r.Dir
return cmd
}
@@ -260,26 +261,23 @@ func (s *SqlServer) Restart(newargs *[]string) error {
}
func (s *SqlServer) DB(c Connection) (*sql.DB, error) {
authority := "root"
if c.User != "" {
authority = c.User
}
var pass string
pass, err := c.Password()
if err != nil {
return nil, err
}
if pass != "" {
authority += ":" + pass
}
location := fmt.Sprintf("tcp(127.0.0.1:%d)", s.Port)
dbname := s.DBName
return ConnectDB(c.User, pass, s.DBName, "127.0.0.1", s.Port, c.DriverParams)
}
func ConnectDB(user, password, name, host string, port int, driverParams map[string]string) (*sql.DB, error) {
params := make(url.Values)
params.Set("allowAllFiles", "true")
params.Set("tls", "preferred")
for k, v := range c.DriverParams {
for k, v := range driverParams {
params.Set(k, v)
}
dsn := fmt.Sprintf("%s@%s/%s?%s", authority, location, dbname, params.Encode())
dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?%s", user, password, host, port, name, params.Encode())
db, err := sql.Open("mysql", dsn)
if err != nil {
return nil, err
@@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
// +build darwin dragonfly freebsd linux netbsd openbsd
package main
package sql_server_driver
import "syscall"
@@ -26,5 +27,3 @@ func (s *SqlServer) GracefulStop() error {
<-s.Done
return s.Cmd.Wait()
}
@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package main
package sql_server_driver
import (
"syscall"
"golang.org/x/sys/windows"
)
@@ -37,7 +38,7 @@ func (s *SqlServer) GracefulStop() error {
return err
}
set, err = dll.FindProc("SetConsoleCtrlHandler")
set, err := dll.FindProc("SetConsoleCtrlHandler")
if err != nil {
return err
}
@@ -58,7 +59,7 @@ func (s *SqlServer) GracefulStop() error {
if err != nil {
return err
}
_, _, err := f.Call()
_, _, err = f.Call()
if err != nil {
return err
}
@@ -0,0 +1,242 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sql_server_driver
import (
"io"
"os"
"path/filepath"
"strings"
"github.com/creasty/defaults"
"gopkg.in/yaml.v3"
)
// |Connection| represents a single connection to a sql-server instance defined
// in the test. The connection will be established and every |Query| in
// |Queries| will be run against it. At the end, the connection will be torn down.
// If |RestartServer| is non-nil, the server which the connection targets will
// be restarted after the connection is terminated.
type Connection struct {
	// Name of the server (a |TestRepo| or |MultiRepo|) this connection targets.
	On string `yaml:"on"`
	// Queries to run, in order, against the established connection.
	Queries []Query `yaml:"queries"`
	// If non-nil, the target server is restarted (optionally with new
	// arguments) after this connection is torn down.
	RestartServer *RestartArgs `yaml:"restart_server"`

	// Rarely needed, allows the entire connection assertion to be retried
	// on an assertion failure. Use this only for idempotent connection
	// interactions and only if the sql-server is prone to tear down the
	// connection based on things that are happening, such as cluster role
	// transitions.
	RetryAttempts int `yaml:"retry_attempts"`

	// The user to connect as.
	User string `default:"root" yaml:"user"`

	// The password to connect with.
	Pass string `yaml:"password"`
	// Path to a file whose whitespace-trimmed contents are the password;
	// takes precedence over |Pass| (see |Password|).
	PassFile string `yaml:"password_file"`

	// Any driver params to pass in the DSN.
	DriverParams map[string]string `yaml:"driver_params"`
}
// UnmarshalYAML decodes a |Connection|, first applying struct-tag defaults
// (e.g. |User| defaults to "root") so fields omitted in the YAML keep their
// declared default values.
func (c *Connection) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// Bug fix: the error from defaults.Set was previously ignored.
	if err := defaults.Set(c); err != nil {
		return err
	}
	// Decode through an alias type so this method is not invoked recursively.
	type plain Connection
	if err := unmarshal((*plain)(c)); err != nil {
		return err
	}
	return nil
}
// Password resolves the password for this connection. A configured
// |PassFile| wins: its contents are read and whitespace-trimmed.
// Otherwise the inline |Pass| value is returned.
func (c Connection) Password() (string, error) {
	if c.PassFile == "" {
		return c.Pass, nil
	}
	contents, err := os.ReadFile(c.PassFile)
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(contents)), nil
}
// |RestartArgs| are possible arguments, to change the arguments which are
// provided to the sql-server process when it is restarted. This is used, for
// example, to change server config on a restart.
type RestartArgs struct {
	// Replacement argument list for the restarted server process.
	// NOTE(review): presumably a nil value keeps the previous arguments —
	// confirm against SqlServer.Restart.
	Args *[]string `yaml:"args"`
}
// |TestRepo| represents an init'd dolt repository that is available to a
// server instance. It can be created with some files and with remotes defined.
// |Name| can include path components separated by `/`, which will create the
// repository in a subdirectory.
type TestRepo struct {
	Name string `yaml:"name"`
	// Files to create in the repository before servers are started.
	WithFiles []WithFile `yaml:"with_files"`
	// Remotes to define on the repository before servers are started.
	WithRemotes []WithRemote `yaml:"with_remotes"`

	// Only valid on Test.Repos, not in Test.MultiRepos.Repos. If set, a
	// sql-server process will be run against this TestRepo. It will be
	// available as TestRepo.Name.
	Server *Server `yaml:"server"`
	// An already-running server to connect to instead of launching one.
	// NOTE(review): presumably mutually exclusive with |Server| — confirm
	// against the test runner.
	ExternalServer *ExternalServer `yaml:"external-server"`
}
// |MultiRepo| is a subdirectory where many |TestRepo|s can be defined. You can
// start a sql-server on a |MultiRepo|, in which case there will be no default
// database to connect to.
type MultiRepo struct {
	Name string `yaml:"name"`
	// Repositories created beneath this subdirectory.
	Repos []TestRepo `yaml:"repos"`
	// Files to create in the subdirectory before servers are started.
	WithFiles []WithFile `yaml:"with_files"`

	// If set, a sql-server process will be run against this TestRepo. It
	// will be available as MultiRepo.Name.
	Server *Server `yaml:"server"`
}
// |WithRemote| defines remotes which should be defined on the repository
// before the sql-server is started.
type WithRemote struct {
	// Remote name, e.g. "origin".
	Name string `yaml:"name"`
	// Remote URL.
	URL string `yaml:"url"`
}
// |WithFile| defines a file and its contents to be created in a |Repo| or
// |MultiRepo| before the servers are started.
type WithFile struct {
Name string `yaml:"name"`
// The contents of the file, provided inline in the YAML.
Contents string `yaml:"contents"`
// A source file path to copy to |Name|. Mutually exclusive with
// Contents.
SourcePath string `yaml:"source_path"`
}
func (f WithFile) WriteAtDir(dir string) error {
path := filepath.Join(dir, f.Name)
d := filepath.Dir(path)
err := os.MkdirAll(d, 0750)
if err != nil {
return err
}
if f.SourcePath != "" {
source, err := os.Open(f.SourcePath)
if err != nil {
return err
}
defer source.Close()
dest, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0550)
if err != nil {
return err
}
_, err = io.Copy(dest, source)
return err
} else {
return os.WriteFile(path, []byte(f.Contents), 0550)
}
}
// |Server| defines a sql-server process to start. |Name| must match the
// top-level |Name| of a |TestRepo| or |MultiRepo|.
type Server struct {
	Name string `yaml:"name"`
	// Command-line arguments passed to the sql-server process.
	Args []string `yaml:"args"`

	// The |Port| which the server will be running on. For now, it is up to
	// the |Args| to make sure this is true. Defaults to 3308.
	Port int `yaml:"port"`

	// Assertions to be run against the log output of the server process
	// after the server process successfully terminates.
	LogMatches []string `yaml:"log_matches"`

	// Assertions to be run against the log output of the server process
	// after the server process exits with an error. If |ErrorMatches| is
	// defined, then the server process must exit with a non-0 exit code
	// after it is launched. This will be asserted before any |Connections|
	// interactions are performed.
	ErrorMatches []string `yaml:"error_matches"`
}
// |ExternalServer| describes an already-running sql server (e.g. MySQL) that
// the harness connects to, as opposed to a server it launches itself.
type ExternalServer struct {
	Name string `yaml:"name"`
	// Host to connect to, e.g. "127.0.0.1".
	Host     string `yaml:"host"`
	User     string `yaml:"user"`
	Password string `yaml:"password"`

	// The |Port| which the server will be running on. For now, it is up to
	// the |Args| to make sure this is true. Defaults to 3308.
	// NOTE(review): comment appears copied from |Server| — ExternalServer
	// has no |Args|; confirm the default against the connecting code.
	Port int `yaml:"port"`
}
// The primary interaction of a |Connection|. Either |Query| or |Exec| should
// be set, not both.
type Query struct {
	// Run a query against the connection.
	Query string `yaml:"query"`

	// Run a command against the connection.
	Exec string `yaml:"exec"`

	// Args to be passed as query parameters to either Query or Exec.
	Args []string `yaml:"args"`

	// This can only be non-empty for a |Query|. Asserts the results of the
	// |Query|.
	Result QueryResult `yaml:"result"`

	// If this is non-empty, asserts that the |Query| or the |Exec|
	// generates an error that matches this string.
	ErrorMatch string `yaml:"error_match"`

	// If this is non-zero, it represents the number of times to try the
	// |Query| or the |Exec| and to check its assertions before we fail the
	// test as a result of failed assertions. When interacting with queries
	// that introspect things like replication state, this can be used to
	// wait for quiescence in an inherently racey process. Interactions
	// will be delayed slightly between each failure.
	RetryAttempts int `yaml:"retry_attempts"`
}
// |QueryResult| specifies assertions on the results of a |Query|. Columns must
// be specified for a |Query| and the query results must fully match. If Rows
// are omitted, anything is allowed as long as all rows are read successfully.
// All assertions here are string equality.
type QueryResult struct {
	Columns []string   `yaml:"columns"`
	Rows    ResultRows `yaml:"rows"`
}

// |ResultRows| holds expected rows: either a single row set, or a list of
// alternative row sets under "or". A nil |Or| means no row assertions.
type ResultRows struct {
	// Each element is one alternative: a slice of rows, each row a slice
	// of string cell values.
	Or *[][][]string
}
// UnmarshalYAML accepts either a bare YAML sequence of rows (treated as a
// single alternative) or a mapping with an "or" key listing multiple
// alternative row sets.
func (r *ResultRows) UnmarshalYAML(value *yaml.Node) error {
	// A bare sequence is shorthand for exactly one alternative.
	if value.Kind == yaml.SequenceNode {
		alts := make([][][]string, 1)
		r.Or = &alts
		return value.Decode(&alts[0])
	}
	var wrapper struct {
		Or *[][][]string `yaml:"or"`
	}
	if err := value.Decode(&wrapper); err != nil {
		return err
	}
	r.Or = wrapper.Or
	return nil
}
+303 -12
View File
@@ -1,19 +1,310 @@
## Import Benchmarker
## Import Benchmarking
This library is used to benchmark `dolt table import` on csv/json files and `dolt sql` on .sql files. It uses
the Go testing.B package to execute the relevant dolt commands.
Benchmark different import workflows expressed as yaml files.
### Test Files
Unit tests in `import_test.go` are not suitable for reporting performance
comparisons.
This package uses several test files that are stored in a private S3 bucket (import-benchmarking-github-actions-results)
which represent different sort order, primary keys, etc.
## Usage
The benchmarker supports custom configurations which runs different import jobs against a `dolt` database or a MySQL
server. The parameters of each job and the overall config file are specified in `config.go`.
Sample:
```bash
go run \
github.com/dolthub/dolt/go/performance/import_benchmarker/cmd \
-test testdata/shuffle.yaml
```
Note that if you run the benchmarker without a filepath then the benchmarker will generate a sample file for you. It is
best to stick with the default files used in the production benchmarking system to maintain a sense of consistency.
Requirements:
### Notes
Tests that use dolt require a `dolt` binary in `PATH` for performance comparisons.
* You should name your table "test" in the MySQL schema file.
Tests with an `external-server` configuration are expected to be available
from the host machine on startup.
Example `mysqld` server on the host OS, assuming an initialized `datadir`
and pre-existing database:
```bash
mysqld --port 3308 --local-infile=1 --socket=/tmp/mysqld2.sock
```
Example mysql server `docker-compose.yml` config:
```yaml
mysql:
image: mysql/mysql-server:8.0
container_name: mysql-import-perf
ports:
- "3308:3306"
command: --local-infile=1 --socket=/tmp/mysqld2.sock
volumes:
- ./mysql:/var/lib/mysql
restart: always # always restart unless stopped manually
environment:
MYSQL_USER: root
MYSQL_ROOT_PASSWORD: password
MYSQL_PASSWORD: password
MYSQL_DATABASE: test
```
Note the `--local-infile` parameter, which permits `LOAD DATA`, and
the `--socket` parameter, which specifies a non-default socket that
will not conflict with any `dolt sql-server` instances. All other
parameters, including the database name, are configurable in the test
file yaml.
## Inputs
Specify imports for different servers and workloads along several
dimensions:
- repo
- dolt server (server field)
- dolt cli (omit server field)
- mysql server (external server field)
- table spec
- fmt (string): file format for importing
- csv: comma separated lines
- sql: dump file of insert statements
- rows (int): number of rows to import
- schema (string): CREATE_TABLE statement for table to import
- shuffle (bool): by default generated rows are sorted; indicate `true` to shuffle
- batch (bool): whether to batch insert statements (only applies to fmt=sql)
For examples of the specific YAML input syntax, see the example
files below, or refer to the tests in `testdata/`.
Server Details:
- For dolt sql-server tests, a new sql-server will be constructed individually
for each test run.
- External servers are provided outside of the lifecycle of the `import_benchmarker`
command. The same database instance is used for every table import test.
- Import files are cached on the schema, row number, and format in between
tests.
## Outputs
The output format is a `.sql` file with the following schema:
```sql
CREATE TABLE IF NOT EXISTS import_perf_results (
test_name varchar(64),
server varchar(64),
detail varchar(64),
row_cnt int,
time double,
file_format varchar(8),
sorted bool,
primary key (test_name, server, detail)
);
```
A sample import file:
```sql
insert into import_perf_results values
('primary key types', 'mysql', 'int', 400000, 2.20, 'csv', 1);
insert into import_perf_results values
('primary key types', 'mysql', 'float', 400000, 1.98, 'csv', 1);
insert into import_perf_results values
('primary key types', 'mysql', 'varchar', 400000, 3.46, 'csv', 1);
insert into import_perf_results values
('config width', 'mysql', '2 cols', 400000, 1.71, 'csv', 1);
insert into import_perf_results values
('config width', 'mysql', '4 cols', 400000, 1.78, 'csv', 1);
insert into import_perf_results values
('config width', 'mysql', '8 cols', 400000, 2.10, 'csv', 1);
insert into import_perf_results values
('pk type', 'mysql', 'int', 400000, 1.70, 'csv', 1);
insert into import_perf_results values
('pk type', 'mysql', 'float', 400000, 1.95, 'csv', 1);
insert into import_perf_results values
('pk type', 'mysql', 'varchar', 400000, 3.86, 'csv', 1);
insert into import_perf_results values
('primary key types', 'dolt', 'int', 400000, 2.10, 'csv', 1);
insert into import_perf_results values
('primary key types', 'dolt', 'float', 400000, 2.83, 'csv', 1);
insert into import_perf_results values
('primary key types', 'dolt', 'varchar', 400000, 5.01, 'csv', 1);
insert into import_perf_results values
('config width', 'dolt', '2 cols', 400000, 2.12, 'csv', 1);
insert into import_perf_results values
('config width', 'dolt', '4 cols', 400000, 2.47, 'csv', 1);
insert into import_perf_results values
('config width', 'dolt', '8 cols', 400000, 2.84, 'csv', 1);
insert into import_perf_results values
('pk type', 'dolt', 'int', 400000, 2.06, 'csv', 1);
insert into import_perf_results values
('pk type', 'dolt', 'float', 400000, 2.27, 'csv', 1);
insert into import_perf_results values
('pk type', 'dolt', 'varchar', 400000, 5.34, 'csv', 1);
insert into import_perf_results values
('primary key types', 'dolt_cli', 'int', 400000, 2.40, 'csv', 1);
insert into import_perf_results values
('primary key types', 'dolt_cli', 'float', 400000, 2.44, 'csv', 1);
insert into import_perf_results values
('primary key types', 'dolt_cli', 'varchar', 400000, 5.58, 'csv', 1);
insert into import_perf_results values
('config width', 'dolt_cli', '2 cols', 400000, 2.40, 'csv', 1);
insert into import_perf_results values
('config width', 'dolt_cli', '4 cols', 400000, 2.77, 'csv', 1);
insert into import_perf_results values
('config width', 'dolt_cli', '8 cols', 400000, 3.23, 'csv', 1);
insert into import_perf_results values
('pk type', 'dolt_cli', 'int', 400000, 2.37, 'csv', 1);
insert into import_perf_results values
('pk type', 'dolt_cli', 'float', 400000, 2.43, 'csv', 1);
insert into import_perf_results values
('pk type', 'dolt_cli', 'varchar', 400000, 5.52, 'csv', 1);
```
Ingest the result file and run queries like the ones below to compare
import runtimes:
```sql
-- compare two servers
> select
a.test_name as test_name,
a.detail as detail,
a.row_cnt as row_cnt,
a.sorted as sorted,
a.time as dolt_time,
b.time as mysql_time,
round((a.time / b.time),2) as multiple
from import_perf_results a
join import_perf_results b
on
a.test_name = b.test_name and
a.detail = b.detail
where
a.server = 'dolt' and
b.server = 'mysql'
order by 1,2;
+-------------------+--------------+---------+--------+-----------+------------+----------+
| test_name | detail | row_cnt | sorted | dolt_time | mysql_time | multiple |
+-------------------+--------------+---------+--------+-----------+------------+----------+
| blobs | 1 blob | 400000 | 1 | 34.94 | 2.16 | 16.18 |
| blobs | 2 blobs | 400000 | 1 | 62.23 | 2.08 | 29.92 |
| blobs | no blob | 400000 | 1 | 2.91 | 2.09 | 1.39 |
| config width | 2 cols | 400000 | 1 | 2.12 | 1.71 | 1.24 |
| config width | 4 cols | 400000 | 1 | 2.47 | 1.78 | 1.39 |
| config width | 8 cols | 400000 | 1 | 2.84 | 2.1 | 1.35 |
| pk type | float | 400000 | 1 | 2.27 | 1.95 | 1.16 |
| pk type | int | 400000 | 1 | 2.06 | 1.7 | 1.21 |
| pk type | varchar | 400000 | 1 | 5.34 | 3.86 | 1.38 |
+-------------------+--------------+---------+--------+-----------+------------+----------+
-- compare three servers
> select
o.test_name as test_name,
o.detail,
o.row_cnt,
o.sorted as sorted,
o.time as mysql_time,
(
select round((a.time / b.time),2) m
from import_perf_results a
join import_perf_results b
on
a.test_name = b.test_name and
a.detail = b.detail
where
a.server = 'dolt' and
b.server = 'mysql' and
a.test_name = o.test_name and
a.detail = o.detail
) as sql_mult,
(
select round((a.time / b.time),2) m
from import_perf_results a
join import_perf_results b
on
a.test_name = b.test_name and
a.detail = b.detail
where
a.server = 'dolt_cli' and
b.server = 'mysql' and
a.test_name = o.test_name and
a.detail = o.detail
) as cli_mult
from import_perf_results as o
where o.server = 'mysql'
order by 1,2;
+-------------------+--------------+---------+--------+------------+----------+----------+
| test_name | detail | row_cnt | sorted | mysql_time | sql_mult | cli_mult |
+-------------------+--------------+---------+--------+------------+----------+----------+
| blobs | 1 blob | 400000 | 1 | 2.16 | 16.18 | 13.43 |
| blobs | 2 blobs | 400000 | 1 | 2.08 | 29.92 | 26.71 |
| blobs | no blob | 400000 | 1 | 2.09 | 1.39 | 1.33 |
| config width | 2 cols | 400000 | 1 | 1.71 | 1.24 | 1.4 |
| config width | 4 cols | 400000 | 1 | 1.78 | 1.39 | 1.56 |
| config width | 8 cols | 400000 | 1 | 2.1 | 1.35 | 1.54 |
| pk type | float | 400000 | 1 | 1.95 | 1.16 | 1.25 |
| pk type | int | 400000 | 1 | 1.7 | 1.21 | 1.39 |
| pk type | varchar | 400000 | 1 | 3.86 | 1.38 | 1.43 |
+-------------------+--------------+---------+--------+------------+----------+----------+
```
## Example tests
Example test spec 1:
```yaml
tests:
- name: "sorting"
repos:
- name: repo1
server:
port: 3308
tables:
- name: "shuffle"
shuffle: true
rows: 100000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "sorted"
shuffle: false
rows: 100000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
```
We will import two tables with a dolt sql-server on port `3308`.
Both tables have 100,000 rows, and a schema with two columns.
The "sorted" test imports the default sorted rows, while the
"shuffle" imports unsorted rows.
Example test spec 2:
```yaml
tests:
- name: "row count"
repos:
- name: mysql
external-server:
name: test
host: 127.0.0.1
user: root
password: password
port: 4306
tables:
- name: "400k"
fmt: "csv"
rows: 40000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
```
We will connect to a database server named `test` on port `4306`
with the credentials above to run a 40,000 row import of a table
with two columns.
+38 -17
View File
@@ -16,41 +16,62 @@ package main
import (
"flag"
"fmt"
"log"
"os"
"github.com/dolthub/dolt/go/performance/import_benchmarker"
driver "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils/sql_server_driver"
ib "github.com/dolthub/dolt/go/performance/import_benchmarker"
)
const (
resultsTableName = "results"
)
var configPath = flag.String("config", "", "the path to a config file")
var path = flag.String("test", "", "the path to a test file")
var out = flag.String("out", "", "result output path")
func main() {
flag.Parse()
// Construct a config
config, err := import_benchmarker.NewDefaultImportBenchmarkConfig()
if *configPath != "" {
config, err = import_benchmarker.FromFileConfig(*configPath)
}
def, err := ib.ParseTestsFile(*path)
if err != nil {
log.Fatal(err.Error())
log.Fatalln(err)
}
// Get the working directory the tests will be executing in
wd := import_benchmarker.GetWorkingDir()
// Generate the tests and the benchmarker.
results, err := import_benchmarker.RunBenchmarkTests(config, wd)
tmpdir, err := os.MkdirTemp("", "repo-store-")
if err != nil {
log.Fatal(err)
log.Fatalln(err)
}
import_benchmarker.SerializeResults(results, wd, resultsTableName, "csv")
results := new(ib.ImportResults)
u, err := driver.NewDoltUser()
for _, test := range def.Tests {
test.Results = results
test.InitWithTmpDir(tmpdir)
for _, r := range test.Repos {
var err error
switch {
case r.ExternalServer != nil:
err = test.RunExternalServerTests(r.Name, r.ExternalServer)
case r.Server != nil:
err = test.RunSqlServerTests(r, u)
default:
err = test.RunCliTests(r, u)
}
if err != nil {
log.Fatalln(err)
}
}
}
if *out != "" {
of, err := os.Create(*out)
if err != nil {
log.Fatalln(err)
}
fmt.Fprintf(of, results.SqlDump())
} else {
fmt.Println(results.SqlDump())
}
os.Exit(0)
}
-282
View File
@@ -1,282 +0,0 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
"github.com/dolthub/dolt/go/performance/utils/sysbench_runner"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
)
const (
smallSet = 100000
mediumSet = 1000000
largeSet = 10000000
testTable = "test"
)
// Sentinel errors returned by job validation. Callers compare against these
// directly, so only the message text may change.
var (
	ErrMissingMysqlSchemaFile  = errors.New("error: Must supply schema file for mysql jobs")
	ErrImproperMysqlFileFormat = errors.New("error: Improper schema file for mysql")
	ErrUnsupportedProgram      = errors.New("error: Unsupported program only dolt or mysql used")
	// Fixed typo: was "Unsupport formated".
	ErrUnsupportedFileFormat = errors.New("error: Unsupported format. Only csv, json or sql allowed")
)
// ImportBenchmarkJob describes a single import benchmark run: what data to
// import, in which format, and with which program/binary.
type ImportBenchmarkJob struct {
	// Name of the job
	Name string

	// NumRows represents the number of rows being imported in the job.
	NumRows int

	// Sorted represents whether the data is sorted or not.
	Sorted bool

	// Format is either csv, json or sql.
	Format string

	// Filepath is the path to the data file. If empty, data is generated instead.
	Filepath string

	// Program is either "dolt" or "mysql" (validated case-insensitively).
	Program string

	// Version tracks the current version of Dolt or MySQL being used
	Version string

	// ExecPath is a path towards a Dolt or MySQL executable. This is also useful when running different versions of Dolt.
	ExecPath string

	// SchemaPath is a path towards a generated schema. It is required for MySQL jobs and optional for Dolt jobs.
	SchemaPath string
}
// ImportBenchmarkConfig is the top-level benchmark configuration: the set of
// jobs to run plus the MySQL connection settings shared by all mysql jobs.
type ImportBenchmarkConfig struct {
	Jobs []*ImportBenchmarkJob

	// MysqlConnectionProtocol is either tcp or unix. On our kubernetes benchmarking deployments unix is needed. To run this
	// locally you want tcp
	MysqlConnectionProtocol string

	// MysqlPort is used to connect with a MySQL port
	MysqlPort int

	// MysqlHost is used to connect with a MySQL host
	MysqlHost string

	// NbfVersion selects the Dolt storage format; when non-empty it is exported
	// as DOLT_DEFAULT_BIN_FORMAT before `dolt init` runs.
	NbfVersion string
}
// NewDefaultImportBenchmarkConfig builds a validated configuration containing
// a single small-set, unsorted csv import job that runs against the locally
// installed dolt binary.
func NewDefaultImportBenchmarkConfig() (*ImportBenchmarkConfig, error) {
	config := &ImportBenchmarkConfig{
		Jobs: []*ImportBenchmarkJob{
			{
				Name:     "dolt_import_small",
				NumRows:  smallSet,
				Sorted:   false,
				Format:   csvExt,
				Version:  "HEAD", // Use whatever dolt is installed locally
				ExecPath: "dolt", // Assumes dolt is installed locally
				Program:  "dolt",
			},
		},
	}
	if err := config.ValidateAndUpdateDefaults(); err != nil {
		return nil, err
	}
	return config, nil
}
// FromFileConfig reads a JSON-encoded ImportBenchmarkConfig from configPath,
// fills in connection defaults, and validates every job.
//
// Returns an error if the file cannot be read, is not valid JSON, or fails
// validation.
func FromFileConfig(configPath string) (*ImportBenchmarkConfig, error) {
	// os.ReadFile supersedes the deprecated ioutil.ReadFile.
	data, err := os.ReadFile(configPath)
	if err != nil {
		return nil, err
	}
	config := &ImportBenchmarkConfig{
		Jobs: make([]*ImportBenchmarkJob, 0),
	}
	if err = json.Unmarshal(data, config); err != nil {
		return nil, err
	}
	if err = config.ValidateAndUpdateDefaults(); err != nil {
		return nil, err
	}
	return config, nil
}
// ValidateAndUpdateDefaults fills in zero-valued MySQL connection settings
// with their defaults and validates every configured job.
func (c *ImportBenchmarkConfig) ValidateAndUpdateDefaults() error {
	if c.MysqlConnectionProtocol == "" {
		c.MysqlConnectionProtocol = "tcp"
	}
	if c.MysqlHost == "" {
		c.MysqlHost = defaultHost
	}
	if c.MysqlPort == 0 {
		c.MysqlPort = defaultPort
	}
	for _, j := range c.Jobs {
		if err := j.updateDefaultsAndValidate(); err != nil {
			return err
		}
	}
	return nil
}
// updateDefaultsAndValidate lowercases the job's Program and Format and
// verifies that the program/format combination is supported.
func (j *ImportBenchmarkJob) updateDefaultsAndValidate() error {
	j.Program = strings.ToLower(j.Program)
	switch sysbench_runner.ServerType(j.Program) {
	case sysbench_runner.MySql:
		// MySQL jobs need an explicit schema file and only load csv data.
		if j.SchemaPath == "" {
			return ErrMissingMysqlSchemaFile
		}
		if j.Format != csvExt {
			return ErrImproperMysqlFileFormat
		}
	case sysbench_runner.Dolt:
	default:
		return ErrUnsupportedProgram
	}

	j.Format = strings.ToLower(j.Format)
	supported := false
	for _, f := range supportedFormats {
		if f == j.Format {
			supported = true
			break
		}
	}
	if !supported {
		return ErrUnsupportedFileFormat
	}
	return nil
}
// getMysqlConfigFromConfig maps the benchmark config's MySQL connection
// settings onto a sysbench_runner.MysqlConfig.
func getMysqlConfigFromConfig(c *ImportBenchmarkConfig) sysbench_runner.MysqlConfig {
	return sysbench_runner.MysqlConfig{
		Socket:             defaultSocket,
		Host:               c.MysqlHost,
		ConnectionProtocol: c.MysqlConnectionProtocol,
		Port:               c.MysqlPort,
	}
}
// generateTestFilesIfNeeded ensures every job has a data file on disk: jobs
// with a preset Filepath are kept as-is, all others get a freshly generated
// file (and their Format updated to the generated file's extension).
func generateTestFilesIfNeeded(config *ImportBenchmarkConfig) *ImportBenchmarkConfig {
	updated := make([]*ImportBenchmarkJob, 0, len(config.Jobs))
	for _, job := range config.Jobs {
		if job.Filepath == "" {
			job.Filepath, job.Format = generateTestFile(job)
		}
		updated = append(updated, job)
	}
	config.Jobs = updated
	return config
}
// generateTestFile writes a randomly generated data file for job into the
// working directory and returns the file's path and format extension.
// File-creation failures abort the process.
func generateTestFile(job *ImportBenchmarkJob) (string, string) {
	sch := NewSeedSchema(job.NumRows, genSampleCols(), job.Format)
	target := filepath.Join(GetWorkingDir(), fmt.Sprintf("testData.%s", sch.FileFormatExt))

	wc, err := filesys.LocalFS.OpenForWrite(target, os.ModePerm)
	if err != nil {
		log.Fatalf(err.Error())
	}
	defer wc.Close()

	NewDSImpl(wc, sch, seedRandom, testTable).GenerateData()
	return target, sch.FileFormatExt
}
// RunBenchmarkTests materializes any missing test data files, partitions the
// configured jobs by program, and benchmarks every dolt job individually
// followed by all mysql jobs in one batch. Invalid programs or non-csv mysql
// jobs abort the process.
func RunBenchmarkTests(config *ImportBenchmarkConfig, workingDir string) ([]result, error) {
	config = generateTestFilesIfNeeded(config)

	// Partition jobs by the program that runs them.
	var doltJobs, mySQLJobs []*ImportBenchmarkJob
	for _, job := range config.Jobs {
		switch strings.ToLower(job.Program) {
		case "dolt":
			doltJobs = append(doltJobs, job)
		case "mysql":
			if job.Format != csvExt {
				log.Fatal("mysql import benchmarking only supports csv files")
			}
			mySQLJobs = append(mySQLJobs, job)
		default:
			log.Fatal("error: Invalid program. Must use dolt or mysql. See the sample config")
		}
	}

	results := make([]result, 0, len(config.Jobs))
	for _, job := range doltJobs {
		r, err := BenchmarkDoltImportJob(job, workingDir, config.NbfVersion)
		if err != nil {
			return nil, err
		}
		results = append(results, r)
	}

	mysqlResults, err := BenchmarkMySQLImportJobs(mySQLJobs, getMysqlConfigFromConfig(config))
	if err != nil {
		return nil, err
	}
	return append(results, mysqlResults...), nil
}
@@ -1,172 +0,0 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestGeneratedConfigCanBeImported runs the default (single small csv dolt)
// benchmark end-to-end and sanity-checks the result name and runtime.
func TestGeneratedConfigCanBeImported(t *testing.T) {
	t.Skip() // Skipping since dolt isn't installed on the github actions vm
	config, err := NewDefaultImportBenchmarkConfig()
	assert.NoError(t, err)
	wd := GetWorkingDir()
	results, err := RunBenchmarkTests(config, wd)
	assert.NoError(t, err)
	assert.Equal(t, 1, len(results))
	assert.Equal(t, "dolt_import_small", results[0].name)
	// Sanity check: An import of 100,000 should never take more than 15 seconds
	assert.LessOrEqual(t, results[0].br.T, time.Second*15)
	// Remove the data file generated for the run.
	os.RemoveAll(filepath.Join(wd, "testData.csv"))
}
// TestNewStorageFormat repeats the small import benchmark with the __DOLT__
// storage format pinned via NbfVersion.
func TestNewStorageFormat(t *testing.T) {
	t.Skip() // Skipping since dolt isn't installed on the github actions vm
	job := createSampleDoltJob()
	config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{job}, NbfVersion: "__DOLT__"}
	err := config.ValidateAndUpdateDefaults()
	assert.NoError(t, err)
	wd := GetWorkingDir()
	results, err := RunBenchmarkTests(config, wd)
	assert.NoError(t, err)
	assert.Equal(t, 1, len(results))
	assert.Equal(t, "dolt_import_small", results[0].name)
	// Sanity check: An import of 100,000 should never take more than 15 seconds
	assert.LessOrEqual(t, results[0].br.T, time.Second*15)
	// Remove the data file generated for the run.
	os.RemoveAll(filepath.Join(wd, "testData.csv"))
}
// TestCanGenerateFilesForAllFormats verifies that a readable data file is
// generated on disk for every supported format, then cleans the files up.
func TestCanGenerateFilesForAllFormats(t *testing.T) {
	config := &ImportBenchmarkConfig{Jobs: make([]*ImportBenchmarkJob, 0)}
	// Create jobs for all configs
	for _, format := range supportedFormats {
		job := &ImportBenchmarkJob{
			Name:     "dolt_import_small",
			NumRows:  smallSet,
			Sorted:   false,
			Format:   format,
			Version:  "HEAD", // Use whatever dolt is installed locally
			ExecPath: "dolt", // Assumes dolt is installed locally
		}
		config.Jobs = append(config.Jobs, job)
	}
	// supportedFormats is expected to contain exactly csv, json and sql.
	assert.Equal(t, 3, len(config.Jobs))
	config = generateTestFilesIfNeeded(config)
	for _, job := range config.Jobs {
		// Each job should now point at an openable file on disk.
		file, err := os.Open(job.Filepath)
		assert.NoError(t, err)
		err = file.Close()
		assert.NoError(t, err)
		err = os.Remove(job.Filepath)
		assert.NoError(t, err)
	}
}
// TestBadConfigurations checks that each class of invalid job configuration is
// rejected with its dedicated sentinel error.
func TestBadConfigurations(t *testing.T) {
	t.Run("non csv format MySQL Job is considered invalid", func(t *testing.T) {
		mysqlJob := createSampleMysqlJob()
		mysqlJob.Format = jsonExt
		config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{mysqlJob}}
		err := config.ValidateAndUpdateDefaults()
		assert.Error(t, err)
		assert.Equal(t, ErrImproperMysqlFileFormat, err)
	})
	t.Run("MySQL Job with no schema file errors", func(t *testing.T) {
		mysqlJob := createSampleMysqlJob()
		mysqlJob.SchemaPath = ""
		config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{mysqlJob}}
		err := config.ValidateAndUpdateDefaults()
		assert.Error(t, err)
		assert.Equal(t, ErrMissingMysqlSchemaFile, err)
	})
	t.Run("improper program type passed in ", func(t *testing.T) {
		doltJob := createSampleDoltJob()
		doltJob.Program = "fake-program"
		config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{doltJob}}
		err := config.ValidateAndUpdateDefaults()
		assert.Error(t, err)
		assert.Equal(t, ErrUnsupportedProgram, err)
	})
	t.Run("improper file extension passed in", func(t *testing.T) {
		doltJob := createSampleDoltJob()
		doltJob.Format = "psv"
		config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{doltJob}}
		err := config.ValidateAndUpdateDefaults()
		assert.Error(t, err)
		assert.Equal(t, ErrUnsupportedFileFormat, err)
	})
}
// createSampleMysqlJob returns a minimal valid MySQL job used by the
// configuration-validation tests.
func createSampleMysqlJob() *ImportBenchmarkJob {
	return &ImportBenchmarkJob{
		Name:       "Mysql Dummy",
		Format:     csvExt,
		Version:    "8.0.22",
		ExecPath:   "/usr/mysql",
		Program:    "mysql",
		SchemaPath: "/schema",
	}
}
// createSampleDoltJob returns a small, unsorted csv dolt job that targets the
// locally installed dolt binary.
func createSampleDoltJob() *ImportBenchmarkJob {
	return &ImportBenchmarkJob{
		Name:     "dolt_import_small",
		NumRows:  smallSet,
		Sorted:   false,
		Format:   csvExt,
		Version:  "HEAD", // locally installed dolt
		ExecPath: "dolt", // assumes dolt is on PATH
		Program:  "dolt",
	}
}
@@ -1,149 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"io"
"log"
"strings"
)
// Dataset is a set of test data used for benchmark testing
type Dataset interface {
	// GenerateData generates a dataset for testing and writes it to the
	// implementation's destination.
	GenerateData()

	// Change returns a Dataset mutated by the given percentage of change
	Change(pct float32) Dataset
}
// DSImpl implements the Dataset interface
type DSImpl struct {
	// Schema defines the structure of the Dataset
	Schema *SeedSchema

	// TableName is the name of the test dataset
	TableName string

	// w is the writer where the test dataset will be written
	w io.Writer

	// sf is the function used to generate random data values in the dataset
	sf seedFunc
}
// NewDSImpl constructs a DSImpl that writes rows for sch to wc, naming the
// dataset tableName and generating values with sf.
func NewDSImpl(wc io.Writer, sch *SeedSchema, sf seedFunc, tableName string) *DSImpl {
	return &DSImpl{
		Schema:    sch,
		TableName: tableName,
		sf:        sf,
		w:         wc,
	}
}
// GenerateData generates a dataset and writes it to a io.Writer
func (ds *DSImpl) GenerateData() {
	writeDataToWriter(ds.w, ds.Schema.Rows, ds.Schema.Columns, ds.sf, ds.TableName, ds.Schema.FileFormatExt)
}

// Change returns a DataSet that is a mutation of this Dataset by the given percentage
func (ds *DSImpl) Change(pct float32) Dataset {
	// TODO: not implemented yet — currently returns an empty DSImpl rather
	// than a mutated copy; pct is ignored.
	return &DSImpl{}
}
// writeDataToWriter streams a complete dataset — header, rows, and any
// format-specific footer — to wc. Column values are produced by getColValue,
// which may inspect the previous row (e.g. for incrementing columns).
func writeDataToWriter(wc io.Writer, rows int, cols []*SeedColumn, sf seedFunc, tableName, format string) {
	// handle the "header" for all format types
	writeHeader(wc, cols, tableName, format)

	var prevRow []string
	for i := 0; i < rows; i++ {
		row := make([]string, len(cols))
		for colIndex, col := range cols {
			val := getColValue(prevRow, colIndex, col, sf, format)
			row[colIndex] = val
			// NOTE(review): this writes the new value back into prevRow, which
			// aliases the previous iteration's row slice — presumably so later
			// columns in this row see current values; confirm this is intended.
			if i > 0 && prevRow != nil {
				prevRow[colIndex] = val
			}
		}
		_, err := wc.Write([]byte(formatRow(row, cols, i, rows-1, tableName, format)))
		if err != nil {
			log.Fatal(err)
		}
		prevRow = row[:]
	}

	// handle the "footer" for format types
	switch format {
	case jsonExt:
		// Close the {"Rows":[ ... ]} envelope opened by writeHeader.
		suffix := "]}\n"
		_, err := wc.Write([]byte(suffix))
		if err != nil {
			log.Fatal(err)
		}
	default:
	}
}
// writeHeader emits the format-specific prologue for a dataset: a column-name
// row for csv, DROP/CREATE statements for sql, and the opening of the Rows
// envelope for json. Unsupported formats abort the process.
func writeHeader(w io.Writer, cols []*SeedColumn, tableName, format string) {
	var header string
	switch format {
	case csvExt:
		header = makeCSVHeaderStr(cols, tableName, format) + "\n"
	case sqlExt:
		header = getSQLHeader(cols, tableName, format) + "\n"
	case jsonExt:
		header = "{\"Rows\":["
	default:
		log.Fatalf("unable to write the header, unsupported format %v \n", format)
	}
	if _, err := w.Write([]byte(header)); err != nil {
		log.Fatal(err)
	}
}
// formatRow renders one generated row in the requested format, including its
// terminator. Unsupported formats abort the process.
func formatRow(strs []string, cols []*SeedColumn, currentRowIdx, lastRowIdx int, tableName, format string) string {
	switch format {
	case csvExt:
		return strings.Join(strs, ",") + "\n"
	case sqlExt:
		return getSQLRow(strs, cols, tableName) + "\n"
	case jsonExt:
		// JSON objects are comma-separated except for the final row.
		if currentRowIdx == lastRowIdx {
			return getJSONRow(strs, cols) + "\n"
		}
		return getJSONRow(strs, cols) + ",\n"
	default:
		log.Fatalf("cannot format row, unsupported file format %s \n", format)
	}
	return ""
}
// makeCSVHeaderStr joins the column names into a csv header row.
func makeCSVHeaderStr(cols []*SeedColumn, tableName, format string) string {
	names := make([]string, 0, len(cols))
	for _, c := range cols {
		names = append(names, c.Name)
	}
	return formatRow(names, cols, 0, 1, tableName, format)
}
-243
View File
@@ -1,243 +0,0 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"testing"
"github.com/pkg/errors"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/utils/file"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/performance/utils/sysbench_runner"
)
// BenchmarkDoltImportJob runs a single dolt import job end-to-end: it
// initializes a fresh dolt repository in workingDir, times the import command
// with testing.Benchmark, and returns the timing together with the repo's
// size on disk. The repository is removed again when the run finishes.
func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir, nbf string) (result, error) {
	// sql-format imports replace os.Stdin; restore it on exit.
	oldStdin := os.Stdin
	defer func() { os.Stdin = oldStdin }()

	err := setupAndInitializeDoltRepo(filesys.LocalFS, workingDir, job.ExecPath, nbf)
	if err != nil {
		return result{}, err
	}
	defer RemoveDoltDataDir(filesys.LocalFS, workingDir) // remove the repo each time

	commandStr, args, err := getBenchmarkingTools(job, workingDir)
	if err != nil {
		return result{}, err
	}
	// An empty command string means no invocation could be built.
	if commandStr == "" {
		return result{}, errors.New("failed to get command")
	}

	br := testing.Benchmark(func(b *testing.B) {
		err = runBenchmarkCommand(b, commandStr, args, workingDir)
	})
	if err != nil {
		return result{}, err
	}
	size, err := getSizeOnDisk(filesys.LocalFS, workingDir)
	if err != nil {
		return result{}, err
	}
	return result{
		name:        job.Name,
		format:      job.Format,
		rows:        job.NumRows,
		columns:     len(genSampleCols()),
		sizeOnDisk:  size,
		br:          br,
		doltVersion: job.Version,
		program:     "dolt",
	}, nil
}
// setupAndInitializeDoltRepo wipes any existing .dolt directory under
// workingDir, verifies and configures the dolt binary, optionally pins the
// storage format via DOLT_DEFAULT_BIN_FORMAT, and finally runs `dolt init`.
func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath, nbf string) error {
	if err := RemoveDoltDataDir(fs, workingDir); err != nil {
		return err
	}
	if err := sysbench_runner.DoltVersion(context.Background(), doltExecPath); err != nil {
		return err
	}
	if err := sysbench_runner.UpdateDoltConfig(context.Background(), doltExecPath); err != nil {
		return err
	}
	if nbf != "" {
		if err := os.Setenv("DOLT_DEFAULT_BIN_FORMAT", nbf); err != nil {
			return err
		}
	}
	initCmd := execCommand(context.Background(), doltExecPath, "init")
	initCmd.Dir = workingDir
	return initCmd.Run()
}
// getBenchmarkingTools prepares a dolt invocation for a job: the executable
// path plus the arguments appropriate for the job's file format. For sql
// imports it also redirects os.Stdin to the import file; the caller is
// responsible for restoring os.Stdin afterwards.
func getBenchmarkingTools(job *ImportBenchmarkJob, workingDir string) (commandStr string, args []string, err error) {
	commandStr = job.ExecPath
	switch job.Format {
	case csvExt:
		args = []string{"table", "import", "-c", "-f", testTable, job.Filepath}
		if job.SchemaPath != "" {
			args = append(args, "-s", job.SchemaPath)
		}
	case sqlExt:
		// `dolt sql` reads statements from stdin.
		stdin, serr := getStdinForSQLBenchmark(filesys.LocalFS, job.Filepath)
		if serr != nil {
			err = serr
			return
		}
		os.Stdin = stdin
		args = []string{"sql"}
	case jsonExt:
		// json imports need a schema; fall back to a generated one when the
		// job doesn't provide a path.
		pathToSchemaFile := filepath.Join(workingDir, fmt.Sprintf("testSchema%s", job.Format))
		if job.SchemaPath != "" {
			pathToSchemaFile = job.SchemaPath
		}
		args = []string{"table", "import", "-c", "-f", "-s", pathToSchemaFile, testTable, job.Filepath}
	default:
		// fmt.Errorf replaces errors.New(fmt.Sprintf(...)) (staticcheck S1028).
		err = fmt.Errorf("cannot import file, unsupported file format %s \n", job.Format)
		return
	}
	return
}
// runBenchmarkCommand executes the import command b.N times inside the Go
// benchmark harness. Any output on stderr is treated as a failure even when
// the process exits successfully.
func runBenchmarkCommand(b *testing.B, commandStr string, args []string, wd string) error {
	// Note that we can rerun this because dolt import uses the -f parameter
	for i := 0; i < b.N; i++ {
		cmd := execCommand(context.Background(), commandStr, args...)
		var errBytes bytes.Buffer
		cmd.Dir = wd
		cmd.Stdout = os.Stdout
		cmd.Stderr = &errBytes
		if err := cmd.Run(); err != nil {
			return err
		}
		if len(strings.TrimSpace(errBytes.String())) > 0 {
			// fmt.Errorf replaces errors.New(fmt.Sprintf(...)) (staticcheck S1028).
			return fmt.Errorf("error running benchmark: %s", errBytes.String())
		}
	}
	return nil
}
// RemoveDoltDataDir deletes the .dolt directory under dir, if one exists.
func RemoveDoltDataDir(fs filesys.Filesys, dir string) error {
	doltDir := filepath.Join(dir, dbfactory.DoltDir)
	if exists, _ := fs.Exists(doltDir); exists {
		return fs.Delete(doltDir, true)
	}
	return nil
}
func execCommand(ctx context.Context, name string, arg ...string) *exec.Cmd {
e := exec.CommandContext(ctx, name, arg...)
return e
}
// getSizeOnDisk returns the size in MB, rounded to two decimal places, of the
// .dolt repo under workingDir. This is useful for understanding how a repo
// grows in size in proportion to the number of rows.
func getSizeOnDisk(fs filesys.Filesys, workingDir string) (float64, error) {
	doltDir := filepath.Join(workingDir, dbfactory.DoltDir)
	exists, _ := fs.Exists(doltDir)
	if !exists {
		return 0, errors.New("dir does not exist")
	}
	size, err := dirSizeMB(doltDir)
	if err != nil {
		return 0, err
	}
	// Round to two decimals via a formatted string. ParseFloat's bitSize must
	// be 32 or 64 — the previous code passed 2, which is invalid per the
	// strconv documentation. The parse error can't occur for Sprintf output.
	roundedStr := fmt.Sprintf("%.2f", size)
	rounded, _ := strconv.ParseFloat(roundedStr, 64)
	return rounded, nil
}
// cc: https://stackoverflow.com/questions/32482673/how-to-get-directory-total-size
func dirSizeMB(path string) (float64, error) {
var size int64
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() {
size += info.Size()
}
return err
})
sizeMB := float64(size) / 1024.0 / 1024.0
return sizeMB, err
}
// getStdinForSQLBenchmark copies the import file's contents into a temporary
// file and returns that file opened for reading, so the caller can install it
// as os.Stdin for a `dolt sql` run.
func getStdinForSQLBenchmark(fs filesys.Filesys, pathToImportFile string) (*os.File, error) {
	content, err := fs.ReadFile(pathToImportFile)
	if err != nil {
		return nil, err
	}
	tmpfile, err := os.CreateTemp("", "temp")
	if err != nil {
		return nil, err
	}
	// NOTE(review): this deferred Remove runs after the final os.Open below,
	// so the returned handle refers to an unlinked file — relies on POSIX
	// unlink-while-open semantics; confirm Windows is not a target.
	defer file.Remove(tmpfile.Name()) // clean up
	if _, err := tmpfile.Write(content); err != nil {
		return nil, err
	}
	if err := tmpfile.Close(); err != nil {
		return nil, err
	}
	return os.Open(tmpfile.Name())
}
@@ -0,0 +1,66 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"fmt"
"os"
"path/filepath"
"strconv"
"github.com/pkg/errors"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
)
// getSizeOnDisk returns the size in MB, rounded to two decimal places, of the
// .dolt repo under workingDir. This is useful for understanding how a repo
// grows in size in proportion to the number of rows.
func getSizeOnDisk(fs filesys.Filesys, workingDir string) (float64, error) {
	doltDir := filepath.Join(workingDir, dbfactory.DoltDir)
	exists, _ := fs.Exists(doltDir)
	if !exists {
		return 0, errors.New("dir does not exist")
	}
	size, err := dirSizeMB(doltDir)
	if err != nil {
		return 0, err
	}
	// Round to two decimals via a formatted string. ParseFloat's bitSize must
	// be 32 or 64 — the previous code passed 2, which is invalid per the
	// strconv documentation. The parse error can't occur for Sprintf output.
	roundedStr := fmt.Sprintf("%.2f", size)
	rounded, _ := strconv.ParseFloat(roundedStr, 64)
	return rounded, nil
}
// cc: https://stackoverflow.com/questions/32482673/how-to-get-directory-total-size
func dirSizeMB(path string) (float64, error) {
var size int64
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() {
size += info.Size()
}
return err
})
sizeMB := float64(size) / 1024.0 / 1024.0
return sizeMB, err
}
@@ -1,196 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"fmt"
"log"
"math/rand"
"os"
"strconv"
"strings"
"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
"github.com/dolthub/dolt/go/store/types"
)
// seedFunc generates one value for col, rendered appropriately for the given
// output format.
type seedFunc func(col *SeedColumn, format string) string

// Container is used to correctly format strings enclosed in brackets
type Container struct {
	// c holds three slots — opening bracket, payload, closing bracket — where
	// the payload slot is filled by InsertPayload.
	c []string
}
// NewContainer creates a three-slot Container pre-filled with the bracket pair
// for the given format: parentheses for sql, braces for json. Any other
// format aborts the process.
func NewContainer(format string) *Container {
	slots := make([]string, 3)
	switch format {
	case sqlExt:
		slots[0], slots[2] = "(", ")"
	case jsonExt:
		slots[0], slots[2] = "{", "}"
	default:
		log.Fatalf("cannot create new container, unsupported format %s \n", format)
	}
	return &Container{c: slots}
}
// InsertPayload joins payload with separator, stores it between the brackets,
// and returns the fully assembled string.
func (sc *Container) InsertPayload(payload []string, separator string) string {
	joined := strings.Join(payload, separator)
	sc.c[1] = joined
	return sc.c[0] + joined + sc.c[2]
}
// getColValue produces the next value for col, dispatching on its generation
// type: incrementing values derive from the previous row, random values come
// from the seed function. Unsupported gen types abort the process.
func getColValue(row []string, colIndex int, col *SeedColumn, sf seedFunc, format string) string {
	switch col.GenType {
	case increment:
		return genNomsTypeValueIncrement(row, colIndex, col, format)
	case random:
		return getNomsTypeValueRandom(col, sf, format)
	}
	log.Fatalf("cannot get column value, unsupported gen type %s \n", col.GenType)
	return ""
}
// genNomsTypeValueIncrement returns the previous row's value for this column
// plus one, or "1" when there is no previous row. Only int columns support
// incrementing; anything else aborts the process.
func genNomsTypeValueIncrement(row []string, colIndex int, col *SeedColumn, format string) string {
	if col.Type != types.IntKind {
		log.Fatalf("cannot generate incremental value, unsupported noms type %s \n", col.Type.String())
		return ""
	}
	if len(row) == 0 {
		return "1"
	}
	prev, err := strconv.Atoi(row[colIndex])
	if err != nil {
		log.Fatalf(err.Error())
	}
	return fmt.Sprintf("%d", prev+1)
}
// getNomsTypeValueRandom delegates value generation to the supplied seedFunc.
func getNomsTypeValueRandom(col *SeedColumn, sf seedFunc, format string) string {
	return sf(col, format)
}
// seedRandom is a seedFunc that returns variably random values for each
// supported nomsKind type: a random int in [0,1000) for int columns, a random
// string for string columns. Other types abort the process.
func seedRandom(col *SeedColumn, format string) string {
	switch col.Type {
	case types.IntKind:
		return strconv.Itoa(rand.Intn(1000))
	case types.StringKind:
		return getRandomString(format)
	}
	log.Fatalf("cannot generate random value, unsupported noms type %s \n", col.Type.String())
	return ""
}
// getRandomString returns a random ASCII-letter string with length in
// [0,255); for sql output the string is wrapped in double quotes.
func getRandomString(format string) string {
	const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	buf := make([]byte, rand.Intn(255))
	for i := range buf {
		buf[i] = letters[rand.Int63()%int64(len(letters))]
	}
	if format == sqlExt {
		return fmt.Sprintf(`"%s"`, buf)
	}
	return string(buf)
}
// getJSONRow renders one row as a JSON object keyed by column name. The
// value count must match the column count; a mismatch is fatal.
// NOTE(review): values are not JSON-escaped — assumes generated data never
// contains quotes or backslashes.
func getJSONRow(strs []string, cols []*SeedColumn) string {
	if len(strs) != len(cols) {
		log.Fatalf("values should be the length of columns. values: %+v, columns: %+v \n", strs, cols)
	}
	fields := make([]string, len(cols))
	for i, col := range cols {
		fields[i] = fmt.Sprintf("\"%s\":\"%s\"", col.Name, strs[i])
	}
	return NewContainer(jsonExt).InsertPayload(fields, ",")
}
// getSQLRow renders one row as a complete INSERT statement for tableName,
// backtick-quoting the table and column identifiers.
func getSQLRow(strs []string, cols []*SeedColumn, tableName string) string {
	container := NewContainer(sqlExt)
	names := make([]string, len(cols))
	for i, col := range cols {
		names[i] = fmt.Sprintf("`%s`", col.Name)
	}
	columnList := container.InsertPayload(names, ",")
	valueList := container.InsertPayload(strs, ",")
	return fmt.Sprintf("INSERT INTO `%s` %s VALUES %s;", tableName, columnList, valueList)
}
// getSQLHeader builds the DDL preamble for a generated .sql seed file: a
// DROP TABLE IF EXISTS followed by a CREATE TABLE whose column definitions
// are derived from cols. format selects the container delimiters
// (expected: sqlExt, yielding parentheses around the column list).
func getSQLHeader(cols []*SeedColumn, tableName, format string) string {
	statement := make([]string, 0)
	statement = append(statement, fmt.Sprintf("DROP TABLE IF EXISTS `%s`;\n", tableName))
	statement = append(statement, fmt.Sprintf("CREATE TABLE `%s` ", tableName))
	container := NewContainer(format)
	schema := make([]string, 0)
	pkDefs := make([]string, 0)
	for i, col := range cols {
		// colStr is a format template; name and SQL type are filled in below.
		colStr := "`%s` %s"
		// primary keys get NOT NULL plus a PRIMARY KEY clause appended later
		if col.PrimaryKey {
			pkDefs = append(pkDefs, fmt.Sprintf("PRIMARY KEY (`%s`)", col.Name))
			colStr = "`%s` %s NOT NULL"
		}
		// incrementally generated columns map to AUTO_INCREMENT
		if col.GenType == increment {
			colStr = fmt.Sprintf("%s AUTO_INCREMENT", colStr)
		}
		// the column ordinal doubles as the dolt schema tag
		colStr = fmt.Sprintf("%s COMMENT 'tag:%d'", colStr, i)
		// translate the noms kind to its upper-cased SQL type name
		sqlType := typeinfo.FromKind(col.Type).ToSqlType().String()
		schema = append(schema, fmt.Sprintf(colStr, col.Name, strings.ToUpper(sqlType)))
	}
	// primary key clauses go after all column definitions
	for _, pkDef := range pkDefs {
		schema = append(schema, pkDef)
	}
	// wrap the definitions in the container delimiters and close the statement
	schemaStatement := container.InsertPayload(schema, ",\n")
	statement = append(statement, schemaStatement+"; \n")
	return strings.Join(statement, "")
}
// GetWorkingDir returns the process working directory, or the empty string
// if it cannot be determined (the error was previously discarded silently).
func GetWorkingDir() string {
	wd, err := os.Getwd()
	if err != nil {
		return ""
	}
	return wd
}
@@ -0,0 +1,45 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"testing"
)
// TestImportSize runs the row-count scaling import benchmarks.
// Skipped by default; remove the t.Skip to run locally.
func TestImportSize(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/size.yaml")
}
// TestExternalImport runs import benchmarks against an externally managed
// server (see testdata/external.yaml for the expected connection settings).
// Skipped by default; remove the t.Skip to run locally.
func TestExternalImport(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/external.yaml")
}
// TestDoltImport runs import benchmarks against a dolt sql-server.
// Skipped by default; remove the t.Skip to run locally.
func TestDoltImport(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/dolt_server.yaml")
}
// TestShuffle compares importing shuffled versus pre-sorted data.
// Skipped by default; remove the t.Skip to run locally.
func TestShuffle(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/shuffle.yaml")
}
// TestCI exercises the CI benchmark matrix through the test harness.
// this will be a lot slower than running `cmd/main.go -test testdata/ci.yaml`
func TestCI(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/ci.yaml")
}
-201
View File
@@ -1,201 +0,0 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"bytes"
"context"
"database/sql"
"errors"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"os/signal"
"strings"
"sync"
"syscall"
"testing"
"time"
"github.com/go-sql-driver/mysql"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/performance/utils/sysbench_runner"
)
const (
defaultHost = "127.0.0.1"
defaultPort = 3306
defaultSocket = "/var/run/mysqld/mysqld.sock"
dbName = "test"
)
// BenchmarkMySQLImportJobs launches a single mysql server (using the first
// job's ExecPath), creates the test database, then benchmarks each import
// job against it, returning one result per job.
// NOTE(review): the WaitGroup is Add(1)'d for the interrupt handler but
// never waited on, and gServer.Wait is never called — the server goroutine's
// error can only surface through the stderr-buffer check. Confirm intended.
func BenchmarkMySQLImportJobs(jobs []*ImportBenchmarkJob, mConfig sysbench_runner.MysqlConfig) ([]result, error) {
	if len(jobs) == 0 {
		return nil, nil
	}
	ctx := context.Background()
	withCancelCtx, cancel := context.WithCancel(ctx)
	gServer, serverCtx := errgroup.WithContext(withCancelCtx)
	var serverErr bytes.Buffer
	// Assume first server is okay; all jobs run against this one process.
	server := getMysqlServer(serverCtx, jobs[0].ExecPath, getServersArgs())
	server.Stderr = &serverErr
	// launch the mysql server in the errgroup so ctx cancellation stops it
	gServer.Go(func() error {
		err := server.Run()
		if err != nil {
			return err
		}
		// any stderr output is treated as a server failure
		if len(strings.TrimSpace(serverErr.String())) > 0 {
			// NOTE(review): errors.New(fmt.Sprintf(...)) — fmt.Errorf is idiomatic
			return errors.New(fmt.Sprintf("server produced stderr output: %s", serverErr.String()))
		}
		return nil
	})
	// sleep to allow the server to start; there is no readiness probe
	time.Sleep(5 * time.Second)
	// set up the relevant testing database and permissions
	err := sysbench_runner.SetupDB(ctx, mConfig, dbName)
	if err != nil {
		cancel()
		return nil, err
	}
	log.Println("successfully setup the database")
	// handle user interrupt: cancel the server context on SIGINT/SIGTERM
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, os.Interrupt, syscall.SIGTERM)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		<-quit
		defer wg.Done()
		signal.Stop(quit)
		cancel()
	}()
	results := make([]result, len(jobs))
	for i, job := range jobs {
		// benchmark the actual job; err is captured from inside the closure
		var err error
		br := testing.Benchmark(func(b *testing.B) {
			err = benchmarkLoadData(ctx, b, mConfig, job)
		})
		if err != nil {
			return nil, err
		}
		results[i] = result{
			name:        job.Name,
			format:      job.Format,
			rows:        job.NumRows,
			columns:     len(genSampleCols()),
			sizeOnDisk:  -1, // TODO: Think about how to collect MySQL table size
			br:          br,
			doltVersion: job.Version,
			program:     "mysql",
		}
	}
	return results, nil
}
// benchmarkLoadData times b.N iterations of (drop table, recreate it from
// the job's schema file, LOAD DATA LOCAL INFILE the job's csv) against the
// configured mysql server. The named error return lets the deferred
// db.Close surface its error when nothing else has failed.
func benchmarkLoadData(ctx context.Context, b *testing.B, mConfig sysbench_runner.MysqlConfig, job *ImportBenchmarkJob) (err error) {
	var dsn string
	dsn, err = sysbench_runner.FormatDsn(mConfig)
	if err != nil {
		return
	}
	var db *sql.DB
	db, err = sql.Open("mysql", dsn)
	if err != nil {
		return
	}
	// propagate the Close error only if the body succeeded
	defer func() {
		rerr := db.Close()
		if err == nil {
			err = rerr
		}
	}()
	err = db.Ping()
	if err != nil {
		return
	}
	_, err = db.ExecContext(ctx, fmt.Sprintf("USE %s", dbName))
	if err != nil {
		return
	}
	// Load the schema for the test table. This assumes the table has the same name as testTable
	var data []byte
	data, err = ioutil.ReadFile(job.SchemaPath)
	if err != nil {
		return
	}
	// Register the local file as per https://github.com/go-sql-driver/mysql#load-data-local-infile-support
	mysql.RegisterLocalFile(job.Filepath)
	// timer starts here so connection/schema setup is excluded
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Since dolt also creates the table on import we'll add dropping and creating the table to the benchmark
		_, err = db.ExecContext(ctx, fmt.Sprintf("DROP TABLE IF EXISTS %s", testTable))
		if err != nil {
			return
		}
		// Run the CREATE TABLE command stored in the schema file
		// TODO: This schema file must have the same name as testTable.
		_, err = db.ExecContext(ctx, string(data))
		if err != nil {
			return
		}
		// Run LOAD DATA on the csv file
		_, err = db.ExecContext(ctx, fmt.Sprintf(`LOAD DATA LOCAL INFILE '%s' REPLACE INTO TABLE %s FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES`, job.Filepath, testTable))
		if err != nil {
			return
		}
		log.Printf("MySQL server loaded file %s \n", job.Filepath)
	}
	return
}
// getServersArgs returns the command line flags used to launch the mysql
// server: the mysql user, the listen port, and LOAD DATA LOCAL support.
func getServersArgs() []string {
	args := make([]string, 0, 3)
	args = append(args, "--user=mysql")
	args = append(args, fmt.Sprintf("--port=%d", defaultPort))
	args = append(args, "--local-infile=ON")
	return args
}
// getMysqlServer returns an exec.Cmd that runs serverExec (the mysqld
// binary) with the given params, bound to ctx for cancellation.
func getMysqlServer(ctx context.Context, serverExec string, params []string) *exec.Cmd {
	return execCommand(ctx, serverExec, params...)
}
@@ -0,0 +1 @@
-- Dump every import benchmark result, ordered by the first two columns of
-- the select list (test_name, then server).
select * from import_perf_results order by 1,2;
@@ -0,0 +1,35 @@
-- For every mysql baseline row, report its import time plus how many times
-- slower dolt_server and dolt_cli were on the same (test_name, detail) pair.
select
  o.test_name as test_name,
  o.detail,
  o.row_cnt,
  o.sorted as sorted,
  o.time as mysql_time,
  -- dolt sql-server time divided by the mysql time for the matching test
  (
    select round((a.time / b.time),2) m
    from import_perf_results a
    join import_perf_results b
    on
      a.test_name = b.test_name and
      a.detail = b.detail
    where
      a.server = 'dolt_server' and
      b.server = 'mysql' and
      a.test_name = o.test_name and
      a.detail = o.detail
  ) as sql_mult,
  -- dolt CLI time divided by the mysql time for the matching test
  (
    select round((a.time / b.time),2) m
    from import_perf_results a
    join import_perf_results b
    on
      a.test_name = b.test_name and
      a.detail = b.detail
    where
      a.server = 'dolt_cli' and
      b.server = 'mysql' and
      a.test_name = o.test_name and
      a.detail = o.detail
  ) as cli_mult
from import_perf_results as o
where o.server = 'mysql'
order by 1,2;
@@ -1,170 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"fmt"
"io"
"log"
"os"
"path/filepath"
"testing"
"time"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/types"
)
// result captures the outcome of benchmarking a single import job.
type result struct {
	name        string  // job name
	format      string  // import file format extension (csv/json/sql)
	rows        int     // number of rows imported
	columns     int     // number of columns in the seed schema
	sizeOnDisk  float64 // table size in MB; -1 when not collected
	br          testing.BenchmarkResult // timing data from testing.Benchmark
	program     string  // importing program, e.g. "mysql"
	doltVersion string  // version string of the binary under test
}
// RSImpl is a Dataset containing results of benchmarking
type RSImpl struct {
// Schema defines the structure of the Dataset
Schema *SeedSchema
// Results are results of benchmarking
Results []result
// TableName is the name of the results table
TableName string
// w is the writer where the results will be written
w io.Writer
}
// NewRSImpl constructs a results Dataset that writes to w using the given
// schema, result rows, and destination table name.
func NewRSImpl(w io.Writer, sch *SeedSchema, results []result, tableName string) *RSImpl {
	rs := &RSImpl{}
	rs.Schema = sch
	rs.Results = results
	rs.TableName = tableName
	rs.w = w
	return rs
}
// GenerateData writes the benchmark results to the configured io.Writer in
// the schema's file format.
func (rds *RSImpl) GenerateData() {
	writeResultsToWriter(rds.w, rds.Results, rds.Schema.Columns, rds.TableName, rds.Schema.FileFormatExt)
}
// Change returns a DataSet that is a mutation of this Dataset by the given
// percentage. Currently unimplemented: pct is ignored and an empty RSImpl
// is returned.
func (rds *RSImpl) Change(pct float32) Dataset {
	// TODO
	return &RSImpl{}
}
// writeResultsToWriter serializes results to wc in the given file format.
// Only csv is currently supported; any other format aborts the process.
func writeResultsToWriter(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
	if format == csvExt {
		generateCSVResults(wc, results, cols, tableName, format)
		return
	}
	log.Fatalf("cannot generate results data, file format %s unsupported \n", format)
}
// generateCSVResults writes a csv header followed by one formatted row per
// result; any write failure aborts the process.
func generateCSVResults(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
	writeChunk := func(s string) {
		if _, err := wc.Write([]byte(s)); err != nil {
			log.Fatal(err)
		}
	}
	writeChunk(makeCSVHeaderStr(cols, tableName, format))
	last := len(results) - 1
	for i, res := range results {
		row := getResultsRow(res, cols)
		writeChunk(formatRow(row, cols, i, last, tableName, format))
	}
}
// getResultsRow flattens a result into the column order defined by
// genResultsCols: name, program, version, format, rows, columns, iterations,
// time, size_on_disk, rows_per_second, date_time.
func getResultsRow(res result, cols []*SeedColumn) []string {
	row := make([]string, len(cols))
	row[0] = res.name
	row[1] = res.program
	row[2] = res.doltVersion
	row[3] = res.format
	row[4] = fmt.Sprintf("%d", res.rows)
	row[5] = fmt.Sprintf("%d", res.columns)
	row[6] = fmt.Sprintf("%d", res.br.N)
	// total wall time, rounded to 10ms for readability
	row[7] = res.br.T.Round(time.Millisecond * 10).String()
	row[8] = fmt.Sprintf("%v", res.sizeOnDisk)
	// throughput across the whole benchmark run
	row[9] = fmt.Sprintf("%.2f", float64(res.rows)/res.br.T.Seconds())
	// time.Format with the reference layout replaces the manual zero-padded
	// Sprintf; output is identical ("YYYY-MM-DD HH:MM").
	row[10] = time.Now().Format("2006-01-02 15:04")
	return row
}
// genResultsCols defines the column schema of the serialized results table;
// all values are caller-supplied (filled by getResultsRow, in this order).
// NOTE(review): "time" is declared TimestampKind but getResultsRow stores a
// Duration string in it — confirm intended.
func genResultsCols() []*SeedColumn {
	return []*SeedColumn{
		NewSeedColumn("name", false, types.StringKind, supplied),
		NewSeedColumn("program", false, types.StringKind, supplied),
		NewSeedColumn("version", false, types.StringKind, supplied),
		NewSeedColumn("format", false, types.StringKind, supplied),
		NewSeedColumn("rows", false, types.StringKind, supplied),
		NewSeedColumn("columns", false, types.StringKind, supplied),
		NewSeedColumn("iterations", false, types.StringKind, supplied),
		NewSeedColumn("time", false, types.TimestampKind, supplied),
		NewSeedColumn("size_on_disk(MB)", false, types.StringKind, supplied),
		NewSeedColumn("rows_per_second", false, types.StringKind, supplied),
		NewSeedColumn("date_time", false, types.StringKind, supplied),
	}
}
// SerializeResults writes results to a date-stamped file
// ("benchmark_results-YYYY-MM-DD.<format>") under path and returns the file
// path. Only csv is supported; other formats abort the process.
func SerializeResults(results []result, path, tableName, format string) string {
	var sch *SeedSchema
	switch format {
	case csvExt:
		sch = NewSeedSchema(len(results), genResultsCols(), csvExt)
	default:
		log.Fatalf("cannot serialize results, unsupported file format %s \n", format)
	}
	now := time.Now()
	fs := filesys.LocalFS
	resultsFile := filepath.Join(path, fmt.Sprintf("benchmark_results-%04d-%02d-%02d.%s", now.Year(), now.Month(), now.Day(), format))
	wc, err := fs.OpenForWrite(resultsFile, os.ModePerm)
	if err != nil {
		log.Fatal(err)
	}
	// NOTE(review): Close error is ignored — a failed flush would go unnoticed.
	defer wc.Close()
	ds := NewRSImpl(wc, sch, results, tableName)
	ds.GenerateData()
	return resultsFile
}
@@ -1,165 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"fmt"
"log"
"strings"
"github.com/dolthub/dolt/go/store/types"
)
const (
csvExt = "csv"
jsonExt = "json"
sqlExt = "sql"
increment = GenType("increment")
random = GenType("random")
supplied = GenType("supplied")
)
var supportedFormats = []string{csvExt, jsonExt, sqlExt}
// GenType specifies how to generate subsequent row values for a given SeedColumn, for a test dataset
type GenType string
// SeedSchema contains the schema to be used to generate a test Dataset
type SeedSchema struct {
// Rows is size of the Dataset
Rows int
// Columns are the schema for the columns to be used for the Dataset
Columns []*SeedColumn
// FileFormatExt is the file format extension that directs how to construct the Dataset
// as a string or as bytes
FileFormatExt string
}
// NewSeedSchema builds a SeedSchema after validating that format is one of
// the supported file extensions; an unsupported format aborts the process.
func NewSeedSchema(rows int, cols []*SeedColumn, format string) *SeedSchema {
	supported := false
	for _, f := range supportedFormats {
		if f == format {
			supported = true
			break
		}
	}
	if !supported {
		log.Fatalf("cannot build seed schema with unsupported file format %s \n", format)
		return &SeedSchema{}
	}
	return &SeedSchema{
		Rows:          rows,
		Columns:       cols,
		FileFormatExt: format,
	}
}
// Bytes returns the schema serialized for the configured file format.
// Only json is supported; any other format aborts the process.
func (sch *SeedSchema) Bytes() []byte {
	if sch.FileFormatExt == jsonExt {
		return getColSchemaJSON(sch.Columns)
	}
	log.Fatalf("cannot create bytes from schema, unsupported format %s \n", sch.FileFormatExt)
	return []byte{}
}
// SeedColumn is used to create a column in a test dataset for benchmark testing
type SeedColumn struct {
Name string
PrimaryKey bool
Type types.NomsKind
GenType GenType
}
// NewSeedColumn builds a SeedColumn, first checking that the generation
// strategy is compatible with the noms kind; a mismatch aborts the process.
func NewSeedColumn(name string, pk bool, t types.NomsKind, g GenType) *SeedColumn {
	if !isValidGenType(t, g) {
		log.Fatalf("cannot use gen type %s with noms type %s \n", g, t.String())
		return &SeedColumn{}
	}
	return &SeedColumn{
		Name:       name,
		PrimaryKey: pk,
		Type:       t,
		GenType:    g,
	}
}
// isValidGenType reports whether generation strategy g can produce values
// of noms kind t; an unrecognized strategy aborts the process.
func isValidGenType(t types.NomsKind, g GenType) bool {
	var allowed []types.NomsKind
	switch g {
	case increment:
		allowed = []types.NomsKind{types.IntKind}
	case random:
		allowed = []types.NomsKind{types.IntKind, types.StringKind}
	case supplied:
		allowed = []types.NomsKind{types.IntKind, types.StringKind, types.TimestampKind}
	default:
		log.Fatalf("unsupported gen type %s \n", g)
	}
	for _, k := range allowed {
		if k == t {
			return true
		}
	}
	return false
}
// getColSchemaJSON hand-builds the dolt schema JSON ({"Columns":[...]}) for
// seedCols; the column ordinal is used as the schema tag, and primary-key
// columns get a not_null constraint entry.
// NOTE(review): JSON is assembled via string formatting rather than
// encoding/json — column names containing quotes would corrupt the output.
func getColSchemaJSON(seedCols []*SeedColumn) []byte {
	prefix := "{\"Columns\":["
	suffix := "]}"
	statement := make([]string, 0)
	statement = append(statement, prefix)
	schemaStr := "{\"tag\": %d,\"name\":\"%s\",\"kind\":\"%s\",\"is_part_of_pk\":%v,\"col_constraints\":%s}"
	jsonCols := make([]string, 0)
	for i, sc := range seedCols {
		// pk columns carry a not_null constraint; others an empty list
		var pks []string
		if sc.PrimaryKey {
			pks = []string{"{\"constraint_type\": \"not_null\",\"params\": null}"}
		} else {
			pks = []string{}
		}
		jc := fmt.Sprintf(schemaStr, uint64(i), sc.Name, strings.ToLower(sc.Type.String()), sc.PrimaryKey, pks)
		jsonCols = append(jsonCols, jc)
	}
	statement = append(statement, strings.Join(jsonCols, ","))
	statement = append(statement, suffix)
	return []byte(strings.Join(statement, ""))
}
// TODO: Support autogeneration for a wider variety of types
// genSampleCols returns the fixed 10-column schema used for generated test
// datasets: an incrementing int pk plus a mix of random/incrementing int
// columns and random string columns.
func genSampleCols() []*SeedColumn {
	return []*SeedColumn{
		NewSeedColumn("pk", true, types.IntKind, increment),
		NewSeedColumn("c1", false, types.IntKind, random),
		NewSeedColumn("c2", false, types.IntKind, increment),
		NewSeedColumn("c3", false, types.IntKind, random),
		NewSeedColumn("c4", false, types.IntKind, increment),
		NewSeedColumn("c5", false, types.IntKind, increment),
		NewSeedColumn("c6", false, types.StringKind, random),
		NewSeedColumn("c7", false, types.StringKind, random),
		NewSeedColumn("c8", false, types.StringKind, random),
		NewSeedColumn("c9", false, types.StringKind, random),
	}
}
+227
View File
@@ -0,0 +1,227 @@
opts:
seed: 0
tests:
- name: "primary key types"
repos: &repos
# - name: mysql # mysqld --port 3308 --local-infile=1
# external-server:
# name: test
# host: 127.0.0.1
# user: root
# password:
# port: 3308
- name: dolt_cli
# - name: dolt_server
# server:
# port: 3308
# args: [ "--port", "3308" ]
tables:
- name: "int"
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "float"
schema: |
create table xy (
x float primary key,
y varchar(30)
);
- name: "varchar"
schema: |
create table xy (
x varchar(20) primary key,
y varchar(30)
);
- name: "config width"
repos: *repos
tables:
- name: "2 cols"
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "4 cols"
schema: |
create table xy (
x int primary key,
y int,
z int,
w int
);
- name: "8 cols"
schema: |
create table xy (
x int primary key,
y int,
z int,
w int,
a int,
b int,
c int,
d int
);
- name: "pk type"
repos: *repos
tables:
- name: "int"
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "float"
schema: |
create table xy (
x float primary key,
y varchar(30)
);
- name: "varchar"
schema: |
create table xy (
x varchar(30) primary key,
y varchar(30)
);
- name: "schema types"
repos: *repos
tables:
- name: "varchar"
schema: |
create table xy (
x int,
y varchar(30)
);
# - name: "geometry"
# schema: |
# create table xy (
# x int primary key,
# y geometry
# );
- name: "datetime"
schema: |
create table xy (
x int primary key,
y date
);
- name: "secondary indexes"
repos: *repos
tables:
- name: "no secondary"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30)
);
- name: "one index"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30),
index x (x)
);
- name: "two index"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30),
index x (x),
index y (y)
);
- name: "four index"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30),
index x (x),
index y (y),
index z (z),
index w (w)
);
- name: "blobs"
repos: *repos
tables:
- name: "no blob"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30)
);
- name: "1 blob"
schema: |
create table xy (
x int primary key,
y blob,
z varchar(30),
w varchar(30)
);
- name: "2 blobs"
schema: |
create table xy (
x int primary key,
y blob,
z blob,
w varchar(30)
);
- name: "row count"
repos: *repos
tables:
- name: "400k"
rows: 400000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "800k"
rows: 800000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "1.6mm"
rows: 1600000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "3.2mm"
rows: 3200000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "sorting"
repos: *repos
tables:
- name: "shuffled"
shuffle: true
rows: 2000000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "sorted"
shuffle: false
rows: 2000000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
+238
View File
@@ -0,0 +1,238 @@
opts:
seed: 0
tests:
- name: "pk type"
repos: &repos
- name: dolt_cli
- name: dolt_server
server:
port: 3308
args: [ "--port", "3308" ]
- name: mysql
external-server:
name: test
host: 127.0.0.1
user: root
password: password
port: 3309
tables:
- name: "int"
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "float"
schema: |
create table xy (
x float primary key,
y varchar(30)
);
- name: "varchar"
schema: |
create table xy (
x varchar(20) primary key,
y varchar(30)
);
- name: "config width"
repos: *repos
tables:
- name: "2 cols"
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "8 cols"
schema: |
create table xy (
x int primary key,
y int,
z int,
w int,
a int,
b int,
c int,
d int
);
- name: "32 cols"
schema: |
create table xy (
a1 int primary key,
a2 int, a3 int, a4 int, a5 int, a6 int, a7 int, a8 int, a9 int, a10 int, a11 int,
a12 int, a13 int, a14 int, a15 int, a16 int, a17 int, a18 int, a19 int, a20 int, a21 int,
a22 int, a23 int, a24 int, a25 int, a26 int, a27 int, a28 int, a29 int, a30 int, a31 int,
a32 int
);
- name: "col type"
repos: *repos
tables:
- name: "varchar"
schema: |
create table xy (
x int,
y varchar(30)
);
# - name: "geometry"
# schema: |
# create table xy (
# x int primary key,
# y geometry
# );
- name: "datetime"
schema: |
create table xy (
x int primary key,
y date
);
- name: "secondary index"
repos: *repos
tables:
- name: "no secondary"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30)
);
- name: "one index"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30),
index x (x)
);
- name: "two index"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30),
index x (x),
index y (y)
);
- name: "four index"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30),
index x (x),
index y (y),
index z (z),
index w (w)
);
- name: "blob"
repos: *repos
tables:
- name: "no blob"
schema: |
create table xy (
x int primary key,
y varchar(30),
z varchar(30),
w varchar(30)
);
- name: "1 blob"
schema: |
create table xy (
x int primary key,
y blob,
z varchar(30),
w varchar(30)
);
- name: "2 blobs"
schema: |
create table xy (
x int primary key,
y blob,
z blob,
w varchar(30)
);
- name: "row count"
repos: *repos
tables:
- name: "400k"
rows: 400000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "800k"
rows: 800000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "1.6mm"
rows: 1600000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "sorting"
repos: *repos
tables:
- name: "shuffled 1mm"
shuffle: true
rows: 1000000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "sorted 1mm"
shuffle: false
rows: 1000000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "batching"
repos:
- name: dolt_server
server:
port: 3308
args: [ "--port", "3308" ]
- name: mysql
external-server:
name: test
host: 127.0.0.1
user: root
password: password
port: 3309
tables:
- name: "LOAD DATA"
fmt: "csv"
rows: 10000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "batch sql"
fmt: "sql"
rows: 10000
batch: true
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "by line sql"
fmt: "sql"
rows: 10000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
@@ -0,0 +1,18 @@
opts:
seed: 0
tests:
- name: "row count"
repos:
- name: repo1
server:
port: 3308
args: [ "--port", "3308" ]
tables:
- name: "400k"
fmt: "csv"
rows: 40000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
@@ -0,0 +1,21 @@
opts:
seed: 0
tests:
- name: "row count"
repos:
- name: mysql
external-server:
name: test
host: 127.0.0.1
user: root
password: password
port: 4306
tables:
- name: "400k"
fmt: "csv"
rows: 40000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
+26
View File
@@ -0,0 +1,26 @@
opts:
seed: 0
tests:
- name: "sorting"
repos:
- name: repo1
server:
port: 3308
args: [ "--port", "3308" ]
tables:
- name: "shuffle"
shuffle: true
rows: 100000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "sorted"
shuffle: false
rows: 100000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
+31
View File
@@ -0,0 +1,31 @@
opts:
seed: 0
tests:
- name: "row count"
repos: &repos
- name: repo1
tables:
- name: "400k"
fmt: "csv"
rows: 400000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "800k"
fmt: "csv"
rows: 800000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "1.6mm"
fmt: "csv"
rows: 1600000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
+42
View File
@@ -0,0 +1,42 @@
opts:
seed: 0
tests:
- name: "sql"
repos:
- name: dolt
server:
port: 3308
args: [ "--port", "3308" ]
- name: mysql # mysqld --port 3308 --local-infile=1 --socket=/tmp/mysqld2.sock
external-server:
name: test
host: 127.0.0.1
user: root
password:
port: 3309
tables:
- name: "LOAD DATA"
fmt: "csv"
rows: 10000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "batch sql"
fmt: "sql"
rows: 10000
batch: true
schema: |
create table xy (
x int primary key,
y varchar(30)
);
- name: "by line sql"
fmt: "sql"
rows: 10000
schema: |
create table xy (
x int primary key,
y varchar(30)
);
@@ -0,0 +1,700 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package import_benchmarker
import (
"bufio"
"bytes"
"context"
"database/sql"
"fmt"
"math/rand"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/cespare/xxhash"
"github.com/creasty/defaults"
sql2 "github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/vitess/go/sqltypes"
ast "github.com/dolthub/vitess/go/vt/sqlparser"
"github.com/stretchr/testify/require"
yaml "gopkg.in/yaml.v3"
driver "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils/sql_server_driver"
)
const defaultBatchSize = 500
// TestDef is the top-level definition of tests to run.
type TestDef struct {
Tests []ImportTest `yaml:"tests"`
Opts *Opts `yaml:"opts"`
}
type Opts struct {
Seed int `yaml:"seed"`
}
// ImportTest is a single test to run. The Repos and MultiRepos will be created, and
// any Servers defined within them will be started. The interactions and
// assertions defined in Conns will be run.
type ImportTest struct {
Name string `yaml:"name"`
Repos []driver.TestRepo `yaml:"repos"`
Tables []Table `yaml:"tables"`
// Skip the entire test with this reason.
Skip string `yaml:"skip"`
Results *ImportResults
files map[uint64]*os.File
tmpdir string
}
type Table struct {
Name string `yaml:"name"`
Schema string `yaml:"schema"`
Rows int `default:"200000" yaml:"rows"`
Fmt string `default:"csv" yaml:"fmt"`
Shuffle bool `default:"false" yaml:"shuffle"`
Batch bool `default:"false" yaml:"batch"`
TargetTable string
}
// UnmarshalYAML decodes a Table, applying struct-tag defaults for any field
// the YAML document omits.
func (s *Table) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// Populate defaults first so explicit YAML values override them; the
	// error was previously discarded.
	if err := defaults.Set(s); err != nil {
		return err
	}
	type plain Table
	if err := unmarshal((*plain)(s)); err != nil {
		return err
	}
	return nil
}
// ParseTestsFile reads and strictly decodes a YAML test definition file;
// unknown fields are an error.
func ParseTestsFile(path string) (TestDef, error) {
	var def TestDef
	contents, err := os.ReadFile(path)
	if err != nil {
		return def, err
	}
	dec := yaml.NewDecoder(bytes.NewReader(contents))
	dec.KnownFields(true)
	err = dec.Decode(&def)
	return def, err
}
// MakeRepo creates a dolt repository named after r within the repo store.
func MakeRepo(rs driver.RepoStore, r driver.TestRepo) (driver.Repo, error) {
	repo, mkErr := rs.MakeRepo(r.Name)
	if mkErr != nil {
		return driver.Repo{}, mkErr
	}
	return repo, nil
}
// MakeServer starts a sql-server for dc using the options in s. A nil s
// means no server is wanted and yields (nil, nil).
func MakeServer(dc driver.DoltCmdable, s *driver.Server) (*driver.SqlServer, error) {
	if s == nil {
		return nil, nil
	}
	opts := []driver.SqlServerOpt{driver.WithArgs(s.Args...)}
	if s.Port != 0 {
		opts = append(opts, driver.WithPort(s.Port))
	}
	srv, startErr := driver.StartSqlServer(dc, opts...)
	if startErr != nil {
		return nil, startErr
	}
	return srv, nil
}
type ImportResult struct {
detail string
server string
test string
time float64
rows int
fmt string
sorted bool
batch bool
}
// String formats a single result as "- test/server/detail: N.NNs".
func (r ImportResult) String() string {
	return fmt.Sprintf("- %s/%s/%s: %.2fs\n", r.test, r.server, r.detail, r.time)
}
type ImportResults struct {
res []ImportResult
}
// append records one finished import measurement.
func (r *ImportResults) append(ir ImportResult) {
	r.res = append(r.res, ir)
}
// String renders all collected results, one per line, under a "Results:"
// header.
func (r *ImportResults) String() string {
	var sb strings.Builder
	sb.WriteString("Results:\n")
	for _, res := range r.res {
		sb.WriteString(res.String())
	}
	return sb.String()
}
// SqlDump renders the collected results as a SQL script: a CREATE TABLE IF
// NOT EXISTS for import_perf_results followed by one multi-row INSERT.
func (r *ImportResults) SqlDump() string {
	b := strings.Builder{}
	b.WriteString(`CREATE TABLE IF NOT EXISTS import_perf_results (
  test_name varchar(64),
  server varchar(64),
  detail varchar(64),
  row_cnt int,
  time double,
  file_format varchar(8),
  sorted bool,
  batch bool,
  primary key (test_name, detail, server)
);
`)
	b.WriteString("insert into import_perf_results values\n")
	for i, res := range r.res {
		if i > 0 {
			b.WriteString(",\n ")
		}
		// encode bools as 0/1 for the MySQL bool columns
		var sorted int
		if res.sorted {
			sorted = 1
		}
		var batch int
		if res.batch {
			batch = 1
		}
		// %d (not %b) is the correct verb for decimal ints; %b prints the
		// binary representation and only coincidentally matched for 0/1.
		b.WriteString(fmt.Sprintf(
			"('%s', '%s', '%s', %d, %.2f, '%s', %d, %d)",
			res.test, res.server, res.detail, res.rows, res.time, res.fmt, sorted, batch))
	}
	b.WriteString(";\n")
	return b.String()
}
// InitWithTmpDir points the test at a scratch directory and resets the
// cache of generated import files (keyed by uint64 — presumably an xxhash
// of the table config; confirm against IterImportTables).
func (test *ImportTest) InitWithTmpDir(s string) {
	test.tmpdir = s
	test.files = make(map[uint64]*os.File)
}
// Run executes an import configuration. Test parallelism makes
// runtimes resulting from this method unsuitable for reporting.
func (test *ImportTest) Run(t *testing.T) {
	if test.Skip != "" {
		t.Skip(test.Skip)
	}
	// lazily initialize results and a scratch dir on first Run
	if test.Results == nil {
		test.Results = new(ImportResults)
		tmp, err := os.MkdirTemp("", "repo-store-")
		require.NoError(t, err)
		test.InitWithTmpDir(tmp)
	}
	u, err := driver.NewDoltUser()
	// previously this error was silently dropped; a failed dolt user setup
	// would surface later as a confusing repo failure
	require.NoError(t, err)
	for _, r := range test.Repos {
		if r.ExternalServer != nil {
			err := test.RunExternalServerTests(r.Name, r.ExternalServer)
			require.NoError(t, err)
		} else if r.Server != nil {
			err = test.RunSqlServerTests(r, u)
			require.NoError(t, err)
		} else {
			err = test.RunCliTests(r, u)
			require.NoError(t, err)
		}
	}
	fmt.Println(test.Results.String())
}
// RunExternalServerTests connects to a single externally provided server to run every test
// table: csv tables go through LOAD DATA, sql tables through statement replay.
func (test *ImportTest) RunExternalServerTests(repoName string, s *driver.ExternalServer) error {
	return test.IterImportTables(test.Tables, func(tab Table, f *os.File) error {
		// fresh connection per table; closed before the next iteration
		db, err := driver.ConnectDB(s.User, s.Password, s.Name, s.Host, s.Port, nil)
		if err != nil {
			return err
		}
		defer db.Close()
		switch tab.Fmt {
		case "csv":
			return test.benchLoadData(repoName, db, tab, f)
		case "sql":
			return test.benchSql(repoName, db, tab, f)
		default:
			return fmt.Errorf("unexpected table import format: %s", tab.Fmt)
		}
	})
}
// RunSqlServerTests creates a new repo and server for every import test.
func (test *ImportTest) RunSqlServerTests(repo driver.TestRepo, user driver.DoltUser) error {
	return test.IterImportTables(test.Tables, func(tab Table, f *os.File) error {
		// make a new server for every test so runs are isolated
		server, err := newServer(user, repo)
		if err != nil {
			return err
		}
		defer server.GracefulStop()
		db, err := server.DB(driver.Connection{User: "root", Pass: ""})
		if err != nil {
			return err
		}
		// enable LOAD DATA LOCAL INFILE on the fresh server
		err = modifyServerForImport(db)
		if err != nil {
			return err
		}
		switch tab.Fmt {
		case "csv":
			return test.benchLoadData(repo.Name, db, tab, f)
		case "sql":
			return test.benchSql(repo.Name, db, tab, f)
		default:
			return fmt.Errorf("unexpected table import format: %s", tab.Fmt)
		}
	})
}
// newServer creates a repo store and repo for |r| and starts a
// sql-server against it. When a server is returned, its default
// database is set to the repo name.
func newServer(u driver.DoltUser, r driver.TestRepo) (*driver.SqlServer, error) {
	rs, err := u.MakeRepoStore()
	if err != nil {
		return nil, err
	}
	// start dolt server
	repo, err := MakeRepo(rs, r)
	if err != nil {
		return nil, err
	}
	// BUG FIX: this previously appended an empty string to r.Server.Args
	// on every call; because the Server struct is shared across calls,
	// the args slice grew by one useless empty argument per test.
	server, err := MakeServer(repo, r.Server)
	if err != nil {
		return nil, err
	}
	if server != nil {
		server.DBName = r.Name
	}
	return server, nil
}
// modifyServerForImport enables LOAD DATA LOCAL INFILE on the server so
// that csv imports over a client connection are permitted.
func modifyServerForImport(db *sql.DB) error {
	_, err := db.Exec("SET GLOBAL local_infile=1 ")
	return err
}
// benchLoadData creates the table declared by tab.Schema, times a
// LOAD DATA LOCAL INFILE import of |f| into it, records the result, and
// drops the table afterwards.
func (test *ImportTest) benchLoadData(repoName string, db *sql.DB, tab Table, f *os.File) error {
	ctx := context.Background()
	conn, err := db.Conn(ctx)
	if err != nil {
		return err
	}
	defer conn.Close()
	// DDL and the import are statements with no result set; ExecContext
	// avoids the row-handle bookkeeping QueryContext required.
	if _, err := conn.ExecContext(ctx, tab.Schema); err != nil {
		return err
	}
	start := time.Now()
	// BUG FIX: the import target was hard-coded as "xy"; use the table
	// name parsed from the schema (as the drop below already did) so any
	// schema works.
	q := fmt.Sprintf(`
LOAD DATA LOCAL INFILE '%s' INTO TABLE %s
FIELDS TERMINATED BY ',' ENCLOSED BY ''
LINES TERMINATED BY '\n'
IGNORE 1 LINES;`, f.Name(), tab.TargetTable)
	if _, err := conn.ExecContext(ctx, q); err != nil {
		return err
	}
	runtime := time.Since(start)
	test.Results.append(ImportResult{
		test:   test.Name,
		server: repoName,
		detail: tab.Name,
		time:   runtime.Seconds(),
		rows:   tab.Rows,
		fmt:    tab.Fmt,
		sorted: !tab.Shuffle,
		batch:  tab.Batch,
	})
	// Reset the database so the next test starts clean.
	if _, err := conn.ExecContext(ctx, fmt.Sprintf("drop table %s;", tab.TargetTable)); err != nil {
		return err
	}
	return nil
}
// benchSql creates the table declared by tab.Schema, times the
// execution of every semicolon-separated statement in |f|, records the
// result, and drops the table when the connection work finishes.
func (test *ImportTest) benchSql(repoName string, db *sql.DB, tab Table, f *os.File) error {
	ctx := context.Background()
	conn, err := db.Conn(ctx)
	if err != nil {
		return err
	}
	defer conn.Close()
	if _, err := conn.ExecContext(ctx, tab.Schema); err != nil {
		return err
	}
	// Best-effort cleanup so the next test starts from a clean database.
	defer conn.ExecContext(
		ctx,
		fmt.Sprintf("drop table %s;", tab.TargetTable),
	)
	// The generated file may already have been consumed by a previous
	// test; rewind before scanning.
	if _, err := f.Seek(0, 0); err != nil {
		return err
	}
	s := bufio.NewScanner(f)
	s.Split(ScanQueries)
	start := time.Now()
	lineno := 1
	for ; s.Scan(); lineno++ {
		line := s.Text()
		if line == "" {
			return fmt.Errorf("unexpected blank line, line number: %d", lineno)
		}
		// A bare newline marks the end of the statement stream.
		if line == "\n" {
			break
		}
		if _, err := conn.ExecContext(ctx, line); err != nil {
			return err
		}
	}
	// Scanner errors are only visible after Scan returns false.
	if err := s.Err(); err != nil {
		return fmt.Errorf("%s:%d: %v", f.Name(), lineno, err)
	}
	runtime := time.Since(start)
	test.Results.append(ImportResult{
		test:   test.Name,
		server: repoName,
		detail: tab.Name,
		time:   runtime.Seconds(),
		rows:   tab.Rows,
		fmt:    tab.Fmt,
		sorted: !tab.Shuffle,
		batch:  tab.Batch,
	})
	// BUG FIX: removed a trailing rows.Close()/err check that re-closed
	// the schema query's (already closed) rows handle; it was dead code
	// left over from an earlier structure.
	return nil
}
// ScanQueries is a bufio.SplitFunc that splits input into
// semicolon-terminated SQL statements. The ';' terminator is consumed
// and is not part of the returned token; a trailing '\r' is stripped.
func ScanQueries(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}
	if i := bytes.IndexByte(data, ';'); i >= 0 {
		// We have a full semicolon-terminated statement.
		return i + 1, dropCR(data[0:i]), nil
	}
	// If we're at EOF, we have a final, non-terminated statement. Return it.
	if atEOF {
		return len(data), dropCR(data), nil
	}
	// Request more data.
	return 0, nil, nil
}
// dropCR removes a single trailing carriage return from data, if present.
func dropCR(data []byte) []byte {
	n := len(data)
	if n == 0 || data[n-1] != '\r' {
		return data
	}
	return data[:n-1]
}
// RunCliTests runs each import test on a new dolt repo to avoid accumulated
// startup costs over time between tests.
func (test *ImportTest) RunCliTests(r driver.TestRepo, user driver.DoltUser) error {
	return test.IterImportTables(test.Tables, func(tab Table, f *os.File) error {
		rs, err := user.MakeRepoStore()
		if err != nil {
			return err
		}
		repo, err := MakeRepo(rs, r)
		if err != nil {
			return err
		}
		if err := repo.DoltExec("sql", "-q", tab.Schema); err != nil {
			return err
		}
		// start timer
		start := time.Now()
		cmd := repo.DoltCmd("table", "import", "-r", "--file-type", tab.Fmt, tab.TargetTable, f.Name())
		// BUG FIX: previously a StdoutPipe was created but never read —
		// a child producing enough output could block on a full pipe —
		// and the error message printed the writer value, not the
		// output. CombinedOutput captures both streams for reporting.
		out, err := cmd.CombinedOutput()
		if err != nil {
			return fmt.Errorf("%w: %s", err, out)
		}
		// end timer, append result
		runtime := time.Since(start)
		test.Results.append(ImportResult{
			test:   test.Name,
			server: r.Name,
			detail: tab.Name,
			time:   runtime.Seconds(),
			rows:   tab.Rows,
			fmt:    tab.Fmt,
			sorted: !tab.Shuffle,
			batch:  tab.Batch,
		})
		// reset repo at end
		return repo.DoltExec("sql", "-q", fmt.Sprintf("drop table %s", tab.TargetTable))
	})
}
// IterImportTables invokes |cb| once per table with a data file holding
// |t.Rows| generated rows in |t.Fmt| ("csv" or "sql"). Files are cached
// by (schema, row count, format) so identical tables reuse one file.
func (test *ImportTest) IterImportTables(tables []Table, cb func(t Table, f *os.File) error) error {
	for _, t := range tables {
		key, err := tableKey(t)
		if err != nil {
			return err
		}
		table, names, types := parseTableAndSchema(t.Schema)
		t.TargetTable = table
		if f, ok := test.files[key]; ok {
			// short circuit if we've already made file for schema/row count
			if err := cb(t, f); err != nil {
				return err
			}
			continue
		}
		rows := make([]string, 0, t.Rows)
		genRows(types, t.Rows, t.Fmt, func(r []string) {
			switch t.Fmt {
			case "csv":
				rows = append(rows, strings.Join(r, ","))
			case "sql":
				rows = append(rows, fmt.Sprintf("(%s)", strings.Join(r, ", ")))
			default:
				panic(fmt.Sprintf("unknown format: %s", t.Fmt))
			}
		})
		if t.Shuffle {
			rand.Shuffle(len(rows), func(i, j int) { rows[i], rows[j] = rows[j], rows[i] })
		}
		f, err := os.CreateTemp(test.tmpdir, "import-data-")
		if err != nil {
			return err
		}
		switch t.Fmt {
		case "csv":
			fmt.Fprintf(f, "%s\n", strings.Join(names, ","))
			for _, r := range rows {
				fmt.Fprintf(f, "%s\n", r)
			}
		case "sql":
			if t.Batch {
				batchSize := defaultBatchSize
				var i int
				// BUG FIX: these writes previously passed the generated
				// statements through Fprintf as a *format string*, which
				// would corrupt output containing '%'. Write them verbatim.
				for i+batchSize < len(rows) {
					f.WriteString(newBatch(t.TargetTable, rows[i:i+batchSize]))
					i += batchSize
				}
				if i < len(rows) {
					f.WriteString(newBatch(t.TargetTable, rows[i:]))
				}
			} else {
				for _, r := range rows {
					fmt.Fprintf(f, "INSERT INTO %s VALUES %s;\n", t.TargetTable, r)
				}
			}
		default:
			panic(fmt.Sprintf("unknown format: %s", t.Fmt))
		}
		// cache file for schema and row count
		test.files[key] = f
		if err := cb(t, f); err != nil {
			return err
		}
	}
	return nil
}
// newBatch renders one multi-row INSERT statement for |name| with each
// row on its own line. An empty |rows| slice yields an empty string
// (the previous implementation panicked on rows[:len(rows)-1]).
func newBatch(name string, rows []string) string {
	if len(rows) == 0 {
		return ""
	}
	b := strings.Builder{}
	b.WriteString(fmt.Sprintf("INSERT INTO %s VALUES\n", name))
	for _, r := range rows[:len(rows)-1] {
		b.WriteString(" ")
		b.WriteString(r)
		b.WriteString(",\n")
	}
	b.WriteString(" ")
	b.WriteString(rows[len(rows)-1])
	b.WriteString(";\n")
	return b.String()
}
// tableKey hashes the attributes that determine a generated data file —
// schema, row count, and format — so files can be cached and shared
// between tests with identical inputs.
func tableKey(t Table) (uint64, error) {
	hash := xxhash.New()
	if _, err := hash.Write([]byte(t.Schema)); err != nil {
		return 0, err
	}
	if _, err := hash.Write([]byte(fmt.Sprintf("%#v,", t.Rows))); err != nil {
		return 0, err
	}
	// Removed a dead `if err != nil` re-check of an error that had
	// already been handled above.
	if _, err := hash.Write([]byte(t.Fmt)); err != nil {
		return 0, err
	}
	return hash.Sum64(), nil
}
// parseTableAndSchema parses a CREATE TABLE statement and returns the
// table name, the column names, and the column types. It panics when
// the query does not parse or is not a CREATE TABLE.
func parseTableAndSchema(q string) (string, []string, []sql2.Type) {
	stmt, _, err := ast.ParseOne(q)
	if err != nil {
		panic(fmt.Sprintf("invalid query: %s; %s", q, err))
	}
	ddl, ok := stmt.(*ast.DDL)
	if !ok {
		panic(fmt.Sprintf("expected CREATE TABLE, found: %s", q))
	}
	var (
		names []string
		types []sql2.Type
	)
	for _, col := range ddl.TableSpec.Columns {
		typ, err := sql2.ColumnTypeToType(&col.Type)
		if err != nil {
			panic(fmt.Sprintf("unexpected error reading type: %s", err))
		}
		names = append(names, col.Name.String())
		types = append(types, typ)
	}
	return ddl.Table.String(), names, types
}
// genRows generates |n| rows for the given column types and passes each
// rendered row to |cb|. |format| selects rendering: "sql" single-quotes
// string-like values so rows can be embedded in INSERT statements.
// (Renamed the parameter from `fmt`, which shadowed the fmt package.)
func genRows(types []sql2.Type, n int, format string, cb func(r []string)) {
	// generate |n| rows with column types
	for i := 0; i < n; i++ {
		row := make([]string, len(types))
		for j, t := range types {
			v := genValue(i, t)
			if format == "sql" {
				switch t.Type() {
				case sqltypes.Blob, sqltypes.VarChar, sqltypes.Timestamp, sqltypes.Date:
					v = "'" + v + "'"
				}
			}
			row[j] = v
		}
		cb(row)
	}
}
// genValue renders a deterministic value for row |i| appropriate for
// |typ|. It panics on an unsupported column type.
func genValue(i int, typ sql2.Type) string {
	switch typ.Type() {
	case sqltypes.Blob:
		return fmt.Sprintf("blob %d", i)
	case sqltypes.VarChar:
		return fmt.Sprintf("varchar %d", i)
	case sqltypes.Int8, sqltypes.Int16, sqltypes.Int32, sqltypes.Int64, sqltypes.Bit:
		// Integer-ish columns all render the row index directly.
		return strconv.Itoa(i)
	case sqltypes.Float32, sqltypes.Float64:
		return strconv.FormatFloat(float64(i), 'E', -1, 32)
	case sqltypes.Geometry:
		return `{"type": "Point", "coordinates": [1,2]}`
	case sqltypes.Timestamp:
		return "2019-12-31T12:00:00Z"
	case sqltypes.Date:
		return "2019-12-31T00:00:00Z"
	}
	panic(fmt.Sprintf("expected type, found: %s", typ))
}
// RunTestsFile parses the test definitions at |path| and runs each one
// as a named subtest.
func RunTestsFile(t *testing.T, path string) {
	def, err := ParseTestsFile(path)
	require.NoError(t, err)
	for _, tt := range def.Tests {
		t.Run(tt.Name, tt.Run)
	}
}
@@ -3,13 +3,17 @@ module github.com/dolthub/dolt/integration-tests/go-sql-server-driver
go 1.19
require (
github.com/go-sql-driver/mysql v1.6.0
github.com/dolthub/dolt/go v0.40.4
github.com/stretchr/testify v1.8.0
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/creasty/defaults v1.6.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-sql-driver/mysql v1.6.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/sys v0.1.0 // indirect
)
replace github.com/dolthub/dolt/go => ../../go/
@@ -1,3 +1,5 @@
github.com/creasty/defaults v1.6.0 h1:ltuE9cfphUtlrBeomuu8PEyISTXnxqkBIoQfXgv7BSc=
github.com/creasty/defaults v1.6.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -10,8 +12,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec h1:BkDtF2Ih9xZ7le9ndzTA7KJow28VbQW3odyk/8drmuI=
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+19 -219
View File
@@ -17,20 +17,17 @@ package main
import (
"bytes"
"context"
"io"
"os"
"path/filepath"
"strings"
"testing"
"time"
"database/sql"
driver "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils/sql_server_driver"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
yaml "gopkg.in/yaml.v3"
)
// TestDef is the top-level definition of tests to run.
type TestDef struct {
Tests []Test `yaml:"tests"`
}
@@ -39,188 +36,15 @@ type TestDef struct {
// any Servers defined within them will be started. The interactions and
// assertions defined in Conns will be run.
type Test struct {
Name string `yaml:"name"`
Repos []TestRepo `yaml:"repos"`
MultiRepos []MultiRepo `yaml:"multi_repos"`
Conns []Connection `yaml:"connections"`
Name string `yaml:"name"`
Repos []driver.TestRepo `yaml:"repos"`
MultiRepos []driver.MultiRepo `yaml:"multi_repos"`
Conns []driver.Connection `yaml:"connections"`
// Skip the entire test with this reason.
Skip string `yaml:"skip"`
}
// |Connection| represents a single connection to a sql-server instance defined
// in the test. The connection will be established and every |Query| in
// |Queries| will be run against it. At the end, the connection will be torn down.
// If |RestartServer| is non-nil, the server which the connection targets will
// be restarted after the connection is terminated.
type Connection struct {
On string `yaml:"on"`
Queries []Query `yaml:"queries"`
RestartServer *RestartArgs `yaml:"restart_server"`
// Rarely needed, allows the entire connection assertion to be retried
// on an assertion failure. Use this is only for idempotent connection
// interactions and only if the sql-server is prone to tear down the
// connection based on things that are happening, such as cluster role
// transitions.
RetryAttempts int `yaml:"retry_attempts"`
// The user to connect as.
User string `yaml:"user"`
// The password to connect with.
Pass string `yaml:"password"`
PassFile string `yaml:"password_file"`
// Any driver params to pass in the DSN.
DriverParams map[string]string `yaml:"driver_params"`
}
func (c Connection) Password() (string, error) {
if c.PassFile != "" {
bs, err := os.ReadFile(c.PassFile)
if err != nil {
return "", err
}
return strings.TrimSpace(string(bs)), nil
}
return c.Pass, nil
}
// |RestartArgs| are possible arguments, to change the arguments which are
// provided to the sql-server process when it is restarted. This is used, for
// example, to change server config on a restart.
type RestartArgs struct {
Args *[]string `yaml:"args"`
}
// |TestRepo| represents an init'd dolt repository that is available to a
// server instance. It can be created with some files and with remotes defined.
// |Name| can include path components separated by `/`, which will create the
// repository in a subdirectory.
type TestRepo struct {
Name string `yaml:"name"`
WithFiles []WithFile `yaml:"with_files"`
WithRemotes []WithRemote `yaml:"with_remotes"`
// Only valid on Test.Repos, not in Test.MultiRepos.Repos. If set, a
// sql-server process will be run against this TestRepo. It will be
// available as TestRepo.Name.
Server *Server `yaml:"server"`
}
// |MultiRepo| is a subdirectory where many |TestRepo|s can be defined. You can
// start a sql-server on a |MultiRepo|, in which case there will be no default
// database to connect to.
type MultiRepo struct {
Name string `yaml:"name"`
Repos []TestRepo `yaml:"repos"`
WithFiles []WithFile `yaml:"with_files"`
// If set, a sql-server process will be run against this TestRepo. It
// will be available as MultiRepo.Name.
Server *Server `yaml:"server"`
}
// |WithRemote| defines remotes which should be defined on the repository
// before the sql-server is started.
type WithRemote struct {
Name string `yaml:"name"`
URL string `yaml:"url"`
}
// |WithFile| defines a file and its contents to be created in a |Repo| or
// |MultiRepo| before the servers are started.
type WithFile struct {
Name string `yaml:"name"`
// The contents of the file, provided inline in the YAML.
Contents string `yaml:"contents"`
// A source file path to copy to |Name|. Mutually exclusive with
// Contents.
SourcePath string `yaml:"source_path"`
}
// |Server| defines a sql-server process to start. |Name| must match the
// top-level |Name| of a |TestRepo| or |MultiRepo|.
type Server struct {
Name string `yaml:"name"`
Args []string `yaml:"args"`
// The |Port| which the server will be running on. For now, it is up to
// the |Args| to make sure this is true. Defaults to 3308.
Port int `yaml:"port"`
// Assertions to be run against the log output of the server process
// after the server process successfully terminates.
LogMatches []string `yaml:"log_matches"`
// Assertions to be run against the log output of the server process
// after the server process exits with an error. If |ErrorMatches| is
// defined, then the server process must exit with a non-0 exit code
// after it is launched. This will be asserted before any |Connections|
// interactions are performed.
ErrorMatches []string `yaml:"error_matches"`
}
// The primary interaction of a |Connection|. Either |Query| or |Exec| should
// be set, not both.
type Query struct {
// Run a query against the connection.
Query string `yaml:"query"`
// Run a command against the connection.
Exec string `yaml:"exec"`
// Args to be passed as query parameters to either Query or Exec.
Args []string `yaml:"args"`
// This can only be non-empty for a |Query|. Asserts the results of the
// |Query|.
Result QueryResult `yaml:"result"`
// If this is non-empty, asserts the the |Query| or the |Exec|
// generates an error that matches this string.
ErrorMatch string `yaml:"error_match"`
// If this is non-zero, it represents the number of times to try the
// |Query| or the |Exec| and to check its assertions before we fail the
// test as a result of failed assertions. When interacting with queries
// that introspect things like replication state, this can be used to
// wait for quiescence in an inherently racey process. Interactions
// will be delayed slightly between each failure.
RetryAttempts int `yaml:"retry_attempts"`
}
// |QueryResult| specifies assertions on the results of a |Query|. Columns must
// be specified for a |Query| and the query results must fully match. If Rows
// are ommited, anything is allowed as long as all rows are read successfully.
// All assertions here are string equality.
type QueryResult struct {
Columns []string `yaml:"columns"`
Rows ResultRows `yaml:"rows"`
}
type ResultRows struct {
Or *[][][]string
}
func (r *ResultRows) UnmarshalYAML(value *yaml.Node) error {
if value.Kind == yaml.SequenceNode {
res := make([][][]string, 1)
r.Or = &res
return value.Decode(&(*r.Or)[0])
}
var or struct {
Or *[][][]string `yaml:"or"`
}
err := value.Decode(&or)
if err != nil {
return err
}
r.Or = or.Or
return nil
}
func ParseTestsFile(path string) (TestDef, error) {
contents, err := os.ReadFile(path)
if err != nil {
@@ -233,35 +57,11 @@ func ParseTestsFile(path string) (TestDef, error) {
return res, err
}
func (f WithFile) WriteAtDir(dir string) error {
path := filepath.Join(dir, f.Name)
d := filepath.Dir(path)
err := os.MkdirAll(d, 0750)
if err != nil {
return err
}
if f.SourcePath != "" {
source, err := os.Open(f.SourcePath)
if err != nil {
return err
}
defer source.Close()
dest, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0550)
if err != nil {
return err
}
_, err = io.Copy(dest, source)
return err
} else {
return os.WriteFile(path, []byte(f.Contents), 0550)
}
}
func MakeRepo(t *testing.T, rs RepoStore, r TestRepo) Repo {
func MakeRepo(t *testing.T, rs driver.RepoStore, r driver.TestRepo) driver.Repo {
repo, err := rs.MakeRepo(r.Name)
require.NoError(t, err)
for _, f := range r.WithFiles {
require.NoError(t, f.WriteAtDir(repo.dir))
require.NoError(t, f.WriteAtDir(repo.Dir))
}
for _, remote := range r.WithRemotes {
require.NoError(t, repo.CreateRemote(remote.Name, remote.URL))
@@ -269,15 +69,15 @@ func MakeRepo(t *testing.T, rs RepoStore, r TestRepo) Repo {
return repo
}
func MakeServer(t *testing.T, dc DoltCmdable, s *Server) *SqlServer {
func MakeServer(t *testing.T, dc driver.DoltCmdable, s *driver.Server) *driver.SqlServer {
if s == nil {
return nil
}
opts := []SqlServerOpt{WithArgs(s.Args...)}
opts := []driver.SqlServerOpt{driver.WithArgs(s.Args...)}
if s.Port != 0 {
opts = append(opts, WithPort(s.Port))
opts = append(opts, driver.WithPort(s.Port))
}
server, err := StartSqlServer(dc, opts...)
server, err := driver.StartSqlServer(dc, opts...)
require.NoError(t, err)
if len(s.ErrorMatches) > 0 {
err := server.ErrorStop()
@@ -309,12 +109,12 @@ func (test Test) Run(t *testing.T) {
t.Skip(test.Skip)
}
u, err := NewDoltUser()
u, err := driver.NewDoltUser()
require.NoError(t, err)
rs, err := u.MakeRepoStore()
require.NoError(t, err)
servers := make(map[string]*SqlServer)
servers := make(map[string]*driver.SqlServer)
for _, r := range test.Repos {
repo := MakeRepo(t, rs, r)
@@ -327,7 +127,7 @@ func (test Test) Run(t *testing.T) {
}
for _, mr := range test.MultiRepos {
// Each MultiRepo gets its own dolt config --global.
u, err := NewDoltUser()
u, err := driver.NewDoltUser()
require.NoError(t, err)
rs, err = u.MakeRepoStore()
require.NoError(t, err)
@@ -335,7 +135,7 @@ func (test Test) Run(t *testing.T) {
MakeRepo(t, rs, r)
}
for _, f := range mr.WithFiles {
require.NoError(t, f.WriteAtDir(rs.dir))
require.NoError(t, f.WriteAtDir(rs.Dir))
}
server := MakeServer(t, rs, mr.Server)
@@ -416,7 +216,7 @@ func (r *retryTestingT) try(attempts int, test func(require.TestingT)) {
r.errorfArgs = nil
r.failNow = false
if i != 0 {
time.Sleep(RetrySleepDuration)
time.Sleep(driver.RetrySleepDuration)
}
func() {
defer func() {
@@ -449,13 +249,13 @@ func RetryTestRun(t *testing.T, attempts int, test func(require.TestingT)) {
rtt.try(attempts, test)
}
func RunQuery(t *testing.T, conn *sql.Conn, q Query) {
func RunQuery(t *testing.T, conn *sql.Conn, q driver.Query) {
RetryTestRun(t, q.RetryAttempts, func(t require.TestingT) {
RunQueryAttempt(t, conn, q)
})
}
func RunQueryAttempt(t require.TestingT, conn *sql.Conn, q Query) {
func RunQueryAttempt(t require.TestingT, conn *sql.Conn, q driver.Query) {
args := make([]any, len(q.Args))
for i := range q.Args {
args[i] = q.Args[i]