mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-01 20:00:22 -05:00
Redo import perf (#4689)
* import perf harness prog * more test isolation * rework common dep * fix server driver * delete old files, fix readme * more readme fixes * batching edits * edits * skip import tests by def * GA starter * rename * fix arg name * change input name again * remove espaced character from input desc * fix env definition * fix env definition * fix workflow_dispatch name * CI test files * remove os spec * runs on * different set variable * defaults * defaults * remove local GMS * edits * run bench from go/ directory * dropped def equals * go run, not go build * fix output command name * configure dolt * fix query file path * fix query file path * switch query and inputs * fix format <<EOF * reformat yaml * debug output * more debug output * fix echo -> cat * fix to json flag * fix yaml spacing * yaml spacing * yaml spacing * revert html spacing * json format * env var expansion * echo not cat * another json string printing error * no echo * log template * no template string * wrong parameter, template should be templateName * remove console.log * pass file, not string, to ses * rely on preexisting template? 
email action interface * fix yaml * fix html print * fix html header * change ci script * fix sql-server connection pass * mysql no password, setup default db * put password back * missed port for default database * expanded CI tests * shorter test file * extend ci tests * change default query * try to push to dolthub * modify push to dolthub * duplicate test names * typo * dolt-action version * invalid param for dolt_action * specify feature branch * specify main branch dolt-action * -b flag first * dont need -q flag for batch EOF * combine results and dolthub push * missing curly brace * no auth * new creds * new cred fun * use the cred key * try again * log table * move push out of batch * more logging * new vs old branch * fix branch rule * more test * all tests * smaller tests * smaller tests * double env: key * fix yaml error * more yaml errors * more yaml errors * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh * linting fixes, remove other import nightly * linting * go vet * licenses * fix compile errorrs * warn don't panic on init() * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh * no external package * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh * unused dep * [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh * Revert perf scripts * refactor to repo dispatch Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
3bbc0e6512
commit
08167dfd5d
+26
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ "$#" -ne 2 ]; then
|
||||
echo "Usage: csv_to_html.sh <csv_file> <html_file>"
|
||||
fi
|
||||
|
||||
gw="$GITHUB_WORKSPACE"
|
||||
in="$1"
|
||||
out="$2"
|
||||
|
||||
echo "<table>" > "$out"
|
||||
print_header=true
|
||||
while read line; do
|
||||
if "$print_header"; then
|
||||
#echo " <tr><th>$line" | sed -e 's/:[^,]*\(,\|$\)/<\/th><th>/g' >> "$out"
|
||||
echo " <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
|
||||
print_header=false
|
||||
continue
|
||||
fi
|
||||
echo " <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
|
||||
done < "$in"
|
||||
echo "</table>" >> "$out"
|
||||
|
||||
cat "$out"
|
||||
|
||||
echo "::set-output name=html::$(echo $out)"
|
||||
@@ -61,4 +61,12 @@ jobs:
|
||||
with:
|
||||
token: ${{ secrets.REPO_ACCESS_TOKEN }}
|
||||
event-type: benchmark-import
|
||||
client-payload: '{"from_server": "dolt", "from_version": "${{ github.sha }}", "to_server": "dolt", "to_version": "${{ steps.comment-branch.outputs.head_sha }}", "mode": "pullRequest", "issue_number": "${{ steps.get_pull_number.outputs.pull_number }}", "actor": "${{ github.actor }}", "template_script": "./.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh"}'
|
||||
client-payload: |
|
||||
{
|
||||
"version": "${{ steps.comment-branch-outputs.head_sha }}",
|
||||
"run_file": "ci.yaml",
|
||||
"report": "three_way_compare.sql",
|
||||
"commit_to_branch": "${{ steps.comment-branch-outputs.head_sha }}",
|
||||
"actor": "${{ github.actor }}",
|
||||
"issue_id": "${{ steps.get_pull_number.outputs.pull_number }}"
|
||||
}
|
||||
|
||||
@@ -1,10 +1,168 @@
|
||||
name: Import Benchmarks
|
||||
on:
|
||||
workflow_dispatch:
|
||||
repository_dispatch:
|
||||
types: [ benchmark-import ]
|
||||
env:
|
||||
BENCH_DIR: 'go/performance/import_benchmarker'
|
||||
MYSQL_PORT: 3309
|
||||
MYSQL_PASSWORD: password
|
||||
jobs:
|
||||
test:
|
||||
name: Import benchmarks
|
||||
bench:
|
||||
name: Benchmark
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
fail-fast: true
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stub
|
||||
run: echo hello
|
||||
- name: Set up Go 1.x
|
||||
id: go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: ^1.19
|
||||
|
||||
- name: Dolt version
|
||||
id: version
|
||||
run: |
|
||||
version=${{ github.event.client_payload.version }}
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
ref: ${{ github.event.client_payload.version }}
|
||||
|
||||
- name: Install dolt
|
||||
working-directory: ./go
|
||||
run: go install ./cmd/dolt
|
||||
|
||||
- uses: shogo82148/actions-setup-mysql@v1
|
||||
with:
|
||||
mysql-version: '8.0'
|
||||
auto-start: true
|
||||
root-password: ${{ env.MYSQL_PASSWORD }}
|
||||
my-cnf: |
|
||||
local_infile=1
|
||||
socket=/tmp/mysqld2.sock
|
||||
port=${{ env.MYSQL_PORT }}
|
||||
|
||||
- name: Setup MySQL
|
||||
run: mysql -uroot -p${{ env.MYSQL_PASSWORD }} -h127.0.0.1 -P${{ env.MYSQL_PORT }} -e 'create database test;'
|
||||
|
||||
- name: Run bench
|
||||
id: bench
|
||||
working-directory: go/
|
||||
run: |
|
||||
out="$GITHUB_WORKSPACE/results.sql"
|
||||
testspec="../${{ env.BENCH_DIR }}/testdata/${{ github.event.client_payload.run_file }}"
|
||||
go run \
|
||||
"github.com/dolthub/dolt/${{ env.BENCH_DIR }}/cmd" \
|
||||
-test "$testspec" \
|
||||
-out "$out"
|
||||
echo "::set-output name=result_path::$out"
|
||||
|
||||
- name: Report
|
||||
id: report
|
||||
run: |
|
||||
gw=$GITHUB_WORKSPACE
|
||||
in="${{ steps.bench.outputs.result_path }}"
|
||||
query="$(pwd)/${{ env.BENCH_DIR }}/reporting/${{ github.event.client_payload.report }}"
|
||||
out="$gw/results.csv"
|
||||
dolt_dir="$gw/import-perf"
|
||||
|
||||
dolt config --global --add user.email "import-perf@dolthub.com"
|
||||
dolt config --global --add user.name "import-perf"
|
||||
|
||||
echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import
|
||||
dolt clone import-perf/import-perf "$dolt_dir"
|
||||
|
||||
cd "$dolt_dir"
|
||||
|
||||
branch="${{ github.event.client_payload.commit_to_branch }}"
|
||||
# checkout branch
|
||||
if [ -z $(dolt sql -q "select 1 from dolt_branches where name = '$branch';") ]; then
|
||||
dolt checkout -b $branch
|
||||
else
|
||||
dolt checkout $branch
|
||||
fi
|
||||
|
||||
dolt sql -q "drop table if exists import_perf_results"
|
||||
|
||||
# load results
|
||||
dolt sql < "$in"
|
||||
|
||||
# push results to dolthub
|
||||
dolt add import_perf_results
|
||||
dolt commit -m "CI commit"
|
||||
dolt push origin $branch
|
||||
|
||||
# generate report
|
||||
dolt sql -r csv < "$query" > "$out"
|
||||
|
||||
cat "$out"
|
||||
echo "::set-output name=report_path::$out"
|
||||
|
||||
- name: Format HTML
|
||||
id: html
|
||||
if: ${{ github.event.client_payload.email_recipient }} != ""
|
||||
run: |
|
||||
gw="$GITHUB_WORKSPACE"
|
||||
in="${{ steps.report.outputs.report_path }}"
|
||||
out="$gw/results.html"
|
||||
|
||||
echo "<table>" > "$out"
|
||||
print_header=true
|
||||
while read line; do
|
||||
if "$print_header"; then
|
||||
echo " <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
|
||||
print_header=false
|
||||
continue
|
||||
fi
|
||||
echo " <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
|
||||
done < "$in"
|
||||
echo "</table>" >> "$out"
|
||||
|
||||
cat "$out"
|
||||
|
||||
echo "::set-output name=html::$(echo $out)"
|
||||
|
||||
- name: Configure AWS Credentials
|
||||
if: ${{ github.event.client_payload.email_recipient }} != ""
|
||||
uses: aws-actions/configure-aws-credentials@v1-node16
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: us-west-2
|
||||
|
||||
- name: Send Email
|
||||
uses: ./.github/actions/ses-email-action
|
||||
if: ${{ github.event.client_payload.email_recipient }} != ""
|
||||
with:
|
||||
region: us-west-2
|
||||
toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
|
||||
version: ${{ steps.version.outputs.ref }}
|
||||
format: '__DOLT__'
|
||||
dataFile: ${{ steps.html.outputs.html }}
|
||||
|
||||
- name: Read CSV
|
||||
if: ${{ github.event.client_payload.issue_id }} != ""
|
||||
id: csv
|
||||
uses: juliangruber/read-file-action@v1
|
||||
with:
|
||||
path: "${{ steps.report.outputs.report_path }}"
|
||||
|
||||
- name: Create MD
|
||||
if: ${{ github.event.client_payload.issue_id }} != ""
|
||||
uses: petems/csv-to-md-table-action@master
|
||||
id: md
|
||||
with:
|
||||
csvinput: ${{ steps.csv.outputs.content }}
|
||||
|
||||
- uses: mshick/add-pr-comment@v1
|
||||
if: ${{ github.event.client_payload.issue_id }} != ""
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
repo-token-user-login: 'github-actions[bot]'
|
||||
issue: ${{ github.event.client_payload.issue_id }}
|
||||
message: |
|
||||
`@${ACTOR} ${FORMAT}
|
||||
${{ steps.md.outputs.markdown-table }}
|
||||
|
||||
@@ -19,9 +19,17 @@ jobs:
|
||||
with:
|
||||
token: ${{ secrets.REPO_ACCESS_TOKEN }}
|
||||
event-type: sql-correctness
|
||||
client-payload: '{"to_version": "${{ github.sha }}", "mode": "nightly", "actor": "${{ github.actor }}", "actor_email": "max@dolthub.com", "template_script": "./.github/scripts/sql-correctness/get-dolt-correctness-job-json.sh"}'
|
||||
client-payload: '{"to_version": "${{ github.sha }}", "mode": "nightly", "actor": "${{ github.actor }}", "template_script": "./.github/scripts/sql-correctness/get-dolt-correctness-job-json.sh"}'
|
||||
- uses: peter-evans/repository-dispatch@v2.0.0
|
||||
with:
|
||||
token: ${{ secrets.REPO_ACCESS_TOKEN }}
|
||||
event-type: benchmark-import
|
||||
client-payload: '{"to_server": "dolt", "to_version": "${{ github.sha }}", "from_server": "mysql", "from_version": "8.0.28", "mode": "nightly", "actor": "${{ github.actor }}", "template_script": "./.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh"}'
|
||||
client-payload: |
|
||||
{
|
||||
"email_recipient": "${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}",
|
||||
"version": "${{ github.sha }}",
|
||||
"run_file": "ci.yaml",
|
||||
"report": "three_way_compare.sql",
|
||||
"commit_to_branch": "nightly",
|
||||
"actor": "${{ github.actor }}"
|
||||
}
|
||||
|
||||
@@ -48,4 +48,12 @@ jobs:
|
||||
with:
|
||||
token: ${{ secrets.REPO_ACCESS_TOKEN }}
|
||||
event-type: benchmark-import
|
||||
client-payload: '{"to_server": "dolt", "to_version": "${{ needs.set-version-actor.outputs.version }}", "from_server": "mysql", "from_version": "8.0.28", "mode": "release", "actor": "${{ needs.set-version-actor.outputs.actor }}", "actor_email": "${{ needs.set-version-actor.outputs.actor_email }}", "template_script": "./.github/scripts/import-benchmarking/get-mysql-dolt-job-json.sh"}'
|
||||
client-payload: |
|
||||
{
|
||||
"email_recipient": "${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}",
|
||||
"version": "${{ github.sha }}",
|
||||
"run_file": "ci.yaml",
|
||||
"report": "three_way_compare.sql",
|
||||
"commit_to_branch": "main",
|
||||
"actor": "${{ github.actor }}"
|
||||
}
|
||||
|
||||
@@ -37,14 +37,14 @@ require (
|
||||
github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
|
||||
github.com/stretchr/testify v1.7.1
|
||||
github.com/stretchr/testify v1.8.0
|
||||
github.com/tealeg/xlsx v1.0.5
|
||||
github.com/tklauser/go-sysconf v0.3.9 // indirect
|
||||
go.uber.org/zap v1.15.0
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519
|
||||
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
|
||||
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261
|
||||
golang.org/x/sys v0.1.0
|
||||
google.golang.org/api v0.32.0
|
||||
google.golang.org/grpc v1.49.0
|
||||
google.golang.org/protobuf v1.27.1
|
||||
@@ -56,6 +56,8 @@ require (
|
||||
require (
|
||||
github.com/aliyun/aliyun-oss-go-sdk v2.2.5+incompatible
|
||||
github.com/cenkalti/backoff/v4 v4.1.3
|
||||
github.com/cespare/xxhash v1.1.0
|
||||
github.com/creasty/defaults v1.6.0
|
||||
github.com/dolthub/go-mysql-server v0.14.1-0.20221110165837-03b011aa073d
|
||||
github.com/google/flatbuffers v2.0.6+incompatible
|
||||
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
|
||||
@@ -75,6 +77,7 @@ require (
|
||||
go.opentelemetry.io/otel/trace v1.7.0
|
||||
golang.org/x/text v0.3.7
|
||||
gonum.org/v1/plot v0.11.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -85,7 +88,6 @@ require (
|
||||
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
|
||||
github.com/apache/thrift v0.13.1-0.20201008052519-daf620915714 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash v1.1.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.1.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/go-fonts/liberation v0.2.0 // indirect
|
||||
@@ -115,7 +117,6 @@ require (
|
||||
github.com/prometheus/client_model v0.2.0 // indirect
|
||||
github.com/prometheus/common v0.26.0 // indirect
|
||||
github.com/prometheus/procfs v0.6.0 // indirect
|
||||
github.com/stretchr/objx v0.2.0 // indirect
|
||||
github.com/tklauser/numcpus v0.3.0 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.2 // indirect
|
||||
go.opencensus.io v0.22.4 // indirect
|
||||
@@ -131,7 +132,6 @@ require (
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/genproto v0.0.0-20210506142907-4a47615972c2 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0 // indirect
|
||||
)
|
||||
|
||||
replace (
|
||||
|
||||
@@ -163,6 +163,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/creasty/defaults v1.6.0 h1:ltuE9cfphUtlrBeomuu8PEyISTXnxqkBIoQfXgv7BSc=
|
||||
github.com/creasty/defaults v1.6.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM=
|
||||
github.com/daixiang0/gci v0.2.4/go.mod h1:+AV8KmHTGxxwp/pY84TLQfFKp2vuKXXJVzF3kD/hfR4=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
@@ -692,8 +694,8 @@ github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3
|
||||
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
|
||||
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
||||
github.com/stretchr/objx v0.4.0 h1:M2gUjqZET1qApGOWNSnZ49BAIMX4F/1plDv3+l31EJ4=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/testify v1.1.4/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
@@ -701,8 +703,9 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
|
||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
|
||||
github.com/tdakkota/asciicheck v0.0.0-20200416190851-d7f85be797a2/go.mod h1:yHp0ai0Z9gUljN3o0xMhYJnH/IcvkdTBOX2fmJ93JEM=
|
||||
github.com/tealeg/xlsx v1.0.5 h1:+f8oFmvY8Gw1iUXzPk+kz+4GpbDZPK1FhPiQRd+ypgE=
|
||||
@@ -976,8 +979,8 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 h1:v6hYoSR9T5oet+pMXwUWkbiVqx/63mlHjefrHmxwfeY=
|
||||
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
@@ -1217,8 +1220,8 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
||||
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0 h1:hjy8E9ON/egN1tAYqKb61G10WtihqetD4sz2H+8nIeA=
|
||||
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
|
||||
+16
-18
@@ -12,13 +12,14 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
package sql_server_driver
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -46,7 +47,7 @@ func init() {
|
||||
var err error
|
||||
DoltPath, err = exec.LookPath(path)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("did not find dolt binary: %v", err.Error()))
|
||||
log.Printf("did not find dolt binary: %v\n", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,11 +109,11 @@ func (u DoltUser) MakeRepoStore() (RepoStore, error) {
|
||||
|
||||
type RepoStore struct {
|
||||
user DoltUser
|
||||
dir string
|
||||
Dir string
|
||||
}
|
||||
|
||||
func (rs RepoStore) MakeRepo(name string) (Repo, error) {
|
||||
path := filepath.Join(rs.dir, name)
|
||||
path := filepath.Join(rs.Dir, name)
|
||||
err := os.Mkdir(path, 0750)
|
||||
if err != nil {
|
||||
return Repo{}, err
|
||||
@@ -127,18 +128,18 @@ func (rs RepoStore) MakeRepo(name string) (Repo, error) {
|
||||
|
||||
func (rs RepoStore) DoltCmd(args ...string) *exec.Cmd {
|
||||
cmd := rs.user.DoltCmd(args...)
|
||||
cmd.Dir = rs.dir
|
||||
cmd.Dir = rs.Dir
|
||||
return cmd
|
||||
}
|
||||
|
||||
type Repo struct {
|
||||
user DoltUser
|
||||
dir string
|
||||
Dir string
|
||||
}
|
||||
|
||||
func (r Repo) DoltCmd(args ...string) *exec.Cmd {
|
||||
cmd := r.user.DoltCmd(args...)
|
||||
cmd.Dir = r.dir
|
||||
cmd.Dir = r.Dir
|
||||
return cmd
|
||||
}
|
||||
|
||||
@@ -260,26 +261,23 @@ func (s *SqlServer) Restart(newargs *[]string) error {
|
||||
}
|
||||
|
||||
func (s *SqlServer) DB(c Connection) (*sql.DB, error) {
|
||||
authority := "root"
|
||||
if c.User != "" {
|
||||
authority = c.User
|
||||
}
|
||||
var pass string
|
||||
pass, err := c.Password()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if pass != "" {
|
||||
authority += ":" + pass
|
||||
}
|
||||
location := fmt.Sprintf("tcp(127.0.0.1:%d)", s.Port)
|
||||
dbname := s.DBName
|
||||
return ConnectDB(c.User, pass, s.DBName, "127.0.0.1", s.Port, c.DriverParams)
|
||||
}
|
||||
|
||||
func ConnectDB(user, password, name, host string, port int, driverParams map[string]string) (*sql.DB, error) {
|
||||
params := make(url.Values)
|
||||
params.Set("allowAllFiles", "true")
|
||||
params.Set("tls", "preferred")
|
||||
for k, v := range c.DriverParams {
|
||||
for k, v := range driverParams {
|
||||
params.Set(k, v)
|
||||
}
|
||||
dsn := fmt.Sprintf("%s@%s/%s?%s", authority, location, dbname, params.Encode())
|
||||
dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?%s", user, password, host, port, name, params.Encode())
|
||||
|
||||
db, err := sql.Open("mysql", dsn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
+2
-3
@@ -12,9 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
|
||||
// +build darwin dragonfly freebsd linux netbsd openbsd
|
||||
|
||||
package main
|
||||
package sql_server_driver
|
||||
|
||||
import "syscall"
|
||||
|
||||
@@ -26,5 +27,3 @@ func (s *SqlServer) GracefulStop() error {
|
||||
<-s.Done
|
||||
return s.Cmd.Wait()
|
||||
}
|
||||
|
||||
|
||||
+4
-3
@@ -12,10 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
package sql_server_driver
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
@@ -37,7 +38,7 @@ func (s *SqlServer) GracefulStop() error {
|
||||
return err
|
||||
}
|
||||
|
||||
set, err = dll.FindProc("SetConsoleCtrlHandler")
|
||||
set, err := dll.FindProc("SetConsoleCtrlHandler")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -58,7 +59,7 @@ func (s *SqlServer) GracefulStop() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, _, err := f.Call()
|
||||
_, _, err = f.Call()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -0,0 +1,242 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package sql_server_driver
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/creasty/defaults"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// |Connection| represents a single connection to a sql-server instance defined
|
||||
// in the test. The connection will be established and every |Query| in
|
||||
// |Queries| will be run against it. At the end, the connection will be torn down.
|
||||
// If |RestartServer| is non-nil, the server which the connection targets will
|
||||
// be restarted after the connection is terminated.
|
||||
type Connection struct {
|
||||
On string `yaml:"on"`
|
||||
Queries []Query `yaml:"queries"`
|
||||
RestartServer *RestartArgs `yaml:"restart_server"`
|
||||
|
||||
// Rarely needed, allows the entire connection assertion to be retried
|
||||
// on an assertion failure. Use this is only for idempotent connection
|
||||
// interactions and only if the sql-server is prone to tear down the
|
||||
// connection based on things that are happening, such as cluster role
|
||||
// transitions.
|
||||
RetryAttempts int `yaml:"retry_attempts"`
|
||||
|
||||
// The user to connect as.
|
||||
User string `default:"root" yaml:"user"`
|
||||
// The password to connect with.
|
||||
Pass string `yaml:"password"`
|
||||
PassFile string `yaml:"password_file"`
|
||||
// Any driver params to pass in the DSN.
|
||||
DriverParams map[string]string `yaml:"driver_params"`
|
||||
}
|
||||
|
||||
func (c *Connection) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
defaults.Set(c)
|
||||
type plain Connection
|
||||
if err := unmarshal((*plain)(c)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Connection) Password() (string, error) {
|
||||
if c.PassFile != "" {
|
||||
bs, err := os.ReadFile(c.PassFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strings.TrimSpace(string(bs)), nil
|
||||
}
|
||||
return c.Pass, nil
|
||||
}
|
||||
|
||||
// |RestartArgs| are possible arguments, to change the arguments which are
|
||||
// provided to the sql-server process when it is restarted. This is used, for
|
||||
// example, to change server config on a restart.
|
||||
type RestartArgs struct {
|
||||
Args *[]string `yaml:"args"`
|
||||
}
|
||||
|
||||
// |TestRepo| represents an init'd dolt repository that is available to a
|
||||
// server instance. It can be created with some files and with remotes defined.
|
||||
// |Name| can include path components separated by `/`, which will create the
|
||||
// repository in a subdirectory.
|
||||
type TestRepo struct {
|
||||
Name string `yaml:"name"`
|
||||
WithFiles []WithFile `yaml:"with_files"`
|
||||
WithRemotes []WithRemote `yaml:"with_remotes"`
|
||||
|
||||
// Only valid on Test.Repos, not in Test.MultiRepos.Repos. If set, a
|
||||
// sql-server process will be run against this TestRepo. It will be
|
||||
// available as TestRepo.Name.
|
||||
Server *Server `yaml:"server"`
|
||||
ExternalServer *ExternalServer `yaml:"external-server"`
|
||||
}
|
||||
|
||||
// |MultiRepo| is a subdirectory where many |TestRepo|s can be defined. You can
|
||||
// start a sql-server on a |MultiRepo|, in which case there will be no default
|
||||
// database to connect to.
|
||||
type MultiRepo struct {
|
||||
Name string `yaml:"name"`
|
||||
Repos []TestRepo `yaml:"repos"`
|
||||
WithFiles []WithFile `yaml:"with_files"`
|
||||
|
||||
// If set, a sql-server process will be run against this TestRepo. It
|
||||
// will be available as MultiRepo.Name.
|
||||
Server *Server `yaml:"server"`
|
||||
}
|
||||
|
||||
// |WithRemote| defines remotes which should be defined on the repository
|
||||
// before the sql-server is started.
|
||||
type WithRemote struct {
|
||||
Name string `yaml:"name"`
|
||||
URL string `yaml:"url"`
|
||||
}
|
||||
|
||||
// |WithFile| defines a file and its contents to be created in a |Repo| or
|
||||
// |MultiRepo| before the servers are started.
|
||||
type WithFile struct {
|
||||
Name string `yaml:"name"`
|
||||
|
||||
// The contents of the file, provided inline in the YAML.
|
||||
Contents string `yaml:"contents"`
|
||||
|
||||
// A source file path to copy to |Name|. Mutually exclusive with
|
||||
// Contents.
|
||||
SourcePath string `yaml:"source_path"`
|
||||
}
|
||||
|
||||
func (f WithFile) WriteAtDir(dir string) error {
|
||||
path := filepath.Join(dir, f.Name)
|
||||
d := filepath.Dir(path)
|
||||
err := os.MkdirAll(d, 0750)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if f.SourcePath != "" {
|
||||
source, err := os.Open(f.SourcePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer source.Close()
|
||||
dest, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0550)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = io.Copy(dest, source)
|
||||
return err
|
||||
} else {
|
||||
return os.WriteFile(path, []byte(f.Contents), 0550)
|
||||
}
|
||||
}
|
||||
|
||||
// |Server| defines a sql-server process to start. |Name| must match the
|
||||
// top-level |Name| of a |TestRepo| or |MultiRepo|.
|
||||
type Server struct {
|
||||
Name string `yaml:"name"`
|
||||
Args []string `yaml:"args"`
|
||||
|
||||
// The |Port| which the server will be running on. For now, it is up to
|
||||
// the |Args| to make sure this is true. Defaults to 3308.
|
||||
Port int `yaml:"port"`
|
||||
|
||||
// Assertions to be run against the log output of the server process
|
||||
// after the server process successfully terminates.
|
||||
LogMatches []string `yaml:"log_matches"`
|
||||
|
||||
// Assertions to be run against the log output of the server process
|
||||
// after the server process exits with an error. If |ErrorMatches| is
|
||||
// defined, then the server process must exit with a non-0 exit code
|
||||
// after it is launched. This will be asserted before any |Connections|
|
||||
// interactions are performed.
|
||||
ErrorMatches []string `yaml:"error_matches"`
|
||||
}
|
||||
|
||||
type ExternalServer struct {
|
||||
Name string `yaml:"name"`
|
||||
Host string `yaml:"host"`
|
||||
User string `yaml:"user"`
|
||||
Password string `yaml:"password"`
|
||||
// The |Port| which the server will be running on. For now, it is up to
|
||||
// the |Args| to make sure this is true. Defaults to 3308.
|
||||
Port int `yaml:"port"`
|
||||
}
|
||||
|
||||
// The primary interaction of a |Connection|. Either |Query| or |Exec| should
|
||||
// be set, not both.
|
||||
type Query struct {
|
||||
// Run a query against the connection.
|
||||
Query string `yaml:"query"`
|
||||
|
||||
// Run a command against the connection.
|
||||
Exec string `yaml:"exec"`
|
||||
|
||||
// Args to be passed as query parameters to either Query or Exec.
|
||||
Args []string `yaml:"args"`
|
||||
|
||||
// This can only be non-empty for a |Query|. Asserts the results of the
|
||||
// |Query|.
|
||||
Result QueryResult `yaml:"result"`
|
||||
|
||||
// If this is non-empty, asserts the the |Query| or the |Exec|
|
||||
// generates an error that matches this string.
|
||||
ErrorMatch string `yaml:"error_match"`
|
||||
|
||||
// If this is non-zero, it represents the number of times to try the
|
||||
// |Query| or the |Exec| and to check its assertions before we fail the
|
||||
// test as a result of failed assertions. When interacting with queries
|
||||
// that introspect things like replication state, this can be used to
|
||||
// wait for quiescence in an inherently racey process. Interactions
|
||||
// will be delayed slightly between each failure.
|
||||
RetryAttempts int `yaml:"retry_attempts"`
|
||||
}
|
||||
|
||||
// |QueryResult| specifies assertions on the results of a |Query|. Columns must
|
||||
// be specified for a |Query| and the query results must fully match. If Rows
|
||||
// are ommited, anything is allowed as long as all rows are read successfully.
|
||||
// All assertions here are string equality.
|
||||
type QueryResult struct {
|
||||
Columns []string `yaml:"columns"`
|
||||
Rows ResultRows `yaml:"rows"`
|
||||
}
|
||||
|
||||
// ResultRows holds one or more acceptable row sets for a query result.
// Or is a pointer to a list of alternatives; any one of them matching
// the actual rows satisfies the assertion.
type ResultRows struct {
	Or *[][][]string
}
|
||||
|
||||
func (r *ResultRows) UnmarshalYAML(value *yaml.Node) error {
|
||||
if value.Kind == yaml.SequenceNode {
|
||||
res := make([][][]string, 1)
|
||||
r.Or = &res
|
||||
return value.Decode(&(*r.Or)[0])
|
||||
}
|
||||
var or struct {
|
||||
Or *[][][]string `yaml:"or"`
|
||||
}
|
||||
err := value.Decode(&or)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.Or = or.Or
|
||||
return nil
|
||||
}
|
||||
@@ -1,19 +1,310 @@
|
||||
## Import Benchmarker
|
||||
## Import Benchmarking
|
||||
|
||||
This library is used to benchmark `dolt table import` on csv/json files and `dolt sql` on .sql files. It uses
|
||||
the Go testing.B package to execute the relevant dolt commands.
|
||||
Benchmark different import workflows expressed as yaml files.
|
||||
|
||||
### Test Files
|
||||
Unit tests in `import_test.go` are not suitable for reporting performance
|
||||
comparisons.
|
||||
|
||||
This package uses several test files that are stored in a private S3 bucket (import-benchmarking-github-actions-results)
|
||||
which represent different sort order, primary keys, etc.
|
||||
## Usage
|
||||
|
||||
The benchmarker supports custom configurations which runs different import jobs against a `dolt` database or a MySQL
|
||||
server. The parameters of each job and the overall config file are specified in `config.go`.
|
||||
Sample:
|
||||
```bash
|
||||
go run \
|
||||
github.com/dolthub/dolt/go/performance/import_benchmarker/cmd \
|
||||
-test testdata/shuffle.yaml
|
||||
```
|
||||
|
||||
Note that if you run the benchmarker without a filepath, then the benchmarker will generate a sample file for you. It is
|
||||
best to stick with the default files used in the production benchmarking system to maintain a sense of consistency.
|
||||
Requirements:
|
||||
|
||||
### Notes
|
||||
Tests that use dolt require a `dolt` binary in `PATH` for performance comparisons.
|
||||
|
||||
* You should name your table "test" in the MySQL schema file.
|
||||
Tests with an `external-server` configuration are expected to be available
|
||||
from the host machine on startup.
|
||||
|
||||
Example `mysqld` server on the host OS, assuming an initialized `datadir`
|
||||
and pre-existing database:
|
||||
```bash
|
||||
mysqld --port 3308 --local-infile=1 --socket=/tmp/mysqld2.sock
|
||||
```
|
||||
|
||||
Example mysql server `docker-compose.yml` config:
|
||||
```yaml
|
||||
mysql:
|
||||
image: mysql/mysql-server:8.0
|
||||
container_name: mysql-import-perf
|
||||
ports:
|
||||
- "3308:3306"
|
||||
command: --local-infile=1 --socket=/tmp/mysqld2.sock
|
||||
volumes:
|
||||
- ./mysql:/var/lib/mysql
|
||||
restart: always # always restart unless stopped manually
|
||||
environment:
|
||||
MYSQL_USER: root
|
||||
MYSQL_ROOT_PASSWORD: password
|
||||
MYSQL_PASSWORD: password
|
||||
MYSQL_DATABASE: test
|
||||
```
|
||||
|
||||
Note the `--local-infile` parameter, which permits `LOAD DATA`, and
|
||||
the `--socket` parameter, which specifies a non-default socket that
|
||||
will not conflict with any `dolt sql-server` instances. All other
|
||||
parameters, including the database name, are configurable in the test
|
||||
file yaml.
|
||||
|
||||
## Inputs
|
||||
|
||||
Specify imports for different servers and workloads along several
|
||||
dimensions:
|
||||
|
||||
- repo
|
||||
- dolt server (server field)
|
||||
- dolt cli (omit server field)
|
||||
- mysql server (external server field)
|
||||
|
||||
- table spec
|
||||
- fmt (string): file format for importing
|
||||
- csv: comma separated lines
|
||||
- sql: dump file of insert statements
|
||||
- rows (int): number of rows to import
|
||||
- schema (string): CREATE_TABLE statement for table to import
|
||||
- shuffle (bool): by default generated rows are sorted; indicate `true` to shuffle
|
||||
- batch (bool): whether to batch insert statements (only applies to fmt=sql)
|
||||
|
||||
For examples of the specific yaml input syntax, see the example
|
||||
files below, or refer to the tests in `testdata/`.
|
||||
|
||||
Server Details:
|
||||
|
||||
- For dolt sql-server tests, a new sql-server will be constructed individually
|
||||
for each test run.
|
||||
- External servers are provided outside of the lifecycle of the `import_benchmarker`
|
||||
command. The same database instance is used for every table import test.
|
||||
- Import files are cached on the schema, row number, and format in between
|
||||
tests.
|
||||
|
||||
## Outputs
|
||||
|
||||
The output format is a `.sql` file with the following schema:
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS import_perf_results (
|
||||
test_name varchar(64),
|
||||
server varchar(64),
|
||||
detail varchar(64),
|
||||
row_cnt int,
|
||||
time double,
|
||||
file_format varchar(8),
|
||||
sorted bool,
|
||||
primary key (test_name, server, detail)
|
||||
);
|
||||
```
|
||||
|
||||
A sample import file:
|
||||
```sql
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'mysql', 'int', 400000, 2.20, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'mysql', 'float', 400000, 1.98, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'mysql', 'varchar', 400000, 3.46, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'mysql', '2 cols', 400000, 1.71, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'mysql', '4 cols', 400000, 1.78, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'mysql', '8 cols', 400000, 2.10, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'mysql', 'int', 400000, 1.70, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'mysql', 'float', 400000, 1.95, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'mysql', 'varchar', 400000, 3.86, 'csv', 1);
|
||||
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'dolt', 'int', 400000, 2.10, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'dolt', 'float', 400000, 2.83, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'dolt', 'varchar', 400000, 5.01, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'dolt', '2 cols', 400000, 2.12, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'dolt', '4 cols', 400000, 2.47, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'dolt', '8 cols', 400000, 2.84, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'dolt', 'int', 400000, 2.06, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'dolt', 'float', 400000, 2.27, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'dolt', 'varchar', 400000, 5.34, 'csv', 1);
|
||||
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'dolt_cli', 'int', 400000, 2.40, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'dolt_cli', 'float', 400000, 2.44, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('primary key types', 'dolt_cli', 'varchar', 400000, 5.58, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'dolt_cli', '2 cols', 400000, 2.40, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'dolt_cli', '4 cols', 400000, 2.77, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('config width', 'dolt_cli', '8 cols', 400000, 3.23, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'dolt_cli', 'int', 400000, 2.37, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'dolt_cli', 'float', 400000, 2.43, 'csv', 1);
|
||||
insert into import_perf_results values
|
||||
('pk type', 'dolt_cli', 'varchar', 400000, 5.52, 'csv', 1);
|
||||
```
|
||||
Ingest the result file and run queries like the ones below to compare
|
||||
import runtimes:
|
||||
|
||||
```sql
|
||||
-- compare two servers
|
||||
> select
|
||||
a.test_name as test_name,
|
||||
a.detail as detail,
|
||||
a.row_cnt as row_cnt,
|
||||
a.sorted as sorted,
|
||||
a.time as dolt_time,
|
||||
b.time as mysql_time,
|
||||
round((a.time / b.time),2) as multiple
|
||||
from import_perf_results a
|
||||
join import_perf_results b
|
||||
on
|
||||
a.test_name = b.test_name and
|
||||
a.detail = b.detail
|
||||
where
|
||||
a.server = 'dolt' and
|
||||
b.server = 'mysql'
|
||||
order by 1,2;
|
||||
|
||||
+-------------------+--------------+---------+--------+-----------+------------+----------+
|
||||
| test_name | detail | row_cnt | sorted | dolt_time | mysql_time | multiple |
|
||||
+-------------------+--------------+---------+--------+-----------+------------+----------+
|
||||
| blobs | 1 blob | 400000 | 1 | 34.94 | 2.16 | 16.18 |
|
||||
| blobs | 2 blobs | 400000 | 1 | 62.23 | 2.08 | 29.92 |
|
||||
| blobs | no blob | 400000 | 1 | 2.91 | 2.09 | 1.39 |
|
||||
| config width | 2 cols | 400000 | 1 | 2.12 | 1.71 | 1.24 |
|
||||
| config width | 4 cols | 400000 | 1 | 2.47 | 1.78 | 1.39 |
|
||||
| config width | 8 cols | 400000 | 1 | 2.84 | 2.1 | 1.35 |
|
||||
| pk type | float | 400000 | 1 | 2.27 | 1.95 | 1.16 |
|
||||
| pk type | int | 400000 | 1 | 2.06 | 1.7 | 1.21 |
|
||||
| pk type | varchar | 400000 | 1 | 5.34 | 3.86 | 1.38 |
|
||||
+-------------------+--------------+---------+--------+-----------+------------+----------+
|
||||
|
||||
-- compare three servers
|
||||
> select
|
||||
o.test_name as test_name,
|
||||
o.detail,
|
||||
o.row_cnt,
|
||||
o.sorted as sorted,
|
||||
o.time as mysql_time,
|
||||
(
|
||||
select round((a.time / b.time),2) m
|
||||
from import_perf_results a
|
||||
join import_perf_results b
|
||||
on
|
||||
a.test_name = b.test_name and
|
||||
a.detail = b.detail
|
||||
where
|
||||
a.server = 'dolt' and
|
||||
b.server = 'mysql' and
|
||||
a.test_name = o.test_name and
|
||||
a.detail = o.detail
|
||||
) as sql_mult,
|
||||
(
|
||||
select round((a.time / b.time),2) m
|
||||
from import_perf_results a
|
||||
join import_perf_results b
|
||||
on
|
||||
a.test_name = b.test_name and
|
||||
a.detail = b.detail
|
||||
where
|
||||
a.server = 'dolt_cli' and
|
||||
b.server = 'mysql' and
|
||||
a.test_name = o.test_name and
|
||||
a.detail = o.detail
|
||||
) as cli_mult
|
||||
from import_perf_results as o
|
||||
where o.server = 'mysql'
|
||||
order by 1,2;
|
||||
|
||||
+-------------------+--------------+---------+--------+------------+----------+----------+
|
||||
| test_name | detail | row_cnt | sorted | mysql_time | sql_mult | cli_mult |
|
||||
+-------------------+--------------+---------+--------+------------+----------+----------+
|
||||
| blobs | 1 blob | 400000 | 1 | 2.16 | 16.18 | 13.43 |
|
||||
| blobs | 2 blobs | 400000 | 1 | 2.08 | 29.92 | 26.71 |
|
||||
| blobs | no blob | 400000 | 1 | 2.09 | 1.39 | 1.33 |
|
||||
| config width | 2 cols | 400000 | 1 | 1.71 | 1.24 | 1.4 |
|
||||
| config width | 4 cols | 400000 | 1 | 1.78 | 1.39 | 1.56 |
|
||||
| config width | 8 cols | 400000 | 1 | 2.1 | 1.35 | 1.54 |
|
||||
| pk type | float | 400000 | 1 | 1.95 | 1.16 | 1.25 |
|
||||
| pk type | int | 400000 | 1 | 1.7 | 1.21 | 1.39 |
|
||||
| pk type | varchar | 400000 | 1 | 3.86 | 1.38 | 1.43 |
|
||||
+-------------------+--------------+---------+--------+------------+----------+----------+
|
||||
```
|
||||
|
||||
## Example tests
|
||||
|
||||
Example test spec 1:
|
||||
```yaml
|
||||
tests:
|
||||
- name: "sorting"
|
||||
repos:
|
||||
- name: repo1
|
||||
server:
|
||||
port: 3308
|
||||
tables:
|
||||
- name: "shuffle"
|
||||
shuffle: true
|
||||
rows: 100000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "sorted"
|
||||
shuffle: false
|
||||
rows: 100000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
```
|
||||
|
||||
We will import two tables with a dolt sql-server on port `3308`.
|
||||
Both tables have 100,000 rows, and a schema with two columns.
|
||||
The "sorted" test imports the default sorted rows, while the
|
||||
"shuffle" imports unsorted rows.
|
||||
|
||||
Example test spec 2:
|
||||
|
||||
```yaml
|
||||
tests:
|
||||
- name: "row count"
|
||||
repos:
|
||||
- name: mysql
|
||||
external-server:
|
||||
name: test
|
||||
host: 127.0.0.1
|
||||
user: root
|
||||
password: password
|
||||
port: 4306
|
||||
tables:
|
||||
- name: "400k"
|
||||
fmt: "csv"
|
||||
rows: 40000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
```
|
||||
|
||||
We will connect to a database server named `test` on port `4306`
|
||||
with the credentials above to run a 40,000 row import of a table
|
||||
with two columns.
|
||||
@@ -16,41 +16,62 @@ package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/dolthub/dolt/go/performance/import_benchmarker"
|
||||
driver "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils/sql_server_driver"
|
||||
ib "github.com/dolthub/dolt/go/performance/import_benchmarker"
|
||||
)
|
||||
|
||||
const (
|
||||
resultsTableName = "results"
|
||||
)
|
||||
|
||||
var configPath = flag.String("config", "", "the path to a config file")
|
||||
var path = flag.String("test", "", "the path to a test file")
|
||||
var out = flag.String("out", "", "result output path")
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
// Construct a config
|
||||
config, err := import_benchmarker.NewDefaultImportBenchmarkConfig()
|
||||
if *configPath != "" {
|
||||
config, err = import_benchmarker.FromFileConfig(*configPath)
|
||||
}
|
||||
|
||||
def, err := ib.ParseTestsFile(*path)
|
||||
if err != nil {
|
||||
log.Fatal(err.Error())
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
// Get the working directory the tests will be executing in
|
||||
wd := import_benchmarker.GetWorkingDir()
|
||||
|
||||
// Generate the tests and the benchmarker.
|
||||
results, err := import_benchmarker.RunBenchmarkTests(config, wd)
|
||||
tmpdir, err := os.MkdirTemp("", "repo-store-")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
import_benchmarker.SerializeResults(results, wd, resultsTableName, "csv")
|
||||
results := new(ib.ImportResults)
|
||||
u, err := driver.NewDoltUser()
|
||||
for _, test := range def.Tests {
|
||||
test.Results = results
|
||||
test.InitWithTmpDir(tmpdir)
|
||||
|
||||
for _, r := range test.Repos {
|
||||
var err error
|
||||
switch {
|
||||
case r.ExternalServer != nil:
|
||||
err = test.RunExternalServerTests(r.Name, r.ExternalServer)
|
||||
case r.Server != nil:
|
||||
err = test.RunSqlServerTests(r, u)
|
||||
default:
|
||||
err = test.RunCliTests(r, u)
|
||||
}
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if *out != "" {
|
||||
of, err := os.Create(*out)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
fmt.Fprintf(of, results.SqlDump())
|
||||
} else {
|
||||
fmt.Println(results.SqlDump())
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
@@ -1,282 +0,0 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/dolt/go/performance/utils/sysbench_runner"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
)
|
||||
|
||||
const (
	// Row-count presets for generated datasets.
	smallSet  = 100000
	mediumSet = 1000000
	largeSet  = 10000000

	// testTable is the table name used for all generated test data.
	testTable = "test"
)

// Sentinel errors returned by job validation. Messages follow Go
// convention: lowercase, no "error:" prefix, no trailing punctuation.
var (
	ErrMissingMysqlSchemaFile  = errors.New("must supply schema file for mysql jobs")
	ErrImproperMysqlFileFormat = errors.New("improper file format for mysql: only csv is supported")
	ErrUnsupportedProgram      = errors.New("unsupported program: only dolt or mysql allowed")
	ErrUnsupportedFileFormat   = errors.New("unsupported format: only csv, json, or sql allowed")
)
|
||||
|
||||
// ImportBenchmarkJob describes a single import benchmark run.
type ImportBenchmarkJob struct {
	// Name of the job.
	Name string

	// NumRows is the number of rows being imported in the job.
	NumRows int

	// Sorted indicates whether the data is sorted.
	Sorted bool

	// Format is either csv, json or sql.
	Format string

	// Filepath is the path to the data file. If empty, data is generated
	// instead.
	Filepath string

	// Program is either Dolt or MySQL.
	Program string

	// Version tracks the current version of Dolt or MySQL being used.
	Version string

	// ExecPath is a path to a Dolt or MySQL executable. Useful when
	// running different versions of Dolt.
	ExecPath string

	// SchemaPath is a path to a generated schema. Required for MySQL
	// testing, optional for Dolt testing.
	SchemaPath string
}
|
||||
|
||||
type ImportBenchmarkConfig struct {
|
||||
Jobs []*ImportBenchmarkJob
|
||||
|
||||
// MysqlConnectionProtocol is either tcp or unix. On our kubernetes benchmarking deployments unix is needed. To run this
|
||||
// locally you want tcp
|
||||
MysqlConnectionProtocol string
|
||||
|
||||
// MysqlPort is used to connect with a MySQL port
|
||||
MysqlPort int
|
||||
|
||||
// MysqlHost is used to connect with a MySQL host
|
||||
MysqlHost string
|
||||
|
||||
// NbfVersion is used to turn what format to run Dolt against
|
||||
NbfVersion string
|
||||
}
|
||||
|
||||
// NewDefaultImportBenchmarkConfig returns a default import configuration where data is generated with accordance to
|
||||
// the medium set.
|
||||
func NewDefaultImportBenchmarkConfig() (*ImportBenchmarkConfig, error) {
|
||||
jobs := []*ImportBenchmarkJob{
|
||||
{
|
||||
Name: "dolt_import_small",
|
||||
NumRows: smallSet,
|
||||
Sorted: false,
|
||||
Format: csvExt,
|
||||
Version: "HEAD", // Use whatever dolt is installed locally
|
||||
ExecPath: "dolt", // Assumes dolt is installed locally
|
||||
Program: "dolt",
|
||||
},
|
||||
}
|
||||
|
||||
config := &ImportBenchmarkConfig{
|
||||
Jobs: jobs,
|
||||
}
|
||||
|
||||
err := config.ValidateAndUpdateDefaults()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
// FromFileConfig takes in a configuration file (encoded as JSON) and returns the relevant importBenchmark config
|
||||
func FromFileConfig(configPath string) (*ImportBenchmarkConfig, error) {
|
||||
data, err := ioutil.ReadFile(configPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
config := &ImportBenchmarkConfig{
|
||||
Jobs: make([]*ImportBenchmarkJob, 0),
|
||||
}
|
||||
|
||||
err = json.Unmarshal(data, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = config.ValidateAndUpdateDefaults()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
func (c *ImportBenchmarkConfig) ValidateAndUpdateDefaults() error {
|
||||
if c.MysqlConnectionProtocol == "" {
|
||||
c.MysqlConnectionProtocol = "tcp"
|
||||
}
|
||||
|
||||
if c.MysqlHost == "" {
|
||||
c.MysqlHost = defaultHost
|
||||
}
|
||||
|
||||
if c.MysqlPort == 0 {
|
||||
c.MysqlPort = defaultPort
|
||||
}
|
||||
|
||||
for _, job := range c.Jobs {
|
||||
err := job.updateDefaultsAndValidate()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (j *ImportBenchmarkJob) updateDefaultsAndValidate() error {
|
||||
j.Program = strings.ToLower(j.Program)
|
||||
|
||||
programAsServerType := sysbench_runner.ServerType(j.Program)
|
||||
switch programAsServerType {
|
||||
case sysbench_runner.MySql:
|
||||
if j.SchemaPath == "" {
|
||||
return ErrMissingMysqlSchemaFile
|
||||
}
|
||||
|
||||
if j.Format != csvExt {
|
||||
return ErrImproperMysqlFileFormat
|
||||
}
|
||||
case sysbench_runner.Dolt:
|
||||
default:
|
||||
return ErrUnsupportedProgram
|
||||
}
|
||||
|
||||
j.Format = strings.ToLower(j.Format)
|
||||
|
||||
seen := false
|
||||
for _, f := range supportedFormats {
|
||||
if f == j.Format {
|
||||
seen = true
|
||||
}
|
||||
}
|
||||
if !seen {
|
||||
return ErrUnsupportedFileFormat
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMysqlConfigFromConfig(c *ImportBenchmarkConfig) sysbench_runner.MysqlConfig {
|
||||
return sysbench_runner.MysqlConfig{Socket: defaultSocket, Host: c.MysqlHost, ConnectionProtocol: c.MysqlConnectionProtocol, Port: c.MysqlPort}
|
||||
}
|
||||
|
||||
// generateTestFilesIfNeeded creates the test conditions for an import benchmark to execute. In the case that the config
|
||||
// dictates that data needs to be generated, this function handles that
|
||||
func generateTestFilesIfNeeded(config *ImportBenchmarkConfig) *ImportBenchmarkConfig {
|
||||
jobs := make([]*ImportBenchmarkJob, 0)
|
||||
|
||||
for _, job := range config.Jobs {
|
||||
// Preset csv path
|
||||
if job.Filepath != "" {
|
||||
jobs = append(jobs, job)
|
||||
} else {
|
||||
filePath, fileFormat := generateTestFile(job)
|
||||
|
||||
job.Filepath = filePath
|
||||
job.Format = fileFormat
|
||||
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
}
|
||||
|
||||
config.Jobs = jobs
|
||||
return config
|
||||
}
|
||||
|
||||
// generateTestFile is used to create a generated test case with a randomly generated csv file.
|
||||
func generateTestFile(job *ImportBenchmarkJob) (string, string) {
|
||||
sch := NewSeedSchema(job.NumRows, genSampleCols(), job.Format)
|
||||
|
||||
pathToImportFile := filepath.Join(GetWorkingDir(), fmt.Sprintf("testData.%s", sch.FileFormatExt))
|
||||
wc, err := filesys.LocalFS.OpenForWrite(pathToImportFile, os.ModePerm)
|
||||
if err != nil {
|
||||
log.Fatalf(err.Error())
|
||||
}
|
||||
|
||||
defer wc.Close()
|
||||
|
||||
ds := NewDSImpl(wc, sch, seedRandom, testTable)
|
||||
ds.GenerateData()
|
||||
|
||||
return pathToImportFile, sch.FileFormatExt
|
||||
}
|
||||
|
||||
func RunBenchmarkTests(config *ImportBenchmarkConfig, workingDir string) ([]result, error) {
|
||||
config = generateTestFilesIfNeeded(config)
|
||||
|
||||
// Split into the two jobs because we want
|
||||
doltJobs := make([]*ImportBenchmarkJob, 0)
|
||||
mySQLJobs := make([]*ImportBenchmarkJob, 0)
|
||||
|
||||
for _, job := range config.Jobs {
|
||||
switch strings.ToLower(job.Program) {
|
||||
case "dolt":
|
||||
doltJobs = append(doltJobs, job)
|
||||
case "mysql":
|
||||
if job.Format != csvExt {
|
||||
log.Fatal("mysql import benchmarking only supports csv files")
|
||||
}
|
||||
mySQLJobs = append(mySQLJobs, job)
|
||||
default:
|
||||
log.Fatal("error: Invalid program. Must use dolt or mysql. See the sample config")
|
||||
}
|
||||
}
|
||||
|
||||
results := make([]result, 0)
|
||||
for _, doltJob := range doltJobs {
|
||||
r, err := BenchmarkDoltImportJob(doltJob, workingDir, config.NbfVersion)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
results = append(results, r)
|
||||
}
|
||||
|
||||
r, err := BenchmarkMySQLImportJobs(mySQLJobs, getMysqlConfigFromConfig(config))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results = append(results, r...)
|
||||
|
||||
return results, nil
|
||||
}
|
||||
@@ -1,172 +0,0 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGeneratedConfigCanBeImported(t *testing.T) {
|
||||
t.Skip() // Skipping since dolt isn't installed on the github actions vm
|
||||
|
||||
config, err := NewDefaultImportBenchmarkConfig()
|
||||
assert.NoError(t, err)
|
||||
|
||||
wd := GetWorkingDir()
|
||||
|
||||
results, err := RunBenchmarkTests(config, wd)
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, 1, len(results))
|
||||
assert.Equal(t, "dolt_import_small", results[0].name)
|
||||
|
||||
// Sanity check: An import of 100,000 should never take more than 15 seconds
|
||||
assert.LessOrEqual(t, results[0].br.T, time.Second*15)
|
||||
|
||||
os.RemoveAll(filepath.Join(wd, "testData.csv"))
|
||||
}
|
||||
|
||||
func TestNewStorageFormat(t *testing.T) {
|
||||
t.Skip() // Skipping since dolt isn't installed on the github actions vm
|
||||
|
||||
job := createSampleDoltJob()
|
||||
config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{job}, NbfVersion: "__DOLT__"}
|
||||
err := config.ValidateAndUpdateDefaults()
|
||||
|
||||
assert.NoError(t, err)
|
||||
|
||||
wd := GetWorkingDir()
|
||||
results, err := RunBenchmarkTests(config, wd)
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, 1, len(results))
|
||||
assert.Equal(t, "dolt_import_small", results[0].name)
|
||||
|
||||
// Sanity check: An import of 100,000 should never take more than 15 seconds
|
||||
assert.LessOrEqual(t, results[0].br.T, time.Second*15)
|
||||
|
||||
os.RemoveAll(filepath.Join(wd, "testData.csv"))
|
||||
}
|
||||
|
||||
func TestCanGenerateFilesForAllFormats(t *testing.T) {
|
||||
config := &ImportBenchmarkConfig{Jobs: make([]*ImportBenchmarkJob, 0)}
|
||||
|
||||
// Create jobs for all configs
|
||||
for _, format := range supportedFormats {
|
||||
job := &ImportBenchmarkJob{
|
||||
Name: "dolt_import_small",
|
||||
NumRows: smallSet,
|
||||
Sorted: false,
|
||||
Format: format,
|
||||
Version: "HEAD", // Use whatever dolt is installed locally
|
||||
ExecPath: "dolt", // Assumes dolt is installed locally
|
||||
}
|
||||
|
||||
config.Jobs = append(config.Jobs, job)
|
||||
}
|
||||
|
||||
assert.Equal(t, 3, len(config.Jobs))
|
||||
|
||||
config = generateTestFilesIfNeeded(config)
|
||||
|
||||
for _, job := range config.Jobs {
|
||||
file, err := os.Open(job.Filepath)
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = file.Close()
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = os.Remove(job.Filepath)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBadConfigurations(t *testing.T) {
|
||||
t.Run("non csv format MySQL Job is considered invalid", func(t *testing.T) {
|
||||
mysqlJob := createSampleMysqlJob()
|
||||
mysqlJob.Format = jsonExt
|
||||
|
||||
config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{mysqlJob}}
|
||||
err := config.ValidateAndUpdateDefaults()
|
||||
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, ErrImproperMysqlFileFormat, err)
|
||||
})
|
||||
|
||||
t.Run("MySQL Job with no schema file errors", func(t *testing.T) {
|
||||
mysqlJob := createSampleMysqlJob()
|
||||
mysqlJob.SchemaPath = ""
|
||||
|
||||
config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{mysqlJob}}
|
||||
err := config.ValidateAndUpdateDefaults()
|
||||
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, ErrMissingMysqlSchemaFile, err)
|
||||
})
|
||||
|
||||
t.Run("improper program type passed in ", func(t *testing.T) {
|
||||
doltJob := createSampleDoltJob()
|
||||
doltJob.Program = "fake-program"
|
||||
|
||||
config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{doltJob}}
|
||||
err := config.ValidateAndUpdateDefaults()
|
||||
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, ErrUnsupportedProgram, err)
|
||||
})
|
||||
|
||||
t.Run("improper file extension passed in", func(t *testing.T) {
|
||||
doltJob := createSampleDoltJob()
|
||||
doltJob.Format = "psv"
|
||||
|
||||
config := &ImportBenchmarkConfig{Jobs: []*ImportBenchmarkJob{doltJob}}
|
||||
err := config.ValidateAndUpdateDefaults()
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, ErrUnsupportedFileFormat, err)
|
||||
})
|
||||
}
|
||||
|
||||
// createSampleMysqlJob creates a simple MySQL job that is particularly valuable for the future
|
||||
func createSampleMysqlJob() *ImportBenchmarkJob {
|
||||
job := ImportBenchmarkJob{
|
||||
Name: "Mysql Dummy",
|
||||
Format: csvExt,
|
||||
Version: "8.0.22", // Use whatever dolt is installed locally
|
||||
ExecPath: "/usr/mysql", // Assumes dolt is installed locally
|
||||
Program: "mysql",
|
||||
SchemaPath: "/schema",
|
||||
}
|
||||
|
||||
return &job
|
||||
}
|
||||
|
||||
func createSampleDoltJob() *ImportBenchmarkJob {
|
||||
job := &ImportBenchmarkJob{
|
||||
Name: "dolt_import_small",
|
||||
NumRows: smallSet,
|
||||
Sorted: false,
|
||||
Format: csvExt,
|
||||
Version: "HEAD", // Use whatever dolt is installed locally
|
||||
ExecPath: "dolt", // Assumes dolt is installed locally
|
||||
Program: "dolt",
|
||||
}
|
||||
|
||||
return job
|
||||
}
|
||||
@@ -1,149 +0,0 @@
|
||||
// Copyright 2019 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Dataset is a set of test data used for benchmark testing.
type Dataset interface {
	// GenerateData generates a dataset for testing.
	GenerateData()

	// Change returns a Dataset mutated by the given percentage of change.
	Change(pct float32) Dataset
}
|
||||
|
||||
// DSImpl implements the Dataset interface by generating seeded test data
// and streaming it to a writer.
type DSImpl struct {
	// Schema defines the structure of the Dataset (row count, columns, and
	// output file format).
	Schema *SeedSchema

	// TableName is the name of the test dataset's table.
	TableName string

	// w is the writer where the test dataset will be written.
	w io.Writer

	// sf is the function used to generate random data values in the dataset.
	sf seedFunc
}
|
||||
|
||||
// NewDSImpl creates a new DSImpl
|
||||
func NewDSImpl(wc io.Writer, sch *SeedSchema, sf seedFunc, tableName string) *DSImpl {
|
||||
return &DSImpl{Schema: sch, TableName: tableName, sf: sf, w: wc}
|
||||
}
|
||||
|
||||
// GenerateData generates a dataset and writes it to a io.Writer.
func (ds *DSImpl) GenerateData() {
	// Delegates to writeDataToWriter with the schema's row count, columns,
	// and file-format extension; any write failure is fatal inside the helper.
	writeDataToWriter(ds.w, ds.Schema.Rows, ds.Schema.Columns, ds.sf, ds.TableName, ds.Schema.FileFormatExt)
}
|
||||
|
||||
// Change returns a DataSet that is a mutation of this Dataset by the given percentage.
// The pct argument is currently ignored and an empty DSImpl is returned.
func (ds *DSImpl) Change(pct float32) Dataset {
	// TODO
	return &DSImpl{}
}
|
||||
|
||||
// writeDataToWriter streams a generated dataset to wc: a format-specific
// header, `rows` data rows, and (for JSON) a closing footer. Any write
// failure is fatal. Incremental columns read the previous row's value, so
// rows must be generated in order.
func writeDataToWriter(wc io.Writer, rows int, cols []*SeedColumn, sf seedFunc, tableName, format string) {
	// handle the "header" for all format types
	writeHeader(wc, cols, tableName, format)

	var prevRow []string
	for i := 0; i < rows; i++ {
		row := make([]string, len(cols))

		for colIndex, col := range cols {
			// getColValue may consult prevRow (e.g. for incrementing columns).
			val := getColValue(prevRow, colIndex, col, sf, format)
			row[colIndex] = val

			// Keep prevRow current column-by-column so later columns in this
			// same row observe already-generated values.
			if i > 0 && prevRow != nil {
				prevRow[colIndex] = val
			}
		}
		_, err := wc.Write([]byte(formatRow(row, cols, i, rows-1, tableName, format)))
		if err != nil {
			log.Fatal(err)
		}
		prevRow = row[:]
	}

	// handle the "footer" for format types
	switch format {
	case jsonExt:
		// Close the {"Rows":[ wrapper opened by writeHeader.
		suffix := "]}\n"
		_, err := wc.Write([]byte(suffix))
		if err != nil {
			log.Fatal(err)
		}
	default:
	}
}
|
||||
|
||||
func writeHeader(w io.Writer, cols []*SeedColumn, tableName, format string) {
|
||||
switch format {
|
||||
case csvExt:
|
||||
header := makeCSVHeaderStr(cols, tableName, format)
|
||||
_, err := w.Write([]byte(header + "\n"))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
case sqlExt:
|
||||
header := getSQLHeader(cols, tableName, format)
|
||||
_, err := w.Write([]byte(header + "\n"))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
case jsonExt:
|
||||
prefix := "{\"Rows\":["
|
||||
_, err := w.Write([]byte(prefix))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
default:
|
||||
log.Fatalf("unable to write the header, unsupported format %v \n", format)
|
||||
}
|
||||
}
|
||||
|
||||
func formatRow(strs []string, cols []*SeedColumn, currentRowIdx, lastRowIdx int, tableName, format string) string {
|
||||
switch format {
|
||||
case csvExt:
|
||||
return strings.Join(strs, ",") + "\n"
|
||||
case sqlExt:
|
||||
return getSQLRow(strs, cols, tableName) + "\n"
|
||||
case jsonExt:
|
||||
var suffix string
|
||||
if currentRowIdx == lastRowIdx {
|
||||
suffix = "\n"
|
||||
} else {
|
||||
suffix = ",\n"
|
||||
}
|
||||
return getJSONRow(strs, cols) + suffix
|
||||
default:
|
||||
log.Fatalf("cannot format row, unsupported file format %s \n", format)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func makeCSVHeaderStr(cols []*SeedColumn, tableName, format string) string {
|
||||
str := make([]string, 0, len(cols))
|
||||
for _, col := range cols {
|
||||
str = append(str, col.Name)
|
||||
}
|
||||
return formatRow(str, cols, 0, 1, tableName, format)
|
||||
}
|
||||
@@ -1,243 +0,0 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/file"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/performance/utils/sysbench_runner"
|
||||
)
|
||||
|
||||
// BenchmarkDoltImportJob runs the import benchmark for a single job against a
// fresh dolt repository in workingDir and returns the collected result
// (timings, size on disk, version metadata).
func BenchmarkDoltImportJob(job *ImportBenchmarkJob, workingDir, nbf string) (result, error) {
	// SQL-format jobs replace os.Stdin (see getBenchmarkingTools); restore it
	// when the benchmark finishes.
	oldStdin := os.Stdin
	defer func() { os.Stdin = oldStdin }()

	err := setupAndInitializeDoltRepo(filesys.LocalFS, workingDir, job.ExecPath, nbf)
	if err != nil {
		return result{}, err
	}

	defer RemoveDoltDataDir(filesys.LocalFS, workingDir) // remove the repo each time

	commandStr, args, err := getBenchmarkingTools(job, workingDir)
	if err != nil {
		return result{}, err
	}

	if commandStr == "" {
		return result{}, errors.New("failed to get command")
	}

	// testing.Benchmark decides the iteration count; err captures the last
	// failure observed inside the benchmark body.
	br := testing.Benchmark(func(b *testing.B) {
		err = runBenchmarkCommand(b, commandStr, args, workingDir)
	})
	if err != nil {
		return result{}, err
	}

	// Measure the resulting .dolt directory size before the deferred cleanup
	// removes it.
	size, err := getSizeOnDisk(filesys.LocalFS, workingDir)
	if err != nil {
		return result{}, err
	}

	return result{
		name:        job.Name,
		format:      job.Format,
		rows:        job.NumRows,
		columns:     len(genSampleCols()),
		sizeOnDisk:  size,
		br:          br,
		doltVersion: job.Version,
		program:     "dolt",
	}, nil
}
|
||||
|
||||
// setupAndInitializeDoltRepo calls the `dolt init` command on the workingDir to create a new Dolt repository.
|
||||
func setupAndInitializeDoltRepo(fs filesys.Filesys, workingDir, doltExecPath, nbf string) error {
|
||||
err := RemoveDoltDataDir(fs, workingDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = sysbench_runner.DoltVersion(context.Background(), doltExecPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = sysbench_runner.UpdateDoltConfig(context.Background(), doltExecPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if nbf != "" {
|
||||
err = os.Setenv("DOLT_DEFAULT_BIN_FORMAT", nbf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
init := execCommand(context.Background(), doltExecPath, "init")
|
||||
|
||||
init.Dir = workingDir
|
||||
return init.Run()
|
||||
}
|
||||
|
||||
// getBenchmarkingTools setups up the relevant environment for testing.
|
||||
func getBenchmarkingTools(job *ImportBenchmarkJob, workingDir string) (commandStr string, args []string, err error) {
|
||||
commandStr = job.ExecPath
|
||||
|
||||
switch job.Format {
|
||||
case csvExt:
|
||||
args = []string{"table", "import", "-c", "-f", testTable, job.Filepath}
|
||||
if job.SchemaPath != "" {
|
||||
args = append(args, "-s", job.SchemaPath)
|
||||
}
|
||||
case sqlExt:
|
||||
stdin, serr := getStdinForSQLBenchmark(filesys.LocalFS, job.Filepath)
|
||||
if serr != nil {
|
||||
err = serr
|
||||
return
|
||||
}
|
||||
|
||||
os.Stdin = stdin
|
||||
|
||||
args = []string{"sql"}
|
||||
case jsonExt:
|
||||
pathToSchemaFile := filepath.Join(workingDir, fmt.Sprintf("testSchema%s", job.Format))
|
||||
if job.SchemaPath != "" {
|
||||
pathToSchemaFile = job.SchemaPath
|
||||
}
|
||||
|
||||
args = []string{"table", "import", "-c", "-f", "-s", pathToSchemaFile, testTable, job.Filepath}
|
||||
default:
|
||||
err = errors.New(fmt.Sprintf("cannot import file, unsupported file format %s \n", job.Format))
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// runBenchmarkCommand runs and times the benchmark. This is the critical portion of the code
|
||||
func runBenchmarkCommand(b *testing.B, commandStr string, args []string, wd string) error {
|
||||
// Note that we can rerun this because dolt import uses the -f parameter
|
||||
for i := 0; i < b.N; i++ {
|
||||
cmd := execCommand(context.Background(), commandStr, args...)
|
||||
var errBytes bytes.Buffer
|
||||
cmd.Dir = wd
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = &errBytes
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(strings.TrimSpace(errBytes.String())) > 0 {
|
||||
return errors.New(fmt.Sprintf("error running benchmark: %s", errBytes.String()))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveDoltDataDir is used to remove the .dolt repository
|
||||
func RemoveDoltDataDir(fs filesys.Filesys, dir string) error {
|
||||
doltDir := filepath.Join(dir, dbfactory.DoltDir)
|
||||
exists, _ := fs.Exists(doltDir)
|
||||
if exists {
|
||||
return fs.Delete(doltDir, true)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func execCommand(ctx context.Context, name string, arg ...string) *exec.Cmd {
|
||||
e := exec.CommandContext(ctx, name, arg...)
|
||||
return e
|
||||
}
|
||||
|
||||
// getSizeOnDisk returns the size of the .dolt repo. This is useful for understanding how a repo grows in size in
|
||||
// proportion to the number of rows.
|
||||
func getSizeOnDisk(fs filesys.Filesys, workingDir string) (float64, error) {
|
||||
doltDir := filepath.Join(workingDir, dbfactory.DoltDir)
|
||||
exists, _ := fs.Exists(doltDir)
|
||||
|
||||
if !exists {
|
||||
return 0, errors.New("dir does not exist")
|
||||
}
|
||||
|
||||
size, err := dirSizeMB(doltDir)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
roundedStr := fmt.Sprintf("%.2f", size)
|
||||
rounded, _ := strconv.ParseFloat(roundedStr, 2)
|
||||
|
||||
return rounded, nil
|
||||
}
|
||||
|
||||
// cc: https://stackoverflow.com/questions/32482673/how-to-get-directory-total-size
|
||||
func dirSizeMB(path string) (float64, error) {
|
||||
var size int64
|
||||
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
size += info.Size()
|
||||
}
|
||||
return err
|
||||
})
|
||||
|
||||
sizeMB := float64(size) / 1024.0 / 1024.0
|
||||
|
||||
return sizeMB, err
|
||||
}
|
||||
|
||||
// getStdinForSQLBenchmark copies the SQL import file into a temp file and
// returns that file opened for reading, suitable for assigning to os.Stdin
// before running `dolt sql`.
func getStdinForSQLBenchmark(fs filesys.Filesys, pathToImportFile string) (*os.File, error) {
	content, err := fs.ReadFile(pathToImportFile)
	if err != nil {
		return nil, err
	}

	tmpfile, err := os.CreateTemp("", "temp")
	if err != nil {
		return nil, err
	}
	// The deferred remove fires after the final os.Open below; the returned
	// handle remains readable because the open descriptor outlives the unlink.
	// NOTE(review): this relies on POSIX unlink semantics — confirm if Windows
	// support is needed.
	defer file.Remove(tmpfile.Name()) // clean up

	if _, err := tmpfile.Write(content); err != nil {
		return nil, err
	}
	if err := tmpfile.Close(); err != nil {
		return nil, err
	}

	return os.Open(tmpfile.Name())
}
|
||||
@@ -0,0 +1,66 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
)
|
||||
|
||||
// getSizeOnDisk returns the size of the .dolt repo. This is useful for understanding how a repo grows in size in
|
||||
// proportion to the number of rows.
|
||||
func getSizeOnDisk(fs filesys.Filesys, workingDir string) (float64, error) {
|
||||
doltDir := filepath.Join(workingDir, dbfactory.DoltDir)
|
||||
exists, _ := fs.Exists(doltDir)
|
||||
|
||||
if !exists {
|
||||
return 0, errors.New("dir does not exist")
|
||||
}
|
||||
|
||||
size, err := dirSizeMB(doltDir)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
roundedStr := fmt.Sprintf("%.2f", size)
|
||||
rounded, _ := strconv.ParseFloat(roundedStr, 2)
|
||||
|
||||
return rounded, nil
|
||||
}
|
||||
|
||||
// cc: https://stackoverflow.com/questions/32482673/how-to-get-directory-total-size
|
||||
func dirSizeMB(path string) (float64, error) {
|
||||
var size int64
|
||||
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
size += info.Size()
|
||||
}
|
||||
return err
|
||||
})
|
||||
|
||||
sizeMB := float64(size) / 1024.0 / 1024.0
|
||||
|
||||
return sizeMB, err
|
||||
}
|
||||
@@ -1,196 +0,0 @@
|
||||
// Copyright 2019 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
type seedFunc func(col *SeedColumn, format string) string
|
||||
|
||||
// Container is used to correctly format strings enclosed in brackets:
// parentheses for SQL payloads, braces for JSON payloads.
type Container struct {
	// c holds the three pieces [opening bracket, payload, closing bracket].
	c []string
}
|
||||
|
||||
// NewContainer creates a new Container
|
||||
func NewContainer(format string) *Container {
|
||||
c := make([]string, 3)
|
||||
switch format {
|
||||
case sqlExt:
|
||||
c[0] = "("
|
||||
c[2] = ")"
|
||||
case jsonExt:
|
||||
c[0] = "{"
|
||||
c[2] = "}"
|
||||
default:
|
||||
log.Fatalf("cannot create new container, unsupported format %s \n", format)
|
||||
}
|
||||
return &Container{c: c}
|
||||
}
|
||||
|
||||
// InsertPayload returns the Container with the payload inserted, separated by the separator
|
||||
func (sc *Container) InsertPayload(payload []string, separator string) string {
|
||||
sc.c[1] = strings.Join(payload, separator)
|
||||
return strings.Join(sc.c, "")
|
||||
}
|
||||
|
||||
func getColValue(row []string, colIndex int, col *SeedColumn, sf seedFunc, format string) string {
|
||||
switch col.GenType {
|
||||
case increment:
|
||||
return genNomsTypeValueIncrement(row, colIndex, col, format)
|
||||
case random:
|
||||
return getNomsTypeValueRandom(col, sf, format)
|
||||
default:
|
||||
log.Fatalf("cannot get column value, unsupported gen type %s \n", col.GenType)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func genNomsTypeValueIncrement(row []string, colIndex int, col *SeedColumn, format string) string {
|
||||
switch col.Type {
|
||||
case types.IntKind:
|
||||
if len(row) > 0 {
|
||||
old, err := strconv.Atoi(row[colIndex])
|
||||
if err != nil {
|
||||
log.Fatalf(err.Error())
|
||||
}
|
||||
return fmt.Sprintf("%d", old+1)
|
||||
}
|
||||
return "1"
|
||||
default:
|
||||
log.Fatalf("cannot generate incremental value, unsupported noms type %s \n", col.Type.String())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// getNomsTypeValueRandom produces a random value for col by delegating to
// the supplied seedFunc.
func getNomsTypeValueRandom(col *SeedColumn, sf seedFunc, format string) string {
	return sf(col, format)
}
|
||||
|
||||
// seedRandom is a seedFunc that returns variably random strings for each supported
|
||||
// nomsKind type
|
||||
func seedRandom(col *SeedColumn, format string) string {
|
||||
switch col.Type {
|
||||
case types.IntKind:
|
||||
return fmt.Sprintf("%d", rand.Intn(1000))
|
||||
case types.StringKind:
|
||||
return getRandomString(format)
|
||||
default:
|
||||
log.Fatalf("cannot generate random value, unsupported noms type %s \n", col.Type.String())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getRandomString(format string) string {
|
||||
letters := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
b := make([]byte, rand.Intn(255))
|
||||
for i := range b {
|
||||
b[i] = letters[rand.Int63()%int64(len(letters))]
|
||||
}
|
||||
|
||||
switch format {
|
||||
case sqlExt:
|
||||
return fmt.Sprintf(`"%s"`, b)
|
||||
default:
|
||||
return string(b)
|
||||
}
|
||||
}
|
||||
|
||||
func getJSONRow(strs []string, cols []*SeedColumn) string {
|
||||
if len(strs) != len(cols) {
|
||||
log.Fatalf("values should be the length of columns. values: %+v, columns: %+v \n", strs, cols)
|
||||
}
|
||||
|
||||
payload := make([]string, 0)
|
||||
for i, col := range cols {
|
||||
load := fmt.Sprintf("\"%s\":\"%s\"", col.Name, strs[i])
|
||||
payload = append(payload, load)
|
||||
}
|
||||
|
||||
container := NewContainer(jsonExt)
|
||||
return container.InsertPayload(payload, ",")
|
||||
}
|
||||
|
||||
func getSQLRow(strs []string, cols []*SeedColumn, tableName string) string {
|
||||
container := NewContainer(sqlExt)
|
||||
sqlCols := make([]string, 0)
|
||||
|
||||
for _, col := range cols {
|
||||
sqlCols = append(sqlCols, fmt.Sprintf("`%s`", col.Name))
|
||||
}
|
||||
|
||||
fieldNames := container.InsertPayload(sqlCols, ",")
|
||||
values := container.InsertPayload(strs, ",")
|
||||
|
||||
return fmt.Sprintf("INSERT INTO `%s` %s VALUES %s;", tableName, fieldNames, values)
|
||||
}
|
||||
|
||||
// getSQLHeader builds the SQL preamble for a generated data file: a
// DROP TABLE IF EXISTS followed by a CREATE TABLE whose column definitions
// mirror cols (NOT NULL + PRIMARY KEY for key columns, AUTO_INCREMENT for
// incrementing columns, and a 'tag:<i>' comment carrying the column index).
func getSQLHeader(cols []*SeedColumn, tableName, format string) string {
	statement := make([]string, 0)
	statement = append(statement, fmt.Sprintf("DROP TABLE IF EXISTS `%s`;\n", tableName))
	statement = append(statement, fmt.Sprintf("CREATE TABLE `%s` ", tableName))

	container := NewContainer(format)
	schema := make([]string, 0)
	pkDefs := make([]string, 0)
	for i, col := range cols {
		// colStr is a format template filled with (name, SQL type) below.
		colStr := "`%s` %s"

		// handle pk
		if col.PrimaryKey {
			pkDefs = append(pkDefs, fmt.Sprintf("PRIMARY KEY (`%s`)", col.Name))
			colStr = "`%s` %s NOT NULL"
		}

		// handle increments
		if col.GenType == increment {
			colStr = fmt.Sprintf("%s AUTO_INCREMENT", colStr)
		}

		// append tag
		colStr = fmt.Sprintf("%s COMMENT 'tag:%d'", colStr, i)

		// translate noms type
		sqlType := typeinfo.FromKind(col.Type).ToSqlType().String()

		schema = append(schema, fmt.Sprintf(colStr, col.Name, strings.ToUpper(sqlType)))
	}

	// add pk definitions to create table statement
	for _, pkDef := range pkDefs {
		schema = append(schema, pkDef)
	}

	// create and close create table statement
	schemaStatement := container.InsertPayload(schema, ",\n")
	statement = append(statement, schemaStatement+"; \n")

	return strings.Join(statement, "")
}
|
||||
|
||||
// GetWorkingDir returns the current working directory, or the empty string
// if it cannot be determined (the os.Getwd error is deliberately ignored).
func GetWorkingDir() string {
	dir, _ := os.Getwd()
	return dir
}
|
||||
@@ -0,0 +1,45 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestImportSize runs the import-size benchmarks defined in
// testdata/size.yaml. Skipped by default: these benchmarks are long-running
// and meant to be invoked deliberately.
func TestImportSize(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/size.yaml")
}
|
||||
|
||||
// TestExternalImport runs the external-server import benchmarks defined in
// testdata/external.yaml. Skipped by default (long-running).
func TestExternalImport(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/external.yaml")
}
|
||||
|
||||
// TestDoltImport runs the dolt sql-server import benchmarks defined in
// testdata/dolt_server.yaml. Skipped by default (long-running).
func TestDoltImport(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/dolt_server.yaml")
}
|
||||
|
||||
// TestShuffle runs the shuffled-input import benchmarks defined in
// testdata/shuffle.yaml. Skipped by default (long-running).
func TestShuffle(t *testing.T) {
	t.Skip()
	RunTestsFile(t, "testdata/shuffle.yaml")
}
|
||||
|
||||
// TestCI runs the CI benchmark suite from testdata/ci.yaml. Skipped by
// default; prefer the standalone runner for speed.
func TestCI(t *testing.T) {
	// this will be a lot slower than running `cmd/main.go -test testdata/ci.yaml`
	t.Skip()
	RunTestsFile(t, "testdata/ci.yaml")
}
|
||||
@@ -1,201 +0,0 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-sql-driver/mysql"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/dolthub/dolt/go/performance/utils/sysbench_runner"
|
||||
)
|
||||
|
||||
const (
	// defaultHost and defaultPort form the TCP address the local MySQL
	// server is expected to listen on (see getServersArgs).
	defaultHost = "127.0.0.1"
	defaultPort = 3306

	// defaultSocket is the conventional MySQL unix-socket path on Linux.
	defaultSocket = "/var/run/mysqld/mysqld.sock"
	// dbName is the database created for the benchmark imports.
	dbName = "test"
)
|
||||
|
||||
// BenchmarkMySQLImportJobs launches a single MySQL server (using the first
// job's ExecPath), sets up the benchmark database, and runs each job's
// LOAD DATA import under testing.Benchmark, returning one result per job.
// The server goroutine is cancellable via SIGINT/SIGTERM.
func BenchmarkMySQLImportJobs(jobs []*ImportBenchmarkJob, mConfig sysbench_runner.MysqlConfig) ([]result, error) {
	if len(jobs) == 0 {
		return nil, nil
	}

	ctx := context.Background()
	withCancelCtx, cancel := context.WithCancel(ctx)

	gServer, serverCtx := errgroup.WithContext(withCancelCtx)
	var serverErr bytes.Buffer

	// Assume first server is okay
	server := getMysqlServer(serverCtx, jobs[0].ExecPath, getServersArgs())
	server.Stderr = &serverErr

	// launch the mysql server
	gServer.Go(func() error {
		err := server.Run()
		if err != nil {
			return err
		}
		// Any stderr output after a clean exit is still treated as an error.
		if len(strings.TrimSpace(serverErr.String())) > 0 {
			return errors.New(fmt.Sprintf("server produced stderr output: %s", serverErr.String()))
		}
		return nil
	})

	// sleep to allow the server to start
	// NOTE(review): a fixed 5s sleep is a race; a readiness probe would be
	// more robust.
	time.Sleep(5 * time.Second)

	// set up the relevant testing database and permissions
	err := sysbench_runner.SetupDB(ctx, mConfig, dbName)
	if err != nil {
		cancel()
		return nil, err
	}

	log.Println("successfully setup the database")

	// handle user interrupt: cancel the server context on SIGINT/SIGTERM
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, os.Interrupt, syscall.SIGTERM)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		<-quit
		defer wg.Done()
		signal.Stop(quit)
		cancel()
	}()

	results := make([]result, len(jobs))

	for i, job := range jobs {
		// benchmark the actual job; err captures the last failure seen
		// inside the benchmark body
		var err error
		br := testing.Benchmark(func(b *testing.B) {
			err = benchmarkLoadData(ctx, b, mConfig, job)
		})
		if err != nil {
			return nil, err
		}

		results[i] = result{
			name:        job.Name,
			format:      job.Format,
			rows:        job.NumRows,
			columns:     len(genSampleCols()),
			sizeOnDisk:  -1, // TODO: Think about how to collect MySQL table size
			br:          br,
			doltVersion: job.Version,
			program:     "mysql",
		}
	}

	return results, nil
}
|
||||
|
||||
// benchmarkLoadData runs one timed import iteration set against a running
// MySQL server: per b.N iteration it drops and recreates the test table from
// the job's schema file, then bulk-loads the job's CSV via
// LOAD DATA LOCAL INFILE. Uses a named error return so the deferred db.Close
// can surface its error when nothing else failed.
func benchmarkLoadData(ctx context.Context, b *testing.B, mConfig sysbench_runner.MysqlConfig, job *ImportBenchmarkJob) (err error) {
	var dsn string
	dsn, err = sysbench_runner.FormatDsn(mConfig)
	if err != nil {
		return
	}

	var db *sql.DB
	db, err = sql.Open("mysql", dsn)
	if err != nil {
		return
	}
	defer func() {
		// Preserve the primary error; only report Close failure if nothing
		// else went wrong.
		rerr := db.Close()
		if err == nil {
			err = rerr
		}
	}()

	err = db.Ping()
	if err != nil {
		return
	}

	_, err = db.ExecContext(ctx, fmt.Sprintf("USE %s", dbName))
	if err != nil {
		return
	}

	// Load the schema for the test table. This assumes the table has the same name as testTable
	var data []byte
	data, err = ioutil.ReadFile(job.SchemaPath)
	if err != nil {
		return
	}

	// Register the local file as per https://github.com/go-sql-driver/mysql#load-data-local-infile-support
	mysql.RegisterLocalFile(job.Filepath)

	// Exclude the connection/schema-read setup above from the timing.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		// Since dolt also creates the table on import we'll add dropping and creating the table to the benchmark
		_, err = db.ExecContext(ctx, fmt.Sprintf("DROP TABLE IF EXISTS %s", testTable))
		if err != nil {
			return
		}

		// Run the CREATE TABLE command stored in the schema file
		// TODO: This schema file must have the same name as testTable.
		_, err = db.ExecContext(ctx, string(data))
		if err != nil {
			return
		}

		// Run LOAD DATA on the csv file
		_, err = db.ExecContext(ctx, fmt.Sprintf(`LOAD DATA LOCAL INFILE '%s' REPLACE INTO TABLE %s FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES`, job.Filepath, testTable))
		if err != nil {
			return
		}

		log.Printf("MySQL server loaded file %s \n", job.Filepath)
	}

	return
}
|
||||
|
||||
// getServerArgs returns the arguments that run the mysql server
|
||||
func getServersArgs() []string {
|
||||
return []string{"--user=mysql", fmt.Sprintf("--port=%d", defaultPort), "--local-infile=ON"}
|
||||
}
|
||||
|
||||
// getMysqlServer returns an exec.Cmd that launches the server binary at
// serverExec with the given params, bound to ctx for cancellation.
func getMysqlServer(ctx context.Context, serverExec string, params []string) *exec.Cmd {
	return execCommand(ctx, serverExec, params...)
}
|
||||
@@ -0,0 +1 @@
|
||||
select * from import_perf_results order by 1,2;
|
||||
@@ -0,0 +1,35 @@
|
||||
select
|
||||
o.test_name as test_name,
|
||||
o.detail,
|
||||
o.row_cnt,
|
||||
o.sorted as sorted,
|
||||
o.time as mysql_time,
|
||||
(
|
||||
select round((a.time / b.time),2) m
|
||||
from import_perf_results a
|
||||
join import_perf_results b
|
||||
on
|
||||
a.test_name = b.test_name and
|
||||
a.detail = b.detail
|
||||
where
|
||||
a.server = 'dolt_server' and
|
||||
b.server = 'mysql' and
|
||||
a.test_name = o.test_name and
|
||||
a.detail = o.detail
|
||||
) as sql_mult,
|
||||
(
|
||||
select round((a.time / b.time),2) m
|
||||
from import_perf_results a
|
||||
join import_perf_results b
|
||||
on
|
||||
a.test_name = b.test_name and
|
||||
a.detail = b.detail
|
||||
where
|
||||
a.server = 'dolt_cli' and
|
||||
b.server = 'mysql' and
|
||||
a.test_name = o.test_name and
|
||||
a.detail = o.detail
|
||||
) as cli_mult
|
||||
from import_perf_results as o
|
||||
where o.server = 'mysql'
|
||||
order by 1,2;
|
||||
@@ -1,170 +0,0 @@
|
||||
// Copyright 2019 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
// result captures the outcome of benchmarking a single import job.
type result struct {
	name   string // job name
	format string // input file format extension (csv/sql/json)
	rows   int    // number of rows imported
	columns int   // number of columns in the imported table
	sizeOnDisk float64 // repo size in MB after import (-1 when unknown)
	br         testing.BenchmarkResult
	program    string // importer used, e.g. "dolt" or "mysql"
	doltVersion string // version label recorded for the run
}
|
||||
|
||||
// RSImpl is a Dataset containing results of benchmarking.
type RSImpl struct {
	// Schema defines the structure of the Dataset.
	Schema *SeedSchema

	// Results are results of benchmarking.
	Results []result

	// TableName is the name of the results table.
	TableName string

	// w is the writer where the results will be written.
	w io.Writer
}
|
||||
|
||||
// NewRSImpl creates a new RSImpl
|
||||
func NewRSImpl(w io.Writer, sch *SeedSchema, results []result, tableName string) *RSImpl {
|
||||
return &RSImpl{
|
||||
Schema: sch,
|
||||
Results: results,
|
||||
TableName: tableName,
|
||||
w: w,
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateData writes the results to a io.Writer.
func (rds *RSImpl) GenerateData() {
	// Delegates to writeResultsToWriter using the schema's columns and
	// file-format extension; unsupported formats are fatal in the helper.
	writeResultsToWriter(rds.w, rds.Results, rds.Schema.Columns, rds.TableName, rds.Schema.FileFormatExt)
}
|
||||
|
||||
// Change returns a DataSet that is a mutation of this Dataset by the given percentage.
// The pct argument is currently ignored and an empty RSImpl is returned.
func (rds *RSImpl) Change(pct float32) Dataset {
	// TODO
	return &RSImpl{}
}
|
||||
|
||||
func writeResultsToWriter(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
|
||||
switch format {
|
||||
case csvExt:
|
||||
generateCSVResults(wc, results, cols, tableName, format)
|
||||
default:
|
||||
log.Fatalf("cannot generate results data, file format %s unsupported \n", format)
|
||||
}
|
||||
}
|
||||
|
||||
func generateCSVResults(wc io.Writer, results []result, cols []*SeedColumn, tableName, format string) {
|
||||
header := makeCSVHeaderStr(cols, tableName, format)
|
||||
|
||||
_, err := wc.Write([]byte(header))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
for i, result := range results {
|
||||
row := getResultsRow(result, cols)
|
||||
|
||||
_, err := wc.Write([]byte(formatRow(row, cols, i, len(results)-1, tableName, format)))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getResultsRow flattens a single benchmark result into a row of string
// values. The index layout must match the column order produced by
// genResultsCols; a cols slice shorter than 11 entries would panic here.
func getResultsRow(res result, cols []*SeedColumn) []string {
	row := make([]string, len(cols))

	// set name
	row[0] = res.name
	// set program
	row[1] = res.program
	// set version
	row[2] = res.doltVersion
	// set format
	row[3] = res.format
	// set rows
	row[4] = fmt.Sprintf("%d", res.rows)
	// set cols
	row[5] = fmt.Sprintf("%d", res.columns)
	// set iterations (benchmark iteration count)
	row[6] = fmt.Sprintf("%d", res.br.N)
	// set time, rounded to 10ms for readability
	row[7] = res.br.T.Round(time.Millisecond * 10).String()
	// set size_on_disk
	row[8] = fmt.Sprintf("%v", res.sizeOnDisk)
	// set rows_per_second (total rows over total benchmark time)
	row[9] = fmt.Sprintf("%.2f", float64(res.rows)/res.br.T.Seconds())
	// set datetime: current wall-clock time, minute precision
	t := time.Now()
	row[10] = fmt.Sprintf("%04d-%02d-%02d %02d:%02d", t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute())
	return row
}
|
||||
|
||||
// genResultsCols returns the column schema for the benchmark-results
// table. Order must stay in sync with getResultsRow's index layout.
// NOTE(review): "time" is declared TimestampKind but getResultsRow
// writes a duration string into it — confirm the intended kind.
func genResultsCols() []*SeedColumn {
	return []*SeedColumn{
		NewSeedColumn("name", false, types.StringKind, supplied),
		NewSeedColumn("program", false, types.StringKind, supplied),
		NewSeedColumn("version", false, types.StringKind, supplied),
		NewSeedColumn("format", false, types.StringKind, supplied),
		NewSeedColumn("rows", false, types.StringKind, supplied),
		NewSeedColumn("columns", false, types.StringKind, supplied),
		NewSeedColumn("iterations", false, types.StringKind, supplied),
		NewSeedColumn("time", false, types.TimestampKind, supplied),
		NewSeedColumn("size_on_disk(MB)", false, types.StringKind, supplied),
		NewSeedColumn("rows_per_second", false, types.StringKind, supplied),
		NewSeedColumn("date_time", false, types.StringKind, supplied),
	}
}
|
||||
|
||||
// SerializeResults writes results to a date-stamped file under path in
// the given format and returns that file's path. Unsupported formats are
// fatal. NOTE(review): the file is created with os.ModePerm (0777) —
// consider tighter permissions.
func SerializeResults(results []result, path, tableName, format string) string {
	var sch *SeedSchema
	switch format {
	case csvExt:
		sch = NewSeedSchema(len(results), genResultsCols(), csvExt)
	default:
		log.Fatalf("cannot serialize results, unsupported file format %s \n", format)
	}
	now := time.Now()
	fs := filesys.LocalFS
	// e.g. benchmark_results-2022-11-30.csv
	resultsFile := filepath.Join(path, fmt.Sprintf("benchmark_results-%04d-%02d-%02d.%s", now.Year(), now.Month(), now.Day(), format))
	wc, err := fs.OpenForWrite(resultsFile, os.ModePerm)
	if err != nil {
		log.Fatal(err)
	}
	defer wc.Close()

	ds := NewRSImpl(wc, sch, results, tableName)
	ds.GenerateData()

	return resultsFile
}
|
||||
@@ -1,165 +0,0 @@
|
||||
// Copyright 2019 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
const (
	// supported serialization file extensions
	csvExt  = "csv"
	jsonExt = "json"
	sqlExt  = "sql"

	// row-value generation strategies for seed columns
	increment = GenType("increment")
	random    = GenType("random")
	supplied  = GenType("supplied")
)

// supportedFormats lists every file format a SeedSchema may target.
var supportedFormats = []string{csvExt, jsonExt, sqlExt}

// GenType specifies how to generate subsequent row values for a given SeedColumn, for a test dataset
type GenType string
|
||||
|
||||
// SeedSchema contains the schema to be used to generate a test Dataset
type SeedSchema struct {
	// Rows is size of the Dataset (number of data rows to generate)
	Rows int

	// Columns are the schema for the columns to be used for the Dataset
	Columns []*SeedColumn

	// FileFormatExt is the file format extension that directs how to construct the Dataset
	// as a string or as bytes
	FileFormatExt string
}
|
||||
|
||||
// NewSeedSchema creates a new SeedSchema
|
||||
func NewSeedSchema(rows int, cols []*SeedColumn, format string) *SeedSchema {
|
||||
for _, frmt := range supportedFormats {
|
||||
if format == frmt {
|
||||
return &SeedSchema{
|
||||
Rows: rows,
|
||||
Columns: cols,
|
||||
FileFormatExt: format,
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Fatalf("cannot build seed schema with unsupported file format %s \n", format)
|
||||
return &SeedSchema{}
|
||||
}
|
||||
|
||||
// Bytes returns a byte slice formatted according to the SeedSchema's FileFormatExt.
// Only json is currently supported; any other format is fatal.
func (sch *SeedSchema) Bytes() []byte {
	switch sch.FileFormatExt {
	case jsonExt:
		return getColSchemaJSON(sch.Columns)
	default:
		log.Fatalf("cannot create bytes from schema, unsupported format %s \n", sch.FileFormatExt)
	}
	// Unreachable (Fatalf exits); keeps the compiler satisfied.
	return []byte{}
}
|
||||
|
||||
// SeedColumn is used to create a column in a test dataset for benchmark testing
type SeedColumn struct {
	Name       string
	PrimaryKey bool
	// Type is the noms value kind stored in this column.
	Type types.NomsKind
	// GenType selects how successive row values are produced.
	GenType GenType
}
|
||||
|
||||
// NewSeedColumn creates a new SeedColumn
|
||||
func NewSeedColumn(name string, pk bool, t types.NomsKind, g GenType) *SeedColumn {
|
||||
if isValidGenType(t, g) {
|
||||
return &SeedColumn{
|
||||
Name: name,
|
||||
PrimaryKey: pk,
|
||||
Type: t,
|
||||
GenType: g,
|
||||
}
|
||||
}
|
||||
log.Fatalf("cannot use gen type %s with noms type %s \n", g, t.String())
|
||||
return &SeedColumn{}
|
||||
}
|
||||
|
||||
func isValidGenType(t types.NomsKind, g GenType) bool {
|
||||
var validTypes []types.NomsKind
|
||||
switch g {
|
||||
case increment:
|
||||
validTypes = []types.NomsKind{types.IntKind}
|
||||
case random:
|
||||
validTypes = []types.NomsKind{types.IntKind, types.StringKind}
|
||||
case supplied:
|
||||
validTypes = []types.NomsKind{
|
||||
types.IntKind,
|
||||
types.StringKind,
|
||||
types.TimestampKind,
|
||||
}
|
||||
default:
|
||||
log.Fatalf("unsupported gen type %s \n", g)
|
||||
}
|
||||
for _, v := range validTypes {
|
||||
if t == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getColSchemaJSON(seedCols []*SeedColumn) []byte {
|
||||
prefix := "{\"Columns\":["
|
||||
suffix := "]}"
|
||||
|
||||
statement := make([]string, 0)
|
||||
statement = append(statement, prefix)
|
||||
|
||||
schemaStr := "{\"tag\": %d,\"name\":\"%s\",\"kind\":\"%s\",\"is_part_of_pk\":%v,\"col_constraints\":%s}"
|
||||
jsonCols := make([]string, 0)
|
||||
|
||||
for i, sc := range seedCols {
|
||||
var pks []string
|
||||
if sc.PrimaryKey {
|
||||
pks = []string{"{\"constraint_type\": \"not_null\",\"params\": null}"}
|
||||
} else {
|
||||
pks = []string{}
|
||||
}
|
||||
jc := fmt.Sprintf(schemaStr, uint64(i), sc.Name, strings.ToLower(sc.Type.String()), sc.PrimaryKey, pks)
|
||||
jsonCols = append(jsonCols, jc)
|
||||
}
|
||||
|
||||
statement = append(statement, strings.Join(jsonCols, ","))
|
||||
statement = append(statement, suffix)
|
||||
return []byte(strings.Join(statement, ""))
|
||||
}
|
||||
|
||||
// TODO: Support autogeneration for a wider variety of types
// genSampleCols returns the default 10-column sample schema: an
// incrementing int primary key plus mixed int/string payload columns.
func genSampleCols() []*SeedColumn {
	return []*SeedColumn{
		NewSeedColumn("pk", true, types.IntKind, increment),
		NewSeedColumn("c1", false, types.IntKind, random),
		NewSeedColumn("c2", false, types.IntKind, increment),
		NewSeedColumn("c3", false, types.IntKind, random),
		NewSeedColumn("c4", false, types.IntKind, increment),
		NewSeedColumn("c5", false, types.IntKind, increment),
		NewSeedColumn("c6", false, types.StringKind, random),
		NewSeedColumn("c7", false, types.StringKind, random),
		NewSeedColumn("c8", false, types.StringKind, random),
		NewSeedColumn("c9", false, types.StringKind, random),
	}
}
|
||||
+227
@@ -0,0 +1,227 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "primary key types"
|
||||
repos: &repos
|
||||
# - name: mysql # mysqld --port 3308 --local-infile=1
|
||||
# external-server:
|
||||
# name: test
|
||||
# host: 127.0.0.1
|
||||
# user: root
|
||||
# password:
|
||||
# port: 3308
|
||||
- name: dolt_cli
|
||||
# - name: dolt_server
|
||||
# server:
|
||||
# port: 3308
|
||||
# args: [ "--port", "3308" ]
|
||||
|
||||
tables:
|
||||
- name: "int"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "float"
|
||||
schema: |
|
||||
create table xy (
|
||||
x float primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "varchar"
|
||||
schema: |
|
||||
create table xy (
|
||||
x varchar(20) primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "config width"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "2 cols"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "4 cols"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y int,
|
||||
z int,
|
||||
w int
|
||||
);
|
||||
- name: "8 cols"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y int,
|
||||
z int,
|
||||
w int,
|
||||
a int,
|
||||
b int,
|
||||
c int,
|
||||
d int
|
||||
);
|
||||
- name: "pk type"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "int"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "float"
|
||||
schema: |
|
||||
create table xy (
|
||||
x float primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "varchar"
|
||||
schema: |
|
||||
create table xy (
|
||||
x varchar(30) primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "schema types"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "varchar"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int,
|
||||
y varchar(30)
|
||||
);
|
||||
# - name: "geometry"
|
||||
# schema: |
|
||||
# create table xy (
|
||||
# x int primary key,
|
||||
# y geometry
|
||||
# );
|
||||
- name: "datetime"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y date
|
||||
);
|
||||
- name: "secondary indexes"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "no secondary"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "one index"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30),
|
||||
index x (x)
|
||||
);
|
||||
- name: "two index"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30),
|
||||
index x (x),
|
||||
index y (y)
|
||||
);
|
||||
- name: "four index"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30),
|
||||
index x (x),
|
||||
index y (y),
|
||||
index z (z),
|
||||
index w (w)
|
||||
);
|
||||
- name: "blobs"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "no blob"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "1 blob"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y blob,
|
||||
z varchar(30),
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "2 blobs"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y blob,
|
||||
z blob,
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "row count"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "400k"
|
||||
rows: 400000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "800k"
|
||||
rows: 800000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "1.6mm"
|
||||
rows: 1600000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "3.2mm"
|
||||
rows: 3200000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "sorting"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "shuffled"
|
||||
shuffle: true
|
||||
rows: 2000000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "sorted"
|
||||
shuffle: false
|
||||
rows: 2000000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
+238
@@ -0,0 +1,238 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "pk type"
|
||||
repos: &repos
|
||||
- name: dolt_cli
|
||||
- name: dolt_server
|
||||
server:
|
||||
port: 3308
|
||||
args: [ "--port", "3308" ]
|
||||
- name: mysql
|
||||
external-server:
|
||||
name: test
|
||||
host: 127.0.0.1
|
||||
user: root
|
||||
password: password
|
||||
port: 3309
|
||||
tables:
|
||||
- name: "int"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "float"
|
||||
schema: |
|
||||
create table xy (
|
||||
x float primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "varchar"
|
||||
schema: |
|
||||
create table xy (
|
||||
x varchar(20) primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "config width"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "2 cols"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "8 cols"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y int,
|
||||
z int,
|
||||
w int,
|
||||
a int,
|
||||
b int,
|
||||
c int,
|
||||
d int
|
||||
);
|
||||
- name: "32 cols"
|
||||
schema: |
|
||||
create table xy (
|
||||
a1 int primary key,
|
||||
a2 int, a3 int, a4 int, a5 int, a6 int, a7 int, a8 int, a9 int, a10 int, a11 int,
|
||||
a12 int, a13 int, a14 int, a15 int, a16 int, a17 int, a18 int, a19 int, a20 int, a21 int,
|
||||
a22 int, a23 int, a24 int, a25 int, a26 int, a27 int, a28 int, a29 int, a30 int, a31 int,
|
||||
a32 int
|
||||
);
|
||||
- name: "col type"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "varchar"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int,
|
||||
y varchar(30)
|
||||
);
|
||||
# - name: "geometry"
|
||||
# schema: |
|
||||
# create table xy (
|
||||
# x int primary key,
|
||||
# y geometry
|
||||
# );
|
||||
- name: "datetime"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y date
|
||||
);
|
||||
- name: "secondary index"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "no secondary"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "one index"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30),
|
||||
index x (x)
|
||||
);
|
||||
- name: "two index"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30),
|
||||
index x (x),
|
||||
index y (y)
|
||||
);
|
||||
- name: "four index"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30),
|
||||
index x (x),
|
||||
index y (y),
|
||||
index z (z),
|
||||
index w (w)
|
||||
);
|
||||
- name: "blob"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "no blob"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30),
|
||||
z varchar(30),
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "1 blob"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y blob,
|
||||
z varchar(30),
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "2 blobs"
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y blob,
|
||||
z blob,
|
||||
w varchar(30)
|
||||
);
|
||||
- name: "row count"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "400k"
|
||||
rows: 400000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "800k"
|
||||
rows: 800000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "1.6mm"
|
||||
rows: 1600000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "sorting"
|
||||
repos: *repos
|
||||
tables:
|
||||
- name: "shuffled 1mm"
|
||||
shuffle: true
|
||||
rows: 1000000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "sorted 1mm"
|
||||
shuffle: false
|
||||
rows: 1000000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "batching"
|
||||
repos:
|
||||
- name: dolt_server
|
||||
server:
|
||||
port: 3308
|
||||
args: [ "--port", "3308" ]
|
||||
- name: mysql
|
||||
external-server:
|
||||
name: test
|
||||
host: 127.0.0.1
|
||||
user: root
|
||||
password: password
|
||||
port: 3309
|
||||
tables:
|
||||
- name: "LOAD DATA"
|
||||
fmt: "csv"
|
||||
rows: 10000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "batch sql"
|
||||
fmt: "sql"
|
||||
rows: 10000
|
||||
batch: true
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "by line sql"
|
||||
fmt: "sql"
|
||||
rows: 10000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
@@ -0,0 +1,18 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "row count"
|
||||
repos:
|
||||
- name: repo1
|
||||
server:
|
||||
port: 3308
|
||||
args: [ "--port", "3308" ]
|
||||
tables:
|
||||
      - name: "40k"
        fmt: "csv"
        rows: 40000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
@@ -0,0 +1,21 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "row count"
|
||||
repos:
|
||||
- name: mysql
|
||||
external-server:
|
||||
name: test
|
||||
host: 127.0.0.1
|
||||
user: root
|
||||
password: password
|
||||
port: 4306
|
||||
tables:
|
||||
      - name: "40k"
        fmt: "csv"
        rows: 40000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
@@ -0,0 +1,26 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "sorting"
|
||||
repos:
|
||||
- name: repo1
|
||||
server:
|
||||
port: 3308
|
||||
args: [ "--port", "3308" ]
|
||||
tables:
|
||||
- name: "shuffle"
|
||||
shuffle: true
|
||||
rows: 100000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "sorted"
|
||||
shuffle: false
|
||||
rows: 100000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
@@ -0,0 +1,31 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "row count"
|
||||
repos: &repos
|
||||
- name: repo1
|
||||
tables:
|
||||
- name: "400k"
|
||||
fmt: "csv"
|
||||
rows: 400000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "800k"
|
||||
fmt: "csv"
|
||||
rows: 800000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "1.6mm"
|
||||
fmt: "csv"
|
||||
rows: 1600000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
@@ -0,0 +1,42 @@
|
||||
opts:
|
||||
seed: 0
|
||||
tests:
|
||||
- name: "sql"
|
||||
repos:
|
||||
- name: dolt
|
||||
server:
|
||||
port: 3308
|
||||
args: [ "--port", "3308" ]
|
||||
- name: mysql # mysqld --port 3308 --local-infile=1 --socket=/tmp/mysqld2.sock
|
||||
external-server:
|
||||
name: test
|
||||
host: 127.0.0.1
|
||||
user: root
|
||||
password:
|
||||
port: 3309
|
||||
tables:
|
||||
- name: "LOAD DATA"
|
||||
fmt: "csv"
|
||||
rows: 10000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "batch sql"
|
||||
fmt: "sql"
|
||||
rows: 10000
|
||||
batch: true
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
- name: "by line sql"
|
||||
fmt: "sql"
|
||||
rows: 10000
|
||||
schema: |
|
||||
create table xy (
|
||||
x int primary key,
|
||||
y varchar(30)
|
||||
);
|
||||
@@ -0,0 +1,700 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package import_benchmarker
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash"
|
||||
"github.com/creasty/defaults"
|
||||
sql2 "github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/vitess/go/sqltypes"
|
||||
ast "github.com/dolthub/vitess/go/vt/sqlparser"
|
||||
"github.com/stretchr/testify/require"
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
|
||||
driver "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils/sql_server_driver"
|
||||
)
|
||||
|
||||
const defaultBatchSize = 500
|
||||
|
||||
// TestDef is the top-level definition of tests to run.
type TestDef struct {
	Tests []ImportTest `yaml:"tests"`
	Opts  *Opts        `yaml:"opts"`
}

// Opts holds global options shared by every test in a TestDef.
type Opts struct {
	// Seed for deterministic data generation across runs.
	Seed int `yaml:"seed"`
}
|
||||
|
||||
// ImportTest is a single test to run. The Repos and MultiRepos will be created, and
// any Servers defined within them will be started. The interactions and
// assertions defined in Conns will be run.
type ImportTest struct {
	Name   string            `yaml:"name"`
	Repos  []driver.TestRepo `yaml:"repos"`
	Tables []Table           `yaml:"tables"`

	// Skip the entire test with this reason.
	Skip string `yaml:"skip"`

	// Results accumulates one ImportResult per (repo, table) run.
	Results *ImportResults
	// files caches generated input files — presumably keyed by a hash of
	// the table spec so identical specs share a file; confirm in the
	// IterImportTables implementation.
	files map[uint64]*os.File
	// tmpdir is where generated input files live for this test.
	tmpdir string
}
|
||||
|
||||
// Table describes one import benchmark case: the schema to create and
// the size/shape/format of the generated input data.
type Table struct {
	Name   string `yaml:"name"`
	Schema string `yaml:"schema"`
	// Rows is the number of data rows to generate.
	Rows int `default:"200000" yaml:"rows"`
	// Fmt selects the input format: "csv" (LOAD DATA) or "sql" (statements).
	Fmt string `default:"csv" yaml:"fmt"`
	// Shuffle randomizes row order; default is sorted input.
	Shuffle bool `default:"false" yaml:"shuffle"`
	// Batch groups sql-format inserts into batched statements.
	Batch bool `default:"false" yaml:"batch"`
	// TargetTable is the table to drop after a run; set at runtime,
	// not read from YAML.
	TargetTable string
}
|
||||
|
||||
func (s *Table) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
defaults.Set(s)
|
||||
|
||||
type plain Table
|
||||
if err := unmarshal((*plain)(s)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func ParseTestsFile(path string) (TestDef, error) {
|
||||
contents, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return TestDef{}, err
|
||||
}
|
||||
dec := yaml.NewDecoder(bytes.NewReader(contents))
|
||||
dec.KnownFields(true)
|
||||
var res TestDef
|
||||
err = dec.Decode(&res)
|
||||
return res, err
|
||||
}
|
||||
|
||||
func MakeRepo(rs driver.RepoStore, r driver.TestRepo) (driver.Repo, error) {
|
||||
repo, err := rs.MakeRepo(r.Name)
|
||||
if err != nil {
|
||||
return driver.Repo{}, err
|
||||
}
|
||||
return repo, nil
|
||||
}
|
||||
|
||||
// MakeServer starts a dolt sql-server for dc using the configuration in
// s. A nil s means no server was requested and returns (nil, nil).
func MakeServer(dc driver.DoltCmdable, s *driver.Server) (*driver.SqlServer, error) {
	if s == nil {
		return nil, nil
	}
	opts := []driver.SqlServerOpt{driver.WithArgs(s.Args...)}
	// A zero port leaves the driver's default port in effect.
	if s.Port != 0 {
		opts = append(opts, driver.WithPort(s.Port))
	}
	server, err := driver.StartSqlServer(dc, opts...)
	if err != nil {
		return nil, err
	}

	return server, nil
}
|
||||
|
||||
// ImportResult records the outcome of a single timed import run.
type ImportResult struct {
	detail string  // table/case name within the test
	server string  // repo or server the import ran against
	test   string  // parent test name
	time   float64 // wall-clock import time in seconds
	rows   int     // number of rows imported
	fmt    string  // input format ("csv" or "sql")
	sorted bool    // true when the input rows were not shuffled
	batch  bool    // true when sql statements were batched
}
|
||||
|
||||
// String renders the result as one human-readable summary line,
// e.g. "- test/server/detail: 1.23s".
func (r ImportResult) String() string {
	return fmt.Sprintf("- %s/%s/%s: %.2fs\n", r.test, r.server, r.detail, r.time)
}
|
||||
|
||||
// ImportResults is an ordered collection of benchmark outcomes.
type ImportResults struct {
	res []ImportResult
}

// append adds a single result to the collection.
func (r *ImportResults) append(ir ImportResult) {
	r.res = append(r.res, ir)
}
|
||||
|
||||
func (r *ImportResults) String() string {
|
||||
b := strings.Builder{}
|
||||
b.WriteString("Results:\n")
|
||||
for _, x := range r.res {
|
||||
b.WriteString(x.String())
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (r *ImportResults) SqlDump() string {
|
||||
b := strings.Builder{}
|
||||
b.WriteString(`CREATE TABLE IF NOT EXISTS import_perf_results (
|
||||
test_name varchar(64),
|
||||
server varchar(64),
|
||||
detail varchar(64),
|
||||
row_cnt int,
|
||||
time double,
|
||||
file_format varchar(8),
|
||||
sorted bool,
|
||||
batch bool,
|
||||
primary key (test_name, detail, server)
|
||||
);
|
||||
`)
|
||||
|
||||
b.WriteString("insert into import_perf_results values\n")
|
||||
for i, r := range r.res {
|
||||
if i > 0 {
|
||||
b.WriteString(",\n ")
|
||||
}
|
||||
var sorted int
|
||||
if r.sorted {
|
||||
sorted = 1
|
||||
}
|
||||
var batch int
|
||||
if r.batch {
|
||||
batch = 1
|
||||
}
|
||||
b.WriteString(fmt.Sprintf(
|
||||
"('%s', '%s', '%s', %d, %.2f, '%s', %b, %b)",
|
||||
r.test, r.server, r.detail, r.rows, r.time, r.fmt, sorted, batch))
|
||||
}
|
||||
b.WriteString(";\n")
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// InitWithTmpDir prepares the test to generate input files under s and
// resets the generated-file cache.
func (test *ImportTest) InitWithTmpDir(s string) {
	test.tmpdir = s
	test.files = make(map[uint64]*os.File)
}
|
||||
|
||||
// Run executes an import configuration. Test parallelism makes
|
||||
// runtimes resulting from this method unsuitable for reporting.
|
||||
func (test *ImportTest) Run(t *testing.T) {
|
||||
if test.Skip != "" {
|
||||
t.Skip(test.Skip)
|
||||
}
|
||||
var err error
|
||||
if test.Results == nil {
|
||||
test.Results = new(ImportResults)
|
||||
tmp, err := os.MkdirTemp("", "repo-store-")
|
||||
if err != nil {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
test.InitWithTmpDir(tmp)
|
||||
}
|
||||
|
||||
u, err := driver.NewDoltUser()
|
||||
for _, r := range test.Repos {
|
||||
if r.ExternalServer != nil {
|
||||
err := test.RunExternalServerTests(r.Name, r.ExternalServer)
|
||||
require.NoError(t, err)
|
||||
} else if r.Server != nil {
|
||||
err = test.RunSqlServerTests(r, u)
|
||||
require.NoError(t, err)
|
||||
} else {
|
||||
err = test.RunCliTests(r, u)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
}
|
||||
fmt.Println(test.Results.String())
|
||||
}
|
||||
|
||||
// RunExternalServerTests connects to a single externally provided server to run every test.
// A fresh connection is opened per table so connection state cannot leak
// between measurements.
func (test *ImportTest) RunExternalServerTests(repoName string, s *driver.ExternalServer) error {
	return test.IterImportTables(test.Tables, func(tab Table, f *os.File) error {
		db, err := driver.ConnectDB(s.User, s.Password, s.Name, s.Host, s.Port, nil)
		if err != nil {
			return err
		}
		defer db.Close()
		// Dispatch on the table's input format.
		switch tab.Fmt {
		case "csv":
			return test.benchLoadData(repoName, db, tab, f)
		case "sql":
			return test.benchSql(repoName, db, tab, f)
		default:
			return fmt.Errorf("unexpected table import format: %s", tab.Fmt)
		}
	})
}
|
||||
|
||||
// RunSqlServerTests creates a new repo and server for every import test.
// Starting from a fresh server isolates each measurement from state
// accumulated by earlier imports.
func (test *ImportTest) RunSqlServerTests(repo driver.TestRepo, user driver.DoltUser) error {
	return test.IterImportTables(test.Tables, func(tab Table, f *os.File) error {
		//make a new server for every test
		server, err := newServer(user, repo)
		if err != nil {
			return err
		}
		defer server.GracefulStop()

		db, err := server.DB(driver.Connection{User: "root", Pass: ""})
		if err != nil {
			return err
		}
		// Enable LOAD DATA LOCAL INFILE before importing.
		err = modifyServerForImport(db)
		if err != nil {
			return err
		}

		// Dispatch on the table's input format.
		switch tab.Fmt {
		case "csv":
			return test.benchLoadData(repo.Name, db, tab, f)
		case "sql":
			return test.benchSql(repo.Name, db, tab, f)
		default:
			return fmt.Errorf("unexpected table import format: %s", tab.Fmt)
		}
	})
}
|
||||
|
||||
func newServer(u driver.DoltUser, r driver.TestRepo) (*driver.SqlServer, error) {
|
||||
rs, err := u.MakeRepoStore()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// start dolt server
|
||||
repo, err := MakeRepo(rs, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.Server.Args = append(r.Server.Args, "")
|
||||
server, err := MakeServer(repo, r.Server)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if server != nil {
|
||||
server.DBName = r.Name
|
||||
}
|
||||
return server, nil
|
||||
}
|
||||
|
||||
// modifyServerForImport enables LOAD DATA LOCAL INFILE on the server,
// which is disabled by default.
func modifyServerForImport(db *sql.DB) error {
	_, err := db.Exec("SET GLOBAL local_infile=1 ")
	return err
}
|
||||
|
||||
// benchLoadData times a LOAD DATA LOCAL INFILE import of the CSV file f
// into the table created by tab.Schema, records the runtime in
// test.Results, then drops the table so later runs start clean.
// NOTE(review): LOAD DATA targets the hard-coded table `xy` while the
// drop uses tab.TargetTable — confirm these always name the same table.
func (test *ImportTest) benchLoadData(repoName string, db *sql.DB, tab Table, f *os.File) error {
	ctx := context.Background()
	// One dedicated connection so session state (local_infile) and the
	// created table are consistent across statements.
	conn, err := db.Conn(ctx)
	if err != nil {
		return err
	}
	defer conn.Close()

	// Create the target table from the test's schema.
	rows, err := conn.QueryContext(ctx, tab.Schema)
	if err == nil {
		rows.Close()
	} else {
		return err
	}

	start := time.Now()

	q := fmt.Sprintf(`
LOAD DATA LOCAL INFILE '%s' INTO TABLE xy
FIELDS TERMINATED BY ',' ENCLOSED BY ''
LINES TERMINATED BY '\n'
IGNORE 1 LINES;`, f.Name())

	// The import itself is the timed region.
	rows, err = conn.QueryContext(ctx, q)
	if err == nil {
		rows.Close()
	} else {
		return err
	}

	runtime := time.Since(start)

	test.Results.append(ImportResult{
		test:   test.Name,
		server: repoName,
		detail: tab.Name,
		time:   runtime.Seconds(),
		rows:   tab.Rows,
		fmt:    tab.Fmt,
		sorted: !tab.Shuffle,
		batch:  tab.Batch,
	})

	// Clean up the imported table.
	rows, err = conn.QueryContext(
		ctx,
		fmt.Sprintf("drop table %s;", tab.TargetTable),
	)
	if err == nil {
		rows.Close()
	} else {
		return err
	}

	return nil
}
|
||||
|
||||
func (test *ImportTest) benchSql(repoName string, db *sql.DB, tab Table, f *os.File) error {
|
||||
ctx := context.Background()
|
||||
conn, err := db.Conn(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
rows, err := conn.QueryContext(ctx, tab.Schema)
|
||||
if err == nil {
|
||||
rows.Close()
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
|
||||
defer conn.ExecContext(
|
||||
ctx,
|
||||
fmt.Sprintf("drop table %s;", tab.TargetTable),
|
||||
)
|
||||
|
||||
f.Seek(0, 0)
|
||||
s := bufio.NewScanner(f)
|
||||
s.Split(ScanQueries)
|
||||
start := time.Now()
|
||||
|
||||
for lineno := 1; s.Scan(); lineno++ {
|
||||
line := s.Text()
|
||||
var br bool
|
||||
switch {
|
||||
case line == "":
|
||||
return fmt.Errorf("unexpected blank line, line number: %d", lineno)
|
||||
case line == "\n":
|
||||
br = true
|
||||
default:
|
||||
}
|
||||
if br {
|
||||
break
|
||||
}
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
return fmt.Errorf("%s:%d: %v", f.Name(), lineno, err)
|
||||
}
|
||||
|
||||
_, err := conn.ExecContext(ctx, line)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
runtime := time.Since(start)
|
||||
|
||||
test.Results.append(ImportResult{
|
||||
test: test.Name,
|
||||
server: repoName,
|
||||
detail: tab.Name,
|
||||
time: runtime.Seconds(),
|
||||
rows: tab.Rows,
|
||||
fmt: tab.Fmt,
|
||||
sorted: !tab.Shuffle,
|
||||
batch: tab.Batch,
|
||||
})
|
||||
|
||||
if err == nil {
|
||||
rows.Close()
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ScanQueries is a bufio.SplitFunc that tokenizes input into
// semicolon-terminated statements, stripping a single trailing carriage
// return from each token.
func ScanQueries(data []byte, atEOF bool) (advance int, token []byte, err error) {
	switch i := bytes.IndexByte(data, ';'); {
	case atEOF && len(data) == 0:
		// No remaining input.
		return 0, nil, nil
	case i >= 0:
		// A complete semicolon-terminated statement; consume the semicolon
		// but exclude it from the token.
		return i + 1, bytes.TrimSuffix(data[:i], []byte{'\r'}), nil
	case atEOF:
		// Final, unterminated statement at end of input.
		return len(data), bytes.TrimSuffix(data, []byte{'\r'}), nil
	default:
		// Request more data.
		return 0, nil, nil
	}
}
|
||||
|
||||
// dropCR returns data with a single terminal carriage return removed, if
// one is present; otherwise it returns data unchanged.
func dropCR(data []byte) []byte {
	if n := len(data); n > 0 && data[n-1] == '\r' {
		return data[:n-1]
	}
	return data
}
|
||||
|
||||
// RunCliTests runs each import test on a new dolt repo to avoid accumulated
|
||||
// startup costs over time between tests.
|
||||
func (test *ImportTest) RunCliTests(r driver.TestRepo, user driver.DoltUser) error {
|
||||
return test.IterImportTables(test.Tables, func(tab Table, f *os.File) error {
|
||||
var err error
|
||||
|
||||
rs, err := user.MakeRepoStore()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
repo, err := MakeRepo(rs, r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = repo.DoltExec("sql", "-q", tab.Schema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// start timer
|
||||
start := time.Now()
|
||||
|
||||
cmd := repo.DoltCmd("table", "import", "-r", "--file-type", tab.Fmt, tab.TargetTable, f.Name())
|
||||
_, err = cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmd.Stderr = cmd.Stdout
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: %s", err, cmd.Stderr)
|
||||
}
|
||||
|
||||
// end timer, append result
|
||||
runtime := time.Since(start)
|
||||
|
||||
test.Results.append(ImportResult{
|
||||
test: test.Name,
|
||||
server: r.Name,
|
||||
detail: tab.Name,
|
||||
time: runtime.Seconds(),
|
||||
rows: tab.Rows,
|
||||
fmt: tab.Fmt,
|
||||
sorted: !tab.Shuffle,
|
||||
batch: tab.Batch,
|
||||
})
|
||||
|
||||
// reset repo at end
|
||||
return repo.DoltExec("sql", "-q", fmt.Sprintf("drop table %s", tab.TargetTable))
|
||||
})
|
||||
}
|
||||
|
||||
func (test *ImportTest) IterImportTables(tables []Table, cb func(t Table, f *os.File) error) error {
|
||||
for _, t := range tables {
|
||||
key, err := tableKey(t)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
table, names, types := parseTableAndSchema(t.Schema)
|
||||
t.TargetTable = table
|
||||
|
||||
if f, ok := test.files[key]; ok {
|
||||
// short circuit if we've already made file for schema/row count
|
||||
err = cb(t, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
rows := make([]string, 0, t.Rows)
|
||||
genRows(types, t.Rows, t.Fmt, func(r []string) {
|
||||
switch t.Fmt {
|
||||
case "csv":
|
||||
rows = append(rows, strings.Join(r, ","))
|
||||
case "sql":
|
||||
rows = append(rows, fmt.Sprintf("(%s)", strings.Join(r, ", ")))
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown format: %s", t.Fmt))
|
||||
}
|
||||
})
|
||||
|
||||
if t.Shuffle {
|
||||
rand.Shuffle(len(rows), func(i, j int) { rows[i], rows[j] = rows[j], rows[i] })
|
||||
}
|
||||
|
||||
f, err := os.CreateTemp(test.tmpdir, "import-data-")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch t.Fmt {
|
||||
case "csv":
|
||||
fmt.Fprintf(f, "%s\n", strings.Join(names, ","))
|
||||
for _, r := range rows {
|
||||
fmt.Fprintf(f, "%s\n", r)
|
||||
}
|
||||
case "sql":
|
||||
if t.Batch {
|
||||
batchSize := defaultBatchSize
|
||||
var i int
|
||||
for i+batchSize < len(rows) {
|
||||
fmt.Fprintf(f, newBatch(t.TargetTable, rows[i:i+batchSize]))
|
||||
i += batchSize
|
||||
}
|
||||
if i < len(rows) {
|
||||
fmt.Fprintf(f, newBatch(t.TargetTable, rows[i:]))
|
||||
}
|
||||
} else {
|
||||
for _, r := range rows {
|
||||
fmt.Fprintf(f, fmt.Sprintf("INSERT INTO %s VALUES %s;\n", t.TargetTable, r))
|
||||
}
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown format: %s", t.Fmt))
|
||||
}
|
||||
|
||||
// cache file for schema and row count
|
||||
test.files[key] = f
|
||||
|
||||
err = cb(t, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newBatch formats |rows| (each already a parenthesized value tuple) into a
// single multi-row INSERT statement for table |name|. Returns "" when |rows|
// is empty.
func newBatch(name string, rows []string) string {
	if len(rows) == 0 {
		// Guard the rows[len(rows)-1] access below; the original panicked here.
		return ""
	}
	var b strings.Builder
	fmt.Fprintf(&b, "INSERT INTO %s VALUES\n", name)
	for _, r := range rows[:len(rows)-1] {
		b.WriteString("  ")
		b.WriteString(r)
		b.WriteString(",\n")
	}
	b.WriteString("  ")
	b.WriteString(rows[len(rows)-1])
	b.WriteString(";\n")

	return b.String()
}
|
||||
|
||||
func tableKey(t Table) (uint64, error) {
|
||||
hash := xxhash.New()
|
||||
_, err := hash.Write([]byte(t.Schema))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if _, err := hash.Write([]byte(fmt.Sprintf("%#v,", t.Rows))); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
_, err = hash.Write([]byte(t.Fmt))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return hash.Sum64(), nil
|
||||
}
|
||||
|
||||
func parseTableAndSchema(q string) (string, []string, []sql2.Type) {
|
||||
stmt, _, err := ast.ParseOne(q)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("invalid query: %s; %s", q, err))
|
||||
}
|
||||
var types []sql2.Type
|
||||
var names []string
|
||||
var table string
|
||||
switch n := stmt.(type) {
|
||||
case *ast.DDL:
|
||||
table = n.Table.String()
|
||||
for _, col := range n.TableSpec.Columns {
|
||||
names = append(names, col.Name.String())
|
||||
typ, err := sql2.ColumnTypeToType(&col.Type)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("unexpected error reading type: %s", err))
|
||||
}
|
||||
types = append(types, typ)
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("expected CREATE TABLE, found: %s", q))
|
||||
}
|
||||
return table, names, types
|
||||
}
|
||||
|
||||
func genRows(types []sql2.Type, n int, fmt string, cb func(r []string)) {
|
||||
// generate |n| rows with column types
|
||||
for i := 0; i < n; i++ {
|
||||
row := make([]string, len(types))
|
||||
for j, t := range types {
|
||||
switch fmt {
|
||||
case "sql":
|
||||
switch t.Type() {
|
||||
case sqltypes.Blob, sqltypes.VarChar, sqltypes.Timestamp, sqltypes.Date:
|
||||
row[j] = "'" + genValue(i, t) + "'"
|
||||
default:
|
||||
row[j] = genValue(i, t)
|
||||
}
|
||||
default:
|
||||
row[j] = genValue(i, t)
|
||||
}
|
||||
}
|
||||
cb(row)
|
||||
}
|
||||
}
|
||||
|
||||
func genValue(i int, typ sql2.Type) string {
|
||||
switch typ.Type() {
|
||||
case sqltypes.Blob:
|
||||
return fmt.Sprintf("blob %d", i)
|
||||
case sqltypes.VarChar:
|
||||
return fmt.Sprintf("varchar %d", i)
|
||||
case sqltypes.Int8, sqltypes.Int16, sqltypes.Int32, sqltypes.Int64:
|
||||
return strconv.Itoa(i)
|
||||
case sqltypes.Float32, sqltypes.Float64:
|
||||
return strconv.FormatFloat(float64(i), 'E', -1, 32)
|
||||
case sqltypes.Bit:
|
||||
return strconv.Itoa(i)
|
||||
case sqltypes.Geometry:
|
||||
return `{"type": "Point", "coordinates": [1,2]}`
|
||||
case sqltypes.Timestamp:
|
||||
return "2019-12-31T12:00:00Z"
|
||||
case sqltypes.Date:
|
||||
return "2019-12-31T00:00:00Z"
|
||||
default:
|
||||
panic(fmt.Sprintf("expected type, found: %s", typ))
|
||||
}
|
||||
}
|
||||
|
||||
func RunTestsFile(t *testing.T, path string) {
|
||||
def, err := ParseTestsFile(path)
|
||||
require.NoError(t, err)
|
||||
for _, test := range def.Tests {
|
||||
t.Run(test.Name, test.Run)
|
||||
}
|
||||
}
|
||||
@@ -3,13 +3,17 @@ module github.com/dolthub/dolt/integration-tests/go-sql-server-driver
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/go-sql-driver/mysql v1.6.0
|
||||
github.com/dolthub/dolt/go v0.40.4
|
||||
github.com/stretchr/testify v1.8.0
|
||||
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/creasty/defaults v1.6.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/go-sql-driver/mysql v1.6.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
golang.org/x/sys v0.1.0 // indirect
|
||||
)
|
||||
|
||||
replace github.com/dolthub/dolt/go => ../../go/
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
github.com/creasty/defaults v1.6.0 h1:ltuE9cfphUtlrBeomuu8PEyISTXnxqkBIoQfXgv7BSc=
|
||||
github.com/creasty/defaults v1.6.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@@ -10,8 +12,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec h1:BkDtF2Ih9xZ7le9ndzTA7KJow28VbQW3odyk/8drmuI=
|
||||
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
@@ -17,20 +17,17 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"database/sql"
|
||||
driver "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils/sql_server_driver"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"gopkg.in/yaml.v3"
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// TestDef is the top-level definition of tests to run.
|
||||
type TestDef struct {
|
||||
Tests []Test `yaml:"tests"`
|
||||
}
|
||||
@@ -39,188 +36,15 @@ type TestDef struct {
|
||||
// any Servers defined within them will be started. The interactions and
|
||||
// assertions defined in Conns will be run.
|
||||
type Test struct {
|
||||
Name string `yaml:"name"`
|
||||
Repos []TestRepo `yaml:"repos"`
|
||||
MultiRepos []MultiRepo `yaml:"multi_repos"`
|
||||
Conns []Connection `yaml:"connections"`
|
||||
Name string `yaml:"name"`
|
||||
Repos []driver.TestRepo `yaml:"repos"`
|
||||
MultiRepos []driver.MultiRepo `yaml:"multi_repos"`
|
||||
Conns []driver.Connection `yaml:"connections"`
|
||||
|
||||
// Skip the entire test with this reason.
|
||||
Skip string `yaml:"skip"`
|
||||
}
|
||||
|
||||
// |Connection| represents a single connection to a sql-server instance defined
|
||||
// in the test. The connection will be established and every |Query| in
|
||||
// |Queries| will be run against it. At the end, the connection will be torn down.
|
||||
// If |RestartServer| is non-nil, the server which the connection targets will
|
||||
// be restarted after the connection is terminated.
|
||||
type Connection struct {
|
||||
On string `yaml:"on"`
|
||||
Queries []Query `yaml:"queries"`
|
||||
RestartServer *RestartArgs `yaml:"restart_server"`
|
||||
|
||||
// Rarely needed, allows the entire connection assertion to be retried
|
||||
// on an assertion failure. Use this is only for idempotent connection
|
||||
// interactions and only if the sql-server is prone to tear down the
|
||||
// connection based on things that are happening, such as cluster role
|
||||
// transitions.
|
||||
RetryAttempts int `yaml:"retry_attempts"`
|
||||
|
||||
// The user to connect as.
|
||||
User string `yaml:"user"`
|
||||
// The password to connect with.
|
||||
Pass string `yaml:"password"`
|
||||
PassFile string `yaml:"password_file"`
|
||||
// Any driver params to pass in the DSN.
|
||||
DriverParams map[string]string `yaml:"driver_params"`
|
||||
}
|
||||
|
||||
func (c Connection) Password() (string, error) {
|
||||
if c.PassFile != "" {
|
||||
bs, err := os.ReadFile(c.PassFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strings.TrimSpace(string(bs)), nil
|
||||
}
|
||||
return c.Pass, nil
|
||||
}
|
||||
|
||||
// |RestartArgs| are possible arguments, to change the arguments which are
|
||||
// provided to the sql-server process when it is restarted. This is used, for
|
||||
// example, to change server config on a restart.
|
||||
type RestartArgs struct {
|
||||
Args *[]string `yaml:"args"`
|
||||
}
|
||||
|
||||
// |TestRepo| represents an init'd dolt repository that is available to a
|
||||
// server instance. It can be created with some files and with remotes defined.
|
||||
// |Name| can include path components separated by `/`, which will create the
|
||||
// repository in a subdirectory.
|
||||
type TestRepo struct {
|
||||
Name string `yaml:"name"`
|
||||
WithFiles []WithFile `yaml:"with_files"`
|
||||
WithRemotes []WithRemote `yaml:"with_remotes"`
|
||||
|
||||
// Only valid on Test.Repos, not in Test.MultiRepos.Repos. If set, a
|
||||
// sql-server process will be run against this TestRepo. It will be
|
||||
// available as TestRepo.Name.
|
||||
Server *Server `yaml:"server"`
|
||||
}
|
||||
|
||||
// |MultiRepo| is a subdirectory where many |TestRepo|s can be defined. You can
|
||||
// start a sql-server on a |MultiRepo|, in which case there will be no default
|
||||
// database to connect to.
|
||||
type MultiRepo struct {
|
||||
Name string `yaml:"name"`
|
||||
Repos []TestRepo `yaml:"repos"`
|
||||
WithFiles []WithFile `yaml:"with_files"`
|
||||
|
||||
// If set, a sql-server process will be run against this TestRepo. It
|
||||
// will be available as MultiRepo.Name.
|
||||
Server *Server `yaml:"server"`
|
||||
}
|
||||
|
||||
// |WithRemote| defines remotes which should be defined on the repository
|
||||
// before the sql-server is started.
|
||||
type WithRemote struct {
|
||||
Name string `yaml:"name"`
|
||||
URL string `yaml:"url"`
|
||||
}
|
||||
|
||||
// |WithFile| defines a file and its contents to be created in a |Repo| or
|
||||
// |MultiRepo| before the servers are started.
|
||||
type WithFile struct {
|
||||
Name string `yaml:"name"`
|
||||
|
||||
// The contents of the file, provided inline in the YAML.
|
||||
Contents string `yaml:"contents"`
|
||||
|
||||
// A source file path to copy to |Name|. Mutually exclusive with
|
||||
// Contents.
|
||||
SourcePath string `yaml:"source_path"`
|
||||
}
|
||||
|
||||
// |Server| defines a sql-server process to start. |Name| must match the
|
||||
// top-level |Name| of a |TestRepo| or |MultiRepo|.
|
||||
type Server struct {
|
||||
Name string `yaml:"name"`
|
||||
Args []string `yaml:"args"`
|
||||
|
||||
// The |Port| which the server will be running on. For now, it is up to
|
||||
// the |Args| to make sure this is true. Defaults to 3308.
|
||||
Port int `yaml:"port"`
|
||||
|
||||
// Assertions to be run against the log output of the server process
|
||||
// after the server process successfully terminates.
|
||||
LogMatches []string `yaml:"log_matches"`
|
||||
|
||||
// Assertions to be run against the log output of the server process
|
||||
// after the server process exits with an error. If |ErrorMatches| is
|
||||
// defined, then the server process must exit with a non-0 exit code
|
||||
// after it is launched. This will be asserted before any |Connections|
|
||||
// interactions are performed.
|
||||
ErrorMatches []string `yaml:"error_matches"`
|
||||
}
|
||||
|
||||
// The primary interaction of a |Connection|. Either |Query| or |Exec| should
|
||||
// be set, not both.
|
||||
type Query struct {
|
||||
// Run a query against the connection.
|
||||
Query string `yaml:"query"`
|
||||
|
||||
// Run a command against the connection.
|
||||
Exec string `yaml:"exec"`
|
||||
|
||||
// Args to be passed as query parameters to either Query or Exec.
|
||||
Args []string `yaml:"args"`
|
||||
|
||||
// This can only be non-empty for a |Query|. Asserts the results of the
|
||||
// |Query|.
|
||||
Result QueryResult `yaml:"result"`
|
||||
|
||||
// If this is non-empty, asserts the the |Query| or the |Exec|
|
||||
// generates an error that matches this string.
|
||||
ErrorMatch string `yaml:"error_match"`
|
||||
|
||||
// If this is non-zero, it represents the number of times to try the
|
||||
// |Query| or the |Exec| and to check its assertions before we fail the
|
||||
// test as a result of failed assertions. When interacting with queries
|
||||
// that introspect things like replication state, this can be used to
|
||||
// wait for quiescence in an inherently racey process. Interactions
|
||||
// will be delayed slightly between each failure.
|
||||
RetryAttempts int `yaml:"retry_attempts"`
|
||||
}
|
||||
|
||||
// |QueryResult| specifies assertions on the results of a |Query|. Columns must
|
||||
// be specified for a |Query| and the query results must fully match. If Rows
|
||||
// are ommited, anything is allowed as long as all rows are read successfully.
|
||||
// All assertions here are string equality.
|
||||
type QueryResult struct {
|
||||
Columns []string `yaml:"columns"`
|
||||
Rows ResultRows `yaml:"rows"`
|
||||
}
|
||||
|
||||
type ResultRows struct {
|
||||
Or *[][][]string
|
||||
}
|
||||
|
||||
func (r *ResultRows) UnmarshalYAML(value *yaml.Node) error {
|
||||
if value.Kind == yaml.SequenceNode {
|
||||
res := make([][][]string, 1)
|
||||
r.Or = &res
|
||||
return value.Decode(&(*r.Or)[0])
|
||||
}
|
||||
var or struct {
|
||||
Or *[][][]string `yaml:"or"`
|
||||
}
|
||||
err := value.Decode(&or)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.Or = or.Or
|
||||
return nil
|
||||
}
|
||||
|
||||
func ParseTestsFile(path string) (TestDef, error) {
|
||||
contents, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
@@ -233,35 +57,11 @@ func ParseTestsFile(path string) (TestDef, error) {
|
||||
return res, err
|
||||
}
|
||||
|
||||
func (f WithFile) WriteAtDir(dir string) error {
|
||||
path := filepath.Join(dir, f.Name)
|
||||
d := filepath.Dir(path)
|
||||
err := os.MkdirAll(d, 0750)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if f.SourcePath != "" {
|
||||
source, err := os.Open(f.SourcePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer source.Close()
|
||||
dest, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0550)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = io.Copy(dest, source)
|
||||
return err
|
||||
} else {
|
||||
return os.WriteFile(path, []byte(f.Contents), 0550)
|
||||
}
|
||||
}
|
||||
|
||||
func MakeRepo(t *testing.T, rs RepoStore, r TestRepo) Repo {
|
||||
func MakeRepo(t *testing.T, rs driver.RepoStore, r driver.TestRepo) driver.Repo {
|
||||
repo, err := rs.MakeRepo(r.Name)
|
||||
require.NoError(t, err)
|
||||
for _, f := range r.WithFiles {
|
||||
require.NoError(t, f.WriteAtDir(repo.dir))
|
||||
require.NoError(t, f.WriteAtDir(repo.Dir))
|
||||
}
|
||||
for _, remote := range r.WithRemotes {
|
||||
require.NoError(t, repo.CreateRemote(remote.Name, remote.URL))
|
||||
@@ -269,15 +69,15 @@ func MakeRepo(t *testing.T, rs RepoStore, r TestRepo) Repo {
|
||||
return repo
|
||||
}
|
||||
|
||||
func MakeServer(t *testing.T, dc DoltCmdable, s *Server) *SqlServer {
|
||||
func MakeServer(t *testing.T, dc driver.DoltCmdable, s *driver.Server) *driver.SqlServer {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
opts := []SqlServerOpt{WithArgs(s.Args...)}
|
||||
opts := []driver.SqlServerOpt{driver.WithArgs(s.Args...)}
|
||||
if s.Port != 0 {
|
||||
opts = append(opts, WithPort(s.Port))
|
||||
opts = append(opts, driver.WithPort(s.Port))
|
||||
}
|
||||
server, err := StartSqlServer(dc, opts...)
|
||||
server, err := driver.StartSqlServer(dc, opts...)
|
||||
require.NoError(t, err)
|
||||
if len(s.ErrorMatches) > 0 {
|
||||
err := server.ErrorStop()
|
||||
@@ -309,12 +109,12 @@ func (test Test) Run(t *testing.T) {
|
||||
t.Skip(test.Skip)
|
||||
}
|
||||
|
||||
u, err := NewDoltUser()
|
||||
u, err := driver.NewDoltUser()
|
||||
require.NoError(t, err)
|
||||
rs, err := u.MakeRepoStore()
|
||||
require.NoError(t, err)
|
||||
|
||||
servers := make(map[string]*SqlServer)
|
||||
servers := make(map[string]*driver.SqlServer)
|
||||
|
||||
for _, r := range test.Repos {
|
||||
repo := MakeRepo(t, rs, r)
|
||||
@@ -327,7 +127,7 @@ func (test Test) Run(t *testing.T) {
|
||||
}
|
||||
for _, mr := range test.MultiRepos {
|
||||
// Each MultiRepo gets its own dolt config --global.
|
||||
u, err := NewDoltUser()
|
||||
u, err := driver.NewDoltUser()
|
||||
require.NoError(t, err)
|
||||
rs, err = u.MakeRepoStore()
|
||||
require.NoError(t, err)
|
||||
@@ -335,7 +135,7 @@ func (test Test) Run(t *testing.T) {
|
||||
MakeRepo(t, rs, r)
|
||||
}
|
||||
for _, f := range mr.WithFiles {
|
||||
require.NoError(t, f.WriteAtDir(rs.dir))
|
||||
require.NoError(t, f.WriteAtDir(rs.Dir))
|
||||
}
|
||||
|
||||
server := MakeServer(t, rs, mr.Server)
|
||||
@@ -416,7 +216,7 @@ func (r *retryTestingT) try(attempts int, test func(require.TestingT)) {
|
||||
r.errorfArgs = nil
|
||||
r.failNow = false
|
||||
if i != 0 {
|
||||
time.Sleep(RetrySleepDuration)
|
||||
time.Sleep(driver.RetrySleepDuration)
|
||||
}
|
||||
func() {
|
||||
defer func() {
|
||||
@@ -449,13 +249,13 @@ func RetryTestRun(t *testing.T, attempts int, test func(require.TestingT)) {
|
||||
rtt.try(attempts, test)
|
||||
}
|
||||
|
||||
func RunQuery(t *testing.T, conn *sql.Conn, q Query) {
|
||||
func RunQuery(t *testing.T, conn *sql.Conn, q driver.Query) {
|
||||
RetryTestRun(t, q.RetryAttempts, func(t require.TestingT) {
|
||||
RunQueryAttempt(t, conn, q)
|
||||
})
|
||||
}
|
||||
|
||||
func RunQueryAttempt(t require.TestingT, conn *sql.Conn, q Query) {
|
||||
func RunQueryAttempt(t require.TestingT, conn *sql.Conn, q driver.Query) {
|
||||
args := make([]any, len(q.Args))
|
||||
for i := range q.Args {
|
||||
args[i] = q.Args[i]
|
||||
|
||||
Reference in New Issue
Block a user