Revamp the Import Benchmarker (#3744)

2026-04-21 19:39:04 -05:00 · 2022-07-13 18:27:14 -07:00
parent 35a6eeca6b
commit 3f985c76d1
22 changed files with 1356 additions and 504 deletions
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+set -e
+
+if [ "$#" -lt 8 ]; then
+    echo  "Usage: ./get-dolt-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <issueNumber>"
+    exit 1
+fi
+
+jobName="$1"
+fromServer="$2"
+fromVersion="$3"
+toServer="$4"
+toVersion="$5"
+timePrefix="$6"
+actorPrefix="$7"
+issueNumber="$8" # TODO: Use this to paste the results onto the github issue
+
+echo '
+{
+  "apiVersion": "batch/v1",
+  "kind": "Job",
+  "metadata": {
+    "name": "'$jobName'",
+    "namespace": "performance-benchmarking"
+  },
+  "spec": {
+    "backoffLimit": 1,
+    "template": {
+      "spec": {
+        "serviceAccountName": "import-benchmarking",
+        "containers": [
+          {
+            "name": "import-benchmarking",
+            "image": "407903926827.dkr.ecr.us-west-2.amazonaws.com/liquidata/import-benchmarking:latest",
+            "resources": {
+              "limits": {
+                "cpu": "7000m"
+              }
+            },
+            "env": [
+              { "name": "GOMAXPROCS", "value": "7" },
+              { "name": "ACTOR", "value": "'$ACTOR'" },
+              { "name": "ACTOR_EMAIL", "value": "'$ACTOR_EMAIL'" },
+              { "name": "REPO_ACCESS_TOKEN", "value": "'$REPO_ACCESS_TOKEN'" }
+            ],
+            "imagePullPolicy": "Always",
+            "args": [
+              "--from-server='$fromServer'",
+              "--from-version='$fromVersion'",
+              "--to-server='$toServer'",
+              "--to-version='$toVersion'",
+              "--bucket=import-benchmarking-github-actions-results",
+              "--region=us-west-2",
+              "--results-dir='$timePrefix'",
+              "--results-prefix='$actorPrefix'",
+              "--fileNames=100k-sorted.csv",
+              "--fileNames=100k-random.csv",
+              "--fileNames=1m-sorted.csv",
+              "--fileNames=1m-random.csv"
+            ]
+          }
+        ],
+        "restartPolicy": "Never",
+        "nodeSelector": {
+          "performance-benchmarking-worker": "true"
+        },
+        "tolerations": [
+          {
+              "effect": "NoSchedule",
+              "key": "dedicated",
+              "operator": "Equal",
+              "value": "performance-benchmarking-worker"
+          }
+        ]
+      }
+    }
+  }
+}
+'
@@ -0,0 +1,82 @@
+#!/bin/sh
+
+set -e
+
+if [ "$#" -lt 8 ]; then
+    echo  "Usage: ./get-mysql-dolt-job-json.sh <jobName> <fromServer> <fromVersion> <toServer> <toVersion> <timePrefix> <actorPrefix> <issueNumber>"
+    exit 1
+fi
+
+jobName="$1"
+fromServer="$2"
+fromVersion="$3"
+toServer="$4"
+toVersion="$5"
+timePrefix="$6"
+actorPrefix="$7"
+issueNumber="$8" # TODO: Use this to paste the results onto the github issue
+
+echo '
+{
+  "apiVersion": "batch/v1",
+  "kind": "Job",
+  "metadata": {
+    "name": "'$jobName'",
+    "namespace": "performance-benchmarking"
+  },
+  "spec": {
+    "backoffLimit": 1,
+    "template": {
+      "spec": {
+        "serviceAccountName": "import-benchmarking",
+        "containers": [
+          {
+            "name": "import-benchmarking",
+            "image": "407903926827.dkr.ecr.us-west-2.amazonaws.com/liquidata/import-benchmarking:latest",
+            "resources": {
+              "limits": {
+                "cpu": "7000m"
+              }
+            },
+            "env": [
+              { "name": "GOMAXPROCS", "value": "7" },
+              { "name": "ACTOR", "value": "'$ACTOR'" },
+              { "name": "ACTOR_EMAIL", "value": "'$ACTOR_EMAIL'" },
+              { "name": "REPO_ACCESS_TOKEN", "value": "'$REPO_ACCESS_TOKEN'" }
+            ],
+            "imagePullPolicy": "Always",
+            "args": [
+              "--from-server='$fromServer'",
+              "--from-version='$fromVersion'",
+              "--to-server='$toServer'",
+              "--to-version='$toVersion'",
+              "--bucket=import-benchmarking-github-actions-results",
+              "--region=us-west-2",
+              "--results-dir='$timePrefix'",
+              "--results-prefix='$actorPrefix'",
+              "--mysql-exec=/usr/sbin/mysqld",
+              "--mysql-schema-file=schema.sql",
+              "--fileNames=100k-sorted.csv",
+              "--fileNames=100k-random.csv",
+              "--fileNames=1m-sorted.csv",
+              "--fileNames=1m-random.csv"
+            ]
+          }
+        ],
+        "restartPolicy": "Never",
+        "nodeSelector": {
+          "performance-benchmarking-worker": "true"
+        },
+        "tolerations": [
+          {
+              "effect": "NoSchedule",
+              "key": "dedicated",
+              "operator": "Equal",
+              "value": "performance-benchmarking-worker"
+          }
+        ]
+      }
+    }
+  }
+}
+'
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+set -e
+
+if [ -z "$KUBECONFIG" ]; then
+    echo  "Must set KUBECONFIG"
+    exit 1
+fi
+
+if [ -z "$TEMPLATE_SCRIPT" ]; then
+    echo  "Must set TEMPLATE_SCRIPT"
+    exit 1
+fi
+
+if [ -z "$FROM_SERVER" ] || [ -z "$FROM_VERSION" ] || [ -z "$TO_SERVER" ] || [ -z "$TO_VERSION" ]; then
+    echo  "Must set FROM_SERVER FROM_VERSION TO_SERVER and TO_VERSION"
+    exit 1
+fi
+
+if [ -z "$ACTOR" ]; then
+    echo  "Must set ACTOR"
+    exit 1
+fi
+
+if [ -z "$MODE" ]; then
+    echo  "Must set MODE"
+    exit 1
+fi
+
+echo "Setting from $FROM_SERVER: $FROM_VERSION"
+
+# use first 8 characters of TO_VERSION to differentiate
+# jobs
+short=${TO_VERSION:0:8}
+lowered=$(echo "$ACTOR" | tr '[:upper:]' '[:lower:]')
+actorShort="$lowered-$short"
+
+# random sleep
+sleep 0.$[ ( $RANDOM % 10 )  + 1 ]s
+
+timesuffix=`date +%s%N`
+jobname="$actorShort-$timesuffix"
+
+timeprefix=$(date +%Y/%m/%d)
+
+actorprefix="$MODE/$ACTOR/$actorShort"
+
+# set value to ISSUE_NUMBER environment variable
+# or default to -1
+issuenumber=${ISSUE_NUMBER:-"-1"}
+
+source \
+  "$TEMPLATE_SCRIPT" \
+  "$jobname"         \
+  "$FROM_VERSION"    \
+  "$TO_VERSION"      \
+  "$timeprefix"      \
+  "$actorprefix"     \
+  "$issuenumber"      > job.json
+
+out=$(KUBECONFIG="$KUBECONFIG" kubectl apply -f job.json || true)
+
+if [ "$out" != "job.batch/$jobname created" ]; then
+  echo "something went wrong creating job... this job likely already exists in the cluster"
+  echo "$out"
+  exit 1
+else
+  echo "$out"
+fi
+
+exit 0
@@ -0,0 +1,98 @@
+# Creates an import benchmarking Job for a pull request when the PR is opened
+# with, or receives a comment containing, the '#import-benchmark' trigger.
+name: Run Import Benchmark on Pull Requests
+
+on:
+  pull_request:
+    types: [ opened ]
+  issue_comment:
+    types: [ created ]
+
+jobs:
+  # Runs validate-commentor.sh for the triggering actor; the `valid` output of
+  # that step gates every later job.
+  validate-commentor:
+    runs-on: ubuntu-18.04
+    outputs:
+      valid: ${{ steps.set_valid.outputs.valid }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Validate Commentor
+        id: set_valid
+        run: ./.github/scripts/performance-benchmarking/validate-commentor.sh "$ACTOR"
+        env:
+          ACTOR: ${{ github.actor }}
+
+  # Looks for the '#import-benchmark' trigger and, when it is present, sets
+  # the `benchmark` output to 'true'.
+  check-comments:
+    runs-on: ubuntu-18.04
+    needs: validate-commentor
+    if: ${{ needs.validate-commentor.outputs.valid == 'true' }}
+    outputs:
+      # Only `benchmark` is exported: every output listed here must be backed
+      # by a step id that exists in this job.
+      benchmark: ${{ steps.set_benchmark.outputs.benchmark }}
+    steps:
+      - name: Check for Deploy Trigger
+        uses: dolthub/pull-request-comment-trigger@master
+        id: check
+        with:
+          trigger: '#import-benchmark'
+          reaction: rocket
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set Benchmark
+        if: ${{ steps.check.outputs.triggered == 'true' }}
+        id: set_benchmark
+        run: |
+          echo "::set-output name=benchmark::true"
+
+  # Sets up kubectl and cluster credentials, then runs run-benchmarks.sh to
+  # create the benchmarking Job.
+  performance:
+    runs-on: ubuntu-18.04
+    needs: [validate-commentor, check-comments]
+    if: ${{ needs.check-comments.outputs.benchmark == 'true' }}
+    name: Benchmark Import Performance
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - uses: azure/setup-kubectl@v2.0
+        with:
+          version: 'v1.23.6'
+      - name: Install aws-iam-authenticator
+        run: |
+          curl -o aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.18.8/2020-09-18/bin/linux/amd64/aws-iam-authenticator && \
+          chmod +x ./aws-iam-authenticator && \
+          sudo cp ./aws-iam-authenticator /usr/local/bin/aws-iam-authenticator
+          aws-iam-authenticator version
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-west-2
+      # Provides the pull request's head_sha, used below as TO_VERSION.
+      - uses: xt0rted/pull-request-comment-branch@v1
+        id: comment-branch
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+      # Writes the kubeconfig from the secret and points it at a user that
+      # authenticates through aws-iam-authenticator.
+      - name: Create and Auth kubeconfig
+        run: |
+          echo "$CONFIG" > kubeconfig
+          KUBECONFIG=kubeconfig kubectl config set-credentials github-actions-dolt --exec-api-version=client.authentication.k8s.io/v1alpha1 --exec-command=aws-iam-authenticator --exec-arg=token --exec-arg=-i --exec-arg=eks-cluster-1
+          KUBECONFIG=kubeconfig kubectl config set-context github-actions-dolt-context --cluster=eks-cluster-1 --user=github-actions-dolt --namespace=performance-benchmarking
+          KUBECONFIG=kubeconfig kubectl config use-context github-actions-dolt-context
+        env:
+          CONFIG: ${{ secrets.CORP_KUBECONFIG }}
+      - name: Get pull number
+        uses: actions/github-script@v3
+        id: get_pull_number
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: core.setOutput("pull_number", JSON.stringify(context.issue.number));
+      # Benchmarks dolt at github.sha (FROM) against dolt at the pull request
+      # head commit (TO).
+      - name: Run benchmarks
+        id: run-benchmarks
+        run: ./.github/scripts/import-benchmarking/run-benchmarks.sh
+        env:
+          FROM_SERVER: "dolt"
+          FROM_VERSION: ${{ github.sha }}
+          TO_SERVER: "dolt"
+          TO_VERSION: ${{ steps.comment-branch.outputs.head_sha }}
+          MODE: 'pullRequest'
+          ISSUE_NUMBER: ${{ steps.get_pull_number.outputs.pull_number }}
+          ACTOR: ${{ github.actor }}
+          REPO_ACCESS_TOKEN: ${{ secrets.REPO_ACCESS_TOKEN }}
+          KUBECONFIG: "./kubeconfig"
+          TEMPLATE_SCRIPT: "./.github/scripts/import-benchmarking/get-dolt-dolt-job-json.sh"