# dolt/.github/workflows/merge-perf.yaml

# Merge benchmark workflow; it is started only by a `benchmark-merge`
# repository_dispatch event.
name: "Merge Benchmarks"
on:
  repository_dispatch:
    types:
      - benchmark-merge
env:
  # Workflow-wide settings that the steps read through the `env` context.
  DOLTHUB_DB: "import-perf/merge-perf"
  RESULT_TABLE_NAME: "merge_perf_results"
  SCRIPT_DIR: ".github/scripts/merge-perf"
jobs:
  bench:
    name: Benchmark
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
    defaults:
      run:
        # Every `run` step of this job is executed with bash.
        shell: bash
    steps:
    # Check out the ref named by the dispatch payload's `version` field.
    - uses: actions/checkout@v4
      with:
        ref: "${{ github.event.client_payload.version }}"

    # The Go toolchain version is read from go/go.mod.
    - id: go
      name: Set up Go 1.x
      uses: actions/setup-go@v5
      with:
        go-version-file: "go/go.mod"

    # The version stays quoted so it is not parsed as the float 3.1.
    - name: Setup Python 3.x
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    # NOTE(review): `version` is only a shell variable local to this step; it is
    # neither exported nor written to $GITHUB_OUTPUT, and no step visible in this
    # workflow reads an output of `steps.version` - confirm whether it is needed.
    - id: version
      name: Dolt version
      run: |
        version=${{ github.event.client_payload.version }}

    # Build and install the dolt binary from the checked-out `go` module.
    - name: Install dolt
      run: go install ./cmd/dolt
      working-directory: ./go

    # Global user identity for the dolt CLI.
    - id: config
      name: Config dolt
      run: |
        dolt config --global --add user.name "merge-perf"
        dolt config --global --add user.email "merge-perf@dolthub.com"

    # Times `dolt merge --squash main` once per edit scenario and records each
    # measurement as an INSERT statement in $GITHUB_WORKSPACE/results.sql. The
    # path of that file is exposed as the `result_path` step output.
    - name: Run bench
      id: bench
      run: |
        gw="$GITHUB_WORKSPACE"
        DATADIR="$gw/data"

        # initialize results sql import (truncate so the file always starts clean)
        RESULTS="$gw/results.sql"
        echo "CREATE TABLE ${{env.RESULT_TABLE_NAME }} (name varchar(50) primary key, table_cnt int, run_cnt int, add_cnt int, delete_cnt int, update_cnt int, conflict_cnt int, fks bool, latency float);" > "$RESULTS"

        # parameters for testing; the four arrays are parallel, one entry per scenario
        ROW_NUM=1000000
        TABLE_NUM=2
        EDIT_CNT=60000
        names=('adds_only' 'deletes_only' 'updates_only' 'adds_updates_deletes')
        adds=($EDIT_CNT 0 0 $EDIT_CNT)
        deletes=(0 $EDIT_CNT 0 $EDIT_CNT)
        updates=(0 0 $EDIT_CNT $EDIT_CNT)

        wd=$(pwd)
        for i in "${!names[@]}"; do
            # every iteration ends inside $TMPDIR, so return to the checkout first
            cd "$wd"
            echo "${names[$i]}, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}"

            # data.py creates files for import
            python ${{ env.SCRIPT_DIR }}/data.py "$DATADIR" $TABLE_NUM $ROW_NUM ${adds[$i]} ${deletes[$i]} ${updates[$i]}

            # setup.sh runs the import and commit process for a set of data files
            TMPDIR=$gw/tmp
            ./${{ env.SCRIPT_DIR}}/setup.sh "$TMPDIR" "$DATADIR"

            # small python script times merge, we suppress errcodes but print error messages;
            # output is captured as text so err.log holds the readable message rather
            # than a bytes repr, and the latency is taken right after the merge returns
            cd "$TMPDIR"
            python3 -c "import time, subprocess, sys; start = time.time(); res = subprocess.run(['dolt', 'merge', '--squash', 'main'], capture_output=True, text=True, errors='replace'); latency = time.time() - start; err = res.stdout + res.stderr if res.returncode != 0 else ''; print(latency); sys.stderr.write(err)" 1> lat.log 2> err.log
            latency=$(cat lat.log)
            cat err.log

            # count conflicts in first table
            conflicts=$(dolt sql -r csv -q "select count(*) from dolt_conflicts_table0;" | tail -1)

            echo "INSERT INTO ${{ env.RESULT_TABLE_NAME }} values ('${names[$i]}', $TABLE_NUM, $ROW_NUM, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}, $conflicts, true, $latency);" >> "$RESULTS"
        done
        echo "result_path=$RESULTS" >> "$GITHUB_OUTPUT"

    # Loads the benchmark results into a clone of the DoltHub database, commits
    # and force-pushes them to the branch named by the dispatch payload, then
    # exports a CSV report and the average latency as the step outputs
    # `report_path` and `avg`.
    - name: Report
      id: report
      run: |
        gw="$GITHUB_WORKSPACE"
        in="${{ steps.bench.outputs.result_path }}"
        query="select name, add_cnt, delete_cnt, update_cnt, round(latency, 2) as latency from ${{ env.RESULT_TABLE_NAME }}"
        summaryq="select round(avg(latency), 2) as avg from ${{ env.RESULT_TABLE_NAME }}"

        out="$gw/results.csv"
        dolt_dir="$gw/merge-perf"

        dolt config --global --add user.email "merge-perf@dolthub.com"
        dolt config --global --add user.name "merge-perf"

        echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import
        dolt clone ${{ env.DOLTHUB_DB }} "$dolt_dir"

        cd "$dolt_dir"

        branch="${{ github.event.client_payload.commit_to_branch }}"
        # checkout branch, creating it when the lookup prints nothing; the command
        # substitution is quoted so `[ -z ... ]` always receives exactly one
        # argument, even when the query prints a multi-word result table
        if [ -z "$(dolt sql -q "select 1 from dolt_branches where name = '$branch';")" ]; then
          dolt checkout -b "$branch"
        else
          dolt checkout "$branch"
        fi

        # replace any results table left on the branch by an earlier run
        dolt sql -q "drop table if exists ${{ env.RESULT_TABLE_NAME }}"

        # load results
        dolt sql < "$in"

        # push results to dolthub
        dolt add ${{ env.RESULT_TABLE_NAME }}
        dolt commit -m "CI commit"
        dolt push -f origin "$branch"

        # generate report
        dolt sql -r csv -q "$query" > "$out"

        cat "$out"
        echo "report_path=$out" >> "$GITHUB_OUTPUT"

        # the last csv line is the single value row (the first line is the header)
        avg=$(dolt sql -r csv -q "$summaryq" | tail -1)
        echo "avg=$avg" >> "$GITHUB_OUTPUT"

    # The three steps below run only when the dispatch payload names an email
    # recipient. The comparison has to sit inside `${{ }}`: any text after the
    # closing braces turns the whole condition into a non-empty string, which
    # always evaluates to true.
    - name: Format Results
      id: html
      if: ${{ github.event.client_payload.email_recipient != '' }}
      run: |
        gw="$GITHUB_WORKSPACE"
        in="${{ steps.report.outputs.report_path }}"
        out="$gw/results.html"

        # turn the csv report into an html table; the first csv line is the header
        echo "<table>" > "$out"
        print_header=true
        while IFS= read -r line; do
          if [ "$print_header" = true ]; then
            echo "  <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
            print_header=false
            continue
          fi
          echo "  <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
        done < "$in"
        echo "</table>" >> "$out"

        # second table with the average latency; close <td> before <tr>
        avg="${{ steps.report.outputs.avg }}"
        echo "<table><tr><th>Average</th></tr><tr><td>$avg</td></tr></table>" >> "$out"

        cat "$out"
        echo "html=$out" >> "$GITHUB_OUTPUT"

    - name: Configure AWS Credentials
      if: ${{ github.event.client_payload.email_recipient != '' }}
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    # Sends the html file written by the `html` step as the message body.
    - name: Send Email
      uses: ./.github/actions/ses-email-action
      if: ${{ github.event.client_payload.email_recipient != '' }}
      with:
        region: us-west-2
        toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
        subject: 'Merge Performance Benchmarks: ${{ github.event.client_payload.version }}'
        bodyPath: ${{ steps.html.outputs.html }}
        template: 'SysbenchTemplate'

    # The three steps below run only when the dispatch payload carries an
    # issue id. The comparison has to sit inside `${{ }}`: any text after the
    # closing braces turns the whole condition into a non-empty string, which
    # always evaluates to true.
    - name: Read CSV
      if: ${{ github.event.client_payload.issue_id != '' }}
      id: csv
      uses: juliangruber/read-file-action@v1
      with:
        path: "${{ steps.report.outputs.report_path }}"

    # Converts the csv report text into a markdown table.
    - name: Create MD
      if: ${{ github.event.client_payload.issue_id != '' }}
      uses: dolthub/csv-to-md-table-action@v4
      id: md
      with:
        csvinput: ${{ steps.csv.outputs.content }}

    # Posts the markdown table on the issue / pull request, mentioning the actor.
    - uses: mshick/add-pr-comment@v2
      if: ${{ github.event.client_payload.issue_id != '' }}
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
        issue: ${{ github.event.client_payload.issue_id }}
        message-failure: merge benchmark failed
        message-cancelled: merge benchmark cancelled
        allow-repeats: true
        message: |
          @${{ github.event.client_payload.actor }} __DOLT__
          ${{ steps.md.outputs.markdown-table }}